From dd17c8f72993f9461e9c19250e3f155d6d99df22 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 29 Oct 2009 22:34:15 +0900 Subject: percpu: remove per_cpu__ prefix. Now that the return from alloc_percpu is compatible with the address of per-cpu vars, it makes sense to hand around the address of per-cpu variables. To make this sane, we remove the per_cpu__ prefix we used created to stop people accidentally using these vars directly. Now we have sparse, we can use that (next patch). tj: * Updated to convert stuff which were missed by or added after the original patch. * Kill per_cpu_var() macro. Signed-off-by: Rusty Russell Signed-off-by: Tejun Heo Reviewed-by: Christoph Lameter --- include/asm-generic/percpu.h | 12 ++++++------ include/linux/percpu-defs.h | 18 ++++++------------ include/linux/percpu.h | 5 ++--- include/linux/vmstat.h | 8 ++++---- 4 files changed, 18 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index 8087b90d4673..ca6f0491412b 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -50,11 +50,11 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; * offset. */ #define per_cpu(var, cpu) \ - (*SHIFT_PERCPU_PTR(&per_cpu_var(var), per_cpu_offset(cpu))) + (*SHIFT_PERCPU_PTR(&(var), per_cpu_offset(cpu))) #define __get_cpu_var(var) \ - (*SHIFT_PERCPU_PTR(&per_cpu_var(var), my_cpu_offset)) + (*SHIFT_PERCPU_PTR(&(var), my_cpu_offset)) #define __raw_get_cpu_var(var) \ - (*SHIFT_PERCPU_PTR(&per_cpu_var(var), __my_cpu_offset)) + (*SHIFT_PERCPU_PTR(&(var), __my_cpu_offset)) #define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset) #define __this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset) @@ -66,9 +66,9 @@ extern void setup_per_cpu_areas(void); #else /* ! SMP */ -#define per_cpu(var, cpu) (*((void)(cpu), &per_cpu_var(var))) -#define __get_cpu_var(var) per_cpu_var(var) -#define __raw_get_cpu_var(var) per_cpu_var(var) +#define per_cpu(var, cpu) (*((void)(cpu), &(var))) +#define __get_cpu_var(var) (var) +#define __raw_get_cpu_var(var) (var) #define this_cpu_ptr(ptr) per_cpu_ptr(ptr, 0) #define __this_cpu_ptr(ptr) this_cpu_ptr(ptr) diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 5a5d6ce4bd55..ee99f6c2cdcd 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -1,12 +1,6 @@ #ifndef _LINUX_PERCPU_DEFS_H #define _LINUX_PERCPU_DEFS_H -/* - * Determine the real variable name from the name visible in the - * kernel sources. - */ -#define per_cpu_var(var) per_cpu__##var - /* * Base implementations of per-CPU variable declarations and definitions, where * the section in which the variable is to be placed is provided by the @@ -56,24 +50,24 @@ */ #define DECLARE_PER_CPU_SECTION(type, name, sec) \ extern __PCPU_DUMMY_ATTRS char __pcpu_scope_##name; \ - extern __PCPU_ATTRS(sec) __typeof__(type) per_cpu__##name + extern __PCPU_ATTRS(sec) __typeof__(type) name #define DEFINE_PER_CPU_SECTION(type, name, sec) \ __PCPU_DUMMY_ATTRS char __pcpu_scope_##name; \ extern __PCPU_DUMMY_ATTRS char __pcpu_unique_##name; \ __PCPU_DUMMY_ATTRS char __pcpu_unique_##name; \ __PCPU_ATTRS(sec) PER_CPU_DEF_ATTRIBUTES __weak \ - __typeof__(type) per_cpu__##name + __typeof__(type) name #else /* * Normal declaration and definition macros. 
*/ #define DECLARE_PER_CPU_SECTION(type, name, sec) \ - extern __PCPU_ATTRS(sec) __typeof__(type) per_cpu__##name + extern __PCPU_ATTRS(sec) __typeof__(type) name #define DEFINE_PER_CPU_SECTION(type, name, sec) \ __PCPU_ATTRS(sec) PER_CPU_DEF_ATTRIBUTES \ - __typeof__(type) per_cpu__##name + __typeof__(type) name #endif /* @@ -137,8 +131,8 @@ /* * Intermodule exports for per-CPU variables. */ -#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) -#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var) +#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(var) +#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(var) #endif /* _LINUX_PERCPU_DEFS_H */ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 522f421ec213..e12410e55e05 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -182,7 +182,7 @@ static inline void *pcpu_lpage_remapped(void *kaddr) #ifndef percpu_read # define percpu_read(var) \ ({ \ - typeof(per_cpu_var(var)) __tmp_var__; \ + typeof(var) __tmp_var__; \ __tmp_var__ = get_cpu_var(var); \ put_cpu_var(var); \ __tmp_var__; \ @@ -253,8 +253,7 @@ do { \ /* * Optimized manipulation for memory allocated through the per cpu - * allocator or for addresses of per cpu variables (can be determined - * using per_cpu_var(xx). + * allocator or for addresses of per cpu variables. * * These operation guarantee exclusivity of access for other operations * on the *same* processor. The assumption is that per cpu data is only diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index d85889710f9b..3e489fda11a1 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -76,22 +76,22 @@ DECLARE_PER_CPU(struct vm_event_state, vm_event_states); static inline void __count_vm_event(enum vm_event_item item) { - __this_cpu_inc(per_cpu_var(vm_event_states).event[item]); + __this_cpu_inc(vm_event_states.event[item]); } static inline void count_vm_event(enum vm_event_item item) { - this_cpu_inc(per_cpu_var(vm_event_states).event[item]); + this_cpu_inc(vm_event_states.event[item]); } static inline void __count_vm_events(enum vm_event_item item, long delta) { - __this_cpu_add(per_cpu_var(vm_event_states).event[item], delta); + __this_cpu_add(vm_event_states.event[item], delta); } static inline void count_vm_events(enum vm_event_item item, long delta) { - this_cpu_add(per_cpu_var(vm_event_states).event[item], delta); + this_cpu_add(vm_event_states.event[item], delta); } extern void all_vm_events(unsigned long *); -- cgit v1.2.3 From f7b64fe806029e0a0454df132eec3c5ab576102c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 29 Oct 2009 22:34:15 +0900 Subject: percpu: make access macros universal Now that per_cpu__ prefix is gone, there's no distinction between static and dynamic percpu variables. Make get_cpu_var() take dynamic percpu variables and ensure that all macros have parentheses around the parameter evaluation and evaluate the variable parameter only once such that any expression which evaluates to percpu address can be used safely. Signed-off-by: Tejun Heo --- include/linux/percpu.h | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index e12410e55e05..f965f833a643 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -27,10 +27,13 @@ * we force a syntax error here if it isn't. 
*/ #define get_cpu_var(var) (*({ \ - extern int simple_identifier_##var(void); \ preempt_disable(); \ &__get_cpu_var(var); })) -#define put_cpu_var(var) preempt_enable() + +#define put_cpu_var(var) do { \ + (void)(var); \ + preempt_enable(); \ +} while (0) #ifdef CONFIG_SMP @@ -182,17 +185,19 @@ static inline void *pcpu_lpage_remapped(void *kaddr) #ifndef percpu_read # define percpu_read(var) \ ({ \ - typeof(var) __tmp_var__; \ - __tmp_var__ = get_cpu_var(var); \ - put_cpu_var(var); \ - __tmp_var__; \ + typeof(var) *pr_ptr__ = &(var); \ + typeof(var) pr_ret__; \ + pr_ret__ = get_cpu_var(*pr_ptr__); \ + put_cpu_var(*pr_ptr__); \ + pr_ret__; \ }) #endif #define __percpu_generic_to_op(var, val, op) \ do { \ - get_cpu_var(var) op val; \ - put_cpu_var(var); \ + typeof(var) *pgto_ptr__ = &(var); \ + get_cpu_var(*pgto_ptr__) op val; \ + put_cpu_var(*pgto_ptr__); \ } while (0) #ifndef percpu_write @@ -304,7 +309,7 @@ do { \ #define _this_cpu_generic_to_op(pcp, val, op) \ do { \ preempt_disable(); \ - *__this_cpu_ptr(&pcp) op val; \ + *__this_cpu_ptr(&(pcp)) op val; \ preempt_enable(); \ } while (0) -- cgit v1.2.3 From e0fdb0e050eae331046385643618f12452aa7e73 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 29 Oct 2009 22:34:15 +0900 Subject: percpu: add __percpu for sparse. We have to make __kernel "__attribute__((address_space(0)))" so we can cast to it. tj: * put_cpu_var() update. * Annotations added to dynamic allocator interface. Signed-off-by: Rusty Russell Cc: Al Viro Signed-off-by: Tejun Heo --- include/asm-generic/percpu.h | 4 +++- include/linux/compiler.h | 4 +++- include/linux/percpu-defs.h | 2 +- include/linux/percpu.h | 18 +++++++++++------- 4 files changed, 18 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index ca6f0491412b..fded453fd25c 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -41,7 +41,9 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; * Only S390 provides its own means of moving the pointer. */ #ifndef SHIFT_PERCPU_PTR -#define SHIFT_PERCPU_PTR(__p, __offset) RELOC_HIDE((__p), (__offset)) +/* Weird cast keeps both GCC and sparse happy. */ +#define SHIFT_PERCPU_PTR(__p, __offset) \ + RELOC_HIDE((typeof(*(__p)) __kernel __force *)(__p), (__offset)) #endif /* diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 04fb5135b4e1..abba8045c6ef 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -5,7 +5,7 @@ #ifdef __CHECKER__ # define __user __attribute__((noderef, address_space(1))) -# define __kernel /* default address space */ +# define __kernel __attribute__((address_space(0))) # define __safe __attribute__((safe)) # define __force __attribute__((force)) # define __nocast __attribute__((nocast)) @@ -15,6 +15,7 @@ # define __acquire(x) __context__(x,1) # define __release(x) __context__(x,-1) # define __cond_lock(x,c) ((c) ? 
({ __acquire(x); 1; }) : 0) +# define __percpu __attribute__((noderef, address_space(3))) extern void __chk_user_ptr(const volatile void __user *); extern void __chk_io_ptr(const volatile void __iomem *); #else @@ -32,6 +33,7 @@ extern void __chk_io_ptr(const volatile void __iomem *); # define __acquire(x) (void)0 # define __release(x) (void)0 # define __cond_lock(x,c) (c) +# define __percpu #endif #ifdef __KERNEL__ diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index ee99f6c2cdcd..0fa0cb524250 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -12,7 +12,7 @@ * that section. */ #define __PCPU_ATTRS(sec) \ - __attribute__((section(PER_CPU_BASE_SECTION sec))) \ + __percpu __attribute__((section(PER_CPU_BASE_SECTION sec))) \ PER_CPU_ATTRIBUTES #define __PCPU_DUMMY_ATTRS \ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index f965f833a643..2c0d31a3f6b6 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -30,8 +30,12 @@ preempt_disable(); \ &__get_cpu_var(var); })) +/* + * The weird & is necessary because sparse considers (void)(var) to be + * a direct dereference of percpu variable (var). + */ #define put_cpu_var(var) do { \ - (void)(var); \ + (void)&(var); \ preempt_enable(); \ } while (0) @@ -130,9 +134,9 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size, */ #define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu))) -extern void *__alloc_reserved_percpu(size_t size, size_t align); -extern void *__alloc_percpu(size_t size, size_t align); -extern void free_percpu(void *__pdata); +extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align); +extern void __percpu *__alloc_percpu(size_t size, size_t align); +extern void free_percpu(void __percpu *__pdata); #ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA extern void __init setup_per_cpu_areas(void); @@ -142,7 +146,7 @@ extern void __init setup_per_cpu_areas(void); #define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) -static inline void *__alloc_percpu(size_t size, size_t align) +static inline void __percpu *__alloc_percpu(size_t size, size_t align) { /* * Can't easily make larger alignment work with kmalloc. WARN @@ -153,7 +157,7 @@ static inline void *__alloc_percpu(size_t size, size_t align) return kzalloc(size, GFP_KERNEL); } -static inline void free_percpu(void *p) +static inline void free_percpu(void __percpu *p) { kfree(p); } @@ -168,7 +172,7 @@ static inline void *pcpu_lpage_remapped(void *kaddr) #endif /* CONFIG_SMP */ #define alloc_percpu(type) \ - (typeof(type) *)__alloc_percpu(sizeof(type), __alignof__(type)) + (typeof(type) __percpu *)__alloc_percpu(sizeof(type), __alignof__(type)) /* * Optional methods for optimized non-lvalue per-cpu variable access. -- cgit v1.2.3 From 545695fb41da117928ab946067a42d9e15fd009d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 29 Oct 2009 22:34:15 +0900 Subject: percpu: make accessors check for percpu pointer in sparse The previous patch made sparse warn about percpu variables being used directly without going through percpu accessors. This patch implements the other half - checking whether non percpu variable is passed into percpu accessors. 
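As an illustration (this snippet is not part of the patch and the
variable names are made up), the __verify_pcpu_ptr() check added below
is meant to make sparse complain when a plain variable is handed to a
percpu accessor, while gcc keeps accepting both lines:

	/* assumes <linux/percpu.h>; names are hypothetical */
	DEFINE_PER_CPU(int, hypothetical_counter);
	static int plain_counter;

	static void example(void)
	{
		/* ok: &hypothetical_counter is in the __percpu address space */
		this_cpu_inc(hypothetical_counter);
		/* sparse should now warn: &plain_counter is a plain pointer */
		this_cpu_inc(plain_counter);
	}
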
Signed-off-by: Tejun Heo Cc: Rusty Russell Cc: Al Viro --- include/asm-generic/percpu.h | 6 ++++-- include/linux/percpu-defs.h | 20 ++++++++++++++++++-- include/linux/percpu.h | 2 ++ 3 files changed, 24 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index fded453fd25c..04f91c2d3f7b 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -42,8 +42,10 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; */ #ifndef SHIFT_PERCPU_PTR /* Weird cast keeps both GCC and sparse happy. */ -#define SHIFT_PERCPU_PTR(__p, __offset) \ - RELOC_HIDE((typeof(*(__p)) __kernel __force *)(__p), (__offset)) +#define SHIFT_PERCPU_PTR(__p, __offset) ({ \ + __verify_pcpu_ptr((__p)); \ + RELOC_HIDE((typeof(*(__p)) __kernel __force *)(__p), (__offset)); \ +}) #endif /* diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 0fa0cb524250..1fa36eb54b6a 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -18,6 +18,16 @@ #define __PCPU_DUMMY_ATTRS \ __attribute__((section(".discard"), unused)) +/* + * Macro which verifies @ptr is a percpu pointer without evaluating + * @ptr. This is to be used in percpu accessors to verify that the + * input parameter is a percpu pointer. + */ +#define __verify_pcpu_ptr(ptr) do { \ + void __percpu *__vpp_verify = (typeof(ptr))NULL; \ + (void)__vpp_verify; \ +} while (0) + /* * s390 and alpha modules require percpu variables to be defined as * weak to force the compiler to generate GOT based external @@ -129,10 +139,16 @@ __aligned(PAGE_SIZE) /* - * Intermodule exports for per-CPU variables. + * Intermodule exports for per-CPU variables. sparse forgets about + * address space across EXPORT_SYMBOL(), change EXPORT_SYMBOL() to + * noop if __CHECKER__. */ +#ifndef __CHECKER__ #define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(var) #define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(var) - +#else +#define EXPORT_PER_CPU_SYMBOL(var) +#define EXPORT_PER_CPU_SYMBOL_GPL(var) +#endif #endif /* _LINUX_PERCPU_DEFS_H */ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 2c0d31a3f6b6..42878f0cd0e2 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -237,6 +237,7 @@ extern void __bad_size_call_parameter(void); #define __pcpu_size_call_return(stem, variable) \ ({ typeof(variable) pscr_ret__; \ + __verify_pcpu_ptr(&(variable)); \ switch(sizeof(variable)) { \ case 1: pscr_ret__ = stem##1(variable);break; \ case 2: pscr_ret__ = stem##2(variable);break; \ @@ -250,6 +251,7 @@ extern void __bad_size_call_parameter(void); #define __pcpu_size_call(stem, variable, ...) \ do { \ + __verify_pcpu_ptr(&(variable)); \ switch(sizeof(variable)) { \ case 1: stem##1(variable, __VA_ARGS__);break; \ case 2: stem##2(variable, __VA_ARGS__);break; \ -- cgit v1.2.3 From 5db53f3e80dee2d9dff5e534f9e9fe1db17c9936 Mon Sep 17 00:00:00 2001 From: Joern Engel Date: Fri, 20 Nov 2009 20:13:39 +0100 Subject: [LogFS] add new flash file system This is a new flash file system. 
See Documentation/filesystems/logfs.txt Signed-off-by: Joern Engel --- Documentation/filesystems/00-INDEX | 2 + Documentation/filesystems/logfs.txt | 241 ++++ fs/Kconfig | 1 + fs/Makefile | 1 + fs/logfs/Kconfig | 17 + fs/logfs/Makefile | 13 + fs/logfs/compr.c | 95 ++ fs/logfs/dev_bdev.c | 263 ++++ fs/logfs/dev_mtd.c | 253 ++++ fs/logfs/dir.c | 818 +++++++++++++ fs/logfs/file.c | 263 ++++ fs/logfs/gc.c | 730 ++++++++++++ fs/logfs/inode.c | 417 +++++++ fs/logfs/journal.c | 879 ++++++++++++++ fs/logfs/logfs.h | 722 +++++++++++ fs/logfs/logfs_abi.h | 627 ++++++++++ fs/logfs/readwrite.c | 2246 +++++++++++++++++++++++++++++++++++ fs/logfs/segment.c | 924 ++++++++++++++ fs/logfs/super.c | 634 ++++++++++ include/linux/btree-128.h | 109 ++ include/linux/btree-type.h | 147 +++ include/linux/btree.h | 243 ++++ lib/Kconfig | 3 + lib/Makefile | 1 + lib/btree.c | 797 +++++++++++++ 25 files changed, 10446 insertions(+) create mode 100644 Documentation/filesystems/logfs.txt create mode 100644 fs/logfs/Kconfig create mode 100644 fs/logfs/Makefile create mode 100644 fs/logfs/compr.c create mode 100644 fs/logfs/dev_bdev.c create mode 100644 fs/logfs/dev_mtd.c create mode 100644 fs/logfs/dir.c create mode 100644 fs/logfs/file.c create mode 100644 fs/logfs/gc.c create mode 100644 fs/logfs/inode.c create mode 100644 fs/logfs/journal.c create mode 100644 fs/logfs/logfs.h create mode 100644 fs/logfs/logfs_abi.h create mode 100644 fs/logfs/readwrite.c create mode 100644 fs/logfs/segment.c create mode 100644 fs/logfs/super.c create mode 100644 include/linux/btree-128.h create mode 100644 include/linux/btree-type.h create mode 100644 include/linux/btree.h create mode 100644 lib/btree.c (limited to 'include') diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX index f15621ee5599..d362aa543b27 100644 --- a/Documentation/filesystems/00-INDEX +++ b/Documentation/filesystems/00-INDEX @@ -62,6 +62,8 @@ jfs.txt - info and mount options for the JFS filesystem. locks.txt - info on file locking implementations, flock() vs. fcntl(), etc. +logfs.txt + - info on the LogFS flash filesystem. mandatory-locking.txt - info on the Linux implementation of Sys V mandatory file locking. ncpfs.txt diff --git a/Documentation/filesystems/logfs.txt b/Documentation/filesystems/logfs.txt new file mode 100644 index 000000000000..e64c94ba401a --- /dev/null +++ b/Documentation/filesystems/logfs.txt @@ -0,0 +1,241 @@ + +The LogFS Flash Filesystem +========================== + +Specification +============= + +Superblocks +----------- + +Two superblocks exist at the beginning and end of the filesystem. +Each superblock is 256 Bytes large, with another 3840 Bytes reserved +for future purposes, making a total of 4096 Bytes. + +Superblock locations may differ for MTD and block devices. On MTD the +first non-bad block contains a superblock in the first 4096 Bytes and +the last non-bad block contains a superblock in the last 4096 Bytes. +On block devices, the first 4096 Bytes of the device contain the first +superblock and the last aligned 4096 Byte-block contains the second +superblock. + +For the most part, the superblocks can be considered read-only. They +are written only to correct errors detected within the superblocks, +move the journal and change the filesystem parameters through tunefs. +As a result, the superblock does not contain any fields that require +constant updates, like the amount of free space, etc. + +Segments +-------- + +The space in the device is split up into equal-sized segments. 
+Segments are the primary write unit of LogFS.  Within each segment,
+writes happen from front (low addresses) to back (high addresses).  If
+only a partial segment has been written, the segment number, the
+current position within it and optionally a write buffer are stored in
+the journal.
+
+Segments are erased as a whole.  Therefore Garbage Collection may be
+required to completely free a segment before it can be erased.
+
+Journal
+--------
+
+The journal contains all global information about the filesystem that
+is subject to frequent change.  At mount time, it has to be scanned
+for the most recent commit entry, which contains a list of pointers to
+all currently valid entries.
+
+Object Store
+------------
+
+All space except for the superblocks and journal is part of the object
+store.  Each segment contains a segment header and a number of
+objects, each consisting of the object header and the payload.
+Objects are either inodes, directory entries (dentries), file data
+blocks or indirect blocks.
+
+Levels
+------
+
+Garbage collection (GC) may fail if all data is written
+indiscriminately.  One requirement of GC is that data is separated
+roughly according to the distance between the tree root and the data.
+Effectively that means all file data is on level 0, indirect blocks
+are on levels 1, 2, 3, 4 or 5 for 1x, 2x, 3x, 4x or 5x indirect blocks,
+respectively.  Inode file data is on level 6 for the inodes and 7-11
+for indirect blocks.
+
+Each segment contains objects of a single level only.  As a result,
+each level requires its own separate segment to be open for writing.
+
+Inode File
+----------
+
+All inodes are stored in a special file, the inode file.  The single
+exception is the inode file's inode (master inode), which for obvious
+reasons is stored in the journal instead.  Instead of data blocks, the
+leaf nodes of the inode file are inodes.
+
+Aliases
+-------
+
+Writes in LogFS are done by means of a wandering tree.  A naïve
+implementation would require that for each write of a block, all
+parent blocks are written as well, since the block pointers have
+changed.  Such an implementation would not be very efficient.
+
+In LogFS, the block pointer changes are cached in the journal by means
+of alias entries.  Each alias consists of its logical address - inode
+number, block index, level and child number (index into block) - and
+the changed data.  Any 8-byte word can be changed in this manner.
+
+Currently aliases are used for block pointers, file size, file used
+bytes and the height of an inode's indirect tree.
+
+Segment Aliases
+---------------
+
+Related to regular aliases, these are used to handle bad blocks.
+Initially, bad blocks are handled by moving the affected segment
+content to a spare segment and noting this move in the journal with a
+segment alias, a simple (to, from) tuple.  GC will later empty this
+segment and the alias can be removed again.  This is used on MTD only.
+
+Vim
+---
+
+By cleverly predicting the lifetime of data, it is possible to
+separate long-living data from short-living data and thereby reduce
+the GC overhead later.  Each type of distinct life expectancy (vim)
+can have a separate segment open for writing.  Each (level, vim) tuple
+can be open just once.  If an open segment with unknown vim is
+encountered at mount time, it is closed and ignored henceforth.
+
+Indirect Tree
+-------------
+
+Inodes in LogFS are similar to FFS-style filesystems with direct and
+indirect block pointers.  One difference is that LogFS uses a single
+indirect pointer that can be either a 1x, 2x, etc. indirect pointer.
+A height field in the inode defines the height of the indirect tree
+and thereby the indirection of the pointer.
+
+Another difference is the addressing of indirect blocks.  In LogFS,
+the first 16 pointers in the first indirect block are left empty,
+corresponding to the 16 direct pointers in the inode.  In ext2 (maybe
+others as well) the first pointer in the first indirect block
+corresponds to logical block 12, skipping the 12 direct pointers.
+So where ext2 is using arithmetic to better utilize space, LogFS keeps
+arithmetic simple and uses compression to save space.
+
+Compression
+-----------
+
+Both file data and metadata can be compressed.  Compression for file
+data can be enabled with chattr +c and disabled with chattr -c.  Doing
+so has no effect on existing data, but new data will be stored
+accordingly.  New inodes will inherit the compression flag of the
+parent directory.
+
+Metadata is always compressed.  However, the space accounting ignores
+this and charges for the uncompressed size.  Failing to do so could
+result in GC failures when, after moving some data, indirect blocks
+compress worse than previously.  Even on a 100% full medium, GC must
+not consume any extra space, so the compression gains are lost space
+to the user.
+
+However, they are not lost space to the filesystem internals.  By
+cheating the user out of those bytes, the filesystem gained some slack
+space and GC will run less often and faster.
+
+Garbage Collection and Wear Leveling
+------------------------------------
+
+Garbage collection is invoked whenever the number of free segments
+falls below a threshold.  The best (known) candidate is picked based
+on the least amount of valid data contained in the segment.  All
+remaining valid data is copied elsewhere, thereby invalidating it.
+
+The GC code also checks for aliases and writes them back if their
+number gets too large.
+
+Wear leveling is done by occasionally picking a suboptimal segment for
+garbage collection.  If a stale segment's erase count is significantly
+lower than the active segments' erase counts, it will be picked.  Wear
+leveling is rate limited, so it will never monopolize the device for
+more than one segment's worth at a time.
+
+Values for "occasionally" and "significantly lower" are compile-time
+constants.
+
+Hashed directories
+------------------
+
+To satisfy efficient lookup(), directory entries are hashed and
+located based on the hash.  In order to both support large directories
+and not be overly inefficient for small directories, several hash
+tables of increasing size are used.  For each table, the hash value
+modulo the table size gives the table index.
+
+Table sizes are chosen to limit the number of indirect blocks with a
+fully populated table to 0, 1, 2 or 3, respectively.  So the first
+table contains 16 entries, the second 512-16, etc.
+
+The last table is special in several ways.  First, its size depends on
+the effective 32bit limit on telldir/seekdir cookies.  Since logfs
+uses the upper half of the address space for indirect blocks, the size
+is limited to 2^31.  Secondly, the table contains hash buckets with 16
+entries each.
+
+Using single-entry buckets would result in birthday "attacks".  At
+just 2^16 used entries, hash collisions would be likely (P >= 0.5).
+My math skills are insufficient to do the combinatorics for the 17x +collisions necessary to overflow a bucket, but testing showed that in +10,000 runs the lowest directory fill before a bucket overflow was +188,057,130 entries with an average of 315,149,915 entries. So for +directory sizes of up to a million, bucket overflows should be +virtually impossible under normal circumstances. + +With carefully chosen filenames, it is obviously possible to cause an +overflow with just 21 entries (4 higher tables + 16 entries + 1). So +there may be a security concern if a malicious user has write access +to a directory. + +Open For Discussion +=================== + +Device Address Space +-------------------- + +A device address space is used for caching. Both block devices and +MTD provide functions to either read a single page or write a segment. +Partial segments may be written for data integrity, but where possible +complete segments are written for performance on simple block device +flash media. + +Meta Inodes +----------- + +Inodes are stored in the inode file, which is just a regular file for +most purposes. At umount time, however, the inode file needs to +remain open until all dirty inodes are written. So +generic_shutdown_super() may not close this inode, but shouldn't +complain about remaining inodes due to the inode file either. Same +goes for mapping inode of the device address space. + +Currently logfs uses a hack that essentially copies part of fs/inode.c +code over. A general solution would be preferred. + +Indirect block mapping +---------------------- + +With compression, the block device (or mapping inode) cannot be used +to cache indirect blocks. Some other place is required. Currently +logfs uses the top half of each inode's address space. The low 8TB +(on 32bit) are filled with file data, the high 8TB are used for +indirect blocks. + +One problem is that 16TB files created on 64bit systems actually have +data in the top 8TB. But files >16TB would cause problems anyway, so +only the limit has changed. diff --git a/fs/Kconfig b/fs/Kconfig index 64d44efad7a5..7405f071be67 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -177,6 +177,7 @@ source "fs/efs/Kconfig" source "fs/jffs2/Kconfig" # UBIFS File system configuration source "fs/ubifs/Kconfig" +source "fs/logfs/Kconfig" source "fs/cramfs/Kconfig" source "fs/squashfs/Kconfig" source "fs/freevxfs/Kconfig" diff --git a/fs/Makefile b/fs/Makefile index af6d04700d9c..c3633aa46911 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -99,6 +99,7 @@ obj-$(CONFIG_NTFS_FS) += ntfs/ obj-$(CONFIG_UFS_FS) += ufs/ obj-$(CONFIG_EFS_FS) += efs/ obj-$(CONFIG_JFFS2_FS) += jffs2/ +obj-$(CONFIG_LOGFS) += logfs/ obj-$(CONFIG_UBIFS_FS) += ubifs/ obj-$(CONFIG_AFFS_FS) += affs/ obj-$(CONFIG_ROMFS_FS) += romfs/ diff --git a/fs/logfs/Kconfig b/fs/logfs/Kconfig new file mode 100644 index 000000000000..daf9a9b32dd3 --- /dev/null +++ b/fs/logfs/Kconfig @@ -0,0 +1,17 @@ +config LOGFS + tristate "LogFS file system (EXPERIMENTAL)" + depends on (MTD || BLOCK) && EXPERIMENTAL + select ZLIB_INFLATE + select ZLIB_DEFLATE + select CRC32 + select BTREE + help + Flash filesystem aimed to scale efficiently to large devices. + In comparison to JFFS2 it offers significantly faster mount + times and potentially less RAM usage, although the latter has + not been measured yet. + + In its current state it is still very experimental and should + not be used for other than testing purposes. + + If unsure, say N. 
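As a usage sketch (not part of the patch), building logfs as a module
with the Kconfig entry above amounts to something like:

	CONFIG_EXPERIMENTAL=y
	CONFIG_BLOCK=y
	CONFIG_LOGFS=m

ZLIB_INFLATE, ZLIB_DEFLATE, CRC32 and BTREE do not need to be set by
hand; they are pulled in through the select statements.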
diff --git a/fs/logfs/Makefile b/fs/logfs/Makefile new file mode 100644 index 000000000000..4820027787ee --- /dev/null +++ b/fs/logfs/Makefile @@ -0,0 +1,13 @@ +obj-$(CONFIG_LOGFS) += logfs.o + +logfs-y += compr.o +logfs-y += dir.o +logfs-y += file.o +logfs-y += gc.o +logfs-y += inode.o +logfs-y += journal.o +logfs-y += readwrite.o +logfs-y += segment.o +logfs-y += super.o +logfs-$(CONFIG_BLOCK) += dev_bdev.o +logfs-$(CONFIG_MTD) += dev_mtd.o diff --git a/fs/logfs/compr.c b/fs/logfs/compr.c new file mode 100644 index 000000000000..44bbfd249abc --- /dev/null +++ b/fs/logfs/compr.c @@ -0,0 +1,95 @@ +/* + * fs/logfs/compr.c - compression routines + * + * As should be obvious for Linux kernel code, license is GPLv2 + * + * Copyright (c) 2005-2008 Joern Engel + */ +#include "logfs.h" +#include +#include + +#define COMPR_LEVEL 3 + +static DEFINE_MUTEX(compr_mutex); +static struct z_stream_s stream; + +int logfs_compress(void *in, void *out, size_t inlen, size_t outlen) +{ + int err, ret; + + ret = -EIO; + mutex_lock(&compr_mutex); + err = zlib_deflateInit(&stream, COMPR_LEVEL); + if (err != Z_OK) + goto error; + + stream.next_in = in; + stream.avail_in = inlen; + stream.total_in = 0; + stream.next_out = out; + stream.avail_out = outlen; + stream.total_out = 0; + + err = zlib_deflate(&stream, Z_FINISH); + if (err != Z_STREAM_END) + goto error; + + err = zlib_deflateEnd(&stream); + if (err != Z_OK) + goto error; + + if (stream.total_out >= stream.total_in) + goto error; + + ret = stream.total_out; +error: + mutex_unlock(&compr_mutex); + return ret; +} + +int logfs_uncompress(void *in, void *out, size_t inlen, size_t outlen) +{ + int err, ret; + + ret = -EIO; + mutex_lock(&compr_mutex); + err = zlib_inflateInit(&stream); + if (err != Z_OK) + goto error; + + stream.next_in = in; + stream.avail_in = inlen; + stream.total_in = 0; + stream.next_out = out; + stream.avail_out = outlen; + stream.total_out = 0; + + err = zlib_inflate(&stream, Z_FINISH); + if (err != Z_STREAM_END) + goto error; + + err = zlib_inflateEnd(&stream); + if (err != Z_OK) + goto error; + + ret = 0; +error: + mutex_unlock(&compr_mutex); + return ret; +} + +int __init logfs_compr_init(void) +{ + size_t size = max(zlib_deflate_workspacesize(), + zlib_inflate_workspacesize()); + stream.workspace = vmalloc(size); + if (!stream.workspace) + return -ENOMEM; + return 0; +} + +void logfs_compr_exit(void) +{ + vfree(stream.workspace); +} diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c new file mode 100644 index 000000000000..58a057b6e1af --- /dev/null +++ b/fs/logfs/dev_bdev.c @@ -0,0 +1,263 @@ +/* + * fs/logfs/dev_bdev.c - Device access methods for block devices + * + * As should be obvious for Linux kernel code, license is GPLv2 + * + * Copyright (c) 2005-2008 Joern Engel + */ +#include "logfs.h" +#include +#include +#include + +#define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1)) + +static void request_complete(struct bio *bio, int err) +{ + complete((struct completion *)bio->bi_private); +} + +static int sync_request(struct page *page, struct block_device *bdev, int rw) +{ + struct bio bio; + struct bio_vec bio_vec; + struct completion complete; + + bio_init(&bio); + bio.bi_io_vec = &bio_vec; + bio_vec.bv_page = page; + bio_vec.bv_len = PAGE_SIZE; + bio_vec.bv_offset = 0; + bio.bi_vcnt = 1; + bio.bi_idx = 0; + bio.bi_size = PAGE_SIZE; + bio.bi_bdev = bdev; + bio.bi_sector = page->index * (PAGE_SIZE >> 9); + init_completion(&complete); + bio.bi_private = &complete; + bio.bi_end_io = request_complete; + + submit_bio(rw, &bio); + 
generic_unplug_device(bdev_get_queue(bdev)); + wait_for_completion(&complete); + return test_bit(BIO_UPTODATE, &bio.bi_flags) ? 0 : -EIO; +} + +static int bdev_readpage(void *_sb, struct page *page) +{ + struct super_block *sb = _sb; + struct block_device *bdev = logfs_super(sb)->s_bdev; + int err; + + err = sync_request(page, bdev, READ); + if (err) { + ClearPageUptodate(page); + SetPageError(page); + } else { + SetPageUptodate(page); + ClearPageError(page); + } + unlock_page(page); + return err; +} + +static DECLARE_WAIT_QUEUE_HEAD(wq); + +static void writeseg_end_io(struct bio *bio, int err) +{ + const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + struct super_block *sb = bio->bi_private; + struct logfs_super *super = logfs_super(sb); + struct page *page; + + BUG_ON(!uptodate); /* FIXME: Retry io or write elsewhere */ + BUG_ON(err); + BUG_ON(bio->bi_vcnt == 0); + do { + page = bvec->bv_page; + if (--bvec >= bio->bi_io_vec) + prefetchw(&bvec->bv_page->flags); + + end_page_writeback(page); + } while (bvec >= bio->bi_io_vec); + bio_put(bio); + if (atomic_dec_and_test(&super->s_pending_writes)) + wake_up(&wq); +} + +static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index, + size_t nr_pages) +{ + struct logfs_super *super = logfs_super(sb); + struct address_space *mapping = super->s_mapping_inode->i_mapping; + struct bio *bio; + struct page *page; + struct request_queue *q = bdev_get_queue(sb->s_bdev); + unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9); + int i; + + bio = bio_alloc(GFP_NOFS, max_pages); + BUG_ON(!bio); /* FIXME: handle this */ + + for (i = 0; i < nr_pages; i++) { + if (i >= max_pages) { + /* Block layer cannot split bios :( */ + bio->bi_vcnt = i; + bio->bi_idx = 0; + bio->bi_size = i * PAGE_SIZE; + bio->bi_bdev = super->s_bdev; + bio->bi_sector = ofs >> 9; + bio->bi_private = sb; + bio->bi_end_io = writeseg_end_io; + atomic_inc(&super->s_pending_writes); + submit_bio(WRITE, bio); + + ofs += i * PAGE_SIZE; + index += i; + nr_pages -= i; + i = 0; + + bio = bio_alloc(GFP_NOFS, max_pages); + BUG_ON(!bio); + } + page = find_lock_page(mapping, index + i); + BUG_ON(!page); + bio->bi_io_vec[i].bv_page = page; + bio->bi_io_vec[i].bv_len = PAGE_SIZE; + bio->bi_io_vec[i].bv_offset = 0; + + BUG_ON(PageWriteback(page)); + set_page_writeback(page); + unlock_page(page); + } + bio->bi_vcnt = nr_pages; + bio->bi_idx = 0; + bio->bi_size = nr_pages * PAGE_SIZE; + bio->bi_bdev = super->s_bdev; + bio->bi_sector = ofs >> 9; + bio->bi_private = sb; + bio->bi_end_io = writeseg_end_io; + atomic_inc(&super->s_pending_writes); + submit_bio(WRITE, bio); + return 0; +} + +static void bdev_writeseg(struct super_block *sb, u64 ofs, size_t len) +{ + struct logfs_super *super = logfs_super(sb); + int head; + + BUG_ON(super->s_flags & LOGFS_SB_FLAG_RO); + + if (len == 0) { + /* This can happen when the object fit perfectly into a + * segment, the segment gets written per sync and subsequently + * closed. 
+ */ + return; + } + head = ofs & (PAGE_SIZE - 1); + if (head) { + ofs -= head; + len += head; + } + len = PAGE_ALIGN(len); + __bdev_writeseg(sb, ofs, ofs >> PAGE_SHIFT, len >> PAGE_SHIFT); + generic_unplug_device(bdev_get_queue(logfs_super(sb)->s_bdev)); +} + +static int bdev_erase(struct super_block *sb, loff_t to, size_t len) +{ + struct logfs_super *super = logfs_super(sb); + struct address_space *mapping = super->s_mapping_inode->i_mapping; + struct page *page; + pgoff_t index = to >> PAGE_SHIFT; + int i, nr_pages = len >> PAGE_SHIFT; + + BUG_ON(to & (PAGE_SIZE - 1)); + BUG_ON(len & (PAGE_SIZE - 1)); + + if (logfs_super(sb)->s_flags & LOGFS_SB_FLAG_RO) + return -EROFS; + + for (i = 0; i < nr_pages; i++) { + page = find_get_page(mapping, index + i); + if (page) { + memset(page_address(page), 0xFF, PAGE_SIZE); + page_cache_release(page); + } + } + return 0; +} + +static void bdev_sync(struct super_block *sb) +{ + struct logfs_super *super = logfs_super(sb); + + wait_event(wq, atomic_read(&super->s_pending_writes) == 0); +} + +static struct page *bdev_find_first_sb(struct super_block *sb, u64 *ofs) +{ + struct logfs_super *super = logfs_super(sb); + struct address_space *mapping = super->s_mapping_inode->i_mapping; + filler_t *filler = bdev_readpage; + + *ofs = 0; + return read_cache_page(mapping, 0, filler, sb); +} + +static struct page *bdev_find_last_sb(struct super_block *sb, u64 *ofs) +{ + struct logfs_super *super = logfs_super(sb); + struct address_space *mapping = super->s_mapping_inode->i_mapping; + filler_t *filler = bdev_readpage; + u64 pos = (super->s_bdev->bd_inode->i_size & ~0xfffULL) - 0x1000; + pgoff_t index = pos >> PAGE_SHIFT; + + *ofs = pos; + return read_cache_page(mapping, index, filler, sb); +} + +static int bdev_write_sb(struct super_block *sb, struct page *page) +{ + struct block_device *bdev = logfs_super(sb)->s_bdev; + + /* Nothing special to do for block devices. 
*/ + return sync_request(page, bdev, WRITE); +} + +static void bdev_put_device(struct super_block *sb) +{ + close_bdev_exclusive(logfs_super(sb)->s_bdev, FMODE_READ|FMODE_WRITE); +} + +static const struct logfs_device_ops bd_devops = { + .find_first_sb = bdev_find_first_sb, + .find_last_sb = bdev_find_last_sb, + .write_sb = bdev_write_sb, + .readpage = bdev_readpage, + .writeseg = bdev_writeseg, + .erase = bdev_erase, + .sync = bdev_sync, + .put_device = bdev_put_device, +}; + +int logfs_get_sb_bdev(struct file_system_type *type, int flags, + const char *devname, struct vfsmount *mnt) +{ + struct block_device *bdev; + + bdev = open_bdev_exclusive(devname, FMODE_READ|FMODE_WRITE, type); + if (IS_ERR(bdev)) + return PTR_ERR(bdev); + + if (MAJOR(bdev->bd_dev) == MTD_BLOCK_MAJOR) { + int mtdnr = MINOR(bdev->bd_dev); + close_bdev_exclusive(bdev, FMODE_READ|FMODE_WRITE); + return logfs_get_sb_mtd(type, flags, mtdnr, mnt); + } + + return logfs_get_sb_device(type, flags, NULL, bdev, &bd_devops, mnt); +} diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c new file mode 100644 index 000000000000..68e99d046c23 --- /dev/null +++ b/fs/logfs/dev_mtd.c @@ -0,0 +1,253 @@ +/* + * fs/logfs/dev_mtd.c - Device access methods for MTD + * + * As should be obvious for Linux kernel code, license is GPLv2 + * + * Copyright (c) 2005-2008 Joern Engel + */ +#include "logfs.h" +#include +#include +#include + +#define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1)) + +static int mtd_read(struct super_block *sb, loff_t ofs, size_t len, void *buf) +{ + struct mtd_info *mtd = logfs_super(sb)->s_mtd; + size_t retlen; + int ret; + + ret = mtd->read(mtd, ofs, len, &retlen, buf); + BUG_ON(ret == -EINVAL); + if (ret) + return ret; + + /* Not sure if we should loop instead. */ + if (retlen != len) + return -EIO; + + return 0; +} + +static int mtd_write(struct super_block *sb, loff_t ofs, size_t len, void *buf) +{ + struct logfs_super *super = logfs_super(sb); + struct mtd_info *mtd = super->s_mtd; + size_t retlen; + loff_t page_start, page_end; + int ret; + + if (super->s_flags & LOGFS_SB_FLAG_RO) + return -EROFS; + + BUG_ON((ofs >= mtd->size) || (len > mtd->size - ofs)); + BUG_ON(ofs != (ofs >> super->s_writeshift) << super->s_writeshift); + BUG_ON(len > PAGE_CACHE_SIZE); + page_start = ofs & PAGE_CACHE_MASK; + page_end = PAGE_CACHE_ALIGN(ofs + len) - 1; + ret = mtd->write(mtd, ofs, len, &retlen, buf); + if (ret || (retlen != len)) + return -EIO; + + return 0; +} + +/* + * For as long as I can remember (since about 2001) mtd->erase has been an + * asynchronous interface lacking the first driver to actually use the + * asynchronous properties. So just to prevent the first implementor of such + * a thing from breaking logfs in 2350, we do the usual pointless dance to + * declare a completion variable and wait for completion before returning + * from mtd_erase(). What an excercise in futility! 
+ */ +static void logfs_erase_callback(struct erase_info *ei) +{ + complete((struct completion *)ei->priv); +} + +static int mtd_erase_mapping(struct super_block *sb, loff_t ofs, size_t len) +{ + struct logfs_super *super = logfs_super(sb); + struct address_space *mapping = super->s_mapping_inode->i_mapping; + struct page *page; + pgoff_t index = ofs >> PAGE_SHIFT; + + for (index = ofs >> PAGE_SHIFT; index < (ofs + len) >> PAGE_SHIFT; index++) { + page = find_get_page(mapping, index); + if (!page) + continue; + memset(page_address(page), 0xFF, PAGE_SIZE); + page_cache_release(page); + } + return 0; +} + +static int mtd_erase(struct super_block *sb, loff_t ofs, size_t len) +{ + struct mtd_info *mtd = logfs_super(sb)->s_mtd; + struct erase_info ei; + DECLARE_COMPLETION_ONSTACK(complete); + int ret; + + BUG_ON(len % mtd->erasesize); + if (logfs_super(sb)->s_flags & LOGFS_SB_FLAG_RO) + return -EROFS; + + memset(&ei, 0, sizeof(ei)); + ei.mtd = mtd; + ei.addr = ofs; + ei.len = len; + ei.callback = logfs_erase_callback; + ei.priv = (long)&complete; + ret = mtd->erase(mtd, &ei); + if (ret) + return -EIO; + + wait_for_completion(&complete); + if (ei.state != MTD_ERASE_DONE) + return -EIO; + return mtd_erase_mapping(sb, ofs, len); +} + +static void mtd_sync(struct super_block *sb) +{ + struct mtd_info *mtd = logfs_super(sb)->s_mtd; + + if (mtd->sync) + mtd->sync(mtd); +} + +static int mtd_readpage(void *_sb, struct page *page) +{ + struct super_block *sb = _sb; + int err; + + err = mtd_read(sb, page->index << PAGE_SHIFT, PAGE_SIZE, + page_address(page)); + if (err == -EUCLEAN) { + err = 0; + /* FIXME: force GC this segment */ + } + if (err) { + ClearPageUptodate(page); + SetPageError(page); + } else { + SetPageUptodate(page); + ClearPageError(page); + } + unlock_page(page); + return err; +} + +static struct page *mtd_find_first_sb(struct super_block *sb, u64 *ofs) +{ + struct logfs_super *super = logfs_super(sb); + struct address_space *mapping = super->s_mapping_inode->i_mapping; + filler_t *filler = mtd_readpage; + struct mtd_info *mtd = super->s_mtd; + + if (!mtd->block_isbad) + return NULL; + + *ofs = 0; + while (mtd->block_isbad(mtd, *ofs)) { + *ofs += mtd->erasesize; + if (*ofs >= mtd->size) + return NULL; + } + BUG_ON(*ofs & ~PAGE_MASK); + return read_cache_page(mapping, *ofs >> PAGE_SHIFT, filler, sb); +} + +static struct page *mtd_find_last_sb(struct super_block *sb, u64 *ofs) +{ + struct logfs_super *super = logfs_super(sb); + struct address_space *mapping = super->s_mapping_inode->i_mapping; + filler_t *filler = mtd_readpage; + struct mtd_info *mtd = super->s_mtd; + + if (!mtd->block_isbad) + return NULL; + + *ofs = mtd->size - mtd->erasesize; + while (mtd->block_isbad(mtd, *ofs)) { + *ofs -= mtd->erasesize; + if (*ofs <= 0) + return NULL; + } + *ofs = *ofs + mtd->erasesize - 0x1000; + BUG_ON(*ofs & ~PAGE_MASK); + return read_cache_page(mapping, *ofs >> PAGE_SHIFT, filler, sb); +} + +static int __mtd_writeseg(struct super_block *sb, u64 ofs, pgoff_t index, + size_t nr_pages) +{ + struct logfs_super *super = logfs_super(sb); + struct address_space *mapping = super->s_mapping_inode->i_mapping; + struct page *page; + int i, err; + + for (i = 0; i < nr_pages; i++) { + page = find_lock_page(mapping, index + i); + BUG_ON(!page); + + err = mtd_write(sb, page->index << PAGE_SHIFT, PAGE_SIZE, + page_address(page)); + unlock_page(page); + page_cache_release(page); + if (err) + return err; + } + return 0; +} + +static void mtd_writeseg(struct super_block *sb, u64 ofs, size_t len) +{ + struct 
logfs_super *super = logfs_super(sb); + int head; + + if (super->s_flags & LOGFS_SB_FLAG_RO) + return; + + if (len == 0) { + /* This can happen when the object fit perfectly into a + * segment, the segment gets written per sync and subsequently + * closed. + */ + return; + } + head = ofs & (PAGE_SIZE - 1); + if (head) { + ofs -= head; + len += head; + } + len = PAGE_ALIGN(len); + __mtd_writeseg(sb, ofs, ofs >> PAGE_SHIFT, len >> PAGE_SHIFT); +} + +static void mtd_put_device(struct super_block *sb) +{ + put_mtd_device(logfs_super(sb)->s_mtd); +} + +static const struct logfs_device_ops mtd_devops = { + .find_first_sb = mtd_find_first_sb, + .find_last_sb = mtd_find_last_sb, + .readpage = mtd_readpage, + .writeseg = mtd_writeseg, + .erase = mtd_erase, + .sync = mtd_sync, + .put_device = mtd_put_device, +}; + +int logfs_get_sb_mtd(struct file_system_type *type, int flags, + int mtdnr, struct vfsmount *mnt) +{ + struct mtd_info *mtd; + const struct logfs_device_ops *devops = &mtd_devops; + + mtd = get_mtd_device(NULL, mtdnr); + return logfs_get_sb_device(type, flags, mtd, NULL, devops, mnt); +} diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c new file mode 100644 index 000000000000..89104e6f81c4 --- /dev/null +++ b/fs/logfs/dir.c @@ -0,0 +1,818 @@ +/* + * fs/logfs/dir.c - directory-related code + * + * As should be obvious for Linux kernel code, license is GPLv2 + * + * Copyright (c) 2005-2008 Joern Engel + */ +#include "logfs.h" + + +/* + * Atomic dir operations + * + * Directory operations are by default not atomic. Dentries and Inodes are + * created/removed/altered in seperate operations. Therefore we need to do + * a small amount of journaling. + * + * Create, link, mkdir, mknod and symlink all share the same function to do + * the work: __logfs_create. This function works in two atomic steps: + * 1. allocate inode (remember in journal) + * 2. allocate dentry (clear journal) + * + * As we can only get interrupted between the two, when the inode we just + * created is simply stored in the anchor. On next mount, if we were + * interrupted, we delete the inode. From a users point of view the + * operation never happened. + * + * Unlink and rmdir also share the same function: unlink. Again, this + * function works in two atomic steps + * 1. remove dentry (remember inode in journal) + * 2. unlink inode (clear journal) + * + * And again, on the next mount, if we were interrupted, we delete the inode. + * From a users point of view the operation succeeded. + * + * Rename is the real pain to deal with, harder than all the other methods + * combined. Depending on the circumstances we can run into three cases. + * A "target rename" where the target dentry already existed, a "local + * rename" where both parent directories are identical or a "cross-directory + * rename" in the remaining case. + * + * Local rename is atomic, as the old dentry is simply rewritten with a new + * name. + * + * Cross-directory rename works in two steps, similar to __logfs_create and + * logfs_unlink: + * 1. Write new dentry (remember old dentry in journal) + * 2. Remove old dentry (clear journal) + * + * Here we remember a dentry instead of an inode. On next mount, if we were + * interrupted, we delete the dentry. From a users point of view, the + * operation succeeded. + * + * Target rename works in three atomic steps: + * 1. Attach old inode to new dentry (remember old dentry and new inode) + * 2. Remove old dentry (still remember the new inode) + * 3. Remove victim inode + * + * Here we remember both an inode an a dentry. 
If we get interrupted + * between steps 1 and 2, we delete both the dentry and the inode. If + * we get interrupted between steps 2 and 3, we delete just the inode. + * In either case, the remaining objects are deleted on next mount. From + * a users point of view, the operation succeeded. + */ + +static int write_dir(struct inode *dir, struct logfs_disk_dentry *dd, + loff_t pos) +{ + return logfs_inode_write(dir, dd, sizeof(*dd), pos, WF_LOCK, NULL); +} + +static int write_inode(struct inode *inode) +{ + return __logfs_write_inode(inode, WF_LOCK); +} + +static s64 dir_seek_data(struct inode *inode, s64 pos) +{ + s64 new_pos = logfs_seek_data(inode, pos); + + return max(pos, new_pos - 1); +} + +static int beyond_eof(struct inode *inode, loff_t bix) +{ + loff_t pos = bix << inode->i_sb->s_blocksize_bits; + return pos >= i_size_read(inode); +} + +/* + * Prime value was chosen to be roughly 256 + 26. r5 hash uses 11, + * so short names (len <= 9) don't even occupy the complete 32bit name + * space. A prime >256 ensures short names quickly spread the 32bit + * name space. Add about 26 for the estimated amount of information + * of each character and pick a prime nearby, preferrably a bit-sparse + * one. + */ +static u32 hash_32(const char *s, int len, u32 seed) +{ + u32 hash = seed; + int i; + + for (i = 0; i < len; i++) + hash = hash * 293 + s[i]; + return hash; +} + +/* + * We have to satisfy several conflicting requirements here. Small + * directories should stay fairly compact and not require too many + * indirect blocks. The number of possible locations for a given hash + * should be small to make lookup() fast. And we should try hard not + * to overflow the 32bit name space or nfs and 32bit host systems will + * be unhappy. + * + * So we use the following scheme. First we reduce the hash to 0..15 + * and try a direct block. If that is occupied we reduce the hash to + * 16..255 and try an indirect block. Same for 2x and 3x indirect + * blocks. Lastly we reduce the hash to 0x800_0000 .. 0xffff_ffff, + * but use buckets containing eight entries instead of a single one. + * + * Using 16 entries should allow for a reasonable amount of hash + * collisions, so the 32bit name space can be packed fairly tight + * before overflowing. Oh and currently we don't overflow but return + * and error. + * + * How likely are collisions? Doing the appropriate math is beyond me + * and the Bronstein textbook. But running a test program to brute + * force collisions for a couple of days showed that on average the + * first collision occurs after 598M entries, with 290M being the + * smallest result. Obviously 21 entries could already cause a + * collision if all entries are carefully chosen. + */ +static pgoff_t hash_index(u32 hash, int round) +{ + switch (round) { + case 0: + return hash % I0_BLOCKS; + case 1: + return I0_BLOCKS + hash % (I1_BLOCKS - I0_BLOCKS); + case 2: + return I1_BLOCKS + hash % (I2_BLOCKS - I1_BLOCKS); + case 3: + return I2_BLOCKS + hash % (I3_BLOCKS - I2_BLOCKS); + case 4 ... 
19: + return I3_BLOCKS + 16 * (hash % (((1<<31) - I3_BLOCKS) / 16)) + + round - 4; + } + BUG(); +} + +static struct page *logfs_get_dd_page(struct inode *dir, struct dentry *dentry) +{ + struct qstr *name = &dentry->d_name; + struct page *page; + struct logfs_disk_dentry *dd; + u32 hash = hash_32(name->name, name->len, 0); + pgoff_t index; + int round; + + if (name->len > LOGFS_MAX_NAMELEN) + return ERR_PTR(-ENAMETOOLONG); + + for (round = 0; round < 20; round++) { + index = hash_index(hash, round); + + if (beyond_eof(dir, index)) + return NULL; + if (!logfs_exist_block(dir, index)) + continue; + page = read_cache_page(dir->i_mapping, index, + (filler_t *)logfs_readpage, NULL); + if (IS_ERR(page)) + return page; + dd = kmap_atomic(page, KM_USER0); + BUG_ON(dd->namelen == 0); + + if (name->len != be16_to_cpu(dd->namelen) || + memcmp(name->name, dd->name, name->len)) { + kunmap_atomic(dd, KM_USER0); + page_cache_release(page); + continue; + } + + kunmap_atomic(dd, KM_USER0); + return page; + } + return NULL; +} + +static int logfs_remove_inode(struct inode *inode) +{ + int ret; + + inode->i_nlink--; + ret = write_inode(inode); + LOGFS_BUG_ON(ret, inode->i_sb); + return ret; +} + +static void abort_transaction(struct inode *inode, struct logfs_transaction *ta) +{ + if (logfs_inode(inode)->li_block) + logfs_inode(inode)->li_block->ta = NULL; + kfree(ta); +} + +static int logfs_unlink(struct inode *dir, struct dentry *dentry) +{ + struct logfs_super *super = logfs_super(dir->i_sb); + struct inode *inode = dentry->d_inode; + struct logfs_transaction *ta; + struct page *page; + pgoff_t index; + int ret; + + ta = kzalloc(sizeof(*ta), GFP_KERNEL); + if (!ta) + return -ENOMEM; + + ta->state = UNLINK_1; + ta->ino = inode->i_ino; + + inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; + + page = logfs_get_dd_page(dir, dentry); + if (!page) + return -ENOENT; + if (IS_ERR(page)) + return PTR_ERR(page); + index = page->index; + page_cache_release(page); + + mutex_lock(&super->s_dirop_mutex); + logfs_add_transaction(dir, ta); + + ret = logfs_delete(dir, index, NULL); + if (!ret) + ret = write_inode(dir); + + if (ret) { + abort_transaction(dir, ta); + printk(KERN_ERR"LOGFS: unable to delete inode\n"); + goto out; + } + + ta->state = UNLINK_2; + logfs_add_transaction(inode, ta); + ret = logfs_remove_inode(inode); +out: + mutex_unlock(&super->s_dirop_mutex); + return ret; +} + +static inline int logfs_empty_dir(struct inode *dir) +{ + u64 data; + + data = logfs_seek_data(dir, 0) << dir->i_sb->s_blocksize_bits; + return data >= i_size_read(dir); +} + +static int logfs_rmdir(struct inode *dir, struct dentry *dentry) +{ + struct inode *inode = dentry->d_inode; + + if (!logfs_empty_dir(inode)) + return -ENOTEMPTY; + + return logfs_unlink(dir, dentry); +} + +/* FIXME: readdir currently has it's own dir_walk code. 
I don't see a good + * way to combine the two copies */ +#define IMPLICIT_NODES 2 +static int __logfs_readdir(struct file *file, void *buf, filldir_t filldir) +{ + struct inode *dir = file->f_dentry->d_inode; + loff_t pos = file->f_pos - IMPLICIT_NODES; + struct page *page; + struct logfs_disk_dentry *dd; + int full; + + BUG_ON(pos < 0); + for (;; pos++) { + if (beyond_eof(dir, pos)) + break; + if (!logfs_exist_block(dir, pos)) { + /* deleted dentry */ + pos = dir_seek_data(dir, pos); + continue; + } + page = read_cache_page(dir->i_mapping, pos, + (filler_t *)logfs_readpage, NULL); + if (IS_ERR(page)) + return PTR_ERR(page); + dd = kmap_atomic(page, KM_USER0); + BUG_ON(dd->namelen == 0); + + full = filldir(buf, (char *)dd->name, be16_to_cpu(dd->namelen), + pos, be64_to_cpu(dd->ino), dd->type); + kunmap_atomic(dd, KM_USER0); + page_cache_release(page); + if (full) + break; + } + + file->f_pos = pos + IMPLICIT_NODES; + return 0; +} + +static int logfs_readdir(struct file *file, void *buf, filldir_t filldir) +{ + struct inode *inode = file->f_dentry->d_inode; + ino_t pino = parent_ino(file->f_dentry); + int err; + + if (file->f_pos < 0) + return -EINVAL; + + if (file->f_pos == 0) { + if (filldir(buf, ".", 1, 1, inode->i_ino, DT_DIR) < 0) + return 0; + file->f_pos++; + } + if (file->f_pos == 1) { + if (filldir(buf, "..", 2, 2, pino, DT_DIR) < 0) + return 0; + file->f_pos++; + } + + err = __logfs_readdir(file, buf, filldir); + return err; +} + +static void logfs_set_name(struct logfs_disk_dentry *dd, struct qstr *name) +{ + dd->namelen = cpu_to_be16(name->len); + memcpy(dd->name, name->name, name->len); +} + +static struct dentry *logfs_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) +{ + struct page *page; + struct logfs_disk_dentry *dd; + pgoff_t index; + u64 ino = 0; + struct inode *inode; + + page = logfs_get_dd_page(dir, dentry); + if (IS_ERR(page)) + return ERR_CAST(page); + if (!page) { + d_add(dentry, NULL); + return NULL; + } + index = page->index; + dd = kmap_atomic(page, KM_USER0); + ino = be64_to_cpu(dd->ino); + kunmap_atomic(dd, KM_USER0); + page_cache_release(page); + + inode = logfs_iget(dir->i_sb, ino); + if (IS_ERR(inode)) { + printk(KERN_ERR"LogFS: Cannot read inode #%llx for dentry (%lx, %lx)n", + ino, dir->i_ino, index); + return ERR_CAST(inode); + } + return d_splice_alias(inode, dentry); +} + +static void grow_dir(struct inode *dir, loff_t index) +{ + index = (index + 1) << dir->i_sb->s_blocksize_bits; + if (i_size_read(dir) < index) + i_size_write(dir, index); +} + +static int logfs_write_dir(struct inode *dir, struct dentry *dentry, + struct inode *inode) +{ + struct page *page; + struct logfs_disk_dentry *dd; + u32 hash = hash_32(dentry->d_name.name, dentry->d_name.len, 0); + pgoff_t index; + int round, err; + + for (round = 0; round < 20; round++) { + index = hash_index(hash, round); + + if (logfs_exist_block(dir, index)) + continue; + page = find_or_create_page(dir->i_mapping, index, GFP_KERNEL); + if (!page) + return -ENOMEM; + + dd = kmap_atomic(page, KM_USER0); + memset(dd, 0, sizeof(*dd)); + dd->ino = cpu_to_be64(inode->i_ino); + dd->type = logfs_type(inode); + logfs_set_name(dd, &dentry->d_name); + kunmap_atomic(dd, KM_USER0); + + err = logfs_write_buf(dir, page, WF_LOCK); + unlock_page(page); + page_cache_release(page); + if (!err) + grow_dir(dir, index); + return err; + } + /* FIXME: Is there a better return value? In most cases neither + * the filesystem nor the directory are full. 
But we have had + * too many collisions for this particular hash and no fallback. + */ + return -ENOSPC; +} + +static int __logfs_create(struct inode *dir, struct dentry *dentry, + struct inode *inode, const char *dest, long destlen) +{ + struct logfs_super *super = logfs_super(dir->i_sb); + struct logfs_inode *li = logfs_inode(inode); + struct logfs_transaction *ta; + int ret; + + ta = kzalloc(sizeof(*ta), GFP_KERNEL); + if (!ta) + return -ENOMEM; + + ta->state = CREATE_1; + ta->ino = inode->i_ino; + mutex_lock(&super->s_dirop_mutex); + logfs_add_transaction(inode, ta); + + if (dest) { + /* symlink */ + ret = logfs_inode_write(inode, dest, destlen, 0, WF_LOCK, NULL); + if (!ret) + ret = write_inode(inode); + } else { + /* creat/mkdir/mknod */ + ret = write_inode(inode); + } + if (ret) { + abort_transaction(inode, ta); + li->li_flags |= LOGFS_IF_STILLBORN; + /* FIXME: truncate symlink */ + inode->i_nlink--; + iput(inode); + goto out; + } + + ta->state = CREATE_2; + logfs_add_transaction(dir, ta); + ret = logfs_write_dir(dir, dentry, inode); + /* sync directory */ + if (!ret) + ret = write_inode(dir); + + if (ret) { + logfs_del_transaction(dir, ta); + ta->state = CREATE_2; + logfs_add_transaction(inode, ta); + logfs_remove_inode(inode); + iput(inode); + goto out; + } + d_instantiate(dentry, inode); +out: + mutex_unlock(&super->s_dirop_mutex); + return ret; +} + +static int logfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) +{ + struct inode *inode; + + /* + * FIXME: why do we have to fill in S_IFDIR, while the mode is + * correct for mknod, creat, etc.? Smells like the vfs *should* + * do it for us but for some reason fails to do so. + */ + inode = logfs_new_inode(dir, S_IFDIR | mode); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + inode->i_op = &logfs_dir_iops; + inode->i_fop = &logfs_dir_fops; + + return __logfs_create(dir, dentry, inode, NULL, 0); +} + +static int logfs_create(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) +{ + struct inode *inode; + + inode = logfs_new_inode(dir, mode); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + inode->i_op = &logfs_reg_iops; + inode->i_fop = &logfs_reg_fops; + inode->i_mapping->a_ops = &logfs_reg_aops; + + return __logfs_create(dir, dentry, inode, NULL, 0); +} + +static int logfs_mknod(struct inode *dir, struct dentry *dentry, int mode, + dev_t rdev) +{ + struct inode *inode; + + if (dentry->d_name.len > LOGFS_MAX_NAMELEN) + return -ENAMETOOLONG; + + inode = logfs_new_inode(dir, mode); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + init_special_inode(inode, mode, rdev); + + return __logfs_create(dir, dentry, inode, NULL, 0); +} + +static int logfs_symlink(struct inode *dir, struct dentry *dentry, + const char *target) +{ + struct inode *inode; + size_t destlen = strlen(target) + 1; + + if (destlen > dir->i_sb->s_blocksize) + return -ENAMETOOLONG; + + inode = logfs_new_inode(dir, S_IFLNK | 0777); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + inode->i_op = &logfs_symlink_iops; + inode->i_mapping->a_ops = &logfs_reg_aops; + + return __logfs_create(dir, dentry, inode, target, destlen); +} + +static int logfs_permission(struct inode *inode, int mask) +{ + return generic_permission(inode, mask, NULL); +} + +static int logfs_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *dentry) +{ + struct inode *inode = old_dentry->d_inode; + + if (inode->i_nlink >= LOGFS_LINK_MAX) + return -EMLINK; + + inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; + atomic_inc(&inode->i_count); + 
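/* The new name adds a link; the dentry itself is written out through + * the common __logfs_create() path below. */ +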
inode->i_nlink++; + mark_inode_dirty_sync(inode); + + return __logfs_create(dir, dentry, inode, NULL, 0); +} + +static int logfs_get_dd(struct inode *dir, struct dentry *dentry, + struct logfs_disk_dentry *dd, loff_t *pos) +{ + struct page *page; + void *map; + + page = logfs_get_dd_page(dir, dentry); + if (IS_ERR(page)) + return PTR_ERR(page); + *pos = page->index; + map = kmap_atomic(page, KM_USER0); + memcpy(dd, map, sizeof(*dd)); + kunmap_atomic(map, KM_USER0); + page_cache_release(page); + return 0; +} + +static int logfs_delete_dd(struct inode *dir, loff_t pos) +{ + /* + * Getting called with pos somewhere beyond eof is either a goofup + * within this file or means someone maliciously edited the + * (crc-protected) journal. + */ + BUG_ON(beyond_eof(dir, pos)); + dir->i_ctime = dir->i_mtime = CURRENT_TIME; + log_dir(" Delete dentry (%lx, %llx)\n", dir->i_ino, pos); + return logfs_delete(dir, pos, NULL); +} + +/* + * Cross-directory rename, target does not exist. Just a little nasty. + * Create a new dentry in the target dir, then remove the old dentry, + * all the while taking care to remember our operation in the journal. + */ +static int logfs_rename_cross(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + struct logfs_super *super = logfs_super(old_dir->i_sb); + struct logfs_disk_dentry dd; + struct logfs_transaction *ta; + loff_t pos; + int err; + + /* 1. locate source dd */ + err = logfs_get_dd(old_dir, old_dentry, &dd, &pos); + if (err) + return err; + + ta = kzalloc(sizeof(*ta), GFP_KERNEL); + if (!ta) + return -ENOMEM; + + ta->state = CROSS_RENAME_1; + ta->dir = old_dir->i_ino; + ta->pos = pos; + + /* 2. write target dd */ + mutex_lock(&super->s_dirop_mutex); + logfs_add_transaction(new_dir, ta); + err = logfs_write_dir(new_dir, new_dentry, old_dentry->d_inode); + if (!err) + err = write_inode(new_dir); + + if (err) { + super->s_rename_dir = 0; + super->s_rename_pos = 0; + abort_transaction(new_dir, ta); + goto out; + } + + /* 3. remove source dd */ + ta->state = CROSS_RENAME_2; + logfs_add_transaction(old_dir, ta); + err = logfs_delete_dd(old_dir, pos); + if (!err) + err = write_inode(old_dir); + LOGFS_BUG_ON(err, old_dir->i_sb); +out: + mutex_unlock(&super->s_dirop_mutex); + return err; +} + +static int logfs_replace_inode(struct inode *dir, struct dentry *dentry, + struct logfs_disk_dentry *dd, struct inode *inode) +{ + loff_t pos; + int err; + + err = logfs_get_dd(dir, dentry, dd, &pos); + if (err) + return err; + dd->ino = cpu_to_be64(inode->i_ino); + dd->type = logfs_type(inode); + + err = write_dir(dir, dd, pos); + if (err) + return err; + log_dir("Replace dentry (%lx, %llx) %s -> %llx\n", dir->i_ino, pos, + dd->name, be64_to_cpu(dd->ino)); + return write_inode(dir); +} + +/* Target dentry exists - the worst case. We need to attach the source + * inode to the target dentry, then remove the orphaned target inode and + * source dentry. + */ +static int logfs_rename_target(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + struct logfs_super *super = logfs_super(old_dir->i_sb); + struct inode *old_inode = old_dentry->d_inode; + struct inode *new_inode = new_dentry->d_inode; + int isdir = S_ISDIR(old_inode->i_mode); + struct logfs_disk_dentry dd; + struct logfs_transaction *ta; + loff_t pos; + int err; + + BUG_ON(isdir != S_ISDIR(new_inode->i_mode)); + if (isdir) { + if (!logfs_empty_dir(new_inode)) + return -ENOTEMPTY; + } + + /* 1. 
locate source dd */ + err = logfs_get_dd(old_dir, old_dentry, &dd, &pos); + if (err) + return err; + + ta = kzalloc(sizeof(*ta), GFP_KERNEL); + if (!ta) + return -ENOMEM; + + ta->state = TARGET_RENAME_1; + ta->dir = old_dir->i_ino; + ta->pos = pos; + ta->ino = new_inode->i_ino; + + /* 2. attach source inode to target dd */ + mutex_lock(&super->s_dirop_mutex); + logfs_add_transaction(new_dir, ta); + err = logfs_replace_inode(new_dir, new_dentry, &dd, old_inode); + if (err) { + super->s_rename_dir = 0; + super->s_rename_pos = 0; + super->s_victim_ino = 0; + abort_transaction(new_dir, ta); + goto out; + } + + /* 3. remove source dd */ + ta->state = TARGET_RENAME_2; + logfs_add_transaction(old_dir, ta); + err = logfs_delete_dd(old_dir, pos); + if (!err) + err = write_inode(old_dir); + LOGFS_BUG_ON(err, old_dir->i_sb); + + /* 4. remove target inode */ + ta->state = TARGET_RENAME_3; + logfs_add_transaction(new_inode, ta); + err = logfs_remove_inode(new_inode); + +out: + mutex_unlock(&super->s_dirop_mutex); + return err; +} + +static int logfs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + if (new_dentry->d_inode) + return logfs_rename_target(old_dir, old_dentry, + new_dir, new_dentry); + return logfs_rename_cross(old_dir, old_dentry, new_dir, new_dentry); +} + +/* No locking done here, as this is called before .get_sb() returns. */ +int logfs_replay_journal(struct super_block *sb) +{ + struct logfs_super *super = logfs_super(sb); + struct inode *inode; + u64 ino, pos; + int err; + + if (super->s_victim_ino) { + /* delete victim inode */ + ino = super->s_victim_ino; + printk(KERN_INFO"LogFS: delete unmapped inode #%llx\n", ino); + inode = logfs_iget(sb, ino); + if (IS_ERR(inode)) + goto fail; + + LOGFS_BUG_ON(i_size_read(inode) > 0, sb); + super->s_victim_ino = 0; + err = logfs_remove_inode(inode); + iput(inode); + if (err) { + super->s_victim_ino = ino; + goto fail; + } + } + if (super->s_rename_dir) { + /* delete old dd from rename */ + ino = super->s_rename_dir; + pos = super->s_rename_pos; + printk(KERN_INFO"LogFS: delete unbacked dentry (%llx, %llx)\n", + ino, pos); + inode = logfs_iget(sb, ino); + if (IS_ERR(inode)) + goto fail; + + super->s_rename_dir = 0; + super->s_rename_pos = 0; + err = logfs_delete_dd(inode, pos); + iput(inode); + if (err) { + super->s_rename_dir = ino; + super->s_rename_pos = pos; + goto fail; + } + } + return 0; +fail: + LOGFS_BUG(sb); + return -EIO; +} + +const struct inode_operations logfs_symlink_iops = { + .readlink = generic_readlink, + .follow_link = page_follow_link_light, +}; + +const struct inode_operations logfs_dir_iops = { + .create = logfs_create, + .link = logfs_link, + .lookup = logfs_lookup, + .mkdir = logfs_mkdir, + .mknod = logfs_mknod, + .rename = logfs_rename, + .rmdir = logfs_rmdir, + .permission = logfs_permission, + .symlink = logfs_symlink, + .unlink = logfs_unlink, +}; +const struct file_operations logfs_dir_fops = { + .fsync = logfs_fsync, + .ioctl = logfs_ioctl, + .readdir = logfs_readdir, + .read = generic_read_dir, +}; diff --git a/fs/logfs/file.c b/fs/logfs/file.c new file mode 100644 index 000000000000..370f367a933e --- /dev/null +++ b/fs/logfs/file.c @@ -0,0 +1,263 @@ +/* + * fs/logfs/file.c - prepare_write, commit_write and friends + * + * As should be obvious for Linux kernel code, license is GPLv2 + * + * Copyright (c) 2005-2008 Joern Engel + */ +#include "logfs.h" +#include +#include + +static int logfs_write_begin(struct file *file, struct address_space *mapping, 
+ loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) +{ + struct inode *inode = mapping->host; + struct page *page; + pgoff_t index = pos >> PAGE_CACHE_SHIFT; + + page = grab_cache_page_write_begin(mapping, index, flags); + if (!page) + return -ENOMEM; + *pagep = page; + + if ((len == PAGE_CACHE_SIZE) || PageUptodate(page)) + return 0; + if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) { + unsigned start = pos & (PAGE_CACHE_SIZE - 1); + unsigned end = start + len; + + /* Reading beyond i_size is simple: memset to zero */ + zero_user_segments(page, 0, start, end, PAGE_CACHE_SIZE); + return 0; + } + return logfs_readpage_nolock(page); +} + +static int logfs_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, struct page *page, + void *fsdata) +{ + struct inode *inode = mapping->host; + pgoff_t index = page->index; + unsigned start = pos & (PAGE_CACHE_SIZE - 1); + unsigned end = start + copied; + int ret = 0; + + BUG_ON(PAGE_CACHE_SIZE != inode->i_sb->s_blocksize); + BUG_ON(page->index > I3_BLOCKS); + + if (copied < len) { + /* + * Short write of a non-initialized paged. Just tell userspace + * to retry the entire page. + */ + if (!PageUptodate(page)) { + copied = 0; + goto out; + } + } + if (copied == 0) + goto out; /* FIXME: do we need to update inode? */ + + if (i_size_read(inode) < (index << PAGE_CACHE_SHIFT) + end) { + i_size_write(inode, (index << PAGE_CACHE_SHIFT) + end); + mark_inode_dirty_sync(inode); + } + + SetPageUptodate(page); + if (!PageDirty(page)) { + if (!get_page_reserve(inode, page)) + __set_page_dirty_nobuffers(page); + else + ret = logfs_write_buf(inode, page, WF_LOCK); + } +out: + unlock_page(page); + page_cache_release(page); + return ret ? ret : copied; +} + +int logfs_readpage(struct file *file, struct page *page) +{ + int ret; + + ret = logfs_readpage_nolock(page); + unlock_page(page); + return ret; +} + +/* Clear the page's dirty flag in the radix tree. */ +/* TODO: mucking with PageWriteback is silly. Add a generic function to clear + * the dirty bit from the radix tree for filesystems that don't have to wait + * for page writeback to finish (i.e. any compressing filesystem). + */ +static void clear_radix_tree_dirty(struct page *page) +{ + BUG_ON(PagePrivate(page) || page->private); + set_page_writeback(page); + end_page_writeback(page); +} + +static int __logfs_writepage(struct page *page) +{ + struct inode *inode = page->mapping->host; + int err; + + err = logfs_write_buf(inode, page, WF_LOCK); + if (err) + set_page_dirty(page); + else + clear_radix_tree_dirty(page); + unlock_page(page); + return err; +} + +static int logfs_writepage(struct page *page, struct writeback_control *wbc) +{ + struct inode *inode = page->mapping->host; + loff_t i_size = i_size_read(inode); + pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; + unsigned offset; + u64 bix; + level_t level; + + log_file("logfs_writepage(%lx, %lx, %p)\n", inode->i_ino, page->index, + page); + + logfs_unpack_index(page->index, &bix, &level); + + /* Indirect blocks are never truncated */ + if (level != 0) + return __logfs_writepage(page); + + /* + * TODO: everything below is a near-verbatim copy of nobh_writepage(). + * The relevant bits should be factored out after logfs is merged. + */ + + /* Is the page fully inside i_size? */ + if (bix < end_index) + return __logfs_writepage(page); + + /* Is the page fully outside i_size? 
(truncate in progress) */ + offset = i_size & (PAGE_CACHE_SIZE-1); + if (bix > end_index || offset == 0) { + unlock_page(page); + return 0; /* don't care */ + } + + /* + * The page straddles i_size. It must be zeroed out on each and every + * writepage invokation because it may be mmapped. "A file is mapped + * in multiples of the page size. For a file that is not a multiple of + * the page size, the remaining memory is zeroed when mapped, and + * writes to that region are not written out to the file." + */ + zero_user_segment(page, offset, PAGE_CACHE_SIZE); + return __logfs_writepage(page); +} + +static void logfs_invalidatepage(struct page *page, unsigned long offset) +{ + move_page_to_btree(page); + BUG_ON(PagePrivate(page) || page->private); +} + +static int logfs_releasepage(struct page *page, gfp_t only_xfs_uses_this) +{ + return 0; /* None of these are easy to release */ +} + + +int logfs_ioctl(struct inode *inode, struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct logfs_inode *li = logfs_inode(inode); + unsigned int oldflags, flags; + int err; + + switch (cmd) { + case FS_IOC_GETFLAGS: + flags = li->li_flags & LOGFS_FL_USER_VISIBLE; + return put_user(flags, (int __user *)arg); + case FS_IOC_SETFLAGS: + if (IS_RDONLY(inode)) + return -EROFS; + + if (!is_owner_or_cap(inode)) + return -EACCES; + + err = get_user(flags, (int __user *)arg); + if (err) + return err; + + mutex_lock(&inode->i_mutex); + oldflags = li->li_flags; + flags &= LOGFS_FL_USER_MODIFIABLE; + flags |= oldflags & ~LOGFS_FL_USER_MODIFIABLE; + li->li_flags = flags; + mutex_unlock(&inode->i_mutex); + + inode->i_ctime = CURRENT_TIME; + mark_inode_dirty_sync(inode); + return 0; + + default: + return -ENOTTY; + } +} + +int logfs_fsync(struct file *file, struct dentry *dentry, int datasync) +{ + struct super_block *sb = dentry->d_inode->i_sb; + struct logfs_super *super = logfs_super(sb); + + /* FIXME: write anchor */ + super->s_devops->sync(sb); + return 0; +} + +static int logfs_setattr(struct dentry *dentry, struct iattr *attr) +{ + struct inode *inode = dentry->d_inode; + int err = 0; + + if (attr->ia_valid & ATTR_SIZE) + err = logfs_truncate(inode, attr->ia_size); + attr->ia_valid &= ~ATTR_SIZE; + + if (!err) + err = inode_change_ok(inode, attr); + if (!err) + err = inode_setattr(inode, attr); + return err; +} + +const struct inode_operations logfs_reg_iops = { + .setattr = logfs_setattr, +}; + +const struct file_operations logfs_reg_fops = { + .aio_read = generic_file_aio_read, + .aio_write = generic_file_aio_write, + .fsync = logfs_fsync, + .ioctl = logfs_ioctl, + .llseek = generic_file_llseek, + .mmap = generic_file_readonly_mmap, + .open = generic_file_open, + .read = do_sync_read, + .write = do_sync_write, +}; + +const struct address_space_operations logfs_reg_aops = { + .invalidatepage = logfs_invalidatepage, + .readpage = logfs_readpage, + .releasepage = logfs_releasepage, + .set_page_dirty = __set_page_dirty_nobuffers, + .writepage = logfs_writepage, + .writepages = generic_writepages, + .write_begin = logfs_write_begin, + .write_end = logfs_write_end, +}; diff --git a/fs/logfs/gc.c b/fs/logfs/gc.c new file mode 100644 index 000000000000..b3656c44190e --- /dev/null +++ b/fs/logfs/gc.c @@ -0,0 +1,730 @@ +/* + * fs/logfs/gc.c - garbage collection code + * + * As should be obvious for Linux kernel code, license is GPLv2 + * + * Copyright (c) 2005-2008 Joern Engel + */ +#include "logfs.h" +#include + +/* + * Wear leveling needs to kick in when the difference between low erase + * counts and high 
erase counts gets too big. A good value for "too big" + * may be somewhat below 10% of maximum erase count for the device. + * Why not 397, to pick a nice round number with no specific meaning? :) + * + * WL_RATELIMIT is the minimum time between two wear level events. A huge + * number of segments may fulfil the requirements for wear leveling at the + * same time. If that happens we don't want to cause a latency from hell, + * but just gently pick one segment every so often and minimize overhead. + */ +#define WL_DELTA 397 +#define WL_RATELIMIT 100 +#define MAX_OBJ_ALIASES 2600 +#define SCAN_RATIO 512 /* number of scanned segments per gc'd segment */ +#define LIST_SIZE 64 /* base size of candidate lists */ +#define SCAN_ROUNDS 128 /* maximum number of complete medium scans */ +#define SCAN_ROUNDS_HIGH 4 /* maximum number of higher-level scans */ + +static int no_free_segments(struct super_block *sb) +{ + struct logfs_super *super = logfs_super(sb); + + return super->s_free_list.count; +} + +/* journal has distance -1, top-most ifile layer distance 0 */ +static u8 root_distance(struct super_block *sb, gc_level_t __gc_level) +{ + struct logfs_super *super = logfs_super(sb); + u8 gc_level = (__force u8)__gc_level; + + switch (gc_level) { + case 0: /* fall through */ + case 1: /* fall through */ + case 2: /* fall through */ + case 3: + /* file data or indirect blocks */ + return super->s_ifile_levels + super->s_iblock_levels - gc_level; + case 6: /* fall through */ + case 7: /* fall through */ + case 8: /* fall through */ + case 9: + /* inode file data or indirect blocks */ + return super->s_ifile_levels - (gc_level - 6); + default: + printk(KERN_ERR"LOGFS: segment of unknown level %x found\n", + gc_level); + WARN_ON(1); + return super->s_ifile_levels + super->s_iblock_levels; + } +} + +static int segment_is_reserved(struct super_block *sb, u32 segno) +{ + struct logfs_super *super = logfs_super(sb); + struct logfs_area *area; + void *reserved; + int i; + + /* Some segments are reserved. Just pretend they were all valid */ + reserved = btree_lookup32(&super->s_reserved_segments, segno); + if (reserved) + return 1; + + /* Currently open segments */ + for_each_area(i) { + area = super->s_area[i]; + if (area->a_is_open && area->a_segno == segno) + return 1; + } + + return 0; +} + +static void logfs_mark_segment_bad(struct super_block *sb, u32 segno) +{ + BUG(); +} + +/* + * Returns the bytes consumed by valid objects in this segment. Object headers + * are counted, the segment header is not. 
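+ * Bad and reserved segments are reported as RESERVED so callers can skip
+ * them; erase count and GC level are unpacked from the combined ec_level
+ * field (erase count in the upper bits, level in the low four).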
+ */ +static u32 logfs_valid_bytes(struct super_block *sb, u32 segno, u32 *ec, + gc_level_t *gc_level) +{ + struct logfs_segment_entry se; + u32 ec_level; + + logfs_get_segment_entry(sb, segno, &se); + if (se.ec_level == cpu_to_be32(BADSEG) || + se.valid == cpu_to_be32(RESERVED)) + return RESERVED; + + ec_level = be32_to_cpu(se.ec_level); + *ec = ec_level >> 4; + *gc_level = GC_LEVEL(ec_level & 0xf); + return be32_to_cpu(se.valid); +} + +static void logfs_cleanse_block(struct super_block *sb, u64 ofs, u64 ino, + u64 bix, gc_level_t gc_level) +{ + struct inode *inode; + int err, cookie; + + inode = logfs_safe_iget(sb, ino, &cookie); + err = logfs_rewrite_block(inode, bix, ofs, gc_level, 0); + BUG_ON(err); + logfs_safe_iput(inode, cookie); +} + +static u32 logfs_gc_segment(struct super_block *sb, u32 segno, u8 dist) +{ + struct logfs_super *super = logfs_super(sb); + struct logfs_segment_header sh; + struct logfs_object_header oh; + u64 ofs, ino, bix; + u32 seg_ofs, logical_segno, cleaned = 0; + int err, len, valid; + gc_level_t gc_level; + + LOGFS_BUG_ON(segment_is_reserved(sb, segno), sb); + + btree_insert32(&super->s_reserved_segments, segno, (void *)1, GFP_NOFS); + err = wbuf_read(sb, dev_ofs(sb, segno, 0), sizeof(sh), &sh); + BUG_ON(err); + gc_level = GC_LEVEL(sh.level); + logical_segno = be32_to_cpu(sh.segno); + if (sh.crc != logfs_crc32(&sh, sizeof(sh), 4)) { + logfs_mark_segment_bad(sb, segno); + cleaned = -1; + goto out; + } + + for (seg_ofs = LOGFS_SEGMENT_HEADERSIZE; + seg_ofs + sizeof(oh) < super->s_segsize; ) { + ofs = dev_ofs(sb, logical_segno, seg_ofs); + err = wbuf_read(sb, dev_ofs(sb, segno, seg_ofs), sizeof(oh), + &oh); + BUG_ON(err); + + if (!memchr_inv(&oh, 0xff, sizeof(oh))) + break; + + if (oh.crc != logfs_crc32(&oh, sizeof(oh) - 4, 4)) { + logfs_mark_segment_bad(sb, segno); + cleaned = super->s_segsize - 1; + goto out; + } + + ino = be64_to_cpu(oh.ino); + bix = be64_to_cpu(oh.bix); + len = sizeof(oh) + be16_to_cpu(oh.len); + valid = logfs_is_valid_block(sb, ofs, ino, bix, gc_level); + if (valid == 1) { + logfs_cleanse_block(sb, ofs, ino, bix, gc_level); + cleaned += len; + } else if (valid == 2) { + /* Will be invalid upon journal commit */ + cleaned += len; + } + seg_ofs += len; + } +out: + btree_remove32(&super->s_reserved_segments, segno); + return cleaned; +} + +static struct gc_candidate *add_list(struct gc_candidate *cand, + struct candidate_list *list) +{ + struct rb_node **p = &list->rb_tree.rb_node; + struct rb_node *parent = NULL; + struct gc_candidate *cur; + int comp; + + cand->list = list; + while (*p) { + parent = *p; + cur = rb_entry(parent, struct gc_candidate, rb_node); + + if (list->sort_by_ec) + comp = cand->erase_count < cur->erase_count; + else + comp = cand->valid < cur->valid; + + if (comp) + p = &parent->rb_left; + else + p = &parent->rb_right; + } + rb_link_node(&cand->rb_node, parent, p); + rb_insert_color(&cand->rb_node, &list->rb_tree); + + if (list->count <= list->maxcount) { + list->count++; + return NULL; + } + cand = rb_entry(rb_last(&list->rb_tree), struct gc_candidate, rb_node); + rb_erase(&cand->rb_node, &list->rb_tree); + cand->list = NULL; + return cand; +} + +static void remove_from_list(struct gc_candidate *cand) +{ + struct candidate_list *list = cand->list; + + rb_erase(&cand->rb_node, &list->rb_tree); + list->count--; +} + +static void free_candidate(struct super_block *sb, struct gc_candidate *cand) +{ + struct logfs_super *super = logfs_super(sb); + + btree_remove32(&super->s_cand_tree, cand->segno); + kfree(cand); +} + +u32 
get_best_cand(struct super_block *sb, struct candidate_list *list, u32 *ec) +{ + struct gc_candidate *cand; + u32 segno; + + BUG_ON(list->count == 0); + + cand = rb_entry(rb_first(&list->rb_tree), struct gc_candidate, rb_node); + remove_from_list(cand); + segno = cand->segno; + if (ec) + *ec = cand->erase_count; + free_candidate(sb, cand); + return segno; +} + +/* + * We have several lists to manage segments with. The reserve_list is used to + * deal with bad blocks. We try to keep the best (lowest ec) segments on this + * list. + * The free_list contains free segments for normal usage. It usually gets the + * second pick after the reserve_list. But when the free_list is running short + * it is more important to keep the free_list full than to keep a reserve. + * + * Segments that are not free are put onto a per-level low_list. If we have + * to run garbage collection, we pick a candidate from there. All segments on + * those lists should have at least some free space so GC will make progress. + * + * And last we have the ec_list, which is used to pick segments for wear + * leveling. + * + * If all appropriate lists are full, we simply free the candidate and forget + * about that segment for a while. We have better candidates for each purpose. + */ +static void __add_candidate(struct super_block *sb, struct gc_candidate *cand) +{ + struct logfs_super *super = logfs_super(sb); + u32 full = super->s_segsize - LOGFS_SEGMENT_RESERVE; + + if (cand->valid == 0) { + /* 100% free segments */ + log_gc_noisy("add reserve segment %x (ec %x) at %llx\n", + cand->segno, cand->erase_count, + dev_ofs(sb, cand->segno, 0)); + cand = add_list(cand, &super->s_reserve_list); + if (cand) { + log_gc_noisy("add free segment %x (ec %x) at %llx\n", + cand->segno, cand->erase_count, + dev_ofs(sb, cand->segno, 0)); + cand = add_list(cand, &super->s_free_list); + } + } else { + /* good candidates for Garbage Collection */ + if (cand->valid < full) + cand = add_list(cand, &super->s_low_list[cand->dist]); + /* good candidates for wear leveling, + * segments that were recently written get ignored */ + if (cand) + cand = add_list(cand, &super->s_ec_list); + } + if (cand) + free_candidate(sb, cand); +} + +static int add_candidate(struct super_block *sb, u32 segno, u32 valid, u32 ec, + u8 dist) +{ + struct logfs_super *super = logfs_super(sb); + struct gc_candidate *cand; + + cand = kmalloc(sizeof(*cand), GFP_NOFS); + if (!cand) + return -ENOMEM; + + cand->segno = segno; + cand->valid = valid; + cand->erase_count = ec; + cand->dist = dist; + + btree_insert32(&super->s_cand_tree, segno, cand, GFP_NOFS); + __add_candidate(sb, cand); + return 0; +} + +static void remove_segment_from_lists(struct super_block *sb, u32 segno) +{ + struct logfs_super *super = logfs_super(sb); + struct gc_candidate *cand; + + cand = btree_lookup32(&super->s_cand_tree, segno); + if (cand) { + remove_from_list(cand); + free_candidate(sb, cand); + } +} + +static void scan_segment(struct super_block *sb, u32 segno) +{ + u32 valid, ec = 0; + gc_level_t gc_level = 0; + u8 dist; + + if (segment_is_reserved(sb, segno)) + return; + + remove_segment_from_lists(sb, segno); + valid = logfs_valid_bytes(sb, segno, &ec, &gc_level); + if (valid == RESERVED) + return; + + dist = root_distance(sb, gc_level); + add_candidate(sb, segno, valid, ec, dist); +} + +static struct gc_candidate *first_in_list(struct candidate_list *list) +{ + if (list->count == 0) + return NULL; + return rb_entry(rb_first(&list->rb_tree), struct gc_candidate, rb_node); +} + +/* + * Find the 
best segment for garbage collection. Main criterion is + * the segment requiring the least effort to clean. Secondary + * criterion is to GC on the lowest level available. + * + * So we search the least effort segment on the lowest level first, + * then move up and pick another segment iff is requires significantly + * less effort. Hence the LOGFS_MAX_OBJECTSIZE in the comparison. + */ +static struct gc_candidate *get_candidate(struct super_block *sb) +{ + struct logfs_super *super = logfs_super(sb); + int i, max_dist; + struct gc_candidate *cand = NULL, *this; + + max_dist = min(no_free_segments(sb), LOGFS_NO_AREAS); + + for (i = max_dist; i >= 0; i--) { + this = first_in_list(&super->s_low_list[i]); + if (!this) + continue; + if (!cand) + cand = this; + if (this->valid + LOGFS_MAX_OBJECTSIZE <= cand->valid) + cand = this; + } + return cand; +} + +static int __logfs_gc_once(struct super_block *sb, struct gc_candidate *cand) +{ + struct logfs_super *super = logfs_super(sb); + gc_level_t gc_level; + u32 cleaned, valid, segno, ec; + u8 dist; + + if (!cand) { + log_gc("GC attempted, but no candidate found\n"); + return 0; + } + + segno = cand->segno; + dist = cand->dist; + valid = logfs_valid_bytes(sb, segno, &ec, &gc_level); + free_candidate(sb, cand); + log_gc("GC segment #%02x at %llx, %x required, %x free, %x valid, %llx free\n", + segno, (u64)segno << super->s_segshift, + dist, no_free_segments(sb), valid, + super->s_free_bytes); + cleaned = logfs_gc_segment(sb, segno, dist); + log_gc("GC segment #%02x complete - now %x valid\n", segno, + valid - cleaned); + BUG_ON(cleaned != valid); + return 1; +} + +static int logfs_gc_once(struct super_block *sb) +{ + struct gc_candidate *cand; + + cand = get_candidate(sb); + if (cand) + remove_from_list(cand); + return __logfs_gc_once(sb, cand); +} + +/* returns 1 if a wrap occurs, 0 otherwise */ +static int logfs_scan_some(struct super_block *sb) +{ + struct logfs_super *super = logfs_super(sb); + u32 segno; + int i, ret = 0; + + segno = super->s_sweeper; + for (i = SCAN_RATIO; i > 0; i--) { + segno++; + if (segno >= super->s_no_segs) { + segno = 0; + ret = 1; + /* Break out of the loop. We want to read a single + * block from the segment size on next invocation if + * SCAN_RATIO is set to match block size + */ + break; + } + + scan_segment(sb, segno); + } + super->s_sweeper = segno; + return ret; +} + +/* + * In principle, this function should loop forever, looking for GC candidates + * and moving data. LogFS is designed in such a way that this loop is + * guaranteed to terminate. + * + * Limiting the loop to some iterations serves purely to catch cases when + * these guarantees have failed. An actual endless loop is an obvious bug + * and should be reported as such. 
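+ * SCAN_ROUNDS is that limit; falling out of the loop below without having
+ * reached the target ends in LOGFS_BUG().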
+ */ +static void __logfs_gc_pass(struct super_block *sb, int target) +{ + struct logfs_super *super = logfs_super(sb); + struct logfs_block *block; + int round, progress, last_progress = 0; + + if (no_free_segments(sb) >= target && + super->s_no_object_aliases < MAX_OBJ_ALIASES) + return; + + log_gc("__logfs_gc_pass(%x)\n", target); + for (round = 0; round < SCAN_ROUNDS; ) { + if (no_free_segments(sb) >= target) + goto write_alias; + + /* Sync in-memory state with on-medium state in case they + * diverged */ + logfs_write_anchor(super->s_master_inode); + round += logfs_scan_some(sb); + if (no_free_segments(sb) >= target) + goto write_alias; + progress = logfs_gc_once(sb); + if (progress) + last_progress = round; + else if (round - last_progress > 2) + break; + continue; + + /* + * The goto logic is nasty, I just don't know a better way to + * code it. GC is supposed to ensure two things: + * 1. Enough free segments are available. + * 2. The number of aliases is bounded. + * When 1. is achieved, we take a look at 2. and write back + * some alias-containing blocks, if necessary. However, after + * each such write we need to go back to 1., as writes can + * consume free segments. + */ +write_alias: + if (super->s_no_object_aliases < MAX_OBJ_ALIASES) + return; + if (list_empty(&super->s_object_alias)) { + /* All aliases are still in btree */ + return; + } + log_gc("Write back one alias\n"); + block = list_entry(super->s_object_alias.next, + struct logfs_block, alias_list); + block->ops->write_block(block); + /* + * To round off the nasty goto logic, we reset round here. It + * is a safety-net for GC not making any progress and limited + * to something reasonably small. If incremented it for every + * single alias, the loop could terminate rather quickly. + */ + round = 0; + } + LOGFS_BUG(sb); +} + +static int wl_ratelimit(struct super_block *sb, u64 *next_event) +{ + struct logfs_super *super = logfs_super(sb); + + if (*next_event < super->s_gec) { + *next_event = super->s_gec + WL_RATELIMIT; + return 0; + } + return 1; +} + +static void logfs_wl_pass(struct super_block *sb) +{ + struct logfs_super *super = logfs_super(sb); + struct gc_candidate *wl_cand, *free_cand; + + if (wl_ratelimit(sb, &super->s_wl_gec_ostore)) + return; + + wl_cand = first_in_list(&super->s_ec_list); + if (!wl_cand) + return; + free_cand = first_in_list(&super->s_free_list); + if (!free_cand) + return; + + if (wl_cand->erase_count < free_cand->erase_count + WL_DELTA) { + remove_from_list(wl_cand); + __logfs_gc_once(sb, wl_cand); + } +} + +/* + * The journal needs wear leveling as well. But moving the journal is an + * expensive operation so we try to avoid it as much as possible. And if we + * have to do it, we move the whole journal, not individual segments. + * + * Ratelimiting is not strictly necessary here, it mainly serves to avoid the + * calculations. First we check whether moving the journal would be a + * significant improvement. That means that a) the current journal segments + * have more wear than the future journal segments and b) the current journal + * segments have more wear than normal ostore segments. + * Rationale for b) is that we don't have to move the journal if it is aging + * less than the ostore, even if the reserve segments age even less (they are + * excluded from wear leveling, after all). + * Next we check that the superblocks have less wear than the journal. Since + * moving the journal requires writing the superblocks, we have to protect the + * superblocks even more than the journal. 
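+ * (Concretely, the check at the end of logfs_journal_wl_pass() compares the
+ * minimum journal erase count against the highest erase count among the best
+ * free segment and the two superblock segments.)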
+ * + * Also we double the acceptable wear difference, compared to ostore wear + * leveling. Journal data is read and rewritten rapidly, comparatively. So + * soft errors have much less time to accumulate and we allow the journal to + * be a bit worse than the ostore. + */ +static void logfs_journal_wl_pass(struct super_block *sb) +{ + struct logfs_super *super = logfs_super(sb); + struct gc_candidate *cand; + u32 min_journal_ec = -1, max_reserve_ec = 0; + int i; + + if (wl_ratelimit(sb, &super->s_wl_gec_journal)) + return; + + if (super->s_reserve_list.count < super->s_no_journal_segs) { + /* Reserve is not full enough to move complete journal */ + return; + } + + journal_for_each(i) + if (super->s_journal_seg[i]) + min_journal_ec = min(min_journal_ec, + super->s_journal_ec[i]); + cand = rb_entry(rb_first(&super->s_free_list.rb_tree), + struct gc_candidate, rb_node); + max_reserve_ec = cand->erase_count; + for (i = 0; i < 2; i++) { + struct logfs_segment_entry se; + u32 segno = seg_no(sb, super->s_sb_ofs[i]); + u32 ec; + + logfs_get_segment_entry(sb, segno, &se); + ec = be32_to_cpu(se.ec_level) >> 4; + max_reserve_ec = max(max_reserve_ec, ec); + } + + if (min_journal_ec > max_reserve_ec + 2 * WL_DELTA) { + do_logfs_journal_wl_pass(sb); + } +} + +void logfs_gc_pass(struct super_block *sb) +{ + struct logfs_super *super = logfs_super(sb); + + //BUG_ON(mutex_trylock(&logfs_super(sb)->s_w_mutex)); + /* Write journal before free space is getting saturated with dirty + * objects. + */ + if (super->s_dirty_used_bytes + super->s_dirty_free_bytes + + LOGFS_MAX_OBJECTSIZE >= super->s_free_bytes) + logfs_write_anchor(super->s_master_inode); + __logfs_gc_pass(sb, logfs_super(sb)->s_total_levels); + logfs_wl_pass(sb); + logfs_journal_wl_pass(sb); +} + +static int check_area(struct super_block *sb, int i) +{ + struct logfs_super *super = logfs_super(sb); + struct logfs_area *area = super->s_area[i]; + struct logfs_object_header oh; + u32 segno = area->a_segno; + u32 ofs = area->a_used_bytes; + __be32 crc; + int err; + + if (!area->a_is_open) + return 0; + + for (ofs = area->a_used_bytes; + ofs <= super->s_segsize - sizeof(oh); + ofs += (u32)be16_to_cpu(oh.len) + sizeof(oh)) { + err = wbuf_read(sb, dev_ofs(sb, segno, ofs), sizeof(oh), &oh); + if (err) + return err; + + if (!memchr_inv(&oh, 0xff, sizeof(oh))) + break; + + crc = logfs_crc32(&oh, sizeof(oh) - 4, 4); + if (crc != oh.crc) { + printk(KERN_INFO "interrupted header at %llx\n", + dev_ofs(sb, segno, ofs)); + return 0; + } + } + if (ofs != area->a_used_bytes) { + printk(KERN_INFO "%x bytes unaccounted data found at %llx\n", + ofs - area->a_used_bytes, + dev_ofs(sb, segno, area->a_used_bytes)); + area->a_used_bytes = ofs; + } + return 0; +} + +int logfs_check_areas(struct super_block *sb) +{ + int i, err; + + for_each_area(i) { + err = check_area(sb, i); + if (err) + return err; + } + return 0; +} + +static void logfs_init_candlist(struct candidate_list *list, int maxcount, + int sort_by_ec) +{ + list->count = 0; + list->maxcount = maxcount; + list->sort_by_ec = sort_by_ec; + list->rb_tree = RB_ROOT; +} + +int logfs_init_gc(struct super_block *sb) +{ + struct logfs_super *super = logfs_super(sb); + int i; + + btree_init_mempool32(&super->s_cand_tree, super->s_btree_pool); + logfs_init_candlist(&super->s_free_list, LIST_SIZE + SCAN_RATIO, 1); + logfs_init_candlist(&super->s_reserve_list, + super->s_bad_seg_reserve, 1); + for_each_area(i) + logfs_init_candlist(&super->s_low_list[i], LIST_SIZE, 0); + logfs_init_candlist(&super->s_ec_list, LIST_SIZE, 
1); + return 0; +} + +static void logfs_cleanup_list(struct super_block *sb, + struct candidate_list *list) +{ + struct gc_candidate *cand; + + while (list->count) { + cand = rb_entry(list->rb_tree.rb_node, struct gc_candidate, + rb_node); + remove_from_list(cand); + free_candidate(sb, cand); + } + BUG_ON(list->rb_tree.rb_node); +} + +void logfs_cleanup_gc(struct super_block *sb) +{ + struct logfs_super *super = logfs_super(sb); + int i; + + if (!super->s_free_list.count) + return; + + /* + * FIXME: The btree may still contain a single empty node. So we + * call the grim visitor to clean up that mess. Btree code should + * do it for us, really. + */ + btree_grim_visitor32(&super->s_cand_tree, 0, NULL); + logfs_cleanup_list(sb, &super->s_free_list); + logfs_cleanup_list(sb, &super->s_reserve_list); + for_each_area(i) + logfs_cleanup_list(sb, &super->s_low_list[i]); + logfs_cleanup_list(sb, &super->s_ec_list); +} diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c new file mode 100644 index 000000000000..6d08b3762641 --- /dev/null +++ b/fs/logfs/inode.c @@ -0,0 +1,417 @@ +/* + * fs/logfs/inode.c - inode handling code + * + * As should be obvious for Linux kernel code, license is GPLv2 + * + * Copyright (c) 2005-2008 Joern Engel + */ +#include "logfs.h" +#include +#include + +/* + * How soon to reuse old inode numbers? LogFS doesn't store deleted inodes + * on the medium. It therefore also lacks a method to store the previous + * generation number for deleted inodes. Instead a single generation number + * is stored which will be used for new inodes. Being just a 32bit counter, + * this can obviously wrap relatively quickly. So we only reuse inodes if we + * know that a fair number of inodes can be created before we have to increment + * the generation again - effectively adding some bits to the counter. + * But being too aggressive here means we keep a very large and very sparse + * inode file, wasting space on indirect blocks. + * So what is a good value? Beats me. 64k seems moderately bad on both + * fronts, so let's use that for now... + * + * NFS sucks, as everyone already knows. + */ +#define INOS_PER_WRAP (0x10000) + +/* + * Logfs' requirement to read inodes for garbage collection makes life a bit + * harder. GC may have to read inodes that are in I_FREEING state, when they + * are being written out - and waiting for GC to make progress, naturally. + * + * So we cannot just call iget() or some variant of it, but first have to check + * whether the inode in question might be in I_FREEING state. Therefore we + * maintain our own per-sb list of "almost deleted" inodes and check against + * that list first. Normally this should be at most 1-2 entries long. + * + * Also, inodes have logfs-specific reference counting on top of what the vfs + * does. When .destroy_inode is called, normally the reference count will drop + * to zero and the inode gets deleted. But if GC accessed the inode, its + * refcount will remain nonzero and final deletion will have to wait.
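+ * (That extra count is li_refcount in struct logfs_inode; it is dropped
+ * again on the put side and the logfs_inode is only freed once it reaches
+ * zero.)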
+ * + * As a result we have two sets of functions to get/put inodes: + * logfs_safe_iget/logfs_safe_iput - safe to call from GC context + * logfs_iget/iput - normal version + */ +static struct kmem_cache *logfs_inode_cache; + +static DEFINE_SPINLOCK(logfs_inode_lock); + +static void logfs_inode_setops(struct inode *inode) +{ + switch (inode->i_mode & S_IFMT) { + case S_IFDIR: + inode->i_op = &logfs_dir_iops; + inode->i_fop = &logfs_dir_fops; + inode->i_mapping->a_ops = &logfs_reg_aops; + break; + case S_IFREG: + inode->i_op = &logfs_reg_iops; + inode->i_fop = &logfs_reg_fops; + inode->i_mapping->a_ops = &logfs_reg_aops; + break; + case S_IFLNK: + inode->i_op = &logfs_symlink_iops; + inode->i_mapping->a_ops = &logfs_reg_aops; + break; + case S_IFSOCK: /* fall through */ + case S_IFBLK: /* fall through */ + case S_IFCHR: /* fall through */ + case S_IFIFO: + init_special_inode(inode, inode->i_mode, inode->i_rdev); + break; + default: + BUG(); + } +} + +static struct inode *__logfs_iget(struct super_block *sb, ino_t ino) +{ + struct inode *inode = iget_locked(sb, ino); + int err; + + if (!inode) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; + + err = logfs_read_inode(inode); + if (err || inode->i_nlink == 0) { + /* inode->i_nlink == 0 can be true when called from + * block validator */ + /* set i_nlink to 0 to prevent caching */ + inode->i_nlink = 0; + logfs_inode(inode)->li_flags |= LOGFS_IF_ZOMBIE; + iget_failed(inode); + if (!err) + err = -ENOENT; + return ERR_PTR(err); + } + + logfs_inode_setops(inode); + unlock_new_inode(inode); + return inode; +} + +struct inode *logfs_iget(struct super_block *sb, ino_t ino) +{ + BUG_ON(ino == LOGFS_INO_MASTER); + BUG_ON(ino == LOGFS_INO_SEGFILE); + return __logfs_iget(sb, ino); +} + +/* + * is_cached is set to 1 if we hand out a cached inode, 0 otherwise. 
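+ * A rough usage sketch, mirroring logfs_cleanse_block() in gc.c:
+ *
+ *	int cookie;
+ *	struct inode *inode = logfs_safe_iget(sb, ino, &cookie);
+ *	... read or rewrite blocks belonging to the inode ...
+ *	logfs_safe_iput(inode, cookie);
+ *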
+ * this allows logfs_iput to do the right thing later + */ +struct inode *logfs_safe_iget(struct super_block *sb, ino_t ino, int *is_cached) +{ + struct logfs_super *super = logfs_super(sb); + struct logfs_inode *li; + + if (ino == LOGFS_INO_MASTER) + return super->s_master_inode; + if (ino == LOGFS_INO_SEGFILE) + return super->s_segfile_inode; + + spin_lock(&logfs_inode_lock); + list_for_each_entry(li, &super->s_freeing_list, li_freeing_list) + if (li->vfs_inode.i_ino == ino) { + li->li_refcount++; + spin_unlock(&logfs_inode_lock); + *is_cached = 1; + return &li->vfs_inode; + } + spin_unlock(&logfs_inode_lock); + + *is_cached = 0; + return __logfs_iget(sb, ino); +} + +static void __logfs_destroy_inode(struct inode *inode) +{ + struct logfs_inode *li = logfs_inode(inode); + + BUG_ON(li->li_block); + list_del(&li->li_freeing_list); + kmem_cache_free(logfs_inode_cache, li); +} + +static void logfs_destroy_inode(struct inode *inode) +{ + struct logfs_inode *li = logfs_inode(inode); + + BUG_ON(list_empty(&li->li_freeing_list)); + spin_lock(&logfs_inode_lock); + li->li_refcount--; + if (li->li_refcount == 0) + __logfs_destroy_inode(inode); + spin_unlock(&logfs_inode_lock); +} + +void logfs_safe_iput(struct inode *inode, int is_cached) +{ + if (inode->i_ino == LOGFS_INO_MASTER) + return; + if (inode->i_ino == LOGFS_INO_SEGFILE) + return; + + if (is_cached) { + logfs_destroy_inode(inode); + return; + } + + iput(inode); +} + +static void logfs_init_inode(struct super_block *sb, struct inode *inode) +{ + struct logfs_inode *li = logfs_inode(inode); + int i; + + li->li_flags = 0; + li->li_height = 0; + li->li_used_bytes = 0; + li->li_block = NULL; + inode->i_uid = 0; + inode->i_gid = 0; + inode->i_size = 0; + inode->i_blocks = 0; + inode->i_ctime = CURRENT_TIME; + inode->i_mtime = CURRENT_TIME; + inode->i_nlink = 1; + INIT_LIST_HEAD(&li->li_freeing_list); + + for (i = 0; i < LOGFS_EMBEDDED_FIELDS; i++) + li->li_data[i] = 0; + + return; +} + +static struct inode *logfs_alloc_inode(struct super_block *sb) +{ + struct logfs_inode *li; + + li = kmem_cache_alloc(logfs_inode_cache, GFP_NOFS); + if (!li) + return NULL; + logfs_init_inode(sb, &li->vfs_inode); + return &li->vfs_inode; +} + +/* + * In logfs inodes are written to an inode file. The inode file, like any + * other file, is managed with an inode. The inode file's inode, aka master + * inode, requires special handling in several respects. First, it cannot be + * written to the inode file, so it is stored in the journal instead. + * + * Secondly, this inode cannot be written back and destroyed before all other + * inodes have been written. The ordering is important. Linux' VFS is happily + * unaware of the ordering constraint and would ordinarily destroy the master + * inode at umount time while other inodes are still in use and dirty. Not + * good. + * + * So logfs makes sure the master inode is not written until all other inodes + * have been destroyed. Sadly, this method has another side-effect. The VFS + * will notice one remaining inode and print a frightening warning message. + * Worse, it is impossible to judge whether such a warning was caused by the + * master inode or whether other inodes have leaked as well. + * + * Our attempt at solving this is with logfs_new_meta_inode() below. Its + * purpose is to create a new inode that will not trigger the warning if such + * an inode is still in use. An ugly hack, no doubt. Suggestions for + * improvement are welcome.
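+ * (Concretely, logfs_new_meta_inode() below builds the inode and its
+ * address_space by hand and never inserts it into the inode hash;
+ * destroy_meta_inode() later frees it directly.)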
+ */ +struct inode *logfs_new_meta_inode(struct super_block *sb, u64 ino) +{ + struct inode *inode; + + inode = logfs_alloc_inode(sb); + if (!inode) + return ERR_PTR(-ENOMEM); + + inode->i_mode = S_IFREG; + inode->i_ino = ino; + inode->i_sb = sb; + + /* This is a blatant copy of alloc_inode code. We'd need alloc_inode + * to be nonstatic, alas. */ + { + struct address_space * const mapping = &inode->i_data; + + mapping->a_ops = &logfs_reg_aops; + mapping->host = inode; + mapping->flags = 0; + mapping_set_gfp_mask(mapping, GFP_NOFS); + mapping->assoc_mapping = NULL; + mapping->backing_dev_info = &default_backing_dev_info; + inode->i_mapping = mapping; + inode->i_nlink = 1; + } + + return inode; +} + +struct inode *logfs_read_meta_inode(struct super_block *sb, u64 ino) +{ + struct inode *inode; + int err; + + inode = logfs_new_meta_inode(sb, ino); + if (IS_ERR(inode)) + return inode; + + err = logfs_read_inode(inode); + if (err) { + destroy_meta_inode(inode); + return ERR_PTR(err); + } + logfs_inode_setops(inode); + return inode; +} + +static int logfs_write_inode(struct inode *inode, int do_sync) +{ + int ret; + long flags = WF_LOCK; + + /* Can only happen if creat() failed. Safe to skip. */ + if (logfs_inode(inode)->li_flags & LOGFS_IF_STILLBORN) + return 0; + + ret = __logfs_write_inode(inode, flags); + LOGFS_BUG_ON(ret, inode->i_sb); + return ret; +} + +void destroy_meta_inode(struct inode *inode) +{ + if (inode) { + if (inode->i_data.nrpages) + truncate_inode_pages(&inode->i_data, 0); + logfs_clear_inode(inode); + kmem_cache_free(logfs_inode_cache, logfs_inode(inode)); + } +} + +/* called with inode_lock held */ +static void logfs_drop_inode(struct inode *inode) +{ + struct logfs_super *super = logfs_super(inode->i_sb); + struct logfs_inode *li = logfs_inode(inode); + + spin_lock(&logfs_inode_lock); + list_move(&li->li_freeing_list, &super->s_freeing_list); + spin_unlock(&logfs_inode_lock); + generic_drop_inode(inode); +} + +static void logfs_set_ino_generation(struct super_block *sb, + struct inode *inode) +{ + struct logfs_super *super = logfs_super(sb); + u64 ino; + + mutex_lock(&super->s_journal_mutex); + ino = logfs_seek_hole(super->s_master_inode, super->s_last_ino); + super->s_last_ino = ino; + super->s_inos_till_wrap--; + if (super->s_inos_till_wrap < 0) { + super->s_last_ino = LOGFS_RESERVED_INOS; + super->s_generation++; + super->s_inos_till_wrap = INOS_PER_WRAP; + } + inode->i_ino = ino; + inode->i_generation = super->s_generation; + mutex_unlock(&super->s_journal_mutex); +} + +struct inode *logfs_new_inode(struct inode *dir, int mode) +{ + struct super_block *sb = dir->i_sb; + struct inode *inode; + + inode = new_inode(sb); + if (!inode) + return ERR_PTR(-ENOMEM); + + logfs_init_inode(sb, inode); + + /* inherit parent flags */ + logfs_inode(inode)->li_flags |= + logfs_inode(dir)->li_flags & LOGFS_FL_INHERITED; + + inode->i_mode = mode; + logfs_set_ino_generation(sb, inode); + + inode->i_uid = current_fsuid(); + inode->i_gid = current_fsgid(); + if (dir->i_mode & S_ISGID) { + inode->i_gid = dir->i_gid; + if (S_ISDIR(mode)) + inode->i_mode |= S_ISGID; + } + + logfs_inode_setops(inode); + insert_inode_hash(inode); + + return inode; +} + +static void logfs_init_once(void *_li) +{ + struct logfs_inode *li = _li; + int i; + + li->li_flags = 0; + li->li_used_bytes = 0; + li->li_refcount = 1; + for (i = 0; i < LOGFS_EMBEDDED_FIELDS; i++) + li->li_data[i] = 0; + inode_init_once(&li->vfs_inode); +} + +static int logfs_sync_fs(struct super_block *sb, int wait) +{ + /* FIXME: write 
anchor */ + logfs_super(sb)->s_devops->sync(sb); + return 0; +} + +const struct super_operations logfs_super_operations = { + .alloc_inode = logfs_alloc_inode, + .clear_inode = logfs_clear_inode, + .delete_inode = logfs_delete_inode, + .destroy_inode = logfs_destroy_inode, + .drop_inode = logfs_drop_inode, + .write_inode = logfs_write_inode, + .statfs = logfs_statfs, + .sync_fs = logfs_sync_fs, +}; + +int logfs_init_inode_cache(void) +{ + logfs_inode_cache = kmem_cache_create("logfs_inode_cache", + sizeof(struct logfs_inode), 0, SLAB_RECLAIM_ACCOUNT, + logfs_init_once); + if (!logfs_inode_cache) + return -ENOMEM; + return 0; +} + +void logfs_destroy_inode_cache(void) +{ + kmem_cache_destroy(logfs_inode_cache); +} diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c new file mode 100644 index 000000000000..7a023dbba9f8 --- /dev/null +++ b/fs/logfs/journal.c @@ -0,0 +1,879 @@ +/* + * fs/logfs/journal.c - journal handling code + * + * As should be obvious for Linux kernel code, license is GPLv2 + * + * Copyright (c) 2005-2008 Joern Engel + */ +#include "logfs.h" + +static void logfs_calc_free(struct super_block *sb) +{ + struct logfs_super *super = logfs_super(sb); + u64 reserve, no_segs = super->s_no_segs; + s64 free; + int i; + + /* superblock segments */ + no_segs -= 2; + super->s_no_journal_segs = 0; + /* journal */ + journal_for_each(i) + if (super->s_journal_seg[i]) { + no_segs--; + super->s_no_journal_segs++; + } + + /* open segments plus one extra per level for GC */ + no_segs -= 2 * super->s_total_levels; + + free = no_segs * (super->s_segsize - LOGFS_SEGMENT_RESERVE); + free -= super->s_used_bytes; + /* just a bit extra */ + free -= super->s_total_levels * 4096; + + /* Bad blocks are 'paid' for with speed reserve - the filesystem + * simply gets slower as bad blocks accumulate. Until the bad blocks + * exceed the speed reserve - then the filesystem gets smaller. 
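+ * In other words, the deduction below is
+ *	max((s_bad_segments + s_bad_seg_reserve) * usable segment size,
+ *	    s_speed_reserve)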
+ */ + reserve = super->s_bad_segments + super->s_bad_seg_reserve; + reserve *= super->s_segsize - LOGFS_SEGMENT_RESERVE; + reserve = max(reserve, super->s_speed_reserve); + free -= reserve; + if (free < 0) + free = 0; + + super->s_free_bytes = free; +} + +static void reserve_sb_and_journal(struct super_block *sb) +{ + struct logfs_super *super = logfs_super(sb); + struct btree_head32 *head = &super->s_reserved_segments; + int i, err; + + err = btree_insert32(head, seg_no(sb, super->s_sb_ofs[0]), (void *)1, + GFP_KERNEL); + BUG_ON(err); + + err = btree_insert32(head, seg_no(sb, super->s_sb_ofs[1]), (void *)1, + GFP_KERNEL); + BUG_ON(err); + + journal_for_each(i) { + if (!super->s_journal_seg[i]) + continue; + err = btree_insert32(head, super->s_journal_seg[i], (void *)1, + GFP_KERNEL); + BUG_ON(err); + } +} + +static void read_dynsb(struct super_block *sb, + struct logfs_je_dynsb *dynsb) +{ + struct logfs_super *super = logfs_super(sb); + + super->s_gec = be64_to_cpu(dynsb->ds_gec); + super->s_sweeper = be64_to_cpu(dynsb->ds_sweeper); + super->s_victim_ino = be64_to_cpu(dynsb->ds_victim_ino); + super->s_rename_dir = be64_to_cpu(dynsb->ds_rename_dir); + super->s_rename_pos = be64_to_cpu(dynsb->ds_rename_pos); + super->s_used_bytes = be64_to_cpu(dynsb->ds_used_bytes); + super->s_generation = be32_to_cpu(dynsb->ds_generation); +} + +static void read_anchor(struct super_block *sb, + struct logfs_je_anchor *da) +{ + struct logfs_super *super = logfs_super(sb); + struct inode *inode = super->s_master_inode; + struct logfs_inode *li = logfs_inode(inode); + int i; + + super->s_last_ino = be64_to_cpu(da->da_last_ino); + li->li_flags = 0; + li->li_height = da->da_height; + i_size_write(inode, be64_to_cpu(da->da_size)); + li->li_used_bytes = be64_to_cpu(da->da_used_bytes); + + for (i = 0; i < LOGFS_EMBEDDED_FIELDS; i++) + li->li_data[i] = be64_to_cpu(da->da_data[i]); +} + +static void read_erasecount(struct super_block *sb, + struct logfs_je_journal_ec *ec) +{ + struct logfs_super *super = logfs_super(sb); + int i; + + journal_for_each(i) + super->s_journal_ec[i] = be32_to_cpu(ec->ec[i]); +} + +static int read_area(struct super_block *sb, struct logfs_je_area *a) +{ + struct logfs_super *super = logfs_super(sb); + struct logfs_area *area = super->s_area[a->gc_level]; + u64 ofs; + u32 writemask = ~(super->s_writesize - 1); + + if (a->gc_level >= LOGFS_NO_AREAS) + return -EIO; + if (a->vim != VIM_DEFAULT) + return -EIO; /* TODO: close area and continue */ + + area->a_used_bytes = be32_to_cpu(a->used_bytes); + area->a_written_bytes = area->a_used_bytes & writemask; + area->a_segno = be32_to_cpu(a->segno); + if (area->a_segno) + area->a_is_open = 1; + + ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes); + if (super->s_writesize > 1) + logfs_buf_recover(area, ofs, a + 1, super->s_writesize); + else + logfs_buf_recover(area, ofs, NULL, 0); + return 0; +} + +static void *unpack(void *from, void *to) +{ + struct logfs_journal_header *jh = from; + void *data = from + sizeof(struct logfs_journal_header); + int err; + size_t inlen, outlen; + + inlen = be16_to_cpu(jh->h_len); + outlen = be16_to_cpu(jh->h_datalen); + + if (jh->h_compr == COMPR_NONE) + memcpy(to, data, inlen); + else { + err = logfs_uncompress(data, to, inlen, outlen); + BUG_ON(err); + } + return to; +} + +static int __read_je_header(struct super_block *sb, u64 ofs, + struct logfs_journal_header *jh) +{ + struct logfs_super *super = logfs_super(sb); + size_t bufsize = max_t(size_t, sb->s_blocksize, super->s_writesize) + + MAX_JOURNAL_HEADER; 
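+	/* bufsize is the largest uncompressed journal-entry payload accepted below */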
+ u16 type, len, datalen; + int err; + + /* read header only */ + err = wbuf_read(sb, ofs, sizeof(*jh), jh); + if (err) + return err; + type = be16_to_cpu(jh->h_type); + len = be16_to_cpu(jh->h_len); + datalen = be16_to_cpu(jh->h_datalen); + if (len > sb->s_blocksize) + return -EIO; + if ((type < JE_FIRST) || (type > JE_LAST)) + return -EIO; + if (datalen > bufsize) + return -EIO; + return 0; +} + +static int __read_je_payload(struct super_block *sb, u64 ofs, + struct logfs_journal_header *jh) +{ + u16 len; + int err; + + len = be16_to_cpu(jh->h_len); + err = wbuf_read(sb, ofs + sizeof(*jh), len, jh + 1); + if (err) + return err; + if (jh->h_crc != logfs_crc32(jh, len + sizeof(*jh), 4)) { + /* Old code was confused. It forgot about the header length + * and stopped calculating the crc 16 bytes before the end + * of data - ick! + * FIXME: Remove this hack once the old code is fixed. + */ + if (jh->h_crc == logfs_crc32(jh, len, 4)) + WARN_ON_ONCE(1); + else + return -EIO; + } + return 0; +} + +/* + * jh needs to be large enough to hold the complete entry, not just the header + */ +static int __read_je(struct super_block *sb, u64 ofs, + struct logfs_journal_header *jh) +{ + int err; + + err = __read_je_header(sb, ofs, jh); + if (err) + return err; + return __read_je_payload(sb, ofs, jh); +} + +static int read_je(struct super_block *sb, u64 ofs) +{ + struct logfs_super *super = logfs_super(sb); + struct logfs_journal_header *jh = super->s_compressed_je; + void *scratch = super->s_je; + u16 type, datalen; + int err; + + err = __read_je(sb, ofs, jh); + if (err) + return err; + type = be16_to_cpu(jh->h_type); + datalen = be16_to_cpu(jh->h_datalen); + + switch (type) { + case JE_DYNSB: + read_dynsb(sb, unpack(jh, scratch)); + break; + case JE_ANCHOR: + read_anchor(sb, unpack(jh, scratch)); + break; + case JE_ERASECOUNT: + read_erasecount(sb, unpack(jh, scratch)); + break; + case JE_AREA: + read_area(sb, unpack(jh, scratch)); + break; + case JE_OBJ_ALIAS: + err = logfs_load_object_aliases(sb, unpack(jh, scratch), + datalen); + break; + default: + WARN_ON_ONCE(1); + return -EIO; + } + return err; +} + +static int logfs_read_segment(struct super_block *sb, u32 segno) +{ + struct logfs_super *super = logfs_super(sb); + struct logfs_journal_header *jh = super->s_compressed_je; + u64 ofs, seg_ofs = dev_ofs(sb, segno, 0); + u32 h_ofs, last_ofs = 0; + u16 len, datalen, last_len; + int i, err; + + /* search for most recent commit */ + for (h_ofs = 0; h_ofs < super->s_segsize; h_ofs += sizeof(*jh)) { + ofs = seg_ofs + h_ofs; + err = __read_je_header(sb, ofs, jh); + if (err) + continue; + if (jh->h_type != cpu_to_be16(JE_COMMIT)) + continue; + err = __read_je_payload(sb, ofs, jh); + if (err) + continue; + len = be16_to_cpu(jh->h_len); + datalen = be16_to_cpu(jh->h_datalen); + if ((datalen > sizeof(super->s_je_array)) || + (datalen % sizeof(__be64))) + continue; + last_ofs = h_ofs; + last_len = datalen; + h_ofs += ALIGN(len, sizeof(*jh)) - sizeof(*jh); + } + /* read commit */ + if (last_ofs == 0) + return -ENOENT; + ofs = seg_ofs + last_ofs; + log_journal("Read commit from %llx\n", ofs); + err = __read_je(sb, ofs, jh); + BUG_ON(err); /* We should have caught it in the scan loop already */ + if (err) + return err; + /* uncompress */ + unpack(jh, super->s_je_array); + super->s_no_je = last_len / sizeof(__be64); + /* iterate over array */ + for (i = 0; i < super->s_no_je; i++) { + err = read_je(sb, be64_to_cpu(super->s_je_array[i])); + if (err) + return err; + } + super->s_journal_area->a_segno = segno; + return 
0; +} + +static u64 read_gec(struct super_block *sb, u32 segno) +{ + struct logfs_segment_header sh; + __be32 crc; + int err; + + if (!segno) + return 0; + err = wbuf_read(sb, dev_ofs(sb, segno, 0), sizeof(sh), &sh); + if (err) + return 0; + crc = logfs_crc32(&sh, sizeof(sh), 4); + if (crc != sh.crc) { + WARN_ON(sh.gec != cpu_to_be64(0xffffffffffffffffull)); + /* Most likely it was just erased */ + return 0; + } + return be64_to_cpu(sh.gec); +} + +static int logfs_read_journal(struct super_block *sb) +{ + struct logfs_super *super = logfs_super(sb); + u64 gec[LOGFS_JOURNAL_SEGS], max; + u32 segno; + int i, max_i; + + max = 0; + max_i = -1; + journal_for_each(i) { + segno = super->s_journal_seg[i]; + gec[i] = read_gec(sb, super->s_journal_seg[i]); + if (gec[i] > max) { + max = gec[i]; + max_i = i; + } + } + if (max_i == -1) + return -EIO; + /* FIXME: Try older segments in case of error */ + return logfs_read_segment(sb, super->s_journal_seg[max_i]); +} + +/* + * First search the current segment (outer loop), then pick the next segment + * in the array, skipping any zero entries (inner loop). + */ +static void journal_get_free_segment(struct logfs_area *area) +{ + struct logfs_super *super = logfs_super(area->a_sb); + int i; + + journal_for_each(i) { + if (area->a_segno != super->s_journal_seg[i]) + continue; + + do { + i++; + if (i == LOGFS_JOURNAL_SEGS) + i = 0; + } while (!super->s_journal_seg[i]); + + area->a_segno = super->s_journal_seg[i]; + area->a_erase_count = ++(super->s_journal_ec[i]); + log_journal("Journal now at %x (ec %x)\n", area->a_segno, + area->a_erase_count); + return; + } + BUG(); +} + +static void journal_get_erase_count(struct logfs_area *area) +{ + /* erase count is stored globally and incremented in + * journal_get_free_segment() - nothing to do here */ +} + +static int journal_erase_segment(struct logfs_area *area) +{ + struct super_block *sb = area->a_sb; + struct logfs_segment_header sh; + u64 ofs; + int err; + + err = logfs_erase_segment(sb, area->a_segno); + if (err) + return err; + + sh.pad = 0; + sh.type = SEG_JOURNAL; + sh.level = 0; + sh.segno = cpu_to_be32(area->a_segno); + sh.ec = cpu_to_be32(area->a_erase_count); + sh.gec = cpu_to_be64(logfs_super(sb)->s_gec); + sh.crc = logfs_crc32(&sh, sizeof(sh), 4); + + /* This causes a bug in segment.c. Not yet. 
*/ + //logfs_set_segment_erased(sb, area->a_segno, area->a_erase_count, 0); + + ofs = dev_ofs(sb, area->a_segno, 0); + area->a_used_bytes = ALIGN(sizeof(sh), 16); + logfs_buf_write(area, ofs, &sh, sizeof(sh)); + return 0; +} + +static size_t __logfs_write_header(struct logfs_super *super, + struct logfs_journal_header *jh, size_t len, size_t datalen, + u16 type, u8 compr) +{ + jh->h_len = cpu_to_be16(len); + jh->h_type = cpu_to_be16(type); + jh->h_version = cpu_to_be16(++super->s_last_version); + jh->h_datalen = cpu_to_be16(datalen); + jh->h_compr = compr; + jh->h_pad[0] = 'H'; + jh->h_pad[1] = 'A'; + jh->h_pad[2] = 'T'; + jh->h_crc = logfs_crc32(jh, len + sizeof(*jh), 4); + return ALIGN(len, 16) + sizeof(*jh); +} + +static size_t logfs_write_header(struct logfs_super *super, + struct logfs_journal_header *jh, size_t datalen, u16 type) +{ + size_t len = datalen; + + return __logfs_write_header(super, jh, len, datalen, type, COMPR_NONE); +} + +static inline size_t logfs_journal_erasecount_size(struct logfs_super *super) +{ + return LOGFS_JOURNAL_SEGS * sizeof(__be32); +} + +static void *logfs_write_erasecount(struct super_block *sb, void *_ec, + u16 *type, size_t *len) +{ + struct logfs_super *super = logfs_super(sb); + struct logfs_je_journal_ec *ec = _ec; + int i; + + journal_for_each(i) + ec->ec[i] = cpu_to_be32(super->s_journal_ec[i]); + *type = JE_ERASECOUNT; + *len = logfs_journal_erasecount_size(super); + return ec; +} + +static void account_shadow(void *_shadow, unsigned long _sb, u64 ignore, + size_t ignore2) +{ + struct logfs_shadow *shadow = _shadow; + struct super_block *sb = (void *)_sb; + struct logfs_super *super = logfs_super(sb); + + /* consume new space */ + super->s_free_bytes -= shadow->new_len; + super->s_used_bytes += shadow->new_len; + super->s_dirty_used_bytes -= shadow->new_len; + + /* free up old space */ + super->s_free_bytes += shadow->old_len; + super->s_used_bytes -= shadow->old_len; + super->s_dirty_free_bytes -= shadow->old_len; + + logfs_set_segment_used(sb, shadow->old_ofs, -shadow->old_len); + logfs_set_segment_used(sb, shadow->new_ofs, shadow->new_len); + + log_journal("account_shadow(%llx, %llx, %x) %llx->%llx %x->%x\n", + shadow->ino, shadow->bix, shadow->gc_level, + shadow->old_ofs, shadow->new_ofs, + shadow->old_len, shadow->new_len); + mempool_free(shadow, super->s_shadow_pool); +} + +static void account_shadows(struct super_block *sb) +{ + struct logfs_super *super = logfs_super(sb); + struct inode *inode = super->s_master_inode; + struct logfs_inode *li = logfs_inode(inode); + struct shadow_tree *tree = &super->s_shadow_tree; + + btree_grim_visitor64(&tree->new, (unsigned long)sb, account_shadow); + btree_grim_visitor64(&tree->old, (unsigned long)sb, account_shadow); + + if (li->li_block) { + /* + * We never actually use the structure, when attached to the + * master inode. But it is easier to always free it here than + * to have checks in several places elsewhere when allocating + * it. 
+ */ + li->li_block->ops->free_block(sb, li->li_block); + } + BUG_ON((s64)li->li_used_bytes < 0); +} + +static void *__logfs_write_anchor(struct super_block *sb, void *_da, + u16 *type, size_t *len) +{ + struct logfs_super *super = logfs_super(sb); + struct logfs_je_anchor *da = _da; + struct inode *inode = super->s_master_inode; + struct logfs_inode *li = logfs_inode(inode); + int i; + + da->da_height = li->li_height; + da->da_last_ino = cpu_to_be64(super->s_last_ino); + da->da_size = cpu_to_be64(i_size_read(inode)); + da->da_used_bytes = cpu_to_be64(li->li_used_bytes); + for (i = 0; i < LOGFS_EMBEDDED_FIELDS; i++) + da->da_data[i] = cpu_to_be64(li->li_data[i]); + *type = JE_ANCHOR; + *len = sizeof(*da); + return da; +} + +static void *logfs_write_dynsb(struct super_block *sb, void *_dynsb, + u16 *type, size_t *len) +{ + struct logfs_super *super = logfs_super(sb); + struct logfs_je_dynsb *dynsb = _dynsb; + + dynsb->ds_gec = cpu_to_be64(super->s_gec); + dynsb->ds_sweeper = cpu_to_be64(super->s_sweeper); + dynsb->ds_victim_ino = cpu_to_be64(super->s_victim_ino); + dynsb->ds_rename_dir = cpu_to_be64(super->s_rename_dir); + dynsb->ds_rename_pos = cpu_to_be64(super->s_rename_pos); + dynsb->ds_used_bytes = cpu_to_be64(super->s_used_bytes); + dynsb->ds_generation = cpu_to_be32(super->s_generation); + *type = JE_DYNSB; + *len = sizeof(*dynsb); + return dynsb; +} + +static void write_wbuf(struct super_block *sb, struct logfs_area *area, + void *wbuf) +{ + struct logfs_super *super = logfs_super(sb); + struct address_space *mapping = super->s_mapping_inode->i_mapping; + u64 ofs; + pgoff_t index; + int page_ofs; + struct page *page; + + ofs = dev_ofs(sb, area->a_segno, + area->a_used_bytes & ~(super->s_writesize - 1)); + index = ofs >> PAGE_SHIFT; + page_ofs = ofs & (PAGE_SIZE - 1); + + page = find_lock_page(mapping, index); + BUG_ON(!page); + memcpy(wbuf, page_address(page) + page_ofs, super->s_writesize); + unlock_page(page); +} + +static void *logfs_write_area(struct super_block *sb, void *_a, + u16 *type, size_t *len) +{ + struct logfs_super *super = logfs_super(sb); + struct logfs_area *area = super->s_area[super->s_sum_index]; + struct logfs_je_area *a = _a; + + a->vim = VIM_DEFAULT; + a->gc_level = super->s_sum_index; + a->used_bytes = cpu_to_be32(area->a_used_bytes); + a->segno = cpu_to_be32(area->a_segno); + if (super->s_writesize > 1) + write_wbuf(sb, area, a + 1); + + *type = JE_AREA; + *len = sizeof(*a) + super->s_writesize; + return a; +} + +static void *logfs_write_commit(struct super_block *sb, void *h, + u16 *type, size_t *len) +{ + struct logfs_super *super = logfs_super(sb); + + *type = JE_COMMIT; + *len = super->s_no_je * sizeof(__be64); + return super->s_je_array; +} + +static size_t __logfs_write_je(struct super_block *sb, void *buf, u16 type, + size_t len) +{ + struct logfs_super *super = logfs_super(sb); + void *header = super->s_compressed_je; + void *data = header + sizeof(struct logfs_journal_header); + ssize_t compr_len, pad_len; + u8 compr = COMPR_ZLIB; + + if (len == 0) + return logfs_write_header(super, header, 0, type); + + compr_len = logfs_compress(buf, data, len, sb->s_blocksize); + if (compr_len < 0 || type == JE_ANCHOR) { + BUG_ON(len > sb->s_blocksize); + memcpy(data, buf, len); + compr_len = len; + compr = COMPR_NONE; + } + + pad_len = ALIGN(compr_len, 16); + memset(data + compr_len, 0, pad_len - compr_len); + + return __logfs_write_header(super, header, compr_len, len, type, compr); +} + +static s64 logfs_get_free_bytes(struct logfs_area *area, size_t *bytes, 
+ int must_pad) +{ + u32 writesize = logfs_super(area->a_sb)->s_writesize; + s32 ofs; + int ret; + + ret = logfs_open_area(area, *bytes); + if (ret) + return -EAGAIN; + + ofs = area->a_used_bytes; + area->a_used_bytes += *bytes; + + if (must_pad) { + area->a_used_bytes = ALIGN(area->a_used_bytes, writesize); + *bytes = area->a_used_bytes - ofs; + } + + return dev_ofs(area->a_sb, area->a_segno, ofs); +} + +static int logfs_write_je_buf(struct super_block *sb, void *buf, u16 type, + size_t buf_len) +{ + struct logfs_super *super = logfs_super(sb); + struct logfs_area *area = super->s_journal_area; + struct logfs_journal_header *jh = super->s_compressed_je; + size_t len; + int must_pad = 0; + s64 ofs; + + len = __logfs_write_je(sb, buf, type, buf_len); + if (jh->h_type == cpu_to_be16(JE_COMMIT)) + must_pad = 1; + + ofs = logfs_get_free_bytes(area, &len, must_pad); + if (ofs < 0) + return ofs; + logfs_buf_write(area, ofs, super->s_compressed_je, len); + super->s_je_array[super->s_no_je++] = cpu_to_be64(ofs); + return 0; +} + +static int logfs_write_je(struct super_block *sb, + void* (*write)(struct super_block *sb, void *scratch, + u16 *type, size_t *len)) +{ + void *buf; + size_t len; + u16 type; + + buf = write(sb, logfs_super(sb)->s_je, &type, &len); + return logfs_write_je_buf(sb, buf, type, len); +} + +int write_alias_journal(struct super_block *sb, u64 ino, u64 bix, + level_t level, int child_no, __be64 val) +{ + struct logfs_super *super = logfs_super(sb); + struct logfs_obj_alias *oa = super->s_je; + int err = 0, fill = super->s_je_fill; + + log_aliases("logfs_write_obj_aliases #%x(%llx, %llx, %x, %x) %llx\n", + fill, ino, bix, level, child_no, be64_to_cpu(val)); + oa[fill].ino = cpu_to_be64(ino); + oa[fill].bix = cpu_to_be64(bix); + oa[fill].val = val; + oa[fill].level = (__force u8)level; + oa[fill].child_no = cpu_to_be16(child_no); + fill++; + if (fill >= sb->s_blocksize / sizeof(*oa)) { + err = logfs_write_je_buf(sb, oa, JE_OBJ_ALIAS, sb->s_blocksize); + fill = 0; + } + + super->s_je_fill = fill; + return err; +} + +static int logfs_write_obj_aliases(struct super_block *sb) +{ + struct logfs_super *super = logfs_super(sb); + int err; + + log_journal("logfs_write_obj_aliases: %d aliases to write\n", + super->s_no_object_aliases); + super->s_je_fill = 0; + err = logfs_write_obj_aliases_pagecache(sb); + if (err) + return err; + + if (super->s_je_fill) + err = logfs_write_je_buf(sb, super->s_je, JE_OBJ_ALIAS, + super->s_je_fill + * sizeof(struct logfs_obj_alias)); + return err; +} + +/* + * Write all journal entries. The goto logic ensures that all journal entries + * are written whenever a new segment is used. It is ugly and potentially a + * bit wasteful, but robustness is more important. With this we can *always* + * erase all journal segments except the one containing the most recent commit. 
+ */ +void logfs_write_anchor(struct inode *inode) +{ + struct super_block *sb = inode->i_sb; + struct logfs_super *super = logfs_super(sb); + struct logfs_area *area = super->s_journal_area; + int i, err; + + BUG_ON(logfs_super(sb)->s_flags & LOGFS_SB_FLAG_SHUTDOWN); + mutex_lock(&super->s_journal_mutex); + + /* Do this first or suffer corruption */ + logfs_sync_segments(sb); + account_shadows(sb); + +again: + super->s_no_je = 0; + for_each_area(i) { + if (!super->s_area[i]->a_is_open) + continue; + super->s_sum_index = i; + err = logfs_write_je(sb, logfs_write_area); + if (err) + goto again; + } + err = logfs_write_obj_aliases(sb); + if (err) + goto again; + err = logfs_write_je(sb, logfs_write_erasecount); + if (err) + goto again; + err = logfs_write_je(sb, __logfs_write_anchor); + if (err) + goto again; + err = logfs_write_je(sb, logfs_write_dynsb); + if (err) + goto again; + /* + * Order is imperative. First we sync all writes, including the + * non-committed journal writes. Then we write the final commit and + * sync the current journal segment. + * There is a theoretical bug here. Syncing the journal segment will + * write a number of journal entries and the final commit. All these + * are written in a single operation. If the device layer writes the + * data back-to-front, the commit will precede the other journal + * entries, leaving a race window. + * Two fixes are possible. Preferred is to fix the device layer to + * ensure writes happen front-to-back. Alternatively we can insert + * another logfs_sync_area() super->s_devops->sync() combo before + * writing the commit. + */ + /* + * On another subject, super->s_devops->sync is usually not necessary. + * Unless called from sys_sync or friends, a barrier would suffice. + */ + super->s_devops->sync(sb); + err = logfs_write_je(sb, logfs_write_commit); + if (err) + goto again; + log_journal("Write commit to %llx\n", + be64_to_cpu(super->s_je_array[super->s_no_je - 1])); + logfs_sync_area(area); + BUG_ON(area->a_used_bytes != area->a_written_bytes); + super->s_devops->sync(sb); + + mutex_unlock(&super->s_journal_mutex); + return; +} + +void do_logfs_journal_wl_pass(struct super_block *sb) +{ + struct logfs_super *super = logfs_super(sb); + struct logfs_area *area = super->s_journal_area; + u32 segno, ec; + int i, err; + + log_journal("Journal requires wear-leveling.\n"); + /* Drop old segments */ + journal_for_each(i) + if (super->s_journal_seg[i]) { + logfs_set_segment_unreserved(sb, + super->s_journal_seg[i], + super->s_journal_ec[i]); + super->s_journal_seg[i] = 0; + super->s_journal_ec[i] = 0; + } + /* Get new segments */ + for (i = 0; i < super->s_no_journal_segs; i++) { + segno = get_best_cand(sb, &super->s_reserve_list, &ec); + super->s_journal_seg[i] = segno; + super->s_journal_ec[i] = ec; + logfs_set_segment_reserved(sb, segno); + } + /* Manually move journal_area */ + area->a_segno = super->s_journal_seg[0]; + area->a_is_open = 0; + area->a_used_bytes = 0; + /* Write journal */ + logfs_write_anchor(super->s_master_inode); + /* Write superblocks */ + err = logfs_write_sb(sb); + BUG_ON(err); +} + +static const struct logfs_area_ops journal_area_ops = { + .get_free_segment = journal_get_free_segment, + .get_erase_count = journal_get_erase_count, + .erase_segment = journal_erase_segment, +}; + +int logfs_init_journal(struct super_block *sb) +{ + struct logfs_super *super = logfs_super(sb); + size_t bufsize = max_t(size_t, sb->s_blocksize, super->s_writesize) + + MAX_JOURNAL_HEADER; + int ret = -ENOMEM; + + 
mutex_init(&super->s_journal_mutex); + btree_init_mempool32(&super->s_reserved_segments, super->s_btree_pool); + + super->s_je = kzalloc(bufsize, GFP_KERNEL); + if (!super->s_je) + return ret; + + super->s_compressed_je = kzalloc(bufsize, GFP_KERNEL); + if (!super->s_compressed_je) + return ret; + + super->s_master_inode = logfs_new_meta_inode(sb, LOGFS_INO_MASTER); + if (IS_ERR(super->s_master_inode)) + return PTR_ERR(super->s_master_inode); + + ret = logfs_read_journal(sb); + if (ret) + return -EIO; + + reserve_sb_and_journal(sb); + logfs_calc_free(sb); + + super->s_journal_area->a_ops = &journal_area_ops; + return 0; +} + +void logfs_cleanup_journal(struct super_block *sb) +{ + struct logfs_super *super = logfs_super(sb); + + btree_grim_visitor32(&super->s_reserved_segments, 0, NULL); + destroy_meta_inode(super->s_master_inode); + super->s_master_inode = NULL; + + kfree(super->s_compressed_je); + kfree(super->s_je); +} diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h new file mode 100644 index 000000000000..e3082abe9e3b --- /dev/null +++ b/fs/logfs/logfs.h @@ -0,0 +1,722 @@ +/* + * fs/logfs/logfs.h + * + * As should be obvious for Linux kernel code, license is GPLv2 + * + * Copyright (c) 2005-2008 Joern Engel + * + * Private header for logfs. + */ +#ifndef FS_LOGFS_LOGFS_H +#define FS_LOGFS_LOGFS_H + +#undef __CHECK_ENDIAN__ +#define __CHECK_ENDIAN__ + +#include +#include +#include +#include +#include +#include +#include +#include "logfs_abi.h" + +#define LOGFS_DEBUG_SUPER (0x0001) +#define LOGFS_DEBUG_SEGMENT (0x0002) +#define LOGFS_DEBUG_JOURNAL (0x0004) +#define LOGFS_DEBUG_DIR (0x0008) +#define LOGFS_DEBUG_FILE (0x0010) +#define LOGFS_DEBUG_INODE (0x0020) +#define LOGFS_DEBUG_READWRITE (0x0040) +#define LOGFS_DEBUG_GC (0x0080) +#define LOGFS_DEBUG_GC_NOISY (0x0100) +#define LOGFS_DEBUG_ALIASES (0x0200) +#define LOGFS_DEBUG_BLOCKMOVE (0x0400) +#define LOGFS_DEBUG_ALL (0xffffffff) + +#define LOGFS_DEBUG (0x01) +/* + * To enable specific log messages, simply define LOGFS_DEBUG to match any + * or all of the above. + */ +#ifndef LOGFS_DEBUG +#define LOGFS_DEBUG (0) +#endif + +#define log_cond(cond, fmt, arg...) do { \ + if (cond) \ + printk(KERN_DEBUG fmt, ##arg); \ +} while (0) + +#define log_super(fmt, arg...) \ + log_cond(LOGFS_DEBUG & LOGFS_DEBUG_SUPER, fmt, ##arg) +#define log_segment(fmt, arg...) \ + log_cond(LOGFS_DEBUG & LOGFS_DEBUG_SEGMENT, fmt, ##arg) +#define log_journal(fmt, arg...) \ + log_cond(LOGFS_DEBUG & LOGFS_DEBUG_JOURNAL, fmt, ##arg) +#define log_dir(fmt, arg...) \ + log_cond(LOGFS_DEBUG & LOGFS_DEBUG_DIR, fmt, ##arg) +#define log_file(fmt, arg...) \ + log_cond(LOGFS_DEBUG & LOGFS_DEBUG_FILE, fmt, ##arg) +#define log_inode(fmt, arg...) \ + log_cond(LOGFS_DEBUG & LOGFS_DEBUG_INODE, fmt, ##arg) +#define log_readwrite(fmt, arg...) \ + log_cond(LOGFS_DEBUG & LOGFS_DEBUG_READWRITE, fmt, ##arg) +#define log_gc(fmt, arg...) \ + log_cond(LOGFS_DEBUG & LOGFS_DEBUG_GC, fmt, ##arg) +#define log_gc_noisy(fmt, arg...) \ + log_cond(LOGFS_DEBUG & LOGFS_DEBUG_GC_NOISY, fmt, ##arg) +#define log_aliases(fmt, arg...) \ + log_cond(LOGFS_DEBUG & LOGFS_DEBUG_ALIASES, fmt, ##arg) +#define log_blockmove(fmt, arg...) \ + log_cond(LOGFS_DEBUG & LOGFS_DEBUG_BLOCKMOVE, fmt, ##arg) + +#define PG_pre_locked PG_owner_priv_1 +#define PagePreLocked(page) test_bit(PG_pre_locked, &(page)->flags) +#define SetPagePreLocked(page) set_bit(PG_pre_locked, &(page)->flags) +#define ClearPagePreLocked(page) clear_bit(PG_pre_locked, &(page)->flags) + +/* FIXME: This should really be somewhere in the 64bit area. 
*/ +#define LOGFS_LINK_MAX (1<<30) + +/* Read-only filesystem */ +#define LOGFS_SB_FLAG_RO 0x0001 +#define LOGFS_SB_FLAG_SEG_ALIAS 0x0002 +#define LOGFS_SB_FLAG_OBJ_ALIAS 0x0004 +#define LOGFS_SB_FLAG_SHUTDOWN 0x0008 + +/* Write Control Flags */ +#define WF_LOCK 0x01 /* take write lock */ +#define WF_WRITE 0x02 /* write block */ +#define WF_DELETE 0x04 /* delete old block */ + +typedef u8 __bitwise level_t; +typedef u8 __bitwise gc_level_t; + +#define LEVEL(level) ((__force level_t)(level)) +#define GC_LEVEL(gc_level) ((__force gc_level_t)(gc_level)) + +#define SUBLEVEL(level) ( (void)((level) == LEVEL(1)), \ + (__force level_t)((__force u8)(level) - 1) ) + +/** + * struct logfs_area - area management information + * + * @a_sb: the superblock this area belongs to + * @a_is_open: 1 if the area is currently open, else 0 + * @a_segno: segment number of area + * @a_written_bytes: number of bytes already written back + * @a_used_bytes: number of used bytes + * @a_ops: area operations (either journal or ostore) + * @a_erase_count: erase count + * @a_level: GC level + */ +struct logfs_area { /* a segment open for writing */ + struct super_block *a_sb; + int a_is_open; + u32 a_segno; + u32 a_written_bytes; + u32 a_used_bytes; + const struct logfs_area_ops *a_ops; + u32 a_erase_count; + gc_level_t a_level; +}; + +/** + * struct logfs_area_ops - area operations + * + * @get_free_segment: fill area->ofs with the offset of a free segment + * @get_erase_count: fill area->erase_count (needs area->ofs) + * @erase_segment: erase and setup segment + */ +struct logfs_area_ops { + void (*get_free_segment)(struct logfs_area *area); + void (*get_erase_count)(struct logfs_area *area); + int (*erase_segment)(struct logfs_area *area); +}; + +/** + * struct logfs_device_ops - device access operations + * + * @readpage: read one page (mm page) + * @writeseg: write one segment. may be a partial segment + * @erase: erase one segment + * @read: read from the device + * @erase: erase part of the device + */ +struct logfs_device_ops { + struct page *(*find_first_sb)(struct super_block *sb, u64 *ofs); + struct page *(*find_last_sb)(struct super_block *sb, u64 *ofs); + int (*write_sb)(struct super_block *sb, struct page *page); + int (*readpage)(void *_sb, struct page *page); + void (*writeseg)(struct super_block *sb, u64 ofs, size_t len); + int (*erase)(struct super_block *sb, loff_t ofs, size_t len); + void (*sync)(struct super_block *sb); + void (*put_device)(struct super_block *sb); +}; + +/** + * struct candidate_list - list of similar candidates + */ +struct candidate_list { + struct rb_root rb_tree; + int count; + int maxcount; + int sort_by_ec; +}; + +/** + * struct gc_candidate - "candidate" segment to be garbage collected next + * + * @list: list (either free of low) + * @segno: segment number + * @valid: number of valid bytes + * @erase_count: erase count of segment + * @dist: distance from tree root + * + * Candidates can be on two lists. The free list contains electees rather + * than candidates - segments that no longer contain any valid data. The + * low list contains candidates to be picked for GC. It should be kept + * short. It is not required to always pick a perfect candidate. In the + * worst case GC will have to move more data than absolutely necessary. 
+ */ +struct gc_candidate { + struct rb_node rb_node; + struct candidate_list *list; + u32 segno; + u32 valid; + u32 erase_count; + u8 dist; +}; + +/** + * struct logfs_journal_entry - temporary structure used during journal scan + * + * @used: + * @version: normalized version + * @len: length + * @offset: offset + */ +struct logfs_journal_entry { + int used; + s16 version; + u16 len; + u16 datalen; + u64 offset; +}; + +enum transaction_state { + CREATE_1 = 1, + CREATE_2, + UNLINK_1, + UNLINK_2, + CROSS_RENAME_1, + CROSS_RENAME_2, + TARGET_RENAME_1, + TARGET_RENAME_2, + TARGET_RENAME_3 +}; + +/** + * struct logfs_transaction - essential fields to support atomic dirops + * + * @ino: target inode + * @dir: inode of directory containing dentry + * @pos: pos of dentry in directory + */ +struct logfs_transaction { + enum transaction_state state; + u64 ino; + u64 dir; + u64 pos; +}; + +/** + * struct logfs_shadow - old block in the shadow of a not-yet-committed new one + * @old_ofs: offset of old block on medium + * @new_ofs: offset of new block on medium + * @ino: inode number + * @bix: block index + * @old_len: size of old block, including header + * @new_len: size of new block, including header + * @level: block level + */ +struct logfs_shadow { + u64 old_ofs; + u64 new_ofs; + u64 ino; + u64 bix; + int old_len; + int new_len; + gc_level_t gc_level; +}; + +/** + * struct shadow_tree + * @new: shadows where old_ofs==0, indexed by new_ofs + * @old: shadows where old_ofs!=0, indexed by old_ofs + */ +struct shadow_tree { + struct btree_head64 new; + struct btree_head64 old; +}; + +struct object_alias_item { + struct list_head list; + __be64 val; + int child_no; +}; + +/** + * struct logfs_block - contains any block state + * @type: indirect block or inode + * @full: number of fully populated children + * @partial: number of partially populated children + * + * Most blocks are directly represented by page cache pages. But when a block + * becomes dirty, is part of a transaction, contains aliases or is otherwise + * special, a struct logfs_block is allocated to track the additional state. + * Inodes are very similar to indirect blocks, so they can also get one of + * these structures added when appropriate. 
+ */ +#define BLOCK_INDIRECT 1 /* Indirect block */ +#define BLOCK_INODE 2 /* Inode */ +struct logfs_block_ops; +struct logfs_block { + struct list_head alias_list; + struct list_head item_list; + struct super_block *sb; + u64 ino; + u64 bix; + level_t level; + struct page *page; + struct inode *inode; + struct logfs_transaction *ta; + unsigned long alias_map[LOGFS_BLOCK_FACTOR / BITS_PER_LONG]; + struct logfs_block_ops *ops; + int full; + int partial; + int reserved_bytes; +}; + +typedef int write_alias_t(struct super_block *sb, u64 ino, u64 bix, + level_t level, int child_no, __be64 val); +struct logfs_block_ops { + void (*write_block)(struct logfs_block *block); + gc_level_t (*block_level)(struct logfs_block *block); + void (*free_block)(struct super_block *sb, struct logfs_block*block); + int (*write_alias)(struct super_block *sb, + struct logfs_block *block, + write_alias_t *write_one_alias); +}; + +struct logfs_super { + struct mtd_info *s_mtd; /* underlying device */ + struct block_device *s_bdev; /* underlying device */ + const struct logfs_device_ops *s_devops;/* device access */ + struct inode *s_master_inode; /* inode file */ + struct inode *s_segfile_inode; /* segment file */ + struct inode *s_mapping_inode; /* device mapping */ + atomic_t s_pending_writes; /* outstanting bios */ + long s_flags; + mempool_t *s_btree_pool; /* for btree nodes */ + mempool_t *s_alias_pool; /* aliases in segment.c */ + u64 s_feature_incompat; + u64 s_feature_ro_compat; + u64 s_feature_compat; + u64 s_feature_flags; + u64 s_sb_ofs[2]; + /* alias.c fields */ + struct btree_head32 s_segment_alias; /* remapped segments */ + int s_no_object_aliases; + struct list_head s_object_alias; /* remapped objects */ + struct btree_head128 s_object_alias_tree; /* remapped objects */ + struct mutex s_object_alias_mutex; + /* dir.c fields */ + struct mutex s_dirop_mutex; /* for creat/unlink/rename */ + u64 s_victim_ino; /* used for atomic dir-ops */ + u64 s_rename_dir; /* source directory ino */ + u64 s_rename_pos; /* position of source dd */ + /* gc.c fields */ + long s_segsize; /* size of a segment */ + int s_segshift; /* log2 of segment size */ + long s_segmask; /* 1 << s_segshift - 1 */ + long s_no_segs; /* segments on device */ + long s_no_journal_segs; /* segments used for journal */ + long s_no_blocks; /* blocks per segment */ + long s_writesize; /* minimum write size */ + int s_writeshift; /* log2 of write size */ + u64 s_size; /* filesystem size */ + struct logfs_area *s_area[LOGFS_NO_AREAS]; /* open segment array */ + u64 s_gec; /* global erase count */ + u64 s_wl_gec_ostore; /* time of last wl event */ + u64 s_wl_gec_journal; /* time of last wl event */ + u64 s_sweeper; /* current sweeper pos */ + u8 s_ifile_levels; /* max level of ifile */ + u8 s_iblock_levels; /* max level of regular files */ + u8 s_data_levels; /* # of segments to leaf block*/ + u8 s_total_levels; /* sum of above three */ + struct btree_head32 s_cand_tree; /* all candidates */ + struct candidate_list s_free_list; /* 100% free segments */ + struct candidate_list s_reserve_list; /* Bad segment reserve */ + struct candidate_list s_low_list[LOGFS_NO_AREAS];/* good candidates */ + struct candidate_list s_ec_list; /* wear level candidates */ + struct btree_head32 s_reserved_segments;/* sb, journal, bad, etc. 
*/ + /* inode.c fields */ + u64 s_last_ino; /* highest ino used */ + long s_inos_till_wrap; + u32 s_generation; /* i_generation for new files */ + struct list_head s_freeing_list; /* inodes being freed */ + /* journal.c fields */ + struct mutex s_journal_mutex; + void *s_je; /* journal entry to compress */ + void *s_compressed_je; /* block to write to journal */ + u32 s_journal_seg[LOGFS_JOURNAL_SEGS]; /* journal segments */ + u32 s_journal_ec[LOGFS_JOURNAL_SEGS]; /* journal erasecounts */ + u64 s_last_version; + struct logfs_area *s_journal_area; /* open journal segment */ + __be64 s_je_array[64]; + int s_no_je; + + int s_sum_index; /* for the 12 summaries */ + struct shadow_tree s_shadow_tree; + int s_je_fill; /* index of current je */ + /* readwrite.c fields */ + struct mutex s_write_mutex; + int s_lock_count; + mempool_t *s_block_pool; /* struct logfs_block pool */ + mempool_t *s_shadow_pool; /* struct logfs_shadow pool */ + /* + * Space accounting: + * - s_used_bytes specifies space used to store valid data objects. + * - s_dirty_used_bytes is space used to store non-committed data + * objects. Those objects have already been written themselves, + * but they don't become valid until all indirect blocks up to the + * journal have been written as well. + * - s_dirty_free_bytes is space used to store the old copy of a + * replaced object, as long as the replacement is non-committed. + * In other words, it is the amount of space freed when all dirty + * blocks are written back. + * - s_free_bytes is the amount of free space available for any + * purpose. + * - s_root_reserve is the amount of free space available only to + * the root user. Non-privileged users can no longer write once + * this watermark has been reached. + * - s_speed_reserve is space which remains unused to speed up + * garbage collection performance. + * - s_dirty_pages is the space reserved for currently dirty pages. + * It is a pessimistic estimate, so some/most will get freed on + * page writeback. + * + * s_used_bytes + s_free_bytes + s_speed_reserve = total usable size + */ + u64 s_free_bytes; + u64 s_used_bytes; + u64 s_dirty_free_bytes; + u64 s_dirty_used_bytes; + u64 s_root_reserve; + u64 s_speed_reserve; + u64 s_dirty_pages; + /* Bad block handling: + * - s_bad_seg_reserve is a number of segments usually kept + * free. When encountering bad blocks, the affected segment's data + * is _temporarily_ moved to a reserved segment. + * - s_bad_segments is the number of known bad segments. 
+ */ + u32 s_bad_seg_reserve; + u32 s_bad_segments; +}; + +/** + * struct logfs_inode - in-memory inode + * + * @vfs_inode: struct inode + * @li_data: data pointers + * @li_used_bytes: number of used bytes + * @li_freeing_list: used to track inodes currently being freed + * @li_flags: inode flags + * @li_refcount: number of internal (GC-induced) references + */ +struct logfs_inode { + struct inode vfs_inode; + u64 li_data[LOGFS_EMBEDDED_FIELDS]; + u64 li_used_bytes; + struct list_head li_freeing_list; + struct logfs_block *li_block; + u32 li_flags; + u8 li_height; + int li_refcount; +}; + +#define journal_for_each(__i) for (__i = 0; __i < LOGFS_JOURNAL_SEGS; __i++) +#define for_each_area(__i) for (__i = 0; __i < LOGFS_NO_AREAS; __i++) +#define for_each_area_down(__i) for (__i = LOGFS_NO_AREAS - 1; __i >= 0; __i--) + +/* compr.c */ +int logfs_compress(void *in, void *out, size_t inlen, size_t outlen); +int logfs_uncompress(void *in, void *out, size_t inlen, size_t outlen); +int __init logfs_compr_init(void); +void logfs_compr_exit(void); + +/* dev_bdev.c */ +#ifdef CONFIG_BLOCK +int logfs_get_sb_bdev(struct file_system_type *type, int flags, + const char *devname, struct vfsmount *mnt); +#else +static inline int logfs_get_sb_bdev(struct file_system_type *type, int flags, + const char *devname, struct vfsmount *mnt) +{ + return -ENODEV; +} +#endif + +/* dev_mtd.c */ +#ifdef CONFIG_MTD +int logfs_get_sb_mtd(struct file_system_type *type, int flags, + int mtdnr, struct vfsmount *mnt); +#else +static inline int logfs_get_sb_mtd(struct file_system_type *type, int flags, + int mtdnr, struct vfsmount *mnt) +{ + return -ENODEV; +} +#endif + +/* dir.c */ +extern const struct inode_operations logfs_symlink_iops; +extern const struct inode_operations logfs_dir_iops; +extern const struct file_operations logfs_dir_fops; +int logfs_replay_journal(struct super_block *sb); + +/* file.c */ +extern const struct inode_operations logfs_reg_iops; +extern const struct file_operations logfs_reg_fops; +extern const struct address_space_operations logfs_reg_aops; +int logfs_readpage(struct file *file, struct page *page); +int logfs_ioctl(struct inode *inode, struct file *file, unsigned int cmd, + unsigned long arg); +int logfs_fsync(struct file *file, struct dentry *dentry, int datasync); + +/* gc.c */ +u32 get_best_cand(struct super_block *sb, struct candidate_list *list, u32 *ec); +void logfs_gc_pass(struct super_block *sb); +int logfs_check_areas(struct super_block *sb); +int logfs_init_gc(struct super_block *sb); +void logfs_cleanup_gc(struct super_block *sb); + +/* inode.c */ +extern const struct super_operations logfs_super_operations; +struct inode *logfs_iget(struct super_block *sb, ino_t ino); +struct inode *logfs_safe_iget(struct super_block *sb, ino_t ino, int *cookie); +void logfs_safe_iput(struct inode *inode, int cookie); +struct inode *logfs_new_inode(struct inode *dir, int mode); +struct inode *logfs_new_meta_inode(struct super_block *sb, u64 ino); +struct inode *logfs_read_meta_inode(struct super_block *sb, u64 ino); +int logfs_init_inode_cache(void); +void logfs_destroy_inode_cache(void); +void destroy_meta_inode(struct inode *inode); +void logfs_set_blocks(struct inode *inode, u64 no); +/* these logically belong into inode.c but actually reside in readwrite.c */ +int logfs_read_inode(struct inode *inode); +int __logfs_write_inode(struct inode *inode, long flags); +void logfs_delete_inode(struct inode *inode); +void logfs_clear_inode(struct inode *inode); + +/* journal.c */ +void 
logfs_write_anchor(struct inode *inode); +int logfs_init_journal(struct super_block *sb); +void logfs_cleanup_journal(struct super_block *sb); +int write_alias_journal(struct super_block *sb, u64 ino, u64 bix, + level_t level, int child_no, __be64 val); +void do_logfs_journal_wl_pass(struct super_block *sb); + +/* readwrite.c */ +pgoff_t logfs_pack_index(u64 bix, level_t level); +void logfs_unpack_index(pgoff_t index, u64 *bix, level_t *level); +int logfs_inode_write(struct inode *inode, const void *buf, size_t count, + loff_t bix, long flags, struct shadow_tree *shadow_tree); +int logfs_readpage_nolock(struct page *page); +int logfs_write_buf(struct inode *inode, struct page *page, long flags); +int logfs_delete(struct inode *inode, pgoff_t index, + struct shadow_tree *shadow_tree); +int logfs_rewrite_block(struct inode *inode, u64 bix, u64 ofs, + gc_level_t gc_level, long flags); +int logfs_is_valid_block(struct super_block *sb, u64 ofs, u64 ino, u64 bix, + gc_level_t gc_level); +int logfs_truncate(struct inode *inode, u64 size); +u64 logfs_seek_hole(struct inode *inode, u64 bix); +u64 logfs_seek_data(struct inode *inode, u64 bix); +int logfs_open_segfile(struct super_block *sb); +int logfs_init_rw(struct super_block *sb); +void logfs_cleanup_rw(struct super_block *sb); +void logfs_add_transaction(struct inode *inode, struct logfs_transaction *ta); +void logfs_del_transaction(struct inode *inode, struct logfs_transaction *ta); +void logfs_write_block(struct logfs_block *block, long flags); +int logfs_write_obj_aliases_pagecache(struct super_block *sb); +void logfs_get_segment_entry(struct super_block *sb, u32 segno, + struct logfs_segment_entry *se); +void logfs_set_segment_used(struct super_block *sb, u64 ofs, int increment); +void logfs_set_segment_erased(struct super_block *sb, u32 segno, u32 ec, + gc_level_t gc_level); +void logfs_set_segment_reserved(struct super_block *sb, u32 segno); +void logfs_set_segment_unreserved(struct super_block *sb, u32 segno, u32 ec); +struct logfs_block *__alloc_block(struct super_block *sb, + u64 ino, u64 bix, level_t level); +void __free_block(struct super_block *sb, struct logfs_block *block); +void btree_write_block(struct logfs_block *block); +void initialize_block_counters(struct page *page, struct logfs_block *block, + __be64 *array, int page_is_empty); +int logfs_exist_block(struct inode *inode, u64 bix); +int get_page_reserve(struct inode *inode, struct page *page); +extern struct logfs_block_ops indirect_block_ops; + +/* segment.c */ +int logfs_erase_segment(struct super_block *sb, u32 ofs); +int wbuf_read(struct super_block *sb, u64 ofs, size_t len, void *buf); +int logfs_segment_read(struct inode *inode, struct page *page, u64 ofs, u64 bix, + level_t level); +int logfs_segment_write(struct inode *inode, struct page *page, + struct logfs_shadow *shadow); +int logfs_segment_delete(struct inode *inode, struct logfs_shadow *shadow); +int logfs_load_object_aliases(struct super_block *sb, + struct logfs_obj_alias *oa, int count); +void move_page_to_btree(struct page *page); +int logfs_init_mapping(struct super_block *sb); +void logfs_sync_area(struct logfs_area *area); +void logfs_sync_segments(struct super_block *sb); + +/* area handling */ +int logfs_init_areas(struct super_block *sb); +void logfs_cleanup_areas(struct super_block *sb); +int logfs_open_area(struct logfs_area *area, size_t bytes); +void __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len, + int use_filler); + +static inline void logfs_buf_write(struct 
logfs_area *area, u64 ofs, + void *buf, size_t len) +{ + __logfs_buf_write(area, ofs, buf, len, 0); +} + +static inline void logfs_buf_recover(struct logfs_area *area, u64 ofs, + void *buf, size_t len) +{ + __logfs_buf_write(area, ofs, buf, len, 1); +} + +/* super.c */ +struct page *emergency_read_begin(struct address_space *mapping, pgoff_t index); +void emergency_read_end(struct page *page); +void logfs_crash_dump(struct super_block *sb); +void *memchr_inv(const void *s, int c, size_t n); +int logfs_statfs(struct dentry *dentry, struct kstatfs *stats); +int logfs_get_sb_device(struct file_system_type *type, int flags, + struct mtd_info *mtd, struct block_device *bdev, + const struct logfs_device_ops *devops, struct vfsmount *mnt); +int logfs_check_ds(struct logfs_disk_super *ds); +int logfs_write_sb(struct super_block *sb); + +static inline struct logfs_super *logfs_super(struct super_block *sb) +{ + return sb->s_fs_info; +} + +static inline struct logfs_inode *logfs_inode(struct inode *inode) +{ + return container_of(inode, struct logfs_inode, vfs_inode); +} + +static inline void logfs_set_ro(struct super_block *sb) +{ + logfs_super(sb)->s_flags |= LOGFS_SB_FLAG_RO; +} + +#define LOGFS_BUG(sb) do { \ + struct super_block *__sb = sb; \ + logfs_crash_dump(__sb); \ + logfs_super(__sb)->s_flags |= LOGFS_SB_FLAG_RO; \ + BUG(); \ +} while (0) + +#define LOGFS_BUG_ON(condition, sb) \ + do { if (unlikely(condition)) LOGFS_BUG((sb)); } while (0) + +static inline __be32 logfs_crc32(void *data, size_t len, size_t skip) +{ + return cpu_to_be32(crc32(~0, data+skip, len-skip)); +} + +static inline u8 logfs_type(struct inode *inode) +{ + return (inode->i_mode >> 12) & 15; +} + +static inline pgoff_t logfs_index(struct super_block *sb, u64 pos) +{ + return pos >> sb->s_blocksize_bits; +} + +static inline u64 dev_ofs(struct super_block *sb, u32 segno, u32 ofs) +{ + return ((u64)segno << logfs_super(sb)->s_segshift) + ofs; +} + +static inline u32 seg_no(struct super_block *sb, u64 ofs) +{ + return ofs >> logfs_super(sb)->s_segshift; +} + +static inline u32 seg_ofs(struct super_block *sb, u64 ofs) +{ + return ofs & logfs_super(sb)->s_segmask; +} + +static inline u64 seg_align(struct super_block *sb, u64 ofs) +{ + return ofs & ~logfs_super(sb)->s_segmask; +} + +static inline struct logfs_block *logfs_block(struct page *page) +{ + return (void *)page->private; +} + +static inline level_t shrink_level(gc_level_t __level) +{ + u8 level = (__force u8)__level; + + if (level >= LOGFS_MAX_LEVELS) + level -= LOGFS_MAX_LEVELS; + return (__force level_t)level; +} + +static inline gc_level_t expand_level(u64 ino, level_t __level) +{ + u8 level = (__force u8)__level; + + if (ino == LOGFS_INO_MASTER) { + /* ifile has seperate areas */ + level += LOGFS_MAX_LEVELS; + } + return (__force gc_level_t)level; +} + +static inline int logfs_block_shift(struct super_block *sb, level_t level) +{ + level = shrink_level((__force gc_level_t)level); + return (__force int)level * (sb->s_blocksize_bits - 3); +} + +static inline u64 logfs_block_mask(struct super_block *sb, level_t level) +{ + return ~0ull << logfs_block_shift(sb, level); +} + +static inline struct logfs_area *get_area(struct super_block *sb, + gc_level_t gc_level) +{ + return logfs_super(sb)->s_area[(__force u8)gc_level]; +} + +#endif diff --git a/fs/logfs/logfs_abi.h b/fs/logfs/logfs_abi.h new file mode 100644 index 000000000000..5d3782ddecc8 --- /dev/null +++ b/fs/logfs/logfs_abi.h @@ -0,0 +1,627 @@ +/* + * fs/logfs/logfs_abi.h + * + * As should be obvious for Linux 
kernel code, license is GPLv2 + * + * Copyright (c) 2005-2008 Joern Engel + * + * Public header for logfs. + */ +#ifndef FS_LOGFS_LOGFS_ABI_H +#define FS_LOGFS_LOGFS_ABI_H + +/* For out-of-kernel compiles */ +#ifndef BUILD_BUG_ON +#define BUILD_BUG_ON(condition) /**/ +#endif + +#define SIZE_CHECK(type, size) \ +static inline void check_##type(void) \ +{ \ + BUILD_BUG_ON(sizeof(struct type) != (size)); \ +} + +/* + * Throughout the logfs code, we're constantly dealing with blocks at + * various positions or offsets. To remove confusion, we stricly + * distinguish between a "position" - the logical position within a + * file and an "offset" - the physical location within the device. + * + * Any usage of the term offset for a logical location or position for + * a physical one is a bug and should get fixed. + */ + +/* + * Block are allocated in one of several segments depending on their + * level. The following levels are used: + * 0 - regular data block + * 1 - i1 indirect blocks + * 2 - i2 indirect blocks + * 3 - i3 indirect blocks + * 4 - i4 indirect blocks + * 5 - i5 indirect blocks + * 6 - ifile data blocks + * 7 - ifile i1 indirect blocks + * 8 - ifile i2 indirect blocks + * 9 - ifile i3 indirect blocks + * 10 - ifile i4 indirect blocks + * 11 - ifile i5 indirect blocks + * Potential levels to be used in the future: + * 12 - gc recycled blocks, long-lived data + * 13 - replacement blocks, short-lived data + * + * Levels 1-11 are necessary for robust gc operations and help seperate + * short-lived metadata from longer-lived file data. In the future, + * file data should get seperated into several segments based on simple + * heuristics. Old data recycled during gc operation is expected to be + * long-lived. New data is of uncertain life expectancy. New data + * used to replace older blocks in existing files is expected to be + * short-lived. + */ + + +/* Magic numbers. 64bit for superblock, 32bit for statfs f_type */ +#define LOGFS_MAGIC 0xb21f205ac97e8168ull +#define LOGFS_MAGIC_U32 0xc97e8168u + +/* + * Various blocksize related macros. Blocksize is currently fixed at 4KiB. + * Sooner or later that should become configurable and the macros replaced + * by something superblock-dependent. Pointers in indirect blocks are and + * will remain 64bit. + * + * LOGFS_BLOCKSIZE - self-explaining + * LOGFS_BLOCK_FACTOR - number of pointers per indirect block + * LOGFS_BLOCK_BITS - log2 of LOGFS_BLOCK_FACTOR, used for shifts + */ +#define LOGFS_BLOCKSIZE (4096ull) +#define LOGFS_BLOCK_FACTOR (LOGFS_BLOCKSIZE / sizeof(u64)) +#define LOGFS_BLOCK_BITS (9) + +/* + * Number of blocks at various levels of indirection. There are 16 direct + * block pointers plus a single indirect pointer. + */ +#define I0_BLOCKS (16) +#define I1_BLOCKS LOGFS_BLOCK_FACTOR +#define I2_BLOCKS (LOGFS_BLOCK_FACTOR * I1_BLOCKS) +#define I3_BLOCKS (LOGFS_BLOCK_FACTOR * I2_BLOCKS) +#define I4_BLOCKS (LOGFS_BLOCK_FACTOR * I3_BLOCKS) +#define I5_BLOCKS (LOGFS_BLOCK_FACTOR * I4_BLOCKS) + +#define INDIRECT_INDEX I0_BLOCKS +#define LOGFS_EMBEDDED_FIELDS (I0_BLOCKS + 1) + +/* + * Sizes at which files require another level of indirection. Files smaller + * than LOGFS_EMBEDDED_SIZE can be completely stored in the inode itself, + * similar like ext2 fast symlinks. + * + * Data at a position smaller than LOGFS_I0_SIZE is accessed through the + * direct pointers, else through the 1x indirect pointer and so forth. 
+ */ +#define LOGFS_EMBEDDED_SIZE (LOGFS_EMBEDDED_FIELDS * sizeof(u64)) +#define LOGFS_I0_SIZE (I0_BLOCKS * LOGFS_BLOCKSIZE) +#define LOGFS_I1_SIZE (I1_BLOCKS * LOGFS_BLOCKSIZE) +#define LOGFS_I2_SIZE (I2_BLOCKS * LOGFS_BLOCKSIZE) +#define LOGFS_I3_SIZE (I3_BLOCKS * LOGFS_BLOCKSIZE) +#define LOGFS_I4_SIZE (I4_BLOCKS * LOGFS_BLOCKSIZE) +#define LOGFS_I5_SIZE (I5_BLOCKS * LOGFS_BLOCKSIZE) + +/* + * Each indirect block pointer must have this flag set, if all block pointers + * behind it are set, i.e. there is no hole hidden in the shadow of this + * indirect block pointer. + */ +#define LOGFS_FULLY_POPULATED (1ULL << 63) +#define pure_ofs(ofs) (ofs & ~LOGFS_FULLY_POPULATED) + +/* + * LogFS needs to seperate data into levels. Each level is defined as the + * maximal possible distance from the master inode (inode of the inode file). + * Data blocks reside on level 0, 1x indirect block on level 1, etc. + * Inodes reside on level 6, indirect blocks for the inode file on levels 7-11. + * This effort is necessary to guarantee garbage collection to always make + * progress. + * + * LOGFS_MAX_INDIRECT is the maximal indirection through indirect blocks, + * LOGFS_MAX_LEVELS is one more for the actual data level of a file. It is + * the maximal number of levels for one file. + * LOGFS_NO_AREAS is twice that, as the inode file and regular files are + * effectively stacked on top of each other. + */ +#define LOGFS_MAX_INDIRECT (5) +#define LOGFS_MAX_LEVELS (LOGFS_MAX_INDIRECT + 1) +#define LOGFS_NO_AREAS (2 * LOGFS_MAX_LEVELS) + +/* Maximum size of filenames */ +#define LOGFS_MAX_NAMELEN (255) + +/* Number of segments in the primary journal. */ +#define LOGFS_JOURNAL_SEGS (16) + +/* Maximum number of free/erased/etc. segments in journal entries */ +#define MAX_CACHED_SEGS (64) + + +/* + * LOGFS_OBJECT_HEADERSIZE is the size of a single header in the object store, + * LOGFS_MAX_OBJECTSIZE the size of the largest possible object, including + * its header, + * LOGFS_SEGMENT_RESERVE is the amount of space reserved for each segment for + * its segment header and the padded space at the end when no further objects + * fit. 
+ */ +#define LOGFS_OBJECT_HEADERSIZE (0x1c) +#define LOGFS_SEGMENT_HEADERSIZE (0x18) +#define LOGFS_MAX_OBJECTSIZE (LOGFS_OBJECT_HEADERSIZE + LOGFS_BLOCKSIZE) +#define LOGFS_SEGMENT_RESERVE \ + (LOGFS_SEGMENT_HEADERSIZE + LOGFS_MAX_OBJECTSIZE - 1) + +/* + * Segment types: + * SEG_SUPER - Data or indirect block + * SEG_JOURNAL - Inode + * SEG_OSTORE - Dentry + */ +enum { + SEG_SUPER = 0x01, + SEG_JOURNAL = 0x02, + SEG_OSTORE = 0x03, +}; + +/** + * struct logfs_segment_header - per-segment header in the ostore + * + * @crc: crc32 of header (there is no data) + * @pad: unused, must be 0 + * @type: segment type, see above + * @level: GC level for all objects in this segment + * @segno: segment number + * @ec: erase count for this segment + * @gec: global erase count at time of writing + */ +struct logfs_segment_header { + __be32 crc; + __be16 pad; + __u8 type; + __u8 level; + __be32 segno; + __be32 ec; + __be64 gec; +}; + +SIZE_CHECK(logfs_segment_header, LOGFS_SEGMENT_HEADERSIZE); + +/** + * struct logfs_disk_super - on-medium superblock + * + * @ds_magic: magic number, must equal LOGFS_MAGIC + * @ds_crc: crc32 of structure starting with the next field + * @ds_ifile_levels: maximum number of levels for ifile + * @ds_iblock_levels: maximum number of levels for regular files + * @ds_data_levels: number of seperate levels for data + * @pad0: reserved, must be 0 + * @ds_feature_incompat: incompatible filesystem features + * @ds_feature_ro_compat: read-only compatible filesystem features + * @ds_feature_compat: compatible filesystem features + * @ds_flags: flags + * @ds_segment_shift: log2 of segment size + * @ds_block_shift: log2 of block size + * @ds_write_shift: log2 of write size + * @pad1: reserved, must be 0 + * @ds_journal_seg: segments used by primary journal + * @ds_root_reserve: bytes reserved for the superuser + * @ds_speed_reserve: bytes reserved to speed up GC + * @ds_bad_seg_reserve: number of segments reserved to handle bad blocks + * @pad2: reserved, must be 0 + * @pad3: reserved, must be 0 + * + * Contains only read-only fields. Read-write fields like the amount of used + * space is tracked in the dynamic superblock, which is stored in the journal. 
+ */ +struct logfs_disk_super { + struct logfs_segment_header ds_sh; + __be64 ds_magic; + + __be32 ds_crc; + __u8 ds_ifile_levels; + __u8 ds_iblock_levels; + __u8 ds_data_levels; + __u8 ds_segment_shift; + __u8 ds_block_shift; + __u8 ds_write_shift; + __u8 pad0[6]; + + __be64 ds_filesystem_size; + __be32 ds_segment_size; + __be32 ds_bad_seg_reserve; + + __be64 ds_feature_incompat; + __be64 ds_feature_ro_compat; + + __be64 ds_feature_compat; + __be64 ds_feature_flags; + + __be64 ds_root_reserve; + __be64 ds_speed_reserve; + + __be32 ds_journal_seg[LOGFS_JOURNAL_SEGS]; + + __be64 ds_super_ofs[2]; + __be64 pad3[8]; +}; + +SIZE_CHECK(logfs_disk_super, 256); + +/* + * Object types: + * OBJ_BLOCK - Data or indirect block + * OBJ_INODE - Inode + * OBJ_DENTRY - Dentry + */ +enum { + OBJ_BLOCK = 0x04, + OBJ_INODE = 0x05, + OBJ_DENTRY = 0x06, +}; + +/** + * struct logfs_object_header - per-object header in the ostore + * + * @crc: crc32 of header, excluding data_crc + * @len: length of data + * @type: object type, see above + * @compr: compression type + * @ino: inode number + * @bix: block index + * @data_crc: crc32 of payload + */ +struct logfs_object_header { + __be32 crc; + __be16 len; + __u8 type; + __u8 compr; + __be64 ino; + __be64 bix; + __be32 data_crc; +} __attribute__((packed)); + +SIZE_CHECK(logfs_object_header, LOGFS_OBJECT_HEADERSIZE); + +/* + * Reserved inode numbers: + * LOGFS_INO_MASTER - master inode (for inode file) + * LOGFS_INO_ROOT - root directory + * LOGFS_INO_SEGFILE - per-segment used bytes and erase count + */ +enum { + LOGFS_INO_MAPPING = 0x00, + LOGFS_INO_MASTER = 0x01, + LOGFS_INO_ROOT = 0x02, + LOGFS_INO_SEGFILE = 0x03, + LOGFS_RESERVED_INOS = 0x10, +}; + +/* + * Inode flags. High bits should never be written to the medium. They are + * reserved for in-memory usage. + * Low bits should either remain in sync with the corresponding FS_*_FL or + * reuse slots that obviously don't make sense for logfs. 
+ * + * LOGFS_IF_DIRTY Inode must be written back + * LOGFS_IF_ZOMBIE Inode has been deleted + * LOGFS_IF_STILLBORN -ENOSPC happened when creating inode + */ +#define LOGFS_IF_COMPRESSED 0x00000004 /* == FS_COMPR_FL */ +#define LOGFS_IF_DIRTY 0x20000000 +#define LOGFS_IF_ZOMBIE 0x40000000 +#define LOGFS_IF_STILLBORN 0x80000000 + +/* Flags available to chattr */ +#define LOGFS_FL_USER_VISIBLE (LOGFS_IF_COMPRESSED) +#define LOGFS_FL_USER_MODIFIABLE (LOGFS_IF_COMPRESSED) +/* Flags inherited from parent directory on file/directory creation */ +#define LOGFS_FL_INHERITED (LOGFS_IF_COMPRESSED) + +/** + * struct logfs_disk_inode - on-medium inode + * + * @di_mode: file mode + * @di_pad: reserved, must be 0 + * @di_flags: inode flags, see above + * @di_uid: user id + * @di_gid: group id + * @di_ctime: change time + * @di_mtime: modify time + * @di_refcount: reference count (aka nlink or link count) + * @di_generation: inode generation, for nfs + * @di_used_bytes: number of bytes used + * @di_size: file size + * @di_data: data pointers + */ +struct logfs_disk_inode { + __be16 di_mode; + __u8 di_height; + __u8 di_pad; + __be32 di_flags; + __be32 di_uid; + __be32 di_gid; + + __be64 di_ctime; + __be64 di_mtime; + + __be64 di_atime; + __be32 di_refcount; + __be32 di_generation; + + __be64 di_used_bytes; + __be64 di_size; + + __be64 di_data[LOGFS_EMBEDDED_FIELDS]; +}; + +SIZE_CHECK(logfs_disk_inode, 200); + +#define INODE_POINTER_OFS \ + (offsetof(struct logfs_disk_inode, di_data) / sizeof(__be64)) +#define INODE_USED_OFS \ + (offsetof(struct logfs_disk_inode, di_used_bytes) / sizeof(__be64)) +#define INODE_SIZE_OFS \ + (offsetof(struct logfs_disk_inode, di_size) / sizeof(__be64)) +#define INODE_HEIGHT_OFS (0) + +/** + * struct logfs_disk_dentry - on-medium dentry structure + * + * @ino: inode number + * @namelen: length of file name + * @type: file type, identical to bits 12..15 of mode + * @name: file name + */ +/* FIXME: add 6 bytes of padding to remove the __packed */ +struct logfs_disk_dentry { + __be64 ino; + __be16 namelen; + __u8 type; + __u8 name[LOGFS_MAX_NAMELEN]; +} __attribute__((packed)); + +SIZE_CHECK(logfs_disk_dentry, 266); + +#define RESERVED 0xffffffff +#define BADSEG 0xffffffff +/** + * struct logfs_segment_entry - segment file entry + * + * @ec_level: erase count and level + * @valid: number of valid bytes + * + * Segment file contains one entry for every segment. ec_level contains the + * erasecount in the upper 28 bits and the level in the lower 4 bits. An + * ec_level of BADSEG (-1) identifies bad segments. valid contains the number + * of valid bytes or RESERVED (-1 again) if the segment is used for either the + * superblock or the journal, or when the segment is bad. + */ +struct logfs_segment_entry { + __be32 ec_level; + __be32 valid; +}; + +SIZE_CHECK(logfs_segment_entry, 8); + +/** + * struct logfs_journal_header - header for journal entries (JEs) + * + * @h_crc: crc32 of journal entry + * @h_len: length of compressed journal entry, + * not including header + * @h_datalen: length of uncompressed data + * @h_type: JE type + * @h_version: unnormalized version of journal entry + * @h_compr: compression type + * @h_pad: reserved + */ +struct logfs_journal_header { + __be32 h_crc; + __be16 h_len; + __be16 h_datalen; + __be16 h_type; + __be16 h_version; + __u8 h_compr; + __u8 h_pad[3]; +}; + +SIZE_CHECK(logfs_journal_header, 16); + +/* + * Life expectency of data. 
+ * VIM_DEFAULT - default vim + * VIM_SEGFILE - for segment file only - very short-living + * VIM_GC - GC'd data - likely long-living + */ +enum logfs_vim { + VIM_DEFAULT = 0, + VIM_SEGFILE = 1, +}; + +/** + * struct logfs_je_area - wbuf header + * + * @segno: segment number of area + * @used_bytes: number of bytes already used + * @gc_level: GC level + * @vim: life expectancy of data + * + * "Areas" are segments currently being used for writing. There is at least + * one area per GC level. Several may be used to seperate long-living from + * short-living data. If an area with unknown vim is encountered, it can + * simply be closed. + * The write buffer immediately follow this header. + */ +struct logfs_je_area { + __be32 segno; + __be32 used_bytes; + __u8 gc_level; + __u8 vim; +} __attribute__((packed)); + +SIZE_CHECK(logfs_je_area, 10); + +#define MAX_JOURNAL_HEADER \ + (sizeof(struct logfs_journal_header) + sizeof(struct logfs_je_area)) + +/** + * struct logfs_je_dynsb - dynamic superblock + * + * @ds_gec: global erase count + * @ds_sweeper: current position of GC "sweeper" + * @ds_rename_dir: source directory ino (see dir.c documentation) + * @ds_rename_pos: position of source dd (see dir.c documentation) + * @ds_victim_ino: victims of incomplete dir operation (see dir.c) + * @ds_victim_ino: parent inode of victim (see dir.c) + * @ds_used_bytes: number of used bytes + */ +struct logfs_je_dynsb { + __be64 ds_gec; + __be64 ds_sweeper; + + __be64 ds_rename_dir; + __be64 ds_rename_pos; + + __be64 ds_victim_ino; + __be64 ds_victim_parent; /* XXX */ + + __be64 ds_used_bytes; + __be32 ds_generation; + __be32 pad; +}; + +SIZE_CHECK(logfs_je_dynsb, 64); + +/** + * struct logfs_je_anchor - anchor of filesystem tree, aka master inode + * + * @da_size: size of inode file + * @da_last_ino: last created inode + * @da_used_bytes: number of bytes used + * @da_data: data pointers + */ +struct logfs_je_anchor { + __be64 da_size; + __be64 da_last_ino; + + __be64 da_used_bytes; + u8 da_height; + u8 pad[7]; + + __be64 da_data[LOGFS_EMBEDDED_FIELDS]; +}; + +SIZE_CHECK(logfs_je_anchor, 168); + +/** + * struct logfs_je_spillout - spillout entry (from 1st to 2nd journal) + * + * @so_segment: segments used for 2nd journal + * + * Length of the array is given by h_len field in the header. + */ +struct logfs_je_spillout { + __be64 so_segment[0]; +}; + +SIZE_CHECK(logfs_je_spillout, 0); + +/** + * struct logfs_je_journal_ec - erase counts for all journal segments + * + * @ec: erase count + * + * Length of the array is given by h_len field in the header. + */ +struct logfs_je_journal_ec { + __be32 ec[0]; +}; + +SIZE_CHECK(logfs_je_journal_ec, 0); + +/** + * struct logfs_je_free_segments - list of free segmetns with erase count + */ +struct logfs_je_free_segments { + __be32 segno; + __be32 ec; +}; + +SIZE_CHECK(logfs_je_free_segments, 8); + +/** + * struct logfs_seg_alias - list of segment aliases + */ +struct logfs_seg_alias { + __be32 old_segno; + __be32 new_segno; +}; + +SIZE_CHECK(logfs_seg_alias, 8); + +/** + * struct logfs_obj_alias - list of object aliases + */ +struct logfs_obj_alias { + __be64 ino; + __be64 bix; + __be64 val; + u8 level; + u8 pad[5]; + __be16 child_no; +}; + +SIZE_CHECK(logfs_obj_alias, 32); + +/** + * Compression types. + * + * COMPR_NONE - uncompressed + * COMPR_ZLIB - compressed with zlib + */ +enum { + COMPR_NONE = 0, + COMPR_ZLIB = 1, +}; + +/* + * Journal entries come in groups of 16. 
First group contains unique + * entries, next groups contain one entry per level + * + * JE_FIRST - smallest possible journal entry number + * + * JEG_BASE - base group, containing unique entries + * JE_COMMIT - commit entry, validates all previous entries + * JE_DYNSB - dynamic superblock, anything that ought to be in the + * superblock but cannot because it is read-write data + * JE_ANCHOR - anchor aka master inode aka inode file's inode + * JE_ERASECOUNT erasecounts for all journal segments + * JE_SPILLOUT - unused + * JE_SEG_ALIAS - aliases segments + * JE_AREA - area description + * + * JE_LAST - largest possible journal entry number + */ +enum { + JE_FIRST = 0x01, + + JEG_BASE = 0x00, + JE_COMMIT = 0x02, + JE_DYNSB = 0x03, + JE_ANCHOR = 0x04, + JE_ERASECOUNT = 0x05, + JE_SPILLOUT = 0x06, + JE_OBJ_ALIAS = 0x0d, + JE_AREA = 0x0e, + + JE_LAST = 0x0e, +}; + +#endif diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c new file mode 100644 index 000000000000..1dbe6e8cccec --- /dev/null +++ b/fs/logfs/readwrite.c @@ -0,0 +1,2246 @@ +/* + * fs/logfs/readwrite.c + * + * As should be obvious for Linux kernel code, license is GPLv2 + * + * Copyright (c) 2005-2008 Joern Engel + * + * + * Actually contains five sets of very similar functions: + * read read blocks from a file + * seek_hole find next hole + * seek_data find next data block + * valid check whether a block still belongs to a file + * write write blocks to a file + * delete delete a block (for directories and ifile) + * rewrite move existing blocks of a file to a new location (gc helper) + * truncate truncate a file + */ +#include "logfs.h" +#include + +static u64 adjust_bix(u64 bix, level_t level) +{ + switch (level) { + case 0: + return bix; + case LEVEL(1): + return max_t(u64, bix, I0_BLOCKS); + case LEVEL(2): + return max_t(u64, bix, I1_BLOCKS); + case LEVEL(3): + return max_t(u64, bix, I2_BLOCKS); + case LEVEL(4): + return max_t(u64, bix, I3_BLOCKS); + case LEVEL(5): + return max_t(u64, bix, I4_BLOCKS); + default: + WARN_ON(1); + return bix; + } +} + +static inline u64 maxbix(u8 height) +{ + return 1ULL << (LOGFS_BLOCK_BITS * height); +} + +/** + * The inode address space is cut in two halves. Lower half belongs to data + * pages, upper half to indirect blocks. If the high bit (INDIRECT_BIT) is + * set, the actual block index (bix) and level can be derived from the page + * index. + * + * The lowest three bits of the block index are set to 0 after packing and + * unpacking. Since the lowest n bits (9 for 4KiB blocksize) are ignored + * anyway this is harmless. 
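+ *
+ * A worked example, assuming LOGFS_BLOCK_BITS == 9 (4KiB blocks): the
+ * level-1 indirect block covering bix 0x200 is packed to the page index
+ *	INDIRECT_BIT | (1 << LEVEL_SHIFT) | (0x200 >> 9)
+ * i.e. INDIRECT_BIT | (1 << LEVEL_SHIFT) | 1, which is exactly what
+ * logfs_pack_index(0x200, LEVEL(1)) below computes.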
+ */ +#define ARCH_SHIFT (BITS_PER_LONG - 32) +#define INDIRECT_BIT (0x80000000UL << ARCH_SHIFT) +#define LEVEL_SHIFT (28 + ARCH_SHIFT) +static inline pgoff_t first_indirect_block(void) +{ + return INDIRECT_BIT | (1ULL << LEVEL_SHIFT); +} + +pgoff_t logfs_pack_index(u64 bix, level_t level) +{ + pgoff_t index; + + BUG_ON(bix >= INDIRECT_BIT); + if (level == 0) + return bix; + + index = INDIRECT_BIT; + index |= (__force long)level << LEVEL_SHIFT; + index |= bix >> ((__force u8)level * LOGFS_BLOCK_BITS); + return index; +} + +void logfs_unpack_index(pgoff_t index, u64 *bix, level_t *level) +{ + u8 __level; + + if (!(index & INDIRECT_BIT)) { + *bix = index; + *level = 0; + return; + } + + __level = (index & ~INDIRECT_BIT) >> LEVEL_SHIFT; + *level = LEVEL(__level); + *bix = (index << (__level * LOGFS_BLOCK_BITS)) & ~INDIRECT_BIT; + *bix = adjust_bix(*bix, *level); + return; +} +#undef ARCH_SHIFT +#undef INDIRECT_BIT +#undef LEVEL_SHIFT + +/* + * Time is stored as nanoseconds since the epoch. + */ +static struct timespec be64_to_timespec(__be64 betime) +{ + return ns_to_timespec(be64_to_cpu(betime)); +} + +static __be64 timespec_to_be64(struct timespec tsp) +{ + return cpu_to_be64((u64)tsp.tv_sec * NSEC_PER_SEC + tsp.tv_nsec); +} + +static void logfs_disk_to_inode(struct logfs_disk_inode *di, struct inode*inode) +{ + struct logfs_inode *li = logfs_inode(inode); + int i; + + inode->i_mode = be16_to_cpu(di->di_mode); + li->li_height = di->di_height; + li->li_flags = be32_to_cpu(di->di_flags); + inode->i_uid = be32_to_cpu(di->di_uid); + inode->i_gid = be32_to_cpu(di->di_gid); + inode->i_size = be64_to_cpu(di->di_size); + logfs_set_blocks(inode, be64_to_cpu(di->di_used_bytes)); + inode->i_atime = be64_to_timespec(di->di_atime); + inode->i_ctime = be64_to_timespec(di->di_ctime); + inode->i_mtime = be64_to_timespec(di->di_mtime); + inode->i_nlink = be32_to_cpu(di->di_refcount); + inode->i_generation = be32_to_cpu(di->di_generation); + + switch (inode->i_mode & S_IFMT) { + case S_IFSOCK: /* fall through */ + case S_IFBLK: /* fall through */ + case S_IFCHR: /* fall through */ + case S_IFIFO: + inode->i_rdev = be64_to_cpu(di->di_data[0]); + break; + case S_IFDIR: /* fall through */ + case S_IFREG: /* fall through */ + case S_IFLNK: + for (i = 0; i < LOGFS_EMBEDDED_FIELDS; i++) + li->li_data[i] = be64_to_cpu(di->di_data[i]); + break; + default: + BUG(); + } +} + +static void logfs_inode_to_disk(struct inode *inode, struct logfs_disk_inode*di) +{ + struct logfs_inode *li = logfs_inode(inode); + int i; + + di->di_mode = cpu_to_be16(inode->i_mode); + di->di_height = li->li_height; + di->di_pad = 0; + di->di_flags = cpu_to_be32(li->li_flags); + di->di_uid = cpu_to_be32(inode->i_uid); + di->di_gid = cpu_to_be32(inode->i_gid); + di->di_size = cpu_to_be64(i_size_read(inode)); + di->di_used_bytes = cpu_to_be64(li->li_used_bytes); + di->di_atime = timespec_to_be64(inode->i_atime); + di->di_ctime = timespec_to_be64(inode->i_ctime); + di->di_mtime = timespec_to_be64(inode->i_mtime); + di->di_refcount = cpu_to_be32(inode->i_nlink); + di->di_generation = cpu_to_be32(inode->i_generation); + + switch (inode->i_mode & S_IFMT) { + case S_IFSOCK: /* fall through */ + case S_IFBLK: /* fall through */ + case S_IFCHR: /* fall through */ + case S_IFIFO: + di->di_data[0] = cpu_to_be64(inode->i_rdev); + break; + case S_IFDIR: /* fall through */ + case S_IFREG: /* fall through */ + case S_IFLNK: + for (i = 0; i < LOGFS_EMBEDDED_FIELDS; i++) + di->di_data[i] = cpu_to_be64(li->li_data[i]); + break; + default: + BUG(); + } +} + 
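+/*
+ * i_blocks is accounted in 512-byte sectors: li_used_bytes is rounded up
+ * to a full sector, e.g. li_used_bytes == 7000 yields i_blocks == 14.
+ * If the byte count is too large to be represented, the ULONG_MAX default
+ * set below is kept.
+ */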
+static void __logfs_set_blocks(struct inode *inode) +{ + struct super_block *sb = inode->i_sb; + struct logfs_inode *li = logfs_inode(inode); + + inode->i_blocks = ULONG_MAX; + if (li->li_used_bytes >> sb->s_blocksize_bits < ULONG_MAX) + inode->i_blocks = ALIGN(li->li_used_bytes, 512) >> 9; +} + +void logfs_set_blocks(struct inode *inode, u64 bytes) +{ + struct logfs_inode *li = logfs_inode(inode); + + li->li_used_bytes = bytes; + __logfs_set_blocks(inode); +} + +static void prelock_page(struct super_block *sb, struct page *page, int lock) +{ + struct logfs_super *super = logfs_super(sb); + + BUG_ON(!PageLocked(page)); + if (lock) { + BUG_ON(PagePreLocked(page)); + SetPagePreLocked(page); + } else { + /* We are in GC path. */ + if (PagePreLocked(page)) + super->s_lock_count++; + else + SetPagePreLocked(page); + } +} + +static void preunlock_page(struct super_block *sb, struct page *page, int lock) +{ + struct logfs_super *super = logfs_super(sb); + + BUG_ON(!PageLocked(page)); + if (lock) + ClearPagePreLocked(page); + else { + /* We are in GC path. */ + BUG_ON(!PagePreLocked(page)); + if (super->s_lock_count) + super->s_lock_count--; + else + ClearPagePreLocked(page); + } +} + +/* + * Logfs is prone to an AB-BA deadlock where one task tries to acquire + * s_write_mutex with a locked page and GC tries to get that page while holding + * s_write_mutex. + * To solve this issue logfs will ignore the page lock iff the page in question + * is waiting for s_write_mutex. We annotate this fact by setting PG_pre_locked + * in addition to PG_locked. + */ +static void logfs_get_wblocks(struct super_block *sb, struct page *page, + int lock) +{ + struct logfs_super *super = logfs_super(sb); + + if (page) + prelock_page(sb, page, lock); + + if (lock) { + mutex_lock(&super->s_write_mutex); + logfs_gc_pass(sb); + /* FIXME: We also have to check for shadowed space + * and mempool fill grade */ + } +} + +static void logfs_put_wblocks(struct super_block *sb, struct page *page, + int lock) +{ + struct logfs_super *super = logfs_super(sb); + + if (page) + preunlock_page(sb, page, lock); + /* Order matters - we must clear PG_pre_locked before releasing + * s_write_mutex or we could race against another task. */ + if (lock) + mutex_unlock(&super->s_write_mutex); +} + +static struct page *logfs_get_read_page(struct inode *inode, u64 bix, + level_t level) +{ + return find_or_create_page(inode->i_mapping, + logfs_pack_index(bix, level), GFP_NOFS); +} + +static void logfs_put_read_page(struct page *page) +{ + unlock_page(page); + page_cache_release(page); +} + +static void logfs_lock_write_page(struct page *page) +{ + int loop = 0; + + while (unlikely(!trylock_page(page))) { + if (loop++ > 0x1000) { + /* Has been observed once so far... */ + printk(KERN_ERR "stack at %p\n", &loop); + BUG(); + } + if (PagePreLocked(page)) { + /* Holder of page lock is waiting for us, it + * is safe to use this page. */ + break; + } + /* Some other process has this page locked and has + * nothing to do with us. Wait for it to finish. 
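+		 * We busy-wait with schedule() and retry, re-checking
+		 * PG_pre_locked on each pass instead of sleeping in
+		 * lock_page().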
+ */ + schedule(); + } + BUG_ON(!PageLocked(page)); +} + +static struct page *logfs_get_write_page(struct inode *inode, u64 bix, + level_t level) +{ + struct address_space *mapping = inode->i_mapping; + pgoff_t index = logfs_pack_index(bix, level); + struct page *page; + int err; + +repeat: + page = find_get_page(mapping, index); + if (!page) { + page = __page_cache_alloc(GFP_NOFS); + if (!page) + return NULL; + err = add_to_page_cache_lru(page, mapping, index, GFP_NOFS); + if (unlikely(err)) { + page_cache_release(page); + if (err == -EEXIST) + goto repeat; + return NULL; + } + } else logfs_lock_write_page(page); + BUG_ON(!PageLocked(page)); + return page; +} + +static void logfs_unlock_write_page(struct page *page) +{ + if (!PagePreLocked(page)) + unlock_page(page); +} + +static void logfs_put_write_page(struct page *page) +{ + logfs_unlock_write_page(page); + page_cache_release(page); +} + +static struct page *logfs_get_page(struct inode *inode, u64 bix, level_t level, + int rw) +{ + if (rw == READ) + return logfs_get_read_page(inode, bix, level); + else + return logfs_get_write_page(inode, bix, level); +} + +static void logfs_put_page(struct page *page, int rw) +{ + if (rw == READ) + logfs_put_read_page(page); + else + logfs_put_write_page(page); +} + +static unsigned long __get_bits(u64 val, int skip, int no) +{ + u64 ret = val; + + ret >>= skip * no; + ret <<= 64 - no; + ret >>= 64 - no; + return ret; +} + +static unsigned long get_bits(u64 val, level_t skip) +{ + return __get_bits(val, (__force int)skip, LOGFS_BLOCK_BITS); +} + +static inline void init_shadow_tree(struct super_block *sb, + struct shadow_tree *tree) +{ + struct logfs_super *super = logfs_super(sb); + + btree_init_mempool64(&tree->new, super->s_btree_pool); + btree_init_mempool64(&tree->old, super->s_btree_pool); +} + +static void indirect_write_block(struct logfs_block *block) +{ + struct page *page; + struct inode *inode; + int ret; + + page = block->page; + inode = page->mapping->host; + logfs_lock_write_page(page); + ret = logfs_write_buf(inode, page, 0); + logfs_unlock_write_page(page); + /* + * This needs some rework. Unless you want your filesystem to run + * completely synchronously (you don't), the filesystem will always + * report writes as 'successful' before the actual work has been + * done. The actual work gets done here and this is where any errors + * will show up. And there isn't much we can do about it, really. + * + * Some attempts to fix the errors (move from bad blocks, retry io,...) + * have already been done, so anything left should be either a broken + * device or a bug somewhere in logfs itself. Being relatively new, + * the odds currently favor a bug, so for now the line below isn't + * entirely tasteles. 
+ */
+	BUG_ON(ret);
+}
+
+static void inode_write_block(struct logfs_block *block)
+{
+	struct inode *inode;
+	int ret;
+
+	inode = block->inode;
+	if (inode->i_ino == LOGFS_INO_MASTER)
+		logfs_write_anchor(inode);
+	else {
+		ret = __logfs_write_inode(inode, 0);
+		/* see indirect_write_block comment */
+		BUG_ON(ret);
+	}
+}
+
+static gc_level_t inode_block_level(struct logfs_block *block)
+{
+	BUG_ON(block->inode->i_ino == LOGFS_INO_MASTER);
+	return GC_LEVEL(LOGFS_MAX_LEVELS);
+}
+
+static gc_level_t indirect_block_level(struct logfs_block *block)
+{
+	struct page *page;
+	struct inode *inode;
+	u64 bix;
+	level_t level;
+
+	page = block->page;
+	inode = page->mapping->host;
+	logfs_unpack_index(page->index, &bix, &level);
+	return expand_level(inode->i_ino, level);
+}
+
+/*
+ * This silences a false, yet annoying gcc warning. I hate it when my editor
+ * jumps into bitops.h each time I recompile this file.
+ * TODO: Complain to gcc folks about this and upgrade compiler.
+ */
+static unsigned long fnb(const unsigned long *addr,
+		unsigned long size, unsigned long offset)
+{
+	return find_next_bit(addr, size, offset);
+}
+
+static __be64 inode_val0(struct inode *inode)
+{
+	struct logfs_inode *li = logfs_inode(inode);
+	u64 val;
+
+	/*
+	 * Explicit shifting generates good code, but must match the format
+	 * of the structure. Add some paranoia just in case.
+	 */
+	BUILD_BUG_ON(offsetof(struct logfs_disk_inode, di_mode) != 0);
+	BUILD_BUG_ON(offsetof(struct logfs_disk_inode, di_height) != 2);
+	BUILD_BUG_ON(offsetof(struct logfs_disk_inode, di_flags) != 4);
+
+	val =	(u64)inode->i_mode << 48 |
+		(u64)li->li_height << 40 |
+		(u64)li->li_flags;
+	return cpu_to_be64(val);
+}
+
+static int inode_write_alias(struct super_block *sb,
+		struct logfs_block *block, write_alias_t *write_one_alias)
+{
+	struct inode *inode = block->inode;
+	struct logfs_inode *li = logfs_inode(inode);
+	unsigned long pos;
+	u64 ino, bix;
+	__be64 val;
+	level_t level;
+	int err;
+
+	for (pos = 0; ; pos++) {
+		pos = fnb(block->alias_map, LOGFS_BLOCK_FACTOR, pos);
+		if (pos >= LOGFS_EMBEDDED_FIELDS + INODE_POINTER_OFS)
+			return 0;
+
+		switch (pos) {
+		case INODE_HEIGHT_OFS:
+			val = inode_val0(inode);
+			break;
+		case INODE_USED_OFS:
+			val = cpu_to_be64(li->li_used_bytes);
+			break;
+		case INODE_SIZE_OFS:
+			val = cpu_to_be64(i_size_read(inode));
+			break;
+		case INODE_POINTER_OFS ... 
INODE_POINTER_OFS + LOGFS_EMBEDDED_FIELDS - 1: + val = cpu_to_be64(li->li_data[pos - INODE_POINTER_OFS]); + break; + default: + BUG(); + } + + ino = LOGFS_INO_MASTER; + bix = inode->i_ino; + level = LEVEL(0); + err = write_one_alias(sb, ino, bix, level, pos, val); + if (err) + return err; + } +} + +static int indirect_write_alias(struct super_block *sb, + struct logfs_block *block, write_alias_t *write_one_alias) +{ + unsigned long pos; + struct page *page = block->page; + u64 ino , bix; + __be64 *child, val; + level_t level; + int err; + + for (pos = 0; ; pos++) { + pos = fnb(block->alias_map, LOGFS_BLOCK_FACTOR, pos); + if (pos >= LOGFS_BLOCK_FACTOR) + return 0; + + ino = page->mapping->host->i_ino; + logfs_unpack_index(page->index, &bix, &level); + child = kmap_atomic(page, KM_USER0); + val = child[pos]; + kunmap_atomic(child, KM_USER0); + err = write_one_alias(sb, ino, bix, level, pos, val); + if (err) + return err; + } +} + +int logfs_write_obj_aliases_pagecache(struct super_block *sb) +{ + struct logfs_super *super = logfs_super(sb); + struct logfs_block *block; + int err; + + list_for_each_entry(block, &super->s_object_alias, alias_list) { + err = block->ops->write_alias(sb, block, write_alias_journal); + if (err) + return err; + } + return 0; +} + +void __free_block(struct super_block *sb, struct logfs_block *block) +{ + BUG_ON(!list_empty(&block->item_list)); + list_del(&block->alias_list); + mempool_free(block, logfs_super(sb)->s_block_pool); +} + +static void inode_free_block(struct super_block *sb, struct logfs_block *block) +{ + struct inode *inode = block->inode; + + logfs_inode(inode)->li_block = NULL; + __free_block(sb, block); +} + +static void indirect_free_block(struct super_block *sb, + struct logfs_block *block) +{ + ClearPagePrivate(block->page); + block->page->private = 0; + __free_block(sb, block); +} + + +static struct logfs_block_ops inode_block_ops = { + .write_block = inode_write_block, + .block_level = inode_block_level, + .free_block = inode_free_block, + .write_alias = inode_write_alias, +}; + +struct logfs_block_ops indirect_block_ops = { + .write_block = indirect_write_block, + .block_level = indirect_block_level, + .free_block = indirect_free_block, + .write_alias = indirect_write_alias, +}; + +struct logfs_block *__alloc_block(struct super_block *sb, + u64 ino, u64 bix, level_t level) +{ + struct logfs_super *super = logfs_super(sb); + struct logfs_block *block; + + block = mempool_alloc(super->s_block_pool, GFP_NOFS); + memset(block, 0, sizeof(*block)); + INIT_LIST_HEAD(&block->alias_list); + INIT_LIST_HEAD(&block->item_list); + block->sb = sb; + block->ino = ino; + block->bix = bix; + block->level = level; + return block; +} + +static void alloc_inode_block(struct inode *inode) +{ + struct logfs_inode *li = logfs_inode(inode); + struct logfs_block *block; + + if (li->li_block) + return; + + block = __alloc_block(inode->i_sb, LOGFS_INO_MASTER, inode->i_ino, 0); + block->inode = inode; + li->li_block = block; + block->ops = &inode_block_ops; +} + +void initialize_block_counters(struct page *page, struct logfs_block *block, + __be64 *array, int page_is_empty) +{ + u64 ptr; + int i, start; + + block->partial = 0; + block->full = 0; + start = 0; + if (page->index < first_indirect_block()) { + /* Counters are pointless on level 0 */ + return; + } + if (page->index == first_indirect_block()) { + /* Skip unused pointers */ + start = I0_BLOCKS; + block->full = I0_BLOCKS; + } + if (!page_is_empty) { + for (i = start; i < LOGFS_BLOCK_FACTOR; i++) { + ptr = 
be64_to_cpu(array[i]); + if (ptr) + block->partial++; + if (ptr & LOGFS_FULLY_POPULATED) + block->full++; + } + } +} + +static void alloc_data_block(struct inode *inode, struct page *page) +{ + struct logfs_block *block; + u64 bix; + level_t level; + + if (PagePrivate(page)) + return; + + logfs_unpack_index(page->index, &bix, &level); + block = __alloc_block(inode->i_sb, inode->i_ino, bix, level); + block->page = page; + SetPagePrivate(page); + page->private = (unsigned long)block; + block->ops = &indirect_block_ops; +} + +static void alloc_indirect_block(struct inode *inode, struct page *page, + int page_is_empty) +{ + struct logfs_block *block; + __be64 *array; + + if (PagePrivate(page)) + return; + + alloc_data_block(inode, page); + + block = logfs_block(page); + array = kmap_atomic(page, KM_USER0); + initialize_block_counters(page, block, array, page_is_empty); + kunmap_atomic(array, KM_USER0); +} + +static void block_set_pointer(struct page *page, int index, u64 ptr) +{ + struct logfs_block *block = logfs_block(page); + __be64 *array; + u64 oldptr; + + BUG_ON(!block); + array = kmap_atomic(page, KM_USER0); + oldptr = be64_to_cpu(array[index]); + array[index] = cpu_to_be64(ptr); + kunmap_atomic(array, KM_USER0); + SetPageUptodate(page); + + block->full += !!(ptr & LOGFS_FULLY_POPULATED) + - !!(oldptr & LOGFS_FULLY_POPULATED); + block->partial += !!ptr - !!oldptr; +} + +static u64 block_get_pointer(struct page *page, int index) +{ + __be64 *block; + u64 ptr; + + block = kmap_atomic(page, KM_USER0); + ptr = be64_to_cpu(block[index]); + kunmap_atomic(block, KM_USER0); + return ptr; +} + +static int logfs_read_empty(struct page *page) +{ + zero_user_segment(page, 0, PAGE_CACHE_SIZE); + return 0; +} + +static int logfs_read_direct(struct inode *inode, struct page *page) +{ + struct logfs_inode *li = logfs_inode(inode); + pgoff_t index = page->index; + u64 block; + + block = li->li_data[index]; + if (!block) + return logfs_read_empty(page); + + return logfs_segment_read(inode, page, block, index, 0); +} + +static int logfs_read_loop(struct inode *inode, struct page *page, + int rw_context) +{ + struct logfs_inode *li = logfs_inode(inode); + u64 bix, bofs = li->li_data[INDIRECT_INDEX]; + level_t level, target_level; + int ret; + struct page *ipage; + + logfs_unpack_index(page->index, &bix, &target_level); + if (!bofs) + return logfs_read_empty(page); + + if (bix >= maxbix(li->li_height)) + return logfs_read_empty(page); + + for (level = LEVEL(li->li_height); + (__force u8)level > (__force u8)target_level; + level = SUBLEVEL(level)){ + ipage = logfs_get_page(inode, bix, level, rw_context); + if (!ipage) + return -ENOMEM; + + ret = logfs_segment_read(inode, ipage, bofs, bix, level); + if (ret) { + logfs_put_read_page(ipage); + return ret; + } + + bofs = block_get_pointer(ipage, get_bits(bix, SUBLEVEL(level))); + logfs_put_page(ipage, rw_context); + if (!bofs) + return logfs_read_empty(page); + } + + return logfs_segment_read(inode, page, bofs, bix, 0); +} + +static int logfs_read_block(struct inode *inode, struct page *page, + int rw_context) +{ + pgoff_t index = page->index; + + if (index < I0_BLOCKS) + return logfs_read_direct(inode, page); + return logfs_read_loop(inode, page, rw_context); +} + +static int logfs_exist_loop(struct inode *inode, u64 bix) +{ + struct logfs_inode *li = logfs_inode(inode); + u64 bofs = li->li_data[INDIRECT_INDEX]; + level_t level; + int ret; + struct page *ipage; + + if (!bofs) + return 0; + if (bix >= maxbix(li->li_height)) + return 0; + + for (level = 
LEVEL(li->li_height); level != 0; level = SUBLEVEL(level)) { + ipage = logfs_get_read_page(inode, bix, level); + if (!ipage) + return -ENOMEM; + + ret = logfs_segment_read(inode, ipage, bofs, bix, level); + if (ret) { + logfs_put_read_page(ipage); + return ret; + } + + bofs = block_get_pointer(ipage, get_bits(bix, SUBLEVEL(level))); + logfs_put_read_page(ipage); + if (!bofs) + return 0; + } + + return 1; +} + +int logfs_exist_block(struct inode *inode, u64 bix) +{ + struct logfs_inode *li = logfs_inode(inode); + + if (bix < I0_BLOCKS) + return !!li->li_data[bix]; + return logfs_exist_loop(inode, bix); +} + +static u64 seek_holedata_direct(struct inode *inode, u64 bix, int data) +{ + struct logfs_inode *li = logfs_inode(inode); + + for (; bix < I0_BLOCKS; bix++) + if (data ^ (li->li_data[bix] == 0)) + return bix; + return I0_BLOCKS; +} + +static u64 seek_holedata_loop(struct inode *inode, u64 bix, int data) +{ + struct logfs_inode *li = logfs_inode(inode); + __be64 *rblock; + u64 increment, bofs = li->li_data[INDIRECT_INDEX]; + level_t level; + int ret, slot; + struct page *page; + + BUG_ON(!bofs); + + for (level = LEVEL(li->li_height); level != 0; level = SUBLEVEL(level)) { + increment = 1 << (LOGFS_BLOCK_BITS * ((__force u8)level-1)); + page = logfs_get_read_page(inode, bix, level); + if (!page) + return bix; + + ret = logfs_segment_read(inode, page, bofs, bix, level); + if (ret) { + logfs_put_read_page(page); + return bix; + } + + slot = get_bits(bix, SUBLEVEL(level)); + rblock = kmap_atomic(page, KM_USER0); + while (slot < LOGFS_BLOCK_FACTOR) { + if (data && (rblock[slot] != 0)) + break; + if (!data && !(be64_to_cpu(rblock[slot]) & LOGFS_FULLY_POPULATED)) + break; + slot++; + bix += increment; + bix &= ~(increment - 1); + } + if (slot >= LOGFS_BLOCK_FACTOR) { + kunmap_atomic(rblock, KM_USER0); + logfs_put_read_page(page); + return bix; + } + bofs = be64_to_cpu(rblock[slot]); + kunmap_atomic(rblock, KM_USER0); + logfs_put_read_page(page); + if (!bofs) { + BUG_ON(data); + return bix; + } + } + return bix; +} + +/** + * logfs_seek_hole - find next hole starting at a given block index + * @inode: inode to search in + * @bix: block index to start searching + * + * Returns next hole. If the file doesn't contain any further holes, the + * block address next to eof is returned instead. + */ +u64 logfs_seek_hole(struct inode *inode, u64 bix) +{ + struct logfs_inode *li = logfs_inode(inode); + + if (bix < I0_BLOCKS) { + bix = seek_holedata_direct(inode, bix, 0); + if (bix < I0_BLOCKS) + return bix; + } + + if (!li->li_data[INDIRECT_INDEX]) + return bix; + else if (li->li_data[INDIRECT_INDEX] & LOGFS_FULLY_POPULATED) + bix = maxbix(li->li_height); + else { + bix = seek_holedata_loop(inode, bix, 0); + if (bix < maxbix(li->li_height)) + return bix; + /* Should not happen anymore. But if some port writes semi- + * corrupt images (as this one used to) we might run into it. 
+ */ + WARN_ON_ONCE(bix == maxbix(li->li_height)); + } + + return bix; +} + +static u64 __logfs_seek_data(struct inode *inode, u64 bix) +{ + struct logfs_inode *li = logfs_inode(inode); + + if (bix < I0_BLOCKS) { + bix = seek_holedata_direct(inode, bix, 1); + if (bix < I0_BLOCKS) + return bix; + } + + if (bix < maxbix(li->li_height)) { + if (!li->li_data[INDIRECT_INDEX]) + bix = maxbix(li->li_height); + else + return seek_holedata_loop(inode, bix, 1); + } + + return bix; +} + +/** + * logfs_seek_data - find next data block after a given block index + * @inode: inode to search in + * @bix: block index to start searching + * + * Returns next data block. If the file doesn't contain any further data + * blocks, the last block in the file is returned instead. + */ +u64 logfs_seek_data(struct inode *inode, u64 bix) +{ + struct super_block *sb = inode->i_sb; + u64 ret, end; + + ret = __logfs_seek_data(inode, bix); + end = i_size_read(inode) >> sb->s_blocksize_bits; + if (ret >= end) + ret = max(bix, end); + return ret; +} + +static int logfs_is_valid_direct(struct logfs_inode *li, u64 bix, u64 ofs) +{ + return pure_ofs(li->li_data[bix]) == ofs; +} + +static int __logfs_is_valid_loop(struct inode *inode, u64 bix, + u64 ofs, u64 bofs) +{ + struct logfs_inode *li = logfs_inode(inode); + level_t level; + int ret; + struct page *page; + + for (level = LEVEL(li->li_height); level != 0; level = SUBLEVEL(level)){ + page = logfs_get_write_page(inode, bix, level); + BUG_ON(!page); + + ret = logfs_segment_read(inode, page, bofs, bix, level); + if (ret) { + logfs_put_write_page(page); + return 0; + } + + bofs = block_get_pointer(page, get_bits(bix, SUBLEVEL(level))); + logfs_put_write_page(page); + if (!bofs) + return 0; + + if (pure_ofs(bofs) == ofs) + return 1; + } + return 0; +} + +static int logfs_is_valid_loop(struct inode *inode, u64 bix, u64 ofs) +{ + struct logfs_inode *li = logfs_inode(inode); + u64 bofs = li->li_data[INDIRECT_INDEX]; + + if (!bofs) + return 0; + + if (bix >= maxbix(li->li_height)) + return 0; + + if (pure_ofs(bofs) == ofs) + return 1; + + return __logfs_is_valid_loop(inode, bix, ofs, bofs); +} + +static int __logfs_is_valid_block(struct inode *inode, u64 bix, u64 ofs) +{ + struct logfs_inode *li = logfs_inode(inode); + + if ((inode->i_nlink == 0) && atomic_read(&inode->i_count) == 1) + return 0; + + if (bix < I0_BLOCKS) + return logfs_is_valid_direct(li, bix, ofs); + return logfs_is_valid_loop(inode, bix, ofs); +} + +/** + * logfs_is_valid_block - check whether this block is still valid + * + * @sb - superblock + * @ofs - block physical offset + * @ino - block inode number + * @bix - block index + * @level - block level + * + * Returns 0 if the block is invalid, 1 if it is valid and 2 if it will + * become invalid once the journal is written. + */ +int logfs_is_valid_block(struct super_block *sb, u64 ofs, u64 ino, u64 bix, + gc_level_t gc_level) +{ + struct logfs_super *super = logfs_super(sb); + struct inode *inode; + int ret, cookie; + + /* Umount closes a segment with free blocks remaining. Those + * blocks are by definition invalid. */ + if (ino == -1) + return 0; + + LOGFS_BUG_ON((u64)(u_long)ino != ino, sb); + + inode = logfs_safe_iget(sb, ino, &cookie); + if (IS_ERR(inode)) + goto invalid; + + ret = __logfs_is_valid_block(inode, bix, ofs); + logfs_safe_iput(inode, cookie); + if (ret) + return ret; + +invalid: + /* Block is nominally invalid, but may still sit in the shadow tree, + * waiting for a journal commit. 
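+	 * Returning 2 tells the caller that the old copy stays valid
+	 * until the next journal commit.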
+ */ + if (btree_lookup64(&super->s_shadow_tree.old, ofs)) + return 2; + return 0; +} + +int logfs_readpage_nolock(struct page *page) +{ + struct inode *inode = page->mapping->host; + int ret = -EIO; + + ret = logfs_read_block(inode, page, READ); + + if (ret) { + ClearPageUptodate(page); + SetPageError(page); + } else { + SetPageUptodate(page); + ClearPageError(page); + } + flush_dcache_page(page); + + return ret; +} + +static int logfs_reserve_bytes(struct inode *inode, int bytes) +{ + struct logfs_super *super = logfs_super(inode->i_sb); + u64 available = super->s_free_bytes + super->s_dirty_free_bytes + - super->s_dirty_used_bytes - super->s_dirty_pages; + + if (!bytes) + return 0; + + if (available < bytes) + return -ENOSPC; + + if (available < bytes + super->s_root_reserve && + !capable(CAP_SYS_RESOURCE)) + return -ENOSPC; + + return 0; +} + +int get_page_reserve(struct inode *inode, struct page *page) +{ + struct logfs_super *super = logfs_super(inode->i_sb); + int ret; + + if (logfs_block(page) && logfs_block(page)->reserved_bytes) + return 0; + + logfs_get_wblocks(inode->i_sb, page, WF_LOCK); + ret = logfs_reserve_bytes(inode, 6 * LOGFS_MAX_OBJECTSIZE); + if (!ret) { + alloc_data_block(inode, page); + logfs_block(page)->reserved_bytes += 6 * LOGFS_MAX_OBJECTSIZE; + super->s_dirty_pages += 6 * LOGFS_MAX_OBJECTSIZE; + } + logfs_put_wblocks(inode->i_sb, page, WF_LOCK); + return ret; +} + +/* + * We are protected by write lock. Push victims up to superblock level + * and release transaction when appropriate. + */ +/* FIXME: This is currently called from the wrong spots. */ +static void logfs_handle_transaction(struct inode *inode, + struct logfs_transaction *ta) +{ + struct logfs_super *super = logfs_super(inode->i_sb); + + if (!ta) + return; + logfs_inode(inode)->li_block->ta = NULL; + + if (inode->i_ino != LOGFS_INO_MASTER) { + BUG(); /* FIXME: Yes, this needs more thought */ + /* just remember the transaction until inode is written */ + //BUG_ON(logfs_inode(inode)->li_transaction); + //logfs_inode(inode)->li_transaction = ta; + return; + } + + switch (ta->state) { + case CREATE_1: /* fall through */ + case UNLINK_1: + BUG_ON(super->s_victim_ino); + super->s_victim_ino = ta->ino; + break; + case CREATE_2: /* fall through */ + case UNLINK_2: + BUG_ON(super->s_victim_ino != ta->ino); + super->s_victim_ino = 0; + /* transaction ends here - free it */ + kfree(ta); + break; + case CROSS_RENAME_1: + BUG_ON(super->s_rename_dir); + BUG_ON(super->s_rename_pos); + super->s_rename_dir = ta->dir; + super->s_rename_pos = ta->pos; + break; + case CROSS_RENAME_2: + BUG_ON(super->s_rename_dir != ta->dir); + BUG_ON(super->s_rename_pos != ta->pos); + super->s_rename_dir = 0; + super->s_rename_pos = 0; + kfree(ta); + break; + case TARGET_RENAME_1: + BUG_ON(super->s_rename_dir); + BUG_ON(super->s_rename_pos); + BUG_ON(super->s_victim_ino); + super->s_rename_dir = ta->dir; + super->s_rename_pos = ta->pos; + super->s_victim_ino = ta->ino; + break; + case TARGET_RENAME_2: + BUG_ON(super->s_rename_dir != ta->dir); + BUG_ON(super->s_rename_pos != ta->pos); + BUG_ON(super->s_victim_ino != ta->ino); + super->s_rename_dir = 0; + super->s_rename_pos = 0; + break; + case TARGET_RENAME_3: + BUG_ON(super->s_rename_dir); + BUG_ON(super->s_rename_pos); + BUG_ON(super->s_victim_ino != ta->ino); + super->s_victim_ino = 0; + kfree(ta); + break; + default: + BUG(); + } +} + +/* + * Not strictly a reservation, but rather a check that we still have enough + * space to satisfy the write. 
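+ * logfs_reserve_blocks() converts a block count into bytes by multiplying
+ * with LOGFS_MAX_OBJECTSIZE, the worst-case on-medium size of a single
+ * block object.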
+ */ +static int logfs_reserve_blocks(struct inode *inode, int blocks) +{ + return logfs_reserve_bytes(inode, blocks * LOGFS_MAX_OBJECTSIZE); +} + +struct write_control { + u64 ofs; + long flags; +}; + +static struct logfs_shadow *alloc_shadow(struct inode *inode, u64 bix, + level_t level, u64 old_ofs) +{ + struct logfs_super *super = logfs_super(inode->i_sb); + struct logfs_shadow *shadow; + + shadow = mempool_alloc(super->s_shadow_pool, GFP_NOFS); + memset(shadow, 0, sizeof(*shadow)); + shadow->ino = inode->i_ino; + shadow->bix = bix; + shadow->gc_level = expand_level(inode->i_ino, level); + shadow->old_ofs = old_ofs & ~LOGFS_FULLY_POPULATED; + return shadow; +} + +static void free_shadow(struct inode *inode, struct logfs_shadow *shadow) +{ + struct logfs_super *super = logfs_super(inode->i_sb); + + mempool_free(shadow, super->s_shadow_pool); +} + +/** + * fill_shadow_tree - Propagate shadow tree changes due to a write + * @inode: Inode owning the page + * @page: Struct page that was written + * @shadow: Shadow for the current write + * + * Writes in logfs can result in two semi-valid objects. The old object + * is still valid as long as it can be reached by following pointers on + * the medium. Only when writes propagate all the way up to the journal + * has the new object safely replaced the old one. + * + * To handle this problem, a struct logfs_shadow is used to represent + * every single write. It is attached to the indirect block, which is + * marked dirty. When the indirect block is written, its shadows are + * handed up to the next indirect block (or inode). Untimately they + * will reach the master inode and be freed upon journal commit. + * + * This function handles a single step in the propagation. It adds the + * shadow for the current write to the tree, along with any shadows in + * the page's tree, in case it was an indirect block. If a page is + * written, the inode parameter is left NULL, if an inode is written, + * the page parameter is left NULL. + */ +static void fill_shadow_tree(struct inode *inode, struct page *page, + struct logfs_shadow *shadow) +{ + struct logfs_super *super = logfs_super(inode->i_sb); + struct logfs_block *block = logfs_block(page); + struct shadow_tree *tree = &super->s_shadow_tree; + + if (PagePrivate(page)) { + if (block->alias_map) + super->s_no_object_aliases -= bitmap_weight( + block->alias_map, LOGFS_BLOCK_FACTOR); + logfs_handle_transaction(inode, block->ta); + block->ops->free_block(inode->i_sb, block); + } + if (shadow) { + if (shadow->old_ofs) + btree_insert64(&tree->old, shadow->old_ofs, shadow, + GFP_NOFS); + else + btree_insert64(&tree->new, shadow->new_ofs, shadow, + GFP_NOFS); + + super->s_dirty_used_bytes += shadow->new_len; + super->s_dirty_free_bytes += shadow->old_len; + } +} + +static void logfs_set_alias(struct super_block *sb, struct logfs_block *block, + long child_no) +{ + struct logfs_super *super = logfs_super(sb); + + if (block->inode && block->inode->i_ino == LOGFS_INO_MASTER) { + /* Aliases in the master inode are pointless. */ + return; + } + + if (!test_bit(child_no, block->alias_map)) { + set_bit(child_no, block->alias_map); + super->s_no_object_aliases++; + } + list_move_tail(&block->alias_list, &super->s_object_alias); +} + +/* + * Object aliases can and often do change the size and occupied space of a + * file. So not only do we have to change the pointers, we also have to + * change inode->i_size and li->li_used_bytes. Which is done by setting + * another two object aliases for the inode itself. 
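+ * Those two are the INODE_USED_OFS and INODE_SIZE_OFS aliases set by
+ * set_iused() below.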
+ */ +static void set_iused(struct inode *inode, struct logfs_shadow *shadow) +{ + struct logfs_inode *li = logfs_inode(inode); + + if (shadow->new_len == shadow->old_len) + return; + + alloc_inode_block(inode); + li->li_used_bytes += shadow->new_len - shadow->old_len; + __logfs_set_blocks(inode); + logfs_set_alias(inode->i_sb, li->li_block, INODE_USED_OFS); + logfs_set_alias(inode->i_sb, li->li_block, INODE_SIZE_OFS); +} + +static int logfs_write_i0(struct inode *inode, struct page *page, + struct write_control *wc) +{ + struct logfs_shadow *shadow; + u64 bix; + level_t level; + int full, err = 0; + + logfs_unpack_index(page->index, &bix, &level); + if (wc->ofs == 0) + if (logfs_reserve_blocks(inode, 1)) + return -ENOSPC; + + shadow = alloc_shadow(inode, bix, level, wc->ofs); + if (wc->flags & WF_WRITE) + err = logfs_segment_write(inode, page, shadow); + if (wc->flags & WF_DELETE) + logfs_segment_delete(inode, shadow); + if (err) { + free_shadow(inode, shadow); + return err; + } + + set_iused(inode, shadow); + full = 1; + if (level != 0) { + alloc_indirect_block(inode, page, 0); + full = logfs_block(page)->full == LOGFS_BLOCK_FACTOR; + } + fill_shadow_tree(inode, page, shadow); + wc->ofs = shadow->new_ofs; + if (wc->ofs && full) + wc->ofs |= LOGFS_FULLY_POPULATED; + return 0; +} + +static int logfs_write_direct(struct inode *inode, struct page *page, + long flags) +{ + struct logfs_inode *li = logfs_inode(inode); + struct write_control wc = { + .ofs = li->li_data[page->index], + .flags = flags, + }; + int err; + + alloc_inode_block(inode); + + err = logfs_write_i0(inode, page, &wc); + if (err) + return err; + + li->li_data[page->index] = wc.ofs; + logfs_set_alias(inode->i_sb, li->li_block, + page->index + INODE_POINTER_OFS); + return 0; +} + +static int ptr_change(u64 ofs, struct page *page) +{ + struct logfs_block *block = logfs_block(page); + int empty0, empty1, full0, full1; + + empty0 = ofs == 0; + empty1 = block->partial == 0; + if (empty0 != empty1) + return 1; + + /* The !! 
is necessary to shrink result to int */ + full0 = !!(ofs & LOGFS_FULLY_POPULATED); + full1 = block->full == LOGFS_BLOCK_FACTOR; + if (full0 != full1) + return 1; + return 0; +} + +static int __logfs_write_rec(struct inode *inode, struct page *page, + struct write_control *this_wc, + pgoff_t bix, level_t target_level, level_t level) +{ + int ret, page_empty = 0; + int child_no = get_bits(bix, SUBLEVEL(level)); + struct page *ipage; + struct write_control child_wc = { + .flags = this_wc->flags, + }; + + ipage = logfs_get_write_page(inode, bix, level); + if (!ipage) + return -ENOMEM; + + if (this_wc->ofs) { + ret = logfs_segment_read(inode, ipage, this_wc->ofs, bix, level); + if (ret) + goto out; + } else if (!PageUptodate(ipage)) { + page_empty = 1; + logfs_read_empty(ipage); + } + + child_wc.ofs = block_get_pointer(ipage, child_no); + + if ((__force u8)level-1 > (__force u8)target_level) + ret = __logfs_write_rec(inode, page, &child_wc, bix, + target_level, SUBLEVEL(level)); + else + ret = logfs_write_i0(inode, page, &child_wc); + + if (ret) + goto out; + + alloc_indirect_block(inode, ipage, page_empty); + block_set_pointer(ipage, child_no, child_wc.ofs); + /* FIXME: first condition seems superfluous */ + if (child_wc.ofs || logfs_block(ipage)->partial) + this_wc->flags |= WF_WRITE; + /* the condition on this_wc->ofs ensures that we won't consume extra + * space for indirect blocks in the future, which we cannot reserve */ + if (!this_wc->ofs || ptr_change(this_wc->ofs, ipage)) + ret = logfs_write_i0(inode, ipage, this_wc); + else + logfs_set_alias(inode->i_sb, logfs_block(ipage), child_no); +out: + logfs_put_write_page(ipage); + return ret; +} + +static int logfs_write_rec(struct inode *inode, struct page *page, + pgoff_t bix, level_t target_level, long flags) +{ + struct logfs_inode *li = logfs_inode(inode); + struct write_control wc = { + .ofs = li->li_data[INDIRECT_INDEX], + .flags = flags, + }; + int ret; + + alloc_inode_block(inode); + + if (li->li_height > (__force u8)target_level) + ret = __logfs_write_rec(inode, page, &wc, bix, target_level, + LEVEL(li->li_height)); + else + ret = logfs_write_i0(inode, page, &wc); + if (ret) + return ret; + + if (li->li_data[INDIRECT_INDEX] != wc.ofs) { + li->li_data[INDIRECT_INDEX] = wc.ofs; + logfs_set_alias(inode->i_sb, li->li_block, + INDIRECT_INDEX + INODE_POINTER_OFS); + } + return ret; +} + +void logfs_add_transaction(struct inode *inode, struct logfs_transaction *ta) +{ + alloc_inode_block(inode); + logfs_inode(inode)->li_block->ta = ta; +} + +void logfs_del_transaction(struct inode *inode, struct logfs_transaction *ta) +{ + struct logfs_block *block = logfs_inode(inode)->li_block; + + if (block && block->ta) + block->ta = NULL; +} + +static int grow_inode(struct inode *inode, u64 bix, level_t level) +{ + struct logfs_inode *li = logfs_inode(inode); + u8 height = (__force u8)level; + struct page *page; + struct write_control wc = { + .flags = WF_WRITE, + }; + int err; + + BUG_ON(height > 5 || li->li_height > 5); + while (height > li->li_height || bix >= maxbix(li->li_height)) { + page = logfs_get_write_page(inode, I0_BLOCKS + 1, + LEVEL(li->li_height + 1)); + if (!page) + return -ENOMEM; + logfs_read_empty(page); + alloc_indirect_block(inode, page, 1); + block_set_pointer(page, 0, li->li_data[INDIRECT_INDEX]); + err = logfs_write_i0(inode, page, &wc); + logfs_put_write_page(page); + if (err) + return err; + li->li_data[INDIRECT_INDEX] = wc.ofs; + wc.ofs = 0; + li->li_height++; + logfs_set_alias(inode->i_sb, li->li_block, INODE_HEIGHT_OFS); 
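+		/*
+		 * Slot 0 of the new root now points at the old root; loop
+		 * until the tree is tall enough for the requested level
+		 * and block index.
+		 */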
+ } + return 0; +} + +static int __logfs_write_buf(struct inode *inode, struct page *page, long flags) +{ + struct logfs_super *super = logfs_super(inode->i_sb); + pgoff_t index = page->index; + u64 bix; + level_t level; + int err; + + flags |= WF_WRITE | WF_DELETE; + inode->i_ctime = inode->i_mtime = CURRENT_TIME; + + logfs_unpack_index(index, &bix, &level); + if (logfs_block(page) && logfs_block(page)->reserved_bytes) + super->s_dirty_pages -= logfs_block(page)->reserved_bytes; + + if (index < I0_BLOCKS) + return logfs_write_direct(inode, page, flags); + + bix = adjust_bix(bix, level); + err = grow_inode(inode, bix, level); + if (err) + return err; + return logfs_write_rec(inode, page, bix, level, flags); +} + +int logfs_write_buf(struct inode *inode, struct page *page, long flags) +{ + struct super_block *sb = inode->i_sb; + int ret; + + logfs_get_wblocks(sb, page, flags & WF_LOCK); + ret = __logfs_write_buf(inode, page, flags); + logfs_put_wblocks(sb, page, flags & WF_LOCK); + return ret; +} + +static int __logfs_delete(struct inode *inode, struct page *page) +{ + long flags = WF_DELETE; + + inode->i_ctime = inode->i_mtime = CURRENT_TIME; + + if (page->index < I0_BLOCKS) + return logfs_write_direct(inode, page, flags); + return logfs_write_rec(inode, page, page->index, 0, flags); +} + +int logfs_delete(struct inode *inode, pgoff_t index, + struct shadow_tree *shadow_tree) +{ + struct super_block *sb = inode->i_sb; + struct page *page; + int ret; + + page = logfs_get_read_page(inode, index, 0); + if (!page) + return -ENOMEM; + + logfs_get_wblocks(sb, page, 1); + ret = __logfs_delete(inode, page); + logfs_put_wblocks(sb, page, 1); + + logfs_put_read_page(page); + + return ret; +} + +/* Rewrite cannot mark the inode dirty but has to write it immediatly. */ +int logfs_rewrite_block(struct inode *inode, u64 bix, u64 ofs, + gc_level_t gc_level, long flags) +{ + level_t level = shrink_level(gc_level); + struct page *page; + int err; + + page = logfs_get_write_page(inode, bix, level); + if (!page) + return -ENOMEM; + + err = logfs_segment_read(inode, page, ofs, bix, level); + if (!err) { + if (level != 0) + alloc_indirect_block(inode, page, 0); + err = logfs_write_buf(inode, page, flags); + } + logfs_put_write_page(page); + return err; +} + +static int truncate_data_block(struct inode *inode, struct page *page, + u64 ofs, struct logfs_shadow *shadow, u64 size) +{ + loff_t pageofs = page->index << inode->i_sb->s_blocksize_bits; + u64 bix; + level_t level; + int err; + + /* Does truncation happen within this page? 
*/ + if (size <= pageofs || size - pageofs >= PAGE_SIZE) + return 0; + + logfs_unpack_index(page->index, &bix, &level); + BUG_ON(level != 0); + + err = logfs_segment_read(inode, page, ofs, bix, level); + if (err) + return err; + + zero_user_segment(page, size - pageofs, PAGE_CACHE_SIZE); + return logfs_segment_write(inode, page, shadow); +} + +static int logfs_truncate_i0(struct inode *inode, struct page *page, + struct write_control *wc, u64 size) +{ + struct logfs_shadow *shadow; + u64 bix; + level_t level; + int err = 0; + + logfs_unpack_index(page->index, &bix, &level); + BUG_ON(level != 0); + shadow = alloc_shadow(inode, bix, level, wc->ofs); + + err = truncate_data_block(inode, page, wc->ofs, shadow, size); + if (err) { + free_shadow(inode, shadow); + return err; + } + + logfs_segment_delete(inode, shadow); + set_iused(inode, shadow); + fill_shadow_tree(inode, page, shadow); + wc->ofs = shadow->new_ofs; + return 0; +} + +static int logfs_truncate_direct(struct inode *inode, u64 size) +{ + struct logfs_inode *li = logfs_inode(inode); + struct write_control wc; + struct page *page; + int e; + int err; + + alloc_inode_block(inode); + + for (e = I0_BLOCKS - 1; e >= 0; e--) { + if (size > (e+1) * LOGFS_BLOCKSIZE) + break; + + wc.ofs = li->li_data[e]; + if (!wc.ofs) + continue; + + page = logfs_get_write_page(inode, e, 0); + if (!page) + return -ENOMEM; + err = logfs_segment_read(inode, page, wc.ofs, e, 0); + if (err) { + logfs_put_write_page(page); + return err; + } + err = logfs_truncate_i0(inode, page, &wc, size); + logfs_put_write_page(page); + if (err) + return err; + + li->li_data[e] = wc.ofs; + } + return 0; +} + +/* FIXME: these need to become per-sb once we support different blocksizes */ +static u64 __logfs_step[] = { + 1, + I1_BLOCKS, + I2_BLOCKS, + I3_BLOCKS, +}; + +static u64 __logfs_start_index[] = { + I0_BLOCKS, + I1_BLOCKS, + I2_BLOCKS, + I3_BLOCKS +}; + +static inline u64 logfs_step(level_t level) +{ + return __logfs_step[(__force u8)level]; +} + +static inline u64 logfs_factor(u8 level) +{ + return __logfs_step[level] * LOGFS_BLOCKSIZE; +} + +static inline u64 logfs_start_index(level_t level) +{ + return __logfs_start_index[(__force u8)level]; +} + +static void logfs_unpack_raw_index(pgoff_t index, u64 *bix, level_t *level) +{ + logfs_unpack_index(index, bix, level); + if (*bix <= logfs_start_index(SUBLEVEL(*level))) + *bix = 0; +} + +static int __logfs_truncate_rec(struct inode *inode, struct page *ipage, + struct write_control *this_wc, u64 size) +{ + int truncate_happened = 0; + int e, err = 0; + u64 bix, child_bix, next_bix; + level_t level; + struct page *page; + struct write_control child_wc = { /* FIXME: flags */ }; + + logfs_unpack_raw_index(ipage->index, &bix, &level); + err = logfs_segment_read(inode, ipage, this_wc->ofs, bix, level); + if (err) + return err; + + for (e = LOGFS_BLOCK_FACTOR - 1; e >= 0; e--) { + child_bix = bix + e * logfs_step(SUBLEVEL(level)); + next_bix = child_bix + logfs_step(SUBLEVEL(level)); + if (size > next_bix * LOGFS_BLOCKSIZE) + break; + + child_wc.ofs = pure_ofs(block_get_pointer(ipage, e)); + if (!child_wc.ofs) + continue; + + page = logfs_get_write_page(inode, child_bix, SUBLEVEL(level)); + if (!page) + return -ENOMEM; + + if ((__force u8)level > 1) + err = __logfs_truncate_rec(inode, page, &child_wc, size); + else + err = logfs_truncate_i0(inode, page, &child_wc, size); + logfs_put_write_page(page); + if (err) + return err; + + truncate_happened = 1; + alloc_indirect_block(inode, ipage, 0); + block_set_pointer(ipage, e, 
child_wc.ofs); + } + + if (!truncate_happened) { + printk("ineffectual truncate (%lx, %lx, %llx)\n", inode->i_ino, ipage->index, size); + return 0; + } + + this_wc->flags = WF_DELETE; + if (logfs_block(ipage)->partial) + this_wc->flags |= WF_WRITE; + + return logfs_write_i0(inode, ipage, this_wc); +} + +static int logfs_truncate_rec(struct inode *inode, u64 size) +{ + struct logfs_inode *li = logfs_inode(inode); + struct write_control wc = { + .ofs = li->li_data[INDIRECT_INDEX], + }; + struct page *page; + int err; + + alloc_inode_block(inode); + + if (!wc.ofs) + return 0; + + page = logfs_get_write_page(inode, 0, LEVEL(li->li_height)); + if (!page) + return -ENOMEM; + + err = __logfs_truncate_rec(inode, page, &wc, size); + logfs_put_write_page(page); + if (err) + return err; + + if (li->li_data[INDIRECT_INDEX] != wc.ofs) + li->li_data[INDIRECT_INDEX] = wc.ofs; + return 0; +} + +static int __logfs_truncate(struct inode *inode, u64 size) +{ + int ret; + + if (size >= logfs_factor(logfs_inode(inode)->li_height)) + return 0; + + ret = logfs_truncate_rec(inode, size); + if (ret) + return ret; + + return logfs_truncate_direct(inode, size); +} + +int logfs_truncate(struct inode *inode, u64 size) +{ + struct super_block *sb = inode->i_sb; + int err; + + logfs_get_wblocks(sb, NULL, 1); + err = __logfs_truncate(inode, size); + if (!err) + err = __logfs_write_inode(inode, 0); + logfs_put_wblocks(sb, NULL, 1); + + if (!err) + err = vmtruncate(inode, size); + + /* I don't trust error recovery yet. */ + WARN_ON(err); + return err; +} + +static void move_page_to_inode(struct inode *inode, struct page *page) +{ + struct logfs_inode *li = logfs_inode(inode); + struct logfs_block *block = logfs_block(page); + + if (!block) + return; + + log_blockmove("move_page_to_inode(%llx, %llx, %x)\n", + block->ino, block->bix, block->level); + BUG_ON(li->li_block); + block->ops = &inode_block_ops; + block->inode = inode; + li->li_block = block; + + block->page = NULL; + page->private = 0; + ClearPagePrivate(page); +} + +static void move_inode_to_page(struct page *page, struct inode *inode) +{ + struct logfs_inode *li = logfs_inode(inode); + struct logfs_block *block = li->li_block; + + if (!block) + return; + + log_blockmove("move_inode_to_page(%llx, %llx, %x)\n", + block->ino, block->bix, block->level); + BUG_ON(PagePrivate(page)); + block->ops = &indirect_block_ops; + block->page = page; + page->private = (unsigned long)block; + SetPagePrivate(page); + + block->inode = NULL; + li->li_block = NULL; +} + +int logfs_read_inode(struct inode *inode) +{ + struct super_block *sb = inode->i_sb; + struct logfs_super *super = logfs_super(sb); + struct inode *master_inode = super->s_master_inode; + struct page *page; + struct logfs_disk_inode *di; + u64 ino = inode->i_ino; + + if (ino << sb->s_blocksize_bits > i_size_read(master_inode)) + return -ENODATA; + if (!logfs_exist_block(master_inode, ino)) + return -ENODATA; + + page = read_cache_page(master_inode->i_mapping, ino, + (filler_t *)logfs_readpage, NULL); + if (IS_ERR(page)) + return PTR_ERR(page); + + di = kmap_atomic(page, KM_USER0); + logfs_disk_to_inode(di, inode); + kunmap_atomic(di, KM_USER0); + move_page_to_inode(inode, page); + page_cache_release(page); + return 0; +} + +/* Caller must logfs_put_write_page(page); */ +static struct page *inode_to_page(struct inode *inode) +{ + struct inode *master_inode = logfs_super(inode->i_sb)->s_master_inode; + struct logfs_disk_inode *di; + struct page *page; + + BUG_ON(inode->i_ino == LOGFS_INO_MASTER); + + page = 
logfs_get_write_page(master_inode, inode->i_ino, 0); + if (!page) + return NULL; + + di = kmap_atomic(page, KM_USER0); + logfs_inode_to_disk(inode, di); + kunmap_atomic(di, KM_USER0); + move_inode_to_page(page, inode); + return page; +} + +/* Cheaper version of write_inode. All changes are concealed in + * aliases, which are moved back. No write to the medium happens. + */ +void logfs_clear_inode(struct inode *inode) +{ + struct super_block *sb = inode->i_sb; + struct logfs_inode *li = logfs_inode(inode); + struct logfs_block *block = li->li_block; + struct page *page; + + /* Only deleted files may be dirty at this point */ + BUG_ON(inode->i_state & I_DIRTY && inode->i_nlink); + if (!block) + return; + if ((logfs_super(sb)->s_flags & LOGFS_SB_FLAG_SHUTDOWN)) { + block->ops->free_block(inode->i_sb, block); + return; + } + + BUG_ON(inode->i_ino < LOGFS_RESERVED_INOS); + page = inode_to_page(inode); + BUG_ON(!page); /* FIXME: Use emergency page */ + logfs_put_write_page(page); +} + +static int do_write_inode(struct inode *inode) +{ + struct super_block *sb = inode->i_sb; + struct inode *master_inode = logfs_super(sb)->s_master_inode; + loff_t size = (inode->i_ino + 1) << inode->i_sb->s_blocksize_bits; + struct page *page; + int err; + + BUG_ON(inode->i_ino == LOGFS_INO_MASTER); + /* FIXME: lock inode */ + + if (i_size_read(master_inode) < size) + i_size_write(master_inode, size); + + /* TODO: Tell vfs this inode is clean now */ + + page = inode_to_page(inode); + if (!page) + return -ENOMEM; + + /* FIXME: transaction is part of logfs_block now. Is that enough? */ + err = logfs_write_buf(master_inode, page, 0); + logfs_put_write_page(page); + return err; +} + +static void logfs_mod_segment_entry(struct super_block *sb, u32 segno, + int write, + void (*change_se)(struct logfs_segment_entry *, long), + long arg) +{ + struct logfs_super *super = logfs_super(sb); + struct inode *inode; + struct page *page; + struct logfs_segment_entry *se; + pgoff_t page_no; + int child_no; + + page_no = segno >> (sb->s_blocksize_bits - 3); + child_no = segno & ((sb->s_blocksize >> 3) - 1); + + inode = super->s_segfile_inode; + page = logfs_get_write_page(inode, page_no, 0); + BUG_ON(!page); /* FIXME: We need some reserve page for this case */ + if (!PageUptodate(page)) + logfs_read_block(inode, page, WRITE); + + if (write) + alloc_indirect_block(inode, page, 0); + se = kmap_atomic(page, KM_USER0); + change_se(se + child_no, arg); + if (write) { + logfs_set_alias(sb, logfs_block(page), child_no); + BUG_ON((int)be32_to_cpu(se[child_no].valid) > super->s_segsize); + } + kunmap_atomic(se, KM_USER0); + + logfs_put_write_page(page); +} + +static void __get_segment_entry(struct logfs_segment_entry *se, long _target) +{ + struct logfs_segment_entry *target = (void *)_target; + + *target = *se; +} + +void logfs_get_segment_entry(struct super_block *sb, u32 segno, + struct logfs_segment_entry *se) +{ + logfs_mod_segment_entry(sb, segno, 0, __get_segment_entry, (long)se); +} + +static void __set_segment_used(struct logfs_segment_entry *se, long increment) +{ + u32 valid; + + valid = be32_to_cpu(se->valid); + valid += increment; + se->valid = cpu_to_be32(valid); +} + +void logfs_set_segment_used(struct super_block *sb, u64 ofs, int increment) +{ + struct logfs_super *super = logfs_super(sb); + u32 segno = ofs >> super->s_segshift; + + if (!increment) + return; + + logfs_mod_segment_entry(sb, segno, 1, __set_segment_used, increment); +} + +static void __set_segment_erased(struct logfs_segment_entry *se, long ec_level) +{ + 
se->ec_level = cpu_to_be32(ec_level); +} + +void logfs_set_segment_erased(struct super_block *sb, u32 segno, u32 ec, + gc_level_t gc_level) +{ + u32 ec_level = ec << 4 | (__force u8)gc_level; + + logfs_mod_segment_entry(sb, segno, 1, __set_segment_erased, ec_level); +} + +static void __set_segment_reserved(struct logfs_segment_entry *se, long ignore) +{ + se->valid = cpu_to_be32(RESERVED); +} + +void logfs_set_segment_reserved(struct super_block *sb, u32 segno) +{ + logfs_mod_segment_entry(sb, segno, 1, __set_segment_reserved, 0); +} + +static void __set_segment_unreserved(struct logfs_segment_entry *se, + long ec_level) +{ + se->valid = 0; + se->ec_level = cpu_to_be32(ec_level); +} + +void logfs_set_segment_unreserved(struct super_block *sb, u32 segno, u32 ec) +{ + u32 ec_level = ec << 4; + + logfs_mod_segment_entry(sb, segno, 1, __set_segment_unreserved, + ec_level); +} + +int __logfs_write_inode(struct inode *inode, long flags) +{ + struct super_block *sb = inode->i_sb; + int ret; + + logfs_get_wblocks(sb, NULL, flags & WF_LOCK); + ret = do_write_inode(inode); + logfs_put_wblocks(sb, NULL, flags & WF_LOCK); + return ret; +} + +static int do_delete_inode(struct inode *inode) +{ + struct super_block *sb = inode->i_sb; + struct inode *master_inode = logfs_super(sb)->s_master_inode; + struct page *page; + int ret; + + page = logfs_get_write_page(master_inode, inode->i_ino, 0); + if (!page) + return -ENOMEM; + + move_inode_to_page(page, inode); + + logfs_get_wblocks(sb, page, 1); + ret = __logfs_delete(master_inode, page); + logfs_put_wblocks(sb, page, 1); + + logfs_put_write_page(page); + return ret; +} + +/* + * ZOMBIE inodes have already been deleted before and should remain dead, + * if it weren't for valid checking. No need to kill them again here. + */ +void logfs_delete_inode(struct inode *inode) +{ + struct logfs_inode *li = logfs_inode(inode); + + if (!(li->li_flags & LOGFS_IF_ZOMBIE)) { + li->li_flags |= LOGFS_IF_ZOMBIE; + if (i_size_read(inode) > 0) + logfs_truncate(inode, 0); + do_delete_inode(inode); + } + truncate_inode_pages(&inode->i_data, 0); + clear_inode(inode); +} + +void btree_write_block(struct logfs_block *block) +{ + struct inode *inode; + struct page *page; + int err, cookie; + + inode = logfs_safe_iget(block->sb, block->ino, &cookie); + page = logfs_get_write_page(inode, block->bix, block->level); + + err = logfs_readpage_nolock(page); + BUG_ON(err); + BUG_ON(!PagePrivate(page)); + BUG_ON(logfs_block(page) != block); + err = __logfs_write_buf(inode, page, 0); + BUG_ON(err); + BUG_ON(PagePrivate(page) || page->private); + + logfs_put_write_page(page); + logfs_safe_iput(inode, cookie); +} + +/** + * logfs_inode_write - write inode or dentry objects + * + * @inode: parent inode (ifile or directory) + * @buf: object to write (inode or dentry) + * @n: object size + * @_pos: object number (file position in blocks/objects) + * @flags: write flags + * @lock: 0 if write lock is already taken, 1 otherwise + * @shadow_tree: shadow below this inode + * + * FIXME: All caller of this put a 200-300 byte variable on the stack, + * only to call here and do a memcpy from that stack variable. A good + * example of wasted performance and stack space. 
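+ *
+ * A hypothetical caller (names are illustrative) looks roughly like:
+ *
+ *	struct logfs_disk_dentry dd;
+ *
+ *	memset(&dd, 0, sizeof(dd));
+ *	... fill in dd ...
+ *	err = logfs_inode_write(dir, &dd, sizeof(dd), pos, WF_LOCK, NULL);
+ *
+ * so a ~266 byte object is built on the stack and then copied once more
+ * into the page cache below.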
+ */ +int logfs_inode_write(struct inode *inode, const void *buf, size_t count, + loff_t bix, long flags, struct shadow_tree *shadow_tree) +{ + loff_t pos = bix << inode->i_sb->s_blocksize_bits; + int err; + struct page *page; + void *pagebuf; + + BUG_ON(pos & (LOGFS_BLOCKSIZE-1)); + BUG_ON(count > LOGFS_BLOCKSIZE); + page = logfs_get_write_page(inode, bix, 0); + if (!page) + return -ENOMEM; + + pagebuf = kmap_atomic(page, KM_USER0); + memcpy(pagebuf, buf, count); + flush_dcache_page(page); + kunmap_atomic(pagebuf, KM_USER0); + + if (i_size_read(inode) < pos + LOGFS_BLOCKSIZE) + i_size_write(inode, pos + LOGFS_BLOCKSIZE); + + err = logfs_write_buf(inode, page, flags); + logfs_put_write_page(page); + return err; +} + +int logfs_open_segfile(struct super_block *sb) +{ + struct logfs_super *super = logfs_super(sb); + struct inode *inode; + + inode = logfs_read_meta_inode(sb, LOGFS_INO_SEGFILE); + if (IS_ERR(inode)) + return PTR_ERR(inode); + super->s_segfile_inode = inode; + return 0; +} + +int logfs_init_rw(struct super_block *sb) +{ + struct logfs_super *super = logfs_super(sb); + int min_fill = 3 * super->s_no_blocks; + + INIT_LIST_HEAD(&super->s_object_alias); + mutex_init(&super->s_write_mutex); + super->s_block_pool = mempool_create_kmalloc_pool(min_fill, + sizeof(struct logfs_block)); + super->s_shadow_pool = mempool_create_kmalloc_pool(min_fill, + sizeof(struct logfs_shadow)); + return 0; +} + +void logfs_cleanup_rw(struct super_block *sb) +{ + struct logfs_super *super = logfs_super(sb); + + destroy_meta_inode(super->s_segfile_inode); + if (super->s_block_pool) + mempool_destroy(super->s_block_pool); + if (super->s_shadow_pool) + mempool_destroy(super->s_shadow_pool); +} diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c new file mode 100644 index 000000000000..5f58b74516ca --- /dev/null +++ b/fs/logfs/segment.c @@ -0,0 +1,924 @@ +/* + * fs/logfs/segment.c - Handling the Object Store + * + * As should be obvious for Linux kernel code, license is GPLv2 + * + * Copyright (c) 2005-2008 Joern Engel + * + * Object store or ostore makes up the complete device with exception of + * the superblock and journal areas. Apart from its own metadata it stores + * three kinds of objects: inodes, dentries and blocks, both data and indirect. 
+ */ +#include "logfs.h" + +static int logfs_mark_segment_bad(struct super_block *sb, u32 segno) +{ + struct logfs_super *super = logfs_super(sb); + struct btree_head32 *head = &super->s_reserved_segments; + int err; + + err = btree_insert32(head, segno, (void *)1, GFP_NOFS); + if (err) + return err; + logfs_super(sb)->s_bad_segments++; + /* FIXME: write to journal */ + return 0; +} + +int logfs_erase_segment(struct super_block *sb, u32 segno) +{ + struct logfs_super *super = logfs_super(sb); + + super->s_gec++; + + return super->s_devops->erase(sb, (u64)segno << super->s_segshift, + super->s_segsize); +} + +static s64 logfs_get_free_bytes(struct logfs_area *area, size_t bytes) +{ + s32 ofs; + + logfs_open_area(area, bytes); + + ofs = area->a_used_bytes; + area->a_used_bytes += bytes; + BUG_ON(area->a_used_bytes >= logfs_super(area->a_sb)->s_segsize); + + return dev_ofs(area->a_sb, area->a_segno, ofs); +} + +static struct page *get_mapping_page(struct super_block *sb, pgoff_t index, + int use_filler) +{ + struct logfs_super *super = logfs_super(sb); + struct address_space *mapping = super->s_mapping_inode->i_mapping; + filler_t *filler = super->s_devops->readpage; + struct page *page; + + BUG_ON(mapping_gfp_mask(mapping) & __GFP_FS); + if (use_filler) + page = read_cache_page(mapping, index, filler, sb); + else { + page = find_or_create_page(mapping, index, GFP_NOFS); + unlock_page(page); + } + return page; +} + +void __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len, + int use_filler) +{ + pgoff_t index = ofs >> PAGE_SHIFT; + struct page *page; + long offset = ofs & (PAGE_SIZE-1); + long copylen; + + /* Only logfs_wbuf_recover may use len==0 */ + BUG_ON(!len && !use_filler); + do { + copylen = min((ulong)len, PAGE_SIZE - offset); + + page = get_mapping_page(area->a_sb, index, use_filler); + SetPageUptodate(page); + BUG_ON(!page); /* FIXME: reserve a pool */ + memcpy(page_address(page) + offset, buf, copylen); + SetPagePrivate(page); + page_cache_release(page); + + buf += copylen; + len -= copylen; + offset = 0; + index++; + } while (len); +} + +/* + * bdev_writeseg will write full pages. Memset the tail to prevent data leaks. + */ +static void pad_wbuf(struct logfs_area *area, int final) +{ + struct super_block *sb = area->a_sb; + struct logfs_super *super = logfs_super(sb); + struct page *page; + u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes); + pgoff_t index = ofs >> PAGE_SHIFT; + long offset = ofs & (PAGE_SIZE-1); + u32 len = PAGE_SIZE - offset; + + if (len == PAGE_SIZE) { + /* The math in this function can surely use some love */ + len = 0; + } + if (len) { + BUG_ON(area->a_used_bytes >= super->s_segsize); + + page = get_mapping_page(area->a_sb, index, 0); + BUG_ON(!page); /* FIXME: reserve a pool */ + memset(page_address(page) + offset, 0xff, len); + SetPagePrivate(page); + page_cache_release(page); + } + + if (!final) + return; + + area->a_used_bytes += len; + for ( ; area->a_used_bytes < super->s_segsize; + area->a_used_bytes += PAGE_SIZE) { + /* Memset another page */ + index++; + page = get_mapping_page(area->a_sb, index, 0); + BUG_ON(!page); /* FIXME: reserve a pool */ + memset(page_address(page), 0xff, PAGE_SIZE); + SetPagePrivate(page); + page_cache_release(page); + } +} + +/* + * We have to be careful with the alias tree. Since lookup is done by bix, + * it needs to be normalized, so 14, 15, 16, etc. all match when dealing with + * indirect blocks. So always use it through accessor functions. 
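__logfs_buf_write() above (and wbuf_read() later in this file) walk a buffer across page-sized chunks of the mapping inode: the first chunk starts at ofs & (PAGE_SIZE - 1), each chunk copies min(len, PAGE_SIZE - offset) bytes, and every later chunk starts at offset 0. A stand-alone sketch of just that arithmetic, with a plain in-memory array standing in for the mapping pages:

#include <stdio.h>
#include <string.h>

#define PAGE_SIZE 4096UL
#define NPAGES    4

static char store[NPAGES][PAGE_SIZE];  /* stand-in for the mapping pages */

/* copy buf[0..len) to "device" offset ofs, one page-sized piece at a time */
static void buf_write(unsigned long ofs, const char *buf, size_t len)
{
        unsigned long index = ofs / PAGE_SIZE;
        unsigned long offset = ofs & (PAGE_SIZE - 1);

        while (len) {
                size_t copylen = len < PAGE_SIZE - offset ?
                                 len : PAGE_SIZE - offset;

                memcpy(&store[index][offset], buf, copylen);
                buf += copylen;
                len -= copylen;
                offset = 0;     /* later pieces start at a page boundary */
                index++;
        }
}

int main(void)
{
        char msg[6000];

        memset(msg, 'x', sizeof(msg));
        buf_write(PAGE_SIZE - 100, msg, sizeof(msg));   /* spans pages 0..2 */
        printf("page 1 starts with '%c'\n", store[1][0]);
        return 0;
}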
+ */ +static void *alias_tree_lookup(struct super_block *sb, u64 ino, u64 bix, + level_t level) +{ + struct btree_head128 *head = &logfs_super(sb)->s_object_alias_tree; + pgoff_t index = logfs_pack_index(bix, level); + + return btree_lookup128(head, ino, index); +} + +static int alias_tree_insert(struct super_block *sb, u64 ino, u64 bix, + level_t level, void *val) +{ + struct btree_head128 *head = &logfs_super(sb)->s_object_alias_tree; + pgoff_t index = logfs_pack_index(bix, level); + + return btree_insert128(head, ino, index, val, GFP_NOFS); +} + +static int btree_write_alias(struct super_block *sb, struct logfs_block *block, + write_alias_t *write_one_alias) +{ + struct object_alias_item *item; + int err; + + list_for_each_entry(item, &block->item_list, list) { + err = write_alias_journal(sb, block->ino, block->bix, + block->level, item->child_no, item->val); + if (err) + return err; + } + return 0; +} + +static gc_level_t btree_block_level(struct logfs_block *block) +{ + return expand_level(block->ino, block->level); +} + +static struct logfs_block_ops btree_block_ops = { + .write_block = btree_write_block, + .block_level = btree_block_level, + .free_block = __free_block, + .write_alias = btree_write_alias, +}; + +int logfs_load_object_aliases(struct super_block *sb, + struct logfs_obj_alias *oa, int count) +{ + struct logfs_super *super = logfs_super(sb); + struct logfs_block *block; + struct object_alias_item *item; + u64 ino, bix; + level_t level; + int i, err; + + super->s_flags |= LOGFS_SB_FLAG_OBJ_ALIAS; + count /= sizeof(*oa); + for (i = 0; i < count; i++) { + item = mempool_alloc(super->s_alias_pool, GFP_NOFS); + if (!item) + return -ENOMEM; + memset(item, 0, sizeof(*item)); + + super->s_no_object_aliases++; + item->val = oa[i].val; + item->child_no = be16_to_cpu(oa[i].child_no); + + ino = be64_to_cpu(oa[i].ino); + bix = be64_to_cpu(oa[i].bix); + level = LEVEL(oa[i].level); + + log_aliases("logfs_load_object_aliases(%llx, %llx, %x, %x) %llx\n", + ino, bix, level, item->child_no, + be64_to_cpu(item->val)); + block = alias_tree_lookup(sb, ino, bix, level); + if (!block) { + block = __alloc_block(sb, ino, bix, level); + block->ops = &btree_block_ops; + err = alias_tree_insert(sb, ino, bix, level, block); + BUG_ON(err); /* mempool empty */ + } + if (test_and_set_bit(item->child_no, block->alias_map)) { + printk(KERN_ERR"LogFS: Alias collision detected\n"); + return -EIO; + } + list_move_tail(&block->alias_list, &super->s_object_alias); + list_add(&item->list, &block->item_list); + } + return 0; +} + +static void kill_alias(void *_block, unsigned long ignore0, + u64 ignore1, u64 ignore2, size_t ignore3) +{ + struct logfs_block *block = _block; + struct super_block *sb = block->sb; + struct logfs_super *super = logfs_super(sb); + struct object_alias_item *item; + + while (!list_empty(&block->item_list)) { + item = list_entry(block->item_list.next, typeof(*item), list); + list_del(&item->list); + mempool_free(item, super->s_alias_pool); + } + block->ops->free_block(sb, block); +} + +static int obj_type(struct inode *inode, level_t level) +{ + if (level == 0) { + if (S_ISDIR(inode->i_mode)) + return OBJ_DENTRY; + if (inode->i_ino == LOGFS_INO_MASTER) + return OBJ_INODE; + } + return OBJ_BLOCK; +} + +static int obj_len(struct super_block *sb, int obj_type) +{ + switch (obj_type) { + case OBJ_DENTRY: + return sizeof(struct logfs_disk_dentry); + case OBJ_INODE: + return sizeof(struct logfs_disk_inode); + case OBJ_BLOCK: + return sb->s_blocksize; + default: + BUG(); + } +} + +static int 
__logfs_segment_write(struct inode *inode, void *buf, + struct logfs_shadow *shadow, int type, int len, int compr) +{ + struct logfs_area *area; + struct super_block *sb = inode->i_sb; + s64 ofs; + struct logfs_object_header h; + int acc_len; + + if (shadow->gc_level == 0) + acc_len = len; + else + acc_len = obj_len(sb, type); + + area = get_area(sb, shadow->gc_level); + ofs = logfs_get_free_bytes(area, len + LOGFS_OBJECT_HEADERSIZE); + LOGFS_BUG_ON(ofs <= 0, sb); + /* + * Order is important. logfs_get_free_bytes(), by modifying the + * segment file, may modify the content of the very page we're about + * to write now. Which is fine, as long as the calculated crc and + * written data still match. So do the modifications _before_ + * calculating the crc. + */ + + h.len = cpu_to_be16(len); + h.type = type; + h.compr = compr; + h.ino = cpu_to_be64(inode->i_ino); + h.bix = cpu_to_be64(shadow->bix); + h.crc = logfs_crc32(&h, sizeof(h) - 4, 4); + h.data_crc = logfs_crc32(buf, len, 0); + + logfs_buf_write(area, ofs, &h, sizeof(h)); + logfs_buf_write(area, ofs + LOGFS_OBJECT_HEADERSIZE, buf, len); + + shadow->new_ofs = ofs; + shadow->new_len = acc_len + LOGFS_OBJECT_HEADERSIZE; + + return 0; +} + +static s64 logfs_segment_write_compress(struct inode *inode, void *buf, + struct logfs_shadow *shadow, int type, int len) +{ + struct super_block *sb = inode->i_sb; + void *compressor_buf = logfs_super(sb)->s_compressed_je; + ssize_t compr_len; + int ret; + + mutex_lock(&logfs_super(sb)->s_journal_mutex); + compr_len = logfs_compress(buf, compressor_buf, len, len); + + if (compr_len >= 0) { + ret = __logfs_segment_write(inode, compressor_buf, shadow, + type, compr_len, COMPR_ZLIB); + } else { + ret = __logfs_segment_write(inode, buf, shadow, type, len, + COMPR_NONE); + } + mutex_unlock(&logfs_super(sb)->s_journal_mutex); + return ret; +} + +/** + * logfs_segment_write - write data block to object store + * @inode: inode containing data + * + * Returns an errno or zero. + */ +int logfs_segment_write(struct inode *inode, struct page *page, + struct logfs_shadow *shadow) +{ + struct super_block *sb = inode->i_sb; + struct logfs_super *super = logfs_super(sb); + int do_compress, type, len; + int ret; + void *buf; + + BUG_ON(logfs_super(sb)->s_flags & LOGFS_SB_FLAG_SHUTDOWN); + do_compress = logfs_inode(inode)->li_flags & LOGFS_IF_COMPRESSED; + if (shadow->gc_level != 0) { + /* temporarily disable compression for indirect blocks */ + do_compress = 0; + } + + type = obj_type(inode, shrink_level(shadow->gc_level)); + len = obj_len(sb, type); + buf = kmap(page); + if (do_compress) + ret = logfs_segment_write_compress(inode, buf, shadow, type, + len); + else + ret = __logfs_segment_write(inode, buf, shadow, type, len, + COMPR_NONE); + kunmap(page); + + log_segment("logfs_segment_write(%llx, %llx, %x) %llx->%llx %x->%x\n", + shadow->ino, shadow->bix, shadow->gc_level, + shadow->old_ofs, shadow->new_ofs, + shadow->old_len, shadow->new_len); + /* this BUG_ON did catch a locking bug. 
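logfs_segment_write_compress() above tries zlib first and quietly falls back to storing the raw payload when compression fails or yields no gain; the header's compr field records which path was taken. A minimal user-space sketch of that decision, where try_compress() is a hypothetical stand-in for logfs_compress():

#include <stdio.h>
#include <string.h>

enum { COMPR_NONE = 0, COMPR_ZLIB = 1 };

/*
 * Stand-in for logfs_compress(): a real implementation would deflate into
 * 'out' and return the compressed size, or a negative value when the result
 * does not fit in maxlen.  This stub never compresses.
 */
static int try_compress(const void *in, void *out, size_t len, size_t maxlen)
{
        (void)in; (void)out; (void)len; (void)maxlen;
        return -1;
}

static int write_object(const void *buf, size_t len)
{
        char cbuf[4096];
        int compr_len = try_compress(buf, cbuf, len, sizeof(cbuf));

        if (compr_len >= 0) {
                printf("stored %d bytes, compr=%d\n", compr_len, COMPR_ZLIB);
                return 0;
        }
        /* compression failed or did not help: store the raw payload */
        printf("stored %zu bytes, compr=%d\n", len, COMPR_NONE);
        return 0;
}

int main(void)
{
        return write_object("hello, object store", 19);
}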
useful */ + BUG_ON(!(shadow->new_ofs & (super->s_segsize - 1))); + return ret; +} + +int wbuf_read(struct super_block *sb, u64 ofs, size_t len, void *buf) +{ + pgoff_t index = ofs >> PAGE_SHIFT; + struct page *page; + long offset = ofs & (PAGE_SIZE-1); + long copylen; + + while (len) { + copylen = min((ulong)len, PAGE_SIZE - offset); + + page = get_mapping_page(sb, index, 1); + if (IS_ERR(page)) + return PTR_ERR(page); + memcpy(buf, page_address(page) + offset, copylen); + page_cache_release(page); + + buf += copylen; + len -= copylen; + offset = 0; + index++; + } + return 0; +} + +/* + * The "position" of indirect blocks is ambiguous. It can be the position + * of any data block somewhere behind this indirect block. So we need to + * normalize the positions through logfs_block_mask() before comparing. + */ +static int check_pos(struct super_block *sb, u64 pos1, u64 pos2, level_t level) +{ + return (pos1 & logfs_block_mask(sb, level)) != + (pos2 & logfs_block_mask(sb, level)); +} + +#if 0 +static int read_seg_header(struct super_block *sb, u64 ofs, + struct logfs_segment_header *sh) +{ + __be32 crc; + int err; + + err = wbuf_read(sb, ofs, sizeof(*sh), sh); + if (err) + return err; + crc = logfs_crc32(sh, sizeof(*sh), 4); + if (crc != sh->crc) { + printk(KERN_ERR"LOGFS: header crc error at %llx: expected %x, " + "got %x\n", ofs, be32_to_cpu(sh->crc), + be32_to_cpu(crc)); + return -EIO; + } + return 0; +} +#endif + +static int read_obj_header(struct super_block *sb, u64 ofs, + struct logfs_object_header *oh) +{ + __be32 crc; + int err; + + err = wbuf_read(sb, ofs, sizeof(*oh), oh); + if (err) + return err; + crc = logfs_crc32(oh, sizeof(*oh) - 4, 4); + if (crc != oh->crc) { + printk(KERN_ERR"LOGFS: header crc error at %llx: expected %x, " + "got %x\n", ofs, be32_to_cpu(oh->crc), + be32_to_cpu(crc)); + return -EIO; + } + return 0; +} + +static void move_btree_to_page(struct inode *inode, struct page *page, + __be64 *data) +{ + struct super_block *sb = inode->i_sb; + struct logfs_super *super = logfs_super(sb); + struct btree_head128 *head = &super->s_object_alias_tree; + struct logfs_block *block; + struct object_alias_item *item, *next; + + if (!(super->s_flags & LOGFS_SB_FLAG_OBJ_ALIAS)) + return; + + block = btree_remove128(head, inode->i_ino, page->index); + if (!block) + return; + + log_blockmove("move_btree_to_page(%llx, %llx, %x)\n", + block->ino, block->bix, block->level); + list_for_each_entry_safe(item, next, &block->item_list, list) { + data[item->child_no] = item->val; + list_del(&item->list); + mempool_free(item, super->s_alias_pool); + } + block->page = page; + SetPagePrivate(page); + page->private = (unsigned long)block; + block->ops = &indirect_block_ops; + initialize_block_counters(page, block, data, 0); +} + +/* + * This silences a false, yet annoying gcc warning. I hate it when my editor + * jumps into bitops.h each time I recompile this file. + * TODO: Complain to gcc folks about this and upgrade compiler. 
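read_obj_header() above recomputes the header checksum while skipping the crc field itself, and the read path further down separately verifies data_crc over the payload. The sketch below shows only that general shape; it uses a plain CRC-32 and a simplified header, so it is not byte-compatible with logfs_crc32() or struct logfs_object_header:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* generic bitwise CRC-32 (poly 0xEDB88320); logfs_crc32() differs in detail */
static uint32_t crc32_calc(const void *buf, size_t len)
{
        const uint8_t *p = buf;
        uint32_t crc = ~0u;

        while (len--) {
                crc ^= *p++;
                for (int i = 0; i < 8; i++)
                        crc = (crc >> 1) ^ (crc & 1 ? 0xEDB88320u : 0);
        }
        return ~crc;
}

struct obj_header {             /* simplified stand-in for the object header */
        uint32_t crc;           /* covers the header minus this field */
        uint32_t data_crc;      /* covers the payload */
        uint16_t len;
        uint8_t type;
        uint8_t compr;
        uint64_t ino;
        uint64_t bix;
};

int main(void)
{
        const char payload[] = "object payload";
        struct obj_header h = { .len = sizeof(payload), .ino = 1, .bix = 7 };

        h.data_crc = crc32_calc(payload, h.len);
        h.crc = crc32_calc((char *)&h + 4, sizeof(h) - 4);  /* skip own crc */

        /* reader side: recompute and compare, as read_obj_header() does */
        if (crc32_calc((char *)&h + 4, sizeof(h) - 4) != h.crc ||
            crc32_calc(payload, h.len) != h.data_crc) {
                fprintf(stderr, "header or data crc mismatch\n");
                return 1;
        }
        printf("header and payload checksums verified\n");
        return 0;
}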
+ */ +static unsigned long fnb(const unsigned long *addr, + unsigned long size, unsigned long offset) +{ + return find_next_bit(addr, size, offset); +} + +void move_page_to_btree(struct page *page) +{ + struct logfs_block *block = logfs_block(page); + struct super_block *sb = block->sb; + struct logfs_super *super = logfs_super(sb); + struct object_alias_item *item; + unsigned long pos; + __be64 *child; + int err; + + if (super->s_flags & LOGFS_SB_FLAG_SHUTDOWN) { + block->ops->free_block(sb, block); + return; + } + log_blockmove("move_page_to_btree(%llx, %llx, %x)\n", + block->ino, block->bix, block->level); + super->s_flags |= LOGFS_SB_FLAG_OBJ_ALIAS; + + for (pos = 0; ; pos++) { + pos = fnb(block->alias_map, LOGFS_BLOCK_FACTOR, pos); + if (pos >= LOGFS_BLOCK_FACTOR) + break; + + item = mempool_alloc(super->s_alias_pool, GFP_NOFS); + BUG_ON(!item); /* mempool empty */ + memset(item, 0, sizeof(*item)); + + child = kmap_atomic(page, KM_USER0); + item->val = child[pos]; + kunmap_atomic(child, KM_USER0); + item->child_no = pos; + list_add(&item->list, &block->item_list); + } + block->page = NULL; + ClearPagePrivate(page); + page->private = 0; + block->ops = &btree_block_ops; + err = alias_tree_insert(block->sb, block->ino, block->bix, block->level, + block); + BUG_ON(err); /* mempool empty */ + ClearPageUptodate(page); +} + +static int __logfs_segment_read(struct inode *inode, void *buf, + u64 ofs, u64 bix, level_t level) +{ + struct super_block *sb = inode->i_sb; + void *compressor_buf = logfs_super(sb)->s_compressed_je; + struct logfs_object_header oh; + __be32 crc; + u16 len; + int err, block_len; + + block_len = obj_len(sb, obj_type(inode, level)); + err = read_obj_header(sb, ofs, &oh); + if (err) + goto out_err; + + err = -EIO; + if (be64_to_cpu(oh.ino) != inode->i_ino + || check_pos(sb, be64_to_cpu(oh.bix), bix, level)) { + printk(KERN_ERR"LOGFS: (ino, bix) don't match at %llx: " + "expected (%lx, %llx), got (%llx, %llx)\n", + ofs, inode->i_ino, bix, + be64_to_cpu(oh.ino), be64_to_cpu(oh.bix)); + goto out_err; + } + + len = be16_to_cpu(oh.len); + + switch (oh.compr) { + case COMPR_NONE: + err = wbuf_read(sb, ofs + LOGFS_OBJECT_HEADERSIZE, len, buf); + if (err) + goto out_err; + crc = logfs_crc32(buf, len, 0); + if (crc != oh.data_crc) { + printk(KERN_ERR"LOGFS: uncompressed data crc error at " + "%llx: expected %x, got %x\n", ofs, + be32_to_cpu(oh.data_crc), + be32_to_cpu(crc)); + goto out_err; + } + break; + case COMPR_ZLIB: + mutex_lock(&logfs_super(sb)->s_journal_mutex); + err = wbuf_read(sb, ofs + LOGFS_OBJECT_HEADERSIZE, len, + compressor_buf); + if (err) { + mutex_unlock(&logfs_super(sb)->s_journal_mutex); + goto out_err; + } + crc = logfs_crc32(compressor_buf, len, 0); + if (crc != oh.data_crc) { + printk(KERN_ERR"LOGFS: compressed data crc error at " + "%llx: expected %x, got %x\n", ofs, + be32_to_cpu(oh.data_crc), + be32_to_cpu(crc)); + mutex_unlock(&logfs_super(sb)->s_journal_mutex); + goto out_err; + } + err = logfs_uncompress(compressor_buf, buf, len, block_len); + mutex_unlock(&logfs_super(sb)->s_journal_mutex); + if (err) { + printk(KERN_ERR"LOGFS: uncompress error at %llx\n", ofs); + goto out_err; + } + break; + default: + LOGFS_BUG(sb); + err = -EIO; + goto out_err; + } + return 0; + +out_err: + logfs_set_ro(sb); + printk(KERN_ERR"LOGFS: device is read-only now\n"); + LOGFS_BUG(sb); + return err; +} + +/** + * logfs_segment_read - read data block from object store + * @inode: inode containing data + * @buf: data buffer + * @ofs: physical data offset + * @bix: block 
index + * @level: block level + * + * Returns 0 on success or a negative errno. + */ +int logfs_segment_read(struct inode *inode, struct page *page, + u64 ofs, u64 bix, level_t level) +{ + int err; + void *buf; + + if (PageUptodate(page)) + return 0; + + ofs &= ~LOGFS_FULLY_POPULATED; + + buf = kmap(page); + err = __logfs_segment_read(inode, buf, ofs, bix, level); + if (!err) { + move_btree_to_page(inode, page, buf); + SetPageUptodate(page); + } + kunmap(page); + log_segment("logfs_segment_read(%lx, %llx, %x) %llx (%d)\n", + inode->i_ino, bix, level, ofs, err); + return err; +} + +int logfs_segment_delete(struct inode *inode, struct logfs_shadow *shadow) +{ + struct super_block *sb = inode->i_sb; + struct logfs_object_header h; + u16 len; + int err; + + BUG_ON(logfs_super(sb)->s_flags & LOGFS_SB_FLAG_SHUTDOWN); + BUG_ON(shadow->old_ofs & LOGFS_FULLY_POPULATED); + if (!shadow->old_ofs) + return 0; + + log_segment("logfs_segment_delete(%llx, %llx, %x) %llx->%llx %x->%x\n", + shadow->ino, shadow->bix, shadow->gc_level, + shadow->old_ofs, shadow->new_ofs, + shadow->old_len, shadow->new_len); + err = read_obj_header(sb, shadow->old_ofs, &h); + LOGFS_BUG_ON(err, sb); + LOGFS_BUG_ON(be64_to_cpu(h.ino) != inode->i_ino, sb); + LOGFS_BUG_ON(check_pos(sb, shadow->bix, be64_to_cpu(h.bix), + shrink_level(shadow->gc_level)), sb); + + if (shadow->gc_level == 0) + len = be16_to_cpu(h.len); + else + len = obj_len(sb, h.type); + shadow->old_len = len + sizeof(h); + return 0; +} + +static void freeseg(struct super_block *sb, u32 segno) +{ + struct logfs_super *super = logfs_super(sb); + struct address_space *mapping = super->s_mapping_inode->i_mapping; + struct page *page; + u64 ofs, start, end; + + start = dev_ofs(sb, segno, 0); + end = dev_ofs(sb, segno + 1, 0); + for (ofs = start; ofs < end; ofs += PAGE_SIZE) { + page = find_get_page(mapping, ofs >> PAGE_SHIFT); + if (!page) + continue; + ClearPagePrivate(page); + page_cache_release(page); + } +} + +int logfs_open_area(struct logfs_area *area, size_t bytes) +{ + struct super_block *sb = area->a_sb; + struct logfs_super *super = logfs_super(sb); + int err, closed = 0; + + if (area->a_is_open && area->a_used_bytes + bytes <= super->s_segsize) + return 0; + + if (area->a_is_open) { + u64 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes); + u32 len = super->s_segsize - area->a_written_bytes; + + log_gc("logfs_close_area(%x)\n", area->a_segno); + pad_wbuf(area, 1); + super->s_devops->writeseg(area->a_sb, ofs, len); + freeseg(sb, area->a_segno); + closed = 1; + } + + area->a_used_bytes = 0; + area->a_written_bytes = 0; +again: + area->a_ops->get_free_segment(area); + area->a_ops->get_erase_count(area); + + log_gc("logfs_open_area(%x, %x)\n", area->a_segno, area->a_level); + err = area->a_ops->erase_segment(area); + if (err) { + printk(KERN_WARNING "LogFS: Error erasing segment %x\n", + area->a_segno); + logfs_mark_segment_bad(sb, area->a_segno); + goto again; + } + area->a_is_open = 1; + return closed; +} + +void logfs_sync_area(struct logfs_area *area) +{ + struct super_block *sb = area->a_sb; + struct logfs_super *super = logfs_super(sb); + u64 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes); + u32 len = (area->a_used_bytes - area->a_written_bytes); + + if (super->s_writesize) + len &= ~(super->s_writesize - 1); + if (len == 0) + return; + pad_wbuf(area, 0); + super->s_devops->writeseg(sb, ofs, len); + area->a_written_bytes += len; +} + +void logfs_sync_segments(struct super_block *sb) +{ + struct logfs_super *super = logfs_super(sb); + int i; + 
+ for_each_area(i) + logfs_sync_area(super->s_area[i]); +} + +/* + * Pick a free segment to be used for this area. Effectively takes a + * candidate from the free list (not really a candidate anymore). + */ +static void ostore_get_free_segment(struct logfs_area *area) +{ + struct super_block *sb = area->a_sb; + struct logfs_super *super = logfs_super(sb); + + if (super->s_free_list.count == 0) { + printk(KERN_ERR"LOGFS: ran out of free segments\n"); + LOGFS_BUG(sb); + } + + area->a_segno = get_best_cand(sb, &super->s_free_list, NULL); +} + +static void ostore_get_erase_count(struct logfs_area *area) +{ + struct logfs_segment_entry se; + u32 ec_level; + + logfs_get_segment_entry(area->a_sb, area->a_segno, &se); + BUG_ON(se.ec_level == cpu_to_be32(BADSEG) || + se.valid == cpu_to_be32(RESERVED)); + + ec_level = be32_to_cpu(se.ec_level); + area->a_erase_count = (ec_level >> 4) + 1; +} + +static int ostore_erase_segment(struct logfs_area *area) +{ + struct super_block *sb = area->a_sb; + struct logfs_segment_header sh; + u64 ofs; + int err; + + err = logfs_erase_segment(sb, area->a_segno); + if (err) + return err; + + sh.pad = 0; + sh.type = SEG_OSTORE; + sh.level = (__force u8)area->a_level; + sh.segno = cpu_to_be32(area->a_segno); + sh.ec = cpu_to_be32(area->a_erase_count); + sh.gec = cpu_to_be64(logfs_super(sb)->s_gec); + sh.crc = logfs_crc32(&sh, sizeof(sh), 4); + + logfs_set_segment_erased(sb, area->a_segno, area->a_erase_count, + area->a_level); + + ofs = dev_ofs(sb, area->a_segno, 0); + area->a_used_bytes = sizeof(sh); + logfs_buf_write(area, ofs, &sh, sizeof(sh)); + return 0; +} + +static const struct logfs_area_ops ostore_area_ops = { + .get_free_segment = ostore_get_free_segment, + .get_erase_count = ostore_get_erase_count, + .erase_segment = ostore_erase_segment, +}; + +static void free_area(struct logfs_area *area) +{ + if (area) + freeseg(area->a_sb, area->a_segno); + kfree(area); +} + +static struct logfs_area *alloc_area(struct super_block *sb) +{ + struct logfs_area *area; + + area = kzalloc(sizeof(*area), GFP_KERNEL); + if (!area) + return NULL; + + area->a_sb = sb; + return area; +} + +static void map_invalidatepage(struct page *page, unsigned long l) +{ + BUG(); +} + +static int map_releasepage(struct page *page, gfp_t g) +{ + /* Don't release these pages */ + return 0; +} + +static const struct address_space_operations mapping_aops = { + .invalidatepage = map_invalidatepage, + .releasepage = map_releasepage, + .set_page_dirty = __set_page_dirty_nobuffers, +}; + +int logfs_init_mapping(struct super_block *sb) +{ + struct logfs_super *super = logfs_super(sb); + struct address_space *mapping; + struct inode *inode; + + inode = logfs_new_meta_inode(sb, LOGFS_INO_MAPPING); + if (IS_ERR(inode)) + return PTR_ERR(inode); + super->s_mapping_inode = inode; + mapping = inode->i_mapping; + mapping->a_ops = &mapping_aops; + /* Would it be possible to use __GFP_HIGHMEM as well? 
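logfs_sync_area() above only pushes whole device write units to the media: len &= ~(s_writesize - 1) rounds the dirty byte count down to a multiple of the (power-of-two) write size, leaving the remainder buffered until more data arrives or pad_wbuf() pads it out. A tiny demonstration of that mask arithmetic with made-up numbers:

#include <stdio.h>

/* round len down to a whole number of device write units (a power of two) */
static unsigned long round_down_to(unsigned long len, unsigned long writesize)
{
        return writesize ? len & ~(writesize - 1) : len;
}

int main(void)
{
        unsigned long used = 5000, written = 0, writesize = 2048;
        unsigned long len = round_down_to(used - written, writesize);

        /* 4096 bytes go to the device now; 904 stay buffered for later */
        printf("flush %lu of %lu dirty bytes\n", len, used - written);
        return 0;
}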
*/ + mapping_set_gfp_mask(mapping, GFP_NOFS); + return 0; +} + +int logfs_init_areas(struct super_block *sb) +{ + struct logfs_super *super = logfs_super(sb); + int i = -1; + + super->s_alias_pool = mempool_create_kmalloc_pool(600, + sizeof(struct object_alias_item)); + if (!super->s_alias_pool) + return -ENOMEM; + + super->s_journal_area = alloc_area(sb); + if (!super->s_journal_area) + goto err; + + for_each_area(i) { + super->s_area[i] = alloc_area(sb); + if (!super->s_area[i]) + goto err; + super->s_area[i]->a_level = GC_LEVEL(i); + super->s_area[i]->a_ops = &ostore_area_ops; + } + btree_init_mempool128(&super->s_object_alias_tree, + super->s_btree_pool); + return 0; + +err: + for (i--; i >= 0; i--) + free_area(super->s_area[i]); + free_area(super->s_journal_area); + mempool_destroy(super->s_alias_pool); + return -ENOMEM; +} + +void logfs_cleanup_areas(struct super_block *sb) +{ + struct logfs_super *super = logfs_super(sb); + int i; + + btree_grim_visitor128(&super->s_object_alias_tree, 0, kill_alias); + for_each_area(i) + free_area(super->s_area[i]); + free_area(super->s_journal_area); + destroy_meta_inode(super->s_mapping_inode); +} diff --git a/fs/logfs/super.c b/fs/logfs/super.c new file mode 100644 index 000000000000..d128a2c1c8d1 --- /dev/null +++ b/fs/logfs/super.c @@ -0,0 +1,634 @@ +/* + * fs/logfs/super.c + * + * As should be obvious for Linux kernel code, license is GPLv2 + * + * Copyright (c) 2005-2008 Joern Engel + * + * Generally contains mount/umount code and also serves as a dump area for + * any functions that don't fit elsewhere and neither justify a file of their + * own. + */ +#include "logfs.h" +#include +#include +#include +#include + +static DEFINE_MUTEX(emergency_mutex); +static struct page *emergency_page; + +struct page *emergency_read_begin(struct address_space *mapping, pgoff_t index) +{ + filler_t *filler = (filler_t *)mapping->a_ops->readpage; + struct page *page; + int err; + + page = read_cache_page(mapping, index, filler, NULL); + if (page) + return page; + + /* No more pages available, switch to emergency page */ + printk(KERN_INFO"Logfs: Using emergency page\n"); + mutex_lock(&emergency_mutex); + err = filler(NULL, emergency_page); + if (err) { + mutex_unlock(&emergency_mutex); + printk(KERN_EMERG"Logfs: Error reading emergency page\n"); + return ERR_PTR(err); + } + return emergency_page; +} + +void emergency_read_end(struct page *page) +{ + if (page == emergency_page) + mutex_unlock(&emergency_mutex); + else + page_cache_release(page); +} + +static void dump_segfile(struct super_block *sb) +{ + struct logfs_super *super = logfs_super(sb); + struct logfs_segment_entry se; + u32 segno; + + for (segno = 0; segno < super->s_no_segs; segno++) { + logfs_get_segment_entry(sb, segno, &se); + printk("%3x: %6x %8x", segno, be32_to_cpu(se.ec_level), + be32_to_cpu(se.valid)); + if (++segno < super->s_no_segs) { + logfs_get_segment_entry(sb, segno, &se); + printk(" %6x %8x", be32_to_cpu(se.ec_level), + be32_to_cpu(se.valid)); + } + if (++segno < super->s_no_segs) { + logfs_get_segment_entry(sb, segno, &se); + printk(" %6x %8x", be32_to_cpu(se.ec_level), + be32_to_cpu(se.valid)); + } + if (++segno < super->s_no_segs) { + logfs_get_segment_entry(sb, segno, &se); + printk(" %6x %8x", be32_to_cpu(se.ec_level), + be32_to_cpu(se.valid)); + } + printk("\n"); + } +} + +/* + * logfs_crash_dump - dump debug information to device + * + * The LogFS superblock only occupies part of a segment. 
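emergency_read_begin()/emergency_read_end() above implement a common fallback pattern: when the normal allocation path fails, hand out one statically reserved object and serialise its use with a mutex, dropping the mutex only when that particular object comes back. A user-space analogue using pthreads; the buffer size and names are arbitrary:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t emergency_mutex = PTHREAD_MUTEX_INITIALIZER;
static char emergency_buf[4096];

/* normal path: allocate; fallback: hand out the single guarded buffer */
static void *read_begin(void)
{
        void *buf = malloc(4096);

        if (buf)
                return buf;
        pthread_mutex_lock(&emergency_mutex);   /* serialises the fallback */
        return emergency_buf;
}

static void read_end(void *buf)
{
        if (buf == emergency_buf)
                pthread_mutex_unlock(&emergency_mutex);
        else
                free(buf);
}

int main(void)
{
        void *buf = read_begin();

        printf("using %s buffer\n", buf == emergency_buf ? "emergency" : "heap");
        read_end(buf);
        return 0;
}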
This function will + * write as much debug information as it can gather into the spare space. + */ +void logfs_crash_dump(struct super_block *sb) +{ + dump_segfile(sb); +} + +/* + * TODO: move to lib/string.c + */ +/** + * memchr_inv - Find a character in an area of memory. + * @s: The memory area + * @c: The byte to search for + * @n: The size of the area. + * + * returns the address of the first character other than @c, or %NULL + * if the whole buffer contains just @c. + */ +void *memchr_inv(const void *s, int c, size_t n) +{ + const unsigned char *p = s; + while (n-- != 0) + if ((unsigned char)c != *p++) + return (void *)(p - 1); + + return NULL; +} + +/* + * FIXME: There should be a reserve for root, similar to ext2. + */ +int logfs_statfs(struct dentry *dentry, struct kstatfs *stats) +{ + struct super_block *sb = dentry->d_sb; + struct logfs_super *super = logfs_super(sb); + + stats->f_type = LOGFS_MAGIC_U32; + stats->f_bsize = sb->s_blocksize; + stats->f_blocks = super->s_size >> LOGFS_BLOCK_BITS >> 3; + stats->f_bfree = super->s_free_bytes >> sb->s_blocksize_bits; + stats->f_bavail = super->s_free_bytes >> sb->s_blocksize_bits; + stats->f_files = 0; + stats->f_ffree = 0; + stats->f_namelen = LOGFS_MAX_NAMELEN; + return 0; +} + +static int logfs_sb_set(struct super_block *sb, void *_super) +{ + struct logfs_super *super = _super; + + sb->s_fs_info = super; + sb->s_mtd = super->s_mtd; + sb->s_bdev = super->s_bdev; + return 0; +} + +static int logfs_sb_test(struct super_block *sb, void *_super) +{ + struct logfs_super *super = _super; + struct mtd_info *mtd = super->s_mtd; + + if (mtd && sb->s_mtd == mtd) + return 1; + if (super->s_bdev && sb->s_bdev == super->s_bdev) + return 1; + return 0; +} + +static void set_segment_header(struct logfs_segment_header *sh, u8 type, + u8 level, u32 segno, u32 ec) +{ + sh->pad = 0; + sh->type = type; + sh->level = level; + sh->segno = cpu_to_be32(segno); + sh->ec = cpu_to_be32(ec); + sh->gec = cpu_to_be64(segno); + sh->crc = logfs_crc32(sh, LOGFS_SEGMENT_HEADERSIZE, 4); +} + +static void logfs_write_ds(struct super_block *sb, struct logfs_disk_super *ds, + u32 segno, u32 ec) +{ + struct logfs_super *super = logfs_super(sb); + struct logfs_segment_header *sh = &ds->ds_sh; + int i; + + memset(ds, 0, sizeof(*ds)); + set_segment_header(sh, SEG_SUPER, 0, segno, ec); + + ds->ds_ifile_levels = super->s_ifile_levels; + ds->ds_iblock_levels = super->s_iblock_levels; + ds->ds_data_levels = super->s_data_levels; /* XXX: Remove */ + ds->ds_segment_shift = super->s_segshift; + ds->ds_block_shift = sb->s_blocksize_bits; + ds->ds_write_shift = super->s_writeshift; + ds->ds_filesystem_size = cpu_to_be64(super->s_size); + ds->ds_segment_size = cpu_to_be32(super->s_segsize); + ds->ds_bad_seg_reserve = cpu_to_be32(super->s_bad_seg_reserve); + ds->ds_feature_incompat = cpu_to_be64(super->s_feature_incompat); + ds->ds_feature_ro_compat= cpu_to_be64(super->s_feature_ro_compat); + ds->ds_feature_compat = cpu_to_be64(super->s_feature_compat); + ds->ds_feature_flags = cpu_to_be64(super->s_feature_flags); + ds->ds_root_reserve = cpu_to_be64(super->s_root_reserve); + ds->ds_speed_reserve = cpu_to_be64(super->s_speed_reserve); + journal_for_each(i) + ds->ds_journal_seg[i] = cpu_to_be32(super->s_journal_seg[i]); + ds->ds_magic = cpu_to_be64(LOGFS_MAGIC); + ds->ds_crc = logfs_crc32(ds, sizeof(*ds), + LOGFS_SEGMENT_HEADERSIZE + 12); +} + +static int write_one_sb(struct super_block *sb, + struct page *(*find_sb)(struct super_block *sb, u64 *ofs)) +{ + struct logfs_super *super = 
logfs_super(sb); + struct logfs_disk_super *ds; + struct logfs_segment_entry se; + struct page *page; + u64 ofs; + u32 ec, segno; + int err; + + page = find_sb(sb, &ofs); + if (!page) + return -EIO; + ds = page_address(page); + segno = seg_no(sb, ofs); + logfs_get_segment_entry(sb, segno, &se); + ec = be32_to_cpu(se.ec_level) >> 4; + ec++; + logfs_set_segment_erased(sb, segno, ec, 0); + logfs_write_ds(sb, ds, segno, ec); + err = super->s_devops->write_sb(sb, page); + page_cache_release(page); + return err; +} + +int logfs_write_sb(struct super_block *sb) +{ + struct logfs_super *super = logfs_super(sb); + int err; + + /* First superblock */ + err = write_one_sb(sb, super->s_devops->find_first_sb); + if (err) + return err; + + /* Last superblock */ + err = write_one_sb(sb, super->s_devops->find_last_sb); + if (err) + return err; + return 0; +} + +static int ds_cmp(const void *ds0, const void *ds1) +{ + size_t len = sizeof(struct logfs_disk_super); + + /* We know the segment headers differ, so ignore them */ + len -= LOGFS_SEGMENT_HEADERSIZE; + ds0 += LOGFS_SEGMENT_HEADERSIZE; + ds1 += LOGFS_SEGMENT_HEADERSIZE; + return memcmp(ds0, ds1, len); +} + +static int logfs_recover_sb(struct super_block *sb) +{ + struct logfs_super *super = logfs_super(sb); + struct logfs_disk_super _ds0, *ds0 = &_ds0; + struct logfs_disk_super _ds1, *ds1 = &_ds1; + int err, valid0, valid1; + + /* read first superblock */ + err = wbuf_read(sb, super->s_sb_ofs[0], sizeof(*ds0), ds0); + if (err) + return err; + /* read last superblock */ + err = wbuf_read(sb, super->s_sb_ofs[1], sizeof(*ds1), ds1); + if (err) + return err; + valid0 = logfs_check_ds(ds0) == 0; + valid1 = logfs_check_ds(ds1) == 0; + + if (!valid0 && valid1) { + printk(KERN_INFO"First superblock is invalid - fixing.\n"); + return write_one_sb(sb, super->s_devops->find_first_sb); + } + if (valid0 && !valid1) { + printk(KERN_INFO"Last superblock is invalid - fixing.\n"); + return write_one_sb(sb, super->s_devops->find_last_sb); + } + if (valid0 && valid1 && ds_cmp(ds0, ds1)) { + printk(KERN_INFO"Superblocks don't match - fixing.\n"); + return write_one_sb(sb, super->s_devops->find_last_sb); + } + /* If neither is valid now, something's wrong. Didn't we properly + * check them before?!? 
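logfs_recover_sb() above reduces superblock repair to a small decision table over the two copies: rewrite whichever copy fails validation, and if both validate but differ, let the first copy win. A compact user-space sketch of just that table, with a toy validity check in place of logfs_check_ds():

#include <stdio.h>
#include <string.h>

struct disk_super { char payload[32]; };        /* toy stand-in */

static int sb_valid(const struct disk_super *ds)
{
        return ds->payload[0] != 0;     /* real code checks magic and CRCs */
}

/* decide which copy to rewrite, mirroring logfs_recover_sb() */
static const char *recover(const struct disk_super *first,
                           const struct disk_super *last)
{
        int valid0 = sb_valid(first), valid1 = sb_valid(last);

        if (!valid0 && valid1)
                return "rewrite first copy from the last one";
        if (valid0 && !valid1)
                return "rewrite last copy from the first one";
        if (valid0 && valid1 &&
            memcmp(first->payload, last->payload, sizeof(first->payload)))
                return "copies differ: first one wins, rewrite the last";
        if (!valid0 && !valid1)
                return "both invalid: should have been caught at mount";
        return "both copies match, nothing to do";
}

int main(void)
{
        struct disk_super good = { "logfs" }, bad = { "" };

        printf("%s\n", recover(&bad, &good));
        printf("%s\n", recover(&good, &good));
        return 0;
}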
*/ + BUG_ON(!valid0 && !valid1); + return 0; +} + +static int logfs_make_writeable(struct super_block *sb) +{ + int err; + + /* Repair any broken superblock copies */ + err = logfs_recover_sb(sb); + if (err) + return err; + + /* Check areas for trailing unaccounted data */ + err = logfs_check_areas(sb); + if (err) + return err; + + err = logfs_open_segfile(sb); + if (err) + return err; + + /* Do one GC pass before any data gets dirtied */ + logfs_gc_pass(sb); + + /* after all initializations are done, replay the journal + * for rw-mounts, if necessary */ + err = logfs_replay_journal(sb); + if (err) + return err; + + return 0; +} + +static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt) +{ + struct inode *rootdir; + int err; + + /* root dir */ + rootdir = logfs_iget(sb, LOGFS_INO_ROOT); + if (IS_ERR(rootdir)) + goto fail; + + sb->s_root = d_alloc_root(rootdir); + if (!sb->s_root) + goto fail; + + /* FIXME: check for read-only mounts */ + err = logfs_make_writeable(sb); + if (err) + goto fail2; + + log_super("LogFS: Finished mounting\n"); + simple_set_mnt(mnt, sb); + return 0; + +fail2: + iput(rootdir); +fail: + iput(logfs_super(sb)->s_master_inode); + return -EIO; +} + +int logfs_check_ds(struct logfs_disk_super *ds) +{ + struct logfs_segment_header *sh = &ds->ds_sh; + + if (ds->ds_magic != cpu_to_be64(LOGFS_MAGIC)) + return -EINVAL; + if (sh->crc != logfs_crc32(sh, LOGFS_SEGMENT_HEADERSIZE, 4)) + return -EINVAL; + if (ds->ds_crc != logfs_crc32(ds, sizeof(*ds), + LOGFS_SEGMENT_HEADERSIZE + 12)) + return -EINVAL; + return 0; +} + +static struct page *find_super_block(struct super_block *sb) +{ + struct logfs_super *super = logfs_super(sb); + struct page *first, *last; + + first = super->s_devops->find_first_sb(sb, &super->s_sb_ofs[0]); + if (!first || IS_ERR(first)) + return NULL; + last = super->s_devops->find_last_sb(sb, &super->s_sb_ofs[1]); + if (!last || IS_ERR(first)) { + page_cache_release(first); + return NULL; + } + + if (!logfs_check_ds(page_address(first))) { + page_cache_release(last); + return first; + } + + /* First one didn't work, try the second superblock */ + if (!logfs_check_ds(page_address(last))) { + page_cache_release(first); + return last; + } + + /* Neither worked, sorry folks */ + page_cache_release(first); + page_cache_release(last); + return NULL; +} + +static int __logfs_read_sb(struct super_block *sb) +{ + struct logfs_super *super = logfs_super(sb); + struct page *page; + struct logfs_disk_super *ds; + int i; + + page = find_super_block(sb); + if (!page) + return -EIO; + + ds = page_address(page); + super->s_size = be64_to_cpu(ds->ds_filesystem_size); + super->s_root_reserve = be64_to_cpu(ds->ds_root_reserve); + super->s_speed_reserve = be64_to_cpu(ds->ds_speed_reserve); + super->s_bad_seg_reserve = be32_to_cpu(ds->ds_bad_seg_reserve); + super->s_segsize = 1 << ds->ds_segment_shift; + super->s_segmask = (1 << ds->ds_segment_shift) - 1; + super->s_segshift = ds->ds_segment_shift; + sb->s_blocksize = 1 << ds->ds_block_shift; + sb->s_blocksize_bits = ds->ds_block_shift; + super->s_writesize = 1 << ds->ds_write_shift; + super->s_writeshift = ds->ds_write_shift; + super->s_no_segs = super->s_size >> super->s_segshift; + super->s_no_blocks = super->s_segsize >> sb->s_blocksize_bits; + super->s_feature_incompat = be64_to_cpu(ds->ds_feature_incompat); + super->s_feature_ro_compat = be64_to_cpu(ds->ds_feature_ro_compat); + super->s_feature_compat = be64_to_cpu(ds->ds_feature_compat); + super->s_feature_flags = be64_to_cpu(ds->ds_feature_flags); + + 
journal_for_each(i) + super->s_journal_seg[i] = be32_to_cpu(ds->ds_journal_seg[i]); + + super->s_ifile_levels = ds->ds_ifile_levels; + super->s_iblock_levels = ds->ds_iblock_levels; + super->s_data_levels = ds->ds_data_levels; + super->s_total_levels = super->s_ifile_levels + super->s_iblock_levels + + super->s_data_levels; + page_cache_release(page); + return 0; +} + +static int logfs_read_sb(struct super_block *sb) +{ + struct logfs_super *super = logfs_super(sb); + int ret; + + super->s_btree_pool = mempool_create(32, btree_alloc, btree_free, NULL); + if (!super->s_btree_pool) + return -ENOMEM; + + btree_init_mempool64(&super->s_shadow_tree.new, super->s_btree_pool); + btree_init_mempool64(&super->s_shadow_tree.old, super->s_btree_pool); + + ret = logfs_init_mapping(sb); + if (ret) + return ret; + + ret = __logfs_read_sb(sb); + if (ret) + return ret; + + mutex_init(&super->s_dirop_mutex); + mutex_init(&super->s_object_alias_mutex); + INIT_LIST_HEAD(&super->s_freeing_list); + + ret = logfs_init_rw(sb); + if (ret) + return ret; + + ret = logfs_init_areas(sb); + if (ret) + return ret; + + ret = logfs_init_gc(sb); + if (ret) + return ret; + + ret = logfs_init_journal(sb); + if (ret) + return ret; + + return 0; +} + +static void logfs_kill_sb(struct super_block *sb) +{ + struct logfs_super *super = logfs_super(sb); + + log_super("LogFS: Start unmounting\n"); + /* Alias entries slow down mount, so evict as many as possible */ + sync_filesystem(sb); + logfs_write_anchor(super->s_master_inode); + + /* + * From this point on alias entries are simply dropped - and any + * writes to the object store are considered bugs. + */ + super->s_flags |= LOGFS_SB_FLAG_SHUTDOWN; + log_super("LogFS: Now in shutdown\n"); + generic_shutdown_super(sb); + + BUG_ON(super->s_dirty_used_bytes || super->s_dirty_free_bytes); + + logfs_cleanup_gc(sb); + logfs_cleanup_journal(sb); + logfs_cleanup_areas(sb); + logfs_cleanup_rw(sb); + super->s_devops->put_device(sb); + mempool_destroy(super->s_btree_pool); + mempool_destroy(super->s_alias_pool); + kfree(super); + log_super("LogFS: Finished unmounting\n"); +} + +int logfs_get_sb_device(struct file_system_type *type, int flags, + struct mtd_info *mtd, struct block_device *bdev, + const struct logfs_device_ops *devops, struct vfsmount *mnt) +{ + struct logfs_super *super; + struct super_block *sb; + int err = -ENOMEM; + static int mount_count; + + log_super("LogFS: Start mount %x\n", mount_count++); + super = kzalloc(sizeof(*super), GFP_KERNEL); + if (!super) + goto err0; + + super->s_mtd = mtd; + super->s_bdev = bdev; + err = -EINVAL; + sb = sget(type, logfs_sb_test, logfs_sb_set, super); + if (IS_ERR(sb)) + goto err0; + + if (sb->s_root) { + /* Device is already in use */ + err = 0; + simple_set_mnt(mnt, sb); + goto err0; + } + + super->s_devops = devops; + + /* + * sb->s_maxbytes is limited to 8TB. On 32bit systems, the page cache + * only covers 16TB and the upper 8TB are used for indirect blocks. + * On 64bit system we could bump up the limit, but that would make + * the filesystem incompatible with 32bit systems. 
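__logfs_read_sb() above derives the whole geometry from the shift values stored on disk: segment size, segment mask, number of segments and blocks per segment are all single shifts, and s_maxbytes below caps files at 2^43 - 1 bytes. A quick arithmetic check with example shift values (not taken from any real image):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        /* example geometry; real values come from the on-disk super block */
        uint64_t fs_size       = 1ull << 30;    /* 1 GiB device */
        int      segment_shift = 17;            /* 128 KiB segments */
        int      block_shift   = 12;            /* 4 KiB blocks */

        uint64_t segsize   = 1ull << segment_shift;
        uint64_t segmask   = segsize - 1;
        uint64_t no_segs   = fs_size >> segment_shift;
        uint64_t no_blocks = segsize >> block_shift;

        printf("%llu segments of %llu bytes, %llu blocks each (mask %#llx)\n",
               (unsigned long long)no_segs, (unsigned long long)segsize,
               (unsigned long long)no_blocks, (unsigned long long)segmask);
        printf("max file size: %llu bytes\n",
               (unsigned long long)((1ull << 43) - 1));  /* 8 TiB - 1 */
        return 0;
}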
+ */ + sb->s_maxbytes = (1ull << 43) - 1; + sb->s_op = &logfs_super_operations; + sb->s_flags = flags | MS_NOATIME; + + err = logfs_read_sb(sb); + if (err) + goto err1; + + sb->s_flags |= MS_ACTIVE; + err = logfs_get_sb_final(sb, mnt); + if (err) + goto err1; + return 0; + +err1: + up_write(&sb->s_umount); + deactivate_super(sb); + return err; +err0: + kfree(super); + //devops->put_device(sb); + return err; +} + +static int logfs_get_sb(struct file_system_type *type, int flags, + const char *devname, void *data, struct vfsmount *mnt) +{ + ulong mtdnr; + + if (!devname) + return logfs_get_sb_bdev(type, flags, devname, mnt); + if (strncmp(devname, "mtd", 3)) + return logfs_get_sb_bdev(type, flags, devname, mnt); + + { + char *garbage; + mtdnr = simple_strtoul(devname+3, &garbage, 0); + if (*garbage) + return -EINVAL; + } + + return logfs_get_sb_mtd(type, flags, mtdnr, mnt); +} + +static struct file_system_type logfs_fs_type = { + .owner = THIS_MODULE, + .name = "logfs", + .get_sb = logfs_get_sb, + .kill_sb = logfs_kill_sb, + .fs_flags = FS_REQUIRES_DEV, + +}; + +static int __init logfs_init(void) +{ + int ret; + + emergency_page = alloc_pages(GFP_KERNEL, 0); + if (!emergency_page) + return -ENOMEM; + + ret = logfs_compr_init(); + if (ret) + goto out1; + + ret = logfs_init_inode_cache(); + if (ret) + goto out2; + + return register_filesystem(&logfs_fs_type); +out2: + logfs_compr_exit(); +out1: + __free_pages(emergency_page, 0); + return ret; +} + +static void __exit logfs_exit(void) +{ + unregister_filesystem(&logfs_fs_type); + logfs_destroy_inode_cache(); + logfs_compr_exit(); + __free_pages(emergency_page, 0); +} + +module_init(logfs_init); +module_exit(logfs_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Joern Engel "); +MODULE_DESCRIPTION("scalable flash filesystem"); diff --git a/include/linux/btree-128.h b/include/linux/btree-128.h new file mode 100644 index 000000000000..0b3414c4c928 --- /dev/null +++ b/include/linux/btree-128.h @@ -0,0 +1,109 @@ +extern struct btree_geo btree_geo128; + +struct btree_head128 { struct btree_head h; }; + +static inline void btree_init_mempool128(struct btree_head128 *head, + mempool_t *mempool) +{ + btree_init_mempool(&head->h, mempool); +} + +static inline int btree_init128(struct btree_head128 *head) +{ + return btree_init(&head->h); +} + +static inline void btree_destroy128(struct btree_head128 *head) +{ + btree_destroy(&head->h); +} + +static inline void *btree_lookup128(struct btree_head128 *head, u64 k1, u64 k2) +{ + u64 key[2] = {k1, k2}; + return btree_lookup(&head->h, &btree_geo128, (unsigned long *)&key); +} + +static inline void *btree_get_prev128(struct btree_head128 *head, + u64 *k1, u64 *k2) +{ + u64 key[2] = {*k1, *k2}; + void *val; + + val = btree_get_prev(&head->h, &btree_geo128, + (unsigned long *)&key); + *k1 = key[0]; + *k2 = key[1]; + return val; +} + +static inline int btree_insert128(struct btree_head128 *head, u64 k1, u64 k2, + void *val, gfp_t gfp) +{ + u64 key[2] = {k1, k2}; + return btree_insert(&head->h, &btree_geo128, + (unsigned long *)&key, val, gfp); +} + +static inline int btree_update128(struct btree_head128 *head, u64 k1, u64 k2, + void *val) +{ + u64 key[2] = {k1, k2}; + return btree_update(&head->h, &btree_geo128, + (unsigned long *)&key, val); +} + +static inline void *btree_remove128(struct btree_head128 *head, u64 k1, u64 k2) +{ + u64 key[2] = {k1, k2}; + return btree_remove(&head->h, &btree_geo128, (unsigned long *)&key); +} + +static inline void *btree_last128(struct btree_head128 *head, u64 *k1, u64 *k2) +{ 
+ u64 key[2]; + void *val; + + val = btree_last(&head->h, &btree_geo128, (unsigned long *)&key[0]); + if (val) { + *k1 = key[0]; + *k2 = key[1]; + } + + return val; +} + +static inline int btree_merge128(struct btree_head128 *target, + struct btree_head128 *victim, + gfp_t gfp) +{ + return btree_merge(&target->h, &victim->h, &btree_geo128, gfp); +} + +void visitor128(void *elem, unsigned long opaque, unsigned long *__key, + size_t index, void *__func); + +typedef void (*visitor128_t)(void *elem, unsigned long opaque, + u64 key1, u64 key2, size_t index); + +static inline size_t btree_visitor128(struct btree_head128 *head, + unsigned long opaque, + visitor128_t func2) +{ + return btree_visitor(&head->h, &btree_geo128, opaque, + visitor128, func2); +} + +static inline size_t btree_grim_visitor128(struct btree_head128 *head, + unsigned long opaque, + visitor128_t func2) +{ + return btree_grim_visitor(&head->h, &btree_geo128, opaque, + visitor128, func2); +} + +#define btree_for_each_safe128(head, k1, k2, val) \ + for (val = btree_last128(head, &k1, &k2); \ + val; \ + val = btree_get_prev128(head, &k1, &k2)) + diff --git a/include/linux/btree-type.h b/include/linux/btree-type.h new file mode 100644 index 000000000000..9a1147ef8563 --- /dev/null +++ b/include/linux/btree-type.h @@ -0,0 +1,147 @@ +#define __BTREE_TP(pfx, type, sfx) pfx ## type ## sfx +#define _BTREE_TP(pfx, type, sfx) __BTREE_TP(pfx, type, sfx) +#define BTREE_TP(pfx) _BTREE_TP(pfx, BTREE_TYPE_SUFFIX,) +#define BTREE_FN(name) BTREE_TP(btree_ ## name) +#define BTREE_TYPE_HEAD BTREE_TP(struct btree_head) +#define VISITOR_FN BTREE_TP(visitor) +#define VISITOR_FN_T _BTREE_TP(visitor, BTREE_TYPE_SUFFIX, _t) + +BTREE_TYPE_HEAD { + struct btree_head h; +}; + +static inline void BTREE_FN(init_mempool)(BTREE_TYPE_HEAD *head, + mempool_t *mempool) +{ + btree_init_mempool(&head->h, mempool); +} + +static inline int BTREE_FN(init)(BTREE_TYPE_HEAD *head) +{ + return btree_init(&head->h); +} + +static inline void BTREE_FN(destroy)(BTREE_TYPE_HEAD *head) +{ + btree_destroy(&head->h); +} + +static inline int BTREE_FN(merge)(BTREE_TYPE_HEAD *target, + BTREE_TYPE_HEAD *victim, + gfp_t gfp) +{ + return btree_merge(&target->h, &victim->h, BTREE_TYPE_GEO, gfp); +} + +#if (BITS_PER_LONG > BTREE_TYPE_BITS) +static inline void *BTREE_FN(lookup)(BTREE_TYPE_HEAD *head, BTREE_KEYTYPE key) +{ + unsigned long _key = key; + return btree_lookup(&head->h, BTREE_TYPE_GEO, &_key); +} + +static inline int BTREE_FN(insert)(BTREE_TYPE_HEAD *head, BTREE_KEYTYPE key, + void *val, gfp_t gfp) +{ + unsigned long _key = key; + return btree_insert(&head->h, BTREE_TYPE_GEO, &_key, val, gfp); +} + +static inline int BTREE_FN(update)(BTREE_TYPE_HEAD *head, BTREE_KEYTYPE key, + void *val) +{ + unsigned long _key = key; + return btree_update(&head->h, BTREE_TYPE_GEO, &_key, val); +} + +static inline void *BTREE_FN(remove)(BTREE_TYPE_HEAD *head, BTREE_KEYTYPE key) +{ + unsigned long _key = key; + return btree_remove(&head->h, BTREE_TYPE_GEO, &_key); +} + +static inline void *BTREE_FN(last)(BTREE_TYPE_HEAD *head, BTREE_KEYTYPE *key) +{ + unsigned long _key; + void *val = btree_last(&head->h, BTREE_TYPE_GEO, &_key); + if (val) + *key = _key; + return val; +} + +static inline void *BTREE_FN(get_prev)(BTREE_TYPE_HEAD *head, BTREE_KEYTYPE *key) +{ + unsigned long _key = *key; + void *val = btree_get_prev(&head->h, BTREE_TYPE_GEO, &_key); + if (val) + *key = _key; + return val; +} +#else +static inline void *BTREE_FN(lookup)(BTREE_TYPE_HEAD *head, BTREE_KEYTYPE key) +{ + return 
btree_lookup(&head->h, BTREE_TYPE_GEO, (unsigned long *)&key); +} + +static inline int BTREE_FN(insert)(BTREE_TYPE_HEAD *head, BTREE_KEYTYPE key, + void *val, gfp_t gfp) +{ + return btree_insert(&head->h, BTREE_TYPE_GEO, (unsigned long *)&key, + val, gfp); +} + +static inline int BTREE_FN(update)(BTREE_TYPE_HEAD *head, BTREE_KEYTYPE key, + void *val) +{ + return btree_update(&head->h, BTREE_TYPE_GEO, (unsigned long *)&key, val); +} + +static inline void *BTREE_FN(remove)(BTREE_TYPE_HEAD *head, BTREE_KEYTYPE key) +{ + return btree_remove(&head->h, BTREE_TYPE_GEO, (unsigned long *)&key); +} + +static inline void *BTREE_FN(last)(BTREE_TYPE_HEAD *head, BTREE_KEYTYPE *key) +{ + return btree_last(&head->h, BTREE_TYPE_GEO, (unsigned long *)key); +} + +static inline void *BTREE_FN(get_prev)(BTREE_TYPE_HEAD *head, BTREE_KEYTYPE *key) +{ + return btree_get_prev(&head->h, BTREE_TYPE_GEO, (unsigned long *)key); +} +#endif + +void VISITOR_FN(void *elem, unsigned long opaque, unsigned long *key, + size_t index, void *__func); + +typedef void (*VISITOR_FN_T)(void *elem, unsigned long opaque, + BTREE_KEYTYPE key, size_t index); + +static inline size_t BTREE_FN(visitor)(BTREE_TYPE_HEAD *head, + unsigned long opaque, + VISITOR_FN_T func2) +{ + return btree_visitor(&head->h, BTREE_TYPE_GEO, opaque, + visitorl, func2); +} + +static inline size_t BTREE_FN(grim_visitor)(BTREE_TYPE_HEAD *head, + unsigned long opaque, + VISITOR_FN_T func2) +{ + return btree_grim_visitor(&head->h, BTREE_TYPE_GEO, opaque, + visitorl, func2); +} + +#undef VISITOR_FN +#undef VISITOR_FN_T +#undef __BTREE_TP +#undef _BTREE_TP +#undef BTREE_TP +#undef BTREE_FN +#undef BTREE_TYPE_HEAD +#undef BTREE_TYPE_SUFFIX +#undef BTREE_TYPE_GEO +#undef BTREE_KEYTYPE +#undef BTREE_TYPE_BITS diff --git a/include/linux/btree.h b/include/linux/btree.h new file mode 100644 index 000000000000..65b5bb058324 --- /dev/null +++ b/include/linux/btree.h @@ -0,0 +1,243 @@ +#ifndef BTREE_H +#define BTREE_H + +#include +#include + +/** + * DOC: B+Tree basics + * + * A B+Tree is a data structure for looking up arbitrary (currently allowing + * unsigned long, u32, u64 and 2 * u64) keys into pointers. The data structure + * is described at http://en.wikipedia.org/wiki/B-tree, we currently do not + * use binary search to find the key on lookups. + * + * Each B+Tree consists of a head, that contains bookkeeping information and + * a variable number (starting with zero) nodes. Each node contains the keys + * and pointers to sub-nodes, or, for leaf nodes, the keys and values for the + * tree entries. + * + * Each node in this implementation has the following layout: + * [key1, key2, ..., keyN] [val1, val2, ..., valN] + * + * Each key here is an array of unsigned longs, geo->no_longs in total. The + * number of keys and values (N) is geo->no_pairs. 
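The node layout described above fixes the geometry arithmetic used by lib/btree.c further down: with NODESIZE / sizeof(long) slots per node, each key occupies keylen longs and each value one long, so no_pairs = slots / (1 + keylen) and no_longs = keylen * no_pairs. A small program that reproduces those numbers, assuming a 64-bit machine where NODESIZE works out to 128 bytes:

#include <stdio.h>

/* mirror the geometry arithmetic from lib/btree.c for a few key widths */
static void geometry(const char *name, int keylen, int nodesize)
{
        int slots    = nodesize / (int)sizeof(long);
        int no_pairs = slots / (1 + keylen);    /* keylen key longs + 1 value */
        int no_longs = keylen * no_pairs;

        printf("%-10s keylen=%d no_pairs=%d no_longs=%d\n",
               name, keylen, no_pairs, no_longs);
}

int main(void)
{
        /* assumes 64-bit: NODESIZE=128, sizeof(long)=8, one u64 per long */
        int nodesize = 128;

        geometry("geo32/long", 1, nodesize);    /* 8 key/value pairs */
        geometry("geo64",      1, nodesize);    /* a u64 fits in one long */
        geometry("geo128",     2, nodesize);    /* two u64s per key */
        return 0;
}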
+ */ + +/** + * struct btree_head - btree head + * + * @node: the first node in the tree + * @mempool: mempool used for node allocations + * @height: current of the tree + */ +struct btree_head { + unsigned long *node; + mempool_t *mempool; + int height; +}; + +/* btree geometry */ +struct btree_geo; + +/** + * btree_alloc - allocate function for the mempool + * @gfp_mask: gfp mask for the allocation + * @pool_data: unused + */ +void *btree_alloc(gfp_t gfp_mask, void *pool_data); + +/** + * btree_free - free function for the mempool + * @element: the element to free + * @pool_data: unused + */ +void btree_free(void *element, void *pool_data); + +/** + * btree_init_mempool - initialise a btree with given mempool + * + * @head: the btree head to initialise + * @mempool: the mempool to use + * + * When this function is used, there is no need to destroy + * the mempool. + */ +void btree_init_mempool(struct btree_head *head, mempool_t *mempool); + +/** + * btree_init - initialise a btree + * + * @head: the btree head to initialise + * + * This function allocates the memory pool that the + * btree needs. Returns zero or a negative error code + * (-%ENOMEM) when memory allocation fails. + * + */ +int __must_check btree_init(struct btree_head *head); + +/** + * btree_destroy - destroy mempool + * + * @head: the btree head to destroy + * + * This function destroys the internal memory pool, use only + * when using btree_init(), not with btree_init_mempool(). + */ +void btree_destroy(struct btree_head *head); + +/** + * btree_lookup - look up a key in the btree + * + * @head: the btree to look in + * @geo: the btree geometry + * @key: the key to look up + * + * This function returns the value for the given key, or %NULL. + */ +void *btree_lookup(struct btree_head *head, struct btree_geo *geo, + unsigned long *key); + +/** + * btree_insert - insert an entry into the btree + * + * @head: the btree to add to + * @geo: the btree geometry + * @key: the key to add (must not already be present) + * @val: the value to add (must not be %NULL) + * @gfp: allocation flags for node allocations + * + * This function returns 0 if the item could be added, or an + * error code if it failed (may fail due to memory pressure). + */ +int __must_check btree_insert(struct btree_head *head, struct btree_geo *geo, + unsigned long *key, void *val, gfp_t gfp); +/** + * btree_update - update an entry in the btree + * + * @head: the btree to update + * @geo: the btree geometry + * @key: the key to update + * @val: the value to change it to (must not be %NULL) + * + * This function returns 0 if the update was successful, or + * -%ENOENT if the key could not be found. + */ +int btree_update(struct btree_head *head, struct btree_geo *geo, + unsigned long *key, void *val); +/** + * btree_remove - remove an entry from the btree + * + * @head: the btree to update + * @geo: the btree geometry + * @key: the key to remove + * + * This function returns the removed entry, or %NULL if the key + * could not be found. + */ +void *btree_remove(struct btree_head *head, struct btree_geo *geo, + unsigned long *key); + +/** + * btree_merge - merge two btrees + * + * @target: the tree that gets all the entries + * @victim: the tree that gets merged into @target + * @geo: the btree geometry + * @gfp: allocation flags + * + * The two trees @target and @victim may not contain the same keys, + * that is a bug and triggers a BUG(). 
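Putting the documented calls together, a typical caller initialises a head, inserts non-NULL values under unique keys, and tears the tree down again. The fragment below is an illustrative kernel-context sketch built only from the wrappers generated above (here the u64-keyed variants); it is not a standalone program and the stored pointer is just a placeholder:

#include <linux/btree.h>
#include <linux/slab.h>

static int btree_example(void)
{
        struct btree_head64 head;
        u64 key;
        void *val;
        int err;

        err = btree_init64(&head);      /* sets up the node mempool */
        if (err)
                return err;

        err = btree_insert64(&head, 42, (void *)0x1234, GFP_KERNEL);
        if (err)
                goto out;

        val = btree_lookup64(&head, 42);        /* -> (void *)0x1234 */
        if (!val)
                err = -ENOENT;

        /* walk from the largest key downwards; removal is safe here */
        btree_for_each_safe64(&head, key, val)
                btree_remove64(&head, key);
out:
        btree_destroy64(&head);         /* frees the internal mempool */
        return err;
}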
This function returns zero + * if the trees were merged successfully, and may return a failure + * when memory allocation fails, in which case both trees might have + * been partially merged, i.e. some entries have been moved from + * @victim to @target. + */ +int btree_merge(struct btree_head *target, struct btree_head *victim, + struct btree_geo *geo, gfp_t gfp); + +/** + * btree_last - get last entry in btree + * + * @head: btree head + * @geo: btree geometry + * @key: last key + * + * Returns the last entry in the btree, and sets @key to the key + * of that entry; returns NULL if the tree is empty, in that case + * key is not changed. + */ +void *btree_last(struct btree_head *head, struct btree_geo *geo, + unsigned long *key); + +/** + * btree_get_prev - get previous entry + * + * @head: btree head + * @geo: btree geometry + * @key: pointer to key + * + * The function returns the next item right before the value pointed to by + * @key, and updates @key with its key, or returns %NULL when there is no + * entry with a key smaller than the given key. + */ +void *btree_get_prev(struct btree_head *head, struct btree_geo *geo, + unsigned long *key); + + +/* internal use, use btree_visitor{l,32,64,128} */ +size_t btree_visitor(struct btree_head *head, struct btree_geo *geo, + unsigned long opaque, + void (*func)(void *elem, unsigned long opaque, + unsigned long *key, size_t index, + void *func2), + void *func2); + +/* internal use, use btree_grim_visitor{l,32,64,128} */ +size_t btree_grim_visitor(struct btree_head *head, struct btree_geo *geo, + unsigned long opaque, + void (*func)(void *elem, unsigned long opaque, + unsigned long *key, + size_t index, void *func2), + void *func2); + + +#include + +extern struct btree_geo btree_geo32; +#define BTREE_TYPE_SUFFIX l +#define BTREE_TYPE_BITS BITS_PER_LONG +#define BTREE_TYPE_GEO &btree_geo32 +#define BTREE_KEYTYPE unsigned long +#include + +#define btree_for_each_safel(head, key, val) \ + for (val = btree_lastl(head, &key); \ + val; \ + val = btree_get_prevl(head, &key)) + +#define BTREE_TYPE_SUFFIX 32 +#define BTREE_TYPE_BITS 32 +#define BTREE_TYPE_GEO &btree_geo32 +#define BTREE_KEYTYPE u32 +#include + +#define btree_for_each_safe32(head, key, val) \ + for (val = btree_last32(head, &key); \ + val; \ + val = btree_get_prev32(head, &key)) + +extern struct btree_geo btree_geo64; +#define BTREE_TYPE_SUFFIX 64 +#define BTREE_TYPE_BITS 64 +#define BTREE_TYPE_GEO &btree_geo64 +#define BTREE_KEYTYPE u64 +#include + +#define btree_for_each_safe64(head, key, val) \ + for (val = btree_last64(head, &key); \ + val; \ + val = btree_get_prev64(head, &key)) + +#endif diff --git a/lib/Kconfig b/lib/Kconfig index bb1326d3839c..277fbfb233b9 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -156,6 +156,9 @@ config TEXTSEARCH_BM config TEXTSEARCH_FSM tristate +config BTREE + boolean + config HAS_IOMEM boolean depends on !NO_IOMEM diff --git a/lib/Makefile b/lib/Makefile index 2e78277eff9d..cff82612e98b 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -41,6 +41,7 @@ lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o obj-$(CONFIG_GENERIC_FIND_LAST_BIT) += find_last_bit.o obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o +obj-$(CONFIG_BTREE) += btree.o obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o obj-$(CONFIG_DEBUG_LIST) += list_debug.o obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o diff --git a/lib/btree.c b/lib/btree.c new file mode 100644 index 000000000000..41859a820218 --- /dev/null +++ b/lib/btree.c @@ -0,0 +1,797 @@ +/* + 
* lib/btree.c - Simple In-memory B+Tree
+ *
+ * As should be obvious for Linux kernel code, license is GPLv2
+ *
+ * Copyright (c) 2007-2008 Joern Engel
+ * Bits and pieces stolen from Peter Zijlstra's code, which is
+ * Copyright 2007, Red Hat Inc. Peter Zijlstra
+ * GPLv2
+ *
+ * see http://programming.kicks-ass.net/kernel-patches/vma_lookup/btree.patch
+ *
+ * A relatively simple B+Tree implementation. I have written it as a learning
+ * exercise to understand how B+Trees work. Turned out to be useful as well.
+ *
+ * B+Trees can be used similarly to Linux radix trees (which don't have anything
+ * in common with textbook radix trees, beware). A prerequisite for them to work
+ * well is that access to a random tree node is much faster than a large number
+ * of operations within each node.
+ *
+ * Disks have fulfilled the prerequisite for a long time. More recently DRAM
+ * has gained similar properties, as memory access times, when measured in cpu
+ * cycles, have increased. Cacheline sizes have increased as well, which also
+ * helps B+Trees.
+ *
+ * Compared to radix trees, B+Trees are more efficient when dealing with a
+ * sparsely populated address space. Between 25% and 50% of the memory is
+ * occupied with valid pointers. When densely populated, radix trees contain
+ * ~98% pointers - hard to beat. Very sparse radix trees contain only ~2%
+ * pointers.
+ *
+ * This particular implementation stores pointers identified by a long value.
+ * Storing NULL pointers is illegal; lookup will return NULL when no entry
+ * was found.
+ *
+ * A trick is used that is not commonly found in textbooks. The lowest
+ * values are to the right, not to the left. All used slots within a node
+ * are on the left, all unused slots contain NUL values. Most operations
+ * simply loop once over all slots and terminate on the first NUL.
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#define MAX(a, b) ((a) > (b) ?
(a) : (b)) +#define NODESIZE MAX(L1_CACHE_BYTES, 128) + +struct btree_geo { + int keylen; + int no_pairs; + int no_longs; +}; + +struct btree_geo btree_geo32 = { + .keylen = 1, + .no_pairs = NODESIZE / sizeof(long) / 2, + .no_longs = NODESIZE / sizeof(long) / 2, +}; +EXPORT_SYMBOL_GPL(btree_geo32); + +#define LONG_PER_U64 (64 / BITS_PER_LONG) +struct btree_geo btree_geo64 = { + .keylen = LONG_PER_U64, + .no_pairs = NODESIZE / sizeof(long) / (1 + LONG_PER_U64), + .no_longs = LONG_PER_U64 * (NODESIZE / sizeof(long) / (1 + LONG_PER_U64)), +}; +EXPORT_SYMBOL_GPL(btree_geo64); + +struct btree_geo btree_geo128 = { + .keylen = 2 * LONG_PER_U64, + .no_pairs = NODESIZE / sizeof(long) / (1 + 2 * LONG_PER_U64), + .no_longs = 2 * LONG_PER_U64 * (NODESIZE / sizeof(long) / (1 + 2 * LONG_PER_U64)), +}; +EXPORT_SYMBOL_GPL(btree_geo128); + +static struct kmem_cache *btree_cachep; + +void *btree_alloc(gfp_t gfp_mask, void *pool_data) +{ + return kmem_cache_alloc(btree_cachep, gfp_mask); +} +EXPORT_SYMBOL_GPL(btree_alloc); + +void btree_free(void *element, void *pool_data) +{ + kmem_cache_free(btree_cachep, element); +} +EXPORT_SYMBOL_GPL(btree_free); + +static unsigned long *btree_node_alloc(struct btree_head *head, gfp_t gfp) +{ + unsigned long *node; + + node = mempool_alloc(head->mempool, gfp); + memset(node, 0, NODESIZE); + return node; +} + +static int longcmp(const unsigned long *l1, const unsigned long *l2, size_t n) +{ + size_t i; + + for (i = 0; i < n; i++) { + if (l1[i] < l2[i]) + return -1; + if (l1[i] > l2[i]) + return 1; + } + return 0; +} + +static unsigned long *longcpy(unsigned long *dest, const unsigned long *src, + size_t n) +{ + size_t i; + + for (i = 0; i < n; i++) + dest[i] = src[i]; + return dest; +} + +static unsigned long *longset(unsigned long *s, unsigned long c, size_t n) +{ + size_t i; + + for (i = 0; i < n; i++) + s[i] = c; + return s; +} + +static void dec_key(struct btree_geo *geo, unsigned long *key) +{ + unsigned long val; + int i; + + for (i = geo->keylen - 1; i >= 0; i--) { + val = key[i]; + key[i] = val - 1; + if (val) + break; + } +} + +static unsigned long *bkey(struct btree_geo *geo, unsigned long *node, int n) +{ + return &node[n * geo->keylen]; +} + +static void *bval(struct btree_geo *geo, unsigned long *node, int n) +{ + return (void *)node[geo->no_longs + n]; +} + +static void setkey(struct btree_geo *geo, unsigned long *node, int n, + unsigned long *key) +{ + longcpy(bkey(geo, node, n), key, geo->keylen); +} + +static void setval(struct btree_geo *geo, unsigned long *node, int n, + void *val) +{ + node[geo->no_longs + n] = (unsigned long) val; +} + +static void clearpair(struct btree_geo *geo, unsigned long *node, int n) +{ + longset(bkey(geo, node, n), 0, geo->keylen); + node[geo->no_longs + n] = 0; +} + +static inline void __btree_init(struct btree_head *head) +{ + head->node = NULL; + head->height = 0; +} + +void btree_init_mempool(struct btree_head *head, mempool_t *mempool) +{ + __btree_init(head); + head->mempool = mempool; +} +EXPORT_SYMBOL_GPL(btree_init_mempool); + +int btree_init(struct btree_head *head) +{ + __btree_init(head); + head->mempool = mempool_create(0, btree_alloc, btree_free, NULL); + if (!head->mempool) + return -ENOMEM; + return 0; +} +EXPORT_SYMBOL_GPL(btree_init); + +void btree_destroy(struct btree_head *head) +{ + mempool_destroy(head->mempool); + head->mempool = NULL; +} +EXPORT_SYMBOL_GPL(btree_destroy); + +void *btree_last(struct btree_head *head, struct btree_geo *geo, + unsigned long *key) +{ + int height = head->height; + 
unsigned long *node = head->node; + + if (height == 0) + return NULL; + + for ( ; height > 1; height--) + node = bval(geo, node, 0); + + longcpy(key, bkey(geo, node, 0), geo->keylen); + return bval(geo, node, 0); +} +EXPORT_SYMBOL_GPL(btree_last); + +static int keycmp(struct btree_geo *geo, unsigned long *node, int pos, + unsigned long *key) +{ + return longcmp(bkey(geo, node, pos), key, geo->keylen); +} + +static int keyzero(struct btree_geo *geo, unsigned long *key) +{ + int i; + + for (i = 0; i < geo->keylen; i++) + if (key[i]) + return 0; + + return 1; +} + +void *btree_lookup(struct btree_head *head, struct btree_geo *geo, + unsigned long *key) +{ + int i, height = head->height; + unsigned long *node = head->node; + + if (height == 0) + return NULL; + + for ( ; height > 1; height--) { + for (i = 0; i < geo->no_pairs; i++) + if (keycmp(geo, node, i, key) <= 0) + break; + if (i == geo->no_pairs) + return NULL; + node = bval(geo, node, i); + if (!node) + return NULL; + } + + if (!node) + return NULL; + + for (i = 0; i < geo->no_pairs; i++) + if (keycmp(geo, node, i, key) == 0) + return bval(geo, node, i); + return NULL; +} +EXPORT_SYMBOL_GPL(btree_lookup); + +int btree_update(struct btree_head *head, struct btree_geo *geo, + unsigned long *key, void *val) +{ + int i, height = head->height; + unsigned long *node = head->node; + + if (height == 0) + return -ENOENT; + + for ( ; height > 1; height--) { + for (i = 0; i < geo->no_pairs; i++) + if (keycmp(geo, node, i, key) <= 0) + break; + if (i == geo->no_pairs) + return -ENOENT; + node = bval(geo, node, i); + if (!node) + return -ENOENT; + } + + if (!node) + return -ENOENT; + + for (i = 0; i < geo->no_pairs; i++) + if (keycmp(geo, node, i, key) == 0) { + setval(geo, node, i, val); + return 0; + } + return -ENOENT; +} +EXPORT_SYMBOL_GPL(btree_update); + +/* + * Usually this function is quite similar to normal lookup. But the key of + * a parent node may be smaller than the smallest key of all its siblings. + * In such a case we cannot just return NULL, as we have only proven that no + * key smaller than __key, but larger than this parent key exists. + * So we set __key to the parent key and retry. We have to use the smallest + * such parent key, which is the last parent key we encountered. 
+ */ +void *btree_get_prev(struct btree_head *head, struct btree_geo *geo, + unsigned long *__key) +{ + int i, height; + unsigned long *node, *oldnode; + unsigned long *retry_key = NULL, key[geo->keylen]; + + if (keyzero(geo, __key)) + return NULL; + + if (head->height == 0) + return NULL; +retry: + longcpy(key, __key, geo->keylen); + dec_key(geo, key); + + node = head->node; + for (height = head->height ; height > 1; height--) { + for (i = 0; i < geo->no_pairs; i++) + if (keycmp(geo, node, i, key) <= 0) + break; + if (i == geo->no_pairs) + goto miss; + oldnode = node; + node = bval(geo, node, i); + if (!node) + goto miss; + retry_key = bkey(geo, oldnode, i); + } + + if (!node) + goto miss; + + for (i = 0; i < geo->no_pairs; i++) { + if (keycmp(geo, node, i, key) <= 0) { + if (bval(geo, node, i)) { + longcpy(__key, bkey(geo, node, i), geo->keylen); + return bval(geo, node, i); + } else + goto miss; + } + } +miss: + if (retry_key) { + __key = retry_key; + retry_key = NULL; + goto retry; + } + return NULL; +} + +static int getpos(struct btree_geo *geo, unsigned long *node, + unsigned long *key) +{ + int i; + + for (i = 0; i < geo->no_pairs; i++) { + if (keycmp(geo, node, i, key) <= 0) + break; + } + return i; +} + +static int getfill(struct btree_geo *geo, unsigned long *node, int start) +{ + int i; + + for (i = start; i < geo->no_pairs; i++) + if (!bval(geo, node, i)) + break; + return i; +} + +/* + * locate the correct leaf node in the btree + */ +static unsigned long *find_level(struct btree_head *head, struct btree_geo *geo, + unsigned long *key, int level) +{ + unsigned long *node = head->node; + int i, height; + + for (height = head->height; height > level; height--) { + for (i = 0; i < geo->no_pairs; i++) + if (keycmp(geo, node, i, key) <= 0) + break; + + if ((i == geo->no_pairs) || !bval(geo, node, i)) { + /* right-most key is too large, update it */ + /* FIXME: If the right-most key on higher levels is + * always zero, this wouldn't be necessary. 
*/ + i--; + setkey(geo, node, i, key); + } + BUG_ON(i < 0); + node = bval(geo, node, i); + } + BUG_ON(!node); + return node; +} + +static int btree_grow(struct btree_head *head, struct btree_geo *geo, + gfp_t gfp) +{ + unsigned long *node; + int fill; + + node = btree_node_alloc(head, gfp); + if (!node) + return -ENOMEM; + if (head->node) { + fill = getfill(geo, head->node, 0); + setkey(geo, node, 0, bkey(geo, head->node, fill - 1)); + setval(geo, node, 0, head->node); + } + head->node = node; + head->height++; + return 0; +} + +static void btree_shrink(struct btree_head *head, struct btree_geo *geo) +{ + unsigned long *node; + int fill; + + if (head->height <= 1) + return; + + node = head->node; + fill = getfill(geo, node, 0); + BUG_ON(fill > 1); + head->node = bval(geo, node, 0); + head->height--; + mempool_free(node, head->mempool); +} + +static int btree_insert_level(struct btree_head *head, struct btree_geo *geo, + unsigned long *key, void *val, int level, + gfp_t gfp) +{ + unsigned long *node; + int i, pos, fill, err; + + BUG_ON(!val); + if (head->height < level) { + err = btree_grow(head, geo, gfp); + if (err) + return err; + } + +retry: + node = find_level(head, geo, key, level); + pos = getpos(geo, node, key); + fill = getfill(geo, node, pos); + /* two identical keys are not allowed */ + BUG_ON(pos < fill && keycmp(geo, node, pos, key) == 0); + + if (fill == geo->no_pairs) { + /* need to split node */ + unsigned long *new; + + new = btree_node_alloc(head, gfp); + if (!new) + return -ENOMEM; + err = btree_insert_level(head, geo, + bkey(geo, node, fill / 2 - 1), + new, level + 1, gfp); + if (err) { + mempool_free(new, head->mempool); + return err; + } + for (i = 0; i < fill / 2; i++) { + setkey(geo, new, i, bkey(geo, node, i)); + setval(geo, new, i, bval(geo, node, i)); + setkey(geo, node, i, bkey(geo, node, i + fill / 2)); + setval(geo, node, i, bval(geo, node, i + fill / 2)); + clearpair(geo, node, i + fill / 2); + } + if (fill & 1) { + setkey(geo, node, i, bkey(geo, node, fill - 1)); + setval(geo, node, i, bval(geo, node, fill - 1)); + clearpair(geo, node, fill - 1); + } + goto retry; + } + BUG_ON(fill >= geo->no_pairs); + + /* shift and insert */ + for (i = fill; i > pos; i--) { + setkey(geo, node, i, bkey(geo, node, i - 1)); + setval(geo, node, i, bval(geo, node, i - 1)); + } + setkey(geo, node, pos, key); + setval(geo, node, pos, val); + + return 0; +} + +int btree_insert(struct btree_head *head, struct btree_geo *geo, + unsigned long *key, void *val, gfp_t gfp) +{ + return btree_insert_level(head, geo, key, val, 1, gfp); +} +EXPORT_SYMBOL_GPL(btree_insert); + +static void *btree_remove_level(struct btree_head *head, struct btree_geo *geo, + unsigned long *key, int level); +static void merge(struct btree_head *head, struct btree_geo *geo, int level, + unsigned long *left, int lfill, + unsigned long *right, int rfill, + unsigned long *parent, int lpos) +{ + int i; + + for (i = 0; i < rfill; i++) { + /* Move all keys to the left */ + setkey(geo, left, lfill + i, bkey(geo, right, i)); + setval(geo, left, lfill + i, bval(geo, right, i)); + } + /* Exchange left and right child in parent */ + setval(geo, parent, lpos, right); + setval(geo, parent, lpos + 1, left); + /* Remove left (formerly right) child from parent */ + btree_remove_level(head, geo, bkey(geo, parent, lpos), level + 1); + mempool_free(right, head->mempool); +} + +static void rebalance(struct btree_head *head, struct btree_geo *geo, + unsigned long *key, int level, unsigned long *child, int fill) +{ + unsigned long 
*parent, *left = NULL, *right = NULL; + int i, no_left, no_right; + + if (fill == 0) { + /* Because we don't steal entries from a neigbour, this case + * can happen. Parent node contains a single child, this + * node, so merging with a sibling never happens. + */ + btree_remove_level(head, geo, key, level + 1); + mempool_free(child, head->mempool); + return; + } + + parent = find_level(head, geo, key, level + 1); + i = getpos(geo, parent, key); + BUG_ON(bval(geo, parent, i) != child); + + if (i > 0) { + left = bval(geo, parent, i - 1); + no_left = getfill(geo, left, 0); + if (fill + no_left <= geo->no_pairs) { + merge(head, geo, level, + left, no_left, + child, fill, + parent, i - 1); + return; + } + } + if (i + 1 < getfill(geo, parent, i)) { + right = bval(geo, parent, i + 1); + no_right = getfill(geo, right, 0); + if (fill + no_right <= geo->no_pairs) { + merge(head, geo, level, + child, fill, + right, no_right, + parent, i); + return; + } + } + /* + * We could also try to steal one entry from the left or right + * neighbor. By not doing so we changed the invariant from + * "all nodes are at least half full" to "no two neighboring + * nodes can be merged". Which means that the average fill of + * all nodes is still half or better. + */ +} + +static void *btree_remove_level(struct btree_head *head, struct btree_geo *geo, + unsigned long *key, int level) +{ + unsigned long *node; + int i, pos, fill; + void *ret; + + if (level > head->height) { + /* we recursed all the way up */ + head->height = 0; + head->node = NULL; + return NULL; + } + + node = find_level(head, geo, key, level); + pos = getpos(geo, node, key); + fill = getfill(geo, node, pos); + if ((level == 1) && (keycmp(geo, node, pos, key) != 0)) + return NULL; + ret = bval(geo, node, pos); + + /* remove and shift */ + for (i = pos; i < fill - 1; i++) { + setkey(geo, node, i, bkey(geo, node, i + 1)); + setval(geo, node, i, bval(geo, node, i + 1)); + } + clearpair(geo, node, fill - 1); + + if (fill - 1 < geo->no_pairs / 2) { + if (level < head->height) + rebalance(head, geo, key, level, node, fill - 1); + else if (fill - 1 == 1) + btree_shrink(head, geo); + } + + return ret; +} + +void *btree_remove(struct btree_head *head, struct btree_geo *geo, + unsigned long *key) +{ + if (head->height == 0) + return NULL; + + return btree_remove_level(head, geo, key, 1); +} +EXPORT_SYMBOL_GPL(btree_remove); + +int btree_merge(struct btree_head *target, struct btree_head *victim, + struct btree_geo *geo, gfp_t gfp) +{ + unsigned long key[geo->keylen]; + unsigned long dup[geo->keylen]; + void *val; + int err; + + BUG_ON(target == victim); + + if (!(target->node)) { + /* target is empty, just copy fields over */ + target->node = victim->node; + target->height = victim->height; + __btree_init(victim); + return 0; + } + + /* TODO: This needs some optimizations. Currently we do three tree + * walks to remove a single object from the victim. + */ + for (;;) { + if (!btree_last(victim, geo, key)) + break; + val = btree_lookup(victim, geo, key); + err = btree_insert(target, geo, key, val, gfp); + if (err) + return err; + /* We must make a copy of the key, as the original will get + * mangled inside btree_remove. 
*/ + longcpy(dup, key, geo->keylen); + btree_remove(victim, geo, dup); + } + return 0; +} +EXPORT_SYMBOL_GPL(btree_merge); + +static size_t __btree_for_each(struct btree_head *head, struct btree_geo *geo, + unsigned long *node, unsigned long opaque, + void (*func)(void *elem, unsigned long opaque, + unsigned long *key, size_t index, + void *func2), + void *func2, int reap, int height, size_t count) +{ + int i; + unsigned long *child; + + for (i = 0; i < geo->no_pairs; i++) { + child = bval(geo, node, i); + if (!child) + break; + if (height > 1) + count = __btree_for_each(head, geo, child, opaque, + func, func2, reap, height - 1, count); + else + func(child, opaque, bkey(geo, node, i), count++, + func2); + } + if (reap) + mempool_free(node, head->mempool); + return count; +} + +static void empty(void *elem, unsigned long opaque, unsigned long *key, + size_t index, void *func2) +{ +} + +void visitorl(void *elem, unsigned long opaque, unsigned long *key, + size_t index, void *__func) +{ + visitorl_t func = __func; + + func(elem, opaque, *key, index); +} +EXPORT_SYMBOL_GPL(visitorl); + +void visitor32(void *elem, unsigned long opaque, unsigned long *__key, + size_t index, void *__func) +{ + visitor32_t func = __func; + u32 *key = (void *)__key; + + func(elem, opaque, *key, index); +} +EXPORT_SYMBOL_GPL(visitor32); + +void visitor64(void *elem, unsigned long opaque, unsigned long *__key, + size_t index, void *__func) +{ + visitor64_t func = __func; + u64 *key = (void *)__key; + + func(elem, opaque, *key, index); +} +EXPORT_SYMBOL_GPL(visitor64); + +void visitor128(void *elem, unsigned long opaque, unsigned long *__key, + size_t index, void *__func) +{ + visitor128_t func = __func; + u64 *key = (void *)__key; + + func(elem, opaque, key[0], key[1], index); +} +EXPORT_SYMBOL_GPL(visitor128); + +size_t btree_visitor(struct btree_head *head, struct btree_geo *geo, + unsigned long opaque, + void (*func)(void *elem, unsigned long opaque, + unsigned long *key, + size_t index, void *func2), + void *func2) +{ + size_t count = 0; + + if (!func2) + func = empty; + if (head->node) + count = __btree_for_each(head, geo, head->node, opaque, func, + func2, 0, head->height, 0); + return count; +} +EXPORT_SYMBOL_GPL(btree_visitor); + +size_t btree_grim_visitor(struct btree_head *head, struct btree_geo *geo, + unsigned long opaque, + void (*func)(void *elem, unsigned long opaque, + unsigned long *key, + size_t index, void *func2), + void *func2) +{ + size_t count = 0; + + if (!func2) + func = empty; + if (head->node) + count = __btree_for_each(head, geo, head->node, opaque, func, + func2, 1, head->height, 0); + __btree_init(head); + return count; +} +EXPORT_SYMBOL_GPL(btree_grim_visitor); + +static int __init btree_module_init(void) +{ + btree_cachep = kmem_cache_create("btree_node", NODESIZE, 0, + SLAB_HWCACHE_ALIGN, NULL); + return 0; +} + +static void __exit btree_module_exit(void) +{ + kmem_cache_destroy(btree_cachep); +} + +/* If core code starts using btree, initialization should happen even earlier */ +module_init(btree_module_init); +module_exit(btree_module_exit); + +MODULE_AUTHOR("Joern Engel "); +MODULE_AUTHOR("Johannes Berg "); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 71f2be213a0009098819e5c04f75ff19f84f2122 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 23 Dec 2009 07:45:44 -0500 Subject: ext4: Add new tracepoint for jbd2_cleanup_journal_tail Signed-off-by: "Theodore Ts'o" --- fs/jbd2/checkpoint.c | 1 + include/trace/events/jbd2.h | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 
insertions(+) (limited to 'include') diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 886849370950..30beb11ef928 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -507,6 +507,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal) if (blocknr < journal->j_tail) freed = freed + journal->j_last - journal->j_first; + trace_jbd2_cleanup_journal_tail(journal, first_tid, blocknr, freed); jbd_debug(1, "Cleaning journal tail from %d to %d (offset %lu), " "freeing %lu\n", diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h index 96b370a050de..bf16545cc977 100644 --- a/include/trace/events/jbd2.h +++ b/include/trace/events/jbd2.h @@ -199,6 +199,34 @@ TRACE_EVENT(jbd2_checkpoint_stats, __entry->forced_to_close, __entry->written, __entry->dropped) ); +TRACE_EVENT(jbd2_cleanup_journal_tail, + + TP_PROTO(journal_t *journal, tid_t first_tid, + unsigned long block_nr, unsigned long freed), + + TP_ARGS(journal, first_tid, block_nr, freed), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( tid_t, tail_sequence ) + __field( tid_t, first_tid ) + __field(unsigned long, block_nr ) + __field(unsigned long, freed ) + ), + + TP_fast_assign( + __entry->dev = journal->j_fs_dev->bd_dev; + __entry->tail_sequence = journal->j_tail_sequence; + __entry->first_tid = first_tid; + __entry->block_nr = block_nr; + __entry->freed = freed; + ), + + TP_printk("dev %s from %u to %u offset %lu freed %lu", + jbd2_dev_to_name(__entry->dev), __entry->tail_sequence, + __entry->first_tid, __entry->block_nr, __entry->freed) +); + #endif /* _TRACE_JBD2_H */ /* This part must be outside protection */ -- cgit v1.2.3 From f8ec9d6837241865cf99bed97bb99f4399fd5a03 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 1 Jan 2010 01:00:21 -0500 Subject: ext4: Add new tracepoints to debug delayed allocation space functions Add tracepoints for ext4_da_reserve_space(), ext4_da_update_reserve_space(), and ext4_da_release_space(). 
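For illustration only (not part of this patch), a TRACE_EVENT() definition of the shape used below has five parts; the event and field names here are made up, and the usual trace-header boilerplate (the TRACE_SYSTEM definition and the final include of define_trace.h) is omitted:

TRACE_EVENT(sample_da_event,
	/* TP_PROTO/TP_ARGS: signature of the generated trace_sample_da_event() call */
	TP_PROTO(struct inode *inode, int nr_blocks),
	TP_ARGS(inode, nr_blocks),

	/* TP_STRUCT__entry: layout of the record written to the ring buffer */
	TP_STRUCT__entry(
		__field(	dev_t,	dev		)
		__field(	ino_t,	ino		)
		__field(	int,	nr_blocks	)
	),

	/* TP_fast_assign: runs at the instrumented site to capture the data */
	TP_fast_assign(
		__entry->dev		= inode->i_sb->s_dev;
		__entry->ino		= inode->i_ino;
		__entry->nr_blocks	= nr_blocks;
	),

	/* TP_printk: how the record is formatted when the trace is read back */
	TP_printk("dev %d,%d ino %lu nr_blocks %d",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  (unsigned long) __entry->ino, __entry->nr_blocks)
);

Callers then invoke the generated trace_sample_da_event(inode, nr) hook, just as the fs/ext4/inode.c hunks below call trace_ext4_da_reserve_space() and trace_ext4_da_update_reserve_space().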
Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 2 + include/trace/events/ext4.h | 101 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+) (limited to 'include') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 3e8afd969236..1a3d7b232cd7 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1061,6 +1061,7 @@ void ext4_da_update_reserve_space(struct inode *inode, int mdb_free = 0, allocated_meta_blocks = 0; spin_lock(&ei->i_block_reservation_lock); + trace_ext4_da_update_reserve_space(inode, used); if (unlikely(used > ei->i_reserved_data_blocks)) { ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d " "with only %d reserved data blocks\n", @@ -1846,6 +1847,7 @@ repeat: spin_lock(&ei->i_block_reservation_lock); md_reserved = ei->i_reserved_meta_blocks; md_needed = ext4_calc_metadata_amount(inode, lblock); + trace_ext4_da_reserve_space(inode, md_needed); spin_unlock(&ei->i_block_reservation_lock); /* diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index d0b6cd3afb2f..2aa6aa3e8f61 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -874,6 +874,107 @@ TRACE_EVENT(ext4_forget, __entry->mode, __entry->is_metadata, __entry->block) ); +TRACE_EVENT(ext4_da_update_reserve_space, + TP_PROTO(struct inode *inode, int used_blocks), + + TP_ARGS(inode, used_blocks), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( umode_t, mode ) + __field( __u64, i_blocks ) + __field( int, used_blocks ) + __field( int, reserved_data_blocks ) + __field( int, reserved_meta_blocks ) + __field( int, allocated_meta_blocks ) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->mode = inode->i_mode; + __entry->i_blocks = inode->i_blocks; + __entry->used_blocks = used_blocks; + __entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks; + __entry->reserved_meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks; + __entry->allocated_meta_blocks = EXT4_I(inode)->i_allocated_meta_blocks; + ), + + TP_printk("dev %s ino %lu mode 0%o i_blocks %llu used_blocks %d reserved_data_blocks %d reserved_meta_blocks %d allocated_meta_blocks %d", + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + __entry->mode, (unsigned long long) __entry->i_blocks, + __entry->used_blocks, __entry->reserved_data_blocks, + __entry->reserved_meta_blocks, __entry->allocated_meta_blocks) +); + +TRACE_EVENT(ext4_da_reserve_space, + TP_PROTO(struct inode *inode, int md_needed), + + TP_ARGS(inode, md_needed), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( umode_t, mode ) + __field( __u64, i_blocks ) + __field( int, md_needed ) + __field( int, reserved_data_blocks ) + __field( int, reserved_meta_blocks ) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->mode = inode->i_mode; + __entry->i_blocks = inode->i_blocks; + __entry->md_needed = md_needed; + __entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks; + __entry->reserved_meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks; + ), + + TP_printk("dev %s ino %lu mode 0%o i_blocks %llu md_needed %d reserved_data_blocks %d reserved_meta_blocks %d", + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + __entry->mode, (unsigned long long) __entry->i_blocks, + __entry->md_needed, __entry->reserved_data_blocks, + __entry->reserved_meta_blocks) +); + +TRACE_EVENT(ext4_da_release_space, + TP_PROTO(struct inode *inode, int 
freed_blocks), + + TP_ARGS(inode, freed_blocks), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( umode_t, mode ) + __field( __u64, i_blocks ) + __field( int, freed_blocks ) + __field( int, reserved_data_blocks ) + __field( int, reserved_meta_blocks ) + __field( int, allocated_meta_blocks ) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->mode = inode->i_mode; + __entry->i_blocks = inode->i_blocks; + __entry->freed_blocks = freed_blocks; + __entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks; + __entry->reserved_meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks; + __entry->allocated_meta_blocks = EXT4_I(inode)->i_allocated_meta_blocks; + ), + + TP_printk("dev %s ino %lu mode 0%o i_blocks %llu freed_blocks %d reserved_data_blocks %d reserved_meta_blocks %d allocated_meta_blocks %d", + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + __entry->mode, (unsigned long long) __entry->i_blocks, + __entry->freed_blocks, __entry->reserved_data_blocks, + __entry->reserved_meta_blocks, __entry->allocated_meta_blocks) +); + + #endif /* _TRACE_EXT4_H */ /* This part must be outside protection */ -- cgit v1.2.3 From d2eecb03936878ec574ade5532fa83df7d75dde7 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 7 Dec 2009 10:36:20 -0500 Subject: ext4: Use slab allocator for sub-page sized allocations Now that the SLUB seems to be fixed so that it respects the requested alignment, use kmem_cache_alloc() to allocator if the block size of the buffer heads to be allocated is less than the page size. Previously, we were using 16k page on a Power system for each buffer, even when the file system was using 1k or 4k block size. Signed-off-by: "Theodore Ts'o" --- fs/jbd2/journal.c | 132 +++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/jbd2.h | 11 +---- 2 files changed, 134 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index ac0d027595d0..c03d4dce4d76 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -39,6 +39,8 @@ #include #include #include +#include +#include #define CREATE_TRACE_POINTS #include @@ -93,6 +95,7 @@ EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate); static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); static void __journal_abort_soft (journal_t *journal, int errno); +static int jbd2_journal_create_slab(size_t slab_size); /* * Helper function used to manage commit timeouts @@ -1248,6 +1251,13 @@ int jbd2_journal_load(journal_t *journal) } } + /* + * Create a slab for this blocksize + */ + err = jbd2_journal_create_slab(be32_to_cpu(sb->s_blocksize)); + if (err) + return err; + /* Let the recovery code check whether it needs to recover any * data from the journal. */ if (jbd2_journal_recover(journal)) @@ -1806,6 +1816,127 @@ size_t journal_tag_bytes(journal_t *journal) return JBD2_TAG_SIZE32; } +/* + * JBD memory management + * + * These functions are used to allocate block-sized chunks of memory + * used for making copies of buffer_head data. Very often it will be + * page-sized chunks of data, but sometimes it will be in + * sub-page-size chunks. (For example, 16k pages on Power systems + * with a 4k block file system.) For blocks smaller than a page, we + * use a SLAB allocator. There are slab caches for each block size, + * which are allocated at mount time, if necessary, and we only free + * (all of) the slab caches when/if the jbd2 module is unloaded. 
For + * this reason we don't need to a mutex to protect access to + * jbd2_slab[] allocating or releasing memory; only in + * jbd2_journal_create_slab(). + */ +#define JBD2_MAX_SLABS 8 +static struct kmem_cache *jbd2_slab[JBD2_MAX_SLABS]; +static DECLARE_MUTEX(jbd2_slab_create_sem); + +static const char *jbd2_slab_names[JBD2_MAX_SLABS] = { + "jbd2_1k", "jbd2_2k", "jbd2_4k", "jbd2_8k", + "jbd2_16k", "jbd2_32k", "jbd2_64k", "jbd2_128k" +}; + + +static void jbd2_journal_destroy_slabs(void) +{ + int i; + + for (i = 0; i < JBD2_MAX_SLABS; i++) { + if (jbd2_slab[i]) + kmem_cache_destroy(jbd2_slab[i]); + jbd2_slab[i] = NULL; + } +} + +static int jbd2_journal_create_slab(size_t size) +{ + int i = order_base_2(size) - 10; + size_t slab_size; + + if (size == PAGE_SIZE) + return 0; + + if (i >= JBD2_MAX_SLABS) + return -EINVAL; + + if (unlikely(i < 0)) + i = 0; + down(&jbd2_slab_create_sem); + if (jbd2_slab[i]) { + up(&jbd2_slab_create_sem); + return 0; /* Already created */ + } + + slab_size = 1 << (i+10); + jbd2_slab[i] = kmem_cache_create(jbd2_slab_names[i], slab_size, + slab_size, 0, NULL); + up(&jbd2_slab_create_sem); + if (!jbd2_slab[i]) { + printk(KERN_EMERG "JBD2: no memory for jbd2_slab cache\n"); + return -ENOMEM; + } + return 0; +} + +static struct kmem_cache *get_slab(size_t size) +{ + int i = order_base_2(size) - 10; + + BUG_ON(i >= JBD2_MAX_SLABS); + if (unlikely(i < 0)) + i = 0; + BUG_ON(jbd2_slab[i] == 0); + return jbd2_slab[i]; +} + +void *jbd2_alloc(size_t size, gfp_t flags) +{ + void *ptr; + + BUG_ON(size & (size-1)); /* Must be a power of 2 */ + + flags |= __GFP_REPEAT; + if (size == PAGE_SIZE) + ptr = (void *)__get_free_pages(flags, 0); + else if (size > PAGE_SIZE) { + int order = get_order(size); + + if (order < 3) + ptr = (void *)__get_free_pages(flags, order); + else + ptr = vmalloc(size); + } else + ptr = kmem_cache_alloc(get_slab(size), flags); + + /* Check alignment; SLUB has gotten this wrong in the past, + * and this can lead to user data corruption! */ + BUG_ON(((unsigned long) ptr) & (size-1)); + + return ptr; +} + +void jbd2_free(void *ptr, size_t size) +{ + if (size == PAGE_SIZE) { + free_pages((unsigned long)ptr, 0); + return; + } + if (size > PAGE_SIZE) { + int order = get_order(size); + + if (order < 3) + free_pages((unsigned long)ptr, order); + else + vfree(ptr); + return; + } + kmem_cache_free(get_slab(size), ptr); +}; + /* * Journal_head storage management */ @@ -2204,6 +2335,7 @@ static void jbd2_journal_destroy_caches(void) jbd2_journal_destroy_revoke_caches(); jbd2_journal_destroy_jbd2_journal_head_cache(); jbd2_journal_destroy_handle_cache(); + jbd2_journal_destroy_slabs(); } static int __init journal_init(void) diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 638ce4554c76..8ada2a129d08 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -69,15 +69,8 @@ extern u8 jbd2_journal_enable_debug; #define jbd_debug(f, a...) 
/**/ #endif -static inline void *jbd2_alloc(size_t size, gfp_t flags) -{ - return (void *)__get_free_pages(flags, get_order(size)); -} - -static inline void jbd2_free(void *ptr, size_t size) -{ - free_pages((unsigned long)ptr, get_order(size)); -}; +extern void *jbd2_alloc(size_t size, gfp_t flags); +extern void jbd2_free(void *ptr, size_t size); #define JBD2_MIN_JOURNAL_BLOCKS 1024 -- cgit v1.2.3 From 85438592f179c126ad4cb9a280046d4f0a501e6d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 18 Nov 2009 17:53:21 +0900 Subject: percpu: remove compile warnings caused by __verify_pcpu_ptr() If percpu pointer is const, __verify_pcpu_ptr() triggers warnings like the following. drivers/net/loopback.c: In function 'loopback_get_stats': drivers/net/loopback.c:109: warning: initialization discards qualifiers from pointer target type Fix it by adding const to the verification target pointer used in __verify_pcpu_ptr(). Signed-off-by: Tejun Heo Reported-by: Stephen Rothwell --- include/linux/percpu-defs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 1fa36eb54b6a..68567c0b3a5d 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -24,7 +24,7 @@ * input parameter is a percpu pointer. */ #define __verify_pcpu_ptr(ptr) do { \ - void __percpu *__vpp_verify = (typeof(ptr))NULL; \ + const void __percpu *__vpp_verify = (typeof(ptr))NULL; \ (void)__vpp_verify; \ } while (0) -- cgit v1.2.3 From 9dfc6e68bfe6ee452efb1a4e9ca26a9007f2b864 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 18 Dec 2009 16:26:20 -0600 Subject: SLUB: Use this_cpu operations in slub Using per cpu allocations removes the needs for the per cpu arrays in the kmem_cache struct. These could get quite big if we have to support systems with thousands of cpus. The use of this_cpu_xx operations results in: 1. The size of kmem_cache for SMP configuration shrinks since we will only need 1 pointer instead of NR_CPUS. The same pointer can be used by all processors. Reduces cache footprint of the allocator. 2. We can dynamically size kmem_cache according to the actual nodes in the system meaning less memory overhead for configurations that may potentially support up to 1k NUMA nodes / 4k cpus. 3. We can remove the diddle widdle with allocating and releasing of kmem_cache_cpu structures when bringing up and shutting down cpus. The cpu alloc logic will do it all for us. Removes some portions of the cpu hotplug functionality. 4. Fastpath performance increases since per cpu pointer lookups and address calculations are avoided. V7-V8 - Convert missed get_cpu_slab() under CONFIG_SLUB_STATS Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/slub_def.h | 6 +- mm/slub.c | 202 +++++++++++------------------------------------ 2 files changed, 49 insertions(+), 159 deletions(-) (limited to 'include') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 1e14beb23f9b..17ebe0f89bf3 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -69,6 +69,7 @@ struct kmem_cache_order_objects { * Slab cache management. 
*/ struct kmem_cache { + struct kmem_cache_cpu *cpu_slab; /* Used for retriving partial slabs etc */ unsigned long flags; int size; /* The size of an object including meta data */ @@ -104,11 +105,6 @@ struct kmem_cache { int remote_node_defrag_ratio; struct kmem_cache_node *node[MAX_NUMNODES]; #endif -#ifdef CONFIG_SMP - struct kmem_cache_cpu *cpu_slab[NR_CPUS]; -#else - struct kmem_cache_cpu cpu_slab; -#endif }; /* diff --git a/mm/slub.c b/mm/slub.c index 8d71aaf888d7..d6c9ecf629d5 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -242,15 +242,6 @@ static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node) #endif } -static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu) -{ -#ifdef CONFIG_SMP - return s->cpu_slab[cpu]; -#else - return &s->cpu_slab; -#endif -} - /* Verify that a pointer has an address that is valid within a slab page */ static inline int check_valid_pointer(struct kmem_cache *s, struct page *page, const void *object) @@ -1124,7 +1115,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) if (!page) return NULL; - stat(get_cpu_slab(s, raw_smp_processor_id()), ORDER_FALLBACK); + stat(this_cpu_ptr(s->cpu_slab), ORDER_FALLBACK); } if (kmemcheck_enabled @@ -1422,7 +1413,7 @@ static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node) static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) { struct kmem_cache_node *n = get_node(s, page_to_nid(page)); - struct kmem_cache_cpu *c = get_cpu_slab(s, smp_processor_id()); + struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab); __ClearPageSlubFrozen(page); if (page->inuse) { @@ -1454,7 +1445,7 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) slab_unlock(page); } else { slab_unlock(page); - stat(get_cpu_slab(s, raw_smp_processor_id()), FREE_SLAB); + stat(__this_cpu_ptr(s->cpu_slab), FREE_SLAB); discard_slab(s, page); } } @@ -1507,7 +1498,7 @@ static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) */ static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) { - struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); + struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); if (likely(c && c->page)) flush_slab(s, c); @@ -1673,7 +1664,7 @@ new_slab: local_irq_disable(); if (new) { - c = get_cpu_slab(s, smp_processor_id()); + c = __this_cpu_ptr(s->cpu_slab); stat(c, ALLOC_SLAB); if (c->page) flush_slab(s, c); @@ -1711,7 +1702,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, void **object; struct kmem_cache_cpu *c; unsigned long flags; - unsigned int objsize; + unsigned long objsize; gfpflags &= gfp_allowed_mask; @@ -1722,14 +1713,14 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, return NULL; local_irq_save(flags); - c = get_cpu_slab(s, smp_processor_id()); + c = __this_cpu_ptr(s->cpu_slab); + object = c->freelist; objsize = c->objsize; - if (unlikely(!c->freelist || !node_match(c, node))) + if (unlikely(!object || !node_match(c, node))) object = __slab_alloc(s, gfpflags, node, addr, c); else { - object = c->freelist; c->freelist = object[c->offset]; stat(c, ALLOC_FASTPATH); } @@ -1800,7 +1791,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, void **object = (void *)x; struct kmem_cache_cpu *c; - c = get_cpu_slab(s, raw_smp_processor_id()); + c = __this_cpu_ptr(s->cpu_slab); stat(c, FREE_SLOWPATH); slab_lock(page); @@ -1872,7 +1863,7 @@ static __always_inline void slab_free(struct kmem_cache *s, kmemleak_free_recursive(x, 
s->flags); local_irq_save(flags); - c = get_cpu_slab(s, smp_processor_id()); + c = __this_cpu_ptr(s->cpu_slab); kmemcheck_slab_free(s, object, c->objsize); debug_check_no_locks_freed(object, c->objsize); if (!(s->flags & SLAB_DEBUG_OBJECTS)) @@ -2095,130 +2086,28 @@ init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s) #endif } -#ifdef CONFIG_SMP -/* - * Per cpu array for per cpu structures. - * - * The per cpu array places all kmem_cache_cpu structures from one processor - * close together meaning that it becomes possible that multiple per cpu - * structures are contained in one cacheline. This may be particularly - * beneficial for the kmalloc caches. - * - * A desktop system typically has around 60-80 slabs. With 100 here we are - * likely able to get per cpu structures for all caches from the array defined - * here. We must be able to cover all kmalloc caches during bootstrap. - * - * If the per cpu array is exhausted then fall back to kmalloc - * of individual cachelines. No sharing is possible then. - */ -#define NR_KMEM_CACHE_CPU 100 - -static DEFINE_PER_CPU(struct kmem_cache_cpu [NR_KMEM_CACHE_CPU], - kmem_cache_cpu); - -static DEFINE_PER_CPU(struct kmem_cache_cpu *, kmem_cache_cpu_free); -static DECLARE_BITMAP(kmem_cach_cpu_free_init_once, CONFIG_NR_CPUS); - -static struct kmem_cache_cpu *alloc_kmem_cache_cpu(struct kmem_cache *s, - int cpu, gfp_t flags) -{ - struct kmem_cache_cpu *c = per_cpu(kmem_cache_cpu_free, cpu); - - if (c) - per_cpu(kmem_cache_cpu_free, cpu) = - (void *)c->freelist; - else { - /* Table overflow: So allocate ourselves */ - c = kmalloc_node( - ALIGN(sizeof(struct kmem_cache_cpu), cache_line_size()), - flags, cpu_to_node(cpu)); - if (!c) - return NULL; - } - - init_kmem_cache_cpu(s, c); - return c; -} - -static void free_kmem_cache_cpu(struct kmem_cache_cpu *c, int cpu) -{ - if (c < per_cpu(kmem_cache_cpu, cpu) || - c >= per_cpu(kmem_cache_cpu, cpu) + NR_KMEM_CACHE_CPU) { - kfree(c); - return; - } - c->freelist = (void *)per_cpu(kmem_cache_cpu_free, cpu); - per_cpu(kmem_cache_cpu_free, cpu) = c; -} - -static void free_kmem_cache_cpus(struct kmem_cache *s) -{ - int cpu; - - for_each_online_cpu(cpu) { - struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); - - if (c) { - s->cpu_slab[cpu] = NULL; - free_kmem_cache_cpu(c, cpu); - } - } -} - -static int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags) -{ - int cpu; - - for_each_online_cpu(cpu) { - struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); +static DEFINE_PER_CPU(struct kmem_cache_cpu, kmalloc_percpu[SLUB_PAGE_SHIFT]); - if (c) - continue; - - c = alloc_kmem_cache_cpu(s, cpu, flags); - if (!c) { - free_kmem_cache_cpus(s); - return 0; - } - s->cpu_slab[cpu] = c; - } - return 1; -} - -/* - * Initialize the per cpu array. - */ -static void init_alloc_cpu_cpu(int cpu) -{ - int i; - - if (cpumask_test_cpu(cpu, to_cpumask(kmem_cach_cpu_free_init_once))) - return; - - for (i = NR_KMEM_CACHE_CPU - 1; i >= 0; i--) - free_kmem_cache_cpu(&per_cpu(kmem_cache_cpu, cpu)[i], cpu); - - cpumask_set_cpu(cpu, to_cpumask(kmem_cach_cpu_free_init_once)); -} - -static void __init init_alloc_cpu(void) +static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags) { int cpu; - for_each_online_cpu(cpu) - init_alloc_cpu_cpu(cpu); - } + if (s < kmalloc_caches + SLUB_PAGE_SHIFT && s >= kmalloc_caches) + /* + * Boot time creation of the kmalloc array. Use static per cpu data + * since the per cpu allocator is not available yet. 
+ */ + s->cpu_slab = per_cpu_var(kmalloc_percpu) + (s - kmalloc_caches); + else + s->cpu_slab = alloc_percpu(struct kmem_cache_cpu); -#else -static inline void free_kmem_cache_cpus(struct kmem_cache *s) {} -static inline void init_alloc_cpu(void) {} + if (!s->cpu_slab) + return 0; -static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags) -{ - init_kmem_cache_cpu(s, &s->cpu_slab); + for_each_possible_cpu(cpu) + init_kmem_cache_cpu(s, per_cpu_ptr(s->cpu_slab, cpu)); return 1; } -#endif #ifdef CONFIG_NUMA /* @@ -2609,9 +2498,8 @@ static inline int kmem_cache_close(struct kmem_cache *s) int node; flush_all(s); - + free_percpu(s->cpu_slab); /* Attempt to free all objects */ - free_kmem_cache_cpus(s); for_each_node_state(node, N_NORMAL_MEMORY) { struct kmem_cache_node *n = get_node(s, node); @@ -2760,7 +2648,19 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags) realsize = kmalloc_caches[index].objsize; text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d", (unsigned int)realsize); - s = kmalloc(kmem_size, flags & ~SLUB_DMA); + + if (flags & __GFP_WAIT) + s = kmalloc(kmem_size, flags & ~SLUB_DMA); + else { + int i; + + s = NULL; + for (i = 0; i < SLUB_PAGE_SHIFT; i++) + if (kmalloc_caches[i].size) { + s = kmalloc_caches + i; + break; + } + } /* * Must defer sysfs creation to a workqueue because we don't know @@ -3176,8 +3076,6 @@ void __init kmem_cache_init(void) int i; int caches = 0; - init_alloc_cpu(); - #ifdef CONFIG_NUMA /* * Must first have the slab cache available for the allocations of the @@ -3261,8 +3159,10 @@ void __init kmem_cache_init(void) #ifdef CONFIG_SMP register_cpu_notifier(&slab_notifier); - kmem_size = offsetof(struct kmem_cache, cpu_slab) + - nr_cpu_ids * sizeof(struct kmem_cache_cpu *); +#endif +#ifdef CONFIG_NUMA + kmem_size = offsetof(struct kmem_cache, node) + + nr_node_ids * sizeof(struct kmem_cache_node *); #else kmem_size = sizeof(struct kmem_cache); #endif @@ -3365,7 +3265,7 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, * per cpu structures */ for_each_online_cpu(cpu) - get_cpu_slab(s, cpu)->objsize = s->objsize; + per_cpu_ptr(s->cpu_slab, cpu)->objsize = s->objsize; s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); up_write(&slub_lock); @@ -3422,11 +3322,9 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb, switch (action) { case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: - init_alloc_cpu_cpu(cpu); down_read(&slub_lock); list_for_each_entry(s, &slab_caches, list) - s->cpu_slab[cpu] = alloc_kmem_cache_cpu(s, cpu, - GFP_KERNEL); + init_kmem_cache_cpu(s, per_cpu_ptr(s->cpu_slab, cpu)); up_read(&slub_lock); break; @@ -3436,13 +3334,9 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb, case CPU_DEAD_FROZEN: down_read(&slub_lock); list_for_each_entry(s, &slab_caches, list) { - struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); - local_irq_save(flags); __flush_cpu_slab(s, cpu); local_irq_restore(flags); - free_kmem_cache_cpu(c, cpu); - s->cpu_slab[cpu] = NULL; } up_read(&slub_lock); break; @@ -3928,7 +3822,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s, int cpu; for_each_possible_cpu(cpu) { - struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); + struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); if (!c || c->node < 0) continue; @@ -4353,7 +4247,7 @@ static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si) return -ENOMEM; for_each_online_cpu(cpu) { - unsigned x = get_cpu_slab(s, cpu)->stat[si]; + unsigned x = 
per_cpu_ptr(s->cpu_slab, cpu)->stat[si]; data[cpu] = x; sum += x; @@ -4376,7 +4270,7 @@ static void clear_stat(struct kmem_cache *s, enum stat_item si) int cpu; for_each_online_cpu(cpu) - get_cpu_slab(s, cpu)->stat[si] = 0; + per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0; } #define STAT_ATTR(si, text) \ -- cgit v1.2.3 From 756dee75872a2a764b478e18076360b8a4ec9045 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 18 Dec 2009 16:26:21 -0600 Subject: SLUB: Get rid of dynamic DMA kmalloc cache allocation Dynamic DMA kmalloc cache allocation is troublesome since the new percpu allocator does not support allocations in atomic contexts. Reserve some statically allocated kmalloc_cpu structures instead. Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/slub_def.h | 19 +++++++++++-------- mm/slub.c | 24 ++++++++++-------------- 2 files changed, 21 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 17ebe0f89bf3..a78fb4ac2015 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -131,11 +131,21 @@ struct kmem_cache { #define SLUB_PAGE_SHIFT (PAGE_SHIFT + 2) +#ifdef CONFIG_ZONE_DMA +#define SLUB_DMA __GFP_DMA +/* Reserve extra caches for potential DMA use */ +#define KMALLOC_CACHES (2 * SLUB_PAGE_SHIFT - 6) +#else +/* Disable DMA functionality */ +#define SLUB_DMA (__force gfp_t)0 +#define KMALLOC_CACHES SLUB_PAGE_SHIFT +#endif + /* * We keep the general caches in an array of slab caches that are used for * 2^x bytes of allocations. */ -extern struct kmem_cache kmalloc_caches[SLUB_PAGE_SHIFT]; +extern struct kmem_cache kmalloc_caches[KMALLOC_CACHES]; /* * Sorry that the following has to be that ugly but some versions of GCC @@ -203,13 +213,6 @@ static __always_inline struct kmem_cache *kmalloc_slab(size_t size) return &kmalloc_caches[index]; } -#ifdef CONFIG_ZONE_DMA -#define SLUB_DMA __GFP_DMA -#else -/* Disable DMA functionality */ -#define SLUB_DMA (__force gfp_t)0 -#endif - void *kmem_cache_alloc(struct kmem_cache *, gfp_t); void *__kmalloc(size_t size, gfp_t flags); diff --git a/mm/slub.c b/mm/slub.c index d6c9ecf629d5..cdb7f0214af0 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2092,7 +2092,7 @@ static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags) { int cpu; - if (s < kmalloc_caches + SLUB_PAGE_SHIFT && s >= kmalloc_caches) + if (s < kmalloc_caches + KMALLOC_CACHES && s >= kmalloc_caches) /* * Boot time creation of the kmalloc array. Use static per cpu data * since the per cpu allocator is not available yet. 
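To make the reservation arithmetic concrete (an illustration assuming 4 KiB pages, i.e. PAGE_SHIFT == 12, with CONFIG_ZONE_DMA enabled; these numbers are not spelled out in the patch itself):

	SLUB_PAGE_SHIFT = PAGE_SHIFT + 2          = 14
	KMALLOC_CACHES  = 2 * SLUB_PAGE_SHIFT - 6 = 22

so 14 of the statically allocated kmem_cache slots serve the regular kmalloc array, and the remaining 8 are spare entries that dma_kmalloc_cache() below claims on demand by picking the first cache whose size is still zero.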
@@ -2539,7 +2539,7 @@ EXPORT_SYMBOL(kmem_cache_destroy); * Kmalloc subsystem *******************************************************************/ -struct kmem_cache kmalloc_caches[SLUB_PAGE_SHIFT] __cacheline_aligned; +struct kmem_cache kmalloc_caches[KMALLOC_CACHES] __cacheline_aligned; EXPORT_SYMBOL(kmalloc_caches); static int __init setup_slub_min_order(char *str) @@ -2629,6 +2629,7 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags) char *text; size_t realsize; unsigned long slabflags; + int i; s = kmalloc_caches_dma[index]; if (s) @@ -2649,18 +2650,13 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags) text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d", (unsigned int)realsize); - if (flags & __GFP_WAIT) - s = kmalloc(kmem_size, flags & ~SLUB_DMA); - else { - int i; + s = NULL; + for (i = 0; i < KMALLOC_CACHES; i++) + if (!kmalloc_caches[i].size) + break; - s = NULL; - for (i = 0; i < SLUB_PAGE_SHIFT; i++) - if (kmalloc_caches[i].size) { - s = kmalloc_caches + i; - break; - } - } + BUG_ON(i >= KMALLOC_CACHES); + s = kmalloc_caches + i; /* * Must defer sysfs creation to a workqueue because we don't know @@ -2674,7 +2670,7 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags) if (!s || !text || !kmem_cache_open(s, flags, text, realsize, ARCH_KMALLOC_MINALIGN, slabflags, NULL)) { - kfree(s); + s->size = 0; kfree(text); goto unlock_out; } -- cgit v1.2.3 From ff12059ed14b0773d7bbef86f98218ada6c20770 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 18 Dec 2009 16:26:22 -0600 Subject: SLUB: this_cpu: Remove slub kmem_cache fields Remove the fields in struct kmem_cache_cpu that were used to cache data from struct kmem_cache when they were in different cachelines. The cacheline that holds the per cpu array pointer now also holds these values. We can cut down the struct kmem_cache_cpu size to almost half. The get_freepointer() and set_freepointer() functions that used to be only intended for the slow path now are also useful for the hot path since access to the size field does not require accessing an additional cacheline anymore. This results in consistent use of functions for setting the freepointer of objects throughout SLUB. Also we initialize all possible kmem_cache_cpu structures when a slab is created. No need to initialize them when a processor or node comes online. Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/slub_def.h | 2 -- mm/slub.c | 76 +++++++++++------------------------------------- 2 files changed, 17 insertions(+), 61 deletions(-) (limited to 'include') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index a78fb4ac2015..0249d4175bac 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -38,8 +38,6 @@ struct kmem_cache_cpu { void **freelist; /* Pointer to first free per cpu object */ struct page *page; /* The slab from which we are allocating */ int node; /* The node of the page (or -1 for debug) */ - unsigned int offset; /* Freepointer offset (in word units) */ - unsigned int objsize; /* Size of an object (from kmem_cache) */ #ifdef CONFIG_SLUB_STATS unsigned stat[NR_SLUB_STAT_ITEMS]; #endif diff --git a/mm/slub.c b/mm/slub.c index cdb7f0214af0..30d2dde27563 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -260,13 +260,6 @@ static inline int check_valid_pointer(struct kmem_cache *s, return 1; } -/* - * Slow version of get and set free pointer. 
- * - * This version requires touching the cache lines of kmem_cache which - * we avoid to do in the fast alloc free paths. There we obtain the offset - * from the page struct. - */ static inline void *get_freepointer(struct kmem_cache *s, void *object) { return *(void **)(object + s->offset); @@ -1473,10 +1466,10 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) /* Retrieve object from cpu_freelist */ object = c->freelist; - c->freelist = c->freelist[c->offset]; + c->freelist = get_freepointer(s, c->freelist); /* And put onto the regular freelist */ - object[c->offset] = page->freelist; + set_freepointer(s, object, page->freelist); page->freelist = object; page->inuse--; } @@ -1635,7 +1628,7 @@ load_freelist: if (unlikely(SLABDEBUG && PageSlubDebug(c->page))) goto debug; - c->freelist = object[c->offset]; + c->freelist = get_freepointer(s, object); c->page->inuse = c->page->objects; c->page->freelist = NULL; c->node = page_to_nid(c->page); @@ -1681,7 +1674,7 @@ debug: goto another_slab; c->page->inuse++; - c->page->freelist = object[c->offset]; + c->page->freelist = get_freepointer(s, object); c->node = -1; goto unlock_out; } @@ -1702,7 +1695,6 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, void **object; struct kmem_cache_cpu *c; unsigned long flags; - unsigned long objsize; gfpflags &= gfp_allowed_mask; @@ -1715,22 +1707,21 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, local_irq_save(flags); c = __this_cpu_ptr(s->cpu_slab); object = c->freelist; - objsize = c->objsize; if (unlikely(!object || !node_match(c, node))) object = __slab_alloc(s, gfpflags, node, addr, c); else { - c->freelist = object[c->offset]; + c->freelist = get_freepointer(s, object); stat(c, ALLOC_FASTPATH); } local_irq_restore(flags); if (unlikely(gfpflags & __GFP_ZERO) && object) - memset(object, 0, objsize); + memset(object, 0, s->objsize); - kmemcheck_slab_alloc(s, gfpflags, object, c->objsize); - kmemleak_alloc_recursive(object, objsize, 1, s->flags, gfpflags); + kmemcheck_slab_alloc(s, gfpflags, object, s->objsize); + kmemleak_alloc_recursive(object, s->objsize, 1, s->flags, gfpflags); return object; } @@ -1785,7 +1776,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_node_notrace); * handling required then we can return immediately. 
*/ static void __slab_free(struct kmem_cache *s, struct page *page, - void *x, unsigned long addr, unsigned int offset) + void *x, unsigned long addr) { void *prior; void **object = (void *)x; @@ -1799,7 +1790,8 @@ static void __slab_free(struct kmem_cache *s, struct page *page, goto debug; checks_ok: - prior = object[offset] = page->freelist; + prior = page->freelist; + set_freepointer(s, object, prior); page->freelist = object; page->inuse--; @@ -1864,16 +1856,16 @@ static __always_inline void slab_free(struct kmem_cache *s, kmemleak_free_recursive(x, s->flags); local_irq_save(flags); c = __this_cpu_ptr(s->cpu_slab); - kmemcheck_slab_free(s, object, c->objsize); - debug_check_no_locks_freed(object, c->objsize); + kmemcheck_slab_free(s, object, s->objsize); + debug_check_no_locks_freed(object, s->objsize); if (!(s->flags & SLAB_DEBUG_OBJECTS)) - debug_check_no_obj_freed(object, c->objsize); + debug_check_no_obj_freed(object, s->objsize); if (likely(page == c->page && c->node >= 0)) { - object[c->offset] = c->freelist; + set_freepointer(s, object, c->freelist); c->freelist = object; stat(c, FREE_FASTPATH); } else - __slab_free(s, page, x, addr, c->offset); + __slab_free(s, page, x, addr); local_irq_restore(flags); } @@ -2060,19 +2052,6 @@ static unsigned long calculate_alignment(unsigned long flags, return ALIGN(align, sizeof(void *)); } -static void init_kmem_cache_cpu(struct kmem_cache *s, - struct kmem_cache_cpu *c) -{ - c->page = NULL; - c->freelist = NULL; - c->node = 0; - c->offset = s->offset / sizeof(void *); - c->objsize = s->objsize; -#ifdef CONFIG_SLUB_STATS - memset(c->stat, 0, NR_SLUB_STAT_ITEMS * sizeof(unsigned)); -#endif -} - static void init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s) { @@ -2090,8 +2069,6 @@ static DEFINE_PER_CPU(struct kmem_cache_cpu, kmalloc_percpu[SLUB_PAGE_SHIFT]); static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags) { - int cpu; - if (s < kmalloc_caches + KMALLOC_CACHES && s >= kmalloc_caches) /* * Boot time creation of the kmalloc array. Use static per cpu data @@ -2104,8 +2081,6 @@ static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags) if (!s->cpu_slab) return 0; - for_each_possible_cpu(cpu) - init_kmem_cache_cpu(s, per_cpu_ptr(s->cpu_slab, cpu)); return 1; } @@ -2391,6 +2366,7 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags, if (alloc_kmem_cache_cpus(s, gfpflags & ~SLUB_DMA)) return 1; + free_kmem_cache_nodes(s); error: if (flags & SLAB_PANIC) @@ -3247,22 +3223,12 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, down_write(&slub_lock); s = find_mergeable(size, align, flags, name, ctor); if (s) { - int cpu; - s->refcount++; /* * Adjust the object sizes so that we clear * the complete object on kzalloc. 
*/ s->objsize = max(s->objsize, (int)size); - - /* - * And then we need to update the object size in the - * per cpu structures - */ - for_each_online_cpu(cpu) - per_cpu_ptr(s->cpu_slab, cpu)->objsize = s->objsize; - s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); up_write(&slub_lock); @@ -3316,14 +3282,6 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb, unsigned long flags; switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - down_read(&slub_lock); - list_for_each_entry(s, &slab_caches, list) - init_kmem_cache_cpu(s, per_cpu_ptr(s->cpu_slab, cpu)); - up_read(&slub_lock); - break; - case CPU_UP_CANCELED: case CPU_UP_CANCELED_FROZEN: case CPU_DEAD: -- cgit v1.2.3 From 1f8fef7b3388b5a976e80839679b5bae581a1091 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Thu, 24 Dec 2009 11:59:57 +0100 Subject: firewire: add fw_csr_string() helper function The core (sysfs attributes), the firedtv driver, and possible future drivers all read strings from some configuration ROM directory. Factor out the generic code from show_text_leaf() into a new helper function, modified slightly to handle arbitrary buffer sizes. Signed-off-by: Clemens Ladisch Signed-off-by: Stefan Richter --- drivers/firewire/core-device.c | 110 ++++++++++++++++++++++---------- drivers/media/dvb/firewire/firedtv-fw.c | 39 ++--------- include/linux/firewire.h | 2 + 3 files changed, 84 insertions(+), 67 deletions(-) (limited to 'include') diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c index 9d0dfcbe2c1c..a39e4344cd58 100644 --- a/drivers/firewire/core-device.c +++ b/drivers/firewire/core-device.c @@ -59,6 +59,67 @@ int fw_csr_iterator_next(struct fw_csr_iterator *ci, int *key, int *value) } EXPORT_SYMBOL(fw_csr_iterator_next); +static u32 *search_leaf(u32 *directory, int search_key) +{ + struct fw_csr_iterator ci; + int last_key = 0, key, value; + + fw_csr_iterator_init(&ci, directory); + while (fw_csr_iterator_next(&ci, &key, &value)) { + if (last_key == search_key && + key == (CSR_DESCRIPTOR | CSR_LEAF)) + return ci.p - 1 + value; + last_key = key; + } + return NULL; +} + +static int textual_leaf_to_string(u32 *block, char *buf, size_t size) +{ + unsigned int quadlets, length; + + if (!size || !buf) + return -EINVAL; + + quadlets = min(block[0] >> 16, 256u); + if (quadlets < 2) + return -ENODATA; + + if (block[1] != 0 || block[2] != 0) + /* unknown language/character set */ + return -ENODATA; + + block += 3; + quadlets -= 2; + for (length = 0; length < quadlets * 4 && length + 1 < size; length++) { + char c = block[length / 4] >> (24 - 8 * (length % 4)); + if (c == '\0') + break; + buf[length] = c; + } + buf[length] = '\0'; + return length; +} + +/** + * fw_csr_string - reads a string from the configuration ROM + * @directory: device or unit directory; + * fw_device->config_rom+5 or fw_unit->directory + * @key: the key of the preceding directory entry + * @buf: where to put the string + * @size: size of @buf, in bytes + * + * Returns string length (>= 0) or error code (< 0). 
+ */ +int fw_csr_string(u32 *directory, int key, char *buf, size_t size) +{ + u32 *leaf = search_leaf(directory, key); + if (!leaf) + return -ENOENT; + return textual_leaf_to_string(leaf, buf, size); +} +EXPORT_SYMBOL(fw_csr_string); + static bool is_fw_unit(struct device *dev); static int match_unit_directory(u32 *directory, u32 match_flags, @@ -226,10 +287,10 @@ static ssize_t show_text_leaf(struct device *dev, { struct config_rom_attribute *attr = container_of(dattr, struct config_rom_attribute, attr); - struct fw_csr_iterator ci; - u32 *dir, *block = NULL, *p, *end; - int length, key, value, last_key = 0, ret = -ENOENT; - char *b; + u32 *dir; + size_t bufsize; + char dummy_buf[2]; + int ret; down_read(&fw_device_rwsem); @@ -238,40 +299,23 @@ static ssize_t show_text_leaf(struct device *dev, else dir = fw_device(dev)->config_rom + 5; - fw_csr_iterator_init(&ci, dir); - while (fw_csr_iterator_next(&ci, &key, &value)) { - if (attr->key == last_key && - key == (CSR_DESCRIPTOR | CSR_LEAF)) - block = ci.p - 1 + value; - last_key = key; + if (buf) { + bufsize = PAGE_SIZE - 1; + } else { + buf = dummy_buf; + bufsize = 1; } - if (block == NULL) - goto out; - - length = min(block[0] >> 16, 256U); - if (length < 3) - goto out; - - if (block[1] != 0 || block[2] != 0) - /* Unknown encoding. */ - goto out; + ret = fw_csr_string(dir, attr->key, buf, bufsize); - if (buf == NULL) { - ret = length * 4; - goto out; + if (ret >= 0) { + /* Strip trailing whitespace and add newline. */ + while (ret > 0 && isspace(buf[ret - 1])) + ret--; + strcpy(buf + ret, "\n"); + ret++; } - b = buf; - end = &block[length + 1]; - for (p = &block[3]; p < end; p++, b += 4) - * (u32 *) b = (__force u32) __cpu_to_be32(*p); - - /* Strip trailing whitespace and add newline. */ - while (b--, (isspace(*b) || *b == '\0') && b > buf); - strcpy(b + 1, "\n"); - ret = b + 2 - buf; - out: up_read(&fw_device_rwsem); return ret; diff --git a/drivers/media/dvb/firewire/firedtv-fw.c b/drivers/media/dvb/firewire/firedtv-fw.c index 6223bf01efe9..4253b7ab0097 100644 --- a/drivers/media/dvb/firewire/firedtv-fw.c +++ b/drivers/media/dvb/firewire/firedtv-fw.c @@ -239,47 +239,18 @@ static const struct fw_address_region fcp_region = { }; /* Adjust the template string if models with longer names appear. */ -#define MAX_MODEL_NAME_LEN ((int)DIV_ROUND_UP(sizeof("FireDTV ????"), 4)) - -static size_t model_name(u32 *directory, __be32 *buffer) -{ - struct fw_csr_iterator ci; - int i, length, key, value, last_key = 0; - u32 *block = NULL; - - fw_csr_iterator_init(&ci, directory); - while (fw_csr_iterator_next(&ci, &key, &value)) { - if (last_key == CSR_MODEL && - key == (CSR_DESCRIPTOR | CSR_LEAF)) - block = ci.p - 1 + value; - last_key = key; - } - - if (block == NULL) - return 0; - - length = min((int)(block[0] >> 16) - 2, MAX_MODEL_NAME_LEN); - if (length <= 0) - return 0; - - /* fast-forward to text string */ - block += 3; - - for (i = 0; i < length; i++) - buffer[i] = cpu_to_be32(block[i]); - - return length * 4; -} +#define MAX_MODEL_NAME_LEN sizeof("FireDTV ????") static int node_probe(struct device *dev) { struct firedtv *fdtv; - __be32 name[MAX_MODEL_NAME_LEN]; + char name[MAX_MODEL_NAME_LEN]; int name_len, err; - name_len = model_name(fw_unit(dev)->directory, name); + name_len = fw_csr_string(fw_unit(dev)->directory, CSR_MODEL, + name, sizeof(name)); - fdtv = fdtv_alloc(dev, &backend, (char *)name, name_len); + fdtv = fdtv_alloc(dev, &backend, name, name_len >= 0 ? 
name_len : 0); if (!fdtv) return -ENOMEM; diff --git a/include/linux/firewire.h b/include/linux/firewire.h index a0e67150a729..5246869d8083 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -72,6 +72,8 @@ struct fw_csr_iterator { void fw_csr_iterator_init(struct fw_csr_iterator *ci, u32 *p); int fw_csr_iterator_next(struct fw_csr_iterator *ci, int *key, int *value); +int fw_csr_string(u32 *directory, int key, char *buf, size_t size); + extern struct bus_type fw_bus_type; struct fw_card_driver; -- cgit v1.2.3 From 3c2c58cb33b3b15a2c4871babeec8fe1456e1db6 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sat, 26 Dec 2009 01:43:21 +0100 Subject: firewire: core: fw_csr_string addendum Witespace and comment changes, and a different way to say i + 1 < end. Signed-off-by: Stefan Richter --- drivers/firewire/core-device.c | 26 ++++++++++++++++---------- include/linux/firewire.h | 1 - 2 files changed, 16 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c index a39e4344cd58..5d5c6a689837 100644 --- a/drivers/firewire/core-device.c +++ b/drivers/firewire/core-device.c @@ -69,19 +69,22 @@ static u32 *search_leaf(u32 *directory, int search_key) if (last_key == search_key && key == (CSR_DESCRIPTOR | CSR_LEAF)) return ci.p - 1 + value; + last_key = key; } + return NULL; } static int textual_leaf_to_string(u32 *block, char *buf, size_t size) { - unsigned int quadlets, length; + unsigned int quadlets, i; + char c; if (!size || !buf) return -EINVAL; - quadlets = min(block[0] >> 16, 256u); + quadlets = min(block[0] >> 16, 256U); if (quadlets < 2) return -ENODATA; @@ -91,31 +94,34 @@ static int textual_leaf_to_string(u32 *block, char *buf, size_t size) block += 3; quadlets -= 2; - for (length = 0; length < quadlets * 4 && length + 1 < size; length++) { - char c = block[length / 4] >> (24 - 8 * (length % 4)); + for (i = 0; i < quadlets * 4 && i < size - 1; i++) { + c = block[i / 4] >> (24 - 8 * (i % 4)); if (c == '\0') break; - buf[length] = c; + buf[i] = c; } - buf[length] = '\0'; - return length; + buf[i] = '\0'; + + return i; } /** * fw_csr_string - reads a string from the configuration ROM - * @directory: device or unit directory; - * fw_device->config_rom+5 or fw_unit->directory + * @directory: e.g. root directory or unit directory * @key: the key of the preceding directory entry * @buf: where to put the string * @size: size of @buf, in bytes * - * Returns string length (>= 0) or error code (< 0). + * The string is taken from a minimal ASCII text descriptor leaf after + * the immediate entry with @key. The string is zero-terminated. + * Returns strlen(buf) or a negative error code. 
*/ int fw_csr_string(u32 *directory, int key, char *buf, size_t size) { u32 *leaf = search_leaf(directory, key); if (!leaf) return -ENOENT; + return textual_leaf_to_string(leaf, buf, size); } EXPORT_SYMBOL(fw_csr_string); diff --git a/include/linux/firewire.h b/include/linux/firewire.h index 5246869d8083..df680216e7b6 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -71,7 +71,6 @@ struct fw_csr_iterator { void fw_csr_iterator_init(struct fw_csr_iterator *ci, u32 *p); int fw_csr_iterator_next(struct fw_csr_iterator *ci, int *key, int *value); - int fw_csr_string(u32 *directory, int key, char *buf, size_t size); extern struct bus_type fw_bus_type; -- cgit v1.2.3 From 13b302d0a217580c0129b0641b0ca8b592e437b0 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sat, 26 Dec 2009 01:44:10 +0100 Subject: firewire: qualify config ROM cache pointers as const pointers Several config ROM related functions only peek at the ROM cache; mark their arguments as const pointers. Ditto fw_device.config_rom and fw_unit.directory, as the memory behind them is meant to be write-once. Signed-off-by: Stefan Richter --- drivers/firewire/core-device.c | 21 +++++++++++---------- drivers/firewire/sbp2.c | 5 +++-- include/linux/firewire.h | 12 ++++++------ 3 files changed, 20 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c index 5d5c6a689837..eecd52dc8e98 100644 --- a/drivers/firewire/core-device.c +++ b/drivers/firewire/core-device.c @@ -43,7 +43,7 @@ #include "core.h" -void fw_csr_iterator_init(struct fw_csr_iterator *ci, u32 * p) +void fw_csr_iterator_init(struct fw_csr_iterator *ci, const u32 *p) { ci->p = p + 1; ci->end = ci->p + (p[0] >> 16); @@ -59,7 +59,7 @@ int fw_csr_iterator_next(struct fw_csr_iterator *ci, int *key, int *value) } EXPORT_SYMBOL(fw_csr_iterator_next); -static u32 *search_leaf(u32 *directory, int search_key) +static const u32 *search_leaf(const u32 *directory, int search_key) { struct fw_csr_iterator ci; int last_key = 0, key, value; @@ -76,7 +76,7 @@ static u32 *search_leaf(u32 *directory, int search_key) return NULL; } -static int textual_leaf_to_string(u32 *block, char *buf, size_t size) +static int textual_leaf_to_string(const u32 *block, char *buf, size_t size) { unsigned int quadlets, i; char c; @@ -116,9 +116,9 @@ static int textual_leaf_to_string(u32 *block, char *buf, size_t size) * the immediate entry with @key. The string is zero-terminated. * Returns strlen(buf) or a negative error code. 
*/ -int fw_csr_string(u32 *directory, int key, char *buf, size_t size) +int fw_csr_string(const u32 *directory, int key, char *buf, size_t size) { - u32 *leaf = search_leaf(directory, key); + const u32 *leaf = search_leaf(directory, key); if (!leaf) return -ENOENT; @@ -128,7 +128,7 @@ EXPORT_SYMBOL(fw_csr_string); static bool is_fw_unit(struct device *dev); -static int match_unit_directory(u32 *directory, u32 match_flags, +static int match_unit_directory(const u32 *directory, u32 match_flags, const struct ieee1394_device_id *id) { struct fw_csr_iterator ci; @@ -262,7 +262,7 @@ static ssize_t show_immediate(struct device *dev, struct config_rom_attribute *attr = container_of(dattr, struct config_rom_attribute, attr); struct fw_csr_iterator ci; - u32 *dir; + const u32 *dir; int key, value, ret = -ENOENT; down_read(&fw_device_rwsem); @@ -293,7 +293,7 @@ static ssize_t show_text_leaf(struct device *dev, { struct config_rom_attribute *attr = container_of(dattr, struct config_rom_attribute, attr); - u32 *dir; + const u32 *dir; size_t bufsize; char dummy_buf[2]; int ret; @@ -421,7 +421,7 @@ static ssize_t guid_show(struct device *dev, return ret; } -static int units_sprintf(char *buf, u32 *directory) +static int units_sprintf(char *buf, const u32 *directory) { struct fw_csr_iterator ci; int key, value; @@ -503,7 +503,8 @@ static int read_rom(struct fw_device *device, */ static int read_bus_info_block(struct fw_device *device, int generation) { - u32 *rom, *stack, *old_rom, *new_rom; + const u32 *old_rom, *new_rom; + u32 *rom, *stack; u32 sp, key; int i, end, length, ret = -1; diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c index d485cdd8cbac..7e33b0b1704c 100644 --- a/drivers/firewire/sbp2.c +++ b/drivers/firewire/sbp2.c @@ -1014,7 +1014,8 @@ static int sbp2_add_logical_unit(struct sbp2_target *tgt, int lun_entry) return 0; } -static int sbp2_scan_logical_unit_dir(struct sbp2_target *tgt, u32 *directory) +static int sbp2_scan_logical_unit_dir(struct sbp2_target *tgt, + const u32 *directory) { struct fw_csr_iterator ci; int key, value; @@ -1027,7 +1028,7 @@ static int sbp2_scan_logical_unit_dir(struct sbp2_target *tgt, u32 *directory) return 0; } -static int sbp2_scan_unit_dir(struct sbp2_target *tgt, u32 *directory, +static int sbp2_scan_unit_dir(struct sbp2_target *tgt, const u32 *directory, u32 *model, u32 *firmware_revision) { struct fw_csr_iterator ci; diff --git a/include/linux/firewire.h b/include/linux/firewire.h index df680216e7b6..4bd94bf5e739 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -65,13 +65,13 @@ #define CSR_DIRECTORY_ID 0x20 struct fw_csr_iterator { - u32 *p; - u32 *end; + const u32 *p; + const u32 *end; }; -void fw_csr_iterator_init(struct fw_csr_iterator *ci, u32 *p); +void fw_csr_iterator_init(struct fw_csr_iterator *ci, const u32 *p); int fw_csr_iterator_next(struct fw_csr_iterator *ci, int *key, int *value); -int fw_csr_string(u32 *directory, int key, char *buf, size_t size); +int fw_csr_string(const u32 *directory, int key, char *buf, size_t size); extern struct bus_type fw_bus_type; @@ -163,7 +163,7 @@ struct fw_device { struct mutex client_list_mutex; struct list_head client_list; - u32 *config_rom; + const u32 *config_rom; size_t config_rom_length; int config_rom_retries; unsigned is_local:1; @@ -205,7 +205,7 @@ int fw_device_enable_phys_dma(struct fw_device *device); */ struct fw_unit { struct device device; - u32 *directory; + const u32 *directory; struct fw_attribute_group attribute_group; }; -- cgit v1.2.3 From 
38b7827fcdd660f591d645bd3ae6644456a4773c Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 5 Jan 2010 15:34:49 +0900 Subject: local_t: Remove cpu_local_xx macros These macros have not been used for awhile now. Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/alpha/include/asm/local.h | 17 ----------------- arch/m32r/include/asm/local.h | 25 ------------------------- arch/mips/include/asm/local.h | 25 ------------------------- arch/powerpc/include/asm/local.h | 25 ------------------------- arch/x86/include/asm/local.h | 37 ------------------------------------- include/asm-generic/local.h | 19 ------------------- 6 files changed, 148 deletions(-) (limited to 'include') diff --git a/arch/alpha/include/asm/local.h b/arch/alpha/include/asm/local.h index 6ad3ea696421..b9e3e3318371 100644 --- a/arch/alpha/include/asm/local.h +++ b/arch/alpha/include/asm/local.h @@ -98,21 +98,4 @@ static __inline__ long local_sub_return(long i, local_t * l) #define __local_add(i,l) ((l)->a.counter+=(i)) #define __local_sub(i,l) ((l)->a.counter-=(i)) -/* Use these for per-cpu local_t variables: on some archs they are - * much more efficient than these naive implementations. Note they take - * a variable, not an address. - */ -#define cpu_local_read(l) local_read(&__get_cpu_var(l)) -#define cpu_local_set(l, i) local_set(&__get_cpu_var(l), (i)) - -#define cpu_local_inc(l) local_inc(&__get_cpu_var(l)) -#define cpu_local_dec(l) local_dec(&__get_cpu_var(l)) -#define cpu_local_add(i, l) local_add((i), &__get_cpu_var(l)) -#define cpu_local_sub(i, l) local_sub((i), &__get_cpu_var(l)) - -#define __cpu_local_inc(l) __local_inc(&__get_cpu_var(l)) -#define __cpu_local_dec(l) __local_dec(&__get_cpu_var(l)) -#define __cpu_local_add(i, l) __local_add((i), &__get_cpu_var(l)) -#define __cpu_local_sub(i, l) __local_sub((i), &__get_cpu_var(l)) - #endif /* _ALPHA_LOCAL_H */ diff --git a/arch/m32r/include/asm/local.h b/arch/m32r/include/asm/local.h index 22256d138630..734bca87018a 100644 --- a/arch/m32r/include/asm/local.h +++ b/arch/m32r/include/asm/local.h @@ -338,29 +338,4 @@ static inline void local_set_mask(unsigned long mask, local_t *addr) * a variable, not an address. */ -/* Need to disable preemption for the cpu local counters otherwise we could - still access a variable of a previous CPU in a non local way. 
*/ -#define cpu_local_wrap_v(l) \ - ({ local_t res__; \ - preempt_disable(); \ - res__ = (l); \ - preempt_enable(); \ - res__; }) -#define cpu_local_wrap(l) \ - ({ preempt_disable(); \ - l; \ - preempt_enable(); }) \ - -#define cpu_local_read(l) cpu_local_wrap_v(local_read(&__get_cpu_var(l))) -#define cpu_local_set(l, i) cpu_local_wrap(local_set(&__get_cpu_var(l), (i))) -#define cpu_local_inc(l) cpu_local_wrap(local_inc(&__get_cpu_var(l))) -#define cpu_local_dec(l) cpu_local_wrap(local_dec(&__get_cpu_var(l))) -#define cpu_local_add(i, l) cpu_local_wrap(local_add((i), &__get_cpu_var(l))) -#define cpu_local_sub(i, l) cpu_local_wrap(local_sub((i), &__get_cpu_var(l))) - -#define __cpu_local_inc(l) cpu_local_inc(l) -#define __cpu_local_dec(l) cpu_local_dec(l) -#define __cpu_local_add(i, l) cpu_local_add((i), (l)) -#define __cpu_local_sub(i, l) cpu_local_sub((i), (l)) - #endif /* __M32R_LOCAL_H */ diff --git a/arch/mips/include/asm/local.h b/arch/mips/include/asm/local.h index 361f4f16c30c..bdcdef02d147 100644 --- a/arch/mips/include/asm/local.h +++ b/arch/mips/include/asm/local.h @@ -193,29 +193,4 @@ static __inline__ long local_sub_return(long i, local_t * l) #define __local_add(i, l) ((l)->a.counter+=(i)) #define __local_sub(i, l) ((l)->a.counter-=(i)) -/* Need to disable preemption for the cpu local counters otherwise we could - still access a variable of a previous CPU in a non atomic way. */ -#define cpu_local_wrap_v(l) \ - ({ local_t res__; \ - preempt_disable(); \ - res__ = (l); \ - preempt_enable(); \ - res__; }) -#define cpu_local_wrap(l) \ - ({ preempt_disable(); \ - l; \ - preempt_enable(); }) \ - -#define cpu_local_read(l) cpu_local_wrap_v(local_read(&__get_cpu_var(l))) -#define cpu_local_set(l, i) cpu_local_wrap(local_set(&__get_cpu_var(l), (i))) -#define cpu_local_inc(l) cpu_local_wrap(local_inc(&__get_cpu_var(l))) -#define cpu_local_dec(l) cpu_local_wrap(local_dec(&__get_cpu_var(l))) -#define cpu_local_add(i, l) cpu_local_wrap(local_add((i), &__get_cpu_var(l))) -#define cpu_local_sub(i, l) cpu_local_wrap(local_sub((i), &__get_cpu_var(l))) - -#define __cpu_local_inc(l) cpu_local_inc(l) -#define __cpu_local_dec(l) cpu_local_dec(l) -#define __cpu_local_add(i, l) cpu_local_add((i), (l)) -#define __cpu_local_sub(i, l) cpu_local_sub((i), (l)) - #endif /* _ARCH_MIPS_LOCAL_H */ diff --git a/arch/powerpc/include/asm/local.h b/arch/powerpc/include/asm/local.h index 84b457a3c1bc..227753d288f6 100644 --- a/arch/powerpc/include/asm/local.h +++ b/arch/powerpc/include/asm/local.h @@ -172,29 +172,4 @@ static __inline__ long local_dec_if_positive(local_t *l) #define __local_add(i,l) ((l)->a.counter+=(i)) #define __local_sub(i,l) ((l)->a.counter-=(i)) -/* Need to disable preemption for the cpu local counters otherwise we could - still access a variable of a previous CPU in a non atomic way. 
*/ -#define cpu_local_wrap_v(l) \ - ({ local_t res__; \ - preempt_disable(); \ - res__ = (l); \ - preempt_enable(); \ - res__; }) -#define cpu_local_wrap(l) \ - ({ preempt_disable(); \ - l; \ - preempt_enable(); }) \ - -#define cpu_local_read(l) cpu_local_wrap_v(local_read(&__get_cpu_var(l))) -#define cpu_local_set(l, i) cpu_local_wrap(local_set(&__get_cpu_var(l), (i))) -#define cpu_local_inc(l) cpu_local_wrap(local_inc(&__get_cpu_var(l))) -#define cpu_local_dec(l) cpu_local_wrap(local_dec(&__get_cpu_var(l))) -#define cpu_local_add(i, l) cpu_local_wrap(local_add((i), &__get_cpu_var(l))) -#define cpu_local_sub(i, l) cpu_local_wrap(local_sub((i), &__get_cpu_var(l))) - -#define __cpu_local_inc(l) cpu_local_inc(l) -#define __cpu_local_dec(l) cpu_local_dec(l) -#define __cpu_local_add(i, l) cpu_local_add((i), (l)) -#define __cpu_local_sub(i, l) cpu_local_sub((i), (l)) - #endif /* _ARCH_POWERPC_LOCAL_H */ diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h index 47b9b6f19057..2e9972468a5d 100644 --- a/arch/x86/include/asm/local.h +++ b/arch/x86/include/asm/local.h @@ -195,41 +195,4 @@ static inline long local_sub_return(long i, local_t *l) #define __local_add(i, l) local_add((i), (l)) #define __local_sub(i, l) local_sub((i), (l)) -/* Use these for per-cpu local_t variables: on some archs they are - * much more efficient than these naive implementations. Note they take - * a variable, not an address. - * - * X86_64: This could be done better if we moved the per cpu data directly - * after GS. - */ - -/* Need to disable preemption for the cpu local counters otherwise we could - still access a variable of a previous CPU in a non atomic way. */ -#define cpu_local_wrap_v(l) \ -({ \ - local_t res__; \ - preempt_disable(); \ - res__ = (l); \ - preempt_enable(); \ - res__; \ -}) -#define cpu_local_wrap(l) \ -({ \ - preempt_disable(); \ - (l); \ - preempt_enable(); \ -}) \ - -#define cpu_local_read(l) cpu_local_wrap_v(local_read(&__get_cpu_var((l)))) -#define cpu_local_set(l, i) cpu_local_wrap(local_set(&__get_cpu_var((l)), (i))) -#define cpu_local_inc(l) cpu_local_wrap(local_inc(&__get_cpu_var((l)))) -#define cpu_local_dec(l) cpu_local_wrap(local_dec(&__get_cpu_var((l)))) -#define cpu_local_add(i, l) cpu_local_wrap(local_add((i), &__get_cpu_var((l)))) -#define cpu_local_sub(i, l) cpu_local_wrap(local_sub((i), &__get_cpu_var((l)))) - -#define __cpu_local_inc(l) cpu_local_inc((l)) -#define __cpu_local_dec(l) cpu_local_dec((l)) -#define __cpu_local_add(i, l) cpu_local_add((i), (l)) -#define __cpu_local_sub(i, l) cpu_local_sub((i), (l)) - #endif /* _ASM_X86_LOCAL_H */ diff --git a/include/asm-generic/local.h b/include/asm-generic/local.h index fc218444e315..c8a5d68541d7 100644 --- a/include/asm-generic/local.h +++ b/include/asm-generic/local.h @@ -52,23 +52,4 @@ typedef struct #define __local_add(i,l) local_set((l), local_read(l) + (i)) #define __local_sub(i,l) local_set((l), local_read(l) - (i)) -/* Use these for per-cpu local_t variables: on some archs they are - * much more efficient than these naive implementations. Note they take - * a variable (eg. mystruct.foo), not an address. - */ -#define cpu_local_read(l) local_read(&__get_cpu_var(l)) -#define cpu_local_set(l, i) local_set(&__get_cpu_var(l), (i)) -#define cpu_local_inc(l) local_inc(&__get_cpu_var(l)) -#define cpu_local_dec(l) local_dec(&__get_cpu_var(l)) -#define cpu_local_add(i, l) local_add((i), &__get_cpu_var(l)) -#define cpu_local_sub(i, l) local_sub((i), &__get_cpu_var(l)) - -/* Non-atomic increments, ie. 
preemption disabled and won't be touched - * in interrupt, etc. Some archs can optimize this case well. - */ -#define __cpu_local_inc(l) __local_inc(&__get_cpu_var(l)) -#define __cpu_local_dec(l) __local_dec(&__get_cpu_var(l)) -#define __cpu_local_add(i, l) __local_add((i), &__get_cpu_var(l)) -#define __cpu_local_sub(i, l) __local_sub((i), &__get_cpu_var(l)) - #endif /* _ASM_GENERIC_LOCAL_H */ -- cgit v1.2.3 From e1783a240f491fb233f04edc042e16b18a7a79ba Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 5 Jan 2010 15:34:50 +0900 Subject: module: Use this_cpu_xx to dynamically allocate counters Use cpu ops to deal with the per cpu data instead of a local_t. Reduces memory requirements, cache footprint and decreases cycle counts. The this_cpu_xx operations are also used for !SMP mode. Otherwise we could not drop the use of __module_ref_addr() which would make per cpu data handling complicated. this_cpu_xx operations have their own fallback for !SMP. V8-V9: - Leave include asm/module.h since ringbuffer.c depends on it. Nothing else does though. Another patch will deal with that. - Remove spurious free. Signed-off-by: Christoph Lameter Acked-by: Rusty Russell Signed-off-by: Tejun Heo --- include/linux/module.h | 36 ++++++++++++++---------------------- kernel/module.c | 29 +++++++++++++++-------------- 2 files changed, 29 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/include/linux/module.h b/include/linux/module.h index 6cb1a3cab5d3..2302f09ea2d9 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -363,11 +364,9 @@ struct module /* Destruction function. */ void (*exit)(void); -#ifdef CONFIG_SMP - char *refptr; -#else - local_t ref; -#endif + struct module_ref { + int count; + } *refptr; #endif #ifdef CONFIG_CONSTRUCTORS @@ -454,25 +453,16 @@ void __symbol_put(const char *symbol); #define symbol_put(x) __symbol_put(MODULE_SYMBOL_PREFIX #x) void symbol_put_addr(void *addr); -static inline local_t *__module_ref_addr(struct module *mod, int cpu) -{ -#ifdef CONFIG_SMP - return (local_t *) (mod->refptr + per_cpu_offset(cpu)); -#else - return &mod->ref; -#endif -} - /* Sometimes we know we already have a refcount, and it's easier not to handle the error case (which only happens with rmmod --wait). 
*/ static inline void __module_get(struct module *module) { if (module) { - unsigned int cpu = get_cpu(); - local_inc(__module_ref_addr(module, cpu)); + preempt_disable(); + __this_cpu_inc(module->refptr->count); trace_module_get(module, _THIS_IP_, - local_read(__module_ref_addr(module, cpu))); - put_cpu(); + __this_cpu_read(module->refptr->count)); + preempt_enable(); } } @@ -481,15 +471,17 @@ static inline int try_module_get(struct module *module) int ret = 1; if (module) { - unsigned int cpu = get_cpu(); + preempt_disable(); + if (likely(module_is_live(module))) { - local_inc(__module_ref_addr(module, cpu)); + __this_cpu_inc(module->refptr->count); trace_module_get(module, _THIS_IP_, - local_read(__module_ref_addr(module, cpu))); + __this_cpu_read(module->refptr->count)); } else ret = 0; - put_cpu(); + + preempt_enable(); } return ret; } diff --git a/kernel/module.c b/kernel/module.c index e96b8ed1cb6a..9bf228052ec5 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -474,9 +474,10 @@ static void module_unload_init(struct module *mod) INIT_LIST_HEAD(&mod->modules_which_use_me); for_each_possible_cpu(cpu) - local_set(__module_ref_addr(mod, cpu), 0); + per_cpu_ptr(mod->refptr, cpu)->count = 0; + /* Hold reference count during initialization. */ - local_set(__module_ref_addr(mod, raw_smp_processor_id()), 1); + __this_cpu_write(mod->refptr->count, 1); /* Backwards compatibility macros put refcount during init. */ mod->waiter = current; } @@ -619,7 +620,7 @@ unsigned int module_refcount(struct module *mod) int cpu; for_each_possible_cpu(cpu) - total += local_read(__module_ref_addr(mod, cpu)); + total += per_cpu_ptr(mod->refptr, cpu)->count; return total; } EXPORT_SYMBOL(module_refcount); @@ -796,14 +797,15 @@ static struct module_attribute refcnt = { void module_put(struct module *module) { if (module) { - unsigned int cpu = get_cpu(); - local_dec(__module_ref_addr(module, cpu)); + preempt_disable(); + __this_cpu_dec(module->refptr->count); + trace_module_put(module, _RET_IP_, - local_read(__module_ref_addr(module, cpu))); + __this_cpu_read(module->refptr->count)); /* Maybe they're waiting for us to drop reference? 
*/ if (unlikely(!module_is_live(module))) wake_up_process(module->waiter); - put_cpu(); + preempt_enable(); } } EXPORT_SYMBOL(module_put); @@ -1394,9 +1396,9 @@ static void free_module(struct module *mod) kfree(mod->args); if (mod->percpu) percpu_modfree(mod->percpu); -#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) +#if defined(CONFIG_MODULE_UNLOAD) if (mod->refptr) - percpu_modfree(mod->refptr); + free_percpu(mod->refptr); #endif /* Free lock-classes: */ lockdep_free_key_range(mod->module_core, mod->core_size); @@ -2159,9 +2161,8 @@ static noinline struct module *load_module(void __user *umod, mod = (void *)sechdrs[modindex].sh_addr; kmemleak_load_module(mod, hdr, sechdrs, secstrings); -#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) - mod->refptr = percpu_modalloc(sizeof(local_t), __alignof__(local_t), - mod->name); +#if defined(CONFIG_MODULE_UNLOAD) + mod->refptr = alloc_percpu(struct module_ref); if (!mod->refptr) { err = -ENOMEM; goto free_init; @@ -2393,8 +2394,8 @@ static noinline struct module *load_module(void __user *umod, kobject_put(&mod->mkobj.kobj); free_unload: module_unload_free(mod); -#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) - percpu_modfree(mod->refptr); +#if defined(CONFIG_MODULE_UNLOAD) + free_percpu(mod->refptr); free_init: #endif module_free(mod, mod->module_init); -- cgit v1.2.3 From 79615760f380ec86cd58204744e774c33fab9211 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 5 Jan 2010 15:34:50 +0900 Subject: local_t: Move local.h include to ringbuffer.c and ring_buffer_benchmark.c ringbuffer*.c are the last users of local.h. Remove the include from modules.h and add it to ringbuffer files. Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- include/linux/module.h | 1 - kernel/trace/ring_buffer.c | 1 + kernel/trace/ring_buffer_benchmark.c | 1 + 3 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/module.h b/include/linux/module.h index 2302f09ea2d9..7e74ae0051cc 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -17,7 +17,6 @@ #include #include -#include #include #include diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 2326b04c95c4..eb6c8988c31a 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -20,6 +20,7 @@ #include #include +#include #include "trace.h" /* diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index b2477caf09c2..df74c7982255 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c @@ -8,6 +8,7 @@ #include #include #include +#include struct rb_page { u64 ts; -- cgit v1.2.3 From 99dcc3e5a94ed491fbef402831d8c0bbb267f995 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 5 Jan 2010 15:34:51 +0900 Subject: this_cpu: Page allocator conversion Use the per cpu allocator functionality to avoid per cpu arrays in struct zone. This drastically reduces the size of struct zone for systems with large amounts of processors and allows placement of critical variables of struct zone in one cacheline even on very large systems. Another effect is that the pagesets of one processor are placed near one another. If multiple pagesets from different zones fit into one cacheline then additional cacheline fetches can be avoided on the hot paths when allocating memory from multiple zones. Bootstrap becomes simpler if we use the same scheme for UP, SMP, NUMA. #ifdefs are reduced and we can drop the zone_pcp macro. 
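As an illustrative sketch of the resulting access pattern (all identifiers as used in the diff below), code that previously went through zone_pcp() now reads roughly:

	/* walk a specific CPU's pageset, e.g. when draining pages or folding stats */
	struct per_cpu_pageset *pset = per_cpu_ptr(zone->pageset, cpu);

	/* hot path on the local CPU, with interrupts already disabled */
	struct per_cpu_pages *pcp = &this_cpu_ptr(zone->pageset)->pcp;
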
Hotplug handling is also simplified since cpu alloc can bring up and shut down cpu areas for a specific cpu as a whole. So there is no need to allocate or free individual pagesets. V7-V8: - Explain chicken egg dilemmna with percpu allocator. V4-V5: - Fix up cases where per_cpu_ptr is called before irq disable - Integrate the bootstrap logic that was separate before. tj: Build failure in pageset_cpuup_callback() due to missing ret variable fixed. Reviewed-by: Mel Gorman Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- include/linux/mm.h | 4 - include/linux/mmzone.h | 12 +-- mm/page_alloc.c | 202 +++++++++++++++++-------------------------------- mm/vmstat.c | 14 ++-- 4 files changed, 81 insertions(+), 151 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 2265f28eb47a..554fa395aac9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1079,11 +1079,7 @@ extern void si_meminfo(struct sysinfo * val); extern void si_meminfo_node(struct sysinfo *val, int nid); extern int after_bootmem; -#ifdef CONFIG_NUMA extern void setup_per_cpu_pageset(void); -#else -static inline void setup_per_cpu_pageset(void) {} -#endif extern void zone_pcp_update(struct zone *zone); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 30fe668c2542..7874201a3556 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -184,13 +184,7 @@ struct per_cpu_pageset { s8 stat_threshold; s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS]; #endif -} ____cacheline_aligned_in_smp; - -#ifdef CONFIG_NUMA -#define zone_pcp(__z, __cpu) ((__z)->pageset[(__cpu)]) -#else -#define zone_pcp(__z, __cpu) (&(__z)->pageset[(__cpu)]) -#endif +}; #endif /* !__GENERATING_BOUNDS.H */ @@ -306,10 +300,8 @@ struct zone { */ unsigned long min_unmapped_pages; unsigned long min_slab_pages; - struct per_cpu_pageset *pageset[NR_CPUS]; -#else - struct per_cpu_pageset pageset[NR_CPUS]; #endif + struct per_cpu_pageset *pageset; /* * free areas of different sizes */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 4e9f5cc5fb59..6849e870de54 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1008,10 +1008,10 @@ static void drain_pages(unsigned int cpu) struct per_cpu_pageset *pset; struct per_cpu_pages *pcp; - pset = zone_pcp(zone, cpu); + local_irq_save(flags); + pset = per_cpu_ptr(zone->pageset, cpu); pcp = &pset->pcp; - local_irq_save(flags); free_pcppages_bulk(zone, pcp->count, pcp); pcp->count = 0; local_irq_restore(flags); @@ -1095,7 +1095,6 @@ static void free_hot_cold_page(struct page *page, int cold) arch_free_page(page, 0); kernel_map_pages(page, 1, 0); - pcp = &zone_pcp(zone, get_cpu())->pcp; migratetype = get_pageblock_migratetype(page); set_page_private(page, migratetype); local_irq_save(flags); @@ -1118,6 +1117,7 @@ static void free_hot_cold_page(struct page *page, int cold) migratetype = MIGRATE_MOVABLE; } + pcp = &this_cpu_ptr(zone->pageset)->pcp; if (cold) list_add_tail(&page->lru, &pcp->lists[migratetype]); else @@ -1130,7 +1130,6 @@ static void free_hot_cold_page(struct page *page, int cold) out: local_irq_restore(flags); - put_cpu(); } void free_hot_page(struct page *page) @@ -1180,17 +1179,15 @@ struct page *buffered_rmqueue(struct zone *preferred_zone, unsigned long flags; struct page *page; int cold = !!(gfp_flags & __GFP_COLD); - int cpu; again: - cpu = get_cpu(); if (likely(order == 0)) { struct per_cpu_pages *pcp; struct list_head *list; - pcp = &zone_pcp(zone, cpu)->pcp; - list = &pcp->lists[migratetype]; local_irq_save(flags); + pcp = 
&this_cpu_ptr(zone->pageset)->pcp; + list = &pcp->lists[migratetype]; if (list_empty(list)) { pcp->count += rmqueue_bulk(zone, 0, pcp->batch, list, @@ -1231,7 +1228,6 @@ again: __count_zone_vm_events(PGALLOC, zone, 1 << order); zone_statistics(preferred_zone, zone); local_irq_restore(flags); - put_cpu(); VM_BUG_ON(bad_range(zone, page)); if (prep_new_page(page, order, gfp_flags)) @@ -1240,7 +1236,6 @@ again: failed: local_irq_restore(flags); - put_cpu(); return NULL; } @@ -2179,7 +2174,7 @@ void show_free_areas(void) for_each_online_cpu(cpu) { struct per_cpu_pageset *pageset; - pageset = zone_pcp(zone, cpu); + pageset = per_cpu_ptr(zone->pageset, cpu); printk("CPU %4d: hi:%5d, btch:%4d usd:%4d\n", cpu, pageset->pcp.high, @@ -2744,10 +2739,29 @@ static void build_zonelist_cache(pg_data_t *pgdat) #endif /* CONFIG_NUMA */ +/* + * Boot pageset table. One per cpu which is going to be used for all + * zones and all nodes. The parameters will be set in such a way + * that an item put on a list will immediately be handed over to + * the buddy list. This is safe since pageset manipulation is done + * with interrupts disabled. + * + * The boot_pagesets must be kept even after bootup is complete for + * unused processors and/or zones. They do play a role for bootstrapping + * hotplugged processors. + * + * zoneinfo_show() and maybe other functions do + * not check if the processor is online before following the pageset pointer. + * Other parts of the kernel may not check if the zone is available. + */ +static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch); +static DEFINE_PER_CPU(struct per_cpu_pageset, boot_pageset); + /* return values int ....just for stop_machine() */ static int __build_all_zonelists(void *dummy) { int nid; + int cpu; #ifdef CONFIG_NUMA memset(node_load, 0, sizeof(node_load)); @@ -2758,6 +2772,23 @@ static int __build_all_zonelists(void *dummy) build_zonelists(pgdat); build_zonelist_cache(pgdat); } + + /* + * Initialize the boot_pagesets that are going to be used + * for bootstrapping processors. The real pagesets for + * each zone will be allocated later when the per cpu + * allocator is available. + * + * boot_pagesets are used also for bootstrapping offline + * cpus if the system is already booted because the pagesets + * are needed to initialize allocators on a specific cpu too. + * F.e. the percpu allocator needs the page allocator which + * needs the percpu allocator in order to allocate its pagesets + * (a chicken-egg dilemma). + */ + for_each_possible_cpu(cpu) + setup_pageset(&per_cpu(boot_pageset, cpu), 0); + return 0; } @@ -3095,121 +3126,33 @@ static void setup_pagelist_highmark(struct per_cpu_pageset *p, pcp->batch = PAGE_SHIFT * 8; } - -#ifdef CONFIG_NUMA -/* - * Boot pageset table. One per cpu which is going to be used for all - * zones and all nodes. The parameters will be set in such a way - * that an item put on a list will immediately be handed over to - * the buddy list. This is safe since pageset manipulation is done - * with interrupts disabled. - * - * Some NUMA counter updates may also be caught by the boot pagesets. - * - * The boot_pagesets must be kept even after bootup is complete for - * unused processors and/or zones. They do play a role for bootstrapping - * hotplugged processors. - * - * zoneinfo_show() and maybe other functions do - * not check if the processor is online before following the pageset pointer. - * Other parts of the kernel may not check if the zone is available. 
- */ -static struct per_cpu_pageset boot_pageset[NR_CPUS]; - /* - * Dynamically allocate memory for the - * per cpu pageset array in struct zone. + * Allocate per cpu pagesets and initialize them. + * Before this call only boot pagesets were available. + * Boot pagesets will no longer be used by this processorr + * after setup_per_cpu_pageset(). */ -static int __cpuinit process_zones(int cpu) +void __init setup_per_cpu_pageset(void) { - struct zone *zone, *dzone; - int node = cpu_to_node(cpu); - - node_set_state(node, N_CPU); /* this node has a cpu */ + struct zone *zone; + int cpu; for_each_populated_zone(zone) { - zone_pcp(zone, cpu) = kmalloc_node(sizeof(struct per_cpu_pageset), - GFP_KERNEL, node); - if (!zone_pcp(zone, cpu)) - goto bad; - - setup_pageset(zone_pcp(zone, cpu), zone_batchsize(zone)); - - if (percpu_pagelist_fraction) - setup_pagelist_highmark(zone_pcp(zone, cpu), - (zone->present_pages / percpu_pagelist_fraction)); - } - - return 0; -bad: - for_each_zone(dzone) { - if (!populated_zone(dzone)) - continue; - if (dzone == zone) - break; - kfree(zone_pcp(dzone, cpu)); - zone_pcp(dzone, cpu) = &boot_pageset[cpu]; - } - return -ENOMEM; -} + zone->pageset = alloc_percpu(struct per_cpu_pageset); -static inline void free_zone_pagesets(int cpu) -{ - struct zone *zone; - - for_each_zone(zone) { - struct per_cpu_pageset *pset = zone_pcp(zone, cpu); + for_each_possible_cpu(cpu) { + struct per_cpu_pageset *pcp = per_cpu_ptr(zone->pageset, cpu); - /* Free per_cpu_pageset if it is slab allocated */ - if (pset != &boot_pageset[cpu]) - kfree(pset); - zone_pcp(zone, cpu) = &boot_pageset[cpu]; - } -} + setup_pageset(pcp, zone_batchsize(zone)); -static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb, - unsigned long action, - void *hcpu) -{ - int cpu = (long)hcpu; - int ret = NOTIFY_OK; - - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - if (process_zones(cpu)) - ret = NOTIFY_BAD; - break; - case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: - case CPU_DEAD: - case CPU_DEAD_FROZEN: - free_zone_pagesets(cpu); - break; - default: - break; + if (percpu_pagelist_fraction) + setup_pagelist_highmark(pcp, + (zone->present_pages / + percpu_pagelist_fraction)); + } } - return ret; } -static struct notifier_block __cpuinitdata pageset_notifier = - { &pageset_cpuup_callback, NULL, 0 }; - -void __init setup_per_cpu_pageset(void) -{ - int err; - - /* Initialize per_cpu_pageset for cpu 0. - * A cpuup callback will do this for every cpu - * as it comes online - */ - err = process_zones(smp_processor_id()); - BUG_ON(err); - register_cpu_notifier(&pageset_notifier); -} - -#endif - static noinline __init_refok int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages) { @@ -3263,7 +3206,7 @@ static int __zone_pcp_update(void *data) struct per_cpu_pageset *pset; struct per_cpu_pages *pcp; - pset = zone_pcp(zone, cpu); + pset = per_cpu_ptr(zone->pageset, cpu); pcp = &pset->pcp; local_irq_save(flags); @@ -3281,21 +3224,17 @@ void zone_pcp_update(struct zone *zone) static __meminit void zone_pcp_init(struct zone *zone) { - int cpu; - unsigned long batch = zone_batchsize(zone); + /* + * per cpu subsystem is not up at this point. The following code + * relies on the ability of the linker to provide the + * offset of a (static) per cpu variable into the per cpu area. + */ + zone->pageset = &boot_pageset; - for (cpu = 0; cpu < NR_CPUS; cpu++) { -#ifdef CONFIG_NUMA - /* Early boot. 
Slab allocator not functional yet */ - zone_pcp(zone, cpu) = &boot_pageset[cpu]; - setup_pageset(&boot_pageset[cpu],0); -#else - setup_pageset(zone_pcp(zone,cpu), batch); -#endif - } if (zone->present_pages) - printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%lu\n", - zone->name, zone->present_pages, batch); + printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%u\n", + zone->name, zone->present_pages, + zone_batchsize(zone)); } __meminit int init_currently_empty_zone(struct zone *zone, @@ -4809,10 +4748,11 @@ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write, if (!write || (ret == -EINVAL)) return ret; for_each_populated_zone(zone) { - for_each_online_cpu(cpu) { + for_each_possible_cpu(cpu) { unsigned long high; high = zone->present_pages / percpu_pagelist_fraction; - setup_pagelist_highmark(zone_pcp(zone, cpu), high); + setup_pagelist_highmark( + per_cpu_ptr(zone->pageset, cpu), high); } } return 0; diff --git a/mm/vmstat.c b/mm/vmstat.c index 6051fbab67ba..1ba0bb7ad043 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -139,7 +139,8 @@ static void refresh_zone_stat_thresholds(void) threshold = calculate_threshold(zone); for_each_online_cpu(cpu) - zone_pcp(zone, cpu)->stat_threshold = threshold; + per_cpu_ptr(zone->pageset, cpu)->stat_threshold + = threshold; } } @@ -149,7 +150,8 @@ static void refresh_zone_stat_thresholds(void) void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, int delta) { - struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id()); + struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset); + s8 *p = pcp->vm_stat_diff + item; long x; @@ -202,7 +204,7 @@ EXPORT_SYMBOL(mod_zone_page_state); */ void __inc_zone_state(struct zone *zone, enum zone_stat_item item) { - struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id()); + struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset); s8 *p = pcp->vm_stat_diff + item; (*p)++; @@ -223,7 +225,7 @@ EXPORT_SYMBOL(__inc_zone_page_state); void __dec_zone_state(struct zone *zone, enum zone_stat_item item) { - struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id()); + struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset); s8 *p = pcp->vm_stat_diff + item; (*p)--; @@ -300,7 +302,7 @@ void refresh_cpu_vm_stats(int cpu) for_each_populated_zone(zone) { struct per_cpu_pageset *p; - p = zone_pcp(zone, cpu); + p = per_cpu_ptr(zone->pageset, cpu); for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) if (p->vm_stat_diff[i]) { @@ -741,7 +743,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, for_each_online_cpu(i) { struct per_cpu_pageset *pageset; - pageset = zone_pcp(zone, i); + pageset = per_cpu_ptr(zone->pageset, i); seq_printf(m, "\n cpu: %i" "\n count: %i" -- cgit v1.2.3 From 0ed731859e24cd6e3ec058cf2b49b2a0df80e86b Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 6 Jan 2010 09:23:54 +0900 Subject: LSM: Update comment on security_sock_rcv_skb It is not permitted to do sleeping operation inside security_sock_rcv_skb(). Signed-off-by: Tetsuo Handa Acked-by: Serge Hallyn -- Signed-off-by: James Morris --- include/linux/security.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index 466cbadbd1ef..3696ca345745 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -978,6 +978,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * Check permissions on incoming network packets. 
This hook is distinct * from Netfilter's IP input hooks since it is the first time that the * incoming sk_buff @skb has been associated with a particular socket, @sk. + * Must not sleep inside this hook because some callers hold spinlocks. * @sk contains the sock (not socket) associated with the incoming sk_buff. * @skb contains the incoming network data. * @socket_getpeersec_stream: -- cgit v1.2.3 From c955fe8e0bdd7be7a6bc2d49245d570a816f7cc5 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Thu, 15 Oct 2009 14:31:30 +0400 Subject: POWER: Add support for cycle_count Signed-off-by: Alexey Starikovskiy Signed-off-by: Len Brown --- drivers/power/power_supply_sysfs.c | 1 + include/linux/power_supply.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/drivers/power/power_supply_sysfs.c b/drivers/power/power_supply_sysfs.c index c790e0c77d4b..ff05e6189768 100644 --- a/drivers/power/power_supply_sysfs.c +++ b/drivers/power/power_supply_sysfs.c @@ -99,6 +99,7 @@ static struct device_attribute power_supply_attrs[] = { POWER_SUPPLY_ATTR(present), POWER_SUPPLY_ATTR(online), POWER_SUPPLY_ATTR(technology), + POWER_SUPPLY_ATTR(cycle_count), POWER_SUPPLY_ATTR(voltage_max), POWER_SUPPLY_ATTR(voltage_min), POWER_SUPPLY_ATTR(voltage_max_design), diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index b5d096d3a9be..ebd2b8fb00d0 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -82,6 +82,7 @@ enum power_supply_property { POWER_SUPPLY_PROP_PRESENT, POWER_SUPPLY_PROP_ONLINE, POWER_SUPPLY_PROP_TECHNOLOGY, + POWER_SUPPLY_PROP_CYCLE_COUNT, POWER_SUPPLY_PROP_VOLTAGE_MAX, POWER_SUPPLY_PROP_VOLTAGE_MIN, POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN, -- cgit v1.2.3 From d2d4e780aff2fab46a792ebc89f80d1a6872b325 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Mon, 18 Jan 2010 07:20:28 +0000 Subject: ide: add drive->pio_mode field Add pio_mode field to ide_drive_t matching pio_mode field used in struct ata_device. The validity of the field is restricted to ->set_pio_mode method only currently in IDE subsystem. Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: David S. Miller --- drivers/ide/ide-devsets.c | 2 ++ drivers/ide/ide-probe.c | 2 ++ drivers/ide/ide-xfer-mode.c | 3 +++ include/linux/ide.h | 1 + 4 files changed, 8 insertions(+) (limited to 'include') diff --git a/drivers/ide/ide-devsets.c b/drivers/ide/ide-devsets.c index 1099bf7cf968..cb3341ce655c 100644 --- a/drivers/ide/ide-devsets.c +++ b/drivers/ide/ide-devsets.c @@ -105,6 +105,8 @@ static int set_pio_mode(ide_drive_t *drive, int arg) return -ENOSYS; if (set_pio_mode_abuse(drive->hwif, arg)) { + drive->pio_mode = arg + XFER_PIO_0; + if (arg == 8 || arg == 9) { unsigned long flags; diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 4d76ba473097..9a9f10f4cf9f 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -1043,6 +1043,8 @@ static void ide_port_init_devices(ide_hwif_t *hwif) if (hwif->host_flags & IDE_HFLAG_NO_UNMASK_IRQS) drive->dev_flags |= IDE_DFLAG_NO_UNMASK; + drive->pio_mode = XFER_PIO_0; + if (port_ops && port_ops->init_dev) port_ops->init_dev(drive); } diff --git a/drivers/ide/ide-xfer-mode.c b/drivers/ide/ide-xfer-mode.c index 46d203ce60cc..cdae463f6b41 100644 --- a/drivers/ide/ide-xfer-mode.c +++ b/drivers/ide/ide-xfer-mode.c @@ -135,6 +135,7 @@ int ide_set_pio_mode(ide_drive_t *drive, const u8 mode) * set transfer mode on the device in ->set_pio_mode method... 
*/ if (port_ops->set_dma_mode == NULL) { + drive->pio_mode = mode; port_ops->set_pio_mode(drive, mode - XFER_PIO_0); return 0; } @@ -142,9 +143,11 @@ int ide_set_pio_mode(ide_drive_t *drive, const u8 mode) if (hwif->host_flags & IDE_HFLAG_POST_SET_MODE) { if (ide_config_drive_speed(drive, mode)) return -1; + drive->pio_mode = mode; port_ops->set_pio_mode(drive, mode - XFER_PIO_0); return 0; } else { + drive->pio_mode = mode; port_ops->set_pio_mode(drive, mode - XFER_PIO_0); return ide_config_drive_speed(drive, mode); } diff --git a/include/linux/ide.h b/include/linux/ide.h index 0ec612959042..b5d2e9655059 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -515,6 +515,7 @@ struct ide_drive_s { u8 init_speed; /* transfer rate set at boot */ u8 current_speed; /* current transfer rate set */ u8 desired_speed; /* desired transfer rate set */ + u8 pio_mode; /* for ->set_pio_mode _only_ */ u8 dn; /* now wide spread use */ u8 acoustic; /* acoustic management */ u8 media; /* disk, cdrom, tape, floppy, ... */ -- cgit v1.2.3 From 3fccaa192b9501e79a57e02e62b6bf420d2b461e Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Mon, 18 Jan 2010 07:20:35 +0000 Subject: ide: add drive->dma_mode field Add dma_mode field to ide_drive_t matching dma_mode field used in struct ata_device. The validity of the field is restricted to ->dma_pio_mode method only currently in IDE subsystem. Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: David S. Miller --- drivers/ide/aec62xx.c | 1 + drivers/ide/ide-xfer-mode.c | 2 ++ include/linux/ide.h | 1 + 3 files changed, 4 insertions(+) (limited to 'include') diff --git a/drivers/ide/aec62xx.c b/drivers/ide/aec62xx.c index 878f8ec6dbe1..4c869872eb9a 100644 --- a/drivers/ide/aec62xx.c +++ b/drivers/ide/aec62xx.c @@ -136,6 +136,7 @@ static void aec6260_set_mode(ide_drive_t *drive, const u8 speed) static void aec_set_pio_mode(ide_drive_t *drive, const u8 pio) { + drive->dma_mode = pio + XFER_PIO_0; drive->hwif->port_ops->set_dma_mode(drive, pio + XFER_PIO_0); } diff --git a/drivers/ide/ide-xfer-mode.c b/drivers/ide/ide-xfer-mode.c index cdae463f6b41..c2323869d92a 100644 --- a/drivers/ide/ide-xfer-mode.c +++ b/drivers/ide/ide-xfer-mode.c @@ -167,9 +167,11 @@ int ide_set_dma_mode(ide_drive_t *drive, const u8 mode) if (hwif->host_flags & IDE_HFLAG_POST_SET_MODE) { if (ide_config_drive_speed(drive, mode)) return -1; + drive->dma_mode = mode; port_ops->set_dma_mode(drive, mode); return 0; } else { + drive->dma_mode = mode; port_ops->set_dma_mode(drive, mode); return ide_config_drive_speed(drive, mode); } diff --git a/include/linux/ide.h b/include/linux/ide.h index b5d2e9655059..746ef9fdabcb 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -516,6 +516,7 @@ struct ide_drive_s { u8 current_speed; /* current transfer rate set */ u8 desired_speed; /* desired transfer rate set */ u8 pio_mode; /* for ->set_pio_mode _only_ */ + u8 dma_mode; /* for ->dma_pio_mode _only_ */ u8 dn; /* now wide spread use */ u8 acoustic; /* acoustic management */ u8 media; /* disk, cdrom, tape, floppy, ... */ -- cgit v1.2.3 From e085b3cae85af47eb0a3eda3186bd898310fb322 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 19 Jan 2010 01:44:41 -0800 Subject: ide: change ->set_pio_mode method parameters Change ->set_pio_mode method parameters to match ->set_piomode method used in struct ata_port_operations. Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: David S. 
Miller --- drivers/ide/aec62xx.c | 6 +++--- drivers/ide/ali14xx.c | 3 ++- drivers/ide/alim15x3.c | 7 +++---- drivers/ide/amd74xx.c | 4 ++-- drivers/ide/at91_ide.c | 5 +++-- drivers/ide/atiixp.c | 7 ++++--- drivers/ide/au1xxx-ide.c | 5 ++--- drivers/ide/cmd640.c | 3 ++- drivers/ide/cmd64x.c | 4 +++- drivers/ide/cs5520.c | 7 ++++--- drivers/ide/cs5530.c | 7 ++++--- drivers/ide/cs5535.c | 6 +++--- drivers/ide/cs5536.c | 7 ++++--- drivers/ide/cy82c693.c | 5 ++--- drivers/ide/dtc2278.c | 4 ++-- drivers/ide/hpt366.c | 4 ++-- drivers/ide/ht6560b.c | 3 ++- drivers/ide/ide-devsets.c | 4 ++-- drivers/ide/ide-xfer-mode.c | 6 +++--- drivers/ide/it8172.c | 10 +++++----- drivers/ide/it8213.c | 14 +++++++------- drivers/ide/it821x.c | 6 +++--- drivers/ide/jmicron.c | 2 +- drivers/ide/opti621.c | 6 +++--- drivers/ide/palm_bk3710.c | 5 +++-- drivers/ide/pdc202xx_new.c | 4 ++-- drivers/ide/pdc202xx_old.c | 4 ++-- drivers/ide/piix.c | 14 +++++++------- drivers/ide/pmac.c | 5 ++--- drivers/ide/qd65xx.c | 10 ++++------ drivers/ide/sc1200.c | 4 ++-- drivers/ide/scc_pata.c | 6 +++--- drivers/ide/serverworks.c | 5 +++-- drivers/ide/siimage.c | 6 +++--- drivers/ide/sis5513.c | 4 ++-- drivers/ide/sl82c105.c | 5 +++-- drivers/ide/slc90e66.c | 13 +++++++------ drivers/ide/tc86c001.c | 4 ++-- drivers/ide/triflex.c | 4 ++-- drivers/ide/tx4938ide.c | 5 ++--- drivers/ide/tx4939ide.c | 4 ++-- drivers/ide/umc8672.c | 5 +++-- drivers/ide/via82cxxx.c | 6 +++--- include/linux/ide.h | 2 +- 44 files changed, 129 insertions(+), 121 deletions(-) (limited to 'include') diff --git a/drivers/ide/aec62xx.c b/drivers/ide/aec62xx.c index 4c869872eb9a..3790847361c3 100644 --- a/drivers/ide/aec62xx.c +++ b/drivers/ide/aec62xx.c @@ -134,10 +134,10 @@ static void aec6260_set_mode(ide_drive_t *drive, const u8 speed) local_irq_restore(flags); } -static void aec_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void aec_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - drive->dma_mode = pio + XFER_PIO_0; - drive->hwif->port_ops->set_dma_mode(drive, pio + XFER_PIO_0); + drive->dma_mode = drive->pio_mode; + hwif->port_ops->set_dma_mode(drive, drive->dma_mode); } static int init_chipset_aec62xx(struct pci_dev *dev) diff --git a/drivers/ide/ali14xx.c b/drivers/ide/ali14xx.c index 90da1f953ed0..25b9fe3a9f8e 100644 --- a/drivers/ide/ali14xx.c +++ b/drivers/ide/ali14xx.c @@ -109,13 +109,14 @@ static DEFINE_SPINLOCK(ali14xx_lock); * This function computes timing parameters * and sets controller registers accordingly. */ -static void ali14xx_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void ali14xx_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { int driveNum; int time1, time2; u8 param1, param2, param3, param4; unsigned long flags; int bus_speed = ide_vlb_clk ? ide_vlb_clk : 50; + const u8 pio = drive->pio_mode - XFER_PIO_0; struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio); /* calculate timing, according to PIO mode */ diff --git a/drivers/ide/alim15x3.c b/drivers/ide/alim15x3.c index 8f03cce055fa..28cee1055f76 100644 --- a/drivers/ide/alim15x3.c +++ b/drivers/ide/alim15x3.c @@ -63,15 +63,14 @@ static void ali_fifo_control(ide_hwif_t *hwif, ide_drive_t *drive, int on) /** * ali_set_pio_mode - set host controller for PIO mode + * @hwif: port * @drive: drive - * @pio: PIO mode number * * Program the controller for the given PIO mode. 
*/ -static void ali_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void ali_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); int bus_speed = ide_pci_clk ? ide_pci_clk : 33; unsigned long T = 1000000 / bus_speed; /* PCI clock based */ @@ -79,7 +78,7 @@ static void ali_set_pio_mode(ide_drive_t *drive, const u8 pio) u8 unit = drive->dn & 1; struct ide_timing t; - ide_timing_compute(drive, XFER_PIO_0 + pio, &t, T, 1); + ide_timing_compute(drive, drive->pio_mode, &t, T, 1); t.setup = clamp_val(t.setup, 1, 8) & 7; t.active = clamp_val(t.active, 1, 8) & 7; diff --git a/drivers/ide/amd74xx.c b/drivers/ide/amd74xx.c index 108e9b676859..3eee7be7ca6f 100644 --- a/drivers/ide/amd74xx.c +++ b/drivers/ide/amd74xx.c @@ -108,9 +108,9 @@ static void amd_set_drive(ide_drive_t *drive, const u8 speed) * amd_set_pio_mode() is a callback from upper layers for PIO-only tuning. */ -static void amd_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void amd_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - amd_set_drive(drive, XFER_PIO_0 + pio); + amd_set_drive(drive, drive->pio_mode); } static void amd7409_cable_detect(struct pci_dev *dev) diff --git a/drivers/ide/at91_ide.c b/drivers/ide/at91_ide.c index 248219a89a68..000a78e5246c 100644 --- a/drivers/ide/at91_ide.c +++ b/drivers/ide/at91_ide.c @@ -172,11 +172,12 @@ static void at91_ide_output_data(ide_drive_t *drive, struct ide_cmd *cmd, leave_16bit(chipselect, mode); } -static void at91_ide_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void at91_ide_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { struct ide_timing *timing; - u8 chipselect = drive->hwif->select_data; + u8 chipselect = hwif->select_data; int use_iordy = 0; + const u8 pio = drive->pio_mode - XFER_PIO_0; pdbg("chipselect %u pio %u\n", chipselect, pio); diff --git a/drivers/ide/atiixp.c b/drivers/ide/atiixp.c index 837322b10a4c..b6848dfb93b0 100644 --- a/drivers/ide/atiixp.c +++ b/drivers/ide/atiixp.c @@ -42,19 +42,20 @@ static DEFINE_SPINLOCK(atiixp_lock); /** * atiixp_set_pio_mode - set host controller for PIO mode + * @hwif: port * @drive: drive - * @pio: PIO mode number * * Set the interface PIO mode. */ -static void atiixp_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void atiixp_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - struct pci_dev *dev = to_pci_dev(drive->hwif->dev); + struct pci_dev *dev = to_pci_dev(hwif->dev); unsigned long flags; int timing_shift = (drive->dn ^ 1) * 8; u32 pio_timing_data; u16 pio_mode_data; + const u8 pio = drive->pio_mode - XFER_PIO_0; spin_lock_irqsave(&atiixp_lock, flags); diff --git a/drivers/ide/au1xxx-ide.c b/drivers/ide/au1xxx-ide.c index 87cef0c440ad..c90e9b0a9f6e 100644 --- a/drivers/ide/au1xxx-ide.c +++ b/drivers/ide/au1xxx-ide.c @@ -99,12 +99,11 @@ static void au1xxx_output_data(ide_drive_t *drive, struct ide_cmd *cmd, } #endif -static void au1xxx_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void au1xxx_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { int mem_sttime = 0, mem_stcfg = au_readl(MEM_STCFG2); - /* set pio mode! 
*/ - switch(pio) { + switch (drive->pio_mode - XFER_PIO_0) { case 0: mem_sttime = SBC_IDE_TIMING(PIO0); diff --git a/drivers/ide/cmd640.c b/drivers/ide/cmd640.c index 1a32d62ed86b..c7d46a3d347a 100644 --- a/drivers/ide/cmd640.c +++ b/drivers/ide/cmd640.c @@ -572,9 +572,10 @@ static void cmd640_set_mode(ide_drive_t *drive, unsigned int index, program_drive_counts(drive, index); } -static void cmd640_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void cmd640_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { unsigned int index = 0, cycle_time; + const u8 pio = drive->pio_mode - XFER_PIO_0; u8 b; switch (pio) { diff --git a/drivers/ide/cmd64x.c b/drivers/ide/cmd64x.c index 9f89f3116df0..0b11745937e7 100644 --- a/drivers/ide/cmd64x.c +++ b/drivers/ide/cmd64x.c @@ -127,8 +127,10 @@ static void cmd64x_program_timings(ide_drive_t *drive, u8 mode) * Special cases are 8: prefetch off, 9: prefetch on (both never worked) */ -static void cmd64x_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void cmd64x_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { + const u8 pio = drive->pio_mode - XFER_PIO_0; + /* * Filter out the prefetch control values * to prevent PIO5 from being programmed diff --git a/drivers/ide/cs5520.c b/drivers/ide/cs5520.c index 09f98ed0731f..b8094f049f3e 100644 --- a/drivers/ide/cs5520.c +++ b/drivers/ide/cs5520.c @@ -57,11 +57,11 @@ static struct pio_clocks cs5520_pio_clocks[]={ {1, 2, 1} }; -static void cs5520_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void cs5520_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct pci_dev *pdev = to_pci_dev(hwif->dev); int controller = drive->dn > 1 ? 1 : 0; + const u8 pio = drive->pio_mode - XFER_PIO_0; /* 8bit CAT/CRT - 8bit command timing for channel */ pci_write_config_byte(pdev, 0x62 + controller, @@ -85,7 +85,8 @@ static void cs5520_set_dma_mode(ide_drive_t *drive, const u8 speed) { printk(KERN_ERR "cs55x0: bad ide timing.\n"); - cs5520_set_pio_mode(drive, 0); + drive->pio_mode = XFER_PIO_0 + 0; + cs5520_set_pio_mode(drive->hwif, drive); } static const struct ide_port_ops cs5520_port_ops = { diff --git a/drivers/ide/cs5530.c b/drivers/ide/cs5530.c index 40bf05eddf6e..4ced40255ad6 100644 --- a/drivers/ide/cs5530.c +++ b/drivers/ide/cs5530.c @@ -41,8 +41,8 @@ static unsigned int cs5530_pio_timings[2][5] = { /** * cs5530_set_pio_mode - set host controller for PIO mode + * @hwif: port * @drive: drive - * @pio: PIO mode number * * Handles setting of PIO mode for the chipset. * @@ -50,10 +50,11 @@ static unsigned int cs5530_pio_timings[2][5] = { * will have valid default PIO timings set up before we get here. */ -static void cs5530_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void cs5530_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - unsigned long basereg = CS5530_BASEREG(drive->hwif); + unsigned long basereg = CS5530_BASEREG(hwif); unsigned int format = (inl(basereg + 4) >> 31) & 1; + const u8 pio = drive->pio_mode - XFER_PIO_0; outl(cs5530_pio_timings[format][pio], basereg + ((drive->dn & 1)<<3)); } diff --git a/drivers/ide/cs5535.c b/drivers/ide/cs5535.c index b883838adc24..7974415ea89f 100644 --- a/drivers/ide/cs5535.c +++ b/drivers/ide/cs5535.c @@ -142,15 +142,15 @@ static void cs5535_set_dma_mode(ide_drive_t *drive, const u8 speed) /** * cs5535_set_pio_mode - set host controller for PIO mode + * @hwif: port * @drive: drive - * @pio: PIO mode number * * A callback from the upper layers for PIO-only tuning. 
*/ -static void cs5535_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void cs5535_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - cs5535_set_speed(drive, XFER_PIO_0 + pio); + cs5535_set_speed(drive, drive->pio_mode); } static u8 cs5535_cable_detect(ide_hwif_t *hwif) diff --git a/drivers/ide/cs5536.c b/drivers/ide/cs5536.c index 9623b852c616..b518ef0e9a35 100644 --- a/drivers/ide/cs5536.c +++ b/drivers/ide/cs5536.c @@ -125,11 +125,11 @@ static u8 cs5536_cable_detect(ide_hwif_t *hwif) /** * cs5536_set_pio_mode - PIO timing setup + * @hwif: ATA port * @drive: ATA device - * @pio: PIO mode number */ -static void cs5536_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void cs5536_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { static const u8 drv_timings[5] = { 0x98, 0x55, 0x32, 0x21, 0x20, @@ -143,11 +143,12 @@ static void cs5536_set_pio_mode(ide_drive_t *drive, const u8 pio) 0x99, 0x92, 0x90, 0x22, 0x20, }; - struct pci_dev *pdev = to_pci_dev(drive->hwif->dev); + struct pci_dev *pdev = to_pci_dev(hwif->dev); ide_drive_t *pair = ide_get_pair_dev(drive); int cshift = (drive->dn & 1) ? IDE_CAST_D1_SHIFT : IDE_CAST_D0_SHIFT; unsigned long timings = (unsigned long)ide_get_drivedata(drive); u32 cast; + const u8 pio = drive->pio_mode - XFER_PIO_0; u8 cmd_pio = pio; if (pair) diff --git a/drivers/ide/cy82c693.c b/drivers/ide/cy82c693.c index fbf3dcc26577..ead65c394f00 100644 --- a/drivers/ide/cy82c693.c +++ b/drivers/ide/cy82c693.c @@ -80,9 +80,8 @@ static void cy82c693_set_dma_mode(ide_drive_t *drive, const u8 mode) outb(data, CY82_DATA_PORT); } -static void cy82c693_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void cy82c693_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); int bus_speed = ide_pci_clk ? ide_pci_clk : 33; const unsigned long T = 1000000 / bus_speed; @@ -101,7 +100,7 @@ static void cy82c693_set_pio_mode(ide_drive_t *drive, const u8 pio) } } - ide_timing_compute(drive, XFER_PIO_0 + pio, &t, T, 1); + ide_timing_compute(drive, drive->pio_mode, &t, T, 1); time_16 = clamp_val(t.recover - 1, 0, 15) | (clamp_val(t.active - 1, 0, 15) << 4); diff --git a/drivers/ide/dtc2278.c b/drivers/ide/dtc2278.c index c6b138122981..6929f7fce93a 100644 --- a/drivers/ide/dtc2278.c +++ b/drivers/ide/dtc2278.c @@ -68,11 +68,11 @@ static void sub22 (char b, char c) static DEFINE_SPINLOCK(dtc2278_lock); -static void dtc2278_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void dtc2278_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { unsigned long flags; - if (pio >= 3) { + if (drive->pio_mode >= XFER_PIO_3) { spin_lock_irqsave(&dtc2278_lock, flags); /* * This enables PIO mode4 (3?) 
on the first interface diff --git a/drivers/ide/hpt366.c b/drivers/ide/hpt366.c index 4d90ac2dbb1b..f1dec519a9e6 100644 --- a/drivers/ide/hpt366.c +++ b/drivers/ide/hpt366.c @@ -651,9 +651,9 @@ static void hpt3xx_set_mode(ide_drive_t *drive, const u8 speed) pci_write_config_dword(dev, itr_addr, new_itr); } -static void hpt3xx_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void hpt3xx_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - hpt3xx_set_mode(drive, XFER_PIO_0 + pio); + hpt3xx_set_mode(drive, drive->pio_mode); } static void hpt3xx_maskproc(ide_drive_t *drive, int mask) diff --git a/drivers/ide/ht6560b.c b/drivers/ide/ht6560b.c index aafed8060e17..d81e49680c3f 100644 --- a/drivers/ide/ht6560b.c +++ b/drivers/ide/ht6560b.c @@ -279,9 +279,10 @@ static void ht_set_prefetch(ide_drive_t *drive, u8 state) #endif } -static void ht6560b_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void ht6560b_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { unsigned long flags, config; + const u8 pio = drive->pio_mode - XFER_PIO_0; u8 timing; switch (pio) { diff --git a/drivers/ide/ide-devsets.c b/drivers/ide/ide-devsets.c index cb3341ce655c..c6935c78757c 100644 --- a/drivers/ide/ide-devsets.c +++ b/drivers/ide/ide-devsets.c @@ -112,10 +112,10 @@ static int set_pio_mode(ide_drive_t *drive, int arg) /* take lock for IDE_DFLAG_[NO_]UNMASK/[NO_]IO_32BIT */ spin_lock_irqsave(&hwif->lock, flags); - port_ops->set_pio_mode(drive, arg); + port_ops->set_pio_mode(hwif, drive); spin_unlock_irqrestore(&hwif->lock, flags); } else - port_ops->set_pio_mode(drive, arg); + port_ops->set_pio_mode(hwif, drive); } else { int keep_dma = !!(drive->dev_flags & IDE_DFLAG_USING_DMA); diff --git a/drivers/ide/ide-xfer-mode.c b/drivers/ide/ide-xfer-mode.c index c2323869d92a..a62fb03fc1cc 100644 --- a/drivers/ide/ide-xfer-mode.c +++ b/drivers/ide/ide-xfer-mode.c @@ -136,7 +136,7 @@ int ide_set_pio_mode(ide_drive_t *drive, const u8 mode) */ if (port_ops->set_dma_mode == NULL) { drive->pio_mode = mode; - port_ops->set_pio_mode(drive, mode - XFER_PIO_0); + port_ops->set_pio_mode(hwif, drive); return 0; } @@ -144,11 +144,11 @@ int ide_set_pio_mode(ide_drive_t *drive, const u8 mode) if (ide_config_drive_speed(drive, mode)) return -1; drive->pio_mode = mode; - port_ops->set_pio_mode(drive, mode - XFER_PIO_0); + port_ops->set_pio_mode(hwif, drive); return 0; } else { drive->pio_mode = mode; - port_ops->set_pio_mode(drive, mode - XFER_PIO_0); + port_ops->set_pio_mode(hwif, drive); return ide_config_drive_speed(drive, mode); } } diff --git a/drivers/ide/it8172.c b/drivers/ide/it8172.c index 0d266a5b524d..9dfdc8741a7b 100644 --- a/drivers/ide/it8172.c +++ b/drivers/ide/it8172.c @@ -37,12 +37,12 @@ #define DRV_NAME "IT8172" -static void it8172_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void it8172_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); u16 drive_enables; u32 drive_timing; + const u8 pio = drive->pio_mode - XFER_PIO_0; /* * The highest value of DIOR/DIOW pulse width and recovery time @@ -98,14 +98,14 @@ static void it8172_set_dma_mode(ide_drive_t *drive, const u8 speed) pci_write_config_byte(dev, 0x4a, reg4a | u_speed); } else { const u8 mwdma_to_pio[] = { 0, 3, 4 }; - u8 pio; pci_write_config_byte(dev, 0x48, reg48 & ~u_flag); pci_write_config_byte(dev, 0x4a, reg4a & ~a_speed); - pio = mwdma_to_pio[speed - XFER_MW_DMA_0]; + drive->pio_mode = + mwdma_to_pio[speed - XFER_MW_DMA_0] + XFER_PIO_0; - 
it8172_set_pio_mode(drive, pio); + it8172_set_pio_mode(hwif, drive); } } diff --git a/drivers/ide/it8213.c b/drivers/ide/it8213.c index 47976167796a..492c07d5f4f3 100644 --- a/drivers/ide/it8213.c +++ b/drivers/ide/it8213.c @@ -17,15 +17,14 @@ /** * it8213_set_pio_mode - set host controller for PIO mode + * @hwif: port * @drive: drive - * @pio: PIO mode number * * Set the interface PIO mode. */ -static void it8213_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void it8213_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); int is_slave = drive->dn & 1; int master_port = 0x40; @@ -35,6 +34,7 @@ static void it8213_set_pio_mode(ide_drive_t *drive, const u8 pio) u8 slave_data; static DEFINE_SPINLOCK(tune_lock); int control = 0; + const u8 pio = drive->pio_mode - XFER_PIO_0; static const u8 timings[][2] = { { 0, 0 }, @@ -120,7 +120,6 @@ static void it8213_set_dma_mode(ide_drive_t *drive, const u8 speed) pci_write_config_byte(dev, 0x54, reg54 & ~v_flag); } else { const u8 mwdma_to_pio[] = { 0, 3, 4 }; - u8 pio; if (reg48 & u_flag) pci_write_config_byte(dev, 0x48, reg48 & ~u_flag); @@ -132,11 +131,12 @@ static void it8213_set_dma_mode(ide_drive_t *drive, const u8 speed) pci_write_config_byte(dev, 0x55, (u8) reg55 & ~w_flag); if (speed >= XFER_MW_DMA_0) - pio = mwdma_to_pio[speed - XFER_MW_DMA_0]; + drive->pio_mode = + mwdma_to_pio[speed - XFER_MW_DMA_0] + XFER_PIO_0; else - pio = 2; /* only SWDMA2 is allowed */ + drive->pio_mode = XFER_PIO_2; /* for SWDMA2 */ - it8213_set_pio_mode(drive, pio); + it8213_set_pio_mode(hwif, drive); } } diff --git a/drivers/ide/it821x.c b/drivers/ide/it821x.c index 51aa745246dc..69becb7b9656 100644 --- a/drivers/ide/it821x.c +++ b/drivers/ide/it821x.c @@ -228,18 +228,18 @@ static void it821x_clock_strategy(ide_drive_t *drive) /** * it821x_set_pio_mode - set host controller for PIO mode + * @hwif: port * @drive: drive - * @pio: PIO mode number * * Tune the host to the desired PIO mode taking into the consideration * the maximum PIO mode supported by the other device on the cable. 
*/ -static void it821x_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void it821x_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct it821x_dev *itdev = ide_get_hwifdata(hwif); ide_drive_t *pair = ide_get_pair_dev(drive); + const u8 pio = drive->pio_mode - XFER_PIO_0; u8 unit = drive->dn & 1, set_pio = pio; /* Spec says 89 ref driver uses 88 */ diff --git a/drivers/ide/jmicron.c b/drivers/ide/jmicron.c index bf2be6431b20..ebffb904ed24 100644 --- a/drivers/ide/jmicron.c +++ b/drivers/ide/jmicron.c @@ -80,7 +80,7 @@ static u8 jmicron_cable_detect(ide_hwif_t *hwif) return ATA_CBL_PATA80; } -static void jmicron_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void jmicron_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { } diff --git a/drivers/ide/opti621.c b/drivers/ide/opti621.c index 2052788fab7a..1a53a4c375ed 100644 --- a/drivers/ide/opti621.c +++ b/drivers/ide/opti621.c @@ -62,12 +62,12 @@ static u8 read_reg(int reg) return ret; } -static void opti621_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void opti621_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; ide_drive_t *pair = ide_get_pair_dev(drive); unsigned long flags; - unsigned long mode = XFER_PIO_0 + pio, pair_mode; + unsigned long mode = drive->pio_mode, pair_mode; + const u8 pio = mode - XFER_PIO_0; u8 tim, misc, addr_pio = pio, clk; /* DRDY is default 2 (by OPTi Databook) */ diff --git a/drivers/ide/palm_bk3710.c b/drivers/ide/palm_bk3710.c index f8eddf05ecb8..0f262d07c378 100644 --- a/drivers/ide/palm_bk3710.c +++ b/drivers/ide/palm_bk3710.c @@ -203,12 +203,13 @@ static void palm_bk3710_set_dma_mode(ide_drive_t *drive, u8 xferspeed) } } -static void palm_bk3710_set_pio_mode(ide_drive_t *drive, u8 pio) +static void palm_bk3710_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { unsigned int cycle_time; int is_slave = drive->dn & 1; ide_drive_t *mate; - void __iomem *base = (void *)drive->hwif->dma_base; + void __iomem *base = (void *)hwif->dma_base; + const u8 pio = drive->pio_mode - XFER_PIO_0; /* * Obtain the drive PIO data for tuning the Palm Chip registers diff --git a/drivers/ide/pdc202xx_new.c b/drivers/ide/pdc202xx_new.c index 65ba8239e7b5..874acd2bb6e6 100644 --- a/drivers/ide/pdc202xx_new.c +++ b/drivers/ide/pdc202xx_new.c @@ -167,11 +167,11 @@ static void pdcnew_set_dma_mode(ide_drive_t *drive, const u8 speed) } } -static void pdcnew_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void pdcnew_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); u8 adj = (drive->dn & 1) ? 
0x08 : 0x00; + const u8 pio = drive->pio_mode - XFER_PIO_0; if (max_dma_rate(dev) == 4) { set_indexed_reg(hwif, 0x0c + adj, pio_timings[pio].reg0c); diff --git a/drivers/ide/pdc202xx_old.c b/drivers/ide/pdc202xx_old.c index 1d20594ee420..402aab7f3baa 100644 --- a/drivers/ide/pdc202xx_old.c +++ b/drivers/ide/pdc202xx_old.c @@ -76,9 +76,9 @@ static void pdc202xx_set_mode(ide_drive_t *drive, const u8 speed) } } -static void pdc202xx_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void pdc202xx_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - pdc202xx_set_mode(drive, XFER_PIO_0 + pio); + pdc202xx_set_mode(drive, drive->pio_mode); } static int pdc202xx_test_irq(ide_hwif_t *hwif) diff --git a/drivers/ide/piix.c b/drivers/ide/piix.c index bf14f39bd3a7..64b3041daa60 100644 --- a/drivers/ide/piix.c +++ b/drivers/ide/piix.c @@ -59,15 +59,14 @@ static int no_piix_dma; /** * piix_set_pio_mode - set host controller for PIO mode + * @port: port * @drive: drive - * @pio: PIO mode number * * Set the interface PIO mode based upon the settings done by AMI BIOS. */ -static void piix_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void piix_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); int is_slave = drive->dn & 1; int master_port = hwif->channel ? 0x42 : 0x40; @@ -77,6 +76,7 @@ static void piix_set_pio_mode(ide_drive_t *drive, const u8 pio) u8 slave_data; static DEFINE_SPINLOCK(tune_lock); int control = 0; + const u8 pio = drive->pio_mode - XFER_PIO_0; /* ISP RTC */ static const u8 timings[][2]= { @@ -176,7 +176,6 @@ static void piix_set_dma_mode(ide_drive_t *drive, const u8 speed) pci_write_config_byte(dev, 0x54, reg54 & ~v_flag); } else { const u8 mwdma_to_pio[] = { 0, 3, 4 }; - u8 pio; if (reg48 & u_flag) pci_write_config_byte(dev, 0x48, reg48 & ~u_flag); @@ -188,11 +187,12 @@ static void piix_set_dma_mode(ide_drive_t *drive, const u8 speed) pci_write_config_byte(dev, 0x55, (u8) reg55 & ~w_flag); if (speed >= XFER_MW_DMA_0) - pio = mwdma_to_pio[speed - XFER_MW_DMA_0]; + drive->pio_mode = + mwdma_to_pio[speed - XFER_MW_DMA_0] + XFER_PIO_0; else - pio = 2; /* only SWDMA2 is allowed */ + drive->pio_mode = XFER_PIO_2; /* for SWDMA2 */ - piix_set_pio_mode(drive, pio); + piix_set_pio_mode(hwif, drive); } } diff --git a/drivers/ide/pmac.c b/drivers/ide/pmac.c index 7a4e788cab2f..a167968a2d42 100644 --- a/drivers/ide/pmac.c +++ b/drivers/ide/pmac.c @@ -496,12 +496,11 @@ static void pmac_write_devctl(ide_hwif_t *hwif, u8 ctl) /* * Old tuning functions (called on hdparm -p), sets up drive PIO timings */ -static void -pmac_ide_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void pmac_ide_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; pmac_ide_hwif_t *pmif = (pmac_ide_hwif_t *)dev_get_drvdata(hwif->gendev.parent); + const u8 pio = drive->pio_mode - XFER_PIO_0; struct ide_timing *tim = ide_timing_find_mode(XFER_PIO_0 + pio); u32 *timings, t; unsigned accessTicks, recTicks; diff --git a/drivers/ide/qd65xx.c b/drivers/ide/qd65xx.c index 74696edc8d1d..3f0244fd8e62 100644 --- a/drivers/ide/qd65xx.c +++ b/drivers/ide/qd65xx.c @@ -189,15 +189,13 @@ static void qd_set_timing (ide_drive_t *drive, u8 timing) printk(KERN_DEBUG "%s: %#x\n", drive->name, timing); } -static void qd6500_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void qd6500_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { u16 *id = drive->id; int active_time = 175; int recovery_time = 415; /* worst 
case values from the dos driver */ - /* - * FIXME: use "pio" value - */ + /* FIXME: use drive->pio_mode value */ if (!qd_find_disk_type(drive, &active_time, &recovery_time) && (id[ATA_ID_OLD_PIO_MODES] & 0xff) && (id[ATA_ID_FIELD_VALID] & 2) && id[ATA_ID_EIDE_PIO] >= 240) { @@ -211,9 +209,9 @@ static void qd6500_set_pio_mode(ide_drive_t *drive, const u8 pio) active_time, recovery_time)); } -static void qd6580_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void qd6580_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; + const u8 pio = drive->pio_mode - XFER_PIO_0; struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio); unsigned int cycle_time; int active_time = 175; diff --git a/drivers/ide/sc1200.c b/drivers/ide/sc1200.c index d467478d68da..bb0166e460ab 100644 --- a/drivers/ide/sc1200.c +++ b/drivers/ide/sc1200.c @@ -193,10 +193,10 @@ static int sc1200_dma_end(ide_drive_t *drive) * will have valid default PIO timings set up before we get here. */ -static void sc1200_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void sc1200_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; int mode = -1; + const u8 pio = drive->pio_mode - XFER_PIO_0; /* * bad abuse of ->set_pio_mode interface diff --git a/drivers/ide/scc_pata.c b/drivers/ide/scc_pata.c index 1104bb301eb9..23e16e4460ee 100644 --- a/drivers/ide/scc_pata.c +++ b/drivers/ide/scc_pata.c @@ -199,16 +199,15 @@ scc_ide_outsl(unsigned long port, void *addr, u32 count) /** * scc_set_pio_mode - set host controller for PIO mode + * @hwif: port * @drive: drive - * @pio: PIO mode number * * Load the timing settings for this device mode into the * controller. */ -static void scc_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void scc_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct scc_ports *ports = ide_get_hwifdata(hwif); unsigned long ctl_base = ports->ctl; unsigned long cckctrl_port = ctl_base + 0xff0; @@ -216,6 +215,7 @@ static void scc_set_pio_mode(ide_drive_t *drive, const u8 pio) unsigned long pioct_port = ctl_base + 0x004; unsigned long reg; int offset; + const u8 pio = drive->pio_mode - XFER_PIO_0; reg = in_be32((void __iomem *)cckctrl_port); if (reg & CCKCTRL_ATACLKOEN) { diff --git a/drivers/ide/serverworks.c b/drivers/ide/serverworks.c index 657f0433ec50..a56bc51ae032 100644 --- a/drivers/ide/serverworks.c +++ b/drivers/ide/serverworks.c @@ -106,12 +106,13 @@ static u8 svwks_csb_check (struct pci_dev *dev) return 0; } -static void svwks_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void svwks_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { static const u8 pio_modes[] = { 0x5d, 0x47, 0x34, 0x22, 0x20 }; static const u8 drive_pci[] = { 0x41, 0x40, 0x43, 0x42 }; - struct pci_dev *dev = to_pci_dev(drive->hwif->dev); + struct pci_dev *dev = to_pci_dev(hwif->dev); + const u8 pio = drive->pio_mode - XFER_PIO_0; pci_write_config_byte(dev, drive_pci[drive->dn], pio_modes[pio]); diff --git a/drivers/ide/siimage.c b/drivers/ide/siimage.c index d95df528562f..97266958f744 100644 --- a/drivers/ide/siimage.c +++ b/drivers/ide/siimage.c @@ -229,19 +229,18 @@ static u8 sil_sata_udma_filter(ide_drive_t *drive) /** * sil_set_pio_mode - set host controller for PIO mode + * @hwif: port * @drive: drive - * @pio: PIO mode number * * Load the timing settings for this device mode into the * controller. 
*/ -static void sil_set_pio_mode(ide_drive_t *drive, u8 pio) +static void sil_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { static const u16 tf_speed[] = { 0x328a, 0x2283, 0x1281, 0x10c3, 0x10c1 }; static const u16 data_speed[] = { 0x328a, 0x2283, 0x1104, 0x10c3, 0x10c1 }; - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); ide_drive_t *pair = ide_get_pair_dev(drive); u32 speedt = 0; @@ -249,6 +248,7 @@ static void sil_set_pio_mode(ide_drive_t *drive, u8 pio) unsigned long addr = siimage_seldev(drive, 0x04); unsigned long tfaddr = siimage_selreg(hwif, 0x02); unsigned long base = (unsigned long)hwif->hwif_data; + const u8 pio = drive->pio_mode - XFER_PIO_0; u8 tf_pio = pio; u8 mmio = (hwif->host_flags & IDE_HFLAG_MMIO) ? 1 : 0; u8 addr_mask = hwif->channel ? (mmio ? 0xF4 : 0x84) diff --git a/drivers/ide/sis5513.c b/drivers/ide/sis5513.c index 468706082fb5..5a0192060531 100644 --- a/drivers/ide/sis5513.c +++ b/drivers/ide/sis5513.c @@ -290,10 +290,10 @@ static void config_drive_art_rwp(ide_drive_t *drive) pci_write_config_byte(dev, 0x4b, rw_prefetch); } -static void sis_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void sis_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { config_drive_art_rwp(drive); - sis_program_timings(drive, XFER_PIO_0 + pio); + sis_program_timings(drive, drive->pio_mode); } static void sis_ata133_program_udma_timings(ide_drive_t *drive, const u8 mode) diff --git a/drivers/ide/sl82c105.c b/drivers/ide/sl82c105.c index 3c2bbf0057ea..419cd3bc6c84 100644 --- a/drivers/ide/sl82c105.c +++ b/drivers/ide/sl82c105.c @@ -63,12 +63,13 @@ static unsigned int get_pio_timings(ide_drive_t *drive, u8 pio) /* * Configure the chipset for PIO mode. */ -static void sl82c105_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void sl82c105_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - struct pci_dev *dev = to_pci_dev(drive->hwif->dev); + struct pci_dev *dev = to_pci_dev(hwif->dev); unsigned long timings = (unsigned long)ide_get_drivedata(drive); int reg = 0x44 + drive->dn * 4; u16 drv_ctrl; + const u8 pio = drive->pio_mode - XFER_PIO_0; drv_ctrl = get_pio_timings(drive, pio); diff --git a/drivers/ide/slc90e66.c b/drivers/ide/slc90e66.c index 1ccfb40e7215..019777522cd2 100644 --- a/drivers/ide/slc90e66.c +++ b/drivers/ide/slc90e66.c @@ -18,9 +18,8 @@ static DEFINE_SPINLOCK(slc90e66_lock); -static void slc90e66_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void slc90e66_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); int is_slave = drive->dn & 1; int master_port = hwif->channel ? 
0x42 : 0x40; @@ -29,6 +28,8 @@ static void slc90e66_set_pio_mode(ide_drive_t *drive, const u8 pio) u16 master_data; u8 slave_data; int control = 0; + const u8 pio = drive->pio_mode - XFER_PIO_0; + /* ISP RTC */ static const u8 timings[][2] = { { 0, 0 }, @@ -98,7 +99,6 @@ static void slc90e66_set_dma_mode(ide_drive_t *drive, const u8 speed) } } else { const u8 mwdma_to_pio[] = { 0, 3, 4 }; - u8 pio; if (reg48 & u_flag) pci_write_config_word(dev, 0x48, reg48 & ~u_flag); @@ -106,11 +106,12 @@ static void slc90e66_set_dma_mode(ide_drive_t *drive, const u8 speed) pci_write_config_word(dev, 0x4a, reg4a & ~a_speed); if (speed >= XFER_MW_DMA_0) - pio = mwdma_to_pio[speed - XFER_MW_DMA_0]; + drive->pio_mode = + mwdma_to_pio[speed - XFER_MW_DMA_0] + XFER_PIO_0; else - pio = 2; /* only SWDMA2 is allowed */ + drive->pio_mode = XFER_PIO_2; /* for SWDMA2 */ - slc90e66_set_pio_mode(drive, pio); + slc90e66_set_pio_mode(hwif, drive); } } diff --git a/drivers/ide/tc86c001.c b/drivers/ide/tc86c001.c index 05a93d6baecc..f2cb62bf3f22 100644 --- a/drivers/ide/tc86c001.c +++ b/drivers/ide/tc86c001.c @@ -41,9 +41,9 @@ static void tc86c001_set_mode(ide_drive_t *drive, const u8 speed) outw(scr, scr_port); } -static void tc86c001_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void tc86c001_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - tc86c001_set_mode(drive, XFER_PIO_0 + pio); + tc86c001_set_mode(drive, drive->pio_mode); } /* diff --git a/drivers/ide/triflex.c b/drivers/ide/triflex.c index 8773c3ba7462..d34a7eecdea5 100644 --- a/drivers/ide/triflex.c +++ b/drivers/ide/triflex.c @@ -82,9 +82,9 @@ static void triflex_set_mode(ide_drive_t *drive, const u8 speed) pci_write_config_dword(dev, channel_offset, triflex_timings); } -static void triflex_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void triflex_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - triflex_set_mode(drive, XFER_PIO_0 + pio); + triflex_set_mode(drive, drive->pio_mode); } static const struct ide_port_ops triflex_port_ops = { diff --git a/drivers/ide/tx4938ide.c b/drivers/ide/tx4938ide.c index fd59c0d235b5..326d4683488b 100644 --- a/drivers/ide/tx4938ide.c +++ b/drivers/ide/tx4938ide.c @@ -56,11 +56,10 @@ static void tx4938ide_tune_ebusc(unsigned int ebus_ch, &tx4938_ebuscptr->cr[ebus_ch]); } -static void tx4938ide_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void tx4938ide_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct tx4938ide_platform_info *pdata = hwif->dev->platform_data; - u8 safe = pio; + u8 safe = drive->pio_mode - XFER_PIO_0; ide_drive_t *pair; pair = ide_get_pair_dev(drive); diff --git a/drivers/ide/tx4939ide.c b/drivers/ide/tx4939ide.c index 64b58ecc3f0e..5228a4786de5 100644 --- a/drivers/ide/tx4939ide.c +++ b/drivers/ide/tx4939ide.c @@ -104,11 +104,11 @@ static void tx4939ide_writeb(u8 val, void __iomem *base, u32 reg) #define TX4939IDE_BASE(hwif) ((void __iomem *)(hwif)->extra_base) -static void tx4939ide_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void tx4939ide_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; int is_slave = drive->dn; u32 mask, val; + const u8 pio = drive->pio_mode - XFER_PIO_0; u8 safe = pio; ide_drive_t *pair; diff --git a/drivers/ide/umc8672.c b/drivers/ide/umc8672.c index 60f936e2319c..47adcd09cb26 100644 --- a/drivers/ide/umc8672.c +++ b/drivers/ide/umc8672.c @@ -104,10 +104,11 @@ static void umc_set_speeds(u8 speeds[]) speeds[0], speeds[1], speeds[2], speeds[3]); } -static void 
umc_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void umc_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif, *mate = hwif->mate; + ide_hwif_t *mate = hwif->mate; unsigned long uninitialized_var(flags); + const u8 pio = drive->pio_mode - XFER_PIO_0; printk("%s: setting umc8672 to PIO mode%d (speed %d)\n", drive->name, pio, pio_to_umc[pio]); diff --git a/drivers/ide/via82cxxx.c b/drivers/ide/via82cxxx.c index fbecf8ea8207..6d995fc9d4f5 100644 --- a/drivers/ide/via82cxxx.c +++ b/drivers/ide/via82cxxx.c @@ -208,15 +208,15 @@ static void via_set_drive(ide_drive_t *drive, const u8 speed) /** * via_set_pio_mode - set host controller for PIO mode + * @hwif: port * @drive: drive - * @pio: PIO mode number * * A callback from the upper layers for PIO-only tuning. */ -static void via_set_pio_mode(ide_drive_t *drive, const u8 pio) +static void via_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - via_set_drive(drive, XFER_PIO_0 + pio); + via_set_drive(drive, drive->pio_mode); } static struct via_isa_bridge *via_config_find(struct pci_dev **isa) diff --git a/include/linux/ide.h b/include/linux/ide.h index 746ef9fdabcb..803ec306883c 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -624,7 +624,7 @@ extern const struct ide_tp_ops default_tp_ops; */ struct ide_port_ops { void (*init_dev)(ide_drive_t *); - void (*set_pio_mode)(ide_drive_t *, const u8); + void (*set_pio_mode)(struct hwif_s *, ide_drive_t *); void (*set_dma_mode)(ide_drive_t *, const u8); int (*reset_poll)(ide_drive_t *); void (*pre_reset)(ide_drive_t *); -- cgit v1.2.3 From 8776168ca2151850164af1de5565d01f7b8b2c53 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 19 Jan 2010 01:45:29 -0800 Subject: ide: change ->set_dma_mode method parameters Change ->set_dma_mode method parameters to match ->set_dmamode method used in struct ata_port_operations. 
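For illustration only (this sketch is not taken from the patch; the foo_* names are hypothetical), a converted ->set_dma_mode method now receives the port explicitly and reads the requested mode from drive->dma_mode, which ide_set_dma_mode() or a PIO wrapper fills in before calling the method, instead of taking a speed argument:

	/* hypothetical driver method, new calling convention */
	static void foo_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
	{
		const u8 speed = drive->dma_mode;	/* filled in by the caller */

		/* program the (hypothetical) controller timing registers */
		foo_program_timings(hwif, drive, speed);
	}
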
Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/aec62xx.c | 10 +++++----- drivers/ide/alim15x3.c | 6 +++--- drivers/ide/amd74xx.c | 7 ++++--- drivers/ide/atiixp.c | 7 ++++--- drivers/ide/au1xxx-ide.c | 4 ++-- drivers/ide/cmd64x.c | 4 ++-- drivers/ide/cs5520.c | 4 ++-- drivers/ide/cs5530.c | 6 +++--- drivers/ide/cs5535.c | 6 +++--- drivers/ide/cs5536.c | 7 ++++--- drivers/ide/cy82c693.c | 4 ++-- drivers/ide/hpt366.c | 7 ++++--- drivers/ide/icside.c | 3 ++- drivers/ide/ide-xfer-mode.c | 4 ++-- drivers/ide/it8172.c | 4 ++-- drivers/ide/it8213.c | 6 +++--- drivers/ide/it821x.c | 6 ++++-- drivers/ide/jmicron.c | 4 ++-- drivers/ide/palm_bk3710.c | 5 +++-- drivers/ide/pdc202xx_new.c | 4 ++-- drivers/ide/pdc202xx_old.c | 7 ++++--- drivers/ide/piix.c | 6 +++--- drivers/ide/pmac.c | 4 ++-- drivers/ide/sc1200.c | 4 ++-- drivers/ide/scc_pata.c | 6 +++--- drivers/ide/serverworks.c | 4 ++-- drivers/ide/sgiioc4.c | 2 +- drivers/ide/siimage.c | 6 +++--- drivers/ide/sis5513.c | 4 +++- drivers/ide/sl82c105.c | 3 ++- drivers/ide/slc90e66.c | 4 ++-- drivers/ide/tc86c001.c | 7 ++++--- drivers/ide/triflex.c | 8 ++++---- drivers/ide/tx4939ide.c | 4 ++-- drivers/ide/via82cxxx.c | 9 +++++---- include/linux/ide.h | 2 +- 36 files changed, 101 insertions(+), 87 deletions(-) (limited to 'include') diff --git a/drivers/ide/aec62xx.c b/drivers/ide/aec62xx.c index 3790847361c3..57d00caefc86 100644 --- a/drivers/ide/aec62xx.c +++ b/drivers/ide/aec62xx.c @@ -81,15 +81,15 @@ static u8 pci_bus_clock_list_ultra (u8 speed, struct chipset_bus_clock_list_entr return chipset_table->ultra_settings; } -static void aec6210_set_mode(ide_drive_t *drive, const u8 speed) +static void aec6210_set_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); struct ide_host *host = pci_get_drvdata(dev); struct chipset_bus_clock_list_entry *bus_clock = host->host_priv; u16 d_conf = 0; u8 ultra = 0, ultra_conf = 0; u8 tmp0 = 0, tmp1 = 0, tmp2 = 0; + const u8 speed = drive->dma_mode; unsigned long flags; local_irq_save(flags); @@ -109,15 +109,15 @@ static void aec6210_set_mode(ide_drive_t *drive, const u8 speed) local_irq_restore(flags); } -static void aec6260_set_mode(ide_drive_t *drive, const u8 speed) +static void aec6260_set_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); struct ide_host *host = pci_get_drvdata(dev); struct chipset_bus_clock_list_entry *bus_clock = host->host_priv; u8 unit = drive->dn & 1; u8 tmp1 = 0, tmp2 = 0; u8 ultra = 0, drive_conf = 0, ultra_conf = 0; + const u8 speed = drive->dma_mode; unsigned long flags; local_irq_save(flags); @@ -137,7 +137,7 @@ static void aec6260_set_mode(ide_drive_t *drive, const u8 speed) static void aec_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { drive->dma_mode = drive->pio_mode; - hwif->port_ops->set_dma_mode(drive, drive->dma_mode); + hwif->port_ops->set_dma_mode(hwif, drive); } static int init_chipset_aec62xx(struct pci_dev *dev) diff --git a/drivers/ide/alim15x3.c b/drivers/ide/alim15x3.c index 28cee1055f76..6f0debae4e27 100644 --- a/drivers/ide/alim15x3.c +++ b/drivers/ide/alim15x3.c @@ -121,16 +121,16 @@ static u8 ali_udma_filter(ide_drive_t *drive) /** * ali_set_dma_mode - set host controller for DMA mode + * @hwif: port * @drive: drive - * @speed: DMA mode * * Configure the hardware for the desired IDE transfer mode. 
*/ -static void ali_set_dma_mode(ide_drive_t *drive, const u8 speed) +static void ali_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); + const u8 speed = drive->dma_mode; u8 speed1 = speed; u8 unit = drive->dn & 1; u8 tmpbyte = 0x00; diff --git a/drivers/ide/amd74xx.c b/drivers/ide/amd74xx.c index 3eee7be7ca6f..b7e105338205 100644 --- a/drivers/ide/amd74xx.c +++ b/drivers/ide/amd74xx.c @@ -79,14 +79,14 @@ static void amd_set_speed(struct pci_dev *dev, u8 dn, u8 udma_mask, * to a desired transfer mode. It also can be called by upper layers. */ -static void amd_set_drive(ide_drive_t *drive, const u8 speed) +static void amd_set_drive(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); ide_drive_t *peer = ide_get_pair_dev(drive); struct ide_timing t, p; int T, UT; u8 udma_mask = hwif->ultra_mask; + const u8 speed = drive->dma_mode; T = 1000000000 / amd_clock; UT = (udma_mask == ATA_UDMA2) ? T : (T / 2); @@ -110,7 +110,8 @@ static void amd_set_drive(ide_drive_t *drive, const u8 speed) static void amd_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - amd_set_drive(drive, drive->pio_mode); + drive->dma_mode = drive->pio_mode; + amd_set_drive(hwif, drive); } static void amd7409_cable_detect(struct pci_dev *dev) diff --git a/drivers/ide/atiixp.c b/drivers/ide/atiixp.c index b6848dfb93b0..15f0ead89f5c 100644 --- a/drivers/ide/atiixp.c +++ b/drivers/ide/atiixp.c @@ -75,21 +75,22 @@ static void atiixp_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) /** * atiixp_set_dma_mode - set host controller for DMA mode + * @hwif: port * @drive: drive - * @speed: DMA mode * * Set a ATIIXP host controller to the desired DMA mode. This involves * programming the right timing data into the PCI configuration space. */ -static void atiixp_set_dma_mode(ide_drive_t *drive, const u8 speed) +static void atiixp_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - struct pci_dev *dev = to_pci_dev(drive->hwif->dev); + struct pci_dev *dev = to_pci_dev(hwif->dev); unsigned long flags; int timing_shift = (drive->dn ^ 1) * 8; u32 tmp32; u16 tmp16; u16 udma_ctl = 0; + const u8 speed = drive->dma_mode; spin_lock_irqsave(&atiixp_lock, flags); diff --git a/drivers/ide/au1xxx-ide.c b/drivers/ide/au1xxx-ide.c index c90e9b0a9f6e..e2fd378ba9de 100644 --- a/drivers/ide/au1xxx-ide.c +++ b/drivers/ide/au1xxx-ide.c @@ -160,11 +160,11 @@ static void au1xxx_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) au_writel(mem_stcfg,MEM_STCFG2); } -static void auide_set_dma_mode(ide_drive_t *drive, const u8 speed) +static void auide_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { int mem_sttime = 0, mem_stcfg = au_readl(MEM_STCFG2); - switch(speed) { + switch (drive->dma_mode) { #ifdef CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA case XFER_MW_DMA_2: mem_sttime = SBC_IDE_TIMING(MDMA2); diff --git a/drivers/ide/cmd64x.c b/drivers/ide/cmd64x.c index 0b11745937e7..a65a69171250 100644 --- a/drivers/ide/cmd64x.c +++ b/drivers/ide/cmd64x.c @@ -141,12 +141,12 @@ static void cmd64x_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) cmd64x_program_timings(drive, XFER_PIO_0 + pio); } -static void cmd64x_set_dma_mode(ide_drive_t *drive, const u8 speed) +static void cmd64x_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); u8 unit = drive->dn & 0x01; u8 regU = 0, pciU = hwif->channel ? 
UDIDETCR1 : UDIDETCR0; + const u8 speed = drive->dma_mode; pci_read_config_byte(dev, pciU, ®U); regU &= ~(unit ? 0xCA : 0x35); diff --git a/drivers/ide/cs5520.c b/drivers/ide/cs5520.c index b8094f049f3e..2c1e5f7cd261 100644 --- a/drivers/ide/cs5520.c +++ b/drivers/ide/cs5520.c @@ -81,12 +81,12 @@ static void cs5520_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) (cs5520_pio_clocks[pio].assert)); } -static void cs5520_set_dma_mode(ide_drive_t *drive, const u8 speed) +static void cs5520_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { printk(KERN_ERR "cs55x0: bad ide timing.\n"); drive->pio_mode = XFER_PIO_0 + 0; - cs5520_set_pio_mode(drive->hwif, drive); + cs5520_set_pio_mode(hwif, drive); } static const struct ide_port_ops cs5520_port_ops = { diff --git a/drivers/ide/cs5530.c b/drivers/ide/cs5530.c index 4ced40255ad6..4dc4eb92b076 100644 --- a/drivers/ide/cs5530.c +++ b/drivers/ide/cs5530.c @@ -100,12 +100,12 @@ out: return mask; } -static void cs5530_set_dma_mode(ide_drive_t *drive, const u8 mode) +static void cs5530_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { unsigned long basereg; unsigned int reg, timings = 0; - switch (mode) { + switch (drive->dma_mode) { case XFER_UDMA_0: timings = 0x00921250; break; case XFER_UDMA_1: timings = 0x00911140; break; case XFER_UDMA_2: timings = 0x00911030; break; @@ -113,7 +113,7 @@ static void cs5530_set_dma_mode(ide_drive_t *drive, const u8 mode) case XFER_MW_DMA_1: timings = 0x00012121; break; case XFER_MW_DMA_2: timings = 0x00002020; break; } - basereg = CS5530_BASEREG(drive->hwif); + basereg = CS5530_BASEREG(hwif); reg = inl(basereg + 4); /* get drive0 config register */ timings |= reg & 0x80000000; /* preserve PIO format bit */ if ((drive-> dn & 1) == 0) { /* are we configuring drive0? */ diff --git a/drivers/ide/cs5535.c b/drivers/ide/cs5535.c index 7974415ea89f..740002b2f3e8 100644 --- a/drivers/ide/cs5535.c +++ b/drivers/ide/cs5535.c @@ -129,15 +129,15 @@ static void cs5535_set_speed(ide_drive_t *drive, const u8 speed) /** * cs5535_set_dma_mode - set host controller for DMA mode + * @hwif: port * @drive: drive - * @speed: DMA mode * * Programs the chipset for DMA mode. */ -static void cs5535_set_dma_mode(ide_drive_t *drive, const u8 speed) +static void cs5535_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - cs5535_set_speed(drive, speed); + cs5535_set_speed(drive, drive->dma_mode); } /** diff --git a/drivers/ide/cs5536.c b/drivers/ide/cs5536.c index b518ef0e9a35..70871fbc3c0a 100644 --- a/drivers/ide/cs5536.c +++ b/drivers/ide/cs5536.c @@ -173,11 +173,11 @@ static void cs5536_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) /** * cs5536_set_dma_mode - DMA timing setup + * @hwif: ATA port * @drive: ATA device - * @mode: DMA mode */ -static void cs5536_set_dma_mode(ide_drive_t *drive, const u8 mode) +static void cs5536_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { static const u8 udma_timings[6] = { 0xc2, 0xc1, 0xc0, 0xc4, 0xc5, 0xc6, @@ -187,10 +187,11 @@ static void cs5536_set_dma_mode(ide_drive_t *drive, const u8 mode) 0x67, 0x21, 0x20, }; - struct pci_dev *pdev = to_pci_dev(drive->hwif->dev); + struct pci_dev *pdev = to_pci_dev(hwif->dev); int dshift = (drive->dn & 1) ? 
IDE_D1_SHIFT : IDE_D0_SHIFT; unsigned long timings = (unsigned long)ide_get_drivedata(drive); u32 etc; + const u8 mode = drive->dma_mode; cs5536_read(pdev, ETC, &etc); diff --git a/drivers/ide/cy82c693.c b/drivers/ide/cy82c693.c index ead65c394f00..9383f67deae1 100644 --- a/drivers/ide/cy82c693.c +++ b/drivers/ide/cy82c693.c @@ -53,9 +53,9 @@ * set DMA mode a specific channel for CY82C693 */ -static void cy82c693_set_dma_mode(ide_drive_t *drive, const u8 mode) +static void cy82c693_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; + const u8 mode = drive->dma_mode; u8 single = (mode & 0x10) >> 4, index = 0, data = 0; index = hwif->channel ? CY82_INDEX_CHANNEL1 : CY82_INDEX_CHANNEL0; diff --git a/drivers/ide/hpt366.c b/drivers/ide/hpt366.c index f1dec519a9e6..b885c1d548f5 100644 --- a/drivers/ide/hpt366.c +++ b/drivers/ide/hpt366.c @@ -627,14 +627,14 @@ static u32 get_speed_setting(u8 speed, struct hpt_info *info) return info->timings->clock_table[info->clock][i]; } -static void hpt3xx_set_mode(ide_drive_t *drive, const u8 speed) +static void hpt3xx_set_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); struct hpt_info *info = hpt3xx_get_info(hwif->dev); struct hpt_timings *t = info->timings; u8 itr_addr = 0x40 + (drive->dn * 4); u32 old_itr = 0; + const u8 speed = drive->dma_mode; u32 new_itr = get_speed_setting(speed, info); u32 itr_mask = speed < XFER_MW_DMA_0 ? t->pio_mask : (speed < XFER_UDMA_0 ? t->dma_mask : @@ -653,7 +653,8 @@ static void hpt3xx_set_mode(ide_drive_t *drive, const u8 speed) static void hpt3xx_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - hpt3xx_set_mode(drive, drive->pio_mode); + drive->dma_mode = drive->pio_mode; + hpt3xx_set_mode(hwif, drive); } static void hpt3xx_maskproc(ide_drive_t *drive, int mask) diff --git a/drivers/ide/icside.c b/drivers/ide/icside.c index 0f67f1abbbd3..26b6c0a1f772 100644 --- a/drivers/ide/icside.c +++ b/drivers/ide/icside.c @@ -185,10 +185,11 @@ static const expansioncard_ops_t icside_ops_arcin_v6 = { * MW1 80 50 50 150 C * MW2 70 25 25 120 C */ -static void icside_set_dma_mode(ide_drive_t *drive, const u8 xfer_mode) +static void icside_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { unsigned long cycle_time; int use_dma_info = 0; + const u8 xfer_mode = drive->dma_mode; switch (xfer_mode) { case XFER_MW_DMA_2: diff --git a/drivers/ide/ide-xfer-mode.c b/drivers/ide/ide-xfer-mode.c index a62fb03fc1cc..9b549e4d1848 100644 --- a/drivers/ide/ide-xfer-mode.c +++ b/drivers/ide/ide-xfer-mode.c @@ -168,11 +168,11 @@ int ide_set_dma_mode(ide_drive_t *drive, const u8 mode) if (ide_config_drive_speed(drive, mode)) return -1; drive->dma_mode = mode; - port_ops->set_dma_mode(drive, mode); + port_ops->set_dma_mode(hwif, drive); return 0; } else { drive->dma_mode = mode; - port_ops->set_dma_mode(drive, mode); + port_ops->set_dma_mode(hwif, drive); return ide_config_drive_speed(drive, mode); } } diff --git a/drivers/ide/it8172.c b/drivers/ide/it8172.c index 9dfdc8741a7b..560e66d07659 100644 --- a/drivers/ide/it8172.c +++ b/drivers/ide/it8172.c @@ -77,14 +77,14 @@ static void it8172_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) pci_write_config_dword(dev, 0x44, drive_timing); } -static void it8172_set_dma_mode(ide_drive_t *drive, const u8 speed) +static void it8172_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); int a_speed = 3 << (drive->dn * 4); 
int u_flag = 1 << drive->dn; int u_speed = 0; u8 reg48, reg4a; + const u8 speed = drive->dma_mode; pci_read_config_byte(dev, 0x48, ®48); pci_read_config_byte(dev, 0x4a, ®4a); diff --git a/drivers/ide/it8213.c b/drivers/ide/it8213.c index 492c07d5f4f3..46816ba26416 100644 --- a/drivers/ide/it8213.c +++ b/drivers/ide/it8213.c @@ -74,15 +74,14 @@ static void it8213_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) /** * it8213_set_dma_mode - set host controller for DMA mode + * @hwif: port * @drive: drive - * @speed: DMA mode * * Tune the ITE chipset for the DMA mode. */ -static void it8213_set_dma_mode(ide_drive_t *drive, const u8 speed) +static void it8213_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); u8 maslave = 0x40; int a_speed = 3 << (drive->dn * 4); @@ -92,6 +91,7 @@ static void it8213_set_dma_mode(ide_drive_t *drive, const u8 speed) int u_speed = 0; u16 reg4042, reg4a; u8 reg48, reg54, reg55; + const u8 speed = drive->dma_mode; pci_read_config_word(dev, maslave, ®4042); pci_read_config_byte(dev, 0x48, ®48); diff --git a/drivers/ide/it821x.c b/drivers/ide/it821x.c index 69becb7b9656..56b79194156b 100644 --- a/drivers/ide/it821x.c +++ b/drivers/ide/it821x.c @@ -393,14 +393,16 @@ static int it821x_dma_end(ide_drive_t *drive) /** * it821x_set_dma_mode - set host controller for DMA mode + * @hwif: port * @drive: drive - * @speed: DMA mode * * Tune the ITE chipset for the desired DMA mode. */ -static void it821x_set_dma_mode(ide_drive_t *drive, const u8 speed) +static void it821x_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { + const u8 speed = drive->dma_mode; + /* * MWDMA tuning is really hard because our MWDMA and PIO * timings are kept in the same place. We can switch in the diff --git a/drivers/ide/jmicron.c b/drivers/ide/jmicron.c index ebffb904ed24..74c2c4a6d909 100644 --- a/drivers/ide/jmicron.c +++ b/drivers/ide/jmicron.c @@ -86,13 +86,13 @@ static void jmicron_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) /** * jmicron_set_dma_mode - set host controller for DMA mode + * @hwif: port * @drive: drive - * @mode: DMA mode * * As the JMicron snoops for timings we don't need to do anything here. 
*/ -static void jmicron_set_dma_mode(ide_drive_t *drive, const u8 mode) +static void jmicron_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { } diff --git a/drivers/ide/palm_bk3710.c b/drivers/ide/palm_bk3710.c index 0f262d07c378..35448c91b8c8 100644 --- a/drivers/ide/palm_bk3710.c +++ b/drivers/ide/palm_bk3710.c @@ -188,10 +188,11 @@ static void palm_bk3710_setpiomode(void __iomem *base, ide_drive_t *mate, writel(val32, base + BK3710_REGRCVR); } -static void palm_bk3710_set_dma_mode(ide_drive_t *drive, u8 xferspeed) +static void palm_bk3710_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { int is_slave = drive->dn & 1; - void __iomem *base = (void *)drive->hwif->dma_base; + void __iomem *base = (void *)hwif->dma_base; + const u8 xferspeed = drive->dma_mode; if (xferspeed >= XFER_UDMA_0) { palm_bk3710_setudmamode(base, is_slave, diff --git a/drivers/ide/pdc202xx_new.c b/drivers/ide/pdc202xx_new.c index 874acd2bb6e6..9546fe2a93f7 100644 --- a/drivers/ide/pdc202xx_new.c +++ b/drivers/ide/pdc202xx_new.c @@ -129,11 +129,11 @@ static struct udma_timing { { 0x1a, 0x01, 0xcb }, /* UDMA mode 6 */ }; -static void pdcnew_set_dma_mode(ide_drive_t *drive, const u8 speed) +static void pdcnew_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); u8 adj = (drive->dn & 1) ? 0x08 : 0x00; + const u8 speed = drive->dma_mode; /* * IDE core issues SETFEATURES_XFER to the drive first (thanks to diff --git a/drivers/ide/pdc202xx_old.c b/drivers/ide/pdc202xx_old.c index 402aab7f3baa..07cd37516ba6 100644 --- a/drivers/ide/pdc202xx_old.c +++ b/drivers/ide/pdc202xx_old.c @@ -21,11 +21,11 @@ #define DRV_NAME "pdc202xx_old" -static void pdc202xx_set_mode(ide_drive_t *drive, const u8 speed) +static void pdc202xx_set_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); u8 drive_pci = 0x60 + (drive->dn << 2); + const u8 speed = drive->dma_mode; u8 AP = 0, BP = 0, CP = 0; u8 TA = 0, TB = 0, TC = 0; @@ -78,7 +78,8 @@ static void pdc202xx_set_mode(ide_drive_t *drive, const u8 speed) static void pdc202xx_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - pdc202xx_set_mode(drive, drive->pio_mode); + drive->dma_mode = drive->pio_mode; + pdc202xx_set_mode(hwif, drive); } static int pdc202xx_test_irq(ide_hwif_t *hwif) diff --git a/drivers/ide/piix.c b/drivers/ide/piix.c index 64b3041daa60..1bdca49e5a03 100644 --- a/drivers/ide/piix.c +++ b/drivers/ide/piix.c @@ -127,16 +127,15 @@ static void piix_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) /** * piix_set_dma_mode - set host controller for DMA mode + * @hwif: port * @drive: drive - * @speed: DMA mode * * Set a PIIX host controller to the desired DMA mode. This involves * programming the right timing data into the PCI configuration space. */ -static void piix_set_dma_mode(ide_drive_t *drive, const u8 speed) +static void piix_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); u8 maslave = hwif->channel ? 0x42 : 0x40; int a_speed = 3 << (drive->dn * 4); @@ -147,6 +146,7 @@ static void piix_set_dma_mode(ide_drive_t *drive, const u8 speed) int sitre; u16 reg4042, reg4a; u8 reg48, reg54, reg55; + const u8 speed = drive->dma_mode; pci_read_config_word(dev, maslave, ®4042); sitre = (reg4042 & 0x4000) ? 
1 : 0; diff --git a/drivers/ide/pmac.c b/drivers/ide/pmac.c index a167968a2d42..9fae1fb1468b 100644 --- a/drivers/ide/pmac.c +++ b/drivers/ide/pmac.c @@ -777,14 +777,14 @@ set_timings_mdma(ide_drive_t *drive, int intf_type, u32 *timings, u32 *timings2, #endif } -static void pmac_ide_set_dma_mode(ide_drive_t *drive, const u8 speed) +static void pmac_ide_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; pmac_ide_hwif_t *pmif = (pmac_ide_hwif_t *)dev_get_drvdata(hwif->gendev.parent); int ret = 0; u32 *timings, *timings2, tl[2]; u8 unit = drive->dn & 1; + const u8 speed = drive->dma_mode; timings = &pmif->timings[unit]; timings2 = &pmif->timings[unit+2]; diff --git a/drivers/ide/sc1200.c b/drivers/ide/sc1200.c index bb0166e460ab..134f1fd13866 100644 --- a/drivers/ide/sc1200.c +++ b/drivers/ide/sc1200.c @@ -122,13 +122,13 @@ out: return mask; } -static void sc1200_set_dma_mode(ide_drive_t *drive, const u8 mode) +static void sc1200_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); unsigned int reg, timings; unsigned short pci_clock; unsigned int basereg = hwif->channel ? 0x50 : 0x40; + const u8 mode = drive->dma_mode; static const u32 udma_timing[3][3] = { { 0x00921250, 0x00911140, 0x00911030 }, diff --git a/drivers/ide/scc_pata.c b/drivers/ide/scc_pata.c index 23e16e4460ee..e9d4b441d1c3 100644 --- a/drivers/ide/scc_pata.c +++ b/drivers/ide/scc_pata.c @@ -231,16 +231,15 @@ static void scc_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) /** * scc_set_dma_mode - set host controller for DMA mode + * @hwif: port * @drive: drive - * @speed: DMA mode * * Load the timing settings for this device mode into the * controller. */ -static void scc_set_dma_mode(ide_drive_t *drive, const u8 speed) +static void scc_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct scc_ports *ports = ide_get_hwifdata(hwif); unsigned long ctl_base = ports->ctl; unsigned long cckctrl_port = ctl_base + 0xff0; @@ -254,6 +253,7 @@ static void scc_set_dma_mode(ide_drive_t *drive, const u8 speed) int offset, idx; unsigned long reg; unsigned long jcactsel; + const u8 speed = drive->dma_mode; reg = in_be32((void __iomem *)cckctrl_port); if (reg & CCKCTRL_ATACLKOEN) { diff --git a/drivers/ide/serverworks.c b/drivers/ide/serverworks.c index a56bc51ae032..35fb8dabb55d 100644 --- a/drivers/ide/serverworks.c +++ b/drivers/ide/serverworks.c @@ -128,14 +128,14 @@ static void svwks_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) } } -static void svwks_set_dma_mode(ide_drive_t *drive, const u8 speed) +static void svwks_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { static const u8 udma_modes[] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05 }; static const u8 dma_modes[] = { 0x77, 0x21, 0x20 }; static const u8 drive_pci2[] = { 0x45, 0x44, 0x47, 0x46 }; - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); + const u8 speed = drive->dma_mode; u8 unit = drive->dn & 1; u8 ultra_enable = 0, ultra_timing = 0, dma_timing = 0; diff --git a/drivers/ide/sgiioc4.c b/drivers/ide/sgiioc4.c index b7d61dc64096..e3ea591f66d3 100644 --- a/drivers/ide/sgiioc4.c +++ b/drivers/ide/sgiioc4.c @@ -255,7 +255,7 @@ static int sgiioc4_dma_end(ide_drive_t *drive) return dma_stat; } -static void sgiioc4_set_dma_mode(ide_drive_t *drive, const u8 speed) +static void sgiioc4_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { } diff --git a/drivers/ide/siimage.c b/drivers/ide/siimage.c 
index 97266958f744..2009ac2ff658 100644 --- a/drivers/ide/siimage.c +++ b/drivers/ide/siimage.c @@ -289,19 +289,18 @@ static void sil_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) /** * sil_set_dma_mode - set host controller for DMA mode + * @hwif: port * @drive: drive - * @speed: DMA mode * * Tune the SiI chipset for the desired DMA mode. */ -static void sil_set_dma_mode(ide_drive_t *drive, const u8 speed) +static void sil_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { static const u8 ultra6[] = { 0x0F, 0x0B, 0x07, 0x05, 0x03, 0x02, 0x01 }; static const u8 ultra5[] = { 0x0C, 0x07, 0x05, 0x04, 0x02, 0x01 }; static const u16 dma[] = { 0x2208, 0x10C2, 0x10C1 }; - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); unsigned long base = (unsigned long)hwif->hwif_data; u16 ultra = 0, multi = 0; @@ -311,6 +310,7 @@ static void sil_set_dma_mode(ide_drive_t *drive, const u8 speed) : (mmio ? 0xB4 : 0x80); unsigned long ma = siimage_seldev(drive, 0x08); unsigned long ua = siimage_seldev(drive, 0x0C); + const u8 speed = drive->dma_mode; scsc = sil_ioread8 (dev, base + (mmio ? 0x4A : 0x8A)); mode = sil_ioread8 (dev, base + addr_mask); diff --git a/drivers/ide/sis5513.c b/drivers/ide/sis5513.c index 5a0192060531..db7f4e761dbc 100644 --- a/drivers/ide/sis5513.c +++ b/drivers/ide/sis5513.c @@ -340,8 +340,10 @@ static void sis_program_udma_timings(ide_drive_t *drive, const u8 mode) sis_ata33_program_udma_timings(drive, mode); } -static void sis_set_dma_mode(ide_drive_t *drive, const u8 speed) +static void sis_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { + const u8 speed = drive->dma_mode; + if (speed >= XFER_UDMA_0) sis_program_udma_timings(drive, speed); else diff --git a/drivers/ide/sl82c105.c b/drivers/ide/sl82c105.c index 419cd3bc6c84..f21dc2ad7682 100644 --- a/drivers/ide/sl82c105.c +++ b/drivers/ide/sl82c105.c @@ -92,11 +92,12 @@ static void sl82c105_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) /* * Configure the chipset for DMA mode. */ -static void sl82c105_set_dma_mode(ide_drive_t *drive, const u8 speed) +static void sl82c105_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { static u16 mwdma_timings[] = {0x0707, 0x0201, 0x0200}; unsigned long timings = (unsigned long)ide_get_drivedata(drive); u16 drv_ctrl; + const u8 speed = drive->dma_mode; drv_ctrl = mwdma_timings[speed - XFER_MW_DMA_0]; diff --git a/drivers/ide/slc90e66.c b/drivers/ide/slc90e66.c index 019777522cd2..864ffe0e26d9 100644 --- a/drivers/ide/slc90e66.c +++ b/drivers/ide/slc90e66.c @@ -72,14 +72,14 @@ static void slc90e66_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) spin_unlock_irqrestore(&slc90e66_lock, flags); } -static void slc90e66_set_dma_mode(ide_drive_t *drive, const u8 speed) +static void slc90e66_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); u8 maslave = hwif->channel ? 0x42 : 0x40; int sitre = 0, a_speed = 7 << (drive->dn * 4); int u_speed = 0, u_flag = 1 << drive->dn; u16 reg4042, reg44, reg48, reg4a; + const u8 speed = drive->dma_mode; pci_read_config_word(dev, maslave, ®4042); sitre = (reg4042 & 0x4000) ? 
1 : 0; diff --git a/drivers/ide/tc86c001.c b/drivers/ide/tc86c001.c index f2cb62bf3f22..e444d24934b3 100644 --- a/drivers/ide/tc86c001.c +++ b/drivers/ide/tc86c001.c @@ -13,11 +13,11 @@ #define DRV_NAME "tc86c001" -static void tc86c001_set_mode(ide_drive_t *drive, const u8 speed) +static void tc86c001_set_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; unsigned long scr_port = hwif->config_data + (drive->dn ? 0x02 : 0x00); u16 mode, scr = inw(scr_port); + const u8 speed = drive->dma_mode; switch (speed) { case XFER_UDMA_4: mode = 0x00c0; break; @@ -43,7 +43,8 @@ static void tc86c001_set_mode(ide_drive_t *drive, const u8 speed) static void tc86c001_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - tc86c001_set_mode(drive, drive->pio_mode); + drive->dma_mode = drive->pio_mode; + tc86c001_set_mode(hwif, drive); } /* diff --git a/drivers/ide/triflex.c b/drivers/ide/triflex.c index d34a7eecdea5..7953447eae0f 100644 --- a/drivers/ide/triflex.c +++ b/drivers/ide/triflex.c @@ -34,9 +34,8 @@ #define DRV_NAME "triflex" -static void triflex_set_mode(ide_drive_t *drive, const u8 speed) +static void triflex_set_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; struct pci_dev *dev = to_pci_dev(hwif->dev); u32 triflex_timings = 0; u16 timing = 0; @@ -44,7 +43,7 @@ static void triflex_set_mode(ide_drive_t *drive, const u8 speed) pci_read_config_dword(dev, channel_offset, &triflex_timings); - switch(speed) { + switch (drive->dma_mode) { case XFER_MW_DMA_2: timing = 0x0103; break; @@ -84,7 +83,8 @@ static void triflex_set_mode(ide_drive_t *drive, const u8 speed) static void triflex_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - triflex_set_mode(drive, drive->pio_mode); + drive->dma_mode = drive->pio_mode; + triflex_set_mode(hwif, drive); } static const struct ide_port_ops triflex_port_ops = { diff --git a/drivers/ide/tx4939ide.c b/drivers/ide/tx4939ide.c index 5228a4786de5..f210633a3d57 100644 --- a/drivers/ide/tx4939ide.c +++ b/drivers/ide/tx4939ide.c @@ -125,10 +125,10 @@ static void tx4939ide_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) /* tx4939ide_tf_load_fixup() will set the Sys_Ctl register */ } -static void tx4939ide_set_dma_mode(ide_drive_t *drive, const u8 mode) +static void tx4939ide_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; u32 mask, val; + const u8 mode = drive->dma_mode; /* Update Data Transfer Mode for this drive. */ if (mode >= XFER_UDMA_0) diff --git a/drivers/ide/via82cxxx.c b/drivers/ide/via82cxxx.c index 6d995fc9d4f5..6769fe252b07 100644 --- a/drivers/ide/via82cxxx.c +++ b/drivers/ide/via82cxxx.c @@ -169,22 +169,22 @@ static void via_set_speed(ide_hwif_t *hwif, u8 dn, struct ide_timing *timing) /** * via_set_drive - configure transfer mode + * @hwif: port * @drive: Drive to set up - * @speed: desired speed * * via_set_drive() computes timing values configures the chipset to * a desired transfer mode. It also can be called by upper layers. 
*/ -static void via_set_drive(ide_drive_t *drive, const u8 speed) +static void via_set_drive(ide_hwif_t *hwif, ide_drive_t *drive) { - ide_hwif_t *hwif = drive->hwif; ide_drive_t *peer = ide_get_pair_dev(drive); struct pci_dev *dev = to_pci_dev(hwif->dev); struct ide_host *host = pci_get_drvdata(dev); struct via82cxxx_dev *vdev = host->host_priv; struct ide_timing t, p; unsigned int T, UT; + const u8 speed = drive->dma_mode; T = 1000000000 / via_clock; @@ -216,7 +216,8 @@ static void via_set_drive(ide_drive_t *drive, const u8 speed) static void via_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) { - via_set_drive(drive, drive->pio_mode); + drive->dma_mode = drive->pio_mode; + via_set_drive(hwif, drive); } static struct via_isa_bridge *via_config_find(struct pci_dev **isa) diff --git a/include/linux/ide.h b/include/linux/ide.h index 803ec306883c..53ecdba82d72 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -625,7 +625,7 @@ extern const struct ide_tp_ops default_tp_ops; struct ide_port_ops { void (*init_dev)(ide_drive_t *); void (*set_pio_mode)(struct hwif_s *, ide_drive_t *); - void (*set_dma_mode)(ide_drive_t *, const u8); + void (*set_dma_mode)(struct hwif_s *, ide_drive_t *); int (*reset_poll)(ide_drive_t *); void (*pre_reset)(ide_drive_t *); void (*resetproc)(ide_drive_t *); -- cgit v1.2.3 From 220c58bc6d1198c4c4e69a385d364602c38b6b1c Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Mon, 18 Jan 2010 07:22:38 +0000 Subject: ide: make ide_get_best_pio_mode() static Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: David S. Miller --- drivers/ide/ide-xfer-mode.c | 3 +-- include/linux/ide.h | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/ide/ide-xfer-mode.c b/drivers/ide/ide-xfer-mode.c index 9b549e4d1848..5fc8d5c17de9 100644 --- a/drivers/ide/ide-xfer-mode.c +++ b/drivers/ide/ide-xfer-mode.c @@ -58,7 +58,7 @@ EXPORT_SYMBOL(ide_xfer_verbose); * This is used by most chipset support modules when "auto-tuning". */ -u8 ide_get_best_pio_mode(ide_drive_t *drive, u8 mode_wanted, u8 max_mode) +static u8 ide_get_best_pio_mode(ide_drive_t *drive, u8 mode_wanted, u8 max_mode) { u16 *id = drive->id; int pio_mode = -1, overridden = 0; @@ -105,7 +105,6 @@ u8 ide_get_best_pio_mode(ide_drive_t *drive, u8 mode_wanted, u8 max_mode) return pio_mode; } -EXPORT_SYMBOL_GPL(ide_get_best_pio_mode); int ide_pio_need_iordy(ide_drive_t *drive, const u8 pio) { diff --git a/include/linux/ide.h b/include/linux/ide.h index 53ecdba82d72..97e6ab435184 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1496,7 +1496,6 @@ int ide_timing_compute(ide_drive_t *, u8, struct ide_timing *, int, int); #ifdef CONFIG_IDE_XFER_MODE int ide_scan_pio_blacklist(char *); const char *ide_xfer_verbose(u8); -u8 ide_get_best_pio_mode(ide_drive_t *, u8, u8); int ide_pio_need_iordy(ide_drive_t *, const u8); int ide_set_pio_mode(ide_drive_t *, u8); int ide_set_dma_mode(ide_drive_t *, u8); -- cgit v1.2.3 From 002345925e6c45861f60db6f4fc6236713fd8847 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 3 Feb 2010 15:36:43 -0800 Subject: syslog: distinguish between /proc/kmsg and syscalls This allows the LSM to distinguish between syslog functions originating from /proc/kmsg access and direct syscalls. By default, the commoncaps will now no longer require CAP_SYS_ADMIN to read an opened /proc/kmsg file descriptor. For example the kernel syslog reader can now drop privileges after opening /proc/kmsg, instead of staying privileged with CAP_SYS_ADMIN. 
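As a rough userspace sketch of that pattern (illustrative only, not part of the patch; 65534 is just a placeholder uid/gid for an unprivileged account), a klogd-style reader can open /proc/kmsg while still root, drop privileges, and keep reading the already-open descriptor under the default commoncap policy:

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		char buf[4096];
		ssize_t n;
		int fd = open("/proc/kmsg", O_RDONLY);	/* still privileged here */

		if (fd < 0) {
			perror("open /proc/kmsg");
			return 1;
		}

		/* Drop root after the open; later reads no longer need CAP_SYS_ADMIN. */
		if (setgid(65534) != 0 || setuid(65534) != 0) {
			perror("drop privileges");
			return 1;
		}

		while ((n = read(fd, buf, sizeof(buf))) > 0)
			fwrite(buf, 1, n, stdout);

		return 0;
	}
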
MAC systems that implement security_syslog have unchanged behavior. Signed-off-by: Kees Cook Acked-by: Serge Hallyn Acked-by: John Johansen Signed-off-by: James Morris --- fs/proc/kmsg.c | 14 +++++++------- include/linux/security.h | 11 ++++++----- include/linux/syslog.h | 29 +++++++++++++++++++++++++++++ kernel/printk.c | 7 ++++--- security/commoncap.c | 7 ++++++- security/security.c | 4 ++-- security/selinux/hooks.c | 5 +++-- security/smack/smack_lsm.c | 4 ++-- 8 files changed, 59 insertions(+), 22 deletions(-) create mode 100644 include/linux/syslog.h (limited to 'include') diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c index 7ca78346d3f0..6a3d843a1088 100644 --- a/fs/proc/kmsg.c +++ b/fs/proc/kmsg.c @@ -12,37 +12,37 @@ #include #include #include +#include #include #include extern wait_queue_head_t log_wait; -extern int do_syslog(int type, char __user *bug, int count); - static int kmsg_open(struct inode * inode, struct file * file) { - return do_syslog(1,NULL,0); + return do_syslog(1, NULL, 0, SYSLOG_FROM_FILE); } static int kmsg_release(struct inode * inode, struct file * file) { - (void) do_syslog(0,NULL,0); + (void) do_syslog(0, NULL, 0, SYSLOG_FROM_FILE); return 0; } static ssize_t kmsg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - if ((file->f_flags & O_NONBLOCK) && !do_syslog(9, NULL, 0)) + if ((file->f_flags & O_NONBLOCK) && + !do_syslog(9, NULL, 0, SYSLOG_FROM_FILE)) return -EAGAIN; - return do_syslog(2, buf, count); + return do_syslog(2, buf, count, SYSLOG_FROM_FILE); } static unsigned int kmsg_poll(struct file *file, poll_table *wait) { poll_wait(file, &log_wait, wait); - if (do_syslog(9, NULL, 0)) + if (do_syslog(9, NULL, 0, SYSLOG_FROM_FILE)) return POLLIN | POLLRDNORM; return 0; } diff --git a/include/linux/security.h b/include/linux/security.h index 26eca85b2417..a4dc74d86ac6 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -76,7 +76,7 @@ extern int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, extern int cap_task_setscheduler(struct task_struct *p, int policy, struct sched_param *lp); extern int cap_task_setioprio(struct task_struct *p, int ioprio); extern int cap_task_setnice(struct task_struct *p, int nice); -extern int cap_syslog(int type); +extern int cap_syslog(int type, bool from_file); extern int cap_vm_enough_memory(struct mm_struct *mm, long pages); struct msghdr; @@ -1349,6 +1349,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * logging to the console. * See the syslog(2) manual page for an explanation of the @type values. * @type contains the type of action. + * @from_file indicates the context of action (if it came from /proc). * Return 0 if permission is granted. * @settime: * Check permission to change the system time. 
@@ -1463,7 +1464,7 @@ struct security_operations { int (*sysctl) (struct ctl_table *table, int op); int (*quotactl) (int cmds, int type, int id, struct super_block *sb); int (*quota_on) (struct dentry *dentry); - int (*syslog) (int type); + int (*syslog) (int type, bool from_file); int (*settime) (struct timespec *ts, struct timezone *tz); int (*vm_enough_memory) (struct mm_struct *mm, long pages); @@ -1762,7 +1763,7 @@ int security_acct(struct file *file); int security_sysctl(struct ctl_table *table, int op); int security_quotactl(int cmds, int type, int id, struct super_block *sb); int security_quota_on(struct dentry *dentry); -int security_syslog(int type); +int security_syslog(int type, bool from_file); int security_settime(struct timespec *ts, struct timezone *tz); int security_vm_enough_memory(long pages); int security_vm_enough_memory_mm(struct mm_struct *mm, long pages); @@ -2008,9 +2009,9 @@ static inline int security_quota_on(struct dentry *dentry) return 0; } -static inline int security_syslog(int type) +static inline int security_syslog(int type, bool from_file) { - return cap_syslog(type); + return cap_syslog(type, from_file); } static inline int security_settime(struct timespec *ts, struct timezone *tz) diff --git a/include/linux/syslog.h b/include/linux/syslog.h new file mode 100644 index 000000000000..5f02b1817be1 --- /dev/null +++ b/include/linux/syslog.h @@ -0,0 +1,29 @@ +/* Syslog internals + * + * Copyright 2010 Canonical, Ltd. + * Author: Kees Cook + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#ifndef _LINUX_SYSLOG_H +#define _LINUX_SYSLOG_H + +#define SYSLOG_FROM_CALL 0 +#define SYSLOG_FROM_FILE 1 + +int do_syslog(int type, char __user *buf, int count, bool from_file); + +#endif /* _LINUX_SYSLOG_H */ diff --git a/kernel/printk.c b/kernel/printk.c index 17463ca2e229..809cf9a258a0 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -35,6 +35,7 @@ #include #include #include +#include #include @@ -273,14 +274,14 @@ static inline void boot_delay_msec(void) * 9 -- Return number of unread characters in the log buffer * 10 -- Return size of the log buffer */ -int do_syslog(int type, char __user *buf, int len) +int do_syslog(int type, char __user *buf, int len, bool from_file) { unsigned i, j, limit, count; int do_clear = 0; char c; int error = 0; - error = security_syslog(type); + error = security_syslog(type, from_file); if (error) return error; @@ -417,7 +418,7 @@ out: SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) { - return do_syslog(type, buf, len); + return do_syslog(type, buf, len, SYSLOG_FROM_CALL); } /* diff --git a/security/commoncap.c b/security/commoncap.c index f800fdb3de94..677fad9d5cba 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -27,6 +27,7 @@ #include #include #include +#include /* * If a non-root user executes a setuid-root binary in @@ -888,12 +889,16 @@ error: /** * cap_syslog - Determine whether syslog function is permitted * @type: Function requested + * @from_file: Whether this request came from an open file (i.e. /proc) * * Determine whether the current process is permitted to use a particular * syslog function, returning 0 if permission is granted, -ve if not. */ -int cap_syslog(int type) +int cap_syslog(int type, bool from_file) { + /* /proc/kmsg can open be opened by CAP_SYS_ADMIN */ + if (type != 1 && from_file) + return 0; if ((type != 3 && type != 10) && !capable(CAP_SYS_ADMIN)) return -EPERM; return 0; diff --git a/security/security.c b/security/security.c index 440afe5eb54c..971092c06f31 100644 --- a/security/security.c +++ b/security/security.c @@ -203,9 +203,9 @@ int security_quota_on(struct dentry *dentry) return security_ops->quota_on(dentry); } -int security_syslog(int type) +int security_syslog(int type, bool from_file) { - return security_ops->syslog(type); + return security_ops->syslog(type, from_file); } int security_settime(struct timespec *ts, struct timezone *tz) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 9a2ee845e9d4..a4862a0730fa 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -76,6 +76,7 @@ #include #include #include +#include #include "avc.h" #include "objsec.h" @@ -2049,11 +2050,11 @@ static int selinux_quota_on(struct dentry *dentry) return dentry_has_perm(cred, NULL, dentry, FILE__QUOTAON); } -static int selinux_syslog(int type) +static int selinux_syslog(int type, bool from_file) { int rc; - rc = cap_syslog(type); + rc = cap_syslog(type, from_file); if (rc) return rc; diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 529c9ca65878..a5721b373f53 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -157,12 +157,12 @@ static int smack_ptrace_traceme(struct task_struct *ptp) * * Returns 0 on success, error code otherwise. 
*/ -static int smack_syslog(int type) +static int smack_syslog(int type, bool from_file) { int rc; char *sp = current_security(); - rc = cap_syslog(type); + rc = cap_syslog(type, from_file); if (rc != 0) return rc; -- cgit v1.2.3 From d78ca3cd733d8a2c3dcd88471beb1a15d973eed8 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 3 Feb 2010 15:37:13 -0800 Subject: syslog: use defined constants instead of raw numbers Right now the syslog "type" action are just raw numbers which makes the source difficult to follow. This patch replaces the raw numbers with defined constants for some level of sanity. Signed-off-by: Kees Cook Acked-by: John Johansen Acked-by: Serge Hallyn Signed-off-by: James Morris --- fs/proc/kmsg.c | 10 +++++----- include/linux/syslog.h | 23 +++++++++++++++++++++++ kernel/printk.c | 45 +++++++++++++++++++-------------------------- security/commoncap.c | 5 +++-- security/selinux/hooks.c | 21 +++++++++++---------- 5 files changed, 61 insertions(+), 43 deletions(-) (limited to 'include') diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c index 6a3d843a1088..cfe90a48a6e8 100644 --- a/fs/proc/kmsg.c +++ b/fs/proc/kmsg.c @@ -21,12 +21,12 @@ extern wait_queue_head_t log_wait; static int kmsg_open(struct inode * inode, struct file * file) { - return do_syslog(1, NULL, 0, SYSLOG_FROM_FILE); + return do_syslog(SYSLOG_ACTION_OPEN, NULL, 0, SYSLOG_FROM_FILE); } static int kmsg_release(struct inode * inode, struct file * file) { - (void) do_syslog(0, NULL, 0, SYSLOG_FROM_FILE); + (void) do_syslog(SYSLOG_ACTION_CLOSE, NULL, 0, SYSLOG_FROM_FILE); return 0; } @@ -34,15 +34,15 @@ static ssize_t kmsg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { if ((file->f_flags & O_NONBLOCK) && - !do_syslog(9, NULL, 0, SYSLOG_FROM_FILE)) + !do_syslog(SYSLOG_ACTION_SIZE_UNREAD, NULL, 0, SYSLOG_FROM_FILE)) return -EAGAIN; - return do_syslog(2, buf, count, SYSLOG_FROM_FILE); + return do_syslog(SYSLOG_ACTION_READ, buf, count, SYSLOG_FROM_FILE); } static unsigned int kmsg_poll(struct file *file, poll_table *wait) { poll_wait(file, &log_wait, wait); - if (do_syslog(9, NULL, 0, SYSLOG_FROM_FILE)) + if (do_syslog(SYSLOG_ACTION_SIZE_UNREAD, NULL, 0, SYSLOG_FROM_FILE)) return POLLIN | POLLRDNORM; return 0; } diff --git a/include/linux/syslog.h b/include/linux/syslog.h index 5f02b1817be1..38911391a139 100644 --- a/include/linux/syslog.h +++ b/include/linux/syslog.h @@ -21,6 +21,29 @@ #ifndef _LINUX_SYSLOG_H #define _LINUX_SYSLOG_H +/* Close the log. Currently a NOP. */ +#define SYSLOG_ACTION_CLOSE 0 +/* Open the log. Currently a NOP. */ +#define SYSLOG_ACTION_OPEN 1 +/* Read from the log. */ +#define SYSLOG_ACTION_READ 2 +/* Read all messages remaining in the ring buffer. */ +#define SYSLOG_ACTION_READ_ALL 3 +/* Read and clear all messages remaining in the ring buffer */ +#define SYSLOG_ACTION_READ_CLEAR 4 +/* Clear ring buffer. 
*/ +#define SYSLOG_ACTION_CLEAR 5 +/* Disable printk's to console */ +#define SYSLOG_ACTION_CONSOLE_OFF 6 +/* Enable printk's to console */ +#define SYSLOG_ACTION_CONSOLE_ON 7 +/* Set level of messages printed to console */ +#define SYSLOG_ACTION_CONSOLE_LEVEL 8 +/* Return number of unread characters in the log buffer */ +#define SYSLOG_ACTION_SIZE_UNREAD 9 +/* Return size of the log buffer */ +#define SYSLOG_ACTION_SIZE_BUFFER 10 + #define SYSLOG_FROM_CALL 0 #define SYSLOG_FROM_FILE 1 diff --git a/kernel/printk.c b/kernel/printk.c index 809cf9a258a0..3e162d867098 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -259,21 +259,6 @@ static inline void boot_delay_msec(void) } #endif -/* - * Commands to do_syslog: - * - * 0 -- Close the log. Currently a NOP. - * 1 -- Open the log. Currently a NOP. - * 2 -- Read from the log. - * 3 -- Read all messages remaining in the ring buffer. - * 4 -- Read and clear all messages remaining in the ring buffer - * 5 -- Clear ring buffer. - * 6 -- Disable printk's to console - * 7 -- Enable printk's to console - * 8 -- Set level of messages printed to console - * 9 -- Return number of unread characters in the log buffer - * 10 -- Return size of the log buffer - */ int do_syslog(int type, char __user *buf, int len, bool from_file) { unsigned i, j, limit, count; @@ -286,11 +271,11 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) return error; switch (type) { - case 0: /* Close log */ + case SYSLOG_ACTION_CLOSE: /* Close log */ break; - case 1: /* Open log */ + case SYSLOG_ACTION_OPEN: /* Open log */ break; - case 2: /* Read from log */ + case SYSLOG_ACTION_READ: /* Read from log */ error = -EINVAL; if (!buf || len < 0) goto out; @@ -321,10 +306,12 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) if (!error) error = i; break; - case 4: /* Read/clear last kernel messages */ + /* Read/clear last kernel messages */ + case SYSLOG_ACTION_READ_CLEAR: do_clear = 1; /* FALL THRU */ - case 3: /* Read last kernel messages */ + /* Read last kernel messages */ + case SYSLOG_ACTION_READ_ALL: error = -EINVAL; if (!buf || len < 0) goto out; @@ -377,21 +364,25 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) } } break; - case 5: /* Clear ring buffer */ + /* Clear ring buffer */ + case SYSLOG_ACTION_CLEAR: logged_chars = 0; break; - case 6: /* Disable logging to console */ + /* Disable logging to console */ + case SYSLOG_ACTION_CONSOLE_OFF: if (saved_console_loglevel == -1) saved_console_loglevel = console_loglevel; console_loglevel = minimum_console_loglevel; break; - case 7: /* Enable logging to console */ + /* Enable logging to console */ + case SYSLOG_ACTION_CONSOLE_ON: if (saved_console_loglevel != -1) { console_loglevel = saved_console_loglevel; saved_console_loglevel = -1; } break; - case 8: /* Set level of messages printed to console */ + /* Set level of messages printed to console */ + case SYSLOG_ACTION_CONSOLE_LEVEL: error = -EINVAL; if (len < 1 || len > 8) goto out; @@ -402,10 +393,12 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) saved_console_loglevel = -1; error = 0; break; - case 9: /* Number of chars in the log buffer */ + /* Number of chars in the log buffer */ + case SYSLOG_ACTION_SIZE_UNREAD: error = log_end - log_start; break; - case 10: /* Size of the log buffer */ + /* Size of the log buffer */ + case SYSLOG_ACTION_SIZE_BUFFER: error = log_buf_len; break; default: diff --git a/security/commoncap.c b/security/commoncap.c index 677fad9d5cba..cf01b2eebb60 100644 --- 
a/security/commoncap.c +++ b/security/commoncap.c @@ -897,9 +897,10 @@ error: int cap_syslog(int type, bool from_file) { /* /proc/kmsg can open be opened by CAP_SYS_ADMIN */ - if (type != 1 && from_file) + if (type != SYSLOG_ACTION_OPEN && from_file) return 0; - if ((type != 3 && type != 10) && !capable(CAP_SYS_ADMIN)) + if ((type != SYSLOG_ACTION_READ_ALL && + type != SYSLOG_ACTION_SIZE_BUFFER) && !capable(CAP_SYS_ADMIN)) return -EPERM; return 0; } diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index a4862a0730fa..6b36ce2eef2e 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2059,20 +2059,21 @@ static int selinux_syslog(int type, bool from_file) return rc; switch (type) { - case 3: /* Read last kernel messages */ - case 10: /* Return size of the log buffer */ + case SYSLOG_ACTION_READ_ALL: /* Read last kernel messages */ + case SYSLOG_ACTION_SIZE_BUFFER: /* Return size of the log buffer */ rc = task_has_system(current, SYSTEM__SYSLOG_READ); break; - case 6: /* Disable logging to console */ - case 7: /* Enable logging to console */ - case 8: /* Set level of messages printed to console */ + case SYSLOG_ACTION_CONSOLE_OFF: /* Disable logging to console */ + case SYSLOG_ACTION_CONSOLE_ON: /* Enable logging to console */ + /* Set level of messages printed to console */ + case SYSLOG_ACTION_CONSOLE_LEVEL: rc = task_has_system(current, SYSTEM__SYSLOG_CONSOLE); break; - case 0: /* Close log */ - case 1: /* Open log */ - case 2: /* Read from log */ - case 4: /* Read/clear last kernel messages */ - case 5: /* Clear ring buffer */ + case SYSLOG_ACTION_CLOSE: /* Close log */ + case SYSLOG_ACTION_OPEN: /* Open log */ + case SYSLOG_ACTION_READ: /* Read from log */ + case SYSLOG_ACTION_READ_CLEAR: /* Read/clear last kernel messages */ + case SYSLOG_ACTION_CLEAR: /* Clear ring buffer */ default: rc = task_has_system(current, SYSTEM__SYSLOG_MOD); break; -- cgit v1.2.3 From c41b20e721ea4f6f20f66a66e7f0c3c97a2ca9c2 Mon Sep 17 00:00:00 2001 From: Adam Buchbinder Date: Fri, 11 Dec 2009 16:35:39 -0500 Subject: Fix misspellings of "truly" in comments. Some comments misspell "truly"; this fixes them. No code changes. 
Signed-off-by: Adam Buchbinder Signed-off-by: Jiri Kosina --- drivers/char/random.c | 2 +- drivers/gpu/drm/radeon/radeon_state.c | 2 +- drivers/net/cs89x0.c | 2 +- fs/dlm/member.c | 2 +- fs/namei.c | 2 +- include/math-emu/op-common.h | 2 +- kernel/trace/ring_buffer.c | 4 ++-- 7 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/char/random.c b/drivers/char/random.c index 2849713d2231..2fd3d39995d5 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1191,7 +1191,7 @@ const struct file_operations urandom_fops = { void generate_random_uuid(unsigned char uuid_out[16]) { get_random_bytes(uuid_out, 16); - /* Set UUID version to 4 --- truely random generation */ + /* Set UUID version to 4 --- truly random generation */ uuid_out[6] = (uuid_out[6] & 0x0F) | 0x40; /* Set the UUID variant to DCE */ uuid_out[8] = (uuid_out[8] & 0x3F) | 0x80; diff --git a/drivers/gpu/drm/radeon/radeon_state.c b/drivers/gpu/drm/radeon/radeon_state.c index 067167cb39ca..1982a87386a1 100644 --- a/drivers/gpu/drm/radeon/radeon_state.c +++ b/drivers/gpu/drm/radeon/radeon_state.c @@ -1065,7 +1065,7 @@ static void radeon_cp_dispatch_clear(struct drm_device * dev, /* judging by the first tile offset needed, could possibly directly address/clear 4x4 tiles instead of 8x2 * 4x4 macro tiles, though would still need clear mask for - right/bottom if truely 4x4 granularity is desired ? */ + right/bottom if truly 4x4 granularity is desired ? */ OUT_RING(tileoffset * 16); /* the number of tiles to clear */ OUT_RING(nrtilesx + 1); diff --git a/drivers/net/cs89x0.c b/drivers/net/cs89x0.c index 0e79cef95c0a..9b5bbc6ea2fa 100644 --- a/drivers/net/cs89x0.c +++ b/drivers/net/cs89x0.c @@ -580,7 +580,7 @@ cs89x0_probe1(struct net_device *dev, int ioaddr, int modular) } #ifdef CONFIG_SH_HICOSH4 - /* truely reset the chip */ + /* truly reset the chip */ writeword(ioaddr, ADD_PORT, 0x0114); writeword(ioaddr, DATA_PORT, 0x0040); #endif diff --git a/fs/dlm/member.c b/fs/dlm/member.c index 84f70bfb0baf..b12532e553f8 100644 --- a/fs/dlm/member.c +++ b/fs/dlm/member.c @@ -312,7 +312,7 @@ int dlm_ls_stop(struct dlm_ls *ls) /* * This in_recovery lock does two things: * 1) Keeps this function from returning until all threads are out - * of locking routines and locking is truely stopped. + * of locking routines and locking is truly stopped. * 2) Keeps any new requests from being processed until it's unlocked * when recovery is complete. */ diff --git a/fs/namei.c b/fs/namei.c index 94a5e60779f9..e05c243105a0 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2556,7 +2556,7 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname * e) conversion from fhandle to dentry may come in the wrong moment - when * we are removing the target. Solution: we will have to grab ->i_mutex * in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on - * ->i_mutex on parents, which works but leads to some truely excessive + * ->i_mutex on parents, which works but leads to some truly excessive * locking]. 
*/ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, diff --git a/include/math-emu/op-common.h b/include/math-emu/op-common.h index f456534dcaf9..fd882261225e 100644 --- a/include/math-emu/op-common.h +++ b/include/math-emu/op-common.h @@ -29,7 +29,7 @@ _FP_FRAC_DECL_##wc(X) /* - * Finish truely unpacking a native fp value by classifying the kind + * Finish truly unpacking a native fp value by classifying the kind * of fp value and normalizing both the exponent and the fraction. */ diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 8c1b2d290718..9ab578f1bb65 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2541,7 +2541,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_disable); * @buffer: The ring buffer to enable writes * * Note, multiple disables will need the same number of enables - * to truely enable the writing (much like preempt_disable). + * to truly enable the writing (much like preempt_disable). */ void ring_buffer_record_enable(struct ring_buffer *buffer) { @@ -2577,7 +2577,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_disable_cpu); * @cpu: The CPU to enable. * * Note, multiple disables will need the same number of enables - * to truely enable the writing (much like preempt_disable). + * to truly enable the writing (much like preempt_disable). */ void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu) { -- cgit v1.2.3 From 2a61aa401638529cd4231f6106980d307fba98fa Mon Sep 17 00:00:00 2001 From: Adam Buchbinder Date: Fri, 11 Dec 2009 16:35:40 -0500 Subject: Fix misspellings of "invocation" in comments. Some comments misspell "invocation"; this fixes them. No code changes. Signed-off-by: Adam Buchbinder Signed-off-by: Jiri Kosina --- fs/buffer.c | 2 +- fs/mpage.c | 2 +- include/linux/mmzone.h | 2 +- kernel/sched_cpupri.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/fs/buffer.c b/fs/buffer.c index 6fa530256bfd..1d920bab5e70 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2893,7 +2893,7 @@ int block_write_full_page_endio(struct page *page, get_block_t *get_block, /* * The page straddles i_size. It must be zeroed out on each and every - * writepage invokation because it may be mmapped. "A file is mapped + * writepage invocation because it may be mmapped. "A file is mapped * in multiples of the page size. For a file that is not a multiple of * the page size, the remaining memory is zeroed when mapped, and * writes to that region are not written out to the file." diff --git a/fs/mpage.c b/fs/mpage.c index 42381bd6543b..598d54e200eb 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -561,7 +561,7 @@ page_is_mapped: if (page->index >= end_index) { /* * The page straddles i_size. It must be zeroed out on each - * and every writepage invokation because it may be mmapped. + * and every writepage invocation because it may be mmapped. * "A file is mapped in multiples of the page size. For a file * that is not a multiple of the page size, the remaining memory * is zeroed when mapped, and writes to that region are not diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 30fe668c2542..e60a340fe890 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -349,7 +349,7 @@ struct zone { * prev_priority holds the scanning priority for this zone. It is * defined as the scanning priority at which we achieved our reclaim * target at the previous try_to_free_pages() or balance_pgdat() - * invokation. + * invocation. 
* * We use prev_priority as a measure of how much stress page reclaim is * under - it drives the swappiness decision: whether to unmap mapped diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c index 597b33099dfa..3db4b1a0e921 100644 --- a/kernel/sched_cpupri.c +++ b/kernel/sched_cpupri.c @@ -58,7 +58,7 @@ static int convert_prio(int prio) * @lowest_mask: A mask to fill in with selected CPUs (or NULL) * * Note: This function returns the recommended CPUs as calculated during the - * current invokation. By the time the call returns, the CPUs may have in + * current invocation. By the time the call returns, the CPUs may have in * fact changed priorities any number of times. While not ideal, it is not * an issue of correctness since the normal rebalancer logic will correct * any discrepancies created by racing against the uncertainty of the current -- cgit v1.2.3 From 947af2943576400628bba085eaa6b85143526133 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Thu, 7 Jan 2010 00:03:52 +0100 Subject: Fix spelling of 'platform' in comments and doc Replace platfrom -> platform. This is a frequent spelling bug. Signed-off-by: Stefan Weil Signed-off-by: Jiri Kosina --- Documentation/driver-model/platform.txt | 2 +- arch/arm/mach-davinci/include/mach/i2c.h | 2 +- arch/arm/mach-s3c2410/include/mach/spi-gpio.h | 2 +- arch/arm/plat-s3c/include/plat/nand.h | 2 +- arch/blackfin/include/asm/nand.h | 4 ++-- arch/powerpc/kernel/legacy_serial.c | 2 +- drivers/mtd/maps/plat-ram.c | 2 +- drivers/net/cxgb3/sge.c | 4 ++-- drivers/video/mbx/mbxfb.c | 2 +- include/linux/dm9000.h | 2 +- 10 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/Documentation/driver-model/platform.txt b/Documentation/driver-model/platform.txt index 2e2c2ea90ceb..41f41632ee55 100644 --- a/Documentation/driver-model/platform.txt +++ b/Documentation/driver-model/platform.txt @@ -192,7 +192,7 @@ command line. This will execute all matching early_param() callbacks. User specified early platform devices will be registered at this point. For the early serial console case the user can specify port on the kernel command line as "earlyprintk=serial.0" where "earlyprintk" is -the class string, "serial" is the name of the platfrom driver and +the class string, "serial" is the name of the platform driver and 0 is the platform device id. If the id is -1 then the dot and the id can be omitted. diff --git a/arch/arm/mach-davinci/include/mach/i2c.h b/arch/arm/mach-davinci/include/mach/i2c.h index c248e9b7e825..44bdea13cc8c 100644 --- a/arch/arm/mach-davinci/include/mach/i2c.h +++ b/arch/arm/mach-davinci/include/mach/i2c.h @@ -1,5 +1,5 @@ /* - * DaVinci I2C controller platfrom_device info + * DaVinci I2C controller platform_device info * * Author: Vladimir Barinov, MontaVista Software, Inc. 
* diff --git a/arch/arm/mach-s3c2410/include/mach/spi-gpio.h b/arch/arm/mach-s3c2410/include/mach/spi-gpio.h index 980a099e209c..dcef2287cb38 100644 --- a/arch/arm/mach-s3c2410/include/mach/spi-gpio.h +++ b/arch/arm/mach-s3c2410/include/mach/spi-gpio.h @@ -3,7 +3,7 @@ * Copyright (c) 2006 Simtec Electronics * Ben Dooks * - * S3C2410 - SPI Controller platfrom_device info + * S3C2410 - SPI Controller platform_device info * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/arch/arm/plat-s3c/include/plat/nand.h b/arch/arm/plat-s3c/include/plat/nand.h index 226147b7e026..b64115fa93a4 100644 --- a/arch/arm/plat-s3c/include/plat/nand.h +++ b/arch/arm/plat-s3c/include/plat/nand.h @@ -3,7 +3,7 @@ * Copyright (c) 2004 Simtec Electronics * Ben Dooks * - * S3C2410 - NAND device controller platfrom_device info + * S3C2410 - NAND device controller platform_device info * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/arch/blackfin/include/asm/nand.h b/arch/blackfin/include/asm/nand.h index 3ae8b569edfc..3a1e79dfc8d9 100644 --- a/arch/blackfin/include/asm/nand.h +++ b/arch/blackfin/include/asm/nand.h @@ -1,5 +1,5 @@ /* - * BF5XX - NAND flash controller platfrom_device info + * BF5XX - NAND flash controller platform_device info * * Copyright 2007-2008 Analog Devices, Inc. * @@ -8,7 +8,7 @@ /* struct bf5xx_nand_platform * - * define a interface between platfrom board specific code and + * define a interface between platform board specific code and * bf54x NFC driver. * * nr_partitions = number of partitions pointed to be partitoons (or zero) diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index 9ddfaef1a184..035ada5443ee 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -469,7 +469,7 @@ static int __init serial_dev_init(void) return -ENODEV; /* - * Before we register the platfrom serial devices, we need + * Before we register the platform serial devices, we need * to fixup their interrupts and their IO ports. */ DBG("Fixing serial ports interrupts and IO ports ...\n"); diff --git a/drivers/mtd/maps/plat-ram.c b/drivers/mtd/maps/plat-ram.c index dafb91944e70..76a76be5a7bd 100644 --- a/drivers/mtd/maps/plat-ram.c +++ b/drivers/mtd/maps/plat-ram.c @@ -4,7 +4,7 @@ * http://www.simtec.co.uk/products/SWLINUX/ * Ben Dooks * - * Generic platfrom device based RAM map + * Generic platform device based RAM map * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c index bdbd14727e4b..5dbc125822b9 100644 --- a/drivers/net/cxgb3/sge.c +++ b/drivers/net/cxgb3/sge.c @@ -196,13 +196,13 @@ static inline void refill_rspq(struct adapter *adapter, /** * need_skb_unmap - does the platform need unmapping of sk_buffs? * - * Returns true if the platfrom needs sk_buff unmapping. The compiler + * Returns true if the platform needs sk_buff unmapping. The compiler * optimizes away unecessary code if this returns true. */ static inline int need_skb_unmap(void) { /* - * This structure is used to tell if the platfrom needs buffer + * This structure is used to tell if the platform needs buffer * unmapping by checking if DECLARE_PCI_UNMAP_ADDR defines anything. 
*/ struct dummy { diff --git a/drivers/video/mbx/mbxfb.c b/drivers/video/mbx/mbxfb.c index 01f77bcc68f9..afea9abbd678 100644 --- a/drivers/video/mbx/mbxfb.c +++ b/drivers/video/mbx/mbxfb.c @@ -693,7 +693,7 @@ static void __devinit setup_memc(struct fb_info *fbi) unsigned long tmp; int i; - /* FIXME: use platfrom specific parameters */ + /* FIXME: use platform specific parameters */ /* setup SDRAM controller */ write_reg_dly((LMCFG_LMC_DS | LMCFG_LMC_TS | LMCFG_LMD_TS | LMCFG_LMA_TS), diff --git a/include/linux/dm9000.h b/include/linux/dm9000.h index c30879cf93bc..96e87693d933 100644 --- a/include/linux/dm9000.h +++ b/include/linux/dm9000.h @@ -23,7 +23,7 @@ #define DM9000_PLATF_NO_EEPROM (0x0010) #define DM9000_PLATF_SIMPLE_PHY (0x0020) /* Use NSR to find LinkStatus */ -/* platfrom data for platfrom device structure's platfrom_data field */ +/* platform data for platform device structure's platform_data field */ struct dm9000_plat_data { unsigned int flags; -- cgit v1.2.3 From 123df2944c436c80640c4281c5bc9c7950b18687 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 25 Dec 2009 04:57:57 -0500 Subject: Lose the new_name argument of fsnotify_move() it's always new_dentry->d_name.name Signed-off-by: Al Viro --- fs/debugfs/inode.c | 2 +- fs/namei.c | 6 ++---- include/linux/fsnotify.h | 3 ++- 3 files changed, 5 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 274ac865bae8..049d6c36da09 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -496,7 +496,7 @@ struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, } d_move(old_dentry, dentry); fsnotify_move(old_dir->d_inode, new_dir->d_inode, old_name, - old_dentry->d_name.name, S_ISDIR(old_dentry->d_inode->i_mode), + S_ISDIR(old_dentry->d_inode->i_mode), NULL, old_dentry); fsnotify_oldname_free(old_name); unlock_rename(new_dir, old_dir); diff --git a/fs/namei.c b/fs/namei.c index d62fdc875f22..f69df876fac3 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2661,11 +2661,9 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry); else error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry); - if (!error) { - const char *new_name = old_dentry->d_name.name; - fsnotify_move(old_dir, new_dir, old_name, new_name, is_dir, + if (!error) + fsnotify_move(old_dir, new_dir, old_name, is_dir, new_dentry->d_inode, old_dentry); - } fsnotify_oldname_free(old_name); return error; diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 936f9aa8bb97..2d755c49c324 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -65,7 +65,7 @@ static inline void fsnotify_link_count(struct inode *inode) * fsnotify_move - file old_name at old_dir was moved to new_name at new_dir */ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, - const char *old_name, const char *new_name, + const char *old_name, int isdir, struct inode *target, struct dentry *moved) { struct inode *source = moved->d_inode; @@ -73,6 +73,7 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, u32 fs_cookie = fsnotify_get_cookie(); __u32 old_dir_mask = (FS_EVENT_ON_CHILD | FS_MOVED_FROM); __u32 new_dir_mask = (FS_EVENT_ON_CHILD | FS_MOVED_TO); + const char *new_name = moved->d_name.name; if (old_dir == new_dir) old_dir_mask |= FS_DN_RENAME; -- cgit v1.2.3 From cccc6bba3f771ef29b33e4f79e70ebc3dba245b0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 25 Dec 
2009 05:07:33 -0500 Subject: Lose the first argument of audit_inode_child() it's always equal to ->d_name.name of the second argument Signed-off-by: Al Viro --- fs/namei.c | 2 +- include/linux/audit.h | 11 +++++------ include/linux/fsnotify.h | 8 ++++---- kernel/auditsc.c | 7 ++----- 4 files changed, 12 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/fs/namei.c b/fs/namei.c index f69df876fac3..865282f8e012 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1337,7 +1337,7 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir) return -ENOENT; BUG_ON(victim->d_parent->d_inode != dir); - audit_inode_child(victim->d_name.name, victim, dir); + audit_inode_child(victim, dir); error = inode_permission(dir, MAY_WRITE | MAY_EXEC); if (error) diff --git a/include/linux/audit.h b/include/linux/audit.h index 3c7a358241a7..f391d45c8aea 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -424,7 +424,7 @@ extern void audit_syscall_exit(int failed, long return_code); extern void __audit_getname(const char *name); extern void audit_putname(const char *name); extern void __audit_inode(const char *name, const struct dentry *dentry); -extern void __audit_inode_child(const char *dname, const struct dentry *dentry, +extern void __audit_inode_child(const struct dentry *dentry, const struct inode *parent); extern void __audit_ptrace(struct task_struct *t); @@ -442,11 +442,10 @@ static inline void audit_inode(const char *name, const struct dentry *dentry) { if (unlikely(!audit_dummy_context())) __audit_inode(name, dentry); } -static inline void audit_inode_child(const char *dname, - const struct dentry *dentry, +static inline void audit_inode_child(const struct dentry *dentry, const struct inode *parent) { if (unlikely(!audit_dummy_context())) - __audit_inode_child(dname, dentry, parent); + __audit_inode_child(dentry, parent); } void audit_core_dumps(long signr); @@ -544,9 +543,9 @@ extern int audit_signals; #define audit_getname(n) do { ; } while (0) #define audit_putname(n) do { ; } while (0) #define __audit_inode(n,d) do { ; } while (0) -#define __audit_inode_child(d,i,p) do { ; } while (0) +#define __audit_inode_child(i,p) do { ; } while (0) #define audit_inode(n,d) do { ; } while (0) -#define audit_inode_child(d,i,p) do { ; } while (0) +#define audit_inode_child(i,p) do { ; } while (0) #define audit_core_dumps(i) do { ; } while (0) #define auditsc_get_stamp(c,t,s) (0) #define audit_get_loginuid(t) (-1) diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 2d755c49c324..df8fd9a3b214 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -104,7 +104,7 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, inotify_inode_queue_event(source, IN_MOVE_SELF, 0, NULL, NULL); fsnotify(source, FS_MOVE_SELF, moved->d_inode, FSNOTIFY_EVENT_INODE, NULL, 0); } - audit_inode_child(new_name, moved, new_dir); + audit_inode_child(moved, new_dir); } /* @@ -147,7 +147,7 @@ static inline void fsnotify_create(struct inode *inode, struct dentry *dentry) { inotify_inode_queue_event(inode, IN_CREATE, 0, dentry->d_name.name, dentry->d_inode); - audit_inode_child(dentry->d_name.name, dentry, inode); + audit_inode_child(dentry, inode); fsnotify(inode, FS_CREATE, dentry->d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name, 0); } @@ -162,7 +162,7 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct inotify_inode_queue_event(dir, IN_CREATE, 0, new_dentry->d_name.name, inode); fsnotify_link_count(inode); - 
audit_inode_child(new_dentry->d_name.name, new_dentry, dir); + audit_inode_child(new_dentry, dir); fsnotify(dir, FS_CREATE, inode, FSNOTIFY_EVENT_INODE, new_dentry->d_name.name, 0); } @@ -176,7 +176,7 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry) struct inode *d_inode = dentry->d_inode; inotify_inode_queue_event(inode, mask, 0, dentry->d_name.name, d_inode); - audit_inode_child(dentry->d_name.name, dentry, inode); + audit_inode_child(dentry, inode); fsnotify(inode, mask, d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name, 0); } diff --git a/kernel/auditsc.c b/kernel/auditsc.c index fc0f928167e7..f3a461c0970a 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1988,7 +1988,6 @@ void __audit_inode(const char *name, const struct dentry *dentry) /** * audit_inode_child - collect inode info for created/removed objects - * @dname: inode's dentry name * @dentry: dentry being audited * @parent: inode of dentry parent * @@ -2000,13 +1999,14 @@ void __audit_inode(const char *name, const struct dentry *dentry) * must be hooked prior, in order to capture the target inode during * unsuccessful attempts. */ -void __audit_inode_child(const char *dname, const struct dentry *dentry, +void __audit_inode_child(const struct dentry *dentry, const struct inode *parent) { int idx; struct audit_context *context = current->audit_context; const char *found_parent = NULL, *found_child = NULL; const struct inode *inode = dentry->d_inode; + const char *dname = dentry->d_name.name; int dirlen = 0; if (!context->in_syscall) @@ -2014,9 +2014,6 @@ void __audit_inode_child(const char *dname, const struct dentry *dentry, if (inode) handle_one(inode); - /* determine matching parent */ - if (!dname) - goto add_names; /* parent is more likely, look for it first */ for (idx = 0; idx < context->name_count; idx++) { -- cgit v1.2.3 From 3c537889e17232e9073f75ae8710ea0f008c5a29 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 5 Feb 2010 04:21:19 -0500 Subject: drm/radeon/kms: add support for hardcoded edids in rom (v2) Some servers hardcode an edid in rom so that they will work properly with KVMs. This is a port of the relevant code from the ddx. [airlied: reworked to validate edid at boot stage - and remove special quirk, if there is a valid EDID in the BIOS rom we'll just try and use it.] Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_edid.c | 30 +++++++++++++++--------------- drivers/gpu/drm/radeon/radeon_combios.c | 33 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/radeon_display.c | 14 +++++++++++++- drivers/gpu/drm/radeon/radeon_mode.h | 6 +++++- include/drm/drm_crtc.h | 2 ++ include/drm/drm_edid.h | 3 +++ 6 files changed, 71 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index f665b05592f3..f41e91ceaea6 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -60,8 +60,7 @@ #define EDID_QUIRK_FIRST_DETAILED_PREFERRED (1 << 5) /* use +hsync +vsync for detailed mode */ #define EDID_QUIRK_DETAILED_SYNC_PP (1 << 6) -/* define the number of Extension EDID block */ -#define MAX_EDID_EXT_NUM 4 + #define LEVEL_DMT 0 #define LEVEL_GTF 1 @@ -114,14 +113,14 @@ static const u8 edid_header[] = { }; /** - * edid_is_valid - sanity check EDID data + * drm_edid_is_valid - sanity check EDID data * @edid: EDID data * * Sanity check the EDID block by looking at the header, the version number * and the checksum. 
Return 0 if the EDID doesn't check out, or 1 if it's * valid. */ -static bool edid_is_valid(struct edid *edid) +bool drm_edid_is_valid(struct edid *edid) { int i, score = 0; u8 csum = 0; @@ -163,6 +162,7 @@ bad: } return 0; } +EXPORT_SYMBOL(drm_edid_is_valid); /** * edid_vendor - match a string against EDID's obfuscated vendor field @@ -1069,8 +1069,8 @@ static int add_detailed_info_eedid(struct drm_connector *connector, } /* Chose real EDID extension number */ - edid_ext_num = edid->extensions > MAX_EDID_EXT_NUM ? - MAX_EDID_EXT_NUM : edid->extensions; + edid_ext_num = edid->extensions > DRM_MAX_EDID_EXT_NUM ? + DRM_MAX_EDID_EXT_NUM : edid->extensions; /* Find CEA extension */ for (i = 0; i < edid_ext_num; i++) { @@ -1152,7 +1152,7 @@ static int drm_ddc_read_edid(struct drm_connector *connector, for (i = 0; i < 4; i++) { if (drm_do_probe_ddc_edid(adapter, buf, len)) return -1; - if (edid_is_valid((struct edid *)buf)) + if (drm_edid_is_valid((struct edid *)buf)) return 0; } @@ -1177,7 +1177,7 @@ struct edid *drm_get_edid(struct drm_connector *connector, int ret; struct edid *edid; - edid = kmalloc(EDID_LENGTH * (MAX_EDID_EXT_NUM + 1), + edid = kmalloc(EDID_LENGTH * (DRM_MAX_EDID_EXT_NUM + 1), GFP_KERNEL); if (edid == NULL) { dev_warn(&connector->dev->pdev->dev, @@ -1195,14 +1195,14 @@ struct edid *drm_get_edid(struct drm_connector *connector, if (edid->extensions != 0) { int edid_ext_num = edid->extensions; - if (edid_ext_num > MAX_EDID_EXT_NUM) { + if (edid_ext_num > DRM_MAX_EDID_EXT_NUM) { dev_warn(&connector->dev->pdev->dev, "The number of extension(%d) is " "over max (%d), actually read number (%d)\n", - edid_ext_num, MAX_EDID_EXT_NUM, - MAX_EDID_EXT_NUM); + edid_ext_num, DRM_MAX_EDID_EXT_NUM, + DRM_MAX_EDID_EXT_NUM); /* Reset EDID extension number to be read */ - edid_ext_num = MAX_EDID_EXT_NUM; + edid_ext_num = DRM_MAX_EDID_EXT_NUM; } /* Read EDID including extensions too */ ret = drm_ddc_read_edid(connector, adapter, (char *)edid, @@ -1245,8 +1245,8 @@ bool drm_detect_hdmi_monitor(struct edid *edid) goto end; /* Chose real EDID extension number */ - edid_ext_num = edid->extensions > MAX_EDID_EXT_NUM ? - MAX_EDID_EXT_NUM : edid->extensions; + edid_ext_num = edid->extensions > DRM_MAX_EDID_EXT_NUM ? 
+ DRM_MAX_EDID_EXT_NUM : edid->extensions; /* Find CEA extension */ for (i = 0; i < edid_ext_num; i++) { @@ -1303,7 +1303,7 @@ int drm_add_edid_modes(struct drm_connector *connector, struct edid *edid) if (edid == NULL) { return 0; } - if (!edid_is_valid(edid)) { + if (!drm_edid_is_valid(edid)) { dev_warn(&connector->dev->pdev->dev, "%s: EDID invalid.\n", drm_get_connector_name(connector)); return 0; diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c index e208d730f514..257ce1774e40 100644 --- a/drivers/gpu/drm/radeon/radeon_combios.c +++ b/drivers/gpu/drm/radeon/radeon_combios.c @@ -443,6 +443,39 @@ static uint16_t combios_get_table_offset(struct drm_device *dev, } +bool radeon_combios_check_hardcoded_edid(struct radeon_device *rdev) +{ + int edid_info; + struct edid *edid; + edid_info = combios_get_table_offset(rdev->ddev, COMBIOS_HARDCODED_EDID_TABLE); + if (!edid_info) + return false; + + edid = kmalloc(EDID_LENGTH * (DRM_MAX_EDID_EXT_NUM + 1), + GFP_KERNEL); + if (edid == NULL) + return false; + + memcpy((unsigned char *)edid, + (unsigned char *)(rdev->bios + edid_info), EDID_LENGTH); + + if (!drm_edid_is_valid(edid)) { + kfree(edid); + return false; + } + + rdev->mode_info.bios_hardcoded_edid = edid; + return true; +} + +struct edid * +radeon_combios_get_hardcoded_edid(struct radeon_device *rdev) +{ + if (rdev->mode_info.bios_hardcoded_edid) + return rdev->mode_info.bios_hardcoded_edid; + return NULL; +} + static struct radeon_i2c_bus_rec combios_setup_i2c_bus(struct radeon_device *rdev, int ddc_line) { diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index ec3166bfaa49..79634da7c311 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -352,6 +352,8 @@ static bool radeon_setup_enc_conn(struct drm_device *dev) int radeon_ddc_get_modes(struct radeon_connector *radeon_connector) { + struct drm_device *dev = radeon_connector->base.dev; + struct radeon_device *rdev = dev->dev_private; int ret = 0; if ((radeon_connector->base.connector_type == DRM_MODE_CONNECTOR_DisplayPort) || @@ -366,7 +368,9 @@ int radeon_ddc_get_modes(struct radeon_connector *radeon_connector) if (!radeon_connector->edid) { radeon_connector->edid = drm_get_edid(&radeon_connector->base, &radeon_connector->ddc_bus->adapter); } - + /* some servers provide a hardcoded edid in rom for KVMs */ + if (!radeon_connector->edid) + radeon_connector->edid = radeon_combios_get_hardcoded_edid(rdev); if (radeon_connector->edid) { drm_mode_connector_update_edid_property(&radeon_connector->base, radeon_connector->edid); ret = drm_add_edid_modes(&radeon_connector->base, radeon_connector->edid); @@ -829,6 +833,12 @@ int radeon_modeset_init(struct radeon_device *rdev) return ret; } + /* check combios for a valid hardcoded EDID - Sun servers */ + if (!rdev->is_atom_bios) { + /* check for hardcoded EDID in BIOS */ + radeon_combios_check_hardcoded_edid(rdev); + } + if (rdev->flags & RADEON_SINGLE_CRTC) num_crtc = 1; @@ -850,6 +860,8 @@ int radeon_modeset_init(struct radeon_device *rdev) void radeon_modeset_fini(struct radeon_device *rdev) { + kfree(rdev->mode_info.bios_hardcoded_edid); + if (rdev->mode_info.mode_config_initialized) { radeon_hpd_fini(rdev); drm_mode_config_cleanup(rdev->ddev); diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index b884bacf09ff..71439ba2feeb 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ 
-207,7 +207,8 @@ struct radeon_mode_info { struct drm_property *tv_std_property; /* legacy TMDS PLL detect */ struct drm_property *tmds_pll_property; - + /* hardcoded DFP edid from BIOS */ + struct edid *bios_hardcoded_edid; }; #define MAX_H_CODE_TIMING_LEN 32 @@ -479,6 +480,9 @@ extern int radeon_crtc_cursor_set(struct drm_crtc *crtc, extern int radeon_crtc_cursor_move(struct drm_crtc *crtc, int x, int y); +extern bool radeon_combios_check_hardcoded_edid(struct radeon_device *rdev); +extern struct edid * +radeon_combios_get_hardcoded_edid(struct radeon_device *rdev); extern bool radeon_atom_get_clock_info(struct drm_device *dev); extern bool radeon_combios_get_clock_info(struct drm_device *dev); extern struct radeon_encoder_atom_dig * diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index fdf43abc36db..1347524a8e30 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -801,4 +801,6 @@ extern struct drm_display_mode *drm_gtf_mode(struct drm_device *dev, bool interlaced, int margins); extern int drm_add_modes_noedid(struct drm_connector *connector, int hdisplay, int vdisplay); + +extern bool drm_edid_is_valid(struct edid *edid); #endif /* __DRM_CRTC_H__ */ diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h index d33c3e038606..b4209898f115 100644 --- a/include/drm/drm_edid.h +++ b/include/drm/drm_edid.h @@ -201,4 +201,7 @@ struct edid { #define EDID_PRODUCT_ID(e) ((e)->prod_code[0] | ((e)->prod_code[1] << 8)) +/* define the number of Extension EDID block */ +#define DRM_MAX_EDID_EXT_NUM 4 + #endif /* __DRM_EDID_H__ */ -- cgit v1.2.3 From bcc1c2a1d22974215e39dc87ce746ba9a39223e5 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 12 Jan 2010 17:54:34 -0500 Subject: drm/radeon/kms: add initial Evergreen support (Radeon HD 5xxx) This adds initial Evergreen KMS support, it doesn't include any acceleration features or interrupt handling yet. Major changes are DCE4 handling for PLLs for the > 2 crtcs. 
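A note on the PLL handling: on DCE4 up to six CRTCs share two display PPLLs plus a fixed DCPLL, so each CRTC now carries a pll_id instead of reusing its crtc_id, and the DCPLL (which supplies the display clock, default_dispclk) is always programmed via the new atombios_crtc_set_dcpll(). A stripped-down sketch of the selection logic follows (standalone C with illustrative constant values, not the driver's structures; radeon_atom_pick_pll() in the diff below is the real version):

enum { PPLL1, PPLL2, PPLL_INVALID };

/*
 * pll_in_use is a bitmask of PPLLs already claimed by other CRTCs;
 * a CRTC driving DP from an external reference clock needs no PPLL.
 */
int pick_pll(unsigned int pll_in_use, int dp_has_ext_clock)
{
	if (dp_has_ext_clock)
		return PPLL_INVALID;
	if (!(pll_in_use & (1 << PPLL1)))
		return PPLL1;
	return PPLL2;
}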
Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/Makefile | 3 +- drivers/gpu/drm/radeon/atombios_crtc.c | 370 ++++++++++++-- drivers/gpu/drm/radeon/atombios_dp.c | 62 ++- drivers/gpu/drm/radeon/evergreen.c | 794 +++++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/evergreen_reg.h | 176 +++++++ drivers/gpu/drm/radeon/radeon.h | 13 +- drivers/gpu/drm/radeon/radeon_asic.h | 50 ++ drivers/gpu/drm/radeon/radeon_atombios.c | 25 +- drivers/gpu/drm/radeon/radeon_clocks.c | 12 + drivers/gpu/drm/radeon/radeon_combios.c | 1 + drivers/gpu/drm/radeon/radeon_cursor.c | 37 +- drivers/gpu/drm/radeon/radeon_device.c | 20 +- drivers/gpu/drm/radeon/radeon_display.c | 42 +- drivers/gpu/drm/radeon/radeon_encoders.c | 241 +++++++--- drivers/gpu/drm/radeon/radeon_family.h | 5 + drivers/gpu/drm/radeon/radeon_mode.h | 6 +- drivers/gpu/drm/radeon/radeon_reg.h | 2 +- drivers/gpu/drm/radeon/rv770d.h | 2 + include/drm/drm_pciids.h | 35 ++ 19 files changed, 1775 insertions(+), 121 deletions(-) create mode 100644 drivers/gpu/drm/radeon/evergreen.c create mode 100644 drivers/gpu/drm/radeon/evergreen_reg.h (limited to 'include') diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile index 1cc7b937b1ea..83c59079193b 100644 --- a/drivers/gpu/drm/radeon/Makefile +++ b/drivers/gpu/drm/radeon/Makefile @@ -54,7 +54,8 @@ radeon-y += radeon_device.o radeon_kms.o \ radeon_cs.o radeon_bios.o radeon_benchmark.o r100.o r300.o r420.o \ rs400.o rs600.o rs690.o rv515.o r520.o r600.o rv770.o radeon_test.o \ r200.o radeon_legacy_tv.o r600_cs.o r600_blit.o r600_blit_shaders.o \ - r600_blit_kms.o radeon_pm.o atombios_dp.o r600_audio.o r600_hdmi.o + r600_blit_kms.o radeon_pm.o atombios_dp.o r600_audio.o r600_hdmi.o \ + evergreen.o radeon-$(CONFIG_COMPAT) += radeon_ioc32.o diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index bb45517719ae..7e7c0b32bb65 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -249,13 +249,17 @@ void atombios_crtc_dpms(struct drm_crtc *crtc, int mode) if (ASIC_IS_DCE3(rdev)) atombios_enable_crtc_memreq(crtc, 1); atombios_blank_crtc(crtc, 0); - drm_vblank_post_modeset(dev, radeon_crtc->crtc_id); + /* XXX re-enable when interrupt support is added */ + if (!ASIC_IS_DCE4(rdev)) + drm_vblank_post_modeset(dev, radeon_crtc->crtc_id); radeon_crtc_load_lut(crtc); break; case DRM_MODE_DPMS_STANDBY: case DRM_MODE_DPMS_SUSPEND: case DRM_MODE_DPMS_OFF: - drm_vblank_pre_modeset(dev, radeon_crtc->crtc_id); + /* XXX re-enable when interrupt support is added */ + if (!ASIC_IS_DCE4(rdev)) + drm_vblank_pre_modeset(dev, radeon_crtc->crtc_id); atombios_blank_crtc(crtc, 1); if (ASIC_IS_DCE3(rdev)) atombios_enable_crtc_memreq(crtc, 0); @@ -367,6 +371,10 @@ static void atombios_set_ss(struct drm_crtc *crtc, int enable) uint16_t percentage = 0; uint8_t type = 0, step = 0, delay = 0, range = 0; + /* XXX add ss support for DCE4 */ + if (ASIC_IS_DCE4(rdev)) + return; + list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { if (encoder->crtc == crtc) { radeon_encoder = to_radeon_encoder(encoder); @@ -411,6 +419,7 @@ static void atombios_set_ss(struct drm_crtc *crtc, int enable) union adjust_pixel_clock { ADJUST_DISPLAY_PLL_PS_ALLOCATION v1; + ADJUST_DISPLAY_PLL_PS_ALLOCATION_V3 v3; }; static u32 atombios_adjust_pll(struct drm_crtc *crtc, @@ -422,6 +431,7 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc, struct drm_encoder *encoder = NULL; struct radeon_encoder 
*radeon_encoder = NULL; u32 adjusted_clock = mode->clock; + int encoder_mode = 0; /* reset the pll flags */ pll->flags = 0; @@ -459,6 +469,7 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc, list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { if (encoder->crtc == crtc) { radeon_encoder = to_radeon_encoder(encoder); + encoder_mode = atombios_get_encoder_mode(encoder); if (ASIC_IS_AVIVO(rdev)) { /* DVO wants 2x pixel clock if the DVO chip is in 12 bit mode */ if (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1) @@ -484,14 +495,9 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc, */ if (ASIC_IS_DCE3(rdev)) { union adjust_pixel_clock args; - struct radeon_encoder_atom_dig *dig; u8 frev, crev; int index; - if (!radeon_encoder->enc_priv) - return adjusted_clock; - dig = radeon_encoder->enc_priv; - index = GetIndexIntoMasterTable(COMMAND, AdjustDisplayPll); atom_parse_cmd_header(rdev->mode_info.atom_context, index, &frev, &crev); @@ -505,12 +511,51 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc, case 2: args.v1.usPixelClock = cpu_to_le16(mode->clock / 10); args.v1.ucTransmitterID = radeon_encoder->encoder_id; - args.v1.ucEncodeMode = atombios_get_encoder_mode(encoder); + args.v1.ucEncodeMode = encoder_mode; atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args); adjusted_clock = le16_to_cpu(args.v1.usPixelClock) * 10; break; + case 3: + args.v3.sInput.usPixelClock = cpu_to_le16(mode->clock / 10); + args.v3.sInput.ucTransmitterID = radeon_encoder->encoder_id; + args.v3.sInput.ucEncodeMode = encoder_mode; + args.v3.sInput.ucDispPllConfig = 0; + if (radeon_encoder->devices & (ATOM_DEVICE_DFP_SUPPORT)) { + struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; + + if (encoder_mode == ATOM_ENCODER_MODE_DP) + args.v3.sInput.ucDispPllConfig |= + DISPPLL_CONFIG_COHERENT_MODE; + else { + if (dig->coherent_mode) + args.v3.sInput.ucDispPllConfig |= + DISPPLL_CONFIG_COHERENT_MODE; + if (mode->clock > 165000) + args.v3.sInput.ucDispPllConfig |= + DISPPLL_CONFIG_DUAL_LINK; + } + } else if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) { + /* may want to enable SS on DP/eDP eventually */ + args.v3.sInput.ucDispPllConfig |= + DISPPLL_CONFIG_SS_ENABLE; + if (mode->clock > 165000) + args.v3.sInput.ucDispPllConfig |= + DISPPLL_CONFIG_DUAL_LINK; + } + atom_execute_table(rdev->mode_info.atom_context, + index, (uint32_t *)&args); + adjusted_clock = le32_to_cpu(args.v3.sOutput.ulDispPllFreq) * 10; + if (args.v3.sOutput.ucRefDiv) { + pll->flags |= RADEON_PLL_USE_REF_DIV; + pll->reference_div = args.v3.sOutput.ucRefDiv; + } + if (args.v3.sOutput.ucPostDiv) { + pll->flags |= RADEON_PLL_USE_POST_DIV; + pll->post_div = args.v3.sOutput.ucPostDiv; + } + break; default: DRM_ERROR("Unknown table version %d %d\n", frev, crev); return adjusted_clock; @@ -529,9 +574,47 @@ union set_pixel_clock { PIXEL_CLOCK_PARAMETERS v1; PIXEL_CLOCK_PARAMETERS_V2 v2; PIXEL_CLOCK_PARAMETERS_V3 v3; + PIXEL_CLOCK_PARAMETERS_V5 v5; }; -void atombios_crtc_set_pll(struct drm_crtc *crtc, struct drm_display_mode *mode) +static void atombios_crtc_set_dcpll(struct drm_crtc *crtc) +{ + struct drm_device *dev = crtc->dev; + struct radeon_device *rdev = dev->dev_private; + u8 frev, crev; + int index; + union set_pixel_clock args; + + memset(&args, 0, sizeof(args)); + + index = GetIndexIntoMasterTable(COMMAND, SetPixelClock); + atom_parse_cmd_header(rdev->mode_info.atom_context, index, &frev, + &crev); + + switch (frev) { + case 1: + switch (crev) { + case 5: + /* 
if the default dcpll clock is specified, + * SetPixelClock provides the dividers + */ + args.v5.ucCRTC = ATOM_CRTC_INVALID; + args.v5.usPixelClock = rdev->clock.default_dispclk; + args.v5.ucPpll = ATOM_DCPLL; + break; + default: + DRM_ERROR("Unknown table version %d %d\n", frev, crev); + return; + } + break; + default: + DRM_ERROR("Unknown table version %d %d\n", frev, crev); + return; + } + atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args); +} + +static void atombios_crtc_set_pll(struct drm_crtc *crtc, struct drm_display_mode *mode) { struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); struct drm_device *dev = crtc->dev; @@ -545,12 +628,14 @@ void atombios_crtc_set_pll(struct drm_crtc *crtc, struct drm_display_mode *mode) u32 ref_div = 0, fb_div = 0, frac_fb_div = 0, post_div = 0; struct radeon_pll *pll; u32 adjusted_clock; + int encoder_mode = 0; memset(&args, 0, sizeof(args)); list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { if (encoder->crtc == crtc) { radeon_encoder = to_radeon_encoder(encoder); + encoder_mode = atombios_get_encoder_mode(encoder); break; } } @@ -558,10 +643,18 @@ void atombios_crtc_set_pll(struct drm_crtc *crtc, struct drm_display_mode *mode) if (!radeon_encoder) return; - if (radeon_crtc->crtc_id == 0) + switch (radeon_crtc->pll_id) { + case ATOM_PPLL1: pll = &rdev->clock.p1pll; - else + break; + case ATOM_PPLL2: pll = &rdev->clock.p2pll; + break; + case ATOM_DCPLL: + case ATOM_PPLL_INVALID: + pll = &rdev->clock.dcpll; + break; + } /* adjust pixel clock as needed */ adjusted_clock = atombios_adjust_pll(crtc, mode, pll); @@ -582,8 +675,7 @@ void atombios_crtc_set_pll(struct drm_crtc *crtc, struct drm_display_mode *mode) args.v1.usFbDiv = cpu_to_le16(fb_div); args.v1.ucFracFbDiv = frac_fb_div; args.v1.ucPostDiv = post_div; - args.v1.ucPpll = - radeon_crtc->crtc_id ? ATOM_PPLL2 : ATOM_PPLL1; + args.v1.ucPpll = radeon_crtc->pll_id; args.v1.ucCRTC = radeon_crtc->crtc_id; args.v1.ucRefDivSrc = 1; break; @@ -593,8 +685,7 @@ void atombios_crtc_set_pll(struct drm_crtc *crtc, struct drm_display_mode *mode) args.v2.usFbDiv = cpu_to_le16(fb_div); args.v2.ucFracFbDiv = frac_fb_div; args.v2.ucPostDiv = post_div; - args.v2.ucPpll = - radeon_crtc->crtc_id ? ATOM_PPLL2 : ATOM_PPLL1; + args.v2.ucPpll = radeon_crtc->pll_id; args.v2.ucCRTC = radeon_crtc->crtc_id; args.v2.ucRefDivSrc = 1; break; @@ -604,12 +695,22 @@ void atombios_crtc_set_pll(struct drm_crtc *crtc, struct drm_display_mode *mode) args.v3.usFbDiv = cpu_to_le16(fb_div); args.v3.ucFracFbDiv = frac_fb_div; args.v3.ucPostDiv = post_div; - args.v3.ucPpll = - radeon_crtc->crtc_id ? ATOM_PPLL2 : ATOM_PPLL1; - args.v3.ucMiscInfo = (radeon_crtc->crtc_id << 2); + args.v3.ucPpll = radeon_crtc->pll_id; + args.v3.ucMiscInfo = (radeon_crtc->pll_id << 2); args.v3.ucTransmitterId = radeon_encoder->encoder_id; - args.v3.ucEncoderMode = - atombios_get_encoder_mode(encoder); + args.v3.ucEncoderMode = encoder_mode; + break; + case 5: + args.v5.ucCRTC = radeon_crtc->crtc_id; + args.v5.usPixelClock = cpu_to_le16(mode->clock / 10); + args.v5.ucRefDiv = ref_div; + args.v5.usFbDiv = cpu_to_le16(fb_div); + args.v5.ulFbDivDecFrac = cpu_to_le32(frac_fb_div * 100000); + args.v5.ucPostDiv = post_div; + args.v5.ucMiscInfo = 0; /* HDMI depth, etc. 
*/ + args.v5.ucTransmitterID = radeon_encoder->encoder_id; + args.v5.ucEncoderMode = encoder_mode; + args.v5.ucPpll = radeon_crtc->pll_id; break; default: DRM_ERROR("Unknown table version %d %d\n", frev, crev); @@ -624,6 +725,140 @@ void atombios_crtc_set_pll(struct drm_crtc *crtc, struct drm_display_mode *mode) atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args); } +static int evergreen_crtc_set_base(struct drm_crtc *crtc, int x, int y, + struct drm_framebuffer *old_fb) +{ + struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); + struct drm_device *dev = crtc->dev; + struct radeon_device *rdev = dev->dev_private; + struct radeon_framebuffer *radeon_fb; + struct drm_gem_object *obj; + struct radeon_bo *rbo; + uint64_t fb_location; + uint32_t fb_format, fb_pitch_pixels, tiling_flags; + int r; + + /* no fb bound */ + if (!crtc->fb) { + DRM_DEBUG("No FB bound\n"); + return 0; + } + + radeon_fb = to_radeon_framebuffer(crtc->fb); + + /* Pin framebuffer & get tilling informations */ + obj = radeon_fb->obj; + rbo = obj->driver_private; + r = radeon_bo_reserve(rbo, false); + if (unlikely(r != 0)) + return r; + r = radeon_bo_pin(rbo, RADEON_GEM_DOMAIN_VRAM, &fb_location); + if (unlikely(r != 0)) { + radeon_bo_unreserve(rbo); + return -EINVAL; + } + radeon_bo_get_tiling_flags(rbo, &tiling_flags, NULL); + radeon_bo_unreserve(rbo); + + switch (crtc->fb->bits_per_pixel) { + case 8: + fb_format = (EVERGREEN_GRPH_DEPTH(EVERGREEN_GRPH_DEPTH_8BPP) | + EVERGREEN_GRPH_FORMAT(EVERGREEN_GRPH_FORMAT_INDEXED)); + break; + case 15: + fb_format = (EVERGREEN_GRPH_DEPTH(EVERGREEN_GRPH_DEPTH_16BPP) | + EVERGREEN_GRPH_FORMAT(EVERGREEN_GRPH_FORMAT_ARGB1555)); + break; + case 16: + fb_format = (EVERGREEN_GRPH_DEPTH(EVERGREEN_GRPH_DEPTH_16BPP) | + EVERGREEN_GRPH_FORMAT(EVERGREEN_GRPH_FORMAT_ARGB565)); + break; + case 24: + case 32: + fb_format = (EVERGREEN_GRPH_DEPTH(EVERGREEN_GRPH_DEPTH_32BPP) | + EVERGREEN_GRPH_FORMAT(EVERGREEN_GRPH_FORMAT_ARGB8888)); + break; + default: + DRM_ERROR("Unsupported screen depth %d\n", + crtc->fb->bits_per_pixel); + return -EINVAL; + } + + switch (radeon_crtc->crtc_id) { + case 0: + WREG32(AVIVO_D1VGA_CONTROL, 0); + break; + case 1: + WREG32(AVIVO_D2VGA_CONTROL, 0); + break; + case 2: + WREG32(EVERGREEN_D3VGA_CONTROL, 0); + break; + case 3: + WREG32(EVERGREEN_D4VGA_CONTROL, 0); + break; + case 4: + WREG32(EVERGREEN_D5VGA_CONTROL, 0); + break; + case 5: + WREG32(EVERGREEN_D6VGA_CONTROL, 0); + break; + default: + break; + } + + WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH + radeon_crtc->crtc_offset, + upper_32_bits(fb_location)); + WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH + radeon_crtc->crtc_offset, + upper_32_bits(fb_location)); + WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS + radeon_crtc->crtc_offset, + (u32)fb_location & EVERGREEN_GRPH_SURFACE_ADDRESS_MASK); + WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS + radeon_crtc->crtc_offset, + (u32) fb_location & EVERGREEN_GRPH_SURFACE_ADDRESS_MASK); + WREG32(EVERGREEN_GRPH_CONTROL + radeon_crtc->crtc_offset, fb_format); + + WREG32(EVERGREEN_GRPH_SURFACE_OFFSET_X + radeon_crtc->crtc_offset, 0); + WREG32(EVERGREEN_GRPH_SURFACE_OFFSET_Y + radeon_crtc->crtc_offset, 0); + WREG32(EVERGREEN_GRPH_X_START + radeon_crtc->crtc_offset, 0); + WREG32(EVERGREEN_GRPH_Y_START + radeon_crtc->crtc_offset, 0); + WREG32(EVERGREEN_GRPH_X_END + radeon_crtc->crtc_offset, crtc->fb->width); + WREG32(EVERGREEN_GRPH_Y_END + radeon_crtc->crtc_offset, crtc->fb->height); + + fb_pitch_pixels = crtc->fb->pitch / 
(crtc->fb->bits_per_pixel / 8); + WREG32(EVERGREEN_GRPH_PITCH + radeon_crtc->crtc_offset, fb_pitch_pixels); + WREG32(EVERGREEN_GRPH_ENABLE + radeon_crtc->crtc_offset, 1); + + WREG32(EVERGREEN_DESKTOP_HEIGHT + radeon_crtc->crtc_offset, + crtc->mode.vdisplay); + x &= ~3; + y &= ~1; + WREG32(EVERGREEN_VIEWPORT_START + radeon_crtc->crtc_offset, + (x << 16) | y); + WREG32(EVERGREEN_VIEWPORT_SIZE + radeon_crtc->crtc_offset, + (crtc->mode.hdisplay << 16) | crtc->mode.vdisplay); + + if (crtc->mode.flags & DRM_MODE_FLAG_INTERLACE) + WREG32(EVERGREEN_DATA_FORMAT + radeon_crtc->crtc_offset, + EVERGREEN_INTERLEAVE_EN); + else + WREG32(EVERGREEN_DATA_FORMAT + radeon_crtc->crtc_offset, 0); + + if (old_fb && old_fb != crtc->fb) { + radeon_fb = to_radeon_framebuffer(old_fb); + rbo = radeon_fb->obj->driver_private; + r = radeon_bo_reserve(rbo, false); + if (unlikely(r != 0)) + return r; + radeon_bo_unpin(rbo); + radeon_bo_unreserve(rbo); + } + + /* Bytes per pixel may have changed */ + radeon_bandwidth_update(rdev); + + return 0; +} + static int avivo_crtc_set_base(struct drm_crtc *crtc, int x, int y, struct drm_framebuffer *old_fb) { @@ -761,7 +996,9 @@ int atombios_crtc_set_base(struct drm_crtc *crtc, int x, int y, struct drm_device *dev = crtc->dev; struct radeon_device *rdev = dev->dev_private; - if (ASIC_IS_AVIVO(rdev)) + if (ASIC_IS_DCE4(rdev)) + return evergreen_crtc_set_base(crtc, x, y, old_fb); + else if (ASIC_IS_AVIVO(rdev)) return avivo_crtc_set_base(crtc, x, y, old_fb); else return radeon_crtc_set_base(crtc, x, y, old_fb); @@ -791,6 +1028,46 @@ static void radeon_legacy_atom_fixup(struct drm_crtc *crtc) } } +static int radeon_atom_pick_pll(struct drm_crtc *crtc) +{ + struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); + struct drm_device *dev = crtc->dev; + struct radeon_device *rdev = dev->dev_private; + struct drm_encoder *test_encoder; + struct drm_crtc *test_crtc; + uint32_t pll_in_use = 0; + + if (ASIC_IS_DCE4(rdev)) { + /* if crtc is driving DP and we have an ext clock, use that */ + list_for_each_entry(test_encoder, &dev->mode_config.encoder_list, head) { + if (test_encoder->crtc && (test_encoder->crtc == crtc)) { + if (atombios_get_encoder_mode(test_encoder) == ATOM_ENCODER_MODE_DP) { + if (rdev->clock.dp_extclk) + return ATOM_PPLL_INVALID; + } + } + } + + /* otherwise, pick one of the plls */ + list_for_each_entry(test_crtc, &dev->mode_config.crtc_list, head) { + struct radeon_crtc *radeon_test_crtc; + + if (crtc == test_crtc) + continue; + + radeon_test_crtc = to_radeon_crtc(test_crtc); + if ((radeon_test_crtc->pll_id >= ATOM_PPLL1) && + (radeon_test_crtc->pll_id <= ATOM_PPLL2)) + pll_in_use |= (1 << radeon_test_crtc->pll_id); + } + if (!(pll_in_use & 1)) + return ATOM_PPLL1; + return ATOM_PPLL2; + } else + return radeon_crtc->crtc_id; + +} + int atombios_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode, @@ -802,19 +1079,27 @@ int atombios_crtc_mode_set(struct drm_crtc *crtc, /* TODO color tiling */ + /* pick pll */ + radeon_crtc->pll_id = radeon_atom_pick_pll(crtc); + atombios_set_ss(crtc, 0); + /* always set DCPLL */ + if (ASIC_IS_DCE4(rdev)) + atombios_crtc_set_dcpll(crtc); atombios_crtc_set_pll(crtc, adjusted_mode); atombios_set_ss(crtc, 1); - atombios_crtc_set_timing(crtc, adjusted_mode); - if (ASIC_IS_AVIVO(rdev)) - atombios_crtc_set_base(crtc, x, y, old_fb); + if (ASIC_IS_DCE4(rdev)) + atombios_set_crtc_dtd_timing(crtc, adjusted_mode); + else if (ASIC_IS_AVIVO(rdev)) + atombios_crtc_set_timing(crtc, adjusted_mode); 
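/*
 * Editor's note, not part of the original patch: as the hunks above read,
 * the DCE4 mode-set path first picks a display PLL for the CRTC
 * (radeon_atom_pick_pll builds a bitmask of the ATOM_PPLL1/ATOM_PPLL2 ids
 * already claimed by other CRTCs and returns a free one, or
 * ATOM_PPLL_INVALID when a DP link can run from the external reference
 * clock), then programs the DCPLL through SetPixelClock v5 with
 * ATOM_CRTC_INVALID and the default display clock, and only then programs
 * the per-CRTC PPLL and timing, with spread spectrum switched off around
 * the PLL programming.  For example, if another active CRTC already holds
 * ATOM_PPLL1, bit 0 of pll_in_use is set and the picker falls through to
 * ATOM_PPLL2.
 */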
else { + atombios_crtc_set_timing(crtc, adjusted_mode); if (radeon_crtc->crtc_id == 0) atombios_set_crtc_dtd_timing(crtc, adjusted_mode); - atombios_crtc_set_base(crtc, x, y, old_fb); radeon_legacy_atom_fixup(crtc); } + atombios_crtc_set_base(crtc, x, y, old_fb); atombios_overscan_setup(crtc, mode, adjusted_mode); atombios_scaler_setup(crtc); return 0; @@ -854,8 +1139,37 @@ static const struct drm_crtc_helper_funcs atombios_helper_funcs = { void radeon_atombios_init_crtc(struct drm_device *dev, struct radeon_crtc *radeon_crtc) { - if (radeon_crtc->crtc_id == 1) - radeon_crtc->crtc_offset = - AVIVO_D2CRTC_H_TOTAL - AVIVO_D1CRTC_H_TOTAL; + struct radeon_device *rdev = dev->dev_private; + + if (ASIC_IS_DCE4(rdev)) { + switch (radeon_crtc->crtc_id) { + case 0: + default: + radeon_crtc->crtc_id = EVERGREEN_CRTC0_REGISTER_OFFSET; + break; + case 1: + radeon_crtc->crtc_id = EVERGREEN_CRTC1_REGISTER_OFFSET; + break; + case 2: + radeon_crtc->crtc_id = EVERGREEN_CRTC2_REGISTER_OFFSET; + break; + case 3: + radeon_crtc->crtc_id = EVERGREEN_CRTC3_REGISTER_OFFSET; + break; + case 4: + radeon_crtc->crtc_id = EVERGREEN_CRTC4_REGISTER_OFFSET; + break; + case 5: + radeon_crtc->crtc_id = EVERGREEN_CRTC5_REGISTER_OFFSET; + break; + } + } else { + if (radeon_crtc->crtc_id == 1) + radeon_crtc->crtc_offset = + AVIVO_D2CRTC_H_TOTAL - AVIVO_D1CRTC_H_TOTAL; + else + radeon_crtc->crtc_offset = 0; + } + radeon_crtc->pll_id = -1; drm_crtc_helper_add(&radeon_crtc->base, &atombios_helper_funcs); } diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c index 71060114d5de..0b6f2cef1c52 100644 --- a/drivers/gpu/drm/radeon/atombios_dp.c +++ b/drivers/gpu/drm/radeon/atombios_dp.c @@ -321,6 +321,10 @@ static void dp_get_adjust_train(u8 link_status[DP_LINK_STATUS_SIZE], train_set[lane] = v | p; } +union aux_channel_transaction { + PROCESS_AUX_CHANNEL_TRANSACTION_PS_ALLOCATION v1; + PROCESS_AUX_CHANNEL_TRANSACTION_PARAMETERS_V2 v2; +}; /* radeon aux chan functions */ bool radeon_process_aux_ch(struct radeon_i2c_chan *chan, u8 *req_bytes, @@ -329,7 +333,7 @@ bool radeon_process_aux_ch(struct radeon_i2c_chan *chan, u8 *req_bytes, { struct drm_device *dev = chan->dev; struct radeon_device *rdev = dev->dev_private; - PROCESS_AUX_CHANNEL_TRANSACTION_PS_ALLOCATION args; + union aux_channel_transaction args; int index = GetIndexIntoMasterTable(COMMAND, ProcessAuxChannelTransaction); unsigned char *base; @@ -339,29 +343,31 @@ bool radeon_process_aux_ch(struct radeon_i2c_chan *chan, u8 *req_bytes, memcpy(base, req_bytes, num_bytes); - args.lpAuxRequest = 0; - args.lpDataOut = 16; - args.ucDataOutLen = 0; - args.ucChannelID = chan->rec.i2c_id; - args.ucDelay = delay / 10; + args.v1.lpAuxRequest = 0; + args.v1.lpDataOut = 16; + args.v1.ucDataOutLen = 0; + args.v1.ucChannelID = chan->rec.i2c_id; + args.v1.ucDelay = delay / 10; + if (ASIC_IS_DCE4(rdev)) + args.v2.ucHPD_ID = chan->rec.hpd_id; atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args); - if (args.ucReplyStatus) { + if (args.v1.ucReplyStatus) { DRM_DEBUG("failed to get auxch %02x%02x %02x %02x 0x%02x %02x\n", req_bytes[1], req_bytes[0], req_bytes[2], req_bytes[3], - chan->rec.i2c_id, args.ucReplyStatus); + chan->rec.i2c_id, args.v1.ucReplyStatus); return false; } - if (args.ucDataOutLen && read_byte && read_buf_len) { - if (read_buf_len < args.ucDataOutLen) { + if (args.v1.ucDataOutLen && read_byte && read_buf_len) { + if (read_buf_len < args.v1.ucDataOutLen) { DRM_ERROR("Buffer to small for return answer %d %d\n", - 
read_buf_len, args.ucDataOutLen); + read_buf_len, args.v1.ucDataOutLen); return false; } { - int len = min(read_buf_len, args.ucDataOutLen); + int len = min(read_buf_len, args.v1.ucDataOutLen); memcpy(read_byte, base + 16, len); } } @@ -622,12 +628,19 @@ void dp_link_train(struct drm_encoder *encoder, dp_set_link_bw_lanes(radeon_connector, link_configuration); /* disable downspread on the sink */ dp_set_downspread(radeon_connector, 0); - /* start training on the source */ - radeon_dp_encoder_service(rdev, ATOM_DP_ACTION_TRAINING_START, - dig_connector->dp_clock, enc_id, 0); - /* set training pattern 1 on the source */ - radeon_dp_encoder_service(rdev, ATOM_DP_ACTION_TRAINING_PATTERN_SEL, - dig_connector->dp_clock, enc_id, 0); + if (ASIC_IS_DCE4(rdev)) { + /* start training on the source */ + atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_DP_LINK_TRAINING_START); + /* set training pattern 1 on the source */ + atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_DP_LINK_TRAINING_PATTERN1); + } else { + /* start training on the source */ + radeon_dp_encoder_service(rdev, ATOM_DP_ACTION_TRAINING_START, + dig_connector->dp_clock, enc_id, 0); + /* set training pattern 1 on the source */ + radeon_dp_encoder_service(rdev, ATOM_DP_ACTION_TRAINING_PATTERN_SEL, + dig_connector->dp_clock, enc_id, 0); + } /* set initial vs/emph */ memset(train_set, 0, 4); @@ -687,8 +700,11 @@ void dp_link_train(struct drm_encoder *encoder, /* set training pattern 2 on the sink */ dp_set_training(radeon_connector, DP_TRAINING_PATTERN_2); /* set training pattern 2 on the source */ - radeon_dp_encoder_service(rdev, ATOM_DP_ACTION_TRAINING_PATTERN_SEL, - dig_connector->dp_clock, enc_id, 1); + if (ASIC_IS_DCE4(rdev)) + atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_DP_LINK_TRAINING_PATTERN2); + else + radeon_dp_encoder_service(rdev, ATOM_DP_ACTION_TRAINING_PATTERN_SEL, + dig_connector->dp_clock, enc_id, 1); /* channel equalization loop */ tries = 0; @@ -725,7 +741,11 @@ void dp_link_train(struct drm_encoder *encoder, >> DP_TRAIN_PRE_EMPHASIS_SHIFT); /* disable the training pattern on the sink */ - dp_set_training(radeon_connector, DP_TRAINING_PATTERN_DISABLE); + if (ASIC_IS_DCE4(rdev)) + atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_DP_LINK_TRAINING_COMPLETE); + else + radeon_dp_encoder_service(rdev, ATOM_DP_ACTION_TRAINING_COMPLETE, + dig_connector->dp_clock, enc_id, 0); radeon_dp_encoder_service(rdev, ATOM_DP_ACTION_TRAINING_COMPLETE, dig_connector->dp_clock, enc_id, 0); diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c new file mode 100644 index 000000000000..c2f9752e4ee0 --- /dev/null +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -0,0 +1,794 @@ +/* + * Copyright 2010 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Alex Deucher + */ +#include +#include +#include "drmP.h" +#include "radeon.h" +#include "radeon_drm.h" +#include "rv770d.h" +#include "atom.h" +#include "avivod.h" +#include "evergreen_reg.h" + +static void evergreen_gpu_init(struct radeon_device *rdev); +void evergreen_fini(struct radeon_device *rdev); + +bool evergreen_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd) +{ + bool connected = false; + /* XXX */ + return connected; +} + +void evergreen_hpd_set_polarity(struct radeon_device *rdev, + enum radeon_hpd_id hpd) +{ + /* XXX */ +} + +void evergreen_hpd_init(struct radeon_device *rdev) +{ + /* XXX */ +} + + +void evergreen_bandwidth_update(struct radeon_device *rdev) +{ + /* XXX */ +} + +void evergreen_hpd_fini(struct radeon_device *rdev) +{ + /* XXX */ +} + +static int evergreen_mc_wait_for_idle(struct radeon_device *rdev) +{ + unsigned i; + u32 tmp; + + for (i = 0; i < rdev->usec_timeout; i++) { + /* read MC_STATUS */ + tmp = RREG32(SRBM_STATUS) & 0x1F00; + if (!tmp) + return 0; + udelay(1); + } + return -1; +} + +/* + * GART + */ +int evergreen_pcie_gart_enable(struct radeon_device *rdev) +{ + u32 tmp; + int r, i; + + if (rdev->gart.table.vram.robj == NULL) { + dev_err(rdev->dev, "No VRAM object for PCIE GART.\n"); + return -EINVAL; + } + r = radeon_gart_table_vram_pin(rdev); + if (r) + return r; + /* Setup L2 cache */ + WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | ENABLE_L2_FRAGMENT_PROCESSING | + ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE | + EFFECTIVE_L2_QUEUE_SIZE(7)); + WREG32(VM_L2_CNTL2, 0); + WREG32(VM_L2_CNTL3, BANK_SELECT(0) | CACHE_UPDATE_MODE(2)); + /* Setup TLB control */ + tmp = ENABLE_L1_TLB | ENABLE_L1_FRAGMENT_PROCESSING | + SYSTEM_ACCESS_MODE_NOT_IN_SYS | + SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU | + EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5); + WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp); + WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp); + WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp); + WREG32(MC_VM_MB_L1_TLB0_CNTL, tmp); + WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp); + WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp); + WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp); + WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12); + WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12); + WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12); + WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) | + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT); + WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, + (u32)(rdev->dummy_page.addr >> 12)); + for (i = 1; i < 7; i++) + WREG32(VM_CONTEXT0_CNTL + (i * 4), 0); + + r600_pcie_gart_tlb_flush(rdev); + rdev->gart.ready = true; + return 0; +} + +void evergreen_pcie_gart_disable(struct radeon_device *rdev) +{ + u32 tmp; + int i, r; + + /* Disable all tables */ + for (i = 0; i < 7; i++) + WREG32(VM_CONTEXT0_CNTL + (i * 4), 0); + + /* Setup L2 cache */ + WREG32(VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING | + EFFECTIVE_L2_QUEUE_SIZE(7)); + WREG32(VM_L2_CNTL2, 0); + WREG32(VM_L2_CNTL3, BANK_SELECT(0) | CACHE_UPDATE_MODE(2)); + /* Setup TLB 
control */ + tmp = EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5); + WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp); + WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp); + WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp); + WREG32(MC_VM_MB_L1_TLB0_CNTL, tmp); + WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp); + WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp); + WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp); + if (rdev->gart.table.vram.robj) { + r = radeon_bo_reserve(rdev->gart.table.vram.robj, false); + if (likely(r == 0)) { + radeon_bo_kunmap(rdev->gart.table.vram.robj); + radeon_bo_unpin(rdev->gart.table.vram.robj); + radeon_bo_unreserve(rdev->gart.table.vram.robj); + } + } +} + +void evergreen_pcie_gart_fini(struct radeon_device *rdev) +{ + evergreen_pcie_gart_disable(rdev); + radeon_gart_table_vram_free(rdev); + radeon_gart_fini(rdev); +} + + +void evergreen_agp_enable(struct radeon_device *rdev) +{ + u32 tmp; + int i; + + /* Setup L2 cache */ + WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | ENABLE_L2_FRAGMENT_PROCESSING | + ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE | + EFFECTIVE_L2_QUEUE_SIZE(7)); + WREG32(VM_L2_CNTL2, 0); + WREG32(VM_L2_CNTL3, BANK_SELECT(0) | CACHE_UPDATE_MODE(2)); + /* Setup TLB control */ + tmp = ENABLE_L1_TLB | ENABLE_L1_FRAGMENT_PROCESSING | + SYSTEM_ACCESS_MODE_NOT_IN_SYS | + SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU | + EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5); + WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp); + WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp); + WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp); + WREG32(MC_VM_MB_L1_TLB0_CNTL, tmp); + WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp); + WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp); + WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp); + for (i = 0; i < 7; i++) + WREG32(VM_CONTEXT0_CNTL + (i * 4), 0); +} + +static void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save) +{ + save->vga_control[0] = RREG32(D1VGA_CONTROL); + save->vga_control[1] = RREG32(D2VGA_CONTROL); + save->vga_control[2] = RREG32(EVERGREEN_D3VGA_CONTROL); + save->vga_control[3] = RREG32(EVERGREEN_D4VGA_CONTROL); + save->vga_control[4] = RREG32(EVERGREEN_D5VGA_CONTROL); + save->vga_control[5] = RREG32(EVERGREEN_D6VGA_CONTROL); + save->vga_render_control = RREG32(VGA_RENDER_CONTROL); + save->vga_hdp_control = RREG32(VGA_HDP_CONTROL); + save->crtc_control[0] = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET); + save->crtc_control[1] = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET); + save->crtc_control[2] = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET); + save->crtc_control[3] = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET); + save->crtc_control[4] = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET); + save->crtc_control[5] = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET); + + /* Stop all video */ + WREG32(VGA_RENDER_CONTROL, 0); + WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC0_REGISTER_OFFSET, 1); + WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC1_REGISTER_OFFSET, 1); + WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC2_REGISTER_OFFSET, 1); + WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC3_REGISTER_OFFSET, 1); + WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC4_REGISTER_OFFSET, 1); + WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC5_REGISTER_OFFSET, 1); + WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0); + WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0); + WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0); + WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 
0); + WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0); + WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0); + WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0); + WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0); + WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0); + WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0); + WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0); + WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0); + + WREG32(D1VGA_CONTROL, 0); + WREG32(D2VGA_CONTROL, 0); + WREG32(EVERGREEN_D3VGA_CONTROL, 0); + WREG32(EVERGREEN_D4VGA_CONTROL, 0); + WREG32(EVERGREEN_D5VGA_CONTROL, 0); + WREG32(EVERGREEN_D6VGA_CONTROL, 0); +} + +static void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save) +{ + WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH + EVERGREEN_CRTC0_REGISTER_OFFSET, + upper_32_bits(rdev->mc.vram_start)); + WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH + EVERGREEN_CRTC0_REGISTER_OFFSET, + upper_32_bits(rdev->mc.vram_start)); + WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS + EVERGREEN_CRTC0_REGISTER_OFFSET, + (u32)rdev->mc.vram_start); + WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS + EVERGREEN_CRTC0_REGISTER_OFFSET, + (u32)rdev->mc.vram_start); + + WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH + EVERGREEN_CRTC1_REGISTER_OFFSET, + upper_32_bits(rdev->mc.vram_start)); + WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH + EVERGREEN_CRTC1_REGISTER_OFFSET, + upper_32_bits(rdev->mc.vram_start)); + WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS + EVERGREEN_CRTC1_REGISTER_OFFSET, + (u32)rdev->mc.vram_start); + WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS + EVERGREEN_CRTC1_REGISTER_OFFSET, + (u32)rdev->mc.vram_start); + + WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH + EVERGREEN_CRTC2_REGISTER_OFFSET, + upper_32_bits(rdev->mc.vram_start)); + WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH + EVERGREEN_CRTC2_REGISTER_OFFSET, + upper_32_bits(rdev->mc.vram_start)); + WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS + EVERGREEN_CRTC2_REGISTER_OFFSET, + (u32)rdev->mc.vram_start); + WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS + EVERGREEN_CRTC2_REGISTER_OFFSET, + (u32)rdev->mc.vram_start); + + WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH + EVERGREEN_CRTC3_REGISTER_OFFSET, + upper_32_bits(rdev->mc.vram_start)); + WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH + EVERGREEN_CRTC3_REGISTER_OFFSET, + upper_32_bits(rdev->mc.vram_start)); + WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS + EVERGREEN_CRTC3_REGISTER_OFFSET, + (u32)rdev->mc.vram_start); + WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS + EVERGREEN_CRTC3_REGISTER_OFFSET, + (u32)rdev->mc.vram_start); + + WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH + EVERGREEN_CRTC4_REGISTER_OFFSET, + upper_32_bits(rdev->mc.vram_start)); + WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH + EVERGREEN_CRTC4_REGISTER_OFFSET, + upper_32_bits(rdev->mc.vram_start)); + WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS + EVERGREEN_CRTC4_REGISTER_OFFSET, + (u32)rdev->mc.vram_start); + WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS + EVERGREEN_CRTC4_REGISTER_OFFSET, + (u32)rdev->mc.vram_start); + + WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH + EVERGREEN_CRTC5_REGISTER_OFFSET, + upper_32_bits(rdev->mc.vram_start)); + WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH + EVERGREEN_CRTC5_REGISTER_OFFSET, + 
upper_32_bits(rdev->mc.vram_start)); + WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS + EVERGREEN_CRTC5_REGISTER_OFFSET, + (u32)rdev->mc.vram_start); + WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS + EVERGREEN_CRTC5_REGISTER_OFFSET, + (u32)rdev->mc.vram_start); + + WREG32(EVERGREEN_VGA_MEMORY_BASE_ADDRESS_HIGH, upper_32_bits(rdev->mc.vram_start)); + WREG32(EVERGREEN_VGA_MEMORY_BASE_ADDRESS, (u32)rdev->mc.vram_start); + /* Unlock host access */ + WREG32(VGA_HDP_CONTROL, save->vga_hdp_control); + mdelay(1); + /* Restore video state */ + WREG32(D1VGA_CONTROL, save->vga_control[0]); + WREG32(D2VGA_CONTROL, save->vga_control[1]); + WREG32(EVERGREEN_D3VGA_CONTROL, save->vga_control[2]); + WREG32(EVERGREEN_D4VGA_CONTROL, save->vga_control[3]); + WREG32(EVERGREEN_D5VGA_CONTROL, save->vga_control[4]); + WREG32(EVERGREEN_D6VGA_CONTROL, save->vga_control[5]); + WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC0_REGISTER_OFFSET, 1); + WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC1_REGISTER_OFFSET, 1); + WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC2_REGISTER_OFFSET, 1); + WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC3_REGISTER_OFFSET, 1); + WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC4_REGISTER_OFFSET, 1); + WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC5_REGISTER_OFFSET, 1); + WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, save->crtc_control[0]); + WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, save->crtc_control[1]); + WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, save->crtc_control[2]); + WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, save->crtc_control[3]); + WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, save->crtc_control[4]); + WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, save->crtc_control[5]); + WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0); + WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0); + WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0); + WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0); + WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0); + WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0); + WREG32(VGA_RENDER_CONTROL, save->vga_render_control); +} + +static void evergreen_mc_program(struct radeon_device *rdev) +{ + struct evergreen_mc_save save; + u32 tmp; + int i, j; + + /* Initialize HDP */ + for (i = 0, j = 0; i < 32; i++, j += 0x18) { + WREG32((0x2c14 + j), 0x00000000); + WREG32((0x2c18 + j), 0x00000000); + WREG32((0x2c1c + j), 0x00000000); + WREG32((0x2c20 + j), 0x00000000); + WREG32((0x2c24 + j), 0x00000000); + } + WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0); + + evergreen_mc_stop(rdev, &save); + if (evergreen_mc_wait_for_idle(rdev)) { + dev_warn(rdev->dev, "Wait for MC idle timedout !\n"); + } + /* Lockout access through VGA aperture*/ + WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE); + /* Update configuration */ + if (rdev->flags & RADEON_IS_AGP) { + if (rdev->mc.vram_start < rdev->mc.gtt_start) { + /* VRAM before AGP */ + WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR, + rdev->mc.vram_start >> 12); + WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR, + rdev->mc.gtt_end >> 12); + } else { + /* VRAM after AGP */ + WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR, + rdev->mc.gtt_start >> 12); + WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR, + rdev->mc.vram_end >> 12); + } + } else { + WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR, + rdev->mc.vram_start >> 
12); + WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR, + rdev->mc.vram_end >> 12); + } + WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, 0); + tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16; + tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF); + WREG32(MC_VM_FB_LOCATION, tmp); + WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8)); + WREG32(HDP_NONSURFACE_INFO, (2 << 7)); + WREG32(HDP_NONSURFACE_SIZE, (rdev->mc.mc_vram_size - 1) | 0x3FF); + if (rdev->flags & RADEON_IS_AGP) { + WREG32(MC_VM_AGP_TOP, rdev->mc.gtt_end >> 16); + WREG32(MC_VM_AGP_BOT, rdev->mc.gtt_start >> 16); + WREG32(MC_VM_AGP_BASE, rdev->mc.agp_base >> 22); + } else { + WREG32(MC_VM_AGP_BASE, 0); + WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF); + WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF); + } + if (evergreen_mc_wait_for_idle(rdev)) { + dev_warn(rdev->dev, "Wait for MC idle timedout !\n"); + } + evergreen_mc_resume(rdev, &save); + /* we need to own VRAM, so turn off the VGA renderer here + * to stop it overwriting our objects */ + rv515_vga_render_disable(rdev); +} + +#if 0 +/* + * CP. + */ +static void evergreen_cp_stop(struct radeon_device *rdev) +{ + /* XXX */ +} + + +static int evergreen_cp_load_microcode(struct radeon_device *rdev) +{ + /* XXX */ + + return 0; +} + + +/* + * Core functions + */ +static u32 evergreen_get_tile_pipe_to_backend_map(u32 num_tile_pipes, + u32 num_backends, + u32 backend_disable_mask) +{ + u32 backend_map = 0; + + return backend_map; +} +#endif + +static void evergreen_gpu_init(struct radeon_device *rdev) +{ + /* XXX */ +} + +int evergreen_mc_init(struct radeon_device *rdev) +{ + fixed20_12 a; + u32 tmp; + int chansize, numchan; + int r; + + /* Get VRAM informations */ + rdev->mc.vram_is_ddr = true; + tmp = RREG32(MC_ARB_RAMCFG); + if (tmp & CHANSIZE_OVERRIDE) { + chansize = 16; + } else if (tmp & CHANSIZE_MASK) { + chansize = 64; + } else { + chansize = 32; + } + tmp = RREG32(MC_SHARED_CHMAP); + switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) { + case 0: + default: + numchan = 1; + break; + case 1: + numchan = 2; + break; + case 2: + numchan = 4; + break; + case 3: + numchan = 8; + break; + } + rdev->mc.vram_width = numchan * chansize; + /* Could aper size report 0 ? 
*/ + rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0); + rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0); + /* Setup GPU memory space */ + /* size in MB on evergreen */ + rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024; + rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024; + + if (rdev->mc.mc_vram_size > rdev->mc.aper_size) + rdev->mc.mc_vram_size = rdev->mc.aper_size; + + if (rdev->mc.real_vram_size > rdev->mc.aper_size) + rdev->mc.real_vram_size = rdev->mc.aper_size; + + if (rdev->flags & RADEON_IS_AGP) { + r = radeon_agp_init(rdev); + if (r) + return r; + /* gtt_size is setup by radeon_agp_init */ + rdev->mc.gtt_location = rdev->mc.agp_base; + tmp = 0xFFFFFFFFUL - rdev->mc.agp_base - rdev->mc.gtt_size; + /* Try to put vram before or after AGP because we + * we want SYSTEM_APERTURE to cover both VRAM and + * AGP so that GPU can catch out of VRAM/AGP access + */ + if (rdev->mc.gtt_location > rdev->mc.mc_vram_size) { + /* Enought place before */ + rdev->mc.vram_location = rdev->mc.gtt_location - + rdev->mc.mc_vram_size; + } else if (tmp > rdev->mc.mc_vram_size) { + /* Enought place after */ + rdev->mc.vram_location = rdev->mc.gtt_location + + rdev->mc.gtt_size; + } else { + /* Try to setup VRAM then AGP might not + * not work on some card + */ + rdev->mc.vram_location = 0x00000000UL; + rdev->mc.gtt_location = rdev->mc.mc_vram_size; + } + } else { + rdev->mc.vram_location = 0x00000000UL; + rdev->mc.gtt_location = rdev->mc.mc_vram_size; + rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024; + } + rdev->mc.vram_start = rdev->mc.vram_location; + rdev->mc.vram_end = rdev->mc.vram_location + rdev->mc.mc_vram_size - 1; + rdev->mc.gtt_start = rdev->mc.gtt_location; + rdev->mc.gtt_end = rdev->mc.gtt_location + rdev->mc.gtt_size - 1; + /* FIXME: we should enforce default clock in case GPU is not in + * default setup + */ + a.full = rfixed_const(100); + rdev->pm.sclk.full = rfixed_const(rdev->clock.default_sclk); + rdev->pm.sclk.full = rfixed_div(rdev->pm.sclk, a); + return 0; +} +int evergreen_gpu_reset(struct radeon_device *rdev) +{ + /* FIXME: implement for evergreen */ + return 0; +} + +static int evergreen_startup(struct radeon_device *rdev) +{ +#if 0 + int r; + + if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw) { + r = r600_init_microcode(rdev); + if (r) { + DRM_ERROR("Failed to load firmware!\n"); + return r; + } + } +#endif + evergreen_mc_program(rdev); +#if 0 + if (rdev->flags & RADEON_IS_AGP) { + evergreem_agp_enable(rdev); + } else { + r = evergreen_pcie_gart_enable(rdev); + if (r) + return r; + } +#endif + evergreen_gpu_init(rdev); +#if 0 + if (!rdev->r600_blit.shader_obj) { + r = r600_blit_init(rdev); + if (r) { + DRM_ERROR("radeon: failed blitter (%d).\n", r); + return r; + } + } + + r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); + if (unlikely(r != 0)) + return r; + r = radeon_bo_pin(rdev->r600_blit.shader_obj, RADEON_GEM_DOMAIN_VRAM, + &rdev->r600_blit.shader_gpu_addr); + radeon_bo_unreserve(rdev->r600_blit.shader_obj); + if (r) { + DRM_ERROR("failed to pin blit object %d\n", r); + return r; + } + + /* Enable IRQ */ + r = r600_irq_init(rdev); + if (r) { + DRM_ERROR("radeon: IH init failed (%d).\n", r); + radeon_irq_kms_fini(rdev); + return r; + } + r600_irq_set(rdev); + + r = radeon_ring_init(rdev, rdev->cp.ring_size); + if (r) + return r; + r = evergreen_cp_load_microcode(rdev); + if (r) + return r; + r = r600_cp_resume(rdev); + if (r) + return r; + /* write back buffer are not vital so don't worry about failure */ + 
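/*
 * Editor's note, not part of the original patch: the surrounding "#if 0"
 * block (microcode load, GART enable, blitter, IRQ, CP ring and write-back
 * setup) appears to mirror the r600/rv770 bring-up path and is deliberately
 * left disabled, so evergreen_startup() currently only programs the memory
 * controller and calls the still-empty evergreen_gpu_init(); consistent
 * with that, the evergreen asic struct later in the patch leaves the
 * ring/IRQ/copy callbacks NULL and accel_working stays false.
 */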
r600_wb_enable(rdev); +#endif + return 0; +} + +int evergreen_resume(struct radeon_device *rdev) +{ + int r; + + /* Do not reset GPU before posting, on rv770 hw unlike on r500 hw, + * posting will perform necessary task to bring back GPU into good + * shape. + */ + /* post card */ + atom_asic_init(rdev->mode_info.atom_context); + /* Initialize clocks */ + r = radeon_clocks_init(rdev); + if (r) { + return r; + } + + r = evergreen_startup(rdev); + if (r) { + DRM_ERROR("r600 startup failed on resume\n"); + return r; + } +#if 0 + r = r600_ib_test(rdev); + if (r) { + DRM_ERROR("radeon: failled testing IB (%d).\n", r); + return r; + } +#endif + return r; + +} + +int evergreen_suspend(struct radeon_device *rdev) +{ +#if 0 + int r; + + /* FIXME: we should wait for ring to be empty */ + r700_cp_stop(rdev); + rdev->cp.ready = false; + r600_wb_disable(rdev); + evergreen_pcie_gart_disable(rdev); + /* unpin shaders bo */ + r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); + if (likely(r == 0)) { + radeon_bo_unpin(rdev->r600_blit.shader_obj); + radeon_bo_unreserve(rdev->r600_blit.shader_obj); + } +#endif + return 0; +} + +static bool evergreen_card_posted(struct radeon_device *rdev) +{ + u32 reg; + + /* first check CRTCs */ + reg = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET) | + RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET) | + RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET) | + RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET) | + RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET) | + RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET); + if (reg & EVERGREEN_CRTC_MASTER_EN) + return true; + + /* then check MEM_SIZE, in case the crtcs are off */ + if (RREG32(CONFIG_MEMSIZE)) + return true; + + return false; +} + +/* Plan is to move initialization in that function and use + * helper function so that radeon_device_init pretty much + * do nothing more than calling asic specific function. This + * should also allow to remove a bunch of callback function + * like vram_info. + */ +int evergreen_init(struct radeon_device *rdev) +{ + int r; + + r = radeon_dummy_page_init(rdev); + if (r) + return r; + /* This don't do much */ + r = radeon_gem_init(rdev); + if (r) + return r; + /* Read BIOS */ + if (!radeon_get_bios(rdev)) { + if (ASIC_IS_AVIVO(rdev)) + return -EINVAL; + } + /* Must be an ATOMBIOS */ + if (!rdev->is_atom_bios) { + dev_err(rdev->dev, "Expecting atombios for R600 GPU\n"); + return -EINVAL; + } + r = radeon_atombios_init(rdev); + if (r) + return r; + /* Post card if necessary */ + if (!evergreen_card_posted(rdev)) { + if (!rdev->bios) { + dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n"); + return -EINVAL; + } + DRM_INFO("GPU not posted. 
posting now...\n"); + atom_asic_init(rdev->mode_info.atom_context); + } + /* Initialize scratch registers */ + r600_scratch_init(rdev); + /* Initialize surface registers */ + radeon_surface_init(rdev); + /* Initialize clocks */ + radeon_get_clock_info(rdev->ddev); + r = radeon_clocks_init(rdev); + if (r) + return r; + /* Initialize power management */ + radeon_pm_init(rdev); + /* Fence driver */ + r = radeon_fence_driver_init(rdev); + if (r) + return r; + r = evergreen_mc_init(rdev); + if (r) + return r; + /* Memory manager */ + r = radeon_bo_init(rdev); + if (r) + return r; +#if 0 + r = radeon_irq_kms_init(rdev); + if (r) + return r; + + rdev->cp.ring_obj = NULL; + r600_ring_init(rdev, 1024 * 1024); + + rdev->ih.ring_obj = NULL; + r600_ih_ring_init(rdev, 64 * 1024); + + r = r600_pcie_gart_init(rdev); + if (r) + return r; +#endif + rdev->accel_working = false; + r = evergreen_startup(rdev); + if (r) { + evergreen_suspend(rdev); + /*r600_wb_fini(rdev);*/ + /*radeon_ring_fini(rdev);*/ + /*evergreen_pcie_gart_fini(rdev);*/ + rdev->accel_working = false; + } + if (rdev->accel_working) { + r = radeon_ib_pool_init(rdev); + if (r) { + DRM_ERROR("radeon: failed initializing IB pool (%d).\n", r); + rdev->accel_working = false; + } + r = r600_ib_test(rdev); + if (r) { + DRM_ERROR("radeon: failed testing IB (%d).\n", r); + rdev->accel_working = false; + } + } + return 0; +} + +void evergreen_fini(struct radeon_device *rdev) +{ + evergreen_suspend(rdev); +#if 0 + r600_blit_fini(rdev); + r600_irq_fini(rdev); + radeon_irq_kms_fini(rdev); + radeon_ring_fini(rdev); + r600_wb_fini(rdev); + evergreen_pcie_gart_fini(rdev); +#endif + radeon_gem_fini(rdev); + radeon_fence_driver_fini(rdev); + radeon_clocks_fini(rdev); + radeon_agp_fini(rdev); + radeon_bo_fini(rdev); + radeon_atombios_fini(rdev); + kfree(rdev->bios); + rdev->bios = NULL; + radeon_dummy_page_fini(rdev); +} diff --git a/drivers/gpu/drm/radeon/evergreen_reg.h b/drivers/gpu/drm/radeon/evergreen_reg.h new file mode 100644 index 000000000000..f7c7c9643433 --- /dev/null +++ b/drivers/gpu/drm/radeon/evergreen_reg.h @@ -0,0 +1,176 @@ +/* + * Copyright 2010 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: Alex Deucher + */ +#ifndef __EVERGREEN_REG_H__ +#define __EVERGREEN_REG_H__ + +/* evergreen */ +#define EVERGREEN_VGA_MEMORY_BASE_ADDRESS 0x310 +#define EVERGREEN_VGA_MEMORY_BASE_ADDRESS_HIGH 0x324 +#define EVERGREEN_D3VGA_CONTROL 0x3e0 +#define EVERGREEN_D4VGA_CONTROL 0x3e4 +#define EVERGREEN_D5VGA_CONTROL 0x3e8 +#define EVERGREEN_D6VGA_CONTROL 0x3ec + +#define EVERGREEN_P1PLL_SS_CNTL 0x414 +#define EVERGREEN_P2PLL_SS_CNTL 0x454 +# define EVERGREEN_PxPLL_SS_EN (1 << 12) +/* GRPH blocks at 0x6800, 0x7400, 0x10000, 0x10c00, 0x11800, 0x12400 */ +#define EVERGREEN_GRPH_ENABLE 0x6800 +#define EVERGREEN_GRPH_CONTROL 0x6804 +# define EVERGREEN_GRPH_DEPTH(x) (((x) & 0x3) << 0) +# define EVERGREEN_GRPH_DEPTH_8BPP 0 +# define EVERGREEN_GRPH_DEPTH_16BPP 1 +# define EVERGREEN_GRPH_DEPTH_32BPP 2 +# define EVERGREEN_GRPH_FORMAT(x) (((x) & 0x7) << 8) +/* 8 BPP */ +# define EVERGREEN_GRPH_FORMAT_INDEXED 0 +/* 16 BPP */ +# define EVERGREEN_GRPH_FORMAT_ARGB1555 0 +# define EVERGREEN_GRPH_FORMAT_ARGB565 1 +# define EVERGREEN_GRPH_FORMAT_ARGB4444 2 +# define EVERGREEN_GRPH_FORMAT_AI88 3 +# define EVERGREEN_GRPH_FORMAT_MONO16 4 +# define EVERGREEN_GRPH_FORMAT_BGRA5551 5 +/* 32 BPP */ +# define EVERGREEN_GRPH_FORMAT_ARGB8888 0 +# define EVERGREEN_GRPH_FORMAT_ARGB2101010 1 +# define EVERGREEN_GRPH_FORMAT_32BPP_DIG 2 +# define EVERGREEN_GRPH_FORMAT_8B_ARGB2101010 3 +# define EVERGREEN_GRPH_FORMAT_BGRA1010102 4 +# define EVERGREEN_GRPH_FORMAT_8B_BGRA1010102 5 +# define EVERGREEN_GRPH_FORMAT_RGB111110 6 +# define EVERGREEN_GRPH_FORMAT_BGR101111 7 +#define EVERGREEN_GRPH_SWAP_CONTROL 0x680c +# define EVERGREEN_GRPH_ENDIAN_SWAP(x) (((x) & 0x3) << 0) +# define EVERGREEN_GRPH_ENDIAN_NONE 0 +# define EVERGREEN_GRPH_ENDIAN_8IN16 1 +# define EVERGREEN_GRPH_ENDIAN_8IN32 2 +# define EVERGREEN_GRPH_ENDIAN_8IN64 3 +# define EVERGREEN_GRPH_RED_CROSSBAR(x) (((x) & 0x3) << 4) +# define EVERGREEN_GRPH_RED_SEL_R 0 +# define EVERGREEN_GRPH_RED_SEL_G 1 +# define EVERGREEN_GRPH_RED_SEL_B 2 +# define EVERGREEN_GRPH_RED_SEL_A 3 +# define EVERGREEN_GRPH_GREEN_CROSSBAR(x) (((x) & 0x3) << 6) +# define EVERGREEN_GRPH_GREEN_SEL_G 0 +# define EVERGREEN_GRPH_GREEN_SEL_B 1 +# define EVERGREEN_GRPH_GREEN_SEL_A 2 +# define EVERGREEN_GRPH_GREEN_SEL_R 3 +# define EVERGREEN_GRPH_BLUE_CROSSBAR(x) (((x) & 0x3) << 8) +# define EVERGREEN_GRPH_BLUE_SEL_B 0 +# define EVERGREEN_GRPH_BLUE_SEL_A 1 +# define EVERGREEN_GRPH_BLUE_SEL_R 2 +# define EVERGREEN_GRPH_BLUE_SEL_G 3 +# define EVERGREEN_GRPH_ALPHA_CROSSBAR(x) (((x) & 0x3) << 10) +# define EVERGREEN_GRPH_ALPHA_SEL_A 0 +# define EVERGREEN_GRPH_ALPHA_SEL_R 1 +# define EVERGREEN_GRPH_ALPHA_SEL_G 2 +# define EVERGREEN_GRPH_ALPHA_SEL_B 3 +#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS 0x6810 +#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS 0x6814 +# define EVERGREEN_GRPH_DFQ_ENABLE (1 << 0) +# define EVERGREEN_GRPH_SURFACE_ADDRESS_MASK 0xffffff00 +#define EVERGREEN_GRPH_PITCH 0x6818 +#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x681c +#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x6820 +#define EVERGREEN_GRPH_SURFACE_OFFSET_X 0x6824 +#define EVERGREEN_GRPH_SURFACE_OFFSET_Y 0x6828 +#define EVERGREEN_GRPH_X_START 0x682c +#define EVERGREEN_GRPH_Y_START 0x6830 +#define EVERGREEN_GRPH_X_END 0x6834 +#define EVERGREEN_GRPH_Y_END 0x6838 + +/* CUR blocks at 0x6998, 0x7598, 0x10198, 0x10d98, 0x11998, 0x12598 */ +#define EVERGREEN_CUR_CONTROL 0x6998 +# define EVERGREEN_CURSOR_EN (1 << 0) +# define EVERGREEN_CURSOR_MODE(x) (((x) & 0x3) << 8) +# define EVERGREEN_CURSOR_MONO 0 +# define 
EVERGREEN_CURSOR_24_1 1 +# define EVERGREEN_CURSOR_24_8_PRE_MULT 2 +# define EVERGREEN_CURSOR_24_8_UNPRE_MULT 3 +# define EVERGREEN_CURSOR_2X_MAGNIFY (1 << 16) +# define EVERGREEN_CURSOR_FORCE_MC_ON (1 << 20) +# define EVERGREEN_CURSOR_URGENT_CONTROL(x) (((x) & 0x7) << 24) +# define EVERGREEN_CURSOR_URGENT_ALWAYS 0 +# define EVERGREEN_CURSOR_URGENT_1_8 1 +# define EVERGREEN_CURSOR_URGENT_1_4 2 +# define EVERGREEN_CURSOR_URGENT_3_8 3 +# define EVERGREEN_CURSOR_URGENT_1_2 4 +#define EVERGREEN_CUR_SURFACE_ADDRESS 0x699c +# define EVERGREEN_CUR_SURFACE_ADDRESS_MASK 0xfffff000 +#define EVERGREEN_CUR_SIZE 0x69a0 +#define EVERGREEN_CUR_SURFACE_ADDRESS_HIGH 0x69a4 +#define EVERGREEN_CUR_POSITION 0x69a8 +#define EVERGREEN_CUR_HOT_SPOT 0x69ac +#define EVERGREEN_CUR_COLOR1 0x69b0 +#define EVERGREEN_CUR_COLOR2 0x69b4 +#define EVERGREEN_CUR_UPDATE 0x69b8 +# define EVERGREEN_CURSOR_UPDATE_PENDING (1 << 0) +# define EVERGREEN_CURSOR_UPDATE_TAKEN (1 << 1) +# define EVERGREEN_CURSOR_UPDATE_LOCK (1 << 16) +# define EVERGREEN_CURSOR_DISABLE_MULTIPLE_UPDATE (1 << 24) + +/* LUT blocks at 0x69e0, 0x75e0, 0x101e0, 0x10de0, 0x119e0, 0x125e0 */ +#define EVERGREEN_DC_LUT_RW_MODE 0x69e0 +#define EVERGREEN_DC_LUT_RW_INDEX 0x69e4 +#define EVERGREEN_DC_LUT_SEQ_COLOR 0x69e8 +#define EVERGREEN_DC_LUT_PWL_DATA 0x69ec +#define EVERGREEN_DC_LUT_30_COLOR 0x69f0 +#define EVERGREEN_DC_LUT_VGA_ACCESS_ENABLE 0x69f4 +#define EVERGREEN_DC_LUT_WRITE_EN_MASK 0x69f8 +#define EVERGREEN_DC_LUT_AUTOFILL 0x69fc +#define EVERGREEN_DC_LUT_CONTROL 0x6a00 +#define EVERGREEN_DC_LUT_BLACK_OFFSET_BLUE 0x6a04 +#define EVERGREEN_DC_LUT_BLACK_OFFSET_GREEN 0x6a08 +#define EVERGREEN_DC_LUT_BLACK_OFFSET_RED 0x6a0c +#define EVERGREEN_DC_LUT_WHITE_OFFSET_BLUE 0x6a10 +#define EVERGREEN_DC_LUT_WHITE_OFFSET_GREEN 0x6a14 +#define EVERGREEN_DC_LUT_WHITE_OFFSET_RED 0x6a18 + +#define EVERGREEN_DATA_FORMAT 0x6b00 +# define EVERGREEN_INTERLEAVE_EN (1 << 0) +#define EVERGREEN_DESKTOP_HEIGHT 0x6b04 + +#define EVERGREEN_VIEWPORT_START 0x6d70 +#define EVERGREEN_VIEWPORT_SIZE 0x6d74 + +/* display controller offsets used for crtc/cur/lut/grph/viewport/etc. 
*/ +#define EVERGREEN_CRTC0_REGISTER_OFFSET (0x6df0 - 0x6df0) +#define EVERGREEN_CRTC1_REGISTER_OFFSET (0x79f0 - 0x6df0) +#define EVERGREEN_CRTC2_REGISTER_OFFSET (0x105f0 - 0x6df0) +#define EVERGREEN_CRTC3_REGISTER_OFFSET (0x111f0 - 0x6df0) +#define EVERGREEN_CRTC4_REGISTER_OFFSET (0x11df0 - 0x6df0) +#define EVERGREEN_CRTC5_REGISTER_OFFSET (0x129f0 - 0x6df0) + +/* CRTC blocks at 0x6df0, 0x79f0, 0x105f0, 0x111f0, 0x11df0, 0x129f0 */ +#define EVERGREEN_CRTC_CONTROL 0x6e70 +# define EVERGREEN_CRTC_MASTER_EN (1 << 0) +#define EVERGREEN_CRTC_UPDATE_LOCK 0x6ed4 + +#define EVERGREEN_DC_GPIO_HPD_MASK 0x64b0 +#define EVERGREEN_DC_GPIO_HPD_A 0x64b4 +#define EVERGREEN_DC_GPIO_HPD_EN 0x64b8 +#define EVERGREEN_DC_GPIO_HPD_Y 0x64bc + +#endif diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index b519d7db2dcb..a7a96a240844 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -138,11 +138,14 @@ void radeon_dummy_page_fini(struct radeon_device *rdev); struct radeon_clock { struct radeon_pll p1pll; struct radeon_pll p2pll; + struct radeon_pll dcpll; struct radeon_pll spll; struct radeon_pll mpll; /* 10 Khz units */ uint32_t default_mclk; uint32_t default_sclk; + uint32_t default_dispclk; + uint32_t dp_extclk; }; /* @@ -1062,7 +1065,7 @@ void r100_pll_errata_after_index(struct radeon_device *rdev); #define ASIC_IS_AVIVO(rdev) ((rdev->family >= CHIP_RS600)) #define ASIC_IS_DCE3(rdev) ((rdev->family >= CHIP_RV620)) #define ASIC_IS_DCE32(rdev) ((rdev->family >= CHIP_RV730)) - +#define ASIC_IS_DCE4(rdev) ((rdev->family >= CHIP_CEDAR)) /* * BIOS helpers. @@ -1296,6 +1299,14 @@ extern void r600_hdmi_update_audio_settings(struct drm_encoder *encoder, uint8_t status_bits, uint8_t category_code); +/* evergreen */ +struct evergreen_mc_save { + u32 vga_control[6]; + u32 vga_render_control; + u32 vga_hdp_control; + u32 crtc_control[6]; +}; + #include "radeon_object.h" #endif diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 3f3c7a2169f1..4b0cb67a379a 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -606,4 +606,54 @@ static struct radeon_asic rv770_asic = { .ioctl_wait_idle = r600_ioctl_wait_idle, }; +/* + * evergreen + */ +int evergreen_init(struct radeon_device *rdev); +void evergreen_fini(struct radeon_device *rdev); +int evergreen_suspend(struct radeon_device *rdev); +int evergreen_resume(struct radeon_device *rdev); +int evergreen_gpu_reset(struct radeon_device *rdev); +void evergreen_bandwidth_update(struct radeon_device *rdev); +void evergreen_hpd_init(struct radeon_device *rdev); +void evergreen_hpd_fini(struct radeon_device *rdev); +bool evergreen_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd); +void evergreen_hpd_set_polarity(struct radeon_device *rdev, + enum radeon_hpd_id hpd); + +static struct radeon_asic evergreen_asic = { + .init = &evergreen_init, + .fini = &evergreen_fini, + .suspend = &evergreen_suspend, + .resume = &evergreen_resume, + .cp_commit = NULL, + .gpu_reset = &evergreen_gpu_reset, + .vga_set_state = &r600_vga_set_state, + .gart_tlb_flush = &r600_pcie_gart_tlb_flush, + .gart_set_page = &rs600_gart_set_page, + .ring_test = NULL, + .ring_ib_execute = NULL, + .irq_set = NULL, + .irq_process = NULL, + .get_vblank_counter = NULL, + .fence_ring_emit = NULL, + .cs_parse = NULL, + .copy_blit = NULL, + .copy_dma = NULL, + .copy = NULL, + .get_engine_clock = &radeon_atom_get_engine_clock, + .set_engine_clock = 
&radeon_atom_set_engine_clock, + .get_memory_clock = &radeon_atom_get_memory_clock, + .set_memory_clock = &radeon_atom_set_memory_clock, + .set_pcie_lanes = NULL, + .set_clock_gating = NULL, + .set_surface_reg = r600_set_surface_reg, + .clear_surface_reg = r600_clear_surface_reg, + .bandwidth_update = &evergreen_bandwidth_update, + .hpd_init = &evergreen_hpd_init, + .hpd_fini = &evergreen_hpd_fini, + .hpd_sense = &evergreen_hpd_sense, + .hpd_set_polarity = &evergreen_hpd_set_polarity, +}; + #endif diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index c3198453528f..4f7dbce9883a 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -159,8 +159,15 @@ static struct radeon_hpd radeon_atom_get_hpd_info_from_gpio(struct radeon_device struct radeon_gpio_rec *gpio) { struct radeon_hpd hpd; + u32 reg; + + if (ASIC_IS_DCE4(rdev)) + reg = EVERGREEN_DC_GPIO_HPD_A; + else + reg = AVIVO_DC_GPIO_HPD_A; + hpd.gpio = *gpio; - if (gpio->reg == AVIVO_DC_GPIO_HPD_A) { + if (gpio->reg == reg) { switch(gpio->mask) { case (1 << 0): hpd.hpd = RADEON_HPD_1; @@ -556,6 +563,9 @@ bool radeon_get_atom_connector_info_from_object_table(struct drm_device *dev) ddc_bus.valid = false; } + /* needed for aux chan transactions */ + ddc_bus.hpd_id = hpd.hpd ? (hpd.hpd - 1) : 0; + conn_id = le16_to_cpu(path->usConnObjectId); if (!radeon_atom_apply_quirks @@ -820,6 +830,7 @@ union firmware_info { ATOM_FIRMWARE_INFO_V1_2 info_12; ATOM_FIRMWARE_INFO_V1_3 info_13; ATOM_FIRMWARE_INFO_V1_4 info_14; + ATOM_FIRMWARE_INFO_V2_1 info_21; }; bool radeon_atom_get_clock_info(struct drm_device *dev) @@ -831,6 +842,7 @@ bool radeon_atom_get_clock_info(struct drm_device *dev) uint8_t frev, crev; struct radeon_pll *p1pll = &rdev->clock.p1pll; struct radeon_pll *p2pll = &rdev->clock.p2pll; + struct radeon_pll *dcpll = &rdev->clock.dcpll; struct radeon_pll *spll = &rdev->clock.spll; struct radeon_pll *mpll = &rdev->clock.mpll; uint16_t data_offset; @@ -933,8 +945,19 @@ bool radeon_atom_get_clock_info(struct drm_device *dev) rdev->clock.default_mclk = le32_to_cpu(firmware_info->info.ulDefaultMemoryClock); + if (ASIC_IS_DCE4(rdev)) { + rdev->clock.default_dispclk = + le32_to_cpu(firmware_info->info_21.ulDefaultDispEngineClkFreq); + if (rdev->clock.default_dispclk == 0) + rdev->clock.default_dispclk = 60000; /* 600 Mhz */ + rdev->clock.dp_extclk = + le16_to_cpu(firmware_info->info_21.usUniphyDPModeExtClkFreq); + } + *dcpll = *p1pll; + return true; } + return false; } diff --git a/drivers/gpu/drm/radeon/radeon_clocks.c b/drivers/gpu/drm/radeon/radeon_clocks.c index 3ec94a0d3109..f64936cc4dd9 100644 --- a/drivers/gpu/drm/radeon/radeon_clocks.c +++ b/drivers/gpu/drm/radeon/radeon_clocks.c @@ -96,6 +96,7 @@ void radeon_get_clock_info(struct drm_device *dev) struct radeon_device *rdev = dev->dev_private; struct radeon_pll *p1pll = &rdev->clock.p1pll; struct radeon_pll *p2pll = &rdev->clock.p2pll; + struct radeon_pll *dcpll = &rdev->clock.dcpll; struct radeon_pll *spll = &rdev->clock.spll; struct radeon_pll *mpll = &rdev->clock.mpll; int ret; @@ -204,6 +205,17 @@ void radeon_get_clock_info(struct drm_device *dev) p2pll->max_frac_feedback_div = 0; } + /* dcpll is DCE4 only */ + dcpll->min_post_div = 2; + dcpll->max_post_div = 0x7f; + dcpll->min_frac_feedback_div = 0; + dcpll->max_frac_feedback_div = 9; + dcpll->min_ref_div = 2; + dcpll->max_ref_div = 0x3ff; + dcpll->min_feedback_div = 4; + dcpll->max_feedback_div = 0xfff; + dcpll->best_vco = 0; + 
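Editor's aside, not part of the original patch: the DCPLL entries above only
record divider limits; whatever picks concrete dividers still has to stay
inside them. A minimal sketch of such a range check follows, assuming only
the struct radeon_pll limit fields already used in this function;
dcpll_dividers_in_range is a hypothetical name used for illustration, not a
helper that exists in the radeon driver.

/* Illustrative only: reject a ref/feedback/post divider choice that
 * falls outside the limits initialized for a pll above. */
static bool dcpll_dividers_in_range(const struct radeon_pll *pll,
				    u32 ref_div, u32 fb_div, u32 post_div)
{
	if (ref_div < pll->min_ref_div || ref_div > pll->max_ref_div)
		return false;
	if (fb_div < pll->min_feedback_div || fb_div > pll->max_feedback_div)
		return false;
	if (post_div < pll->min_post_div || post_div > pll->max_post_div)
		return false;
	return true;
}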
p1pll->min_ref_div = 2; p1pll->max_ref_div = 0x3ff; p1pll->min_feedback_div = 4; diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c index a4d40de50434..26fb42450cb2 100644 --- a/drivers/gpu/drm/radeon/radeon_combios.c +++ b/drivers/gpu/drm/radeon/radeon_combios.c @@ -600,6 +600,7 @@ static struct radeon_i2c_bus_rec combios_setup_i2c_bus(struct radeon_device *rde } i2c.mm_i2c = false; i2c.i2c_id = 0; + i2c.hpd_id = 0; if (ddc_line) i2c.valid = true; diff --git a/drivers/gpu/drm/radeon/radeon_cursor.c b/drivers/gpu/drm/radeon/radeon_cursor.c index 28772a37009c..9514f3275357 100644 --- a/drivers/gpu/drm/radeon/radeon_cursor.c +++ b/drivers/gpu/drm/radeon/radeon_cursor.c @@ -36,7 +36,14 @@ static void radeon_lock_cursor(struct drm_crtc *crtc, bool lock) struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); uint32_t cur_lock; - if (ASIC_IS_AVIVO(rdev)) { + if (ASIC_IS_DCE4(rdev)) { + cur_lock = RREG32(EVERGREEN_CUR_UPDATE + radeon_crtc->crtc_offset); + if (lock) + cur_lock |= EVERGREEN_CURSOR_UPDATE_LOCK; + else + cur_lock &= ~EVERGREEN_CURSOR_UPDATE_LOCK; + WREG32(EVERGREEN_CUR_UPDATE + radeon_crtc->crtc_offset, cur_lock); + } else if (ASIC_IS_AVIVO(rdev)) { cur_lock = RREG32(AVIVO_D1CUR_UPDATE + radeon_crtc->crtc_offset); if (lock) cur_lock |= AVIVO_D1CURSOR_UPDATE_LOCK; @@ -58,7 +65,10 @@ static void radeon_hide_cursor(struct drm_crtc *crtc) struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); struct radeon_device *rdev = crtc->dev->dev_private; - if (ASIC_IS_AVIVO(rdev)) { + if (ASIC_IS_DCE4(rdev)) { + WREG32(RADEON_MM_INDEX, EVERGREEN_CUR_CONTROL + radeon_crtc->crtc_offset); + WREG32(RADEON_MM_DATA, EVERGREEN_CURSOR_MODE(EVERGREEN_CURSOR_24_8_PRE_MULT)); + } else if (ASIC_IS_AVIVO(rdev)) { WREG32(RADEON_MM_INDEX, AVIVO_D1CUR_CONTROL + radeon_crtc->crtc_offset); WREG32(RADEON_MM_DATA, (AVIVO_D1CURSOR_MODE_24BPP << AVIVO_D1CURSOR_MODE_SHIFT)); } else { @@ -81,10 +91,14 @@ static void radeon_show_cursor(struct drm_crtc *crtc) struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); struct radeon_device *rdev = crtc->dev->dev_private; - if (ASIC_IS_AVIVO(rdev)) { + if (ASIC_IS_DCE4(rdev)) { + WREG32(RADEON_MM_INDEX, EVERGREEN_CUR_CONTROL + radeon_crtc->crtc_offset); + WREG32(RADEON_MM_DATA, EVERGREEN_CURSOR_EN | + EVERGREEN_CURSOR_MODE(EVERGREEN_CURSOR_24_8_PRE_MULT)); + } else if (ASIC_IS_AVIVO(rdev)) { WREG32(RADEON_MM_INDEX, AVIVO_D1CUR_CONTROL + radeon_crtc->crtc_offset); WREG32(RADEON_MM_DATA, AVIVO_D1CURSOR_EN | - (AVIVO_D1CURSOR_MODE_24BPP << AVIVO_D1CURSOR_MODE_SHIFT)); + (AVIVO_D1CURSOR_MODE_24BPP << AVIVO_D1CURSOR_MODE_SHIFT)); } else { switch (radeon_crtc->crtc_id) { case 0: @@ -109,7 +123,10 @@ static void radeon_set_cursor(struct drm_crtc *crtc, struct drm_gem_object *obj, struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); struct radeon_device *rdev = crtc->dev->dev_private; - if (ASIC_IS_AVIVO(rdev)) { + if (ASIC_IS_DCE4(rdev)) { + WREG32(EVERGREEN_CUR_SURFACE_ADDRESS_HIGH + radeon_crtc->crtc_offset, 0); + WREG32(EVERGREEN_CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset, gpu_addr); + } else if (ASIC_IS_AVIVO(rdev)) { if (rdev->family >= CHIP_RV770) { if (radeon_crtc->crtc_id) WREG32(R700_D2CUR_SURFACE_ADDRESS_HIGH, 0); @@ -201,7 +218,15 @@ int radeon_crtc_cursor_move(struct drm_crtc *crtc, yorigin = CURSOR_HEIGHT - 1; radeon_lock_cursor(crtc, true); - if (ASIC_IS_AVIVO(rdev)) { + if (ASIC_IS_DCE4(rdev)) { + /* XXX: check if evergreen has the same issues as avivo chips */ + WREG32(EVERGREEN_CUR_POSITION + 
radeon_crtc->crtc_offset, + ((xorigin ? 0 : x) << 16) | + (yorigin ? 0 : y)); + WREG32(EVERGREEN_CUR_HOT_SPOT + radeon_crtc->crtc_offset, (xorigin << 16) | yorigin); + WREG32(EVERGREEN_CUR_SIZE + radeon_crtc->crtc_offset, + ((radeon_crtc->cursor_width - 1) << 16) | (radeon_crtc->cursor_height - 1)); + } else if (ASIC_IS_AVIVO(rdev)) { int w = radeon_crtc->cursor_width; int i = 0; struct drm_crtc *crtc_p; diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index c90f8d370266..c224c1d944ef 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -182,7 +182,16 @@ bool radeon_card_posted(struct radeon_device *rdev) uint32_t reg; /* first check CRTCs */ - if (ASIC_IS_AVIVO(rdev)) { + if (ASIC_IS_DCE4(rdev)) { + reg = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET) | + RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET) | + RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET) | + RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET) | + RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET) | + RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET); + if (reg & EVERGREEN_CRTC_MASTER_EN) + return true; + } else if (ASIC_IS_AVIVO(rdev)) { reg = RREG32(AVIVO_D1CRTC_CONTROL) | RREG32(AVIVO_D2CRTC_CONTROL); if (reg & AVIVO_CRTC_EN) { @@ -310,7 +319,7 @@ void radeon_register_accessor_init(struct radeon_device *rdev) rdev->mc_rreg = &rs600_mc_rreg; rdev->mc_wreg = &rs600_mc_wreg; } - if (rdev->family >= CHIP_R600) { + if ((rdev->family >= CHIP_R600) && (rdev->family <= CHIP_RV740)) { rdev->pciep_rreg = &r600_pciep_rreg; rdev->pciep_wreg = &r600_pciep_wreg; } @@ -387,6 +396,13 @@ int radeon_asic_init(struct radeon_device *rdev) case CHIP_RV740: rdev->asic = &rv770_asic; break; + case CHIP_CEDAR: + case CHIP_REDWOOD: + case CHIP_JUNIPER: + case CHIP_CYPRESS: + case CHIP_HEMLOCK: + rdev->asic = &evergreen_asic; + break; default: /* FIXME: not supported yet */ return -EINVAL; diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index a41ed40c13fb..257827806aee 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -68,6 +68,36 @@ static void avivo_crtc_load_lut(struct drm_crtc *crtc) WREG32(AVIVO_D1GRPH_LUT_SEL + radeon_crtc->crtc_offset, radeon_crtc->crtc_id); } +static void evergreen_crtc_load_lut(struct drm_crtc *crtc) +{ + struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); + struct drm_device *dev = crtc->dev; + struct radeon_device *rdev = dev->dev_private; + int i; + + DRM_DEBUG("%d\n", radeon_crtc->crtc_id); + WREG32(EVERGREEN_DC_LUT_CONTROL + radeon_crtc->crtc_offset, 0); + + WREG32(EVERGREEN_DC_LUT_BLACK_OFFSET_BLUE + radeon_crtc->crtc_offset, 0); + WREG32(EVERGREEN_DC_LUT_BLACK_OFFSET_GREEN + radeon_crtc->crtc_offset, 0); + WREG32(EVERGREEN_DC_LUT_BLACK_OFFSET_RED + radeon_crtc->crtc_offset, 0); + + WREG32(EVERGREEN_DC_LUT_WHITE_OFFSET_BLUE + radeon_crtc->crtc_offset, 0xffff); + WREG32(EVERGREEN_DC_LUT_WHITE_OFFSET_GREEN + radeon_crtc->crtc_offset, 0xffff); + WREG32(EVERGREEN_DC_LUT_WHITE_OFFSET_RED + radeon_crtc->crtc_offset, 0xffff); + + WREG32(EVERGREEN_DC_LUT_RW_MODE, radeon_crtc->crtc_id); + WREG32(EVERGREEN_DC_LUT_WRITE_EN_MASK, 0x00000007); + + WREG32(EVERGREEN_DC_LUT_RW_INDEX, 0); + for (i = 0; i < 256; i++) { + WREG32(EVERGREEN_DC_LUT_30_COLOR, + (radeon_crtc->lut_r[i] << 20) | + (radeon_crtc->lut_g[i] << 10) | + (radeon_crtc->lut_b[i] << 
0)); + } +} + static void legacy_crtc_load_lut(struct drm_crtc *crtc) { struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); @@ -100,7 +130,9 @@ void radeon_crtc_load_lut(struct drm_crtc *crtc) if (!crtc->enabled) return; - if (ASIC_IS_AVIVO(rdev)) + if (ASIC_IS_DCE4(rdev)) + evergreen_crtc_load_lut(crtc); + else if (ASIC_IS_AVIVO(rdev)) avivo_crtc_load_lut(crtc); else legacy_crtc_load_lut(crtc); @@ -862,8 +894,12 @@ int radeon_modeset_init(struct radeon_device *rdev) if (rdev->flags & RADEON_SINGLE_CRTC) rdev->num_crtc = 1; - else - rdev->num_crtc = 2; + else { + if (ASIC_IS_DCE4(rdev)) + rdev->num_crtc = 6; + else + rdev->num_crtc = 2; + } /* allocate crtcs */ for (i = 0; i < rdev->num_crtc; i++) { diff --git a/drivers/gpu/drm/radeon/radeon_encoders.c b/drivers/gpu/drm/radeon/radeon_encoders.c index f7d6078876c5..bc926ea0a530 100644 --- a/drivers/gpu/drm/radeon/radeon_encoders.c +++ b/drivers/gpu/drm/radeon/radeon_encoders.c @@ -53,7 +53,7 @@ static uint32_t radeon_encoder_clones(struct drm_encoder *encoder) /* DVO requires 2x ppll clocks depending on tmds chip */ if (radeon_encoder->devices & ATOM_DEVICE_DFP2_SUPPORT) return index_mask; - + count = -1; list_for_each_entry(clone_encoder, &dev->mode_config.encoder_list, head) { struct radeon_encoder *radeon_clone = to_radeon_encoder(clone_encoder); @@ -671,6 +671,18 @@ atombios_get_encoder_mode(struct drm_encoder *encoder) * - 2 DIG encoder blocks. * DIG1/2 can drive UNIPHY0/1/2 link A or link B * + * DCE 4.0 + * - 3 DIG transmitter blocks UNPHY0/1/2 (links A and B). + * Supports up to 6 digital outputs + * - 6 DIG encoder blocks. + * - DIG to PHY mapping is hardcoded + * DIG1 drives UNIPHY0 link A, A+B + * DIG2 drives UNIPHY0 link B + * DIG3 drives UNIPHY1 link A, A+B + * DIG4 drives UNIPHY1 link B + * DIG5 drives UNIPHY2 link A, A+B + * DIG6 drives UNIPHY2 link B + * * Routing * crtc -> dig encoder -> UNIPHY/LVTMA (1 or 2 links) * Examples: @@ -679,7 +691,14 @@ atombios_get_encoder_mode(struct drm_encoder *encoder) * crtc0 -> dig1 -> UNIPHY2 link A -> LVDS * crtc1 -> dig2 -> UNIPHY1 link B+A -> TMDS/HDMI */ -static void + +union dig_encoder_control { + DIG_ENCODER_CONTROL_PS_ALLOCATION v1; + DIG_ENCODER_CONTROL_PARAMETERS_V2 v2; + DIG_ENCODER_CONTROL_PARAMETERS_V3 v3; +}; + +void atombios_dig_encoder_setup(struct drm_encoder *encoder, int action) { struct drm_device *dev = encoder->dev; @@ -688,7 +707,7 @@ atombios_dig_encoder_setup(struct drm_encoder *encoder, int action) struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; struct radeon_connector_atom_dig *dig_connector = radeon_get_atom_connector_priv_from_encoder(encoder); - DIG_ENCODER_CONTROL_PS_ALLOCATION args; + union dig_encoder_control args; int index = 0, num = 0; uint8_t frev, crev; @@ -697,56 +716,53 @@ atombios_dig_encoder_setup(struct drm_encoder *encoder, int action) memset(&args, 0, sizeof(args)); - if (dig->dig_encoder) - index = GetIndexIntoMasterTable(COMMAND, DIG2EncoderControl); - else - index = GetIndexIntoMasterTable(COMMAND, DIG1EncoderControl); + if (ASIC_IS_DCE4(rdev)) + index = GetIndexIntoMasterTable(COMMAND, DIGxEncoderControl); + else { + if (dig->dig_encoder) + index = GetIndexIntoMasterTable(COMMAND, DIG2EncoderControl); + else + index = GetIndexIntoMasterTable(COMMAND, DIG1EncoderControl); + } num = dig->dig_encoder + 1; atom_parse_cmd_header(rdev->mode_info.atom_context, index, &frev, &crev); - args.ucAction = action; - args.usPixelClock = cpu_to_le16(radeon_encoder->pixel_clock / 10); + args.v1.ucAction = action; + args.v1.usPixelClock = 
cpu_to_le16(radeon_encoder->pixel_clock / 10); + args.v1.ucEncoderMode = atombios_get_encoder_mode(encoder); - if (ASIC_IS_DCE32(rdev)) { + if (args.v1.ucEncoderMode == ATOM_ENCODER_MODE_DP) { + if (dig_connector->dp_clock == 270000) + args.v1.ucConfig |= ATOM_ENCODER_CONFIG_DPLINKRATE_2_70GHZ; + args.v1.ucLaneNum = dig_connector->dp_lane_count; + } else if (radeon_encoder->pixel_clock > 165000) + args.v1.ucLaneNum = 8; + else + args.v1.ucLaneNum = 4; + + if (ASIC_IS_DCE4(rdev)) { + args.v3.acConfig.ucDigSel = dig->dig_encoder; + args.v3.ucBitPerColor = PANEL_8BIT_PER_COLOR; + } else { switch (radeon_encoder->encoder_id) { case ENCODER_OBJECT_ID_INTERNAL_UNIPHY: - args.ucConfig = ATOM_ENCODER_CONFIG_V2_TRANSMITTER1; + args.v1.ucConfig = ATOM_ENCODER_CONFIG_V2_TRANSMITTER1; break; case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1: - args.ucConfig = ATOM_ENCODER_CONFIG_V2_TRANSMITTER2; + case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA: + args.v1.ucConfig = ATOM_ENCODER_CONFIG_V2_TRANSMITTER2; break; case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2: - args.ucConfig = ATOM_ENCODER_CONFIG_V2_TRANSMITTER3; - break; - } - } else { - switch (radeon_encoder->encoder_id) { - case ENCODER_OBJECT_ID_INTERNAL_UNIPHY: - args.ucConfig = ATOM_ENCODER_CONFIG_TRANSMITTER1; - break; - case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA: - args.ucConfig = ATOM_ENCODER_CONFIG_TRANSMITTER2; + args.v1.ucConfig = ATOM_ENCODER_CONFIG_V2_TRANSMITTER3; break; } + if (dig_connector->linkb) + args.v1.ucConfig |= ATOM_ENCODER_CONFIG_LINKB; + else + args.v1.ucConfig |= ATOM_ENCODER_CONFIG_LINKA; } - args.ucEncoderMode = atombios_get_encoder_mode(encoder); - - if (args.ucEncoderMode == ATOM_ENCODER_MODE_DP) { - if (dig_connector->dp_clock == 270000) - args.ucConfig |= ATOM_ENCODER_CONFIG_DPLINKRATE_2_70GHZ; - args.ucLaneNum = dig_connector->dp_lane_count; - } else if (radeon_encoder->pixel_clock > 165000) - args.ucLaneNum = 8; - else - args.ucLaneNum = 4; - - if (dig_connector->linkb) - args.ucConfig |= ATOM_ENCODER_CONFIG_LINKB; - else - args.ucConfig |= ATOM_ENCODER_CONFIG_LINKA; - atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args); } @@ -754,6 +770,7 @@ atombios_dig_encoder_setup(struct drm_encoder *encoder, int action) union dig_transmitter_control { DIG_TRANSMITTER_CONTROL_PS_ALLOCATION v1; DIG_TRANSMITTER_CONTROL_PARAMETERS_V2 v2; + DIG_TRANSMITTER_CONTROL_PARAMETERS_V3 v3; }; void @@ -771,6 +788,7 @@ atombios_dig_transmitter_setup(struct drm_encoder *encoder, int action, uint8_t int index = 0, num = 0; uint8_t frev, crev; bool is_dp = false; + int pll_id = 0; if (!dig || !dig_connector) return; @@ -783,7 +801,7 @@ atombios_dig_transmitter_setup(struct drm_encoder *encoder, int action, uint8_t memset(&args, 0, sizeof(args)); - if (ASIC_IS_DCE32(rdev)) + if (ASIC_IS_DCE32(rdev) || ASIC_IS_DCE4(rdev)) index = GetIndexIntoMasterTable(COMMAND, UNIPHYTransmitterControl); else { switch (radeon_encoder->encoder_id) { @@ -813,7 +831,54 @@ atombios_dig_transmitter_setup(struct drm_encoder *encoder, int action, uint8_t else args.v1.usPixelClock = cpu_to_le16(radeon_encoder->pixel_clock / 10); } - if (ASIC_IS_DCE32(rdev)) { + if (ASIC_IS_DCE4(rdev)) { + if (is_dp) + args.v3.ucLaneNum = dig_connector->dp_lane_count; + else if (radeon_encoder->pixel_clock > 165000) + args.v3.ucLaneNum = 8; + else + args.v3.ucLaneNum = 4; + + if (dig_connector->linkb) { + args.v3.acConfig.ucLinkSel = 1; + args.v3.acConfig.ucEncoderSel = 1; + } + + /* Select the PLL for the PHY + * DP PHY should be clocked from external src if there is + * one. 
+ */ + if (encoder->crtc) { + struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc); + pll_id = radeon_crtc->pll_id; + } + if (is_dp && rdev->clock.dp_extclk) + args.v3.acConfig.ucRefClkSource = 2; /* external src */ + else + args.v3.acConfig.ucRefClkSource = pll_id; + + switch (radeon_encoder->encoder_id) { + case ENCODER_OBJECT_ID_INTERNAL_UNIPHY: + args.v3.acConfig.ucTransmitterSel = 0; + num = 0; + break; + case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1: + args.v3.acConfig.ucTransmitterSel = 1; + num = 1; + break; + case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2: + args.v3.acConfig.ucTransmitterSel = 2; + num = 2; + break; + } + + if (is_dp) + args.v3.acConfig.fCoherentMode = 1; /* DP requires coherent */ + else if (radeon_encoder->devices & (ATOM_DEVICE_DFP_SUPPORT)) { + if (dig->coherent_mode) + args.v3.acConfig.fCoherentMode = 1; + } + } else if (ASIC_IS_DCE32(rdev)) { if (dig->dig_encoder == 1) args.v2.acConfig.ucEncoderSel = 1; if (dig_connector->linkb) @@ -841,7 +906,6 @@ atombios_dig_transmitter_setup(struct drm_encoder *encoder, int action, uint8_t args.v2.acConfig.fCoherentMode = 1; } } else { - args.v1.ucConfig = ATOM_TRANSMITTER_CONFIG_CLKSRC_PPLL; if (dig->dig_encoder) @@ -1102,10 +1166,26 @@ atombios_set_encoder_crtc_source(struct drm_encoder *encoder) case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2: case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA: dig = radeon_encoder->enc_priv; - if (dig->dig_encoder) - args.v2.ucEncoderID = ASIC_INT_DIG2_ENCODER_ID; - else + switch (dig->dig_encoder) { + case 0: args.v2.ucEncoderID = ASIC_INT_DIG1_ENCODER_ID; + break; + case 1: + args.v2.ucEncoderID = ASIC_INT_DIG2_ENCODER_ID; + break; + case 2: + args.v2.ucEncoderID = ASIC_INT_DIG3_ENCODER_ID; + break; + case 3: + args.v2.ucEncoderID = ASIC_INT_DIG4_ENCODER_ID; + break; + case 4: + args.v2.ucEncoderID = ASIC_INT_DIG5_ENCODER_ID; + break; + case 5: + args.v2.ucEncoderID = ASIC_INT_DIG6_ENCODER_ID; + break; + } break; case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1: args.v2.ucEncoderID = ASIC_INT_DVO_ENCODER_ID; @@ -1162,6 +1242,7 @@ atombios_apply_encoder_quirks(struct drm_encoder *encoder, } /* set scaler clears this on some chips */ + /* XXX check DCE4 */ if (!(radeon_encoder->active_device & (ATOM_DEVICE_TV_SUPPORT))) { if (ASIC_IS_AVIVO(rdev) && (mode->flags & DRM_MODE_FLAG_INTERLACE)) WREG32(AVIVO_D1MODE_DATA_FORMAT + radeon_crtc->crtc_offset, @@ -1178,6 +1259,33 @@ static int radeon_atom_pick_dig_encoder(struct drm_encoder *encoder) struct drm_encoder *test_encoder; struct radeon_encoder_atom_dig *dig; uint32_t dig_enc_in_use = 0; + + if (ASIC_IS_DCE4(rdev)) { + struct radeon_connector_atom_dig *dig_connector = + radeon_get_atom_connector_priv_from_encoder(encoder); + + switch (radeon_encoder->encoder_id) { + case ENCODER_OBJECT_ID_INTERNAL_UNIPHY: + if (dig_connector->linkb) + return 1; + else + return 0; + break; + case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1: + if (dig_connector->linkb) + return 3; + else + return 2; + break; + case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2: + if (dig_connector->linkb) + return 5; + else + return 4; + break; + } + } + /* on DCE32 and encoder can driver any block so just crtc id */ if (ASIC_IS_DCE32(rdev)) { return radeon_crtc->crtc_id; @@ -1249,15 +1357,26 @@ radeon_atom_encoder_mode_set(struct drm_encoder *encoder, case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1: case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2: case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA: - /* disable the encoder and transmitter */ - atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_DISABLE, 0, 0); - 
atombios_dig_encoder_setup(encoder, ATOM_DISABLE); - - /* setup and enable the encoder and transmitter */ - atombios_dig_encoder_setup(encoder, ATOM_ENABLE); - atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_INIT, 0, 0); - atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_SETUP, 0, 0); - atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE, 0, 0); + if (ASIC_IS_DCE4(rdev)) { + /* disable the transmitter */ + atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_DISABLE, 0, 0); + /* setup and enable the encoder */ + atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_SETUP); + + /* init and enable the transmitter */ + atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_INIT, 0, 0); + atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE, 0, 0); + } else { + /* disable the encoder and transmitter */ + atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_DISABLE, 0, 0); + atombios_dig_encoder_setup(encoder, ATOM_DISABLE); + + /* setup and enable the encoder and transmitter */ + atombios_dig_encoder_setup(encoder, ATOM_ENABLE); + atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_INIT, 0, 0); + atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_SETUP, 0, 0); + atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE, 0, 0); + } break; case ENCODER_OBJECT_ID_INTERNAL_DDI: atombios_ddia_setup(encoder, ATOM_ENABLE); @@ -1277,7 +1396,9 @@ radeon_atom_encoder_mode_set(struct drm_encoder *encoder, } atombios_apply_encoder_quirks(encoder, adjusted_mode); - r600_hdmi_setmode(encoder, adjusted_mode); + /* XXX */ + if (!ASIC_IS_DCE4(rdev)) + r600_hdmi_setmode(encoder, adjusted_mode); } static bool @@ -1475,10 +1596,18 @@ radeon_add_atom_encoder(struct drm_device *dev, uint32_t encoder_id, uint32_t su return; encoder = &radeon_encoder->base; - if (rdev->flags & RADEON_SINGLE_CRTC) + switch (rdev->num_crtc) { + case 1: encoder->possible_crtcs = 0x1; - else + break; + case 2: + default: encoder->possible_crtcs = 0x3; + break; + case 6: + encoder->possible_crtcs = 0x3f; + break; + } radeon_encoder->enc_priv = NULL; diff --git a/drivers/gpu/drm/radeon/radeon_family.h b/drivers/gpu/drm/radeon/radeon_family.h index 797972e344a6..93c7d5d41914 100644 --- a/drivers/gpu/drm/radeon/radeon_family.h +++ b/drivers/gpu/drm/radeon/radeon_family.h @@ -75,6 +75,11 @@ enum radeon_family { CHIP_RV730, CHIP_RV710, CHIP_RV740, + CHIP_CEDAR, + CHIP_REDWOOD, + CHIP_JUNIPER, + CHIP_CYPRESS, + CHIP_HEMLOCK, CHIP_LAST, }; diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index d1e859d1dbf9..8912f2e8e640 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -83,6 +83,8 @@ struct radeon_i2c_bus_rec { bool valid; /* id used by atom */ uint8_t i2c_id; + /* id used by atom */ + uint8_t hpd_id; /* can be used with hw i2c engine */ bool hw_capable; /* uses multi-media i2c engine */ @@ -207,7 +209,7 @@ struct radeon_mode_info { struct card_info *atom_card_info; enum radeon_connector_table connector_table; bool mode_config_initialized; - struct radeon_crtc *crtcs[2]; + struct radeon_crtc *crtcs[6]; /* DVI-I properties */ struct drm_property *coherent_mode_property; /* DAC enable load detect */ @@ -252,6 +254,7 @@ struct radeon_crtc { fixed20_12 vsc; fixed20_12 hsc; struct drm_display_mode native_mode; + int pll_id; }; struct radeon_encoder_primary_dac { @@ -414,6 +417,7 @@ extern void dp_link_train(struct drm_encoder 
*encoder, struct drm_connector *connector); extern u8 radeon_dp_getsinktype(struct radeon_connector *radeon_connector); extern bool radeon_dp_getdpcd(struct radeon_connector *radeon_connector); +extern void atombios_dig_encoder_setup(struct drm_encoder *encoder, int action); extern void atombios_dig_transmitter_setup(struct drm_encoder *encoder, int action, uint8_t lane_num, uint8_t lane_set); diff --git a/drivers/gpu/drm/radeon/radeon_reg.h b/drivers/gpu/drm/radeon/radeon_reg.h index b4a06676fff6..5c0dc082d330 100644 --- a/drivers/gpu/drm/radeon/radeon_reg.h +++ b/drivers/gpu/drm/radeon/radeon_reg.h @@ -54,7 +54,7 @@ #include "r300_reg.h" #include "r500_reg.h" #include "r600_reg.h" - +#include "evergreen_reg.h" #define RADEON_MC_AGP_LOCATION 0x014c #define RADEON_MC_AGP_START_MASK 0x0000FFFF diff --git a/drivers/gpu/drm/radeon/rv770d.h b/drivers/gpu/drm/radeon/rv770d.h index a1367ab6f261..9506f8cb99e0 100644 --- a/drivers/gpu/drm/radeon/rv770d.h +++ b/drivers/gpu/drm/radeon/rv770d.h @@ -343,4 +343,6 @@ #define WAIT_UNTIL 0x8040 +#define SRBM_STATUS 0x0E50 + #endif diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h index e6f3b120f51a..403490c7b647 100644 --- a/include/drm/drm_pciids.h +++ b/include/drm/drm_pciids.h @@ -141,6 +141,41 @@ {0x1002, 0x5e4c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV410|RADEON_NEW_MEMMAP}, \ {0x1002, 0x5e4d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV410|RADEON_NEW_MEMMAP}, \ {0x1002, 0x5e4f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV410|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x6880, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYPRESS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x6888, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYPRESS|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x6889, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYPRESS|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x688A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYPRESS|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x6898, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYPRESS|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x6899, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYPRESS|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x689c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HEMLOCK|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x689d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HEMLOCK|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x689e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYPRESS|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x68a0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_JUNIPER|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x68a1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_JUNIPER|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x68a8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_JUNIPER|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x68a9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_JUNIPER|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x68b0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_JUNIPER|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x68b8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_JUNIPER|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x68b9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_JUNIPER|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x68be, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_JUNIPER|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x68c0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_REDWOOD|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x68c1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_REDWOOD|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x68c8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_REDWOOD|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x68c9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_REDWOOD|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x68d8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_REDWOOD|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x68d9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_REDWOOD|RADEON_NEW_MEMMAP}, 
\
+	{0x1002, 0x68da, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_REDWOOD|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x68de, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_REDWOOD|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x68e0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CEDAR|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x68e1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CEDAR|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x68e4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CEDAR|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x68e5, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CEDAR|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x68e8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CEDAR|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x68e9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CEDAR|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x68f1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CEDAR|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x68f8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CEDAR|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x68f9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CEDAR|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x68fe, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CEDAR|RADEON_NEW_MEMMAP}, \
	{0x1002, 0x7100, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R520|RADEON_NEW_MEMMAP}, \
	{0x1002, 0x7101, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R520|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
	{0x1002, 0x7102, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R520|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
-- 
cgit v1.2.3

From 3ad2f3fbb961429d2aa627465ae4829758bc7e07 Mon Sep 17 00:00:00 2001
From: Daniel Mack
Date: Wed, 3 Feb 2010 08:01:28 +0800
Subject: tree-wide: Assorted spelling fixes

In particular, several occurrences of funny versions of 'success',
'unknown', 'therefore', 'acknowledge', 'argument', 'achieve', 'address',
'beginning', 'desirable', 'separate' and 'necessary' are fixed.

Signed-off-by: Daniel Mack
Cc: Joe Perches
Cc: Junio C Hamano
Signed-off-by: Jiri Kosina
---
 Documentation/DocBook/mtdnand.tmpl | 6 +++---
 Documentation/DocBook/v4l/common.xml | 2 +-
 Documentation/DocBook/v4l/vidioc-g-parm.xml | 2 +-
 Documentation/arm/Samsung-S3C24XX/CPUfreq.txt | 4 ++--
 Documentation/hwmon/abituguru | 2 +-
 Documentation/input/rotary-encoder.txt | 2 +-
 Documentation/networking/skfp.txt | 2 +-
 Documentation/s390/kvm.txt | 2 +-
 Documentation/scsi/ChangeLog.lpfc | 10 +++++-----
 Documentation/trace/ftrace.txt | 2 +-
 arch/arm/mach-ep93xx/micro9.c | 2 +-
 arch/arm/mach-nomadik/board-nhk8815.c | 2 +-
 arch/arm/mach-u300/core.c | 4 ++--
 arch/arm/mach-u300/include/mach/debug-macro.S | 2 +-
 arch/arm/plat-s3c/include/plat/gpio-cfg-helpers.h | 2 +-
 arch/arm/plat-s3c/include/plat/regs-usb-hsotg-phy.h | 2 +-
 arch/arm/plat-s3c24xx/include/plat/cpu-freq-core.h | 2 +-
 arch/cris/arch-v10/lib/old_checksum.c | 2 +-
 arch/cris/arch-v32/mm/tlb.c | 2 +-
 arch/h8300/include/asm/io.h | 2 +-
 arch/ia64/sn/kernel/setup.c | 2 +-
 arch/m68k/atari/atakeyb.c | 2 +-
 arch/m68k/include/asm/io_no.h | 2 +-
 arch/powerpc/boot/dts/kmeter1.dts | 2 +-
 arch/s390/include/asm/cio.h | 2 +-
 arch/s390/kernel/sclp.S | 2 +-
 arch/sparc/kernel/leon_kernel.c | 2 +-
 arch/sparc/kernel/perf_event.c | 2 +-
 arch/x86/crypto/twofish-i586-asm_32.S | 10 +++++-----
 arch/x86/crypto/twofish-x86_64-asm_64.S | 20 ++++++++++----------
 arch/x86/kernel/head_64.S | 2 +-
 arch/x86/kernel/pci-calgary_64.c | 2 +-
 arch/x86/kernel/tsc.c | 2 +-
 arch/xtensa/kernel/entry.S | 4 ++--
 block/bsg.c | 2 +-
 drivers/acpi/dock.c | 2 +-
 drivers/ata/libata-sff.c | 2 +-
 drivers/ata/pata_acpi.c | 2 +-
 drivers/ata/pata_hpt3x3.c | 2 +-
 drivers/ata/pata_pcmcia.c | 2 +-
 drivers/block/drbd/drbd_int.h | 4 ++--
 drivers/block/drbd/drbd_req.h | 2 +-
 drivers/char/agp/intel-agp.c | 2 
+- drivers/char/applicom.c | 2 +- drivers/char/hvc_iseries.c | 2 +- drivers/char/hw_random/n2-drv.c | 2 +- drivers/char/ip2/i2hw.h | 2 +- drivers/char/pty.c | 2 +- drivers/char/tty_io.c | 2 +- drivers/char/vt.c | 2 +- drivers/dma/coh901318_lli.h | 2 +- drivers/gpu/drm/nouveau/nouveau_bios.c | 2 +- drivers/gpu/drm/nouveau/nouveau_drv.h | 2 +- drivers/gpu/drm/via/via_irq.c | 4 ++-- drivers/i2c/busses/i2c-pxa.c | 2 +- drivers/infiniband/hw/ehca/ehca_qes.h | 4 ++-- drivers/infiniband/hw/ehca/ehca_reqs.c | 2 +- drivers/input/misc/yealink.h | 2 +- drivers/isdn/i4l/isdn_common.c | 2 +- drivers/media/dvb/dvb-core/dvb_frontend.h | 8 ++++---- drivers/media/video/bt8xx/bttv-cards.c | 4 ++-- drivers/media/video/gspca/ov519.c | 2 +- drivers/media/video/pwc/philips.txt | 2 +- drivers/media/video/sn9c102/sn9c102_sensor.h | 2 +- drivers/media/video/tea6420.c | 2 +- drivers/mfd/sm501.c | 8 ++++---- drivers/mmc/host/mxcmmc.c | 2 +- drivers/mtd/chips/jedec_probe.c | 2 +- drivers/mtd/nand/bcm_umi_nand.c | 4 ++-- drivers/mtd/nand/mxc_nand.c | 2 +- drivers/net/atlx/atl2.h | 2 +- drivers/net/chelsio/sge.c | 2 +- drivers/net/e1000e/82571.c | 2 +- drivers/net/e1000e/lib.c | 2 +- drivers/net/igb/igb_main.c | 2 +- drivers/net/irda/sa1100_ir.c | 2 +- drivers/net/qlge/qlge_ethtool.c | 2 +- drivers/net/qlge/qlge_main.c | 2 +- drivers/net/sfc/regs.h | 2 +- drivers/net/smsc9420.c | 2 +- drivers/net/spider_net.c | 4 ++-- drivers/net/sungem.c | 2 +- drivers/net/tehuti.c | 2 +- drivers/net/tokenring/tms380tr.c | 4 ++-- drivers/net/tun.c | 2 +- drivers/net/ucc_geth.c | 2 +- drivers/net/wimax/i2400m/fw.c | 2 +- drivers/net/wimax/i2400m/i2400m.h | 2 +- drivers/net/wimax/i2400m/sdio.c | 4 ++-- drivers/net/wimax/i2400m/usb.c | 4 ++-- drivers/net/wireless/ath/ar9170/main.c | 2 +- drivers/net/wireless/iwmc3200wifi/lmac.h | 2 +- drivers/net/wireless/rt2x00/rt2500usb.c | 4 ++-- drivers/net/wireless/rt2x00/rt2800usb.c | 4 ++-- drivers/net/wireless/rt2x00/rt2x00debug.c | 2 +- drivers/net/wireless/rt2x00/rt2x00dev.c | 2 +- drivers/net/wireless/rt2x00/rt2x00queue.c | 2 +- drivers/net/wireless/rt2x00/rt61pci.c | 2 +- drivers/net/wireless/rt2x00/rt73usb.c | 6 +++--- drivers/s390/char/raw3270.c | 2 +- drivers/s390/char/sclp.c | 2 +- drivers/scsi/a100u2w.c | 2 +- drivers/scsi/initio.c | 2 +- drivers/scsi/libfc/fc_fcp.c | 2 +- drivers/scsi/lpfc/lpfc_els.c | 4 ++-- drivers/scsi/pcmcia/nsp_cs.h | 2 +- drivers/scsi/pm8001/pm8001_hwi.c | 2 +- drivers/scsi/pm8001/pm8001_sas.c | 2 +- drivers/scsi/pmcraid.h | 2 +- drivers/scsi/sd.c | 2 +- drivers/spi/spi_s3c24xx.c | 2 +- drivers/usb/musb/musb_regs.h | 2 +- drivers/usb/serial/cypress_m8.c | 2 +- drivers/video/omap/lcdc.c | 2 +- drivers/video/s1d13xxxfb.c | 4 ++-- drivers/video/sm501fb.c | 2 +- fs/affs/bitmap.c | 2 +- fs/binfmt_elf_fdpic.c | 2 +- fs/cifs/cifs_dfs_ref.c | 2 +- fs/cifs/cifssmb.c | 2 +- fs/ext4/move_extent.c | 2 +- fs/fuse/inode.c | 2 +- fs/gfs2/ops_fstype.c | 2 +- fs/jbd/transaction.c | 2 +- fs/nfsd/nfs4xdr.c | 2 +- fs/ocfs2/dlmglue.c | 2 +- fs/ocfs2/extent_map.c | 2 +- fs/reiserfs/bitmap.c | 2 +- include/linux/hil.h | 16 ++++++++-------- include/linux/lru_cache.h | 2 +- include/linux/sched.h | 2 +- include/media/davinci/vpfe_capture.h | 2 +- net/ipv4/tcp_timer.c | 2 +- net/mac80211/mesh_plink.c | 2 +- net/netfilter/nf_conntrack_sip.c | 4 ++-- net/netfilter/xt_hashlimit.c | 2 +- net/sctp/sm_sideeffect.c | 2 +- scripts/gfp-translate | 2 +- sound/pci/rme9652/hdspm.c | 2 +- sound/soc/codecs/wm8990.c | 2 +- tools/perf/util/hist.c | 2 +- 141 files changed, 192 insertions(+), 192 
deletions(-) (limited to 'include') diff --git a/Documentation/DocBook/mtdnand.tmpl b/Documentation/DocBook/mtdnand.tmpl index 5e7d84b48505..133cd6c3f3c1 100644 --- a/Documentation/DocBook/mtdnand.tmpl +++ b/Documentation/DocBook/mtdnand.tmpl @@ -488,7 +488,7 @@ static void board_select_chip (struct mtd_info *mtd, int chip) The ECC bytes must be placed immidiately after the data bytes in order to make the syndrome generator work. This is contrary to the usual layout used by software ECC. The - seperation of data and out of band area is not longer + separation of data and out of band area is not longer possible. The nand driver code handles this layout and the remaining free bytes in the oob area are managed by the autoplacement code. Provide a matching oob-layout @@ -560,7 +560,7 @@ static void board_select_chip (struct mtd_info *mtd, int chip) bad blocks. They have factory marked good blocks. The marker pattern is erased when the block is erased to be reused. So in case of powerloss before writing the pattern back to the chip this block - would be lost and added to the bad blocks. Therefor we scan the + would be lost and added to the bad blocks. Therefore we scan the chip(s) when we detect them the first time for good blocks and store this information in a bad block table before erasing any of the blocks. @@ -1094,7 +1094,7 @@ in this page manufacturers specifications. This applies similar to the spare area. - Therefor NAND aware filesystems must either write in page size chunks + Therefore NAND aware filesystems must either write in page size chunks or hold a writebuffer to collect smaller writes until they sum up to pagesize. Available NAND aware filesystems: JFFS2, YAFFS. diff --git a/Documentation/DocBook/v4l/common.xml b/Documentation/DocBook/v4l/common.xml index c65f0ac9b6ee..cea23e1c4fc6 100644 --- a/Documentation/DocBook/v4l/common.xml +++ b/Documentation/DocBook/v4l/common.xml @@ -1170,7 +1170,7 @@ frames per second. If less than this number of frames is to be captured or output, applications can request frame skipping or duplicating on the driver side. This is especially useful when using the &func-read; or &func-write;, which are not augmented by timestamps -or sequence counters, and to avoid unneccessary data copying. +or sequence counters, and to avoid unnecessary data copying. Finally these ioctls can be used to determine the number of buffers used internally by a driver in read/write mode. For diff --git a/Documentation/DocBook/v4l/vidioc-g-parm.xml b/Documentation/DocBook/v4l/vidioc-g-parm.xml index 78332d365ce9..392aa9e5571e 100644 --- a/Documentation/DocBook/v4l/vidioc-g-parm.xml +++ b/Documentation/DocBook/v4l/vidioc-g-parm.xml @@ -55,7 +55,7 @@ captured or output, applications can request frame skipping or duplicating on the driver side. This is especially useful when using the read() or write(), which are not augmented by timestamps or sequence counters, and to avoid -unneccessary data copying. +unnecessary data copying. Further these ioctls can be used to determine the number of buffers used internally by a driver in read/write mode. For diff --git a/Documentation/arm/Samsung-S3C24XX/CPUfreq.txt b/Documentation/arm/Samsung-S3C24XX/CPUfreq.txt index 76b3a11e90be..fa968aa99d67 100644 --- a/Documentation/arm/Samsung-S3C24XX/CPUfreq.txt +++ b/Documentation/arm/Samsung-S3C24XX/CPUfreq.txt @@ -14,8 +14,8 @@ Introduction how the clocks are arranged. 
The first implementation used as single PLL to feed the ARM, memory and peripherals via a series of dividers and muxes and this is the implementation that is documented here. A - newer version where there is a seperate PLL and clock divider for the - ARM core is available as a seperate driver. + newer version where there is a separate PLL and clock divider for the + ARM core is available as a separate driver. Layout diff --git a/Documentation/hwmon/abituguru b/Documentation/hwmon/abituguru index 87ffa0f5ec70..5eb3b9d5f0d5 100644 --- a/Documentation/hwmon/abituguru +++ b/Documentation/hwmon/abituguru @@ -30,7 +30,7 @@ Supported chips: bank1_types=1,1,0,0,0,0,0,2,0,0,0,0,2,0,0,1 You may also need to specify the fan_sensors option for these boards fan_sensors=5 - 2) There is a seperate abituguru3 driver for these motherboards, + 2) There is a separate abituguru3 driver for these motherboards, the abituguru (without the 3 !) driver will not work on these motherboards (and visa versa)! diff --git a/Documentation/input/rotary-encoder.txt b/Documentation/input/rotary-encoder.txt index 3a6aec40c0b0..8b4129de1d2d 100644 --- a/Documentation/input/rotary-encoder.txt +++ b/Documentation/input/rotary-encoder.txt @@ -75,7 +75,7 @@ and the number of steps or will clamp at the maximum and zero depending on the configuration. Because GPIO to IRQ mapping is platform specific, this information must -be given in seperately to the driver. See the example below. +be given in separately to the driver. See the example below. ------------------ diff --git a/Documentation/networking/skfp.txt b/Documentation/networking/skfp.txt index abfddf81e34a..203ec66c9fb4 100644 --- a/Documentation/networking/skfp.txt +++ b/Documentation/networking/skfp.txt @@ -68,7 +68,7 @@ Compaq adapters (not tested): ======================= From v2.01 on, the driver is integrated in the linux kernel sources. -Therefor, the installation is the same as for any other adapter +Therefore, the installation is the same as for any other adapter supported by the kernel. Refer to the manual of your distribution about the installation of network adapters. diff --git a/Documentation/s390/kvm.txt b/Documentation/s390/kvm.txt index 6f5ceb0f09fc..85f3280d7ef6 100644 --- a/Documentation/s390/kvm.txt +++ b/Documentation/s390/kvm.txt @@ -102,7 +102,7 @@ args: unsigned long see also: include/linux/kvm.h This ioctl stores the state of the cpu at the guest real address given as argument, unless one of the following values defined in include/linux/kvm.h -is given as arguement: +is given as argument: KVM_S390_STORE_STATUS_NOADDR - the CPU stores its status to the save area in absolute lowcore as defined by the principles of operation KVM_S390_STORE_STATUS_PREFIXED - the CPU stores its status to the save area in diff --git a/Documentation/scsi/ChangeLog.lpfc b/Documentation/scsi/ChangeLog.lpfc index ff19a52fe004..2ffc1148eb95 100644 --- a/Documentation/scsi/ChangeLog.lpfc +++ b/Documentation/scsi/ChangeLog.lpfc @@ -989,8 +989,8 @@ Changes from 20040709 to 20040716 * Remove redundant port_cmp != 2 check in if (!port_cmp) { .... if (port_cmp != 2).... } * Clock changes: removed struct clk_data and timerList. 
- * Clock changes: seperate nodev_tmo and els_retry_delay into 2 - seperate timers and convert to 1 argument changed + * Clock changes: separate nodev_tmo and els_retry_delay into 2 + separate timers and convert to 1 argument changed LPFC_NODE_FARP_PEND_t to struct lpfc_node_farp_pend convert ipfarp_tmo to 1 argument convert target struct tmofunc and rtplunfunc to 1 argument * cr_count, cr_delay and @@ -1514,7 +1514,7 @@ Changes from 20040402 to 20040409 * Remove unused elxclock declaration in elx_sli.h. * Since everywhere IOCB_ENTRY is used, the return value is cast, move the cast into the macro. - * Split ioctls out into seperate files + * Split ioctls out into separate files Changes from 20040326 to 20040402 @@ -1534,7 +1534,7 @@ Changes from 20040326 to 20040402 * Unused variable cleanup * Use Linux list macros for DMABUF_t * Break up ioctls into 3 sections, dfc, util, hbaapi - rearranged code so this could be easily seperated into a + rearranged code so this could be easily separated into a differnet module later All 3 are currently turned on by defines in lpfc_ioctl.c LPFC_DFC_IOCTL, LPFC_UTIL_IOCTL, LPFC_HBAAPI_IOCTL @@ -1551,7 +1551,7 @@ Changes from 20040326 to 20040402 started by lpfc_online(). lpfc_offline() only stopped els_timeout routine. It now stops all timeout routines associated with that hba. - * Replace seperate next and prev pointers in struct + * Replace separate next and prev pointers in struct lpfc_bindlist with list_head type. In elxHBA_t, replace fc_nlpbind_start and _end with fc_nlpbind_list and use list_head macros to access it. diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index bab3040da548..03485bfbd797 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt @@ -1588,7 +1588,7 @@ module author does not need to worry about it. When tracing is enabled, kstop_machine is called to prevent races with the CPUS executing code being modified (which can -cause the CPU to do undesireable things), and the nops are +cause the CPU to do undesirable things), and the nops are patched back to calls. But this time, they do not call mcount (which is just a function stub). They now call into the ftrace infrastructure. 
diff --git a/arch/arm/mach-ep93xx/micro9.c b/arch/arm/mach-ep93xx/micro9.c index f3757a1c5a10..c33360e82868 100644 --- a/arch/arm/mach-ep93xx/micro9.c +++ b/arch/arm/mach-ep93xx/micro9.c @@ -28,7 +28,7 @@ * * Micro9-High has up to 64MB of 32-bit flash on CS1 * Micro9-Mid has up to 64MB of either 32-bit or 16-bit flash on CS1 - * Micro9-Lite uses a seperate MTD map driver for flash support + * Micro9-Lite uses a separate MTD map driver for flash support * Micro9-Slim has up to 64MB of either 32-bit or 16-bit flash on CS1 *************************************************************************/ static struct physmap_flash_data micro9_flash_data; diff --git a/arch/arm/mach-nomadik/board-nhk8815.c b/arch/arm/mach-nomadik/board-nhk8815.c index 9438bf6613a3..ab3712c86d2b 100644 --- a/arch/arm/mach-nomadik/board-nhk8815.c +++ b/arch/arm/mach-nomadik/board-nhk8815.c @@ -38,7 +38,7 @@ #define SRC_CR_INIT_MASK 0x00007fff #define SRC_CR_INIT_VAL 0x2aaa8000 -/* These adresses span 16MB, so use three individual pages */ +/* These addresses span 16MB, so use three individual pages */ static struct resource nhk8815_nand_resources[] = { { .name = "nand_addr", diff --git a/arch/arm/mach-u300/core.c b/arch/arm/mach-u300/core.c index 653e25be3dd8..d0cb5e940776 100644 --- a/arch/arm/mach-u300/core.c +++ b/arch/arm/mach-u300/core.c @@ -356,7 +356,7 @@ static struct resource ave_resources[] = { /* * The AVE3e requires two regions of 256MB that it considers * "invisible". The hardware will not be able to access these - * adresses, so they should never point to system RAM. + * addresses, so they should never point to system RAM. */ { .name = "AVE3e Reserved 0", @@ -571,7 +571,7 @@ static void __init u300_init_check_chip(void) /* * Some devices and their resources require reserved physical memory from * the end of the available RAM. This function traverses the list of devices - * and assigns actual adresses to these. + * and assigns actual addresses to these. */ static void __init u300_assign_physmem(void) { diff --git a/arch/arm/mach-u300/include/mach/debug-macro.S b/arch/arm/mach-u300/include/mach/debug-macro.S index f3a1cbbeeab3..d591fe13ed13 100644 --- a/arch/arm/mach-u300/include/mach/debug-macro.S +++ b/arch/arm/mach-u300/include/mach/debug-macro.S @@ -11,7 +11,7 @@ #include .macro addruart,rx - /* If we move the adress using MMU, use this. */ + /* If we move the address using MMU, use this. */ mrc p15, 0, \rx, c1, c0 tst \rx, #1 @ MMU enabled? ldreq \rx, = U300_SLOW_PER_PHYS_BASE @ MMU off, physical address diff --git a/arch/arm/plat-s3c/include/plat/gpio-cfg-helpers.h b/arch/arm/plat-s3c/include/plat/gpio-cfg-helpers.h index 652e2bbdaa20..dda19da037ad 100644 --- a/arch/arm/plat-s3c/include/plat/gpio-cfg-helpers.h +++ b/arch/arm/plat-s3c/include/plat/gpio-cfg-helpers.h @@ -78,7 +78,7 @@ extern int s3c_gpio_setcfg_s3c24xx_a(struct s3c_gpio_chip *chip, * others = Special functions (dependant on bank) * * Note, since the code to deal with the case where there are two control - * registers instead of one, we do not have a seperate set of functions for + * registers instead of one, we do not have a separate set of functions for * each case. 
*/ extern int s3c_gpio_setcfg_s3c64xx_4bit(struct s3c_gpio_chip *chip, diff --git a/arch/arm/plat-s3c/include/plat/regs-usb-hsotg-phy.h b/arch/arm/plat-s3c/include/plat/regs-usb-hsotg-phy.h index 36a85f5000c8..a111ad871833 100644 --- a/arch/arm/plat-s3c/include/plat/regs-usb-hsotg-phy.h +++ b/arch/arm/plat-s3c/include/plat/regs-usb-hsotg-phy.h @@ -12,7 +12,7 @@ * published by the Free Software Foundation. */ -/* Note, this is a seperate header file as some of the clock framework +/* Note, this is a separate header file as some of the clock framework * needs to touch this if the clk_48m is used as the USB OHCI or other * peripheral source. */ diff --git a/arch/arm/plat-s3c24xx/include/plat/cpu-freq-core.h b/arch/arm/plat-s3c24xx/include/plat/cpu-freq-core.h index 33d421d78bad..d623235ae961 100644 --- a/arch/arm/plat-s3c24xx/include/plat/cpu-freq-core.h +++ b/arch/arm/plat-s3c24xx/include/plat/cpu-freq-core.h @@ -135,7 +135,7 @@ struct s3c_cpufreq_config { * @locktime_m: The lock-time in uS for the MPLL. * @locktime_u: The lock-time in uS for the UPLL. * @locttime_bits: The number of bits each LOCKTIME field. - * @need_pll: Set if this driver needs to change the PLL values to acheive + * @need_pll: Set if this driver needs to change the PLL values to achieve * any frequency changes. This is really only need by devices like the * S3C2410 where there is no or limited divider between the PLL and the * ARMCLK. diff --git a/arch/cris/arch-v10/lib/old_checksum.c b/arch/cris/arch-v10/lib/old_checksum.c index 1734b467efa6..8f79163f1394 100644 --- a/arch/cris/arch-v10/lib/old_checksum.c +++ b/arch/cris/arch-v10/lib/old_checksum.c @@ -77,7 +77,7 @@ __wsum csum_partial(const void *p, int len, __wsum __sum) sum += *buff++; if (endMarker > buff) - sum += *(const u8 *)buff; /* add extra byte seperately */ + sum += *(const u8 *)buff; /* add extra byte separately */ BITOFF; return (__force __wsum)sum; diff --git a/arch/cris/arch-v32/mm/tlb.c b/arch/cris/arch-v32/mm/tlb.c index 6779bcb28ab0..c030d020660a 100644 --- a/arch/cris/arch-v32/mm/tlb.c +++ b/arch/cris/arch-v32/mm/tlb.c @@ -189,7 +189,7 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, spin_unlock(&mmu_context_lock); /* - * Remember the pgd for the fault handlers. Keep a seperate + * Remember the pgd for the fault handlers. Keep a separate * copy of it because current and active_mm might be invalid * at points where * there's still a need to derefer the pgd. */ diff --git a/arch/h8300/include/asm/io.h b/arch/h8300/include/asm/io.h index 33e842f3284b..c1a8df22080f 100644 --- a/arch/h8300/include/asm/io.h +++ b/arch/h8300/include/asm/io.h @@ -25,7 +25,7 @@ * memory location directly. */ /* ++roman: The assignments to temp. vars avoid that gcc sometimes generates - * two accesses to memory, which may be undesireable for some devices. + * two accesses to memory, which may be undesirable for some devices. */ /* diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c index ece1bf994499..c6c6d9381126 100644 --- a/arch/ia64/sn/kernel/setup.c +++ b/arch/ia64/sn/kernel/setup.c @@ -241,7 +241,7 @@ static void __cpuinit sn_check_for_wars(void) * Note: This stuff is duped here because Altix requires the PCDP to * locate a usable VGA device due to lack of proper ACPI support. Structures * could be used from drivers/firmware/pcdp.h, but it was decided that moving - * this file to a more public location just for Altix use was undesireable. + * this file to a more public location just for Altix use was undesirable. 
*/ struct hcdp_uart_desc { diff --git a/arch/m68k/atari/atakeyb.c b/arch/m68k/atari/atakeyb.c index 4add96d13b19..5890897d28bf 100644 --- a/arch/m68k/atari/atakeyb.c +++ b/arch/m68k/atari/atakeyb.c @@ -121,7 +121,7 @@ KEYBOARD_STATE kb_state; * bytes have been lost and in which state of the packet structure we are now. * This usually causes keyboards bytes to be interpreted as mouse movements * and vice versa, which is very annoying. It seems better to throw away some - * bytes (that are usually mouse bytes) than to misinterpret them. Therefor I + * bytes (that are usually mouse bytes) than to misinterpret them. Therefore I * introduced the RESYNC state for IKBD data. In this state, the bytes up to * one that really looks like a key event (0x04..0xf2) or the start of a mouse * packet (0xf8..0xfb) are thrown away, but at most 2 bytes. This at least diff --git a/arch/m68k/include/asm/io_no.h b/arch/m68k/include/asm/io_no.h index 359065d5a9f2..6e2413e518cb 100644 --- a/arch/m68k/include/asm/io_no.h +++ b/arch/m68k/include/asm/io_no.h @@ -16,7 +16,7 @@ * memory location directly. */ /* ++roman: The assignments to temp. vars avoid that gcc sometimes generates - * two accesses to memory, which may be undesireable for some devices. + * two accesses to memory, which may be undesirable for some devices. */ /* diff --git a/arch/powerpc/boot/dts/kmeter1.dts b/arch/powerpc/boot/dts/kmeter1.dts index 65b8b4f27efe..d8b5d12fb663 100644 --- a/arch/powerpc/boot/dts/kmeter1.dts +++ b/arch/powerpc/boot/dts/kmeter1.dts @@ -490,7 +490,7 @@ compatible = "cfi-flash"; /* * The Intel P30 chip has 2 non-identical chips on - * one die, so we need to define 2 seperate regions + * one die, so we need to define 2 separate regions * that are scanned by physmap_of independantly. */ reg = <0 0x00000000 0x02000000 diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h index e85679af54dd..e34347d567a6 100644 --- a/arch/s390/include/asm/cio.h +++ b/arch/s390/include/asm/cio.h @@ -20,7 +20,7 @@ /** * struct ccw1 - channel command word * @cmd_code: command code - * @flags: flags, like IDA adressing, etc. + * @flags: flags, like IDA addressing, etc. * @count: byte count * @cda: data address * diff --git a/arch/s390/kernel/sclp.S b/arch/s390/kernel/sclp.S index e27ca63076d1..27c1a2e236d1 100644 --- a/arch/s390/kernel/sclp.S +++ b/arch/s390/kernel/sclp.S @@ -221,7 +221,7 @@ _sclp_print: lh %r9,0(%r8) # update sccb length ar %r9,%r6 sth %r9,0(%r8) - ar %r7,%r6 # update current mto adress + ar %r7,%r6 # update current mto address ltr %r0,%r0 # more characters? 
jnz .LinitmtoS4 l %r2,.LwritedataS4-.LbaseS4(%r13)# write data diff --git a/arch/sparc/kernel/leon_kernel.c b/arch/sparc/kernel/leon_kernel.c index 87f1760c0aa2..554e0b80bcde 100644 --- a/arch/sparc/kernel/leon_kernel.c +++ b/arch/sparc/kernel/leon_kernel.c @@ -124,7 +124,7 @@ void __init leon_init_timers(irq_handler_t counter_fn) if (!(LEON3_BYPASS_LOAD_PA(&leon3_gptimer_regs->config) & (1< -/* return adress at 0 */ +/* return address at 0 */ #define in_blk 12 /* input byte array address parameter*/ #define out_blk 8 /* output byte array address parameter*/ @@ -230,8 +230,8 @@ twofish_enc_blk: push %edi mov tfm + 16(%esp), %ebp /* abuse the base pointer: set new base bointer to the crypto tfm */ - add $crypto_tfm_ctx_offset, %ebp /* ctx adress */ - mov in_blk+16(%esp),%edi /* input adress in edi */ + add $crypto_tfm_ctx_offset, %ebp /* ctx address */ + mov in_blk+16(%esp),%edi /* input address in edi */ mov (%edi), %eax mov b_offset(%edi), %ebx @@ -286,8 +286,8 @@ twofish_dec_blk: mov tfm + 16(%esp), %ebp /* abuse the base pointer: set new base bointer to the crypto tfm */ - add $crypto_tfm_ctx_offset, %ebp /* ctx adress */ - mov in_blk+16(%esp),%edi /* input adress in edi */ + add $crypto_tfm_ctx_offset, %ebp /* ctx address */ + mov in_blk+16(%esp),%edi /* input address in edi */ mov (%edi), %eax mov b_offset(%edi), %ebx diff --git a/arch/x86/crypto/twofish-x86_64-asm_64.S b/arch/x86/crypto/twofish-x86_64-asm_64.S index 35974a586615..573aa102542e 100644 --- a/arch/x86/crypto/twofish-x86_64-asm_64.S +++ b/arch/x86/crypto/twofish-x86_64-asm_64.S @@ -221,11 +221,11 @@ twofish_enc_blk: pushq R1 - /* %rdi contains the crypto tfm adress */ - /* %rsi contains the output adress */ - /* %rdx contains the input adress */ - add $crypto_tfm_ctx_offset, %rdi /* set ctx adress */ - /* ctx adress is moved to free one non-rex register + /* %rdi contains the crypto tfm address */ + /* %rsi contains the output address */ + /* %rdx contains the input address */ + add $crypto_tfm_ctx_offset, %rdi /* set ctx address */ + /* ctx address is moved to free one non-rex register as target for the 8bit high operations */ mov %rdi, %r11 @@ -274,11 +274,11 @@ twofish_enc_blk: twofish_dec_blk: pushq R1 - /* %rdi contains the crypto tfm adress */ - /* %rsi contains the output adress */ - /* %rdx contains the input adress */ - add $crypto_tfm_ctx_offset, %rdi /* set ctx adress */ - /* ctx adress is moved to free one non-rex register + /* %rdi contains the crypto tfm address */ + /* %rsi contains the output address */ + /* %rdx contains the input address */ + add $crypto_tfm_ctx_offset, %rdi /* set ctx address */ + /* ctx address is moved to free one non-rex register as target for the 8bit high operations */ mov %rdi, %r11 diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 2d8b5035371c..3d1e6f16b7a6 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -27,7 +27,7 @@ #define GET_CR2_INTO_RCX movq %cr2, %rcx #endif -/* we are not able to switch in one step to the final KERNEL ADRESS SPACE +/* we are not able to switch in one step to the final KERNEL ADDRESS SPACE * because we need identity-mapped pages. * */ diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 2bbde6078143..fb99f7edb341 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -1309,7 +1309,7 @@ static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl) /* * get_tce_space_from_tar(): * Function for kdump case. 
Get the tce tables from first kernel - * by reading the contents of the base adress register of calgary iommu + * by reading the contents of the base address register of calgary iommu */ static void __init get_tce_space_from_tar(void) { diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 597683aa5ba0..dec8f68e3eda 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -50,7 +50,7 @@ u64 native_sched_clock(void) * unstable. We do this because unlike Time Of Day, * the scheduler clock tolerates small errors and it's * very important for it to be as fast as the platform - * can achive it. ) + * can achieve it. ) */ if (unlikely(tsc_disabled)) { /* No locking but a rare wrong value is not a big deal: */ diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index 80d24c485fd3..77fc9f6dc016 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S @@ -104,7 +104,7 @@ * excsave has been restored, and * stack pointer (a1) has been set. * - * Note: _user_exception might be at an odd adress. Don't use call0..call12 + * Note: _user_exception might be at an odd address. Don't use call0..call12 */ ENTRY(user_exception) @@ -244,7 +244,7 @@ _user_exception: * excsave has been restored, and * stack pointer (a1) has been set. * - * Note: _kernel_exception might be at an odd adress. Don't use call0..call12 + * Note: _kernel_exception might be at an odd address. Don't use call0..call12 */ ENTRY(kernel_exception) diff --git a/block/bsg.c b/block/bsg.c index a9fd2d84b53a..46597a6bd112 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -260,7 +260,7 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm, return ERR_PTR(ret); /* - * map scatter-gather elements seperately and string them to request + * map scatter-gather elements separately and string them to request */ rq = blk_get_request(q, rw, GFP_KERNEL); if (!rq) diff --git a/drivers/acpi/dock.c b/drivers/acpi/dock.c index bbc2c1315c47..d7f363f9435f 100644 --- a/drivers/acpi/dock.c +++ b/drivers/acpi/dock.c @@ -605,7 +605,7 @@ register_hotplug_dock_device(acpi_handle handle, struct acpi_dock_ops *ops, list_for_each_entry(dock_station, &dock_stations, sibling) { /* * An ATA bay can be in a dock and itself can be ejected - * seperately, so there are two 'dock stations' which need the + * separately, so there are two 'dock stations' which need the * ops */ dd = find_dock_dependent_device(dock_station, handle); diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index 741065c9da67..7f2c94a07c00 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -2258,7 +2258,7 @@ EXPORT_SYMBOL_GPL(ata_sff_postreset); * @qc: command * * Drain the FIFO and device of any stuck data following a command - * failing to complete. In some cases this is neccessary before a + * failing to complete. In some cases this is necessary before a * reset will recover the device. * */ diff --git a/drivers/ata/pata_acpi.c b/drivers/ata/pata_acpi.c index d8f35fe44421..9e33da9565d9 100644 --- a/drivers/ata/pata_acpi.c +++ b/drivers/ata/pata_acpi.c @@ -161,7 +161,7 @@ static void pacpi_set_dmamode(struct ata_port *ap, struct ata_device *adev) * * Called when the libata layer is about to issue a command. We wrap * this interface so that we can load the correct ATA timings if - * neccessary. + * necessary. 
*/ static unsigned int pacpi_qc_issue(struct ata_queued_cmd *qc) diff --git a/drivers/ata/pata_hpt3x3.c b/drivers/ata/pata_hpt3x3.c index c86c71639a95..727a81ce4c9f 100644 --- a/drivers/ata/pata_hpt3x3.c +++ b/drivers/ata/pata_hpt3x3.c @@ -180,7 +180,7 @@ static void hpt3x3_init_chipset(struct pci_dev *dev) * @id: Entry in match table * * Perform basic initialisation. We set the device up so we access all - * ports via BAR4. This is neccessary to work around errata. + * ports via BAR4. This is necessary to work around errata. */ static int hpt3x3_init_one(struct pci_dev *pdev, const struct pci_device_id *id) diff --git a/drivers/ata/pata_pcmcia.c b/drivers/ata/pata_pcmcia.c index 1b392c9e8531..416aebb8b913 100644 --- a/drivers/ata/pata_pcmcia.c +++ b/drivers/ata/pata_pcmcia.c @@ -131,7 +131,7 @@ static unsigned int ata_data_xfer_8bit(struct ata_device *dev, * @qc: command * * Drain the FIFO and device of any stuck data following a command - * failing to complete. In some cases this is neccessary before a + * failing to complete. In some cases this is necessary before a * reset will recover the device. * */ diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 2bf3a6ef3684..d9301e861d9f 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -95,7 +95,7 @@ extern char usermode_helper[]; /* All EEs on the free list should have ID_VACANT (== 0) * freshly allocated EEs get !ID_VACANT (== 1) - * so if it says "cannot dereference null pointer at adress 0x00000001", + * so if it says "cannot dereference null pointer at address 0x00000001", * it is most likely one of these :( */ #define ID_IN_SYNC (4711ULL) @@ -1171,7 +1171,7 @@ extern int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf /* Meta data layout We reserve a 128MB Block (4k aligned) * either at the end of the backing device - * or on a seperate meta data device. */ + * or on a separate meta data device. */ #define MD_RESERVED_SECT (128LU << 11) /* 128 MB, unit sectors */ /* The following numbers are sectors */ diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index f22c1bc8ec7e..16119d7056cc 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -57,7 +57,7 @@ * * It may me handed over to the local disk subsystem. * It may be completed by the local disk subsystem, - * either sucessfully or with io-error. + * either successfully or with io-error. * In case it is a READ request, and it failed locally, * it may be retried remotely. * diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c index 3999a5f25f38..45a22f9bfec2 100644 --- a/drivers/char/agp/intel-agp.c +++ b/drivers/char/agp/intel-agp.c @@ -269,7 +269,7 @@ static void intel_agp_insert_sg_entries(struct agp_memory *mem, j++; } } else { - /* sg may merge pages, but we have to seperate + /* sg may merge pages, but we have to separate * per-page addr for GTT */ unsigned int len, m; diff --git a/drivers/char/applicom.c b/drivers/char/applicom.c index fe2cb2f5db17..a7424bf7eacf 100644 --- a/drivers/char/applicom.c +++ b/drivers/char/applicom.c @@ -14,7 +14,7 @@ /* et passe en argument a acinit, mais est scrute sur le bus pour s'adapter */ /* au nombre de cartes presentes sur le bus. 
IOCL code 6 affichait V2.4.3 */ /* F.LAFORSE 28/11/95 creation de fichiers acXX.o avec les differentes */ -/* adresses de base des cartes, IOCTL 6 plus complet */ +/* addresses de base des cartes, IOCTL 6 plus complet */ /* J.PAGET le 19/08/96 copie de la version V2.6 en V2.8.0 sans modification */ /* de code autre que le texte V2.6.1 en V2.8.0 */ /*****************************************************************************/ diff --git a/drivers/char/hvc_iseries.c b/drivers/char/hvc_iseries.c index 936d05bf37fa..0794925d8042 100644 --- a/drivers/char/hvc_iseries.c +++ b/drivers/char/hvc_iseries.c @@ -353,7 +353,7 @@ static void hvc_close_event(struct HvLpEvent *event) if (!hvlpevent_is_int(event)) { printk(KERN_WARNING - "hvc: got unexpected close acknowlegement\n"); + "hvc: got unexpected close acknowledgement\n"); return; } diff --git a/drivers/char/hw_random/n2-drv.c b/drivers/char/hw_random/n2-drv.c index 9b3e09cd41f9..10f868eefaa6 100644 --- a/drivers/char/hw_random/n2-drv.c +++ b/drivers/char/hw_random/n2-drv.c @@ -71,7 +71,7 @@ MODULE_VERSION(DRV_MODULE_VERSION); * x22 + x21 + x17 + x15 + x13 + x12 + x11 + x7 + x5 + x + 1 * * The RNG_CTL_VCO value of each noise cell must be programmed - * seperately. This is why 4 control register values must be provided + * separately. This is why 4 control register values must be provided * to the hypervisor. During a write, the hypervisor writes them all, * one at a time, to the actual RNG_CTL register. The first three * values are used to setup the desired RNG_CTL_VCO for each entropy diff --git a/drivers/char/ip2/i2hw.h b/drivers/char/ip2/i2hw.h index 8aa6e7ab8d5b..c0ba6c05f0cd 100644 --- a/drivers/char/ip2/i2hw.h +++ b/drivers/char/ip2/i2hw.h @@ -559,7 +559,7 @@ Loadware may be sent to the board in two ways: 2) It may be hard-coded into your source by including a .h file (typically supplied by Computone), which declares a data array and initializes every - element. This acheives the same result as if an entire loadware file had + element. This achieves the same result as if an entire loadware file had been read into the array. This requires more data space in your program, but access to the file system diff --git a/drivers/char/pty.c b/drivers/char/pty.c index 385c44b3034f..5ee424817263 100644 --- a/drivers/char/pty.c +++ b/drivers/char/pty.c @@ -220,7 +220,7 @@ static void pty_set_termios(struct tty_struct *tty, * @tty: tty being resized * @ws: window size being set. * - * Update the termios variables and send the neccessary signals to + * Update the termios variables and send the necessary signals to * peform a terminal resize correctly */ diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index c6f3b48be9dd..56b11c1c7aeb 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -2026,7 +2026,7 @@ static int tiocgwinsz(struct tty_struct *tty, struct winsize __user *arg) * @rows: rows (character) * @cols: cols (character) * - * Update the termios variables and send the neccessary signals to + * Update the termios variables and send the necessary signals to * peform a terminal resize correctly */ diff --git a/drivers/char/vt.c b/drivers/char/vt.c index 94f530a29691..bd1d1164fec5 100644 --- a/drivers/char/vt.c +++ b/drivers/char/vt.c @@ -821,7 +821,7 @@ static inline int resize_screen(struct vc_data *vc, int width, int height, * * Resize a virtual console, clipping according to the actual constraints. 
* If the caller passes a tty structure then update the termios winsize - * information and perform any neccessary signal handling. + * information and perform any necessary signal handling. * * Caller must hold the console semaphore. Takes the termios mutex and * ctrl_lock of the tty IFF a tty is passed. diff --git a/drivers/dma/coh901318_lli.h b/drivers/dma/coh901318_lli.h index 7bf713b79c6b..7a5c80990e9e 100644 --- a/drivers/dma/coh901318_lli.h +++ b/drivers/dma/coh901318_lli.h @@ -30,7 +30,7 @@ struct device; * @pool: pool handle * @dev: dma device * @lli_nbr: number of lli:s in the pool - * @algin: adress alignemtn of lli:s + * @algin: address alignemtn of lli:s * returns 0 on success otherwise none zero */ int coh901318_pool_create(struct coh901318_pool *pool, diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c index d7f8d8b4a4b8..52fb371784e1 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bios.c +++ b/drivers/gpu/drm/nouveau/nouveau_bios.c @@ -3544,7 +3544,7 @@ int nouveau_bios_parse_lvds_table(struct drm_device *dev, int pxclk, bool *dl, b * at which modes should be set up in the dual link style. * * Following the header, the BMP (ver 0xa) table has several records, - * indexed by a seperate xlat table, indexed in turn by the fp strap in + * indexed by a separate xlat table, indexed in turn by the fp strap in * EXTDEV_BOOT. Each record had a config byte, followed by 6 script * numbers for use by INIT_SUB which controlled panel init and power, * and finally a dword of ms to sleep between power off and on diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index 6b9690418bc7..23664058690f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -544,7 +544,7 @@ struct drm_nouveau_private { uint32_t ramro_offset; uint32_t ramro_size; - /* base physical adresses */ + /* base physical addresses */ uint64_t fb_phys; uint64_t fb_available_size; uint64_t fb_mappable_pages; diff --git a/drivers/gpu/drm/via/via_irq.c b/drivers/gpu/drm/via/via_irq.c index 5935b8842e86..34079f251cd4 100644 --- a/drivers/gpu/drm/via/via_irq.c +++ b/drivers/gpu/drm/via/via_irq.c @@ -150,7 +150,7 @@ irqreturn_t via_driver_irq_handler(DRM_IRQ_ARGS) cur_irq++; } - /* Acknowlege interrupts */ + /* Acknowledge interrupts */ VIA_WRITE(VIA_REG_INTERRUPT, status); @@ -165,7 +165,7 @@ static __inline__ void viadrv_acknowledge_irqs(drm_via_private_t * dev_priv) u32 status; if (dev_priv) { - /* Acknowlege interrupts */ + /* Acknowledge interrupts */ status = VIA_READ(VIA_REG_INTERRUPT); VIA_WRITE(VIA_REG_INTERRUPT, status | dev_priv->irq_pending_mask); diff --git a/drivers/i2c/busses/i2c-pxa.c b/drivers/i2c/busses/i2c-pxa.c index 7647a20523a0..90ffbf6f9d4f 100644 --- a/drivers/i2c/busses/i2c-pxa.c +++ b/drivers/i2c/busses/i2c-pxa.c @@ -12,7 +12,7 @@ * * History: * Apr 2002: Initial version [CS] - * Jun 2002: Properly seperated algo/adap [FB] + * Jun 2002: Properly separated algo/adap [FB] * Jan 2003: Fixed several bugs concerning interrupt handling [Kai-Uwe Bloem] * Jan 2003: added limited signal handling [Kai-Uwe Bloem] * Sep 2004: Major rework to ensure efficient bus handling [RMK] diff --git a/drivers/infiniband/hw/ehca/ehca_qes.h b/drivers/infiniband/hw/ehca/ehca_qes.h index 5d28e3e98a20..90c4efa67586 100644 --- a/drivers/infiniband/hw/ehca/ehca_qes.h +++ b/drivers/infiniband/hw/ehca/ehca_qes.h @@ -46,7 +46,7 @@ #include "ehca_tools.h" -/* virtual scatter gather entry to specify remote adresses with 
length */ +/* virtual scatter gather entry to specify remote addresses with length */ struct ehca_vsgentry { u64 vaddr; u32 lkey; @@ -148,7 +148,7 @@ struct ehca_wqe { u32 immediate_data; union { struct { - u64 remote_virtual_adress; + u64 remote_virtual_address; u32 rkey; u32 reserved; u64 atomic_1st_op_dma_len; diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c index e3ec7fdd67bd..9a3fbfca9b41 100644 --- a/drivers/infiniband/hw/ehca/ehca_reqs.c +++ b/drivers/infiniband/hw/ehca/ehca_reqs.c @@ -269,7 +269,7 @@ static inline int ehca_write_swqe(struct ehca_qp *qp, /* no break is intentional here */ case IB_QPT_RC: /* TODO: atomic not implemented */ - wqe_p->u.nud.remote_virtual_adress = + wqe_p->u.nud.remote_virtual_address = send_wr->wr.rdma.remote_addr; wqe_p->u.nud.rkey = send_wr->wr.rdma.rkey; diff --git a/drivers/input/misc/yealink.h b/drivers/input/misc/yealink.h index 48af0be9cbdf..1e0f52397010 100644 --- a/drivers/input/misc/yealink.h +++ b/drivers/input/misc/yealink.h @@ -127,7 +127,7 @@ struct yld_ctl_packet { * yld_status struct. */ -/* LCD, each segment must be driven seperately. +/* LCD, each segment must be driven separately. * * Layout: * diff --git a/drivers/isdn/i4l/isdn_common.c b/drivers/isdn/i4l/isdn_common.c index adb1e8c36b46..00c60e2e0ff7 100644 --- a/drivers/isdn/i4l/isdn_common.c +++ b/drivers/isdn/i4l/isdn_common.c @@ -1347,7 +1347,7 @@ isdn_ioctl(struct inode *inode, struct file *file, uint cmd, ulong arg) /* * isdn net devices manage lots of configuration variables as linked lists. * Those lists must only be manipulated from user space. Some of the ioctl's - * service routines access user space and are not atomic. Therefor, ioctl's + * service routines access user space and are not atomic. Therefore, ioctl's * manipulating the lists and ioctl's sleeping while accessing the lists * are serialized by means of a semaphore. */ diff --git a/drivers/media/dvb/dvb-core/dvb_frontend.h b/drivers/media/dvb/dvb-core/dvb_frontend.h index 52e4ce4304ee..80dda308ff74 100644 --- a/drivers/media/dvb/dvb-core/dvb_frontend.h +++ b/drivers/media/dvb/dvb-core/dvb_frontend.h @@ -214,14 +214,14 @@ struct dvb_tuner_ops { int (*get_status)(struct dvb_frontend *fe, u32 *status); int (*get_rf_strength)(struct dvb_frontend *fe, u16 *strength); - /** These are provided seperately from set_params in order to facilitate silicon - * tuners which require sophisticated tuning loops, controlling each parameter seperately. */ + /** These are provided separately from set_params in order to facilitate silicon + * tuners which require sophisticated tuning loops, controlling each parameter separately. */ int (*set_frequency)(struct dvb_frontend *fe, u32 frequency); int (*set_bandwidth)(struct dvb_frontend *fe, u32 bandwidth); /* - * These are provided seperately from set_params in order to facilitate silicon - * tuners which require sophisticated tuning loops, controlling each parameter seperately. + * These are provided separately from set_params in order to facilitate silicon + * tuners which require sophisticated tuning loops, controlling each parameter separately. 
*/ int (*set_state)(struct dvb_frontend *fe, enum tuner_param param, struct tuner_state *state); int (*get_state)(struct dvb_frontend *fe, enum tuner_param param, struct tuner_state *state); diff --git a/drivers/media/video/bt8xx/bttv-cards.c b/drivers/media/video/bt8xx/bttv-cards.c index 12279f6d9bc4..716870ae85d5 100644 --- a/drivers/media/video/bt8xx/bttv-cards.c +++ b/drivers/media/video/bt8xx/bttv-cards.c @@ -4404,7 +4404,7 @@ static void rv605_muxsel(struct bttv *btv, unsigned int input) /* Tibet Systems 'Progress DVR' CS16 muxsel helper [Chris Fanning] * * The CS16 (available on eBay cheap) is a PCI board with four Fusion - * 878A chips, a PCI bridge, an Atmel microcontroller, four sync seperator + * 878A chips, a PCI bridge, an Atmel microcontroller, four sync separator * chips, ten eight input analog multiplexors, a not chip and a few * other components. * @@ -4426,7 +4426,7 @@ static void rv605_muxsel(struct bttv *btv, unsigned int input) * * There is an ATMEL microcontroller with an 8031 core on board. I have not * determined what function (if any) it provides. With the microcontroller - * and sync seperator chips a guess is that it might have to do with video + * and sync separator chips a guess is that it might have to do with video * switching and maybe some digital I/O. */ static void tibetCS16_muxsel(struct bttv *btv, unsigned int input) diff --git a/drivers/media/video/gspca/ov519.c b/drivers/media/video/gspca/ov519.c index b4f965731244..e5e4c4440d39 100644 --- a/drivers/media/video/gspca/ov519.c +++ b/drivers/media/video/gspca/ov519.c @@ -503,7 +503,7 @@ static const struct v4l2_pix_format ovfx2_ov3610_mode[] = { /* * The FX2 chip does not give us a zero length read at end of frame. * It does, however, give a short read at the end of a frame, if - * neccessary, rather than run two frames together. + * necessary, rather than run two frames together. * * By choosing the right bulk transfer size, we are guaranteed to always * get a short read for the last read of each frame. Frame sizes are diff --git a/drivers/media/video/pwc/philips.txt b/drivers/media/video/pwc/philips.txt index f9f3584281d8..d38dd791511e 100644 --- a/drivers/media/video/pwc/philips.txt +++ b/drivers/media/video/pwc/philips.txt @@ -33,7 +33,7 @@ a lot of extra information, a FAQ, and the binary plugin 'PWCX'. This plugin contains decompression routines that allow you to use higher image sizes and framerates; in addition the webcam uses less bandwidth on the USB bus (handy if you want to run more than 1 camera simultaneously). These routines fall -under a NDA, and may therefor not be distributed as source; however, its use +under a NDA, and may therefore not be distributed as source; however, its use is completely optional. You can build this code either into your kernel, or as a module. I recommend diff --git a/drivers/media/video/sn9c102/sn9c102_sensor.h b/drivers/media/video/sn9c102/sn9c102_sensor.h index 4af7382da5c5..494957b10bac 100644 --- a/drivers/media/video/sn9c102/sn9c102_sensor.h +++ b/drivers/media/video/sn9c102/sn9c102_sensor.h @@ -120,7 +120,7 @@ extern int sn9c102_write_regs(struct sn9c102_device*, const u8 valreg[][2], /* Write multiple registers with constant values. For example: sn9c102_write_const_regs(cam, {0x00, 0x14}, {0x60, 0x17}, {0x0f, 0x18}); - Register adresses must be < 256. + Register addresses must be < 256. */ #define sn9c102_write_const_regs(sn9c102_device, data...) 
\ ({ static const u8 _valreg[][2] = {data}; \ diff --git a/drivers/media/video/tea6420.c b/drivers/media/video/tea6420.c index 0446524d3543..6bf6bc7dbc7f 100644 --- a/drivers/media/video/tea6420.c +++ b/drivers/media/video/tea6420.c @@ -6,7 +6,7 @@ The tea6420 is a bus controlled audio-matrix with 5 stereo inputs, 4 stereo outputs and gain control for each output. - It is cascadable, i.e. it can be found at the adresses 0x98 + It is cascadable, i.e. it can be found at the addresses 0x98 and 0x9a on the i2c-bus. For detailed informations download the specifications directly diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c index 0cc5eeff5ee8..10491e4e305d 100644 --- a/drivers/mfd/sm501.c +++ b/drivers/mfd/sm501.c @@ -523,7 +523,7 @@ unsigned long sm501_set_clock(struct device *dev, unsigned long clock = readl(sm->regs + SM501_CURRENT_CLOCK); unsigned char reg; unsigned int pll_reg = 0; - unsigned long sm501_freq; /* the actual frequency acheived */ + unsigned long sm501_freq; /* the actual frequency achieved */ struct sm501_clock to; @@ -533,7 +533,7 @@ unsigned long sm501_set_clock(struct device *dev, switch (clksrc) { case SM501_CLOCK_P2XCLK: - /* This clock is divided in half so to achive the + /* This clock is divided in half so to achieve the * requested frequency the value must be multiplied by * 2. This clock also has an additional pre divisor */ @@ -562,7 +562,7 @@ unsigned long sm501_set_clock(struct device *dev, break; case SM501_CLOCK_V2XCLK: - /* This clock is divided in half so to achive the + /* This clock is divided in half so to achieve the * requested frequency the value must be multiplied by 2. */ sm501_freq = (sm501_select_clock(2 * req_freq, &to, 3) / 2); @@ -648,7 +648,7 @@ unsigned long sm501_find_clock(struct device *dev, unsigned long req_freq) { struct sm501_devdata *sm = dev_get_drvdata(dev); - unsigned long sm501_freq; /* the frequency achiveable by the 501 */ + unsigned long sm501_freq; /* the frequency achieveable by the 501 */ struct sm501_clock to; switch (clksrc) { diff --git a/drivers/mmc/host/mxcmmc.c b/drivers/mmc/host/mxcmmc.c index 60a2b69e54f5..16cc91c827c9 100644 --- a/drivers/mmc/host/mxcmmc.c +++ b/drivers/mmc/host/mxcmmc.c @@ -4,7 +4,7 @@ * This is a driver for the SDHC controller found in Freescale MX2/MX3 * SoCs. It is basically the same hardware as found on MX1 (imxmmc.c). * Unlike the hardware found on MX1, this hardware just works and does - * not need all the quirks found in imxmmc.c, hence the seperate driver. + * not need all the quirks found in imxmmc.c, hence the separate driver. * * Copyright (C) 2008 Sascha Hauer, Pengutronix * Copyright (C) 2006 Pavel Pisa, PiKRON diff --git a/drivers/mtd/chips/jedec_probe.c b/drivers/mtd/chips/jedec_probe.c index 1bec5e1ce6ac..8db1148dfa47 100644 --- a/drivers/mtd/chips/jedec_probe.c +++ b/drivers/mtd/chips/jedec_probe.c @@ -226,7 +226,7 @@ struct unlock_addr { * exists, but is for MTD_UADDR_NOT_SUPPORTED - and, therefore, * should not be used. The problem is that structures with * initializers have extra fields initialized to 0. It is _very_ - * desireable to have the unlock address entries for unsupported + * desirable to have the unlock address entries for unsupported * data widths automatically initialized - that means that * MTD_UADDR_NOT_SUPPORTED must be 0 and the first entry here * must go unused. 
diff --git a/drivers/mtd/nand/bcm_umi_nand.c b/drivers/mtd/nand/bcm_umi_nand.c index 087bcd745bb7..7d1cca7a31a9 100644 --- a/drivers/mtd/nand/bcm_umi_nand.c +++ b/drivers/mtd/nand/bcm_umi_nand.c @@ -381,7 +381,7 @@ static int __devinit bcm_umi_nand_probe(struct platform_device *pdev) if (!r) return -ENXIO; - /* map physical adress */ + /* map physical address */ bcm_umi_io_base = ioremap(r->start, r->end - r->start + 1); if (!bcm_umi_io_base) { @@ -525,7 +525,7 @@ static int bcm_umi_nand_remove(struct platform_device *pdev) /* Release resources, unregister device */ nand_release(board_mtd); - /* unmap physical adress */ + /* unmap physical address */ iounmap(bcm_umi_io_base); /* Free the MTD device structure */ diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c index 45dec5770da0..b2900d8406d3 100644 --- a/drivers/mtd/nand/mxc_nand.c +++ b/drivers/mtd/nand/mxc_nand.c @@ -507,7 +507,7 @@ static void mxc_do_addr_cycle(struct mtd_info *mtd, int column, int page_addr) * MXC NANDFC can only perform full page+spare or * spare-only read/write. When the upper layers * layers perform a read/write buf operation, - * we will used the saved column adress to index into + * we will used the saved column address to index into * the full page. */ send_addr(host, 0, page_addr == -1); diff --git a/drivers/net/atlx/atl2.h b/drivers/net/atlx/atl2.h index d918bbe621ea..927e4de6474d 100644 --- a/drivers/net/atlx/atl2.h +++ b/drivers/net/atlx/atl2.h @@ -442,7 +442,7 @@ struct atl2_hw { struct atl2_ring_header { /* pointer to the descriptor ring memory */ void *desc; - /* physical adress of the descriptor ring */ + /* physical address of the descriptor ring */ dma_addr_t dma; /* length of descriptor ring in bytes */ unsigned int size; diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c index 109d2783e4d8..bef02330464d 100644 --- a/drivers/net/chelsio/sge.c +++ b/drivers/net/chelsio/sge.c @@ -248,7 +248,7 @@ static void restart_sched(unsigned long); * * Interrupts are handled by a single CPU and it is likely that on a MP system * the application is migrated to another CPU. In that scenario, we try to - * seperate the RX(in irq context) and TX state in order to decrease memory + * separate the RX(in irq context) and TX state in order to decrease memory * contention. 
*/ struct sge { diff --git a/drivers/net/e1000e/82571.c b/drivers/net/e1000e/82571.c index 02d67d047d96..dc4eb87309c5 100644 --- a/drivers/net/e1000e/82571.c +++ b/drivers/net/e1000e/82571.c @@ -1363,7 +1363,7 @@ static s32 e1000_setup_fiber_serdes_link_82571(struct e1000_hw *hw) * * 1) down * 2) autoneg_progress - * 3) autoneg_complete (the link sucessfully autonegotiated) + * 3) autoneg_complete (the link successfully autonegotiated) * 4) forced_up (the link has been forced up, it did not autonegotiate) * **/ diff --git a/drivers/net/e1000e/lib.c b/drivers/net/e1000e/lib.c index 2fa9b36a2c5a..3af0b1b82832 100644 --- a/drivers/net/e1000e/lib.c +++ b/drivers/net/e1000e/lib.c @@ -587,7 +587,7 @@ s32 e1000e_check_for_serdes_link(struct e1000_hw *hw) if (!(rxcw & E1000_RXCW_IV)) { mac->serdes_has_link = true; e_dbg("SERDES: Link up - autoneg " - "completed sucessfully.\n"); + "completed successfully.\n"); } else { mac->serdes_has_link = false; e_dbg("SERDES: Link down - invalid" diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c index 997124d2992a..e2ce8f8a21a7 100644 --- a/drivers/net/igb/igb_main.c +++ b/drivers/net/igb/igb_main.c @@ -674,7 +674,7 @@ static void igb_set_interrupt_capability(struct igb_adapter *adapter) /* start with one vector for every rx queue */ numvecs = adapter->num_rx_queues; - /* if tx handler is seperate add 1 for every tx queue */ + /* if tx handler is separate add 1 for every tx queue */ if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS)) numvecs += adapter->num_tx_queues; diff --git a/drivers/net/irda/sa1100_ir.c b/drivers/net/irda/sa1100_ir.c index c412e8026173..1dcdce0631aa 100644 --- a/drivers/net/irda/sa1100_ir.c +++ b/drivers/net/irda/sa1100_ir.c @@ -331,7 +331,7 @@ static int sa1100_irda_resume(struct platform_device *pdev) * If we missed a speed change, initialise at the new speed * directly. It is debatable whether this is actually * required, but in the interests of continuing from where - * we left off it is desireable. The converse argument is + * we left off it is desirable. The converse argument is * that we should re-negotiate at 9600 baud again. */ if (si->newspeed) { diff --git a/drivers/net/qlge/qlge_ethtool.c b/drivers/net/qlge/qlge_ethtool.c index 058fa0a48c6f..b8d21ab212c6 100644 --- a/drivers/net/qlge/qlge_ethtool.c +++ b/drivers/net/qlge/qlge_ethtool.c @@ -402,7 +402,7 @@ static int ql_set_wol(struct net_device *ndev, struct ethtool_wolinfo *wol) u32 wol = 0; status = ql_mb_wol_mode(qdev, wol); QPRINTK(qdev, DRV, ERR, "WOL %s (wol code 0x%x) on %s\n", - (status == 0) ? "cleared sucessfully" : "clear failed", + (status == 0) ? "cleared successfully" : "clear failed", wol, qdev->ndev->name); } diff --git a/drivers/net/qlge/qlge_main.c b/drivers/net/qlge/qlge_main.c index 894a7c84faef..a35845b48ea4 100644 --- a/drivers/net/qlge/qlge_main.c +++ b/drivers/net/qlge/qlge_main.c @@ -3517,7 +3517,7 @@ int ql_wol(struct ql_adapter *qdev) wol |= MB_WOL_MODE_ON; status = ql_mb_wol_mode(qdev, wol); QPRINTK(qdev, DRV, ERR, "WOL %s (wol code 0x%x) on %s\n", - (status == 0) ? "Sucessfully set" : "Failed", wol, + (status == 0) ? 
"Successfully set" : "Failed", wol, qdev->ndev->name); } diff --git a/drivers/net/sfc/regs.h b/drivers/net/sfc/regs.h index 89d606fe9248..18a3be428348 100644 --- a/drivers/net/sfc/regs.h +++ b/drivers/net/sfc/regs.h @@ -95,7 +95,7 @@ #define FRF_AA_INT_ACK_KER_FIELD_LBN 0 #define FRF_AA_INT_ACK_KER_FIELD_WIDTH 32 -/* INT_ISR0_REG: Function 0 Interrupt Acknowlege Status register */ +/* INT_ISR0_REG: Function 0 Interrupt Acknowledge Status register */ #define FR_BZ_INT_ISR0 0x00000090 #define FRF_BZ_INT_ISR_REG_LBN 0 #define FRF_BZ_INT_ISR_REG_WIDTH 64 diff --git a/drivers/net/smsc9420.c b/drivers/net/smsc9420.c index 12f0f5d74e3c..2ae1972bcb46 100644 --- a/drivers/net/smsc9420.c +++ b/drivers/net/smsc9420.c @@ -1348,7 +1348,7 @@ static int smsc9420_open(struct net_device *dev) netif_carrier_off(dev); - /* disable, mask and acknowlege all interrupts */ + /* disable, mask and acknowledge all interrupts */ spin_lock_irqsave(&pd->int_lock, flags); int_cfg = smsc9420_reg_read(pd, INT_CFG) & (~INT_CFG_IRQ_EN_); smsc9420_reg_write(pd, INT_CFG, int_cfg); diff --git a/drivers/net/spider_net.c b/drivers/net/spider_net.c index 218524857bfc..839b1f065d3c 100644 --- a/drivers/net/spider_net.c +++ b/drivers/net/spider_net.c @@ -474,7 +474,7 @@ spider_net_prepare_rx_descr(struct spider_net_card *card, * spider_net_enable_rxchtails - sets RX dmac chain tail addresses * @card: card structure * - * spider_net_enable_rxchtails sets the RX DMAC chain tail adresses in the + * spider_net_enable_rxchtails sets the RX DMAC chain tail addresses in the * chip by writing to the appropriate register. DMA is enabled in * spider_net_enable_rxdmac. */ @@ -1820,7 +1820,7 @@ spider_net_enable_card(struct spider_net_card *card) spider_net_write_reg(card, SPIDER_NET_ECMODE, SPIDER_NET_ECMODE_VALUE); - /* set chain tail adress for RX chains and + /* set chain tail address for RX chains and * enable DMA */ spider_net_enable_rxchtails(card); spider_net_enable_rxdmac(card); diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c index b571a1babab9..a88fcb39ba15 100644 --- a/drivers/net/sungem.c +++ b/drivers/net/sungem.c @@ -782,7 +782,7 @@ static int gem_rx(struct gem *gp, int work_to_do) break; /* When writing back RX descriptor, GEM writes status - * then buffer address, possibly in seperate transactions. + * then buffer address, possibly in separate transactions. * If we don't wait for the chip to write both, we could * post a new buffer to this descriptor then have GEM spam * on the buffer address. We sync on the RX completion diff --git a/drivers/net/tehuti.c b/drivers/net/tehuti.c index 80b404f2b938..ed4e9c42935c 100644 --- a/drivers/net/tehuti.c +++ b/drivers/net/tehuti.c @@ -1857,7 +1857,7 @@ static void bdx_tx_push_desc(struct bdx_priv *priv, void *data, int size) * @data - desc's data * @size - desc's size * - * NOTE: this func does check for available space and, if neccessary, waits for + * NOTE: this func does check for available space and, if necessary, waits for * NIC to read existing data before writing new one. 
*/ static void bdx_tx_push_desc_safe(struct bdx_priv *priv, void *data, int size) diff --git a/drivers/net/tokenring/tms380tr.c b/drivers/net/tokenring/tms380tr.c index e3c42f5ac4a9..4e4c402319c9 100644 --- a/drivers/net/tokenring/tms380tr.c +++ b/drivers/net/tokenring/tms380tr.c @@ -693,7 +693,7 @@ static netdev_tx_t tms380tr_hardware_send_packet(struct sk_buff *skb, * NOTE: This function should be used whenever the status of any TPL must be * modified by the driver, because the compiler may otherwise change the * order of instructions such that writing the TPL status may be executed at - * an undesireable time. When this function is used, the status is always + * an undesirable time. When this function is used, the status is always * written when the function is called. */ static void tms380tr_write_tpl_status(TPL *tpl, unsigned int Status) @@ -2266,7 +2266,7 @@ static void tms380tr_rcv_status_irq(struct net_device *dev) * This function should be used whenever the status of any RPL must be * modified by the driver, because the compiler may otherwise change the * order of instructions such that writing the RPL status may be executed - * at an undesireable time. When this function is used, the status is + * at an undesirable time. When this function is used, the status is * always written when the function is called. */ static void tms380tr_write_rpl_status(RPL *rpl, unsigned int Status) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 2834a01bae24..e572ecc09a44 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1365,7 +1365,7 @@ static int tun_chr_close(struct inode *inode, struct file *file) __tun_detach(tun); - /* If desireable, unregister the netdevice. */ + /* If desirable, unregister the netdevice. */ if (!(tun->flags & TUN_PERSIST)) { rtnl_lock(); if (dev->reg_state == NETREG_REGISTERED) diff --git a/drivers/net/ucc_geth.c b/drivers/net/ucc_geth.c index eb8fe7e16c6c..8eec97799e05 100644 --- a/drivers/net/ucc_geth.c +++ b/drivers/net/ucc_geth.c @@ -429,7 +429,7 @@ static void hw_add_addr_in_hash(struct ucc_geth_private *ugeth, ucc_fast_get_qe_cr_subblock(ugeth->ug_info->uf_info.ucc_num); /* Ethernet frames are defined in Little Endian mode, - therefor to insert */ + therefore to insert */ /* the address to the hash (Big Endian mode), we reverse the bytes.*/ set_mac_addr(&p_82xx_addr_filt->taddr.h, p_enet_addr); diff --git a/drivers/net/wimax/i2400m/fw.c b/drivers/net/wimax/i2400m/fw.c index 64cdfeb299ca..6c459f5cb5df 100644 --- a/drivers/net/wimax/i2400m/fw.c +++ b/drivers/net/wimax/i2400m/fw.c @@ -612,7 +612,7 @@ ssize_t i2400m_bm_cmd(struct i2400m *i2400m, goto error_wait_for_ack; } rx_bytes = result; - /* verify the ack and read more if neccessary [result is the + /* verify the ack and read more if necessary [result is the * final amount of bytes we get in the ack] */ result = __i2400m_bm_ack_verify(i2400m, opcode, ack, ack_size, flags); if (result < 0) diff --git a/drivers/net/wimax/i2400m/i2400m.h b/drivers/net/wimax/i2400m/i2400m.h index 04df9bbe340f..820b128705ec 100644 --- a/drivers/net/wimax/i2400m/i2400m.h +++ b/drivers/net/wimax/i2400m/i2400m.h @@ -627,7 +627,7 @@ enum i2400m_bm_cmd_flags { * @I2400M_BRI_NO_REBOOT: Do not reboot the device and proceed * directly to wait for a reboot barker from the device. * @I2400M_BRI_MAC_REINIT: We need to reinitialize the boot - * rom after reading the MAC adress. This is quite a dirty hack, + * rom after reading the MAC address. 
This is quite a dirty hack, * if you ask me -- the device requires the bootrom to be * intialized after reading the MAC address. */ diff --git a/drivers/net/wimax/i2400m/sdio.c b/drivers/net/wimax/i2400m/sdio.c index 76a50ac02ebb..14f876b1358b 100644 --- a/drivers/net/wimax/i2400m/sdio.c +++ b/drivers/net/wimax/i2400m/sdio.c @@ -304,7 +304,7 @@ error_kzalloc: * * The device will be fully reset internally, but won't be * disconnected from the bus (so no reenumeration will - * happen). Firmware upload will be neccessary. + * happen). Firmware upload will be necessary. * * The device will send a reboot barker that will trigger the driver * to reinitialize the state via __i2400m_dev_reset_handle. @@ -314,7 +314,7 @@ error_kzalloc: * * The device will be fully reset internally, disconnected from the * bus an a reenumeration will happen. Firmware upload will be - * neccessary. Thus, we don't do any locking or struct + * necessary. Thus, we don't do any locking or struct * reinitialization, as we are going to be fully disconnected and * reenumerated. * diff --git a/drivers/net/wimax/i2400m/usb.c b/drivers/net/wimax/i2400m/usb.c index 98f4f8c5fb68..99f04c475898 100644 --- a/drivers/net/wimax/i2400m/usb.c +++ b/drivers/net/wimax/i2400m/usb.c @@ -246,7 +246,7 @@ error_kzalloc: * * The device will be fully reset internally, but won't be * disconnected from the USB bus (so no reenumeration will - * happen). Firmware upload will be neccessary. + * happen). Firmware upload will be necessary. * * The device will send a reboot barker in the notification endpoint * that will trigger the driver to reinitialize the state @@ -257,7 +257,7 @@ error_kzalloc: * * The device will be fully reset internally, disconnected from the * USB bus an a reenumeration will happen. Firmware upload will be - * neccessary. Thus, we don't do any locking or struct + * necessary. Thus, we don't do any locking or struct * reinitialization, as we are going to be fully disconnected and * reenumerated. * diff --git a/drivers/net/wireless/ath/ar9170/main.c b/drivers/net/wireless/ath/ar9170/main.c index f9d6db8d013e..46a1e19c6787 100644 --- a/drivers/net/wireless/ath/ar9170/main.c +++ b/drivers/net/wireless/ath/ar9170/main.c @@ -2535,7 +2535,7 @@ void *ar9170_alloc(size_t priv_size) /* * this buffer is used for rx stream reconstruction. * Under heavy load this device (or the transport layer?) - * tends to split the streams into seperate rx descriptors. + * tends to split the streams into separate rx descriptors. 
*/ skb = __dev_alloc_skb(AR9170_MAX_RX_BUFFER_SIZE, GFP_KERNEL); diff --git a/drivers/net/wireless/iwmc3200wifi/lmac.h b/drivers/net/wireless/iwmc3200wifi/lmac.h index a3a79b5e2898..a855a99e49b8 100644 --- a/drivers/net/wireless/iwmc3200wifi/lmac.h +++ b/drivers/net/wireless/iwmc3200wifi/lmac.h @@ -262,7 +262,7 @@ struct iwm_ct_kill_cfg_cmd { /* Power Management */ #define POWER_TABLE_CMD 0x77 -#define SAVE_RESTORE_ADRESS_CMD 0x78 +#define SAVE_RESTORE_ADDRESS_CMD 0x78 #define REPLY_WATERMARK_CMD 0x79 #define PM_DEBUG_STATISTIC_NOTIFIC 0x7B #define PD_FLUSH_N_NOTIFICATION 0x7C diff --git a/drivers/net/wireless/rt2x00/rt2500usb.c b/drivers/net/wireless/rt2x00/rt2500usb.c index 83f2592c59de..486c93559c29 100644 --- a/drivers/net/wireless/rt2x00/rt2500usb.c +++ b/drivers/net/wireless/rt2x00/rt2500usb.c @@ -368,7 +368,7 @@ static int rt2500usb_config_key(struct rt2x00_dev *rt2x00dev, /* * The encryption key doesn't fit within the CSR cache, - * this means we should allocate it seperately and use + * this means we should allocate it separately and use * rt2x00usb_vendor_request() to send the key to the hardware. */ reg = KEY_ENTRY(key->hw_key_idx); @@ -382,7 +382,7 @@ static int rt2500usb_config_key(struct rt2x00_dev *rt2x00dev, /* * The driver does not support the IV/EIV generation * in hardware. However it demands the data to be provided - * both seperately as well as inside the frame. + * both separately as well as inside the frame. * We already provided the CONFIG_CRYPTO_COPY_IV to rt2x00lib * to ensure rt2x00lib will not strip the data from the * frame after the copy, now we must tell mac80211 diff --git a/drivers/net/wireless/rt2x00/rt2800usb.c b/drivers/net/wireless/rt2x00/rt2800usb.c index ab95346cf6a3..2e5c8a13758b 100644 --- a/drivers/net/wireless/rt2x00/rt2800usb.c +++ b/drivers/net/wireless/rt2x00/rt2800usb.c @@ -100,7 +100,7 @@ static int rt2800usb_check_firmware(struct rt2x00_dev *rt2x00dev, * There are 2 variations of the rt2870 firmware. * a) size: 4kb * b) size: 8kb - * Note that (b) contains 2 seperate firmware blobs of 4k + * Note that (b) contains 2 separate firmware blobs of 4k * within the file. The first blob is the same firmware as (a), * but the second blob is for the additional chipsets. */ @@ -118,7 +118,7 @@ static int rt2800usb_check_firmware(struct rt2x00_dev *rt2x00dev, /* * 8kb firmware files must be checked as if it were - * 2 seperate firmware files. + * 2 separate firmware files. */ while (offset < len) { if (!rt2800usb_check_crc(data + offset, 4096)) diff --git a/drivers/net/wireless/rt2x00/rt2x00debug.c b/drivers/net/wireless/rt2x00/rt2x00debug.c index 7d323a763b54..afee806affc2 100644 --- a/drivers/net/wireless/rt2x00/rt2x00debug.c +++ b/drivers/net/wireless/rt2x00/rt2x00debug.c @@ -109,7 +109,7 @@ struct rt2x00debug_intf { /* * HW crypto statistics. - * All statistics are stored seperately per cipher type. + * All statistics are stored separately per cipher type. 
*/ struct rt2x00debug_crypto crypto_stats[CIPHER_MAX]; diff --git a/drivers/net/wireless/rt2x00/rt2x00dev.c b/drivers/net/wireless/rt2x00/rt2x00dev.c index 265e66dba552..5e1d5167fff4 100644 --- a/drivers/net/wireless/rt2x00/rt2x00dev.c +++ b/drivers/net/wireless/rt2x00/rt2x00dev.c @@ -397,7 +397,7 @@ void rt2x00lib_rxdone(struct rt2x00_dev *rt2x00dev, /* * Hardware might have stripped the IV/EIV/ICV data, * in that case it is possible that the data was - * provided seperately (through hardware descriptor) + * provided separately (through hardware descriptor) * in which case we should reinsert the data into the frame. */ if ((rxdesc.dev_flags & RXDONE_CRYPTO_IV) && diff --git a/drivers/net/wireless/rt2x00/rt2x00queue.c b/drivers/net/wireless/rt2x00/rt2x00queue.c index 9915a09141ef..38ffca9b0fe7 100644 --- a/drivers/net/wireless/rt2x00/rt2x00queue.c +++ b/drivers/net/wireless/rt2x00/rt2x00queue.c @@ -502,7 +502,7 @@ int rt2x00queue_write_tx_frame(struct data_queue *queue, struct sk_buff *skb, /* * When hardware encryption is supported, and this frame * is to be encrypted, we should strip the IV/EIV data from - * the frame so we can provide it to the driver seperately. + * the frame so we can provide it to the driver separately. */ if (test_bit(ENTRY_TXD_ENCRYPT, &txdesc.flags) && !test_bit(ENTRY_TXD_ENCRYPT_IV, &txdesc.flags)) { diff --git a/drivers/net/wireless/rt2x00/rt61pci.c b/drivers/net/wireless/rt2x00/rt61pci.c index 0ca589306d71..99459db61efd 100644 --- a/drivers/net/wireless/rt2x00/rt61pci.c +++ b/drivers/net/wireless/rt2x00/rt61pci.c @@ -476,7 +476,7 @@ static int rt61pci_config_pairwise_key(struct rt2x00_dev *rt2x00dev, * The driver does not support the IV/EIV generation * in hardware. However it doesn't support the IV/EIV * inside the ieee80211 frame either, but requires it - * to be provided seperately for the descriptor. + * to be provided separately for the descriptor. * rt2x00lib will cut the IV/EIV data out of all frames * given to us by mac80211, but we must tell mac80211 * to generate the IV/EIV data. diff --git a/drivers/net/wireless/rt2x00/rt73usb.c b/drivers/net/wireless/rt2x00/rt73usb.c index ced3b6ab5e16..527368a45fd5 100644 --- a/drivers/net/wireless/rt2x00/rt73usb.c +++ b/drivers/net/wireless/rt2x00/rt73usb.c @@ -339,7 +339,7 @@ static int rt73usb_config_shared_key(struct rt2x00_dev *rt2x00dev, * The driver does not support the IV/EIV generation * in hardware. However it doesn't support the IV/EIV * inside the ieee80211 frame either, but requires it - * to be provided seperately for the descriptor. + * to be provided separately for the descriptor. * rt2x00lib will cut the IV/EIV data out of all frames * given to us by mac80211, but we must tell mac80211 * to generate the IV/EIV data. @@ -439,7 +439,7 @@ static int rt73usb_config_pairwise_key(struct rt2x00_dev *rt2x00dev, * The driver does not support the IV/EIV generation * in hardware. However it doesn't support the IV/EIV * inside the ieee80211 frame either, but requires it - * to be provided seperately for the descriptor. + * to be provided separately for the descriptor. * rt2x00lib will cut the IV/EIV data out of all frames * given to us by mac80211, but we must tell mac80211 * to generate the IV/EIV data. @@ -1665,7 +1665,7 @@ static void rt73usb_fill_rxdone(struct queue_entry *entry, /* * Hardware has stripped IV/EIV data from 802.11 frame during - * decryption. It has provided the data seperately but rt2x00lib + * decryption. 
It has provided the data separately but rt2x00lib * should decide if it should be reinserted. */ rxdesc->flags |= RX_FLAG_IV_STRIPPED; diff --git a/drivers/s390/char/raw3270.c b/drivers/s390/char/raw3270.c index 62ddf5202b79..2a4c566456e7 100644 --- a/drivers/s390/char/raw3270.c +++ b/drivers/s390/char/raw3270.c @@ -373,7 +373,7 @@ raw3270_irq (struct ccw_device *cdev, unsigned long intparm, struct irb *irb) rq->rc = ccw_device_start(rp->cdev, &rq->ccw, (unsigned long) rq, 0, 0); if (rq->rc == 0) - return; /* Sucessfully restarted. */ + return; /* Successfully restarted. */ break; case RAW3270_IO_STOP: if (!rq) diff --git a/drivers/s390/char/sclp.c b/drivers/s390/char/sclp.c index ec88c59842e3..f6d72e1f2a38 100644 --- a/drivers/s390/char/sclp.c +++ b/drivers/s390/char/sclp.c @@ -196,7 +196,7 @@ __sclp_start_request(struct sclp_req *req) req->start_count++; if (rc == 0) { - /* Sucessfully started request */ + /* Successfully started request */ req->status = SCLP_REQ_RUNNING; sclp_running_state = sclp_running_state_running; __sclp_set_request_timer(SCLP_RETRY_INTERVAL * HZ, diff --git a/drivers/scsi/a100u2w.c b/drivers/scsi/a100u2w.c index 208d6df9ed59..ff5716d5f044 100644 --- a/drivers/scsi/a100u2w.c +++ b/drivers/scsi/a100u2w.c @@ -492,7 +492,7 @@ static void init_alloc_map(struct orc_host * host) * init_orchid - initialise the host adapter * @host:host adapter to initialise * - * Initialise the controller and if neccessary load the firmware. + * Initialise the controller and if necessary load the firmware. * * Returns -1 if the initialisation fails. */ diff --git a/drivers/scsi/initio.c b/drivers/scsi/initio.c index 89a59484be02..a7714160fbc3 100644 --- a/drivers/scsi/initio.c +++ b/drivers/scsi/initio.c @@ -531,7 +531,7 @@ static void initio_read_eeprom(unsigned long base) * initio_stop_bm - stop bus master * @host: InitIO we are stopping * - * Stop any pending DMA operation, aborting the DMA if neccessary + * Stop any pending DMA operation, aborting the DMA if necessary */ static void initio_stop_bm(struct initio_host * host) diff --git a/drivers/scsi/libfc/fc_fcp.c b/drivers/scsi/libfc/fc_fcp.c index 96ee599d9a05..96446a85e008 100644 --- a/drivers/scsi/libfc/fc_fcp.c +++ b/drivers/scsi/libfc/fc_fcp.c @@ -48,7 +48,7 @@ struct kmem_cache *scsi_pkt_cachep; #define FC_SRB_CMD_SENT (1 << 0) /* cmd has been sent */ #define FC_SRB_RCV_STATUS (1 << 1) /* response has arrived */ #define FC_SRB_ABORT_PENDING (1 << 2) /* cmd abort sent to device */ -#define FC_SRB_ABORTED (1 << 3) /* abort acknowleged */ +#define FC_SRB_ABORTED (1 << 3) /* abort acknowledged */ #define FC_SRB_DISCONTIG (1 << 4) /* non-sequential data recvd */ #define FC_SRB_COMPL (1 << 5) /* fc_io_compl has been run */ #define FC_SRB_FCP_PROCESSING_TMO (1 << 6) /* timer function processing */ diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 2cc39684ce97..c898f47f30ba 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -969,7 +969,7 @@ lpfc_issue_els_flogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, * function returns, it does not guarantee all the IOCBs are actually aborted. 
* * Return code - * 0 - Sucessfully issued abort iocb on all outstanding flogis (Always 0) + * 0 - Successfully issued abort iocb on all outstanding flogis (Always 0) **/ int lpfc_els_abort_flogi(struct lpfc_hba *phba) @@ -3117,7 +3117,7 @@ lpfc_cmpl_els_rsp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, if (ndlp && NLP_CHK_NODE_ACT(ndlp) && (*((uint32_t *) (pcmd)) == ELS_CMD_LS_RJT)) { /* A LS_RJT associated with Default RPI cleanup has its own - * seperate code path. + * separate code path. */ if (!(ndlp->nlp_flag & NLP_RM_DFLT_RPI)) ls_rjt = 1; diff --git a/drivers/scsi/pcmcia/nsp_cs.h b/drivers/scsi/pcmcia/nsp_cs.h index 7db28cd49446..8c61a4fe1db9 100644 --- a/drivers/scsi/pcmcia/nsp_cs.h +++ b/drivers/scsi/pcmcia/nsp_cs.h @@ -187,7 +187,7 @@ #define S_IO BIT(1) /* Input/Output line from SCSI bus */ #define S_CD BIT(2) /* Command/Data line from SCSI bus */ #define S_BUSY BIT(3) /* Busy line from SCSI bus */ -#define S_ACK BIT(4) /* Acknowlege line from SCSI bus */ +#define S_ACK BIT(4) /* Acknowledge line from SCSI bus */ #define S_REQUEST BIT(5) /* Request line from SCSI bus */ #define S_SELECT BIT(6) /* */ #define S_ATN BIT(7) /* */ diff --git a/drivers/scsi/pm8001/pm8001_hwi.c b/drivers/scsi/pm8001/pm8001_hwi.c index 9b44c6f1b10e..7985ae45d688 100644 --- a/drivers/scsi/pm8001/pm8001_hwi.c +++ b/drivers/scsi/pm8001/pm8001_hwi.c @@ -2924,7 +2924,7 @@ hw_event_sas_phy_up(struct pm8001_hba_info *pm8001_ha, void *piomb) break; default: PM8001_MSG_DBG(pm8001_ha, - pm8001_printk("unkown device type(%x)\n", deviceType)); + pm8001_printk("unknown device type(%x)\n", deviceType)); break; } phy->phy_type |= PORT_TYPE_SAS; diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c index 7f9c83a76390..3b2c98fba834 100644 --- a/drivers/scsi/pm8001/pm8001_sas.c +++ b/drivers/scsi/pm8001/pm8001_sas.c @@ -600,7 +600,7 @@ static void pm8001_free_dev(struct pm8001_device *pm8001_dev) * by the command "OPC_INB_REG_DEV", after that the HBA will assign a * device ID(according to device's sas address) and returned it to LLDD. From * now on, we communicate with HBA FW with the device ID which HBA assigned - * rather than sas address. it is the neccessary step for our HBA but it is + * rather than sas address. it is the necessary step for our HBA but it is * the optional for other HBA driver. */ static int pm8001_dev_found_notify(struct domain_device *dev) diff --git a/drivers/scsi/pmcraid.h b/drivers/scsi/pmcraid.h index 92f89d50850c..b8ad07c3449e 100644 --- a/drivers/scsi/pmcraid.h +++ b/drivers/scsi/pmcraid.h @@ -938,7 +938,7 @@ static struct pmcraid_ioasc_error pmcraid_ioasc_error_table[] = { /* * pmcraid_ioctl_header - definition of header structure that preceeds all the - * buffers given as ioctl arguements. + * buffers given as ioctl arguments. * * .signature : always ASCII string, "PMCRAID" * .reserved : not used diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 255da53e5a01..5d94772d449d 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2105,7 +2105,7 @@ static int sd_revalidate_disk(struct gendisk *disk) * which is followed by sdaaa. * * This is basically 26 base counting with one extra 'nil' entry - * at the beggining from the second digit on and can be + * at the beginning from the second digit on and can be * determined using similar method as 26 base conversion with the * index shifted -1 after each digit is computed. 
* diff --git a/drivers/spi/spi_s3c24xx.c b/drivers/spi/spi_s3c24xx.c index c010733877ae..1fabede9e061 100644 --- a/drivers/spi/spi_s3c24xx.c +++ b/drivers/spi/spi_s3c24xx.c @@ -275,7 +275,7 @@ static inline u32 ack_bit(unsigned int irq) * Claim the FIQ handler (only one can be active at any one time) and * then setup the correct transfer code for this transfer. * - * This call updates all the necessary state information if sucessful, + * This call updates all the necessary state information if successful, * so the caller does not need to do anything more than start the transfer * as normal, since the IRQ will have been re-routed to the FIQ handler. */ diff --git a/drivers/usb/musb/musb_regs.h b/drivers/usb/musb/musb_regs.h index 473a94ef905f..7c14d5c5a8ac 100644 --- a/drivers/usb/musb/musb_regs.h +++ b/drivers/usb/musb/musb_regs.h @@ -436,7 +436,7 @@ static inline void musb_write_txhubport(void __iomem *mbase, u8 epnum, #define MUSB_FLAT_OFFSET(_epnum, _offset) \ (USB_OFFSET(USB_EP_NI0_TXMAXP) + (0x40 * (_epnum)) + (_offset)) -/* Not implemented - HW has seperate Tx/Rx FIFO */ +/* Not implemented - HW has separate Tx/Rx FIFO */ #define MUSB_TXCSR_MODE 0x0000 static inline void musb_write_txfifosz(void __iomem *mbase, u8 c_size) diff --git a/drivers/usb/serial/cypress_m8.c b/drivers/usb/serial/cypress_m8.c index a591ebec0f89..52a81a312b86 100644 --- a/drivers/usb/serial/cypress_m8.c +++ b/drivers/usb/serial/cypress_m8.c @@ -154,7 +154,7 @@ struct cypress_private { int isthrottled; /* if throttled, discard reads */ wait_queue_head_t delta_msr_wait; /* used for TIOCMIWAIT */ char prev_status, diff_status; /* used for TIOCMIWAIT */ - /* we pass a pointer to this as the arguement sent to + /* we pass a pointer to this as the argument sent to cypress_set_termios old_termios */ struct ktermios tmp_termios; /* stores the old termios settings */ }; diff --git a/drivers/video/omap/lcdc.c b/drivers/video/omap/lcdc.c index a33483910dc8..9557f963662e 100644 --- a/drivers/video/omap/lcdc.c +++ b/drivers/video/omap/lcdc.c @@ -389,7 +389,7 @@ static int omap_lcdc_enable_plane(int plane, int enable) /* * Configure the LCD DMA for a palette load operation and do the palette * downloading synchronously. We don't use the frame+palette load mode of - * the controller, since the palette can always be downloaded seperately. + * the controller, since the palette can always be downloaded separately. 
*/ static void load_palette(void) { diff --git a/drivers/video/s1d13xxxfb.c b/drivers/video/s1d13xxxfb.c index 0deb0a8867b7..7b63429f1a7c 100644 --- a/drivers/video/s1d13xxxfb.c +++ b/drivers/video/s1d13xxxfb.c @@ -517,12 +517,12 @@ s1d13xxxfb_bitblt_copyarea(struct fb_info *info, const struct fb_copyarea *area) src = (sy * stride) + (bpp * sx); } - /* set source adress */ + /* set source address */ s1d13xxxfb_writereg(info->par, S1DREG_BBLT_SRC_START0, (src & 0xff)); s1d13xxxfb_writereg(info->par, S1DREG_BBLT_SRC_START1, (src >> 8) & 0x00ff); s1d13xxxfb_writereg(info->par, S1DREG_BBLT_SRC_START2, (src >> 16) & 0x00ff); - /* set destination adress */ + /* set destination address */ s1d13xxxfb_writereg(info->par, S1DREG_BBLT_DST_START0, (dst & 0xff)); s1d13xxxfb_writereg(info->par, S1DREG_BBLT_DST_START1, (dst >> 8) & 0x00ff); s1d13xxxfb_writereg(info->par, S1DREG_BBLT_DST_START2, (dst >> 16) & 0x00ff); diff --git a/drivers/video/sm501fb.c b/drivers/video/sm501fb.c index 35370d0ecf03..b7dc1800efa9 100644 --- a/drivers/video/sm501fb.c +++ b/drivers/video/sm501fb.c @@ -411,7 +411,7 @@ static int sm501fb_set_par_common(struct fb_info *info, struct sm501fb_par *par = info->par; struct sm501fb_info *fbi = par->info; unsigned long pixclock; /* pixelclock in Hz */ - unsigned long sm501pixclock; /* pixelclock the 501 can achive in Hz */ + unsigned long sm501pixclock; /* pixelclock the 501 can achieve in Hz */ unsigned int mem_type; unsigned int clock_type; unsigned int head_addr; diff --git a/fs/affs/bitmap.c b/fs/affs/bitmap.c index dc5ef14bdc1c..8306d53307ed 100644 --- a/fs/affs/bitmap.c +++ b/fs/affs/bitmap.c @@ -128,7 +128,7 @@ err_range: /* * Allocate a block in the given allocation zone. * Since we have to byte-swap the bitmap on little-endian - * machines, this is rather expensive. Therefor we will + * machines, this is rather expensive. Therefore we will * preallocate up to 16 blocks from the same word, if * possible. We are not doing preallocations in the * header zone, though. diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 18d77297ccc8..364fcfc0c5df 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1393,7 +1393,7 @@ static inline void fill_note(struct memelfnote *note, const char *name, int type /* * fill up all the fields in prstatus from the given task struct, except - * registers which need to be filled up seperately. + * registers which need to be filled up separately. */ static void fill_prstatus(struct elf_prstatus *prstatus, struct task_struct *p, long signr) diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index b44ce0a0711c..b1d61d0bdfc7 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -54,7 +54,7 @@ void cifs_dfs_release_automount_timer(void) * Extracts sharename form full UNC. * i.e. strips from UNC trailing path that is not part of share * name and fixup missing '\' in the begining of DFS node refferal - * if neccessary. + * if necessary. * Returns pointer to share name on success or ERR_PTR on error. * Caller is responsible for freeing returned string. 
*/ diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 941441d3e386..0e22440d2f0f 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -3886,7 +3886,7 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, goto parse_DFS_referrals_exit; } - /* collect neccessary data from referrals */ + /* collect necessary data from referrals */ for (i = 0; i < *num_of_nodes; i++) { char *temp; int max_len; diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 82c415be87a4..12a9ec73a888 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -928,7 +928,7 @@ out2: } /** - * mext_check_argumants - Check whether move extent can be done + * mext_check_arguments - Check whether move extent can be done * * @orig_inode: original inode * @donor_inode: donor inode diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 1a822ce2b24b..ec14d19ce501 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -850,7 +850,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) req->in.args[0].size = sizeof(*arg); req->in.args[0].value = arg; req->out.numargs = 1; - /* Variable length arguement used for backward compatibility + /* Variable length argument used for backward compatibility with interface version < 7.5. Rest of init_out is zeroed by do_get_request(), so a short reply is not a problem */ req->out.argvar = 1; diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index edfee24f3636..0556f7fededd 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -992,7 +992,7 @@ static const struct lm_lockops nolock_ops = { /** * gfs2_lm_mount - mount a locking protocol * @sdp: the filesystem - * @args: mount arguements + * @args: mount arguments * @silent: if 1, don't complain if the FS isn't a GFS2 fs * * Returns: errno diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 006f9ad838a2..57ae203c8abf 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -1398,7 +1398,7 @@ int journal_stop(handle_t *handle) * the case where our storage is so fast that it is more optimal to go * ahead and force a flush and wait for the transaction to be committed * than it is to wait for an arbitrary amount of time for new writers to - * join the transaction. We acheive this by measuring how long it takes + * join the transaction. We achieve this by measuring how long it takes * to commit a transaction, and compare it with how long this * transaction has been running, and if run time < commit time then we * sleep for the delta and commit. This greatly helps super fast disks diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index a8587e90fd5a..143d43a93b72 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1528,7 +1528,7 @@ static void write_cinfo(__be32 **p, struct nfsd4_change_info *c) } } while (0); /* Encode as an array of strings the string given with components - * seperated @sep. + * separated @sep. */ static __be32 nfsd4_encode_components(char sep, char *components, __be32 **pp, int *buflen) diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index c5e4a49e3a12..ccb9c44f478d 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -1757,7 +1757,7 @@ out: * ocfs2_file_lock() and ocfs2_file_unlock() map to a single pair of * flock() calls. The locking approach this requires is sufficiently * different from all other cluster lock types that we implement a - * seperate path to the "low-level" dlm calls. In particular: + * separate path to the "low-level" dlm calls. 
In particular: * * - No optimization of lock levels is done - we take at exactly * what's been requested. diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index d35a27f4523e..83e9b1249aed 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c @@ -453,7 +453,7 @@ static int ocfs2_get_clusters_nocache(struct inode *inode, if (i == -1) { /* * Holes can be larger than the maximum size of an - * extent, so we return their lengths in a seperate + * extent, so we return their lengths in a separate * field. */ if (hole_len) { diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c index 65c872761177..ecc04b5ede57 100644 --- a/fs/reiserfs/bitmap.c +++ b/fs/reiserfs/bitmap.c @@ -169,7 +169,7 @@ static int scan_bitmap_block(struct reiserfs_transaction_handle *th, return 0; // No free blocks in this bitmap } - /* search for a first zero bit -- beggining of a window */ + /* search for a first zero bit -- beginning of a window */ *beg = reiserfs_find_next_zero_le_bit ((unsigned long *)(bh->b_data), boundary, *beg); diff --git a/include/linux/hil.h b/include/linux/hil.h index 13352d7d0caf..523785a9de70 100644 --- a/include/linux/hil.h +++ b/include/linux/hil.h @@ -168,14 +168,14 @@ enum hil_command { HIL_CMD_PR6 = 0x45, /* Prompt6 */ HIL_CMD_PR7 = 0x46, /* Prompt7 */ HIL_CMD_PRM = 0x47, /* Prompt (General Purpose) */ - HIL_CMD_AK1 = 0x48, /* Acknowlege1 */ - HIL_CMD_AK2 = 0x49, /* Acknowlege2 */ - HIL_CMD_AK3 = 0x4a, /* Acknowlege3 */ - HIL_CMD_AK4 = 0x4b, /* Acknowlege4 */ - HIL_CMD_AK5 = 0x4c, /* Acknowlege5 */ - HIL_CMD_AK6 = 0x4d, /* Acknowlege6 */ - HIL_CMD_AK7 = 0x4e, /* Acknowlege7 */ - HIL_CMD_ACK = 0x4f, /* Acknowlege (General Purpose) */ + HIL_CMD_AK1 = 0x48, /* Acknowledge1 */ + HIL_CMD_AK2 = 0x49, /* Acknowledge2 */ + HIL_CMD_AK3 = 0x4a, /* Acknowledge3 */ + HIL_CMD_AK4 = 0x4b, /* Acknowledge4 */ + HIL_CMD_AK5 = 0x4c, /* Acknowledge5 */ + HIL_CMD_AK6 = 0x4d, /* Acknowledge6 */ + HIL_CMD_AK7 = 0x4e, /* Acknowledge7 */ + HIL_CMD_ACK = 0x4f, /* Acknowledge (General Purpose) */ /* 0x50 to 0x78 reserved for future use */ /* 0x80 to 0xEF device-specific commands */ diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h index 3a2b2d9b0472..de48d167568b 100644 --- a/include/linux/lru_cache.h +++ b/include/linux/lru_cache.h @@ -64,7 +64,7 @@ For crash recovery after replication node failure, usually the condition is softened to regions that _may_ have been target of in-flight WRITE IO, e.g. by only lazily clearing the on-disk write-intent bitmap, trading frequency of meta data transactions against amount of - (possibly unneccessary) resync traffic. + (possibly unnecessary) resync traffic. If we set a hard limit on the area that may be "hot" at any given time, we limit the amount of resync traffic needed for crash recovery. 
diff --git a/include/linux/sched.h b/include/linux/sched.h index abdfacc58653..a70957b138ed 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1533,7 +1533,7 @@ struct task_struct { struct list_head *scm_work_list; #ifdef CONFIG_FUNCTION_GRAPH_TRACER - /* Index of current stored adress in ret_stack */ + /* Index of current stored address in ret_stack */ int curr_ret_stack; /* Stack of return addresses for return function tracing */ struct ftrace_ret_stack *ret_stack; diff --git a/include/media/davinci/vpfe_capture.h b/include/media/davinci/vpfe_capture.h index d863e5e8426d..4314a5f6a087 100644 --- a/include/media/davinci/vpfe_capture.h +++ b/include/media/davinci/vpfe_capture.h @@ -165,7 +165,7 @@ struct vpfe_device { u8 started; /* * offset where second field starts from the starting of the - * buffer for field seperated YCbCr formats + * buffer for field separated YCbCr formats */ u32 field_off; }; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 8816a20c2597..aff48d657181 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -133,7 +133,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) } /* This function calculates a "timeout" which is equivalent to the timeout of a - * TCP connection after "boundary" unsucessful, exponentially backed-off + * TCP connection after "boundary" unsuccessful, exponentially backed-off * retransmissions with an initial RTO of TCP_RTO_MIN. */ static bool retransmits_timed_out(struct sock *sk, diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 0f7c6e6a4248..54e4c8bb23e7 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -743,7 +743,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m break; default: /* should not get here, PLINK_BLOCKED is dealt with at the - * beggining of the function + * beginning of the function */ spin_unlock_bh(&sta->lock); break; diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 023966b569bf..fbe94adee7ac 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -276,7 +276,7 @@ EXPORT_SYMBOL_GPL(ct_sip_parse_request); * tabs, spaces and continuation lines, which are treated as a single whitespace * character. * - * Some headers may appear multiple times. A comma seperated list of values is + * Some headers may appear multiple times. A comma separated list of values is * equivalent to multiple headers. 
*/ static const struct sip_header ct_sip_hdrs[] = { @@ -412,7 +412,7 @@ int ct_sip_get_header(const struct nf_conn *ct, const char *dptr, } EXPORT_SYMBOL_GPL(ct_sip_get_header); -/* Get next header field in a list of comma seperated values */ +/* Get next header field in a list of comma separated values */ static int ct_sip_next_header(const struct nf_conn *ct, const char *dptr, unsigned int dataoff, unsigned int datalen, enum sip_header_types type, diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index dd16e404424f..cbaac92dad59 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -1,6 +1,6 @@ /* * xt_hashlimit - Netfilter module to limit the number of packets per time - * seperately for each hashbucket (sourceip/sourceport/dstip/dstport) + * separately for each hashbucket (sourceip/sourceport/dstip/dstport) * * (C) 2003-2004 by Harald Welte * Copyright © CC Computer Consultants GmbH, 2007 - 2008 diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 4e4ca65cd320..500886bda9b4 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -475,7 +475,7 @@ static void sctp_do_8_2_transport_strike(struct sctp_association *asoc, * used to provide an upper bound to this doubling operation. * * Special Case: the first HB doesn't trigger exponential backoff. - * The first unacknowleged HB triggers it. We do this with a flag + * The first unacknowledged HB triggers it. We do this with a flag * that indicates that we have an outstanding HB. */ if (!is_hb || transport->hb_sent) { diff --git a/scripts/gfp-translate b/scripts/gfp-translate index 073cb6d152a0..d81b968d864e 100644 --- a/scripts/gfp-translate +++ b/scripts/gfp-translate @@ -19,7 +19,7 @@ usage() { exit 0 } -# Parse command-line arguements +# Parse command-line arguments while [ $# -gt 0 ]; do case $1 in --source) diff --git a/sound/pci/rme9652/hdspm.c b/sound/pci/rme9652/hdspm.c index a1b10d1a384d..db0ed1cbd982 100644 --- a/sound/pci/rme9652/hdspm.c +++ b/sound/pci/rme9652/hdspm.c @@ -2479,7 +2479,7 @@ static int snd_hdspm_put_qs_wire(struct snd_kcontrol *kcontrol, on MADICARD - playback mixer matrix: [channelout+64] [output] [value] - input(thru) mixer matrix: [channelin] [output] [value] - (better do 2 kontrols for seperation ?) + (better do 2 kontrols for separation ?) 
*/ #define HDSPM_MIXER(xname, xindex) \ diff --git a/sound/soc/codecs/wm8990.c b/sound/soc/codecs/wm8990.c index 341481e0e830..427614a2762b 100644 --- a/sound/soc/codecs/wm8990.c +++ b/sound/soc/codecs/wm8990.c @@ -990,7 +990,7 @@ static int wm8990_set_dai_pll(struct snd_soc_dai *codec_dai, int pll_id, reg = snd_soc_read(codec, WM8990_CLOCKING_2); snd_soc_write(codec, WM8990_CLOCKING_2, reg | WM8990_SYSCLK_SRC); - /* set up N , fractional mode and pre-divisor if neccessary */ + /* set up N , fractional mode and pre-divisor if necessary */ snd_soc_write(codec, WM8990_PLL1, pll_div.n | WM8990_SDM | (pll_div.div2?WM8990_PRESCALE:0)); snd_soc_write(codec, WM8990_PLL2, (u8)(pll_div.k>>8)); diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index e8daf5ca6fd2..44408c2621cf 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -321,7 +321,7 @@ static size_t __callchain__fprintf_graph(FILE *fp, struct callchain_node *self, new_depth_mask &= ~(1 << (depth - 1)); /* - * But we keep the older depth mask for the line seperator + * But we keep the older depth mask for the line separator * to keep the level link until we reach the last child */ ret += ipchain__fprintf_graph_line(fp, depth, depth_mask, -- cgit v1.2.3 From b9efa1b27e25b1286504973c0a6bf0f24106faa8 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Wed, 20 Jan 2010 16:06:27 -0500 Subject: nfs41: implement cb_recall_slot Drain the fore channel and reset the max_slots to the new value. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/callback.h | 8 ++++++++ fs/nfs/callback_proc.c | 32 ++++++++++++++++++++++++++++++++ fs/nfs/callback_xdr.c | 22 +++++++++++++++++++++- fs/nfs/nfs4_fs.h | 2 ++ fs/nfs/nfs4state.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/nfs_fs_sb.h | 2 ++ 6 files changed, 109 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index d4036be0b589..85a7cfd1b8dd 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -119,6 +119,14 @@ struct cb_recallanyargs { }; extern unsigned nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy); + +struct cb_recallslotargs { + struct sockaddr *crsa_addr; + uint32_t crsa_target_max_slots; +}; +extern unsigned nfs4_callback_recallslot(struct cb_recallslotargs *args, + void *dummy); + #endif /* CONFIG_NFS_V4_1 */ extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res); diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 4062f7690a33..e5155d9df595 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -361,4 +361,36 @@ out: dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); return status; } + +/* Reduce the fore channel's max_slots to the target value */ +unsigned nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy) +{ + struct nfs_client *clp; + struct nfs4_slot_table *fc_tbl; + int status; + + status = htonl(NFS4ERR_OP_NOT_IN_SESSION); + clp = nfs_find_client(args->crsa_addr, 4); + if (clp == NULL) + goto out; + + dprintk("NFS: CB_RECALL_SLOT request from %s target max slots %d\n", + rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR), + args->crsa_target_max_slots); + + fc_tbl = &clp->cl_session->fc_slot_table; + + status = htonl(NFS4ERR_BAD_HIGH_SLOT); + if (args->crsa_target_max_slots >= fc_tbl->max_slots || + args->crsa_target_max_slots < 1) + goto out; + + fc_tbl->target_max_slots = args->crsa_target_max_slots; + nfs41_handle_recall_slot(clp); + status = 
htonl(NFS4_OK); + nfs_put_client(clp); /* balance nfs_find_client */ +out: + dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); + return status; +} #endif /* CONFIG_NFS_V4_1 */ diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 08b430d922c4..8e66e20b59fd 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -24,6 +24,7 @@ #define CB_OP_SEQUENCE_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \ 4 + 1 + 3) #define CB_OP_RECALLANY_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) +#define CB_OP_RECALLSLOT_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) #endif /* CONFIG_NFS_V4_1 */ #define NFSDBG_FACILITY NFSDBG_CALLBACK @@ -349,6 +350,20 @@ static unsigned decode_recallany_args(struct svc_rqst *rqstp, return 0; } +static unsigned decode_recallslot_args(struct svc_rqst *rqstp, + struct xdr_stream *xdr, + struct cb_recallslotargs *args) +{ + __be32 *p; + + args->crsa_addr = svc_addr(rqstp); + p = read_buf(xdr, 4); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_BADXDR); + args->crsa_target_max_slots = ntohl(*p++); + return 0; +} + #endif /* CONFIG_NFS_V4_1 */ static __be32 encode_string(struct xdr_stream *xdr, unsigned int len, const char *str) @@ -557,6 +572,7 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op) case OP_CB_RECALL: case OP_CB_SEQUENCE: case OP_CB_RECALL_ANY: + case OP_CB_RECALL_SLOT: *op = &callback_ops[op_nr]; break; @@ -565,7 +581,6 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op) case OP_CB_NOTIFY: case OP_CB_PUSH_DELEG: case OP_CB_RECALLABLE_OBJ_AVAIL: - case OP_CB_RECALL_SLOT: case OP_CB_WANTS_CANCELLED: case OP_CB_NOTIFY_LOCK: return htonl(NFS4ERR_NOTSUPP); @@ -734,6 +749,11 @@ static struct callback_op callback_ops[] = { .decode_args = (callback_decode_arg_t)decode_recallany_args, .res_maxsize = CB_OP_RECALLANY_RES_MAXSZ, }, + [OP_CB_RECALL_SLOT] = { + .process_op = (callback_process_op_t)nfs4_callback_recallslot, + .decode_args = (callback_decode_arg_t)decode_recallslot_args, + .res_maxsize = CB_OP_RECALLSLOT_RES_MAXSZ, + }, #endif /* CONFIG_NFS_V4_1 */ }; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 0c6fda33d66e..a187200a7aac 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -46,6 +46,7 @@ enum nfs4_client_state { NFS4CLNT_DELEGRETURN, NFS4CLNT_SESSION_RESET, NFS4CLNT_SESSION_DRAINING, + NFS4CLNT_RECALL_SLOT, }; /* @@ -280,6 +281,7 @@ extern void nfs4_schedule_state_manager(struct nfs_client *); extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state); extern int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state); extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); +extern void nfs41_handle_recall_slot(struct nfs_client *clp); extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 8406cacd3240..9164758c1ace 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1249,6 +1249,12 @@ static int nfs4_reclaim_lease(struct nfs_client *clp) } #ifdef CONFIG_NFS_V4_1 +void nfs41_handle_recall_slot(struct nfs_client *clp) +{ + set_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state); + nfs4_schedule_state_recovery(clp); +} + void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags) { if (!flags) @@ -1299,9 +1305,38 @@ out: return status; } +static int nfs4_recall_slot(struct nfs_client *clp) 
+{ + struct nfs4_slot_table *fc_tbl = &clp->cl_session->fc_slot_table; + struct nfs4_channel_attrs *fc_attrs = &clp->cl_session->fc_attrs; + struct nfs4_slot *new, *old; + int i; + + nfs4_begin_drain_session(clp); + new = kmalloc(fc_tbl->target_max_slots * sizeof(struct nfs4_slot), + GFP_KERNEL); + if (!new) + return -ENOMEM; + + spin_lock(&fc_tbl->slot_tbl_lock); + for (i = 0; i < fc_tbl->target_max_slots; i++) + new[i].seq_nr = fc_tbl->slots[i].seq_nr; + old = fc_tbl->slots; + fc_tbl->slots = new; + fc_tbl->max_slots = fc_tbl->target_max_slots; + fc_tbl->target_max_slots = 0; + fc_attrs->max_reqs = fc_tbl->max_slots; + spin_unlock(&fc_tbl->slot_tbl_lock); + + kfree(old); + nfs4_end_drain_session(clp); + return 0; +} + #else /* CONFIG_NFS_V4_1 */ static int nfs4_reset_session(struct nfs_client *clp) { return 0; } static int nfs4_end_drain_session(struct nfs_client *clp) { return 0; } +static int nfs4_recall_slot(struct nfs_client *clp) { return 0; } #endif /* CONFIG_NFS_V4_1 */ /* Set NFS4CLNT_LEASE_EXPIRED for all v4.0 errors and for recoverable errors @@ -1398,6 +1433,15 @@ static void nfs4_state_manager(struct nfs_client *clp) nfs_client_return_marked_delegations(clp); continue; } + /* Recall session slots */ + if (test_and_clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state) + && nfs4_has_session(clp)) { + status = nfs4_recall_slot(clp); + if (status < 0) + goto out_error; + continue; + } + nfs4_clear_state_manager_bit(clp); /* Did we race with an attempt to give us more work? */ diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 34fc6be5bfcf..ecd9e6c74d06 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -193,6 +193,8 @@ struct nfs4_slot_table { int max_slots; /* # slots in table */ int highest_used_slotid; /* sent to server on each SEQ. * op for dynamic resizing */ + int target_max_slots; /* Set by CB_RECALL_SLOT as + * the new max_slots */ }; static inline int slot_idx(struct nfs4_slot_table *tbl, struct nfs4_slot *sp) -- cgit v1.2.3 From ba17686f62db88f6a591121e768a0c83a2a2647d Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 26 Jan 2010 21:24:04 -0500 Subject: nfs41 do not allocate unused back channel pages Signed-off-by: Andy Adamson [Trond.Myklebust@netapp.com: moved definition of svc_is_backchannel() into include/linux/sunrpc/bc_xprt.h.] 
Signed-off-by: Trond Myklebust --- include/linux/sunrpc/bc_xprt.h | 15 +++++++++++++++ net/sunrpc/svc.c | 4 ++++ 2 files changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h index 6508f0dc0eff..d7152b451e21 100644 --- a/include/linux/sunrpc/bc_xprt.h +++ b/include/linux/sunrpc/bc_xprt.h @@ -38,12 +38,27 @@ int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs); void xprt_destroy_backchannel(struct rpc_xprt *, int max_reqs); void bc_release_request(struct rpc_task *); int bc_send(struct rpc_rqst *req); + +/* + * Determine if a shared backchannel is in use + */ +static inline int svc_is_backchannel(const struct svc_rqst *rqstp) +{ + if (rqstp->rq_server->bc_xprt) + return 1; + return 0; +} #else /* CONFIG_NFS_V4_1 */ static inline int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs) { return 0; } + +static inline int svc_is_backchannel(const struct svc_rqst *rqstp) +{ + return 0; +} #endif /* CONFIG_NFS_V4_1 */ #endif /* _LINUX_SUNRPC_BC_XPRT_H */ diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 538ca433a56c..6dcf8c9c784c 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -506,6 +506,10 @@ svc_init_buffer(struct svc_rqst *rqstp, unsigned int size) { unsigned int pages, arghi; + /* bc_xprt uses fore channel allocated buffers */ + if (svc_is_backchannel(rqstp)) + return 1; + pages = size / PAGE_SIZE + 1; /* extra page as we hold both request and reply. * We assume one is at most one page */ -- cgit v1.2.3 From eeb5b4ae81f4a750355fa0c15f4fea22fdf83be1 Mon Sep 17 00:00:00 2001 From: Kevin Dankwardt Date: Wed, 10 Feb 2010 23:43:40 +0900 Subject: fat: Fix stat->f_namelen I found that the length of a file name when created cannot exceed 255 characters, yet, pathconf(), via statfs(), returns the maximum as 260. Signed-off-by: Kevin Dankwardt Signed-off-by: OGAWA Hirofumi --- fs/fat/inode.c | 2 +- fs/fat/namei_vfat.c | 6 +++--- include/linux/msdos_fs.h | 3 ++- 3 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 14da530b05ca..d0a504c8feef 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -558,7 +558,7 @@ static int fat_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_bavail = sbi->free_clusters; buf->f_fsid.val[0] = (u32)id; buf->f_fsid.val[1] = (u32)(id >> 32); - buf->f_namelen = sbi->options.isvfat ? 260 : 12; + buf->f_namelen = sbi->options.isvfat ? 
FAT_LFN_LEN : 12; return 0; } diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 411c192a05fa..c1ef50154868 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -502,14 +502,14 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname, *outlen = utf8s_to_utf16s(name, len, (wchar_t *)outname); if (*outlen < 0) return *outlen; - else if (*outlen > 255) + else if (*outlen > FAT_LFN_LEN) return -ENAMETOOLONG; op = &outname[*outlen * sizeof(wchar_t)]; } else { if (nls) { for (i = 0, ip = name, op = outname, *outlen = 0; - i < len && *outlen <= 255; + i < len && *outlen <= FAT_LFN_LEN; *outlen += 1) { if (escape && (*ip == ':')) { @@ -549,7 +549,7 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname, return -ENAMETOOLONG; } else { for (i = 0, ip = name, op = outname, *outlen = 0; - i < len && *outlen <= 255; + i < len && *outlen <= FAT_LFN_LEN; i++, *outlen += 1) { *op++ = *ip++; diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h index ce38f1caa5e1..34066e65fdeb 100644 --- a/include/linux/msdos_fs.h +++ b/include/linux/msdos_fs.h @@ -15,6 +15,7 @@ #define MSDOS_DPB_BITS 4 /* log2(MSDOS_DPB) */ #define MSDOS_DPS (SECTOR_SIZE / sizeof(struct msdos_dir_entry)) #define MSDOS_DPS_BITS 4 /* log2(MSDOS_DPS) */ +#define MSDOS_LONGNAME 256 /* maximum name length */ #define CF_LE_W(v) le16_to_cpu(v) #define CF_LE_L(v) le32_to_cpu(v) #define CT_LE_W(v) cpu_to_le16(v) @@ -47,8 +48,8 @@ #define DELETED_FLAG 0xe5 /* marks file as deleted when in name[0] */ #define IS_FREE(n) (!*(n) || *(n) == DELETED_FLAG) +#define FAT_LFN_LEN 255 /* maximum long name length */ #define MSDOS_NAME 11 /* maximum name length */ -#define MSDOS_LONGNAME 256 /* maximum name length */ #define MSDOS_SLOTS 21 /* max # of slots for short and long names */ #define MSDOS_DOT ". " /* ".", padded to MSDOS_NAME chars */ #define MSDOS_DOTDOT ".. " /* "..", padded to MSDOS_NAME chars */ -- cgit v1.2.3 From ced5b697a76d325e7a7ac7d382dbbb632c765093 Mon Sep 17 00:00:00 2001 From: Brandon Phiilps Date: Wed, 10 Feb 2010 01:20:06 -0800 Subject: x86: Avoid race condition in pci_enable_msix() Keep chip_data in create_irq_nr and destroy_irq. When two drivers are setting up MSI-X at the same time via pci_enable_msix() there is a race. See this dmesg excerpt: [ 85.170610] ixgbe 0000:02:00.1: irq 97 for MSI/MSI-X [ 85.170611] alloc irq_desc for 99 on node -1 [ 85.170613] igb 0000:08:00.1: irq 98 for MSI/MSI-X [ 85.170614] alloc kstat_irqs on node -1 [ 85.170616] alloc irq_2_iommu on node -1 [ 85.170617] alloc irq_desc for 100 on node -1 [ 85.170619] alloc kstat_irqs on node -1 [ 85.170621] alloc irq_2_iommu on node -1 [ 85.170625] ixgbe 0000:02:00.1: irq 99 for MSI/MSI-X [ 85.170626] alloc irq_desc for 101 on node -1 [ 85.170628] igb 0000:08:00.1: irq 100 for MSI/MSI-X [ 85.170630] alloc kstat_irqs on node -1 [ 85.170631] alloc irq_2_iommu on node -1 [ 85.170635] alloc irq_desc for 102 on node -1 [ 85.170636] alloc kstat_irqs on node -1 [ 85.170639] alloc irq_2_iommu on node -1 [ 85.170646] BUG: unable to handle kernel NULL pointer dereference at 0000000000000088 As you can see igb and ixgbe are both alternating on create_irq_nr() via pci_enable_msix() in their probe function. ixgbe: While looping through irq_desc_ptrs[] via create_irq_nr() ixgbe choses irq_desc_ptrs[102] and exits the loop, drops vector_lock and calls dynamic_irq_init. Then it sets irq_desc_ptrs[102]->chip_data = NULL via dynamic_irq_init(). 
igb: Grabs the vector_lock now and starts looping over irq_desc_ptrs[] via create_irq_nr(). It gets to irq_desc_ptrs[102] and does this: cfg_new = irq_desc_ptrs[102]->chip_data; if (cfg_new->vector != 0) continue; This hits the NULL deref. Another possible race exists via pci_disable_msix() in a driver or in the number of error paths that call free_msi_irqs(): destroy_irq() dynamic_irq_cleanup() which sets desc->chip_data = NULL ...race window... desc->chip_data = cfg; Remove the save and restore code for cfg in create_irq_nr() and destroy_irq() and take the desc->lock when checking the irq_cfg. Reported-and-analyzed-by: Brandon Philips Signed-off-by: Yinghai Lu LKML-Reference: <1265793639-15071-3-git-send-email-yinghai@kernel.org> Signed-off-by: Brandon Phililps Cc: stable@kernel.org Signed-off-by: H. Peter Anvin --- arch/x86/kernel/apic/io_apic.c | 18 ++++----------- include/linux/irq.h | 2 ++ kernel/irq/chip.c | 52 ++++++++++++++++++++++++++++++++++-------- 3 files changed, 50 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 53243ca7816d..c86591b906fa 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3228,12 +3228,9 @@ unsigned int create_irq_nr(unsigned int irq_want, int node) } spin_unlock_irqrestore(&vector_lock, flags); - if (irq > 0) { - dynamic_irq_init(irq); - /* restore it, in case dynamic_irq_init clear it */ - if (desc_new) - desc_new->chip_data = cfg_new; - } + if (irq > 0) + dynamic_irq_init_keep_chip_data(irq); + return irq; } @@ -3256,17 +3253,12 @@ void destroy_irq(unsigned int irq) { unsigned long flags; struct irq_cfg *cfg; - struct irq_desc *desc; - /* store it, in case dynamic_irq_cleanup clear it */ - desc = irq_to_desc(irq); - cfg = desc->chip_data; - dynamic_irq_cleanup(irq); - /* connect back irq_cfg */ - desc->chip_data = cfg; + dynamic_irq_cleanup_keep_chip_data(irq); free_irte(irq); spin_lock_irqsave(&vector_lock, flags); + cfg = irq_to_desc(irq)->chip_data; __clear_irq_vector(irq, cfg); spin_unlock_irqrestore(&vector_lock, flags); } diff --git a/include/linux/irq.h b/include/linux/irq.h index 451481c082b5..4d9b26e044bc 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -400,7 +400,9 @@ static inline int irq_has_action(unsigned int irq) /* Dynamic irq helper functions */ extern void dynamic_irq_init(unsigned int irq); +void dynamic_irq_init_keep_chip_data(unsigned int irq); extern void dynamic_irq_cleanup(unsigned int irq); +void dynamic_irq_cleanup_keep_chip_data(unsigned int irq); /* Set/get chip/data for an IRQ: */ extern int set_irq_chip(unsigned int irq, struct irq_chip *chip); diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index ecc3fa28f666..d70394f12ee9 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -18,11 +18,7 @@ #include "internals.h" -/** - * dynamic_irq_init - initialize a dynamically allocated irq - * @irq: irq number to initialize - */ -void dynamic_irq_init(unsigned int irq) +static void dynamic_irq_init_x(unsigned int irq, bool keep_chip_data) { struct irq_desc *desc; unsigned long flags; @@ -41,7 +37,8 @@ void dynamic_irq_init(unsigned int irq) desc->depth = 1; desc->msi_desc = NULL; desc->handler_data = NULL; - desc->chip_data = NULL; + if (!keep_chip_data) + desc->chip_data = NULL; desc->action = NULL; desc->irq_count = 0; desc->irqs_unhandled = 0; @@ -55,10 +52,26 @@ void dynamic_irq_init(unsigned int irq) } /** - * dynamic_irq_cleanup - cleanup a dynamically allocated irq + * dynamic_irq_init 
- initialize a dynamically allocated irq * @irq: irq number to initialize */ -void dynamic_irq_cleanup(unsigned int irq) +void dynamic_irq_init(unsigned int irq) +{ + dynamic_irq_init_x(irq, false); +} + +/** + * dynamic_irq_init_keep_chip_data - initialize a dynamically allocated irq + * @irq: irq number to initialize + * + * does not set irq_to_desc(irq)->chip_data to NULL + */ +void dynamic_irq_init_keep_chip_data(unsigned int irq) +{ + dynamic_irq_init_x(irq, true); +} + +static void dynamic_irq_cleanup_x(unsigned int irq, bool keep_chip_data) { struct irq_desc *desc = irq_to_desc(irq); unsigned long flags; @@ -77,7 +90,8 @@ void dynamic_irq_cleanup(unsigned int irq) } desc->msi_desc = NULL; desc->handler_data = NULL; - desc->chip_data = NULL; + if (!keep_chip_data) + desc->chip_data = NULL; desc->handle_irq = handle_bad_irq; desc->chip = &no_irq_chip; desc->name = NULL; @@ -85,6 +99,26 @@ void dynamic_irq_cleanup(unsigned int irq) raw_spin_unlock_irqrestore(&desc->lock, flags); } +/** + * dynamic_irq_cleanup - cleanup a dynamically allocated irq + * @irq: irq number to initialize + */ +void dynamic_irq_cleanup(unsigned int irq) +{ + dynamic_irq_cleanup_x(irq, false); +} + +/** + * dynamic_irq_cleanup_keep_chip_data - cleanup a dynamically allocated irq + * @irq: irq number to initialize + * + * does not set irq_to_desc(irq)->chip_data to NULL + */ +void dynamic_irq_cleanup_keep_chip_data(unsigned int irq) +{ + dynamic_irq_cleanup_x(irq, true); +} + /** * set_irq_chip - set the irq chip for an irq -- cgit v1.2.3 From 27811d8cabe56e0c3622251b049086f49face4ff Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 10 Feb 2010 01:20:07 -0800 Subject: x86: Move range related operation to one file We have almost the same code for mtrr cleanup and amd_bus checkup, and this code will also be used in replacing bootmem with early_res, so try to move them together and reuse it from different parts. Also rename update_range to subtract_range as that is what the function is actually doing. -v2: update comments as Christoph requested Signed-off-by: Yinghai Lu LKML-Reference: <1265793639-15071-4-git-send-email-yinghai@kernel.org> Signed-off-by: H. 
Peter Anvin --- arch/x86/kernel/cpu/mtrr/cleanup.c | 180 ++++--------------------------------- arch/x86/kernel/mmconf-fam10h_64.c | 7 +- arch/x86/pci/amd_bus.c | 70 +++------------ include/linux/range.h | 22 +++++ kernel/Makefile | 2 +- kernel/range.c | 163 +++++++++++++++++++++++++++++++++ 6 files changed, 214 insertions(+), 230 deletions(-) create mode 100644 include/linux/range.h create mode 100644 kernel/range.c (limited to 'include') diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index 09b1698e0466..669da09ab9a8 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c @@ -22,10 +22,10 @@ #include #include #include -#include #include #include #include +#include #include #include @@ -34,11 +34,6 @@ #include "mtrr.h" -struct res_range { - unsigned long start; - unsigned long end; -}; - struct var_mtrr_range_state { unsigned long base_pfn; unsigned long size_pfn; @@ -56,7 +51,7 @@ struct var_mtrr_state { /* Should be related to MTRR_VAR_RANGES nums */ #define RANGE_NUM 256 -static struct res_range __initdata range[RANGE_NUM]; +static struct range __initdata range[RANGE_NUM]; static int __initdata nr_range; static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; @@ -64,152 +59,11 @@ static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; static int __initdata debug_print; #define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0) - -static int __init -add_range(struct res_range *range, int nr_range, - unsigned long start, unsigned long end) -{ - /* Out of slots: */ - if (nr_range >= RANGE_NUM) - return nr_range; - - range[nr_range].start = start; - range[nr_range].end = end; - - nr_range++; - - return nr_range; -} - -static int __init -add_range_with_merge(struct res_range *range, int nr_range, - unsigned long start, unsigned long end) -{ - int i; - - /* Try to merge it with old one: */ - for (i = 0; i < nr_range; i++) { - unsigned long final_start, final_end; - unsigned long common_start, common_end; - - if (!range[i].end) - continue; - - common_start = max(range[i].start, start); - common_end = min(range[i].end, end); - if (common_start > common_end + 1) - continue; - - final_start = min(range[i].start, start); - final_end = max(range[i].end, end); - - range[i].start = final_start; - range[i].end = final_end; - return nr_range; - } - - /* Need to add it: */ - return add_range(range, nr_range, start, end); -} - -static void __init -subtract_range(struct res_range *range, unsigned long start, unsigned long end) -{ - int i, j; - - for (j = 0; j < RANGE_NUM; j++) { - if (!range[j].end) - continue; - - if (start <= range[j].start && end >= range[j].end) { - range[j].start = 0; - range[j].end = 0; - continue; - } - - if (start <= range[j].start && end < range[j].end && - range[j].start < end + 1) { - range[j].start = end + 1; - continue; - } - - - if (start > range[j].start && end >= range[j].end && - range[j].end > start - 1) { - range[j].end = start - 1; - continue; - } - - if (start > range[j].start && end < range[j].end) { - /* Find the new spare: */ - for (i = 0; i < RANGE_NUM; i++) { - if (range[i].end == 0) - break; - } - if (i < RANGE_NUM) { - range[i].end = range[j].end; - range[i].start = end + 1; - } else { - printk(KERN_ERR "run of slot in ranges\n"); - } - range[j].end = start - 1; - continue; - } - } -} - -static int __init cmp_range(const void *x1, const void *x2) -{ - const struct res_range *r1 = x1; - const struct res_range *r2 = x2; - long start1, start2; - - 
start1 = r1->start; - start2 = r2->start; - - return start1 - start2; -} - -static int __init clean_sort_range(struct res_range *range, int az) -{ - int i, j, k = az - 1, nr_range = 0; - - for (i = 0; i < k; i++) { - if (range[i].end) - continue; - for (j = k; j > i; j--) { - if (range[j].end) { - k = j; - break; - } - } - if (j == i) - break; - range[i].start = range[k].start; - range[i].end = range[k].end; - range[k].start = 0; - range[k].end = 0; - k--; - } - /* count it */ - for (i = 0; i < az; i++) { - if (!range[i].end) { - nr_range = i; - break; - } - } - - /* sort them */ - sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); - - return nr_range; -} - #define BIOS_BUG_MSG KERN_WARNING \ "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n" static int __init -x86_get_mtrr_mem_range(struct res_range *range, int nr_range, +x86_get_mtrr_mem_range(struct range *range, int nr_range, unsigned long extra_remove_base, unsigned long extra_remove_size) { @@ -223,13 +77,13 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, continue; base = range_state[i].base_pfn; size = range_state[i].size_pfn; - nr_range = add_range_with_merge(range, nr_range, base, - base + size - 1); + nr_range = add_range_with_merge(range, RANGE_NUM, nr_range, + base, base + size - 1); } if (debug_print) { printk(KERN_DEBUG "After WB checking\n"); for (i = 0; i < nr_range; i++) - printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", + printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n", range[i].start, range[i].end + 1); } @@ -252,10 +106,10 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, size -= (1<<(20-PAGE_SHIFT)) - base; base = 1<<(20-PAGE_SHIFT); } - subtract_range(range, base, base + size - 1); + subtract_range(range, RANGE_NUM, base, base + size - 1); } if (extra_remove_size) - subtract_range(range, extra_remove_base, + subtract_range(range, RANGE_NUM, extra_remove_base, extra_remove_base + extra_remove_size - 1); if (debug_print) { @@ -263,7 +117,7 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, for (i = 0; i < RANGE_NUM; i++) { if (!range[i].end) continue; - printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", + printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n", range[i].start, range[i].end + 1); } } @@ -273,20 +127,16 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, if (debug_print) { printk(KERN_DEBUG "After sorting\n"); for (i = 0; i < nr_range; i++) - printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", + printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n", range[i].start, range[i].end + 1); } - /* clear those is not used */ - for (i = nr_range; i < RANGE_NUM; i++) - memset(&range[i], 0, sizeof(range[i])); - return nr_range; } #ifdef CONFIG_MTRR_SANITIZER -static unsigned long __init sum_ranges(struct res_range *range, int nr_range) +static unsigned long __init sum_ranges(struct range *range, int nr_range) { unsigned long sum = 0; int i; @@ -621,7 +471,7 @@ static int __init parse_mtrr_spare_reg(char *arg) early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg); static int __init -x86_setup_var_mtrrs(struct res_range *range, int nr_range, +x86_setup_var_mtrrs(struct range *range, int nr_range, u64 chunk_size, u64 gran_size) { struct var_mtrr_state var_state; @@ -742,7 +592,7 @@ mtrr_calc_range_state(u64 chunk_size, u64 gran_size, unsigned long x_remove_base, unsigned long x_remove_size, int i) { - static struct res_range range_new[RANGE_NUM]; + static struct range 
range_new[RANGE_NUM]; unsigned long range_sums_new; static int nr_range_new; int num_reg; @@ -869,10 +719,10 @@ int __init mtrr_cleanup(unsigned address_bits) * [0, 1M) should always be covered by var mtrr with WB * and fixed mtrrs should take effect before var mtrr for it: */ - nr_range = add_range_with_merge(range, nr_range, 0, + nr_range = add_range_with_merge(range, RANGE_NUM, nr_range, 0, (1ULL<<(20 - PAGE_SHIFT)) - 1); /* Sort the ranges: */ - sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); + sort_range(range, nr_range); range_sums = sum_ranges(range, nr_range); printk(KERN_INFO "total RAM covered: %ldM\n", diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index 712d15fdc416..71825806cd44 100644 --- a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c @@ -7,6 +7,8 @@ #include #include #include +#include + #include #include #include @@ -30,11 +32,6 @@ static struct pci_hostbridge_probe pci_probes[] __cpuinitdata = { { 0xff, 0, PCI_VENDOR_ID_AMD, 0x1200 }, }; -struct range { - u64 start; - u64 end; -}; - static int __cpuinit cmp_range(const void *x1, const void *x2) { const struct range *r1 = x1; diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index 95ecbd495955..2356ea18697d 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c @@ -2,6 +2,8 @@ #include #include #include +#include + #include #ifdef CONFIG_X86_64 @@ -17,58 +19,6 @@ #ifdef CONFIG_X86_64 -#define RANGE_NUM 16 - -struct res_range { - size_t start; - size_t end; -}; - -static void __init update_range(struct res_range *range, size_t start, - size_t end) -{ - int i; - int j; - - for (j = 0; j < RANGE_NUM; j++) { - if (!range[j].end) - continue; - - if (start <= range[j].start && end >= range[j].end) { - range[j].start = 0; - range[j].end = 0; - continue; - } - - if (start <= range[j].start && end < range[j].end && range[j].start < end + 1) { - range[j].start = end + 1; - continue; - } - - - if (start > range[j].start && end >= range[j].end && range[j].end > start - 1) { - range[j].end = start - 1; - continue; - } - - if (start > range[j].start && end < range[j].end) { - /* find the new spare */ - for (i = 0; i < RANGE_NUM; i++) { - if (range[i].end == 0) - break; - } - if (i < RANGE_NUM) { - range[i].end = range[j].end; - range[i].start = end + 1; - } else { - printk(KERN_ERR "run of slot in ranges\n"); - } - range[j].end = start - 1; - continue; - } - } -} - struct pci_hostbridge_probe { u32 bus; u32 slot; @@ -111,6 +61,8 @@ static void __init get_pci_mmcfg_amd_fam10h_range(void) fam10h_mmconf_end = base + (1ULL<<(segn_busn_bits + 20)) - 1; } +#define RANGE_NUM 16 + /** * early_fill_mp_bus_to_node() * called before pcibios_scan_root and pci_scan_bus @@ -132,7 +84,7 @@ static int __init early_fill_mp_bus_info(void) struct resource *res; size_t start; size_t end; - struct res_range range[RANGE_NUM]; + struct range range[RANGE_NUM]; u64 val; u32 address; @@ -226,7 +178,7 @@ static int __init early_fill_mp_bus_info(void) if (end > 0xffff) end = 0xffff; update_res(info, start, end, IORESOURCE_IO, 1); - update_range(range, start, end); + subtract_range(range, RANGE_NUM, start, end); } /* add left over io port range to def node/link, [0, 0xffff] */ /* find the position */ @@ -256,14 +208,14 @@ static int __init early_fill_mp_bus_info(void) end = (val & 0xffffff800000ULL); printk(KERN_INFO "TOM: %016lx aka %ldM\n", end, end>>20); if (end < (1ULL<<32)) - update_range(range, 0, end - 1); + subtract_range(range, RANGE_NUM, 0, end - 1); 
/* get mmconfig */ get_pci_mmcfg_amd_fam10h_range(); /* need to take out mmconf range */ if (fam10h_mmconf_end) { printk(KERN_DEBUG "Fam 10h mmconf [%llx, %llx]\n", fam10h_mmconf_start, fam10h_mmconf_end); - update_range(range, fam10h_mmconf_start, fam10h_mmconf_end); + subtract_range(range, RANGE_NUM, fam10h_mmconf_start, fam10h_mmconf_end); } /* mmio resource */ @@ -318,7 +270,7 @@ static int __init early_fill_mp_bus_info(void) /* we got a hole */ endx = fam10h_mmconf_start - 1; update_res(info, start, endx, IORESOURCE_MEM, 0); - update_range(range, start, endx); + subtract_range(range, RANGE_NUM, start, endx); printk(KERN_CONT " ==> [%llx, %llx]", (u64)start, endx); start = fam10h_mmconf_end + 1; changed = 1; @@ -334,7 +286,7 @@ static int __init early_fill_mp_bus_info(void) } update_res(info, start, end, IORESOURCE_MEM, 1); - update_range(range, start, end); + subtract_range(range, RANGE_NUM, start, end); printk(KERN_CONT "\n"); } @@ -349,7 +301,7 @@ static int __init early_fill_mp_bus_info(void) rdmsrl(address, val); end = (val & 0xffffff800000ULL); printk(KERN_INFO "TOM2: %016lx aka %ldM\n", end, end>>20); - update_range(range, 1ULL<<32, end - 1); + subtract_range(range, RANGE_NUM, 1ULL<<32, end - 1); } /* diff --git a/include/linux/range.h b/include/linux/range.h new file mode 100644 index 000000000000..0789f1412e1f --- /dev/null +++ b/include/linux/range.h @@ -0,0 +1,22 @@ +#ifndef _LINUX_RANGE_H +#define _LINUX_RANGE_H + +struct range { + u64 start; + u64 end; +}; + +int add_range(struct range *range, int az, int nr_range, + u64 start, u64 end); + + +int add_range_with_merge(struct range *range, int az, int nr_range, + u64 start, u64 end); + +void subtract_range(struct range *range, int az, u64 start, u64 end); + +int clean_sort_range(struct range *range, int az); + +void sort_range(struct range *range, int nr_range); + +#endif diff --git a/kernel/Makefile b/kernel/Makefile index 864ff75d65f2..ad47330ccf32 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -10,7 +10,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \ - async.o + async.o range.o obj-y += groups.o ifdef CONFIG_FUNCTION_TRACER diff --git a/kernel/range.c b/kernel/range.c new file mode 100644 index 000000000000..71e0021281fe --- /dev/null +++ b/kernel/range.c @@ -0,0 +1,163 @@ +/* + * Range add and subtract + */ +#include +#include +#include + +#include + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif + +int add_range(struct range *range, int az, int nr_range, u64 start, u64 end) +{ + if (start > end) + return nr_range; + + /* Out of slots: */ + if (nr_range >= az) + return nr_range; + + range[nr_range].start = start; + range[nr_range].end = end; + + nr_range++; + + return nr_range; +} + +int add_range_with_merge(struct range *range, int az, int nr_range, + u64 start, u64 end) +{ + int i; + + if (start > end) + return nr_range; + + /* Try to merge it with old one: */ + for (i = 0; i < nr_range; i++) { + u64 final_start, final_end; + u64 common_start, common_end; + + if (!range[i].end) + continue; + + common_start = max(range[i].start, start); + common_end = min(range[i].end, end); + if (common_start > common_end + 1) + continue; + + final_start = min(range[i].start, start); + final_end = max(range[i].end, end); + + range[i].start = final_start; + range[i].end = final_end; + return nr_range; + } + + /* Need 
to add it: */ + return add_range(range, az, nr_range, start, end); +} + +void subtract_range(struct range *range, int az, u64 start, u64 end) +{ + int i, j; + + if (start > end) + return; + + for (j = 0; j < az; j++) { + if (!range[j].end) + continue; + + if (start <= range[j].start && end >= range[j].end) { + range[j].start = 0; + range[j].end = 0; + continue; + } + + if (start <= range[j].start && end < range[j].end && + range[j].start < end + 1) { + range[j].start = end + 1; + continue; + } + + + if (start > range[j].start && end >= range[j].end && + range[j].end > start - 1) { + range[j].end = start - 1; + continue; + } + + if (start > range[j].start && end < range[j].end) { + /* Find the new spare: */ + for (i = 0; i < az; i++) { + if (range[i].end == 0) + break; + } + if (i < az) { + range[i].end = range[j].end; + range[i].start = end + 1; + } else { + printk(KERN_ERR "run of slot in ranges\n"); + } + range[j].end = start - 1; + continue; + } + } +} + +static int cmp_range(const void *x1, const void *x2) +{ + const struct range *r1 = x1; + const struct range *r2 = x2; + s64 start1, start2; + + start1 = r1->start; + start2 = r2->start; + + return start1 - start2; +} + +int clean_sort_range(struct range *range, int az) +{ + int i, j, k = az - 1, nr_range = 0; + + for (i = 0; i < k; i++) { + if (range[i].end) + continue; + for (j = k; j > i; j--) { + if (range[j].end) { + k = j; + break; + } + } + if (j == i) + break; + range[i].start = range[k].start; + range[i].end = range[k].end; + range[k].start = 0; + range[k].end = 0; + k--; + } + /* count it */ + for (i = 0; i < az; i++) { + if (!range[i].end) { + nr_range = i; + break; + } + } + + /* sort them */ + sort(range, nr_range, sizeof(struct range), cmp_range, NULL); + + return nr_range; +} + +void sort_range(struct range *range, int nr_range) +{ + /* sort them */ + sort(range, nr_range, sizeof(struct range), cmp_range, NULL); +} -- cgit v1.2.3 From 9ad3f2c7c69659c343843393944d739fec1f2e73 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 10 Feb 2010 01:20:11 -0800 Subject: x86/pci: Add cap_resource() Prepare for 32bit pci root bus -v2: hpa said we should compare with (resource_size_t)~0 -v3: according to Linus to use MAX_RESOURCE instead. also need need to put related patches together -v4: according to Andrew, use min in cap_resource() Signed-off-by: Yinghai Lu LKML-Reference: <1265793639-15071-8-git-send-email-yinghai@kernel.org> Acked-by: Jesse Barnes Signed-off-by: H. 
Peter Anvin --- arch/x86/pci/amd_bus.c | 8 +++++--- arch/x86/pci/bus_numa.c | 4 ++++ include/linux/range.h | 8 ++++++++ 3 files changed, 17 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index f06bb1b4a80a..f7e13b63154e 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c @@ -201,7 +201,7 @@ static int __init early_fill_mp_bus_info(void) memset(range, 0, sizeof(range)); /* 0xfd00000000-0xffffffffff for HT */ - range[0].end = (0xfdULL<<32) - 1; + range[0].end = cap_resource((0xfdULL<<32) - 1); /* need to take out [0, TOM) for RAM*/ address = MSR_K8_TOP_MEM1; @@ -286,7 +286,8 @@ static int __init early_fill_mp_bus_info(void) } } - update_res(info, start, end, IORESOURCE_MEM, 1); + update_res(info, cap_resource(start), cap_resource(end), + IORESOURCE_MEM, 1); subtract_range(range, RANGE_NUM, start, end); printk(KERN_CONT "\n"); } @@ -321,7 +322,8 @@ static int __init early_fill_mp_bus_info(void) if (!range[i].end) continue; - update_res(info, range[i].start, range[i].end, + update_res(info, cap_resource(range[i].start), + cap_resource(range[i].end), IORESOURCE_MEM, 1); } } diff --git a/arch/x86/pci/bus_numa.c b/arch/x86/pci/bus_numa.c index 3411687b676e..3510778aa8bb 100644 --- a/arch/x86/pci/bus_numa.c +++ b/arch/x86/pci/bus_numa.c @@ -1,5 +1,6 @@ #include #include +#include #include "bus_numa.h" @@ -55,6 +56,9 @@ void __devinit update_res(struct pci_root_info *info, resource_size_t start, if (start > end) return; + if (start == MAX_RESOURCE) + return; + if (!merge) goto addit; diff --git a/include/linux/range.h b/include/linux/range.h index 0789f1412e1f..bd184a5db791 100644 --- a/include/linux/range.h +++ b/include/linux/range.h @@ -19,4 +19,12 @@ int clean_sort_range(struct range *range, int az); void sort_range(struct range *range, int nr_range); +#define MAX_RESOURCE ((resource_size_t)~0) +static inline resource_size_t cap_resource(u64 val) +{ + if (val > MAX_RESOURCE) + return MAX_RESOURCE; + + return val; +} #endif -- cgit v1.2.3 From c3ae90c099bb62387507e86da7cf799850444b08 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Tue, 9 Feb 2010 05:49:11 +0000 Subject: drm: introduce drm_gem_object_[handle_]unreference_unlocked This patch introduces the drm_gem_object_unreference_unlocked and drm_gem_object_handle_unreference_unlocked functions that do not require holding struct_mutex. drm_gem_object_unreference_unlocked calls the new ->gem_free_object_unlocked entry point if available, and otherwise just takes struct_mutex and just calls ->gem_free_object Signed-off-by: Luca Barbieri Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_gem.c | 49 ++++++++++++++++++++++++++++++++++++++++++----- include/drm/drmP.h | 28 ++++++++++++++++++++++++--- 2 files changed, 69 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 8bf3770f294e..4018b3bfc72e 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -411,8 +411,19 @@ drm_gem_release(struct drm_device *dev, struct drm_file *file_private) mutex_unlock(&dev->struct_mutex); } +static void +drm_gem_object_free_common(struct drm_gem_object *obj) +{ + struct drm_device *dev = obj->dev; + fput(obj->filp); + atomic_dec(&dev->object_count); + atomic_sub(obj->size, &dev->object_memory); + kfree(obj); +} + /** * Called after the last reference to the object has been lost. 
+ * Must be called holding struct_ mutex * * Frees the object */ @@ -427,13 +438,39 @@ drm_gem_object_free(struct kref *kref) if (dev->driver->gem_free_object != NULL) dev->driver->gem_free_object(obj); - fput(obj->filp); - atomic_dec(&dev->object_count); - atomic_sub(obj->size, &dev->object_memory); - kfree(obj); + drm_gem_object_free_common(obj); } EXPORT_SYMBOL(drm_gem_object_free); +/** + * Called after the last reference to the object has been lost. + * Must be called without holding struct_mutex + * + * Frees the object + */ +void +drm_gem_object_free_unlocked(struct kref *kref) +{ + struct drm_gem_object *obj = (struct drm_gem_object *) kref; + struct drm_device *dev = obj->dev; + + if (dev->driver->gem_free_object_unlocked != NULL) + dev->driver->gem_free_object_unlocked(obj); + else if (dev->driver->gem_free_object != NULL) { + mutex_lock(&dev->struct_mutex); + dev->driver->gem_free_object(obj); + mutex_unlock(&dev->struct_mutex); + } + + drm_gem_object_free_common(obj); +} +EXPORT_SYMBOL(drm_gem_object_free_unlocked); + +static void drm_gem_object_ref_bug(struct kref *list_kref) +{ + BUG(); +} + /** * Called after the last handle to the object has been closed * @@ -458,8 +495,10 @@ drm_gem_object_handle_free(struct kref *kref) /* * The object name held a reference to this object, drop * that now. + * + * This cannot be the last reference, since the handle holds one too. */ - drm_gem_object_unreference(obj); + kref_put(&obj->refcount, drm_gem_object_ref_bug); } else spin_unlock(&dev->object_name_lock); diff --git a/include/drm/drmP.h b/include/drm/drmP.h index ffac157fb5b2..4a3c4e441027 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -801,6 +801,7 @@ struct drm_driver { */ int (*gem_init_object) (struct drm_gem_object *obj); void (*gem_free_object) (struct drm_gem_object *obj); + void (*gem_free_object_unlocked) (struct drm_gem_object *obj); /* vga arb irq handler */ void (*vgaarb_irq)(struct drm_device *dev, bool state); @@ -1427,6 +1428,7 @@ extern void drm_sysfs_connector_remove(struct drm_connector *connector); int drm_gem_init(struct drm_device *dev); void drm_gem_destroy(struct drm_device *dev); void drm_gem_object_free(struct kref *kref); +void drm_gem_object_free_unlocked(struct kref *kref); struct drm_gem_object *drm_gem_object_alloc(struct drm_device *dev, size_t size); void drm_gem_object_handle_free(struct kref *kref); @@ -1443,10 +1445,15 @@ drm_gem_object_reference(struct drm_gem_object *obj) static inline void drm_gem_object_unreference(struct drm_gem_object *obj) { - if (obj == NULL) - return; + if (obj != NULL) + kref_put(&obj->refcount, drm_gem_object_free); +} - kref_put(&obj->refcount, drm_gem_object_free); +static inline void +drm_gem_object_unreference_unlocked(struct drm_gem_object *obj) +{ + if (obj != NULL) + kref_put(&obj->refcount, drm_gem_object_free_unlocked); } int drm_gem_handle_create(struct drm_file *file_priv, @@ -1475,6 +1482,21 @@ drm_gem_object_handle_unreference(struct drm_gem_object *obj) drm_gem_object_unreference(obj); } +static inline void +drm_gem_object_handle_unreference_unlocked(struct drm_gem_object *obj) +{ + if (obj == NULL) + return; + + /* + * Must bump handle count first as this may be the last + * ref, in which case the object would disappear before we + * checked for a name + */ + kref_put(&obj->handlecount, drm_gem_object_handle_free); + drm_gem_object_unreference_unlocked(obj); +} + struct drm_gem_object *drm_gem_object_lookup(struct drm_device *dev, struct drm_file *filp, u32 handle); -- cgit v1.2.3 From 
cf4f7e8c47b3298cf90239bad5d86fdcad0c89eb Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Thu, 11 Feb 2010 15:40:25 -0800 Subject: RDMA/cm: Remove unused definition of RDMA_PS_SCTP The defined SCTP number is incorrect (0x83, rather than 0x84), and since it is not used anywhere, simply remove the definition. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- include/rdma/rdma_cm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index c6b2962315b3..4fae90304648 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -67,7 +67,6 @@ enum rdma_port_space { RDMA_PS_IPOIB = 0x0002, RDMA_PS_TCP = 0x0106, RDMA_PS_UDP = 0x0111, - RDMA_PS_SCTP = 0x0183 }; struct rdma_addr { -- cgit v1.2.3 From 08677214e318297f228237be0042aac754f48f1d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 10 Feb 2010 01:20:20 -0800 Subject: x86: Make 64 bit use early_res instead of bootmem before slab Finally we can use early_res to replace bootmem for x86_64 now. Still can use CONFIG_NO_BOOTMEM to enable it or not. -v2: fix 32bit compiling about MAX_DMA32_PFN -v3: folded bug fix from LKML message below Signed-off-by: Yinghai Lu LKML-Reference: <4B747239.4070907@kernel.org> Signed-off-by: H. Peter Anvin --- arch/x86/Kconfig | 13 +++ arch/x86/include/asm/e820.h | 6 ++ arch/x86/kernel/e820.c | 159 +++++++++++++++++++++++++++++++++--- arch/x86/kernel/setup.c | 2 + arch/x86/mm/init_64.c | 4 + arch/x86/mm/numa_64.c | 20 +++-- include/linux/bootmem.h | 7 ++ include/linux/mm.h | 5 ++ include/linux/mmzone.h | 2 + mm/bootmem.c | 195 +++++++++++++++++++++++++++++++++++++++++++- mm/page_alloc.c | 59 +++++++++++++- mm/percpu.c | 3 + mm/sparse-vmemmap.c | 2 +- 13 files changed, 454 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index eb4092568f9e..95439843cebc 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -568,6 +568,19 @@ config PARAVIRT_DEBUG Enable to debug paravirt_ops internals. Specifically, BUG if a paravirt_op is missing when it is called. +config NO_BOOTMEM + default y + bool "Disable Bootmem code" + depends on X86_64 + ---help--- + Use early_res directly instead of bootmem before slab is ready. 
+ - allocator (buddy) [generic] + - early allocator (bootmem) [generic] + - very early allocator (reserve_early*()) [x86] + - very very early allocator (early brk model) [x86] + So reduce one layer between early allocator to final allocator + + config MEMTEST bool "Memtest" ---help--- diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index 761249e396fe..7d72e5fb7008 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h @@ -117,6 +117,12 @@ extern void free_early(u64 start, u64 end); extern void early_res_to_bootmem(u64 start, u64 end); extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); +void reserve_early_without_check(u64 start, u64 end, char *name); +u64 find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end, + u64 size, u64 align); +#include +int get_free_all_memory_range(struct range **rangep, int nodeid); + extern unsigned long e820_end_of_ram_pfn(void); extern unsigned long e820_end_of_low_ram_pfn(void); extern int e820_find_active_region(const struct e820entry *ei, diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index e09c18c8f3c1..90a85295f332 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -977,6 +977,25 @@ void __init reserve_early(u64 start, u64 end, char *name) __reserve_early(start, end, name, 0); } +void __init reserve_early_without_check(u64 start, u64 end, char *name) +{ + struct early_res *r; + + if (start >= end) + return; + + __check_and_double_early_res(end); + + r = &early_res[early_res_count]; + + r->start = start; + r->end = end; + r->overlap_ok = 0; + if (name) + strncpy(r->name, name, sizeof(r->name) - 1); + early_res_count++; +} + void __init free_early(u64 start, u64 end) { struct early_res *r; @@ -991,6 +1010,94 @@ void __init free_early(u64 start, u64 end) drop_range(i); } +#ifdef CONFIG_NO_BOOTMEM +static void __init subtract_early_res(struct range *range, int az) +{ + int i, count; + u64 final_start, final_end; + int idx = 0; + + count = 0; + for (i = 0; i < max_early_res && early_res[i].end; i++) + count++; + + /* need to skip first one ?*/ + if (early_res != early_res_x) + idx = 1; + +#if 1 + printk(KERN_INFO "Subtract (%d early reservations)\n", count); +#endif + for (i = idx; i < count; i++) { + struct early_res *r = &early_res[i]; +#if 0 + printk(KERN_INFO " #%d [%010llx - %010llx] %15s", i, + r->start, r->end, r->name); +#endif + final_start = PFN_DOWN(r->start); + final_end = PFN_UP(r->end); + if (final_start >= final_end) { +#if 0 + printk(KERN_CONT "\n"); +#endif + continue; + } +#if 0 + printk(KERN_CONT " subtract pfn [%010llx - %010llx]\n", + final_start, final_end); +#endif + subtract_range(range, az, final_start, final_end); + } + +} + +int __init get_free_all_memory_range(struct range **rangep, int nodeid) +{ + int i, count; + u64 start = 0, end; + u64 size; + u64 mem; + struct range *range; + int nr_range; + + count = 0; + for (i = 0; i < max_early_res && early_res[i].end; i++) + count++; + + count *= 2; + + size = sizeof(struct range) * count; +#ifdef MAX_DMA32_PFN + if (max_pfn_mapped > MAX_DMA32_PFN) + start = MAX_DMA32_PFN << PAGE_SHIFT; +#endif + end = max_pfn_mapped << PAGE_SHIFT; + mem = find_e820_area(start, end, size, sizeof(struct range)); + if (mem == -1ULL) + panic("can not find more space for range free"); + + range = __va(mem); + /* use early_node_map[] and early_res to get range array at first */ + memset(range, 0, size); + nr_range = 0; + + /* need to go over early_node_map to find out good range for node */ + nr_range = 
add_from_early_node_map(range, count, nr_range, nodeid); + subtract_early_res(range, count); + nr_range = clean_sort_range(range, count); + + /* need to clear it ? */ + if (nodeid == MAX_NUMNODES) { + memset(&early_res[0], 0, + sizeof(struct early_res) * max_early_res); + early_res = NULL; + max_early_res = 0; + } + + *rangep = range; + return nr_range; +} +#else void __init early_res_to_bootmem(u64 start, u64 end) { int i, count; @@ -1028,6 +1135,7 @@ void __init early_res_to_bootmem(u64 start, u64 end) max_early_res = 0; early_res_count = 0; } +#endif /* Check for already reserved areas */ static inline int __init bad_addr(u64 *addrp, u64 size, u64 align) @@ -1081,6 +1189,35 @@ again: return changed; } +/* + * Find a free area with specified alignment in a specific range. + * only with the area.between start to end is active range from early_node_map + * so they are good as RAM + */ +u64 __init find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end, + u64 size, u64 align) +{ + u64 addr, last; + + addr = round_up(ei_start, align); + if (addr < start) + addr = round_up(start, align); + if (addr >= ei_last) + goto out; + while (bad_addr(&addr, size, align) && addr+size <= ei_last) + ; + last = addr + size; + if (last > ei_last) + goto out; + if (last > end) + goto out; + + return addr; + +out: + return -1ULL; +} + /* * Find a free area with specified alignment in a specific range. */ @@ -1090,24 +1227,20 @@ u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align) for (i = 0; i < e820.nr_map; i++) { struct e820entry *ei = &e820.map[i]; - u64 addr, last; - u64 ei_last; + u64 addr; + u64 ei_start, ei_last; if (ei->type != E820_RAM) continue; - addr = round_up(ei->addr, align); + ei_last = ei->addr + ei->size; - if (addr < start) - addr = round_up(start, align); - if (addr >= ei_last) - continue; - while (bad_addr(&addr, size, align) && addr+size <= ei_last) - ; - last = addr + size; - if (last > ei_last) - continue; - if (last > end) + ei_start = ei->addr; + addr = find_early_area(ei_start, ei_last, start, end, + size, align); + + if (addr == -1ULL) continue; + return addr; } return -1ULL; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index ea4141b48518..d49e168bda8c 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -967,7 +967,9 @@ void __init setup_arch(char **cmdline_p) #endif initmem_init(0, max_pfn, acpi, k8); +#ifndef CONFIG_NO_BOOTMEM early_res_to_bootmem(0, max_low_pfn<> PAGE_SHIFT; @@ -235,10 +237,13 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid); memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); - NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid]; + NODE_DATA(nodeid)->node_id = nodeid; NODE_DATA(nodeid)->node_start_pfn = start_pfn; NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn; +#ifndef CONFIG_NO_BOOTMEM + NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid]; + /* * Find a place for the bootmem map * nodedata_phys could be on other nodes by alloc_bootmem, @@ -275,6 +280,7 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid); free_bootmem_with_active_regions(nodeid, end); +#endif node_set_online(nodeid); } @@ -733,6 +739,10 @@ unsigned long __init numa_free_all_bootmem(void) for_each_online_node(i) pages += free_all_bootmem_node(NODE_DATA(i)); +#ifdef CONFIG_NO_BOOTMEM + pages += free_all_memory_core_early(MAX_NUMNODES); +#endif + return pages; } 
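To make the new allocation path concrete, here is a minimal standalone userspace sketch (not kernel code) of the add_range()/subtract_range() bookkeeping that get_free_all_memory_range() builds on: start from a node's RAM range, carve out the early reservations, and whatever survives is what free_all_memory_core_early() hands to the buddy allocator. The helpers below are simplified re-implementations of the kernel/range.c routines added earlier in this series, and the PFN values are invented purely for illustration.

#include <stdio.h>
#include <stdint.h>

struct range { uint64_t start, end; };  /* end is inclusive, as in <linux/range.h> */

#define AZ 16                           /* array size, standing in for RANGE_NUM */

static int add_range(struct range *r, int az, int nr, uint64_t s, uint64_t e)
{
        if (s > e || nr >= az)          /* empty input or out of slots */
                return nr;
        r[nr].start = s;
        r[nr].end = e;
        return nr + 1;
}

/* Carve [s, e] out of every range in the array, splitting a range when needed. */
static void subtract_range(struct range *r, int az, uint64_t s, uint64_t e)
{
        int i, j;

        for (j = 0; j < az; j++) {
                if (!r[j].end)
                        continue;
                if (s <= r[j].start && e >= r[j].end) {          /* covers it all */
                        r[j].start = r[j].end = 0;
                } else if (s <= r[j].start && e >= r[j].start) { /* clips the head */
                        r[j].start = e + 1;
                } else if (e >= r[j].end && s <= r[j].end) {     /* clips the tail */
                        r[j].end = s - 1;
                } else if (s > r[j].start && e < r[j].end) {     /* splits it in two */
                        for (i = 0; i < az; i++)
                                if (!r[i].end)
                                        break;
                        if (i < az) {
                                r[i].start = e + 1;
                                r[i].end = r[j].end;
                        }
                        r[j].end = s - 1;
                }
        }
}

int main(void)
{
        struct range range[AZ] = { { 0, 0 } };
        int i, nr = 0;

        /* One node with RAM at PFNs 0x100-0xfff (made-up numbers). */
        nr = add_range(range, AZ, nr, 0x100, 0xfff);

        /* Subtract two "early reservations", e.g. kernel image and page tables. */
        subtract_range(range, AZ, 0x100, 0x1ff);
        subtract_range(range, AZ, 0x800, 0x8ff);

        /* What is left is what free_all_memory_core_early() would release. */
        for (i = 0; i < AZ; i++)
                if (range[i].end)
                        printf("free PFNs: %#llx - %#llx\n",
                               (unsigned long long)range[i].start,
                               (unsigned long long)range[i].end);
        return 0;
}

This prints the two surviving PFN ranges (0x200-0x7ff and 0x900-0xfff), which is the shape of data that the mm/bootmem.c hunk further below walks in free_all_memory_core_early() before handing pages to __free_pages_bootmem().
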
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index b10ec49ee2dd..266ab9291232 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -23,6 +23,7 @@ extern unsigned long max_pfn; extern unsigned long saved_max_pfn; #endif +#ifndef CONFIG_NO_BOOTMEM /* * node_bootmem_map is a map pointer - the bits represent all physical * memory pages (including holes) on the node. @@ -37,6 +38,7 @@ typedef struct bootmem_data { } bootmem_data_t; extern bootmem_data_t bootmem_node_data[]; +#endif extern unsigned long bootmem_bootmap_pages(unsigned long); @@ -46,6 +48,7 @@ extern unsigned long init_bootmem_node(pg_data_t *pgdat, unsigned long endpfn); extern unsigned long init_bootmem(unsigned long addr, unsigned long memend); +unsigned long free_all_memory_core_early(int nodeid); extern unsigned long free_all_bootmem_node(pg_data_t *pgdat); extern unsigned long free_all_bootmem(void); @@ -84,6 +87,10 @@ extern void *__alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal); +void *__alloc_bootmem_node_high(pg_data_t *pgdat, + unsigned long size, + unsigned long align, + unsigned long goal); extern void *__alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size, unsigned long align, diff --git a/include/linux/mm.h b/include/linux/mm.h index 8b2fa8593c61..f2c5b3cee8a1 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -12,6 +12,7 @@ #include #include #include +#include struct mempolicy; struct anon_vma; @@ -1049,6 +1050,10 @@ extern void get_pfn_range_for_nid(unsigned int nid, extern unsigned long find_min_pfn_with_active_regions(void); extern void free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn); +int add_from_early_node_map(struct range *range, int az, + int nr_range, int nid); +void *__alloc_memory_core_early(int nodeid, u64 size, u64 align, + u64 goal, u64 limit); typedef int (*work_fn_t)(unsigned long, unsigned long, void *); extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data); extern void sparse_memory_present_with_active_regions(int nid); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 30fe668c2542..eae8387b6007 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -620,7 +620,9 @@ typedef struct pglist_data { struct page_cgroup *node_page_cgroup; #endif #endif +#ifndef CONFIG_NO_BOOTMEM struct bootmem_data *bdata; +#endif #ifdef CONFIG_MEMORY_HOTPLUG /* * Must be held any time you expect node_start_pfn, node_present_pages diff --git a/mm/bootmem.c b/mm/bootmem.c index 7d1486875e1c..d7c791ef0036 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -32,6 +33,7 @@ unsigned long max_pfn; unsigned long saved_max_pfn; #endif +#ifndef CONFIG_NO_BOOTMEM bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata; static struct list_head bdata_list __initdata = LIST_HEAD_INIT(bdata_list); @@ -142,7 +144,7 @@ unsigned long __init init_bootmem(unsigned long start, unsigned long pages) min_low_pfn = start; return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages); } - +#endif /* * free_bootmem_late - free bootmem pages directly to page allocator * @addr: starting address of the range @@ -167,6 +169,60 @@ void __init free_bootmem_late(unsigned long addr, unsigned long size) } } +#ifdef CONFIG_NO_BOOTMEM +static void __init __free_pages_memory(unsigned long start, unsigned long end) +{ + int i; + unsigned long start_aligned, end_aligned; + int order = ilog2(BITS_PER_LONG); + + 
start_aligned = (start + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1); + end_aligned = end & ~(BITS_PER_LONG - 1); + + if (end_aligned <= start_aligned) { +#if 1 + printk(KERN_DEBUG " %lx - %lx\n", start, end); +#endif + for (i = start; i < end; i++) + __free_pages_bootmem(pfn_to_page(i), 0); + + return; + } + +#if 1 + printk(KERN_DEBUG " %lx %lx - %lx %lx\n", + start, start_aligned, end_aligned, end); +#endif + for (i = start; i < start_aligned; i++) + __free_pages_bootmem(pfn_to_page(i), 0); + + for (i = start_aligned; i < end_aligned; i += BITS_PER_LONG) + __free_pages_bootmem(pfn_to_page(i), order); + + for (i = end_aligned; i < end; i++) + __free_pages_bootmem(pfn_to_page(i), 0); +} + +unsigned long __init free_all_memory_core_early(int nodeid) +{ + int i; + u64 start, end; + unsigned long count = 0; + struct range *range = NULL; + int nr_range; + + nr_range = get_free_all_memory_range(&range, nodeid); + + for (i = 0; i < nr_range; i++) { + start = range[i].start; + end = range[i].end; + count += end - start; + __free_pages_memory(start, end); + } + + return count; +} +#else static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) { int aligned; @@ -227,6 +283,7 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) return count; } +#endif /** * free_all_bootmem_node - release a node's free pages to the buddy allocator @@ -237,7 +294,12 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) unsigned long __init free_all_bootmem_node(pg_data_t *pgdat) { register_page_bootmem_info_node(pgdat); +#ifdef CONFIG_NO_BOOTMEM + /* free_all_memory_core_early(MAX_NUMNODES) will be called later */ + return 0; +#else return free_all_bootmem_core(pgdat->bdata); +#endif } /** @@ -247,9 +309,14 @@ unsigned long __init free_all_bootmem_node(pg_data_t *pgdat) */ unsigned long __init free_all_bootmem(void) { +#ifdef CONFIG_NO_BOOTMEM + return free_all_memory_core_early(NODE_DATA(0)->node_id); +#else return free_all_bootmem_core(NODE_DATA(0)->bdata); +#endif } +#ifndef CONFIG_NO_BOOTMEM static void __init __free(bootmem_data_t *bdata, unsigned long sidx, unsigned long eidx) { @@ -344,6 +411,7 @@ static int __init mark_bootmem(unsigned long start, unsigned long end, } BUG(); } +#endif /** * free_bootmem_node - mark a page range as usable @@ -358,6 +426,12 @@ static int __init mark_bootmem(unsigned long start, unsigned long end, void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, unsigned long size) { +#ifdef CONFIG_NO_BOOTMEM + free_early(physaddr, physaddr + size); +#if 0 + printk(KERN_DEBUG "free %lx %lx\n", physaddr, size); +#endif +#else unsigned long start, end; kmemleak_free_part(__va(physaddr), size); @@ -366,6 +440,7 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, end = PFN_DOWN(physaddr + size); mark_bootmem_node(pgdat->bdata, start, end, 0, 0); +#endif } /** @@ -379,6 +454,12 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, */ void __init free_bootmem(unsigned long addr, unsigned long size) { +#ifdef CONFIG_NO_BOOTMEM + free_early(addr, addr + size); +#if 0 + printk(KERN_DEBUG "free %lx %lx\n", addr, size); +#endif +#else unsigned long start, end; kmemleak_free_part(__va(addr), size); @@ -387,6 +468,7 @@ void __init free_bootmem(unsigned long addr, unsigned long size) end = PFN_DOWN(addr + size); mark_bootmem(start, end, 0, 0); +#endif } /** @@ -403,12 +485,17 @@ void __init free_bootmem(unsigned long addr, unsigned long size) int __init reserve_bootmem_node(pg_data_t 
*pgdat, unsigned long physaddr, unsigned long size, int flags) { +#ifdef CONFIG_NO_BOOTMEM + panic("no bootmem"); + return 0; +#else unsigned long start, end; start = PFN_DOWN(physaddr); end = PFN_UP(physaddr + size); return mark_bootmem_node(pgdat->bdata, start, end, 1, flags); +#endif } /** @@ -424,14 +511,20 @@ int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, int __init reserve_bootmem(unsigned long addr, unsigned long size, int flags) { +#ifdef CONFIG_NO_BOOTMEM + panic("no bootmem"); + return 0; +#else unsigned long start, end; start = PFN_DOWN(addr); end = PFN_UP(addr + size); return mark_bootmem(start, end, 1, flags); +#endif } +#ifndef CONFIG_NO_BOOTMEM static unsigned long __init align_idx(struct bootmem_data *bdata, unsigned long idx, unsigned long step) { @@ -582,12 +675,33 @@ static void * __init alloc_arch_preferred_bootmem(bootmem_data_t *bdata, #endif return NULL; } +#endif static void * __init ___alloc_bootmem_nopanic(unsigned long size, unsigned long align, unsigned long goal, unsigned long limit) { +#ifdef CONFIG_NO_BOOTMEM + void *ptr; + + if (WARN_ON_ONCE(slab_is_available())) + return kzalloc(size, GFP_NOWAIT); + +restart: + + ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align, goal, limit); + + if (ptr) + return ptr; + + if (goal != 0) { + goal = 0; + goto restart; + } + + return NULL; +#else bootmem_data_t *bdata; void *region; @@ -613,6 +727,7 @@ restart: } return NULL; +#endif } /** @@ -631,7 +746,13 @@ restart: void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, unsigned long goal) { - return ___alloc_bootmem_nopanic(size, align, goal, 0); + unsigned long limit = 0; + +#ifdef CONFIG_NO_BOOTMEM + limit = -1UL; +#endif + + return ___alloc_bootmem_nopanic(size, align, goal, limit); } static void * __init ___alloc_bootmem(unsigned long size, unsigned long align, @@ -665,9 +786,16 @@ static void * __init ___alloc_bootmem(unsigned long size, unsigned long align, void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal) { - return ___alloc_bootmem(size, align, goal, 0); + unsigned long limit = 0; + +#ifdef CONFIG_NO_BOOTMEM + limit = -1UL; +#endif + + return ___alloc_bootmem(size, align, goal, limit); } +#ifndef CONFIG_NO_BOOTMEM static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata, unsigned long size, unsigned long align, unsigned long goal, unsigned long limit) @@ -684,6 +812,7 @@ static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata, return ___alloc_bootmem(size, align, goal, limit); } +#endif /** * __alloc_bootmem_node - allocate boot memory from a specific node @@ -706,7 +835,46 @@ void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, if (WARN_ON_ONCE(slab_is_available())) return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); +#ifdef CONFIG_NO_BOOTMEM + return __alloc_memory_core_early(pgdat->node_id, size, align, + goal, -1ULL); +#else return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0); +#endif +} + +void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size, + unsigned long align, unsigned long goal) +{ +#ifdef MAX_DMA32_PFN + unsigned long end_pfn; + + if (WARN_ON_ONCE(slab_is_available())) + return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); + + /* update goal according ...MAX_DMA32_PFN */ + end_pfn = pgdat->node_start_pfn + pgdat->node_spanned_pages; + + if (end_pfn > MAX_DMA32_PFN + (128 >> (20 - PAGE_SHIFT)) && + (goal >> PAGE_SHIFT) < MAX_DMA32_PFN) { + void *ptr; + unsigned long new_goal; 
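/*
 * Note (editorial, not part of the original patch): on x86,
 * MAX_DMA32_PFN corresponds to the 4 GiB boundary, so the retry below
 * moves the allocation goal up to the 4 GiB mark for nodes whose
 * memory extends past it, keeping this early allocation out of the
 * scarce memory below 4 GiB; if that attempt fails, the function
 * falls back to the caller's original goal.
 */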
+ + new_goal = MAX_DMA32_PFN << PAGE_SHIFT; +#ifdef CONFIG_NO_BOOTMEM + ptr = __alloc_memory_core_early(pgdat->node_id, size, align, + new_goal, -1ULL); +#else + ptr = alloc_bootmem_core(pgdat->bdata, size, align, + new_goal, 0); +#endif + if (ptr) + return ptr; + } +#endif + + return __alloc_bootmem_node(pgdat, size, align, goal); + } #ifdef CONFIG_SPARSEMEM @@ -720,6 +888,16 @@ void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, void * __init alloc_bootmem_section(unsigned long size, unsigned long section_nr) { +#ifdef CONFIG_NO_BOOTMEM + unsigned long pfn, goal, limit; + + pfn = section_nr_to_pfn(section_nr); + goal = pfn << PAGE_SHIFT; + limit = section_nr_to_pfn(section_nr + 1) << PAGE_SHIFT; + + return __alloc_memory_core_early(early_pfn_to_nid(pfn), size, + SMP_CACHE_BYTES, goal, limit); +#else bootmem_data_t *bdata; unsigned long pfn, goal, limit; @@ -729,6 +907,7 @@ void * __init alloc_bootmem_section(unsigned long size, bdata = &bootmem_node_data[early_pfn_to_nid(pfn)]; return alloc_bootmem_core(bdata, size, SMP_CACHE_BYTES, goal, limit); +#endif } #endif @@ -740,11 +919,16 @@ void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size, if (WARN_ON_ONCE(slab_is_available())) return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); +#ifdef CONFIG_NO_BOOTMEM + ptr = __alloc_memory_core_early(pgdat->node_id, size, align, + goal, -1ULL); +#else ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, align, goal, 0); if (ptr) return ptr; ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0); +#endif if (ptr) return ptr; @@ -795,6 +979,11 @@ void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, if (WARN_ON_ONCE(slab_is_available())) return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); +#ifdef CONFIG_NO_BOOTMEM + return __alloc_memory_core_early(pgdat->node_id, size, align, + goal, ARCH_LOW_ADDRESS_LIMIT); +#else return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, ARCH_LOW_ADDRESS_LIMIT); +#endif } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8deb9d0fd5b1..78821a28e394 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3435,6 +3435,59 @@ void __init free_bootmem_with_active_regions(int nid, } } +int __init add_from_early_node_map(struct range *range, int az, + int nr_range, int nid) +{ + int i; + u64 start, end; + + /* need to go over early_node_map to find out good range for node */ + for_each_active_range_index_in_nid(i, nid) { + start = early_node_map[i].start_pfn; + end = early_node_map[i].end_pfn; + nr_range = add_range(range, az, nr_range, start, end); + } + return nr_range; +} + +void * __init __alloc_memory_core_early(int nid, u64 size, u64 align, + u64 goal, u64 limit) +{ + int i; + void *ptr; + + /* need to go over early_node_map to find out good range for node */ + for_each_active_range_index_in_nid(i, nid) { + u64 addr; + u64 ei_start, ei_last; + + ei_last = early_node_map[i].end_pfn; + ei_last <<= PAGE_SHIFT; + ei_start = early_node_map[i].start_pfn; + ei_start <<= PAGE_SHIFT; + addr = find_early_area(ei_start, ei_last, + goal, limit, size, align); + + if (addr == -1ULL) + continue; + +#if 0 + printk(KERN_DEBUG "alloc (nid=%d %llx - %llx) (%llx - %llx) %llx %llx => %llx\n", + nid, + ei_start, ei_last, goal, limit, size, + align, addr); +#endif + + ptr = phys_to_virt(addr); + memset(ptr, 0, size); + reserve_early_without_check(addr, addr + size, "BOOTMEM"); + return ptr; + } + + return NULL; +} + + void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data) { int i; 
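As an aside, a minimal usage sketch of the new allocator (not part of the patch; the 1 MiB size, page alignment, and 4 GiB limit are illustrative, and MAX_DMA32_PFN is only defined on x86):

/* Sketch: ask for 1 MiB of node-local memory below 4 GiB early in boot,
 * falling back to "no limit" (-1ULL, as the callers above do) if the
 * low window is exhausted.  The returned memory is already zeroed by
 * __alloc_memory_core_early(). */
static void * __init example_early_alloc(int nid)
{
	u64 limit = (u64)MAX_DMA32_PFN << PAGE_SHIFT;
	void *p;

	p = __alloc_memory_core_early(nid, 1UL << 20, PAGE_SIZE, 0, limit);
	if (!p)
		p = __alloc_memory_core_early(nid, 1UL << 20, PAGE_SIZE,
					      0, -1ULL);
	return p;
}

Real callers in this series pass goal/limit pairs such as (goal, -1ULL) or the ARCH_LOW_ADDRESS_LIMIT seen in __alloc_bootmem_low_node() above.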
@@ -4467,7 +4520,11 @@ void __init set_dma_reserve(unsigned long new_dma_reserve) } #ifndef CONFIG_NEED_MULTIPLE_NODES -struct pglist_data __refdata contig_page_data = { .bdata = &bootmem_node_data[0] }; +struct pglist_data __refdata contig_page_data = { +#ifndef CONFIG_NO_BOOTMEM + .bdata = &bootmem_node_data[0] +#endif + }; EXPORT_SYMBOL(contig_page_data); #endif diff --git a/mm/percpu.c b/mm/percpu.c index 083e7c91e5f6..841defeeef86 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1929,7 +1929,10 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size, } /* copy and return the unused part */ memcpy(ptr, __per_cpu_load, ai->static_size); +#ifndef CONFIG_NO_BOOTMEM + /* fix partial free ! */ free_fn(ptr + size_sum, ai->unit_size - size_sum); +#endif } } diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index d9714bdcb4a3..9506c39942f6 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -40,7 +40,7 @@ static void * __init_refok __earlyonly_bootmem_alloc(int node, unsigned long align, unsigned long goal) { - return __alloc_bootmem_node(NODE_DATA(node), size, align, goal); + return __alloc_bootmem_node_high(NODE_DATA(node), size, align, goal); } -- cgit v1.2.3 From 9bdac914240759457175ac0d6529a37d2820bc4d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 10 Feb 2010 01:20:22 -0800 Subject: sparsemem: Put mem map for one node together. Add vmemmap_alloc_block_buf for mem map only. It will fallback to the old way if it cannot get a block that big. Before this patch, when a node have 128g ram installed, memmap are split into two parts or more. [ 0.000000] [ffffea0000000000-ffffea003fffffff] PMD -> [ffff880100600000-ffff88013e9fffff] on node 1 [ 0.000000] [ffffea0040000000-ffffea006fffffff] PMD -> [ffff88013ec00000-ffff88016ebfffff] on node 1 [ 0.000000] [ffffea0070000000-ffffea007fffffff] PMD -> [ffff882000600000-ffff8820105fffff] on node 0 [ 0.000000] [ffffea0080000000-ffffea00bfffffff] PMD -> [ffff882010800000-ffff8820507fffff] on node 0 [ 0.000000] [ffffea00c0000000-ffffea00dfffffff] PMD -> [ffff882050a00000-ffff8820709fffff] on node 0 [ 0.000000] [ffffea00e0000000-ffffea00ffffffff] PMD -> [ffff884000600000-ffff8840205fffff] on node 2 [ 0.000000] [ffffea0100000000-ffffea013fffffff] PMD -> [ffff884020800000-ffff8840607fffff] on node 2 [ 0.000000] [ffffea0140000000-ffffea014fffffff] PMD -> [ffff884060a00000-ffff8840709fffff] on node 2 [ 0.000000] [ffffea0150000000-ffffea017fffffff] PMD -> [ffff886000600000-ffff8860305fffff] on node 3 [ 0.000000] [ffffea0180000000-ffffea01bfffffff] PMD -> [ffff886030800000-ffff8860707fffff] on node 3 [ 0.000000] [ffffea01c0000000-ffffea01ffffffff] PMD -> [ffff888000600000-ffff8880405fffff] on node 4 [ 0.000000] [ffffea0200000000-ffffea022fffffff] PMD -> [ffff888040800000-ffff8880707fffff] on node 4 [ 0.000000] [ffffea0230000000-ffffea023fffffff] PMD -> [ffff88a000600000-ffff88a0105fffff] on node 5 [ 0.000000] [ffffea0240000000-ffffea027fffffff] PMD -> [ffff88a010800000-ffff88a0507fffff] on node 5 [ 0.000000] [ffffea0280000000-ffffea029fffffff] PMD -> [ffff88a050a00000-ffff88a0709fffff] on node 5 [ 0.000000] [ffffea02a0000000-ffffea02bfffffff] PMD -> [ffff88c000600000-ffff88c0205fffff] on node 6 [ 0.000000] [ffffea02c0000000-ffffea02ffffffff] PMD -> [ffff88c020800000-ffff88c0607fffff] on node 6 [ 0.000000] [ffffea0300000000-ffffea030fffffff] PMD -> [ffff88c060a00000-ffff88c0709fffff] on node 6 [ 0.000000] [ffffea0310000000-ffffea033fffffff] PMD -> [ffff88e000600000-ffff88e0305fffff] on node 7 [ 0.000000] 
[ffffea0340000000-ffffea037fffffff] PMD -> [ffff88e030800000-ffff88e0707fffff] on node 7 after patch will get [ 0.000000] [ffffea0000000000-ffffea006fffffff] PMD -> [ffff880100200000-ffff88016e5fffff] on node 0 [ 0.000000] [ffffea0070000000-ffffea00dfffffff] PMD -> [ffff882000200000-ffff8820701fffff] on node 1 [ 0.000000] [ffffea00e0000000-ffffea014fffffff] PMD -> [ffff884000200000-ffff8840701fffff] on node 2 [ 0.000000] [ffffea0150000000-ffffea01bfffffff] PMD -> [ffff886000200000-ffff8860701fffff] on node 3 [ 0.000000] [ffffea01c0000000-ffffea022fffffff] PMD -> [ffff888000200000-ffff8880701fffff] on node 4 [ 0.000000] [ffffea0230000000-ffffea029fffffff] PMD -> [ffff88a000200000-ffff88a0701fffff] on node 5 [ 0.000000] [ffffea02a0000000-ffffea030fffffff] PMD -> [ffff88c000200000-ffff88c0701fffff] on node 6 [ 0.000000] [ffffea0310000000-ffffea037fffffff] PMD -> [ffff88e000200000-ffff88e0701fffff] on node 7 -v2: change buf to vmemmap_buf instead according to Ingo also add CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER according to Ingo -v3: according to Andrew, use sizeof(name) instead of hard coded 15 Signed-off-by: Yinghai Lu LKML-Reference: <1265793639-15071-19-git-send-email-yinghai@kernel.org> Cc: Christoph Lameter Acked-by: Christoph Lameter Signed-off-by: H. Peter Anvin --- arch/x86/mm/init_64.c | 2 +- include/linux/mm.h | 7 ++++ mm/Kconfig | 4 ++ mm/sparse-vmemmap.c | 74 ++++++++++++++++++++++++++++++++- mm/sparse.c | 111 +++++++++++++++++++++++++++++++++++++++++++++++++- 5 files changed, 195 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 53158b7e5d46..e9b040e1cde5 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -977,7 +977,7 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node) if (pmd_none(*pmd)) { pte_t entry; - p = vmemmap_alloc_block(PMD_SIZE, node); + p = vmemmap_alloc_block_buf(PMD_SIZE, node); if (!p) return -ENOMEM; diff --git a/include/linux/mm.h b/include/linux/mm.h index f2c5b3cee8a1..f6002e5dc187 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1326,12 +1326,19 @@ extern int randomize_va_space; const char * arch_vma_name(struct vm_area_struct *vma); void print_vma_addr(char *prefix, unsigned long rip); +void sparse_mem_maps_populate_node(struct page **map_map, + unsigned long pnum_begin, + unsigned long pnum_end, + unsigned long map_count, + int nodeid); + struct page *sparse_mem_map_populate(unsigned long pnum, int nid); pgd_t *vmemmap_pgd_populate(unsigned long addr, int node); pud_t *vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node); pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node); pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node); void *vmemmap_alloc_block(unsigned long size, int node); +void *vmemmap_alloc_block_buf(unsigned long size, int node); void vmemmap_verify(pte_t *, int, unsigned long, unsigned long); int vmemmap_populate_basepages(struct page *start_page, unsigned long pages, int node); diff --git a/mm/Kconfig b/mm/Kconfig index 17b8947aa7da..e4a33b9479b2 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -115,6 +115,10 @@ config SPARSEMEM_EXTREME config SPARSEMEM_VMEMMAP_ENABLE bool +config SPARSEMEM_ALLOC_MEM_MAP_TOGETHER + def_bool y + depends on SPARSEMEM && X86_64 + config SPARSEMEM_VMEMMAP bool "Sparse Memory virtual memmap" depends on SPARSEMEM && SPARSEMEM_VMEMMAP_ENABLE diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 9506c39942f6..392b9bb5bc01 100644 --- a/mm/sparse-vmemmap.c 
+++ b/mm/sparse-vmemmap.c @@ -43,6 +43,8 @@ static void * __init_refok __earlyonly_bootmem_alloc(int node, return __alloc_bootmem_node_high(NODE_DATA(node), size, align, goal); } +static void *vmemmap_buf; +static void *vmemmap_buf_end; void * __meminit vmemmap_alloc_block(unsigned long size, int node) { @@ -64,6 +66,24 @@ void * __meminit vmemmap_alloc_block(unsigned long size, int node) __pa(MAX_DMA_ADDRESS)); } +/* need to make sure size is all the same during early stage */ +void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node) +{ + void *ptr; + + if (!vmemmap_buf) + return vmemmap_alloc_block(size, node); + + /* take the from buf */ + ptr = (void *)ALIGN((unsigned long)vmemmap_buf, size); + if (ptr + size > vmemmap_buf_end) + return vmemmap_alloc_block(size, node); + + vmemmap_buf = ptr + size; + + return ptr; +} + void __meminit vmemmap_verify(pte_t *pte, int node, unsigned long start, unsigned long end) { @@ -80,7 +100,7 @@ pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node) pte_t *pte = pte_offset_kernel(pmd, addr); if (pte_none(*pte)) { pte_t entry; - void *p = vmemmap_alloc_block(PAGE_SIZE, node); + void *p = vmemmap_alloc_block_buf(PAGE_SIZE, node); if (!p) return NULL; entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL); @@ -163,3 +183,55 @@ struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid) return map; } + +void __init sparse_mem_maps_populate_node(struct page **map_map, + unsigned long pnum_begin, + unsigned long pnum_end, + unsigned long map_count, int nodeid) +{ + unsigned long pnum; + unsigned long size = sizeof(struct page) * PAGES_PER_SECTION; + void *vmemmap_buf_start; + + size = ALIGN(size, PMD_SIZE); + vmemmap_buf_start = __earlyonly_bootmem_alloc(nodeid, size * map_count, + PMD_SIZE, __pa(MAX_DMA_ADDRESS)); + + if (vmemmap_buf_start) { + vmemmap_buf = vmemmap_buf_start; + vmemmap_buf_end = vmemmap_buf_start + size * map_count; + } + + for (pnum = pnum_begin; pnum < pnum_end; pnum++) { + struct mem_section *ms; + + if (!present_section_nr(pnum)) + continue; + + map_map[pnum] = sparse_mem_map_populate(pnum, nodeid); + if (map_map[pnum]) + continue; + ms = __nr_to_section(pnum); + printk(KERN_ERR "%s: sparsemem memory map backing failed " + "some memory will not be available.\n", __func__); + ms->section_mem_map = 0; + } + + if (vmemmap_buf_start) { + /* need to free left buf */ +#ifdef CONFIG_NO_BOOTMEM + free_early(__pa(vmemmap_buf_start), __pa(vmemmap_buf_end)); + if (vmemmap_buf_start < vmemmap_buf) { + char name[15]; + + snprintf(name, sizeof(name), "MEMMAP %d", nodeid); + reserve_early_without_check(__pa(vmemmap_buf_start), + __pa(vmemmap_buf), name); + } +#else + free_bootmem(__pa(vmemmap_buf), vmemmap_buf_end - vmemmap_buf); +#endif + vmemmap_buf = NULL; + vmemmap_buf_end = NULL; + } +} diff --git a/mm/sparse.c b/mm/sparse.c index 0cdaf0b58457..9b6b93a4d78d 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -390,8 +390,65 @@ struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid) PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION)); return map; } +void __init sparse_mem_maps_populate_node(struct page **map_map, + unsigned long pnum_begin, + unsigned long pnum_end, + unsigned long map_count, int nodeid) +{ + void *map; + unsigned long pnum; + unsigned long size = sizeof(struct page) * PAGES_PER_SECTION; + + map = alloc_remap(nodeid, size * map_count); + if (map) { + for (pnum = pnum_begin; pnum < pnum_end; pnum++) { + if (!present_section_nr(pnum)) + continue; + map_map[pnum] = 
map; + map += size; + } + return; + } + + size = PAGE_ALIGN(size); + map = alloc_bootmem_pages_node(NODE_DATA(nodeid), size * map_count); + if (map) { + for (pnum = pnum_begin; pnum < pnum_end; pnum++) { + if (!present_section_nr(pnum)) + continue; + map_map[pnum] = map; + map += size; + } + return; + } + + /* fallback */ + for (pnum = pnum_begin; pnum < pnum_end; pnum++) { + struct mem_section *ms; + + if (!present_section_nr(pnum)) + continue; + map_map[pnum] = sparse_mem_map_populate(pnum, nodeid); + if (map_map[pnum]) + continue; + ms = __nr_to_section(pnum); + printk(KERN_ERR "%s: sparsemem memory map backing failed " + "some memory will not be available.\n", __func__); + ms->section_mem_map = 0; + } +} #endif /* !CONFIG_SPARSEMEM_VMEMMAP */ +static void __init sparse_early_mem_maps_alloc_node(struct page **map_map, + unsigned long pnum_begin, + unsigned long pnum_end, + unsigned long map_count, int nodeid) +{ + sparse_mem_maps_populate_node(map_map, pnum_begin, pnum_end, + map_count, nodeid); +} + +#ifndef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum) { struct page *map; @@ -407,6 +464,7 @@ static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum) ms->section_mem_map = 0; return NULL; } +#endif void __attribute__((weak)) __meminit vmemmap_populate_print_last(void) { @@ -420,12 +478,14 @@ void __init sparse_init(void) { unsigned long pnum; struct page *map; + struct page **map_map; unsigned long *usemap; unsigned long **usemap_map; - int size; + int size, size2; int nodeid_begin = 0; unsigned long pnum_begin = 0; unsigned long usemap_count; + unsigned long map_count; /* * map is using big page (aka 2M in x86 64 bit) @@ -478,6 +538,48 @@ void __init sparse_init(void) sparse_early_usemaps_alloc_node(usemap_map, pnum_begin, NR_MEM_SECTIONS, usemap_count, nodeid_begin); +#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER + size2 = sizeof(struct page *) * NR_MEM_SECTIONS; + map_map = alloc_bootmem(size2); + if (!map_map) + panic("can not allocate map_map\n"); + + for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { + struct mem_section *ms; + + if (!present_section_nr(pnum)) + continue; + ms = __nr_to_section(pnum); + nodeid_begin = sparse_early_nid(ms); + pnum_begin = pnum; + break; + } + map_count = 1; + for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) { + struct mem_section *ms; + int nodeid; + + if (!present_section_nr(pnum)) + continue; + ms = __nr_to_section(pnum); + nodeid = sparse_early_nid(ms); + if (nodeid == nodeid_begin) { + map_count++; + continue; + } + /* ok, we need to take cake of from pnum_begin to pnum - 1*/ + sparse_early_mem_maps_alloc_node(map_map, pnum_begin, pnum, + map_count, nodeid_begin); + /* new start, update count etc*/ + nodeid_begin = nodeid; + pnum_begin = pnum; + map_count = 1; + } + /* ok, last chunk */ + sparse_early_mem_maps_alloc_node(map_map, pnum_begin, NR_MEM_SECTIONS, + map_count, nodeid_begin); +#endif + for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { if (!present_section_nr(pnum)) continue; @@ -486,7 +588,11 @@ void __init sparse_init(void) if (!usemap) continue; +#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER + map = map_map[pnum]; +#else map = sparse_early_mem_map_alloc(pnum); +#endif if (!map) continue; @@ -496,6 +602,9 @@ void __init sparse_init(void) vmemmap_populate_print_last(); +#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER + free_bootmem(__pa(map_map), size2); +#endif free_bootmem(__pa(usemap_map), size); } -- cgit v1.2.3 From 
9b3be9f99203d9a400e8547f0e80f1d8f8e5738c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 10 Feb 2010 01:20:29 -0800 Subject: Move round_up/down to kernel.h ... in preparation of moving early_res to kernel/. Signed-off-by: Yinghai Lu LKML-Reference: <1265793639-15071-26-git-send-email-yinghai@kernel.org> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/proto.h | 10 ---------- include/linux/kernel.h | 10 ++++++++++ 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 4009f6534f52..6f414ed88620 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -23,14 +23,4 @@ extern int reboot_force; long do_arch_prctl(struct task_struct *task, int code, unsigned long addr); -/* - * This looks more complex than it should be. But we need to - * get the type for the ~ right in round_down (it needs to be - * as wide as the result!), and we want to evaluate the macro - * arguments just once each. - */ -#define __round_mask(x,y) ((__typeof__(x))((y)-1)) -#define round_up(x,y) ((((x)-1) | __round_mask(x,y))+1) -#define round_down(x,y) ((x) & ~__round_mask(x,y)) - #endif /* _ASM_X86_PROTO_H */ diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 328bca609b9b..d45e372fad81 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -44,6 +44,16 @@ extern const char linux_proc_banner[]; #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) +/* + * This looks more complex than it should be. But we need to + * get the type for the ~ right in round_down (it needs to be + * as wide as the result!), and we want to evaluate the macro + * arguments just once each. + */ +#define __round_mask(x, y) ((__typeof__(x))((y)-1)) +#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) +#define round_down(x, y) ((x) & ~__round_mask(x, y)) + #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y)) -- cgit v1.2.3 From e902ec9906e844f4613fa6190c6fa65f162dc86e Mon Sep 17 00:00:00 2001 From: Jiro SEKIBA Date: Sat, 30 Jan 2010 18:06:35 +0900 Subject: nilfs2: issue discard request after cleaning segments This adds a function to send discard requests for given array of segment numbers, and calls the function when garbage collection succeeded. Signed-off-by: Jiro SEKIBA Signed-off-by: Ryusuke Konishi --- Documentation/filesystems/nilfs2.txt | 3 +++ fs/nilfs2/segment.c | 10 ++++++++++ fs/nilfs2/super.c | 8 +++++++- fs/nilfs2/the_nilfs.c | 38 ++++++++++++++++++++++++++++++++++++ fs/nilfs2/the_nilfs.h | 1 + include/linux/nilfs2_fs.h | 1 + 6 files changed, 60 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/Documentation/filesystems/nilfs2.txt b/Documentation/filesystems/nilfs2.txt index 839efd8a8a8c..cf6d0d85ca82 100644 --- a/Documentation/filesystems/nilfs2.txt +++ b/Documentation/filesystems/nilfs2.txt @@ -74,6 +74,9 @@ norecovery Disable recovery of the filesystem on mount. This disables every write access on the device for read-only mounts or snapshots. This option will fail for r/w mounts on an unclean volume. +discard Issue discard/TRIM commands to the underlying block + device when blocks are freed. This is useful for SSD + devices and sparse/thinly-provisioned LUNs. 
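For example, a NILFS2 volume on an SSD might be mounted with discards enabled via "mount -t nilfs2 -o discard /dev/sdXN /mnt" (device and mount point names here are placeholders, not from the patch). As the segment-cleaner change below shows, a failing discard request only logs a warning and turns the option back off for that device.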
NILFS2 usage ============ diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 105b508b47a8..9280b0f10792 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2560,6 +2560,16 @@ int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv, set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(sci->sc_interval); } + if (nilfs_test_opt(sbi, DISCARD)) { + int ret = nilfs_discard_segments(nilfs, sci->sc_freesegs, + sci->sc_nfreesegs); + if (ret) { + printk(KERN_WARNING + "NILFS warning: error %d on discard request, " + "turning discards off for the device\n", ret); + nilfs_clear_opt(sbi, DISCARD); + } + } out_unlock: sci->sc_freesegs = NULL; diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 8173faee31e6..3f88401a375b 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -481,6 +481,8 @@ static int nilfs_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_printf(seq, ",order=strict"); if (nilfs_test_opt(sbi, NORECOVERY)) seq_printf(seq, ",norecovery"); + if (nilfs_test_opt(sbi, DISCARD)) + seq_printf(seq, ",discard"); return 0; } @@ -550,7 +552,7 @@ static const struct export_operations nilfs_export_ops = { enum { Opt_err_cont, Opt_err_panic, Opt_err_ro, Opt_nobarrier, Opt_snapshot, Opt_order, Opt_norecovery, - Opt_err, + Opt_discard, Opt_err, }; static match_table_t tokens = { @@ -561,6 +563,7 @@ static match_table_t tokens = { {Opt_snapshot, "cp=%u"}, {Opt_order, "order=%s"}, {Opt_norecovery, "norecovery"}, + {Opt_discard, "discard"}, {Opt_err, NULL} }; @@ -614,6 +617,9 @@ static int parse_options(char *options, struct super_block *sb) case Opt_norecovery: nilfs_set_opt(sbi, NORECOVERY); break; + case Opt_discard: + nilfs_set_opt(sbi, DISCARD); + break; default: printk(KERN_ERR "NILFS: Unrecognized mount option \"%s\"\n", p); diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 6241e1722efc..92733d5651d2 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -646,6 +646,44 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data) goto out; } +int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump, + size_t nsegs) +{ + sector_t seg_start, seg_end; + sector_t start = 0, nblocks = 0; + unsigned int sects_per_block; + __u64 *sn; + int ret = 0; + + sects_per_block = (1 << nilfs->ns_blocksize_bits) / + bdev_logical_block_size(nilfs->ns_bdev); + for (sn = segnump; sn < segnump + nsegs; sn++) { + nilfs_get_segment_range(nilfs, *sn, &seg_start, &seg_end); + + if (!nblocks) { + start = seg_start; + nblocks = seg_end - seg_start + 1; + } else if (start + nblocks == seg_start) { + nblocks += seg_end - seg_start + 1; + } else { + ret = blkdev_issue_discard(nilfs->ns_bdev, + start * sects_per_block, + nblocks * sects_per_block, + GFP_NOFS, + DISCARD_FL_BARRIER); + if (ret < 0) + return ret; + nblocks = 0; + } + } + if (nblocks) + ret = blkdev_issue_discard(nilfs->ns_bdev, + start * sects_per_block, + nblocks * sects_per_block, + GFP_NOFS, DISCARD_FL_BARRIER); + return ret; +} + int nilfs_count_free_blocks(struct the_nilfs *nilfs, sector_t *nblocks) { struct inode *dat = nilfs_dat_inode(nilfs); diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index 589786e33464..fd057f8ad439 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -221,6 +221,7 @@ struct the_nilfs *find_or_create_nilfs(struct block_device *); void put_nilfs(struct the_nilfs *); int init_nilfs(struct the_nilfs *, struct nilfs_sb_info *, char *); int load_nilfs(struct the_nilfs *, struct nilfs_sb_info *); +int 
nilfs_discard_segments(struct the_nilfs *, __u64 *, size_t); int nilfs_count_free_blocks(struct the_nilfs *, sector_t *); struct nilfs_sb_info *nilfs_find_sbinfo(struct the_nilfs *, int, __u64); int nilfs_checkpoint_is_mounted(struct the_nilfs *, __u64, int); diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h index 3fe02cf8b65a..640702e97457 100644 --- a/include/linux/nilfs2_fs.h +++ b/include/linux/nilfs2_fs.h @@ -153,6 +153,7 @@ struct nilfs_super_root { semantics also for data */ #define NILFS_MOUNT_NORECOVERY 0x4000 /* Disable write access during mount-time recovery */ +#define NILFS_MOUNT_DISCARD 0x8000 /* Issue DISCARD requests */ /** -- cgit v1.2.3 From dfff0615d28bdb3e8d213e5537dd069265912667 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Fri, 12 Feb 2010 21:58:11 +0100 Subject: tree-wide: fix typos "ass?o[sc]iac?te" -> "associate" in comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Uwe Kleine-König Signed-off-by: Jiri Kosina --- drivers/usb/musb/davinci.c | 2 +- drivers/zorro/zorro.ids | 2 +- include/net/irda/irttp.h | 2 +- kernel/irq/chip.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/usb/musb/davinci.c b/drivers/usb/musb/davinci.c index 66913811af5e..a883f9dd3f8a 100644 --- a/drivers/usb/musb/davinci.c +++ b/drivers/usb/musb/davinci.c @@ -274,7 +274,7 @@ static irqreturn_t davinci_interrupt(int irq, void *__hci) /* NOTE: DaVinci shadows the Mentor IRQs. Don't manage them through * the Mentor registers (except for setup), use the TI ones and EOI. * - * Docs describe irq "vector" registers asociated with the CPPI and + * Docs describe irq "vector" registers associated with the CPPI and * USB EOI registers. These hold a bitmask corresponding to the * current IRQ, not an irq handler address. Would using those bits * resolve some of the races observed in this dispatch code?? diff --git a/drivers/zorro/zorro.ids b/drivers/zorro/zorro.ids index 0c0f99e2dd62..de24e3decedd 100644 --- a/drivers/zorro/zorro.ids +++ b/drivers/zorro/zorro.ids @@ -108,7 +108,7 @@ 0c00 500XP/SupraDrive WordSync [SCSI Host Adapter] 0d00 SupraDrive WordSync II [SCSI Host Adapter] 1000 2400zi+ [Modem] -0422 Computer Systems Assosiates +0422 Computer Systems Associates 1100 Magnum 40 [Accelerator and SCSI Host Adapter] 1500 12 Gauge [SCSI Host Adapter] 0439 Marc Michael Groth diff --git a/include/net/irda/irttp.h b/include/net/irda/irttp.h index 0788c23d2828..11aee7a2972a 100644 --- a/include/net/irda/irttp.h +++ b/include/net/irda/irttp.h @@ -97,7 +97,7 @@ #define TTP_MAX_SDU_SIZE 0x01 /* - * This structure contains all data assosiated with one instance of a TTP + * This structure contains all data associated with one instance of a TTP * connection. */ struct tsap_cb { diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index ecc3fa28f666..ec8a96382461 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -520,7 +520,7 @@ out: * signal. The occurence is latched into the irq controller hardware * and must be acked in order to be reenabled. After the ack another * interrupt can happen on the same source even before the first one - * is handled by the assosiacted event handler. If this happens it + * is handled by the associated event handler. If this happens it * might be necessary to disable (mask) the interrupt depending on the * controller hardware. 
This requires to reenable the interrupt inside * of the loop which handles the interrupts which have arrived while -- cgit v1.2.3 From 43cf38eb5cea91245502df3fcee4dbfc1c74dd1c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 2 Feb 2010 14:38:57 +0900 Subject: percpu: add __percpu sparse annotations to core kernel subsystems Add __percpu sparse annotations to core subsystems. These annotations are to make sparse consider percpu variables to be in a different address space and warn if accessed without going through percpu accessors. This patch doesn't affect normal builds. Signed-off-by: Tejun Heo Reviewed-by: Christoph Lameter Acked-by: Paul E. McKenney Cc: Jens Axboe Cc: linux-mm@kvack.org Cc: Rusty Russell Cc: Dipankar Sarma Cc: Peter Zijlstra Cc: Andrew Morton Cc: Eric Biederman --- include/linux/blktrace_api.h | 4 ++-- include/linux/genhd.h | 2 +- include/linux/kexec.h | 2 +- include/linux/mmzone.h | 2 +- include/linux/module.h | 2 +- include/linux/percpu_counter.h | 2 +- include/linux/srcu.h | 2 +- kernel/kexec.c | 2 +- kernel/sched.c | 4 ++-- kernel/stop_machine.c | 2 +- mm/percpu.c | 18 ++++++++++-------- 11 files changed, 22 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 3b73b9992b26..416bf62d6d46 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -150,8 +150,8 @@ struct blk_user_trace_setup { struct blk_trace { int trace_state; struct rchan *rchan; - unsigned long *sequence; - unsigned char *msg_data; + unsigned long __percpu *sequence; + unsigned char __percpu *msg_data; u16 act_mask; u64 start_lba; u64 end_lba; diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 9717081c75ad..56b50514ab25 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -101,7 +101,7 @@ struct hd_struct { unsigned long stamp; int in_flight[2]; #ifdef CONFIG_SMP - struct disk_stats *dkstats; + struct disk_stats __percpu *dkstats; #else struct disk_stats dkstats; #endif diff --git a/include/linux/kexec.h b/include/linux/kexec.h index c356b6914ffd..03e8e8dbc577 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -199,7 +199,7 @@ extern struct kimage *kexec_crash_image; */ extern struct resource crashk_res; typedef u32 note_buf_t[KEXEC_NOTE_BYTES/4]; -extern note_buf_t *crash_notes; +extern note_buf_t __percpu *crash_notes; extern u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4]; extern size_t vmcoreinfo_size; extern size_t vmcoreinfo_max_size; diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 7874201a3556..41acd4bf7664 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -301,7 +301,7 @@ struct zone { unsigned long min_unmapped_pages; unsigned long min_slab_pages; #endif - struct per_cpu_pageset *pageset; + struct per_cpu_pageset __percpu *pageset; /* * free areas of different sizes */ diff --git a/include/linux/module.h b/include/linux/module.h index 7e74ae0051cc..dd618eb026aa 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -365,7 +365,7 @@ struct module struct module_ref { int count; - } *refptr; + } __percpu *refptr; #endif #ifdef CONFIG_CONSTRUCTORS diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index a7684a513994..9bd103c844ee 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -21,7 +21,7 @@ struct percpu_counter { #ifdef CONFIG_HOTPLUG_CPU struct list_head list; /* All percpu_counters are on a list */ #endif - s32 *counters; + s32 __percpu 
*counters; }; extern int percpu_counter_batch; diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 4765d97dcafb..41eedccc962c 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -33,7 +33,7 @@ struct srcu_struct_array { struct srcu_struct { int completed; - struct srcu_struct_array *per_cpu_ref; + struct srcu_struct_array __percpu *per_cpu_ref; struct mutex mutex; }; diff --git a/kernel/kexec.c b/kernel/kexec.c index ef077fb73155..87ebe8adc474 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -41,7 +41,7 @@ #include /* Per cpu memory for storing cpu states in case of system crash. */ -note_buf_t* crash_notes; +note_buf_t __percpu *crash_notes; /* vmcoreinfo stuff */ static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES]; diff --git a/kernel/sched.c b/kernel/sched.c index 3a8fb30a91b1..978edfd35a96 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1566,7 +1566,7 @@ static unsigned long cpu_avg_load_per_task(int cpu) #ifdef CONFIG_FAIR_GROUP_SCHED -static __read_mostly unsigned long *update_shares_data; +static __read_mostly unsigned long __percpu *update_shares_data; static void __set_se_shares(struct sched_entity *se, unsigned long shares); @@ -10683,7 +10683,7 @@ struct cgroup_subsys cpu_cgroup_subsys = { struct cpuacct { struct cgroup_subsys_state css; /* cpuusage holds pointer to a u64-type object on every cpu */ - u64 *cpuusage; + u64 __percpu *cpuusage; struct percpu_counter cpustat[CPUACCT_STAT_NSTATS]; struct cpuacct *parent; }; diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 912823e2a11b..9bb9fb1bd79c 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -45,7 +45,7 @@ static int refcount; static struct workqueue_struct *stop_machine_wq; static struct stop_machine_data active, idle; static const struct cpumask *active_cpus; -static void *stop_machine_work; +static void __percpu *stop_machine_work; static void set_state(enum stopmachine_state newstate) { diff --git a/mm/percpu.c b/mm/percpu.c index b336638d20e7..768419d44ad7 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -80,13 +80,15 @@ /* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */ #ifndef __addr_to_pcpu_ptr #define __addr_to_pcpu_ptr(addr) \ - (void *)((unsigned long)(addr) - (unsigned long)pcpu_base_addr \ - + (unsigned long)__per_cpu_start) + (void __percpu *)((unsigned long)(addr) - \ + (unsigned long)pcpu_base_addr + \ + (unsigned long)__per_cpu_start) #endif #ifndef __pcpu_ptr_to_addr #define __pcpu_ptr_to_addr(ptr) \ - (void *)((unsigned long)(ptr) + (unsigned long)pcpu_base_addr \ - - (unsigned long)__per_cpu_start) + (void __force *)((unsigned long)(ptr) + \ + (unsigned long)pcpu_base_addr - \ + (unsigned long)__per_cpu_start) #endif struct pcpu_chunk { @@ -1065,7 +1067,7 @@ static struct pcpu_chunk *alloc_pcpu_chunk(void) * RETURNS: * Percpu pointer to the allocated area on success, NULL on failure. */ -static void *pcpu_alloc(size_t size, size_t align, bool reserved) +static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved) { static int warn_limit = 10; struct pcpu_chunk *chunk; @@ -1194,7 +1196,7 @@ fail_unlock_mutex: * RETURNS: * Percpu pointer to the allocated area on success, NULL on failure. */ -void *__alloc_percpu(size_t size, size_t align) +void __percpu *__alloc_percpu(size_t size, size_t align) { return pcpu_alloc(size, align, false); } @@ -1215,7 +1217,7 @@ EXPORT_SYMBOL_GPL(__alloc_percpu); * RETURNS: * Percpu pointer to the allocated area on success, NULL on failure. 
*/ -void *__alloc_reserved_percpu(size_t size, size_t align) +void __percpu *__alloc_reserved_percpu(size_t size, size_t align) { return pcpu_alloc(size, align, true); } @@ -1267,7 +1269,7 @@ static void pcpu_reclaim(struct work_struct *work) * CONTEXT: * Can be called from atomic context. */ -void free_percpu(void *ptr) +void free_percpu(void __percpu *ptr) { void *addr; struct pcpu_chunk *chunk; -- cgit v1.2.3 From 003cb608a2533d0927a83bc4e07e46d7a622eda9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 2 Feb 2010 14:39:01 +0900 Subject: percpu: add __percpu sparse annotations to fs Add __percpu sparse annotations to fs. These annotations are to make sparse consider percpu variables to be in a different address space and warn if accessed without going through percpu accessors. This patch doesn't affect normal builds. Signed-off-by: Tejun Heo Cc: "Theodore Ts'o" Cc: Trond Myklebust Cc: Alex Elder Cc: Christoph Hellwig Cc: Alexander Viro --- fs/ext4/ext4.h | 2 +- fs/nfs/iostat.h | 4 ++-- fs/xfs/xfs_mount.h | 2 +- include/linux/mount.h | 2 +- include/linux/nfs_fs_sb.h | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 874d169a193e..4cedc91ec59d 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1014,7 +1014,7 @@ struct ext4_sb_info { atomic_t s_lock_busy; /* locality groups */ - struct ext4_locality_group *s_locality_groups; + struct ext4_locality_group __percpu *s_locality_groups; /* for write statistics */ unsigned long s_sectors_written_start; diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h index 46d779abafd3..1d8d5c813b01 100644 --- a/fs/nfs/iostat.h +++ b/fs/nfs/iostat.h @@ -57,12 +57,12 @@ static inline void nfs_add_fscache_stats(struct inode *inode, } #endif -static inline struct nfs_iostats *nfs_alloc_iostats(void) +static inline struct nfs_iostats __percpu *nfs_alloc_iostats(void) { return alloc_percpu(struct nfs_iostats); } -static inline void nfs_free_iostats(struct nfs_iostats *stats) +static inline void nfs_free_iostats(struct nfs_iostats __percpu *stats) { if (stats != NULL) free_percpu(stats); diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 1df7e4502967..24c88870cdb2 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -243,7 +243,7 @@ typedef struct xfs_mount { struct xfs_qmops *m_qm_ops; /* vector of XQM ops */ atomic_t m_active_trans; /* number trans frozen */ #ifdef HAVE_PERCPU_SB - xfs_icsb_cnts_t *m_sb_cnts; /* per-cpu superblock counters */ + xfs_icsb_cnts_t __percpu *m_sb_cnts; /* per-cpu superblock counters */ unsigned long m_icsb_counters; /* disabled per-cpu counters */ struct notifier_block m_icsb_notifier; /* hotplug cpu notifier */ struct mutex m_icsb_mutex; /* balancer sync lock */ diff --git a/include/linux/mount.h b/include/linux/mount.h index 5d5275364867..b5f43a34ef88 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -66,7 +66,7 @@ struct vfsmount { int mnt_pinned; int mnt_ghosts; #ifdef CONFIG_SMP - int *mnt_writers; + int __percpu *mnt_writers; #else int mnt_writers; #endif diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 34fc6be5bfcf..6a2e44fd75e2 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -105,7 +105,7 @@ struct nfs_server { struct rpc_clnt * client; /* RPC client handle */ struct rpc_clnt * client_acl; /* ACL RPC client handle */ struct nlm_host *nlm_host; /* NLM client handle */ - struct nfs_iostats * io_stats; /* I/O statistics */ + struct nfs_iostats __percpu *io_stats; /* I/O 
statistics */ struct backing_dev_info backing_dev_info; atomic_long_t writeback; /* number of writeback pages */ int flags; /* various flags */ -- cgit v1.2.3 From a29d8b8e2d811a24bbe49215a0f0c536b72ebc18 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 2 Feb 2010 14:39:15 +0900 Subject: percpu: add __percpu sparse annotations to what's left Add __percpu sparse annotations to places which didn't make it in one of the previous patches. All converions are trivial. These annotations are to make sparse consider percpu variables to be in a different address space and warn if accessed without going through percpu accessors. This patch doesn't affect normal builds. Signed-off-by: Tejun Heo Acked-by: Borislav Petkov Cc: Dan Williams Cc: Huang Ying Cc: Len Brown Cc: Neil Brown --- crypto/cryptd.c | 2 +- drivers/acpi/processor_perflib.c | 2 +- drivers/dma/dmaengine.c | 2 +- drivers/edac/amd64_edac.c | 2 +- drivers/md/raid5.c | 2 +- drivers/md/raid5.h | 2 +- include/acpi/processor.h | 2 +- include/linux/dmaengine.h | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/crypto/cryptd.c b/crypto/cryptd.c index 704c14115323..ef71318976c7 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c @@ -31,7 +31,7 @@ struct cryptd_cpu_queue { }; struct cryptd_queue { - struct cryptd_cpu_queue *cpu_queue; + struct cryptd_cpu_queue __percpu *cpu_queue; }; struct cryptd_instance_ctx { diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index 2cabadcc4d8c..8c6a6497d7f3 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -557,7 +557,7 @@ end: } int acpi_processor_preregister_performance( - struct acpi_processor_performance *performance) + struct acpi_processor_performance __percpu *performance) { int count, count_target; int retval = 0; diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 6f51a0a7a8bb..4eadd98cea53 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -284,7 +284,7 @@ struct dma_chan_tbl_ent { /** * channel_table - percpu lookup table for memory-to-memory offload providers */ -static struct dma_chan_tbl_ent *channel_table[DMA_TX_TYPE_END]; +static struct dma_chan_tbl_ent __percpu *channel_table[DMA_TX_TYPE_END]; static int __init dma_channel_table_init(void) { diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 000dc67b85b7..7b36c8838b2f 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -13,7 +13,7 @@ module_param(report_gart_errors, int, 0644); static int ecc_enable_override; module_param(ecc_enable_override, int, 0644); -static struct msr *msrs; +static struct msr __percpu *msrs; /* Lookup table for all possible MC control instances */ struct amd64_pvt; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index e84204eb12df..77cb3ab4bf45 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4680,7 +4680,7 @@ static int raid5_alloc_percpu(raid5_conf_t *conf) { unsigned long cpu; struct page *spare_page; - struct raid5_percpu *allcpus; + struct raid5_percpu __percpu *allcpus; void *scribble; int err; diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index dd708359b451..0f86f5e36724 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -405,7 +405,7 @@ struct raid5_private_data { * lists and performing address * conversions */ - } *percpu; + } __percpu *percpu; size_t scribble_len; /* size of scribble region must be * associated with conf to handle * cpu hotplug while reshaping diff --git 
a/include/acpi/processor.h b/include/acpi/processor.h index 0ea5ef4eb6a9..477544fd8e9e 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -238,7 +238,7 @@ struct acpi_processor_errata { extern int acpi_processor_preregister_performance(struct acpi_processor_performance - *performance); + __percpu *performance); extern int acpi_processor_register_performance(struct acpi_processor_performance *performance, unsigned int cpu); diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 78784982b33e..21fd9b7c6a40 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -162,7 +162,7 @@ struct dma_chan { struct dma_chan_dev *dev; struct list_head device_node; - struct dma_chan_percpu *local; + struct dma_chan_percpu __percpu *local; int client_count; int table_count; void *private; -- cgit v1.2.3 From 580e0ad21d6d6f932461d24b47041e3dd499c23f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 16 Feb 2010 18:40:35 -0800 Subject: core: Move early_res from arch/x86 to kernel/ This makes the range reservation feature available to other architectures. -v2: add get_max_mapped, max_pfn_mapped only defined in x86... to fix PPC compiling -v3: according to hpa, add CONFIG_HAVE_EARLY_RES -v4: fix typo about EARLY_RES in config Signed-off-by: Yinghai Lu LKML-Reference: <4B7B5723.4070009@kernel.org> Signed-off-by: H. Peter Anvin --- arch/x86/Kconfig | 3 + arch/x86/include/asm/e820.h | 2 +- arch/x86/include/asm/early_res.h | 21 -- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/e820.c | 10 +- arch/x86/kernel/early_res.c | 521 --------------------------------------- include/linux/early_res.h | 22 ++ kernel/Makefile | 1 + kernel/early_res.c | 513 ++++++++++++++++++++++++++++++++++++++ 9 files changed, 550 insertions(+), 545 deletions(-) delete mode 100644 arch/x86/include/asm/early_res.h delete mode 100644 arch/x86/kernel/early_res.c create mode 100644 include/linux/early_res.h create mode 100644 kernel/early_res.c (limited to 'include') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 29f9efb74fc7..0e9f8b10de52 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -183,6 +183,9 @@ config ARCH_SUPPORTS_OPTIMIZED_INLINING config ARCH_SUPPORTS_DEBUG_PAGEALLOC def_bool y +config HAVE_EARLY_RES + def_bool y + config HAVE_INTEL_TXT def_bool y depends on EXPERIMENTAL && DMAR && ACPI diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index a8299e134437..0e22296790d3 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h @@ -112,7 +112,7 @@ extern unsigned long end_user_pfn; extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align); extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align); extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); -#include +#include extern unsigned long e820_end_of_ram_pfn(void); extern unsigned long e820_end_of_low_ram_pfn(void); diff --git a/arch/x86/include/asm/early_res.h b/arch/x86/include/asm/early_res.h deleted file mode 100644 index 9758f3df9dad..000000000000 --- a/arch/x86/include/asm/early_res.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef _ASM_X86_EARLY_RES_H -#define _ASM_X86_EARLY_RES_H -#ifdef __KERNEL__ - -extern void reserve_early(u64 start, u64 end, char *name); -extern void reserve_early_overlap_ok(u64 start, u64 end, char *name); -extern void free_early(u64 start, u64 end); -extern void early_res_to_bootmem(u64 start, u64 end); - -void reserve_early_without_check(u64 start, u64 end, char *name); -u64 find_early_area(u64 ei_start, u64 ei_last, u64 
start, u64 end, - u64 size, u64 align); -u64 find_early_area_size(u64 ei_start, u64 ei_last, u64 start, - u64 *sizep, u64 align); -u64 find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align); -#include -int get_free_all_memory_range(struct range **rangep, int nodeid); - -#endif /* __KERNEL__ */ - -#endif /* _ASM_X86_EARLY_RES_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index f5fb9f0b6277..d87f09bc5a52 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -38,7 +38,7 @@ obj-$(CONFIG_X86_32) += probe_roms_32.o obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o -obj-y += bootflag.o e820.o early_res.o +obj-y += bootflag.o e820.o obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o obj-y += tsc.o io_delay.o rtc.o diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 36918d8463ab..740b440fbd73 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -17,7 +17,6 @@ #include #include -#include #include #include @@ -752,6 +751,15 @@ u64 __init find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align) { return find_e820_area(start, end, size, align); } + +u64 __init get_max_mapped(void) +{ + u64 end = max_pfn_mapped; + + end <<= PAGE_SHIFT; + + return end; +} /* * Find next free range after *start */ diff --git a/arch/x86/kernel/early_res.c b/arch/x86/kernel/early_res.c deleted file mode 100644 index 1458dc022343..000000000000 --- a/arch/x86/kernel/early_res.c +++ /dev/null @@ -1,521 +0,0 @@ -/* - * early_res, could be used to replace bootmem - */ -#include -#include -#include -#include -#include - -#include - -/* - * Early reserved memory areas. - */ -/* - * need to make sure this one is bigger enough before - * find_fw_memmap_area could be used - */ -#define MAX_EARLY_RES_X 32 - -struct early_res { - u64 start, end; - char name[15]; - char overlap_ok; -}; -static struct early_res early_res_x[MAX_EARLY_RES_X] __initdata; - -static int max_early_res __initdata = MAX_EARLY_RES_X; -static struct early_res *early_res __initdata = &early_res_x[0]; -static int early_res_count __initdata; - -static int __init find_overlapped_early(u64 start, u64 end) -{ - int i; - struct early_res *r; - - for (i = 0; i < max_early_res && early_res[i].end; i++) { - r = &early_res[i]; - if (end > r->start && start < r->end) - break; - } - - return i; -} - -/* - * Drop the i-th range from the early reservation map, - * by copying any higher ranges down one over it, and - * clearing what had been the last slot. - */ -static void __init drop_range(int i) -{ - int j; - - for (j = i + 1; j < max_early_res && early_res[j].end; j++) - ; - - memmove(&early_res[i], &early_res[i + 1], - (j - 1 - i) * sizeof(struct early_res)); - - early_res[j - 1].end = 0; - early_res_count--; -} - -/* - * Split any existing ranges that: - * 1) are marked 'overlap_ok', and - * 2) overlap with the stated range [start, end) - * into whatever portion (if any) of the existing range is entirely - * below or entirely above the stated range. Drop the portion - * of the existing range that overlaps with the stated range, - * which will allow the caller of this routine to then add that - * stated range without conflicting with any existing range. 
- */ -static void __init drop_overlaps_that_are_ok(u64 start, u64 end) -{ - int i; - struct early_res *r; - u64 lower_start, lower_end; - u64 upper_start, upper_end; - char name[15]; - - for (i = 0; i < max_early_res && early_res[i].end; i++) { - r = &early_res[i]; - - /* Continue past non-overlapping ranges */ - if (end <= r->start || start >= r->end) - continue; - - /* - * Leave non-ok overlaps as is; let caller - * panic "Overlapping early reservations" - * when it hits this overlap. - */ - if (!r->overlap_ok) - return; - - /* - * We have an ok overlap. We will drop it from the early - * reservation map, and add back in any non-overlapping - * portions (lower or upper) as separate, overlap_ok, - * non-overlapping ranges. - */ - - /* 1. Note any non-overlapping (lower or upper) ranges. */ - strncpy(name, r->name, sizeof(name) - 1); - - lower_start = lower_end = 0; - upper_start = upper_end = 0; - if (r->start < start) { - lower_start = r->start; - lower_end = start; - } - if (r->end > end) { - upper_start = end; - upper_end = r->end; - } - - /* 2. Drop the original ok overlapping range */ - drop_range(i); - - i--; /* resume for-loop on copied down entry */ - - /* 3. Add back in any non-overlapping ranges. */ - if (lower_end) - reserve_early_overlap_ok(lower_start, lower_end, name); - if (upper_end) - reserve_early_overlap_ok(upper_start, upper_end, name); - } -} - -static void __init __reserve_early(u64 start, u64 end, char *name, - int overlap_ok) -{ - int i; - struct early_res *r; - - i = find_overlapped_early(start, end); - if (i >= max_early_res) - panic("Too many early reservations"); - r = &early_res[i]; - if (r->end) - panic("Overlapping early reservations " - "%llx-%llx %s to %llx-%llx %s\n", - start, end - 1, name ? name : "", r->start, - r->end - 1, r->name); - r->start = start; - r->end = end; - r->overlap_ok = overlap_ok; - if (name) - strncpy(r->name, name, sizeof(r->name) - 1); - early_res_count++; -} - -/* - * A few early reservtations come here. - * - * The 'overlap_ok' in the name of this routine does -not- mean it - * is ok for these reservations to overlap an earlier reservation. - * Rather it means that it is ok for subsequent reservations to - * overlap this one. - * - * Use this entry point to reserve early ranges when you are doing - * so out of "Paranoia", reserving perhaps more memory than you need, - * just in case, and don't mind a subsequent overlapping reservation - * that is known to be needed. - * - * The drop_overlaps_that_are_ok() call here isn't really needed. - * It would be needed if we had two colliding 'overlap_ok' - * reservations, so that the second such would not panic on the - * overlap with the first. We don't have any such as of this - * writing, but might as well tolerate such if it happens in - * the future. - */ -void __init reserve_early_overlap_ok(u64 start, u64 end, char *name) -{ - drop_overlaps_that_are_ok(start, end); - __reserve_early(start, end, name, 1); -} - -u64 __init __weak find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align) -{ - panic("should have find_fw_memmap_area defined with arch"); - - return -1ULL; -} - -static void __init __check_and_double_early_res(u64 ex_start, u64 ex_end) -{ - u64 start, end, size, mem; - struct early_res *new; - - /* do we have enough slots left ? 
*/ - if ((max_early_res - early_res_count) > max(max_early_res/8, 2)) - return; - - /* double it */ - mem = -1ULL; - size = sizeof(struct early_res) * max_early_res * 2; - if (early_res == early_res_x) - start = 0; - else - start = early_res[0].end; - end = ex_start; - if (start + size < end) - mem = find_fw_memmap_area(start, end, size, - sizeof(struct early_res)); - if (mem == -1ULL) { - start = ex_end; - end = max_pfn_mapped << PAGE_SHIFT; - if (start + size < end) - mem = find_fw_memmap_area(start, end, size, - sizeof(struct early_res)); - } - if (mem == -1ULL) - panic("can not find more space for early_res array"); - - new = __va(mem); - /* save the first one for own */ - new[0].start = mem; - new[0].end = mem + size; - new[0].overlap_ok = 0; - /* copy old to new */ - if (early_res == early_res_x) { - memcpy(&new[1], &early_res[0], - sizeof(struct early_res) * max_early_res); - memset(&new[max_early_res+1], 0, - sizeof(struct early_res) * (max_early_res - 1)); - early_res_count++; - } else { - memcpy(&new[1], &early_res[1], - sizeof(struct early_res) * (max_early_res - 1)); - memset(&new[max_early_res], 0, - sizeof(struct early_res) * max_early_res); - } - memset(&early_res[0], 0, sizeof(struct early_res) * max_early_res); - early_res = new; - max_early_res *= 2; - printk(KERN_DEBUG "early_res array is doubled to %d at [%llx - %llx]\n", - max_early_res, mem, mem + size - 1); -} - -/* - * Most early reservations come here. - * - * We first have drop_overlaps_that_are_ok() drop any pre-existing - * 'overlap_ok' ranges, so that we can then reserve this memory - * range without risk of panic'ing on an overlapping overlap_ok - * early reservation. - */ -void __init reserve_early(u64 start, u64 end, char *name) -{ - if (start >= end) - return; - - __check_and_double_early_res(start, end); - - drop_overlaps_that_are_ok(start, end); - __reserve_early(start, end, name, 0); -} - -void __init reserve_early_without_check(u64 start, u64 end, char *name) -{ - struct early_res *r; - - if (start >= end) - return; - - __check_and_double_early_res(start, end); - - r = &early_res[early_res_count]; - - r->start = start; - r->end = end; - r->overlap_ok = 0; - if (name) - strncpy(r->name, name, sizeof(r->name) - 1); - early_res_count++; -} - -void __init free_early(u64 start, u64 end) -{ - struct early_res *r; - int i; - - i = find_overlapped_early(start, end); - r = &early_res[i]; - if (i >= max_early_res || r->end != end || r->start != start) - panic("free_early on not reserved area: %llx-%llx!", - start, end - 1); - - drop_range(i); -} - -#ifdef CONFIG_NO_BOOTMEM -static void __init subtract_early_res(struct range *range, int az) -{ - int i, count; - u64 final_start, final_end; - int idx = 0; - - count = 0; - for (i = 0; i < max_early_res && early_res[i].end; i++) - count++; - - /* need to skip first one ?*/ - if (early_res != early_res_x) - idx = 1; - -#define DEBUG_PRINT_EARLY_RES 1 - -#if DEBUG_PRINT_EARLY_RES - printk(KERN_INFO "Subtract (%d early reservations)\n", count); -#endif - for (i = idx; i < count; i++) { - struct early_res *r = &early_res[i]; -#if DEBUG_PRINT_EARLY_RES - printk(KERN_INFO " #%d [%010llx - %010llx] %15s\n", i, - r->start, r->end, r->name); -#endif - final_start = PFN_DOWN(r->start); - final_end = PFN_UP(r->end); - if (final_start >= final_end) - continue; - subtract_range(range, az, final_start, final_end); - } - -} - -int __init get_free_all_memory_range(struct range **rangep, int nodeid) -{ - int i, count; - u64 start = 0, end; - u64 size; - u64 mem; - struct range *range; 
- int nr_range; - - count = 0; - for (i = 0; i < max_early_res && early_res[i].end; i++) - count++; - - count *= 2; - - size = sizeof(struct range) * count; -#ifdef MAX_DMA32_PFN - if (max_pfn_mapped > MAX_DMA32_PFN) - start = MAX_DMA32_PFN << PAGE_SHIFT; -#endif - end = max_pfn_mapped << PAGE_SHIFT; - mem = find_fw_memmap_area(start, end, size, sizeof(struct range)); - if (mem == -1ULL) - panic("can not find more space for range free"); - - range = __va(mem); - /* use early_node_map[] and early_res to get range array at first */ - memset(range, 0, size); - nr_range = 0; - - /* need to go over early_node_map to find out good range for node */ - nr_range = add_from_early_node_map(range, count, nr_range, nodeid); -#ifdef CONFIG_X86_32 - subtract_range(range, count, max_low_pfn, -1ULL); -#endif - subtract_early_res(range, count); - nr_range = clean_sort_range(range, count); - - /* need to clear it ? */ - if (nodeid == MAX_NUMNODES) { - memset(&early_res[0], 0, - sizeof(struct early_res) * max_early_res); - early_res = NULL; - max_early_res = 0; - } - - *rangep = range; - return nr_range; -} -#else -void __init early_res_to_bootmem(u64 start, u64 end) -{ - int i, count; - u64 final_start, final_end; - int idx = 0; - - count = 0; - for (i = 0; i < max_early_res && early_res[i].end; i++) - count++; - - /* need to skip first one ?*/ - if (early_res != early_res_x) - idx = 1; - - printk(KERN_INFO "(%d/%d early reservations) ==> bootmem [%010llx - %010llx]\n", - count - idx, max_early_res, start, end); - for (i = idx; i < count; i++) { - struct early_res *r = &early_res[i]; - printk(KERN_INFO " #%d [%010llx - %010llx] %16s", i, - r->start, r->end, r->name); - final_start = max(start, r->start); - final_end = min(end, r->end); - if (final_start >= final_end) { - printk(KERN_CONT "\n"); - continue; - } - printk(KERN_CONT " ==> [%010llx - %010llx]\n", - final_start, final_end); - reserve_bootmem_generic(final_start, final_end - final_start, - BOOTMEM_DEFAULT); - } - /* clear them */ - memset(&early_res[0], 0, sizeof(struct early_res) * max_early_res); - early_res = NULL; - max_early_res = 0; - early_res_count = 0; -} -#endif - -/* Check for already reserved areas */ -static inline int __init bad_addr(u64 *addrp, u64 size, u64 align) -{ - int i; - u64 addr = *addrp; - int changed = 0; - struct early_res *r; -again: - i = find_overlapped_early(addr, addr + size); - r = &early_res[i]; - if (i < max_early_res && r->end) { - *addrp = addr = round_up(r->end, align); - changed = 1; - goto again; - } - return changed; -} - -/* Check for already reserved areas */ -static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align) -{ - int i; - u64 addr = *addrp, last; - u64 size = *sizep; - int changed = 0; -again: - last = addr + size; - for (i = 0; i < max_early_res && early_res[i].end; i++) { - struct early_res *r = &early_res[i]; - if (last > r->start && addr < r->start) { - size = r->start - addr; - changed = 1; - goto again; - } - if (last > r->end && addr < r->end) { - addr = round_up(r->end, align); - size = last - addr; - changed = 1; - goto again; - } - if (last <= r->end && addr >= r->start) { - (*sizep)++; - return 0; - } - } - if (changed) { - *addrp = addr; - *sizep = size; - } - return changed; -} - -/* - * Find a free area with specified alignment in a specific range. 
- * only with the area.between start to end is active range from early_node_map - * so they are good as RAM - */ -u64 __init find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end, - u64 size, u64 align) -{ - u64 addr, last; - - addr = round_up(ei_start, align); - if (addr < start) - addr = round_up(start, align); - if (addr >= ei_last) - goto out; - while (bad_addr(&addr, size, align) && addr+size <= ei_last) - ; - last = addr + size; - if (last > ei_last) - goto out; - if (last > end) - goto out; - - return addr; - -out: - return -1ULL; -} - -u64 __init find_early_area_size(u64 ei_start, u64 ei_last, u64 start, - u64 *sizep, u64 align) -{ - u64 addr, last; - - addr = round_up(ei_start, align); - if (addr < start) - addr = round_up(start, align); - if (addr >= ei_last) - goto out; - *sizep = ei_last - addr; - while (bad_addr_size(&addr, sizep, align) && addr + *sizep <= ei_last) - ; - last = addr + *sizep; - if (last > ei_last) - goto out; - - return addr; - -out: - return -1ULL; -} diff --git a/include/linux/early_res.h b/include/linux/early_res.h new file mode 100644 index 000000000000..50f7663bb8b1 --- /dev/null +++ b/include/linux/early_res.h @@ -0,0 +1,22 @@ +#ifndef _LINUX_EARLY_RES_H +#define _LINUX_EARLY_RES_H +#ifdef __KERNEL__ + +extern void reserve_early(u64 start, u64 end, char *name); +extern void reserve_early_overlap_ok(u64 start, u64 end, char *name); +extern void free_early(u64 start, u64 end); +extern void early_res_to_bootmem(u64 start, u64 end); + +void reserve_early_without_check(u64 start, u64 end, char *name); +u64 find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end, + u64 size, u64 align); +u64 find_early_area_size(u64 ei_start, u64 ei_last, u64 start, + u64 *sizep, u64 align); +u64 find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align); +u64 get_max_mapped(void); +#include +int get_free_all_memory_range(struct range **rangep, int nodeid); + +#endif /* __KERNEL__ */ + +#endif /* _LINUX_EARLY_RES_H */ diff --git a/kernel/Makefile b/kernel/Makefile index ad47330ccf32..1292b863d667 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -11,6 +11,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \ async.o range.o +obj-$(CONFIG_HAVE_EARLY_RES) += early_res.o obj-y += groups.o ifdef CONFIG_FUNCTION_TRACER diff --git a/kernel/early_res.c b/kernel/early_res.c new file mode 100644 index 000000000000..aa5494ac4462 --- /dev/null +++ b/kernel/early_res.c @@ -0,0 +1,513 @@ +/* + * early_res, could be used to replace bootmem + */ +#include +#include +#include +#include +#include +#include + +/* + * Early reserved memory areas. 
+ */ +/* + * need to make sure this one is bigger enough before + * find_fw_memmap_area could be used + */ +#define MAX_EARLY_RES_X 32 + +struct early_res { + u64 start, end; + char name[15]; + char overlap_ok; +}; +static struct early_res early_res_x[MAX_EARLY_RES_X] __initdata; + +static int max_early_res __initdata = MAX_EARLY_RES_X; +static struct early_res *early_res __initdata = &early_res_x[0]; +static int early_res_count __initdata; + +static int __init find_overlapped_early(u64 start, u64 end) +{ + int i; + struct early_res *r; + + for (i = 0; i < max_early_res && early_res[i].end; i++) { + r = &early_res[i]; + if (end > r->start && start < r->end) + break; + } + + return i; +} + +/* + * Drop the i-th range from the early reservation map, + * by copying any higher ranges down one over it, and + * clearing what had been the last slot. + */ +static void __init drop_range(int i) +{ + int j; + + for (j = i + 1; j < max_early_res && early_res[j].end; j++) + ; + + memmove(&early_res[i], &early_res[i + 1], + (j - 1 - i) * sizeof(struct early_res)); + + early_res[j - 1].end = 0; + early_res_count--; +} + +/* + * Split any existing ranges that: + * 1) are marked 'overlap_ok', and + * 2) overlap with the stated range [start, end) + * into whatever portion (if any) of the existing range is entirely + * below or entirely above the stated range. Drop the portion + * of the existing range that overlaps with the stated range, + * which will allow the caller of this routine to then add that + * stated range without conflicting with any existing range. + */ +static void __init drop_overlaps_that_are_ok(u64 start, u64 end) +{ + int i; + struct early_res *r; + u64 lower_start, lower_end; + u64 upper_start, upper_end; + char name[15]; + + for (i = 0; i < max_early_res && early_res[i].end; i++) { + r = &early_res[i]; + + /* Continue past non-overlapping ranges */ + if (end <= r->start || start >= r->end) + continue; + + /* + * Leave non-ok overlaps as is; let caller + * panic "Overlapping early reservations" + * when it hits this overlap. + */ + if (!r->overlap_ok) + return; + + /* + * We have an ok overlap. We will drop it from the early + * reservation map, and add back in any non-overlapping + * portions (lower or upper) as separate, overlap_ok, + * non-overlapping ranges. + */ + + /* 1. Note any non-overlapping (lower or upper) ranges. */ + strncpy(name, r->name, sizeof(name) - 1); + + lower_start = lower_end = 0; + upper_start = upper_end = 0; + if (r->start < start) { + lower_start = r->start; + lower_end = start; + } + if (r->end > end) { + upper_start = end; + upper_end = r->end; + } + + /* 2. Drop the original ok overlapping range */ + drop_range(i); + + i--; /* resume for-loop on copied down entry */ + + /* 3. Add back in any non-overlapping ranges. */ + if (lower_end) + reserve_early_overlap_ok(lower_start, lower_end, name); + if (upper_end) + reserve_early_overlap_ok(upper_start, upper_end, name); + } +} + +static void __init __reserve_early(u64 start, u64 end, char *name, + int overlap_ok) +{ + int i; + struct early_res *r; + + i = find_overlapped_early(start, end); + if (i >= max_early_res) + panic("Too many early reservations"); + r = &early_res[i]; + if (r->end) + panic("Overlapping early reservations " + "%llx-%llx %s to %llx-%llx %s\n", + start, end - 1, name ? 
name : "", r->start, + r->end - 1, r->name); + r->start = start; + r->end = end; + r->overlap_ok = overlap_ok; + if (name) + strncpy(r->name, name, sizeof(r->name) - 1); + early_res_count++; +} + +/* + * A few early reservtations come here. + * + * The 'overlap_ok' in the name of this routine does -not- mean it + * is ok for these reservations to overlap an earlier reservation. + * Rather it means that it is ok for subsequent reservations to + * overlap this one. + * + * Use this entry point to reserve early ranges when you are doing + * so out of "Paranoia", reserving perhaps more memory than you need, + * just in case, and don't mind a subsequent overlapping reservation + * that is known to be needed. + * + * The drop_overlaps_that_are_ok() call here isn't really needed. + * It would be needed if we had two colliding 'overlap_ok' + * reservations, so that the second such would not panic on the + * overlap with the first. We don't have any such as of this + * writing, but might as well tolerate such if it happens in + * the future. + */ +void __init reserve_early_overlap_ok(u64 start, u64 end, char *name) +{ + drop_overlaps_that_are_ok(start, end); + __reserve_early(start, end, name, 1); +} + +static void __init __check_and_double_early_res(u64 ex_start, u64 ex_end) +{ + u64 start, end, size, mem; + struct early_res *new; + + /* do we have enough slots left ? */ + if ((max_early_res - early_res_count) > max(max_early_res/8, 2)) + return; + + /* double it */ + mem = -1ULL; + size = sizeof(struct early_res) * max_early_res * 2; + if (early_res == early_res_x) + start = 0; + else + start = early_res[0].end; + end = ex_start; + if (start + size < end) + mem = find_fw_memmap_area(start, end, size, + sizeof(struct early_res)); + if (mem == -1ULL) { + start = ex_end; + end = get_max_mapped(); + if (start + size < end) + mem = find_fw_memmap_area(start, end, size, + sizeof(struct early_res)); + } + if (mem == -1ULL) + panic("can not find more space for early_res array"); + + new = __va(mem); + /* save the first one for own */ + new[0].start = mem; + new[0].end = mem + size; + new[0].overlap_ok = 0; + /* copy old to new */ + if (early_res == early_res_x) { + memcpy(&new[1], &early_res[0], + sizeof(struct early_res) * max_early_res); + memset(&new[max_early_res+1], 0, + sizeof(struct early_res) * (max_early_res - 1)); + early_res_count++; + } else { + memcpy(&new[1], &early_res[1], + sizeof(struct early_res) * (max_early_res - 1)); + memset(&new[max_early_res], 0, + sizeof(struct early_res) * max_early_res); + } + memset(&early_res[0], 0, sizeof(struct early_res) * max_early_res); + early_res = new; + max_early_res *= 2; + printk(KERN_DEBUG "early_res array is doubled to %d at [%llx - %llx]\n", + max_early_res, mem, mem + size - 1); +} + +/* + * Most early reservations come here. + * + * We first have drop_overlaps_that_are_ok() drop any pre-existing + * 'overlap_ok' ranges, so that we can then reserve this memory + * range without risk of panic'ing on an overlapping overlap_ok + * early reservation. 
+ */ +void __init reserve_early(u64 start, u64 end, char *name) +{ + if (start >= end) + return; + + __check_and_double_early_res(start, end); + + drop_overlaps_that_are_ok(start, end); + __reserve_early(start, end, name, 0); +} + +void __init reserve_early_without_check(u64 start, u64 end, char *name) +{ + struct early_res *r; + + if (start >= end) + return; + + __check_and_double_early_res(start, end); + + r = &early_res[early_res_count]; + + r->start = start; + r->end = end; + r->overlap_ok = 0; + if (name) + strncpy(r->name, name, sizeof(r->name) - 1); + early_res_count++; +} + +void __init free_early(u64 start, u64 end) +{ + struct early_res *r; + int i; + + i = find_overlapped_early(start, end); + r = &early_res[i]; + if (i >= max_early_res || r->end != end || r->start != start) + panic("free_early on not reserved area: %llx-%llx!", + start, end - 1); + + drop_range(i); +} + +#ifdef CONFIG_NO_BOOTMEM +static void __init subtract_early_res(struct range *range, int az) +{ + int i, count; + u64 final_start, final_end; + int idx = 0; + + count = 0; + for (i = 0; i < max_early_res && early_res[i].end; i++) + count++; + + /* need to skip first one ?*/ + if (early_res != early_res_x) + idx = 1; + +#define DEBUG_PRINT_EARLY_RES 1 + +#if DEBUG_PRINT_EARLY_RES + printk(KERN_INFO "Subtract (%d early reservations)\n", count); +#endif + for (i = idx; i < count; i++) { + struct early_res *r = &early_res[i]; +#if DEBUG_PRINT_EARLY_RES + printk(KERN_INFO " #%d [%010llx - %010llx] %15s\n", i, + r->start, r->end, r->name); +#endif + final_start = PFN_DOWN(r->start); + final_end = PFN_UP(r->end); + if (final_start >= final_end) + continue; + subtract_range(range, az, final_start, final_end); + } + +} + +int __init get_free_all_memory_range(struct range **rangep, int nodeid) +{ + int i, count; + u64 start = 0, end; + u64 size; + u64 mem; + struct range *range; + int nr_range; + + count = 0; + for (i = 0; i < max_early_res && early_res[i].end; i++) + count++; + + count *= 2; + + size = sizeof(struct range) * count; + end = get_max_mapped(); +#ifdef MAX_DMA32_PFN + if (end > (MAX_DMA32_PFN << PAGE_SHIFT)) + start = MAX_DMA32_PFN << PAGE_SHIFT; +#endif + mem = find_fw_memmap_area(start, end, size, sizeof(struct range)); + if (mem == -1ULL) + panic("can not find more space for range free"); + + range = __va(mem); + /* use early_node_map[] and early_res to get range array at first */ + memset(range, 0, size); + nr_range = 0; + + /* need to go over early_node_map to find out good range for node */ + nr_range = add_from_early_node_map(range, count, nr_range, nodeid); +#ifdef CONFIG_X86_32 + subtract_range(range, count, max_low_pfn, -1ULL); +#endif + subtract_early_res(range, count); + nr_range = clean_sort_range(range, count); + + /* need to clear it ? 
*/ + if (nodeid == MAX_NUMNODES) { + memset(&early_res[0], 0, + sizeof(struct early_res) * max_early_res); + early_res = NULL; + max_early_res = 0; + } + + *rangep = range; + return nr_range; +} +#else +void __init early_res_to_bootmem(u64 start, u64 end) +{ + int i, count; + u64 final_start, final_end; + int idx = 0; + + count = 0; + for (i = 0; i < max_early_res && early_res[i].end; i++) + count++; + + /* need to skip first one ?*/ + if (early_res != early_res_x) + idx = 1; + + printk(KERN_INFO "(%d/%d early reservations) ==> bootmem [%010llx - %010llx]\n", + count - idx, max_early_res, start, end); + for (i = idx; i < count; i++) { + struct early_res *r = &early_res[i]; + printk(KERN_INFO " #%d [%010llx - %010llx] %16s", i, + r->start, r->end, r->name); + final_start = max(start, r->start); + final_end = min(end, r->end); + if (final_start >= final_end) { + printk(KERN_CONT "\n"); + continue; + } + printk(KERN_CONT " ==> [%010llx - %010llx]\n", + final_start, final_end); + reserve_bootmem_generic(final_start, final_end - final_start, + BOOTMEM_DEFAULT); + } + /* clear them */ + memset(&early_res[0], 0, sizeof(struct early_res) * max_early_res); + early_res = NULL; + max_early_res = 0; + early_res_count = 0; +} +#endif + +/* Check for already reserved areas */ +static inline int __init bad_addr(u64 *addrp, u64 size, u64 align) +{ + int i; + u64 addr = *addrp; + int changed = 0; + struct early_res *r; +again: + i = find_overlapped_early(addr, addr + size); + r = &early_res[i]; + if (i < max_early_res && r->end) { + *addrp = addr = round_up(r->end, align); + changed = 1; + goto again; + } + return changed; +} + +/* Check for already reserved areas */ +static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align) +{ + int i; + u64 addr = *addrp, last; + u64 size = *sizep; + int changed = 0; +again: + last = addr + size; + for (i = 0; i < max_early_res && early_res[i].end; i++) { + struct early_res *r = &early_res[i]; + if (last > r->start && addr < r->start) { + size = r->start - addr; + changed = 1; + goto again; + } + if (last > r->end && addr < r->end) { + addr = round_up(r->end, align); + size = last - addr; + changed = 1; + goto again; + } + if (last <= r->end && addr >= r->start) { + (*sizep)++; + return 0; + } + } + if (changed) { + *addrp = addr; + *sizep = size; + } + return changed; +} + +/* + * Find a free area with specified alignment in a specific range. 
+ * only with the area.between start to end is active range from early_node_map + * so they are good as RAM + */ +u64 __init find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end, + u64 size, u64 align) +{ + u64 addr, last; + + addr = round_up(ei_start, align); + if (addr < start) + addr = round_up(start, align); + if (addr >= ei_last) + goto out; + while (bad_addr(&addr, size, align) && addr+size <= ei_last) + ; + last = addr + size; + if (last > ei_last) + goto out; + if (last > end) + goto out; + + return addr; + +out: + return -1ULL; +} + +u64 __init find_early_area_size(u64 ei_start, u64 ei_last, u64 start, + u64 *sizep, u64 align) +{ + u64 addr, last; + + addr = round_up(ei_start, align); + if (addr < start) + addr = round_up(start, align); + if (addr >= ei_last) + goto out; + *sizep = ei_last - addr; + while (bad_addr_size(&addr, sizep, align) && addr + *sizep <= ei_last) + ; + last = addr + *sizep; + if (last > ei_last) + goto out; + + return addr; + +out: + return -1ULL; +} -- cgit v1.2.3 From c44dcc56d2b5c79ba3063d20f76e5347e2e418f6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 11 Feb 2010 02:24:46 -0500 Subject: switch inotify_user to anon_inode Signed-off-by: Al Viro --- fs/notify/inotify/inotify_user.c | 59 ++++------------------------------------ include/linux/magic.h | 1 - 2 files changed, 6 insertions(+), 54 deletions(-) (limited to 'include') diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index a94e8bd8eb1f..472cdf29ef82 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -29,14 +29,12 @@ #include /* module_init */ #include #include /* roundup() */ -#include /* superblock magic number */ -#include /* mntget */ #include /* LOOKUP_FOLLOW */ -#include /* struct path */ #include /* struct user */ #include /* struct kmem_cache */ #include #include +#include #include #include #include @@ -45,8 +43,6 @@ #include -static struct vfsmount *inotify_mnt __read_mostly; - /* these are configurable via /proc/sys/fs/inotify/ */ static int inotify_max_user_instances __read_mostly; static int inotify_max_queued_events __read_mostly; @@ -645,9 +641,7 @@ SYSCALL_DEFINE1(inotify_init1, int, flags) { struct fsnotify_group *group; struct user_struct *user; - struct file *filp; - struct path path; - int fd, ret; + int ret; /* Check the IN_* constants for consistency. 
*/ BUILD_BUG_ON(IN_CLOEXEC != O_CLOEXEC); @@ -656,10 +650,6 @@ SYSCALL_DEFINE1(inotify_init1, int, flags) if (flags & ~(IN_CLOEXEC | IN_NONBLOCK)) return -EINVAL; - fd = get_unused_fd_flags(flags & O_CLOEXEC); - if (fd < 0) - return fd; - user = get_current_user(); if (unlikely(atomic_read(&user->inotify_devs) >= inotify_max_user_instances)) { @@ -676,27 +666,14 @@ SYSCALL_DEFINE1(inotify_init1, int, flags) atomic_inc(&user->inotify_devs); - path.mnt = inotify_mnt; - path.dentry = inotify_mnt->mnt_root; - path_get(&path); - filp = alloc_file(&path, FMODE_READ, &inotify_fops); - if (!filp) - goto Enfile; + ret = anon_inode_getfd("inotify", &inotify_fops, group, + O_RDONLY | flags); + if (ret >= 0) + return ret; - filp->f_flags = O_RDONLY | (flags & O_NONBLOCK); - filp->private_data = group; - - fd_install(fd, filp); - - return fd; - -Enfile: - ret = -ENFILE; - path_put(&path); atomic_dec(&user->inotify_devs); out_free_uid: free_uid(user); - put_unused_fd(fd); return ret; } @@ -783,20 +760,6 @@ out: return ret; } -static int -inotify_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, struct vfsmount *mnt) -{ - return get_sb_pseudo(fs_type, "inotify", NULL, - INOTIFYFS_SUPER_MAGIC, mnt); -} - -static struct file_system_type inotify_fs_type = { - .name = "inotifyfs", - .get_sb = inotify_get_sb, - .kill_sb = kill_anon_super, -}; - /* * inotify_user_setup - Our initialization function. Note that we cannnot return * error because we have compiled-in VFS hooks. So an (unlikely) failure here @@ -804,16 +767,6 @@ static struct file_system_type inotify_fs_type = { */ static int __init inotify_user_setup(void) { - int ret; - - ret = register_filesystem(&inotify_fs_type); - if (unlikely(ret)) - panic("inotify: register_filesystem returned %d!\n", ret); - - inotify_mnt = kern_mount(&inotify_fs_type); - if (IS_ERR(inotify_mnt)) - panic("inotify: kern_mount ret %ld!\n", PTR_ERR(inotify_mnt)); - inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark_entry, SLAB_PANIC); event_priv_cachep = KMEM_CACHE(inotify_event_private_data, SLAB_PANIC); diff --git a/include/linux/magic.h b/include/linux/magic.h index 76285e01b39e..eb9800f05782 100644 --- a/include/linux/magic.h +++ b/include/linux/magic.h @@ -52,7 +52,6 @@ #define CGROUP_SUPER_MAGIC 0x27e0eb #define FUTEXFS_SUPER_MAGIC 0xBAD1DEA -#define INOTIFYFS_SUPER_MAGIC 0x2BAD1DEA #define STACK_END_MAGIC 0x57AC6E9D -- cgit v1.2.3 From cf4c43dd439b90a1a876b3f836ebe745abb9a269 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Wed, 15 Jul 2009 13:13:00 -0700 Subject: PCI: Add pci_bus_find_ext_capability For use by code that needs to walk extended capability lists before pci_dev structures are set up. Signed-off-by: Jesse Barnes LKML-Reference: <43F901BD926A4E43B106BF17856F07559FB80CFD@orsmsx508.amr.corp.intel.com> Signed-off-by: Jacob Pan Signed-off-by: H. 
Peter Anvin --- drivers/pci/pci.c | 43 +++++++++++++++++++++++++++++++++++++++++++ include/linux/pci.h | 2 ++ 2 files changed, 45 insertions(+) (limited to 'include') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 315fea47e784..aad62af2b4c6 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -297,6 +297,49 @@ int pci_find_ext_capability(struct pci_dev *dev, int cap) } EXPORT_SYMBOL_GPL(pci_find_ext_capability); +/** + * pci_bus_find_ext_capability - find an extended capability + * @bus: the PCI bus to query + * @devfn: PCI device to query + * @cap: capability code + * + * Like pci_find_ext_capability() but works for pci devices that do not have a + * pci_dev structure set up yet. + * + * Returns the address of the requested capability structure within the + * device's PCI configuration space or 0 in case the device does not + * support it. + */ +int pci_bus_find_ext_capability(struct pci_bus *bus, unsigned int devfn, + int cap) +{ + u32 header; + int ttl; + int pos = PCI_CFG_SPACE_SIZE; + + /* minimum 8 bytes per capability */ + ttl = (PCI_CFG_SPACE_EXP_SIZE - PCI_CFG_SPACE_SIZE) / 8; + + if (!pci_bus_read_config_dword(bus, devfn, pos, &header)) + return 0; + if (header == 0xffffffff || header == 0) + return 0; + + while (ttl-- > 0) { + if (PCI_EXT_CAP_ID(header) == cap) + return pos; + + pos = PCI_EXT_CAP_NEXT(header); + if (pos < PCI_CFG_SPACE_SIZE) + break; + + if (!pci_bus_read_config_dword(bus, devfn, pos, &header)) + break; + } + + return 0; +} + static int __pci_find_next_ht_cap(struct pci_dev *dev, int pos, int ht_cap) { int rc, ttl = PCI_FIND_CAP_TTL; diff --git a/include/linux/pci.h b/include/linux/pci.h index c1968f464c38..65f8a8f9d3e5 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -631,6 +631,8 @@ enum pci_lost_interrupt_reason pci_lost_interrupt(struct pci_dev *dev); int pci_find_capability(struct pci_dev *dev, int cap); int pci_find_next_capability(struct pci_dev *dev, u8 pos, int cap); int pci_find_ext_capability(struct pci_dev *dev, int cap); +int pci_bus_find_ext_capability(struct pci_bus *bus, unsigned int devfn, + int cap); int pci_find_ht_capability(struct pci_dev *dev, int ht_cap); int pci_find_next_ht_capability(struct pci_dev *dev, int pos, int ht_cap); struct pci_bus *pci_find_next_bus(const struct pci_bus *from); -- cgit v1.2.3 From f501912a35c02eadc55ca9396ece55fe36f785d0 Mon Sep 17 00:00:00 2001 From: Ben Myers Date: Wed, 17 Feb 2010 14:05:11 -0600 Subject: commit_metadata export operation replacing nfsd_sync_dir - Add commit_metadata export_operation to allow the underlying filesystem to decide how to commit an inode most efficiently. - Usage of nfsd_sync_dir and write_inode_now has been replaced with the commit_metadata function that takes a svc_fh. - The commit_metadata function calls the commit_metadata export_op if it's there, or else falls back to sync_inode instead of fsync and write_inode_now because only metadata need be synced here. - nfsd4_sync_rec_dir now uses vfs_fsync so that commit_metadata can be static Signed-off-by: Ben Myers Reviewed-by: Christoph Hellwig Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4recover.c | 4 +- fs/nfsd/vfs.c | 106 +++++++++++++++++++++++------------------------ include/linux/exportfs.h | 5 +++ 3 files changed, 58 insertions(+), 57 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 5a754f7b71ed..98fb98e330b4 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -119,9 +119,7 @@ out_no_tfm: static void nfsd4_sync_rec_dir(void) { - mutex_lock(&rec_dir.dentry->d_inode->i_mutex); - nfsd_sync_dir(rec_dir.dentry); - mutex_unlock(&rec_dir.dentry->d_inode->i_mutex); + vfs_fsync(NULL, rec_dir.dentry, 0); } int diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index ed024d329056..8afdba5082e8 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -27,6 +27,8 @@ #include #include #include +#include +#include #ifdef CONFIG_NFSD_V3 #include "xdr3.h" @@ -271,6 +273,32 @@ out: return err; } +/* + * Commit metadata changes to stable storage. + */ +static int +commit_metadata(struct svc_fh *fhp) +{ + struct inode *inode = fhp->fh_dentry->d_inode; + const struct export_operations *export_ops = inode->i_sb->s_export_op; + int error = 0; + + if (!EX_ISSYNC(fhp->fh_export)) + return 0; + + if (export_ops->commit_metadata) { + error = export_ops->commit_metadata(inode); + } else { + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .nr_to_write = 0, /* metadata only */ + }; + + error = sync_inode(inode, &wbc); + } + + return error; +} /* * Set various file attributes. @@ -768,28 +796,6 @@ nfsd_close(struct file *filp) fput(filp); } -/* - * Sync a directory to disk. - * - * We can't just call vfs_fsync because our requirements are slightly odd: - * - * a) we do not have a file struct available - * b) we expect to have i_mutex already held by the caller - */ -int -nfsd_sync_dir(struct dentry *dentry) -{ - struct inode *inode = dentry->d_inode; - int error; - - WARN_ON(!mutex_is_locked(&inode->i_mutex)); - - error = filemap_write_and_wait(inode->i_mapping); - if (!error && inode->i_fop->fsync) - error = inode->i_fop->fsync(NULL, dentry, 0); - return error; -} - /* * Obtain the readahead parameters for the file * specified by (dev, ino). @@ -1331,12 +1337,14 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, goto out_nfserr; } - if (EX_ISSYNC(fhp->fh_export)) { - err = nfserrno(nfsd_sync_dir(dentry)); - write_inode_now(dchild->d_inode, 1); - } + err = nfsd_create_setattr(rqstp, resfhp, iap); - err2 = nfsd_create_setattr(rqstp, resfhp, iap); + /* + * nfsd_setattr already committed the child. Transactional filesystems + * had a chance to commit changes for both parent and child + * simultaneously making the following commit_metadata a noop. 
+ */ + err2 = nfserrno(commit_metadata(fhp)); if (err2) err = err2; mnt_drop_write(fhp->fh_export->ex_path.mnt); @@ -1368,7 +1376,6 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, struct dentry *dentry, *dchild = NULL; struct inode *dirp; __be32 err; - __be32 err2; int host_err; __u32 v_mtime=0, v_atime=0; @@ -1463,11 +1470,6 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, if (created) *created = 1; - if (EX_ISSYNC(fhp->fh_export)) { - err = nfserrno(nfsd_sync_dir(dentry)); - /* setattr will sync the child (or not) */ - } - nfsd_check_ignore_resizing(iap); if (createmode == NFS3_CREATE_EXCLUSIVE) { @@ -1482,9 +1484,13 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, } set_attr: - err2 = nfsd_create_setattr(rqstp, resfhp, iap); - if (err2) - err = err2; + err = nfsd_create_setattr(rqstp, resfhp, iap); + + /* + * nfsd_setattr already committed the child (and possibly also the parent). + */ + if (!err) + err = nfserrno(commit_metadata(fhp)); mnt_drop_write(fhp->fh_export->ex_path.mnt); /* @@ -1599,12 +1605,9 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, } } else host_err = vfs_symlink(dentry->d_inode, dnew, path); - - if (!host_err) { - if (EX_ISSYNC(fhp->fh_export)) - host_err = nfsd_sync_dir(dentry); - } err = nfserrno(host_err); + if (!err) + err = nfserrno(commit_metadata(fhp)); fh_unlock(fhp); mnt_drop_write(fhp->fh_export->ex_path.mnt); @@ -1666,11 +1669,9 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, } host_err = vfs_link(dold, dirp, dnew); if (!host_err) { - if (EX_ISSYNC(ffhp->fh_export)) { - err = nfserrno(nfsd_sync_dir(ddir)); - write_inode_now(dest, 1); - } - err = 0; + err = nfserrno(commit_metadata(ffhp)); + if (!err) + err = nfserrno(commit_metadata(tfhp)); } else { if (host_err == -EXDEV && rqstp->rq_vers == 2) err = nfserr_acces; @@ -1766,10 +1767,10 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, goto out_dput_new; host_err = vfs_rename(fdir, odentry, tdir, ndentry); - if (!host_err && EX_ISSYNC(tfhp->fh_export)) { - host_err = nfsd_sync_dir(tdentry); + if (!host_err) { + host_err = commit_metadata(tfhp); if (!host_err) - host_err = nfsd_sync_dir(fdentry); + host_err = commit_metadata(ffhp); } mnt_drop_write(ffhp->fh_export->ex_path.mnt); @@ -1850,12 +1851,9 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, dput(rdentry); - if (host_err) - goto out_drop; - if (EX_ISSYNC(fhp->fh_export)) - host_err = nfsd_sync_dir(dentry); + if (!host_err) + host_err = commit_metadata(fhp); -out_drop: mnt_drop_write(fhp->fh_export->ex_path.mnt); out_nfserr: err = nfserrno(host_err); diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index dc12f416a49f..a9cd507f8cd2 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -96,6 +96,7 @@ struct fid { * @fh_to_parent: find the implied object's parent and get a dentry for it * @get_name: find the name for a given inode in a given directory * @get_parent: find the parent of a given directory + * @commit_metadata: commit metadata changes to stable storage * * See Documentation/filesystems/nfs/Exporting for details on how to use * this interface correctly. @@ -137,6 +138,9 @@ struct fid { * is also a directory. In the event that it cannot be found, or storage * space cannot be allocated, a %ERR_PTR should be returned. * + * commit_metadata: + * @commit_metadata should commit metadata changes to stable storage. 
+ * * Locking rules: * get_parent is called with child->d_inode->i_mutex down * get_name is not (which is possibly inconsistent) @@ -152,6 +156,7 @@ struct export_operations { int (*get_name)(struct dentry *parent, char *name, struct dentry *child); struct dentry * (*get_parent)(struct dentry *child); + int (*commit_metadata)(struct inode *inode); }; extern int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, -- cgit v1.2.3 From 7a9f0dd9c49425e2b0e39ada4757bc7a38c84873 Mon Sep 17 00:00:00 2001 From: Pauli Nieminen Date: Mon, 1 Feb 2010 19:11:15 +0200 Subject: drm: Add generic multipart buffer. Allocating multiple pages of memory for data that is coming from user space may fail. To fix memory allocation failures the buffer object should be split to multiple independ pages. drm buffer provides generic interface to copy and process large data arrays from user space. Interface includes allocation and free functions to allocate the buffer object and data storage pages. All access operations are performed relative to a internal pointer which is advanced with drm_buffer_advance function. The buffer can be accessed using drm_buffer_pointer_to_XXX functions if it is known that requested object doesn't split over a page boundary. These functions don't do any error checking to maximize performance. If there is large object which could be split there is special drm_buffer_read_object function. drm_buffer_read_object takes a pointer as argument which is used as temporary store for data if it is split over boundary in the buffer. Signed-off-by: Pauli Nieminen Signed-off-by: Dave Airlie --- drivers/gpu/drm/Makefile | 2 +- drivers/gpu/drm/drm_buffer.c | 184 +++++++++++++++++++++++++++++++++++++++++++ include/drm/drm_buffer.h | 148 ++++++++++++++++++++++++++++++++++ 3 files changed, 333 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/drm_buffer.c create mode 100644 include/drm/drm_buffer.h (limited to 'include') diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 39c5aa75b8f1..abe3f446ca48 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -4,7 +4,7 @@ ccflags-y := -Iinclude/drm -drm-y := drm_auth.o drm_bufs.o drm_cache.o \ +drm-y := drm_auth.o drm_buffer.o drm_bufs.o drm_cache.o \ drm_context.o drm_dma.o drm_drawable.o \ drm_drv.o drm_fops.o drm_gem.o drm_ioctl.o drm_irq.o \ drm_lock.o drm_memory.o drm_proc.o drm_stub.o drm_vm.o \ diff --git a/drivers/gpu/drm/drm_buffer.c b/drivers/gpu/drm/drm_buffer.c new file mode 100644 index 000000000000..55d03ed05000 --- /dev/null +++ b/drivers/gpu/drm/drm_buffer.c @@ -0,0 +1,184 @@ +/************************************************************************** + * + * Copyright 2010 Pauli Nieminen. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * + **************************************************************************/ +/* + * Multipart buffer for coping data which is larger than the page size. + * + * Authors: + * Pauli Nieminen + */ + +#include "drm_buffer.h" + +/** + * Allocate the drm buffer object. + * + * buf: Pointer to a pointer where the object is stored. + * size: The number of bytes to allocate. + */ +int drm_buffer_alloc(struct drm_buffer **buf, int size) +{ + int nr_pages = size / PAGE_SIZE + 1; + int idx; + + /* Allocating pointer table to end of structure makes drm_buffer + * variable sized */ + *buf = kzalloc(sizeof(struct drm_buffer) + nr_pages*sizeof(char *), + GFP_KERNEL); + + if (*buf == NULL) { + DRM_ERROR("Failed to allocate drm buffer object to hold" + " %d bytes in %d pages.\n", + size, nr_pages); + return -ENOMEM; + } + + (*buf)->size = size; + + for (idx = 0; idx < nr_pages; ++idx) { + + (*buf)->data[idx] = + kmalloc(min(PAGE_SIZE, size - idx * PAGE_SIZE), + GFP_KERNEL); + + + if ((*buf)->data[idx] == NULL) { + DRM_ERROR("Failed to allocate %dth page for drm" + " buffer with %d bytes and %d pages.\n", + idx + 1, size, nr_pages); + goto error_out; + } + + } + + return 0; + +error_out: + + /* Only last element can be null pointer so check for it first. */ + if ((*buf)->data[idx]) + kfree((*buf)->data[idx]); + + for (--idx; idx >= 0; --idx) + kfree((*buf)->data[idx]); + + kfree(*buf); + return -ENOMEM; +} +EXPORT_SYMBOL(drm_buffer_alloc); + +/** + * Copy the user data to the begin of the buffer and reset the processing + * iterator. + * + * user_data: A pointer the data that is copied to the buffer. + * size: The Number of bytes to copy. + */ +extern int drm_buffer_copy_from_user(struct drm_buffer *buf, + void __user *user_data, int size) +{ + int nr_pages = size / PAGE_SIZE + 1; + int idx; + + if (size > buf->size) { + DRM_ERROR("Requesting to copy %d bytes to a drm buffer with" + " %d bytes space\n", + size, buf->size); + return -EFAULT; + } + + for (idx = 0; idx < nr_pages; ++idx) { + + if (DRM_COPY_FROM_USER(buf->data[idx], + user_data + idx * PAGE_SIZE, + min(PAGE_SIZE, size - idx * PAGE_SIZE))) { + DRM_ERROR("Failed to copy user data (%p) to drm buffer" + " (%p) %dth page.\n", + user_data, buf, idx); + return -EFAULT; + + } + } + buf->iterator = 0; + return 0; +} +EXPORT_SYMBOL(drm_buffer_copy_from_user); + +/** + * Free the drm buffer object + */ +void drm_buffer_free(struct drm_buffer *buf) +{ + + if (buf != NULL) { + + int nr_pages = buf->size / PAGE_SIZE + 1; + int idx; + for (idx = 0; idx < nr_pages; ++idx) + kfree(buf->data[idx]); + + kfree(buf); + } +} +EXPORT_SYMBOL(drm_buffer_free); + +/** + * Read an object from buffer that may be split to multiple parts. If object + * is not split function just returns the pointer to object in buffer. But in + * case of split object data is copied to given stack object that is suplied + * by caller. + * + * The processing location of the buffer is also advanced to the next byte + * after the object. 
+ * + * objsize: The size of the objet in bytes. + * stack_obj: A pointer to a memory location where object can be copied. + */ +void *drm_buffer_read_object(struct drm_buffer *buf, + int objsize, void *stack_obj) +{ + int idx = drm_buffer_index(buf); + int page = drm_buffer_page(buf); + void *obj = 0; + + if (idx + objsize <= PAGE_SIZE) { + obj = &buf->data[page][idx]; + } else { + /* The object is split which forces copy to temporary object.*/ + int beginsz = PAGE_SIZE - idx; + memcpy(stack_obj, &buf->data[page][idx], beginsz); + + memcpy(stack_obj + beginsz, &buf->data[page + 1][0], + objsize - beginsz); + + obj = stack_obj; + } + + drm_buffer_advance(buf, objsize); + return obj; +} +EXPORT_SYMBOL(drm_buffer_read_object); diff --git a/include/drm/drm_buffer.h b/include/drm/drm_buffer.h new file mode 100644 index 000000000000..322dbff3f861 --- /dev/null +++ b/include/drm/drm_buffer.h @@ -0,0 +1,148 @@ +/************************************************************************** + * + * Copyright 2010 Pauli Nieminen. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * + **************************************************************************/ +/* + * Multipart buffer for coping data which is larger than the page size. + * + * Authors: + * Pauli Nieminen + */ + +#ifndef _DRM_BUFFER_H_ +#define _DRM_BUFFER_H_ + +#include "drmP.h" + +struct drm_buffer { + int iterator; + int size; + char *data[]; +}; + + +/** + * Return the index of page that buffer is currently pointing at. + */ +static inline int drm_buffer_page(struct drm_buffer *buf) +{ + return buf->iterator / PAGE_SIZE; +} +/** + * Return the index of the current byte in the page + */ +static inline int drm_buffer_index(struct drm_buffer *buf) +{ + return buf->iterator & (PAGE_SIZE - 1); +} +/** + * Return number of bytes that is left to process + */ +static inline int drm_buffer_unprocessed(struct drm_buffer *buf) +{ + return buf->size - buf->iterator; +} + +/** + * Advance the buffer iterator number of bytes that is given. + */ +static inline void drm_buffer_advance(struct drm_buffer *buf, int bytes) +{ + buf->iterator += bytes; +} + +/** + * Allocate the drm buffer object. + * + * buf: A pointer to a pointer where the object is stored. + * size: The number of bytes to allocate. 
+ */ +extern int drm_buffer_alloc(struct drm_buffer **buf, int size); + +/** + * Copy the user data to the begin of the buffer and reset the processing + * iterator. + * + * user_data: A pointer the data that is copied to the buffer. + * size: The Number of bytes to copy. + */ +extern int drm_buffer_copy_from_user(struct drm_buffer *buf, + void __user *user_data, int size); + +/** + * Free the drm buffer object + */ +extern void drm_buffer_free(struct drm_buffer *buf); + +/** + * Read an object from buffer that may be split to multiple parts. If object + * is not split function just returns the pointer to object in buffer. But in + * case of split object data is copied to given stack object that is suplied + * by caller. + * + * The processing location of the buffer is also advanced to the next byte + * after the object. + * + * objsize: The size of the objet in bytes. + * stack_obj: A pointer to a memory location where object can be copied. + */ +extern void *drm_buffer_read_object(struct drm_buffer *buf, + int objsize, void *stack_obj); + +/** + * Returns the pointer to the dword which is offset number of elements from the + * current processing location. + * + * Caller must make sure that dword is not split in the buffer. This + * requirement is easily met if all the sizes of objects in buffer are + * multiples of dword and PAGE_SIZE is multiple dword. + * + * Call to this function doesn't change the processing location. + * + * offset: The index of the dword relative to the internat iterator. + */ +static inline void *drm_buffer_pointer_to_dword(struct drm_buffer *buffer, + int offset) +{ + int iter = buffer->iterator + offset * 4; + return &buffer->data[iter / PAGE_SIZE][iter & (PAGE_SIZE - 1)]; +} +/** + * Returns the pointer to the dword which is offset number of elements from + * the current processing location. + * + * Call to this function doesn't change the processing location. + * + * offset: The index of the byte relative to the internat iterator. + */ +static inline void *drm_buffer_pointer_to_byte(struct drm_buffer *buffer, + int offset) +{ + int iter = buffer->iterator + offset; + return &buffer->data[iter / PAGE_SIZE][iter & (PAGE_SIZE - 1)]; +} + +#endif -- cgit v1.2.3 From 189b3b1c89761054fee3438f063d7f257306e2d8 Mon Sep 17 00:00:00 2001 From: "wzt.wzt@gmail.com" Date: Tue, 23 Feb 2010 23:15:28 +0800 Subject: Security: add static to security_ops and default_security_ops variable Enhance the security framework to support resetting the active security module. This eliminates the need for direct use of the security_ops and default_security_ops variables outside of security.c, so make security_ops and default_security_ops static. Also remove the secondary_ops variable as a cleanup since there is no use for that. secondary_ops was originally used by SELinux to call the "secondary" security module (capability or dummy), but that was replaced by direct calls to capability and the only remaining use is to save and restore the original security ops pointer value if SELinux is disabled by early userspace based on /etc/selinux/config. Further, if we support this directly in the security framework, then we can just use &default_security_ops for this purpose since that is now available. 
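For illustration only, a minimal sketch (not part of this patch) of how a security module's disable path can use the new helper; example_lsm_disable() is a hypothetical function, while reset_security_ops() is the helper introduced here, mirroring the selinux_disable() hunk further down.

/* hypothetical disable path, modelled on selinux_disable() */
static int example_lsm_disable(void)
{
	/* tear down module-private state first (caches, handlers, ...) */

	/* hand enforcement back to the built-in default ops */
	reset_security_ops();

	return 0;
}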
Signed-off-by: Zhitong Wang Acked-by: Stephen Smalley Signed-off-by: James Morris --- include/linux/security.h | 2 ++ security/capability.c | 4 ---- security/security.c | 11 +++++++++-- security/selinux/hooks.c | 13 +------------ 4 files changed, 12 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index a4dc74d86ac6..233d20b52c1b 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -95,6 +95,8 @@ struct seq_file; extern int cap_netlink_send(struct sock *sk, struct sk_buff *skb); extern int cap_netlink_recv(struct sk_buff *skb, int cap); +void reset_security_ops(void); + #ifdef CONFIG_MMU extern unsigned long mmap_min_addr; extern unsigned long dac_mmap_min_addr; diff --git a/security/capability.c b/security/capability.c index 5c700e1a4fd3..4875142b858d 100644 --- a/security/capability.c +++ b/security/capability.c @@ -906,10 +906,6 @@ static void cap_audit_rule_free(void *lsmrule) } #endif /* CONFIG_AUDIT */ -struct security_operations default_security_ops = { - .name = "default", -}; - #define set_to_cap_if_null(ops, function) \ do { \ if (!ops->function) { \ diff --git a/security/security.c b/security/security.c index 971092c06f31..edae56b78771 100644 --- a/security/security.c +++ b/security/security.c @@ -23,10 +23,12 @@ static __initdata char chosen_lsm[SECURITY_NAME_MAX + 1] = CONFIG_DEFAULT_SECURITY; /* things that live in capability.c */ -extern struct security_operations default_security_ops; extern void security_fixup_ops(struct security_operations *ops); -struct security_operations *security_ops; /* Initialized to NULL */ +static struct security_operations *security_ops; +static struct security_operations default_security_ops = { + .name = "default", +}; static inline int verify(struct security_operations *ops) { @@ -63,6 +65,11 @@ int __init security_init(void) return 0; } +void reset_security_ops(void) +{ + security_ops = &default_security_ops; +} + /* Save user chosen LSM */ static int __init choose_lsm(char *str) { diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 6b36ce2eef2e..dc7660074b99 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -126,13 +126,6 @@ __setup("selinux=", selinux_enabled_setup); int selinux_enabled = 1; #endif - -/* - * Minimal support for a secondary security module, - * just to allow the use of the capability module. - */ -static struct security_operations *secondary_ops; - /* Lists of inode and superblock security structures initialized before the policy was loaded. */ static LIST_HEAD(superblock_security_head); @@ -5674,9 +5667,6 @@ static __init int selinux_init(void) 0, SLAB_PANIC, NULL); avc_init(); - secondary_ops = security_ops; - if (!secondary_ops) - panic("SELinux: No initial security operations\n"); if (register_security(&selinux_ops)) panic("SELinux: Unable to register with kernel.\n"); @@ -5837,8 +5827,7 @@ int selinux_disable(void) selinux_disabled = 1; selinux_enabled = 0; - /* Reset security_ops to the secondary module, dummy or capability. */ - security_ops = secondary_ops; + reset_security_ops(); /* Try to destroy the avc node cache */ avc_disable(); -- cgit v1.2.3 From a712ffbc199849364c46e9112b93b66de08e2c26 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Thu, 4 Feb 2010 10:59:27 -0800 Subject: x86/PCI: Moorestown PCI support The Moorestown platform only has a few devices that actually support PCI config cycles. 
The rest of the devices use an in-RAM MCFG space for the purposes of device enumeration and initialization. There are a few uglies in the fake support, like BAR sizes that aren't a power of two, sizing detection, and writes to the real devices, but other than that it's pretty straightforward. Another way to think of this is not really as PCI at all, but just a table in RAM describing which devices are present, their capabilities and their offsets in MMIO space. This could have been done with a special new firmware table on this platform, but given that we do have some real PCI devices too, simply describing things in an MCFG type space was pretty simple. Signed-off-by: Jesse Barnes LKML-Reference: <43F901BD926A4E43B106BF17856F07559FB80D08@orsmsx508.amr.corp.intel.com> Signed-off-by: Jacob Pan Signed-off-by: H. Peter Anvin --- arch/x86/pci/Makefile | 2 +- arch/x86/pci/mrst.c | 258 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/pci_regs.h | 1 + 3 files changed, 260 insertions(+), 1 deletion(-) create mode 100644 arch/x86/pci/mrst.c (limited to 'include') diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index 39fba37f702f..4753ebc19cae 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile @@ -13,7 +13,7 @@ obj-$(CONFIG_X86_VISWS) += visws.o obj-$(CONFIG_X86_NUMAQ) += numaq_32.o -obj-y += common.o early.o +obj-y += common.o early.o mrst.o obj-y += amd_bus.o obj-$(CONFIG_X86_64) += bus_numa.o diff --git a/arch/x86/pci/mrst.c b/arch/x86/pci/mrst.c new file mode 100644 index 000000000000..6e9e1a35a5d7 --- /dev/null +++ b/arch/x86/pci/mrst.c @@ -0,0 +1,258 @@ +/* + * Moorestown PCI support + * Copyright (c) 2008 Intel Corporation + * Jesse Barnes + * + * Moorestown has an interesting PCI implementation: + * - configuration space is memory mapped (as defined by MCFG) + * - Lincroft devices also have a real, type 1 configuration space + * - Early Lincroft silicon has a type 1 access bug that will cause + * a hang if non-existent devices are accessed + * - some devices have the "fixed BAR" capability, which means + * they can't be relocated or modified; check for that during + * BAR sizing + * + * So, we use the MCFG space for all reads and writes, but also send + * Lincroft writes to type 1 space. But only read/write if the device + * actually exists, otherwise return all 1s for reads and bit bucket + * the writes. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#define PCIE_CAP_OFFSET 0x100 + +/* Fixed BAR fields */ +#define PCIE_VNDR_CAP_ID_FIXED_BAR 0x00 /* Fixed BAR (TBD) */ +#define PCI_FIXED_BAR_0_SIZE 0x04 +#define PCI_FIXED_BAR_1_SIZE 0x08 +#define PCI_FIXED_BAR_2_SIZE 0x0c +#define PCI_FIXED_BAR_3_SIZE 0x10 +#define PCI_FIXED_BAR_4_SIZE 0x14 +#define PCI_FIXED_BAR_5_SIZE 0x1c + +/** + * fixed_bar_cap - return the offset of the fixed BAR cap if found + * @bus: PCI bus + * @devfn: device in question + * + * Look for the fixed BAR cap on @bus and @devfn, returning its offset + * if found or 0 otherwise. 
+ */ +static int fixed_bar_cap(struct pci_bus *bus, unsigned int devfn) +{ + int pos; + u32 pcie_cap = 0, cap_data; + + pos = PCIE_CAP_OFFSET; + while (pos) { + if (raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number, + devfn, pos, 4, &pcie_cap)) + return 0; + + if (pcie_cap == 0xffffffff) + return 0; + + if (PCI_EXT_CAP_ID(pcie_cap) == PCI_EXT_CAP_ID_VNDR) { + raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number, + devfn, pos + 4, 4, &cap_data); + if ((cap_data & 0xffff) == PCIE_VNDR_CAP_ID_FIXED_BAR) + return pos; + } + + pos = pcie_cap >> 20; + } + + return 0; +} + +static int pci_device_update_fixed(struct pci_bus *bus, unsigned int devfn, + int reg, int len, u32 val, int offset) +{ + u32 size; + unsigned int domain, busnum; + int bar = (reg - PCI_BASE_ADDRESS_0) >> 2; + + domain = pci_domain_nr(bus); + busnum = bus->number; + + if (val == ~0 && len == 4) { + unsigned long decode; + + raw_pci_ext_ops->read(domain, busnum, devfn, + offset + 8 + (bar * 4), 4, &size); + + /* Turn the size into a decode pattern for the sizing code */ + if (size) { + decode = size - 1; + decode |= decode >> 1; + decode |= decode >> 2; + decode |= decode >> 4; + decode |= decode >> 8; + decode |= decode >> 16; + decode++; + decode = ~(decode - 1); + } else { + decode = ~0; + } + + /* + * If val is all ones, the core code is trying to size the reg, + * so update the mmconfig space with the real size. + * + * Note: this assumes the fixed size we got is a power of two. + */ + return raw_pci_ext_ops->write(domain, busnum, devfn, reg, 4, + decode); + } + + /* This is some other kind of BAR write, so just do it. */ + return raw_pci_ext_ops->write(domain, busnum, devfn, reg, len, val); +} + +/** + * type1_access_ok - check whether to use type 1 + * @bus: bus number + * @devfn: device & function in question + * + * If the bus is on a Lincroft chip and it exists, or is not on a Lincroft at + * all, the we can go ahead with any reads & writes. If it's on a Lincroft, + * but doesn't exist, avoid the access altogether to keep the chip from + * hanging. + */ +static bool type1_access_ok(unsigned int bus, unsigned int devfn, int reg) +{ + /* This is a workaround for A0 LNC bug where PCI status register does + * not have new CAP bit set. can not be written by SW either. + * + * PCI header type in real LNC indicates a single function device, this + * will prevent probing other devices under the same function in PCI + * shim. Therefore, use the header type in shim instead. + */ + if (reg >= 0x100 || reg == PCI_STATUS || reg == PCI_HEADER_TYPE) + return 0; + if (bus == 0 && (devfn == PCI_DEVFN(2, 0) || devfn == PCI_DEVFN(0, 0))) + return 1; + return 0; /* langwell on others */ +} + +static int pci_read(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 *value) +{ + if (type1_access_ok(bus->number, devfn, where)) + return pci_direct_conf1.read(pci_domain_nr(bus), bus->number, + devfn, where, size, value); + return raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number, + devfn, where, size, value); +} + +static int pci_write(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 value) +{ + int offset; + + /* On MRST, there is no PCI ROM BAR, this will cause a subsequent read + * to ROM BAR return 0 then being ignored. 
+ */ + if (where == PCI_ROM_ADDRESS) + return 0; + + /* + * Devices with fixed BARs need special handling: + * - BAR sizing code will save, write ~0, read size, restore + * - so writes to fixed BARs need special handling + * - other writes to fixed BAR devices should go through mmconfig + */ + offset = fixed_bar_cap(bus, devfn); + if (offset && + (where >= PCI_BASE_ADDRESS_0 && where <= PCI_BASE_ADDRESS_5)) { + return pci_device_update_fixed(bus, devfn, where, size, value, + offset); + } + + /* + * On Moorestown update both real & mmconfig space + * Note: early Lincroft silicon can't handle type 1 accesses to + * non-existent devices, so just eat the write in that case. + */ + if (type1_access_ok(bus->number, devfn, where)) + return pci_direct_conf1.write(pci_domain_nr(bus), bus->number, + devfn, where, size, value); + return raw_pci_ext_ops->write(pci_domain_nr(bus), bus->number, devfn, + where, size, value); +} + +static int mrst_pci_irq_enable(struct pci_dev *dev) +{ + u8 pin; + struct io_apic_irq_attr irq_attr; + + pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); + + /* MRST only have IOAPIC, the PCI irq lines are 1:1 mapped to + * IOAPIC RTE entries, so we just enable RTE for the device. + */ + irq_attr.ioapic = mp_find_ioapic(dev->irq); + irq_attr.ioapic_pin = dev->irq; + irq_attr.trigger = 1; /* level */ + irq_attr.polarity = 1; /* active low */ + io_apic_set_pci_routing(&dev->dev, dev->irq, &irq_attr); + + return 0; +} + +struct pci_ops pci_mrst_ops = { + .read = pci_read, + .write = pci_write, +}; + +/** + * pci_mrst_init - installs pci_mrst_ops + * + * Moorestown has an interesting PCI implementation (see above). + * Called when the early platform detection installs it. + */ +int __init pci_mrst_init(void) +{ + printk(KERN_INFO "Moorestown platform detected, using MRST PCI ops\n"); + pci_mmcfg_late_init(); + pcibios_enable_irq = mrst_pci_irq_enable; + pci_root_ops = pci_mrst_ops; + /* Continue with standard init */ + return 1; +} + +/* + * Langwell devices reside at fixed offsets, don't try to move them. + */ +static void __devinit pci_fixed_bar_fixup(struct pci_dev *dev) +{ + unsigned long offset; + u32 size; + int i; + + /* Fixup the BAR sizes for fixed BAR devices and make them unmoveable */ + offset = fixed_bar_cap(dev->bus, dev->devfn); + if (!offset || PCI_DEVFN(2, 0) == dev->devfn || + PCI_DEVFN(2, 2) == dev->devfn) + return; + + for (i = 0; i < PCI_ROM_RESOURCE; i++) { + pci_read_config_dword(dev, offset + 8 + (i * 4), &size); + dev->resource[i].end = dev->resource[i].start + size - 1; + dev->resource[i].flags |= IORESOURCE_PCI_FIXED; + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_fixed_bar_fixup); diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index 9f2ad0aa3c39..c8f302991b66 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -507,6 +507,7 @@ #define PCI_EXT_CAP_ID_VC 2 #define PCI_EXT_CAP_ID_DSN 3 #define PCI_EXT_CAP_ID_PWR 4 +#define PCI_EXT_CAP_ID_VNDR 11 #define PCI_EXT_CAP_ID_ACS 13 #define PCI_EXT_CAP_ID_ARI 14 #define PCI_EXT_CAP_ID_ATS 15 -- cgit v1.2.3 From 17a55f79fd8051a6a8a6e84176c83af71877a98b Mon Sep 17 00:00:00 2001 From: Alexander Chiang Date: Tue, 2 Feb 2010 19:09:16 +0000 Subject: IB/core: Pack struct ib_device a little tighter A small change to reduce the size of ib_device to 1112 bytes (from 1128). 
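As an aside, a hypothetical example (not taken from the patch) of the padding behaviour such reordering exploits on a 64-bit build, where a u64 needs 8-byte alignment:

#include <linux/types.h>

struct before {			/* 24 bytes: two 4-byte holes        */
	int abi_ver;		/* 4 bytes + 4 bytes of padding      */
	u64 cmd_mask;		/* 8 bytes, 8-byte aligned           */
	int state;		/* 4 bytes + 4 bytes of tail padding */
};

struct after {			/* 16 bytes: same fields, no holes   */
	u64 cmd_mask;
	int abi_ver;
	int state;
};

Grouping fields so that 4-byte members sit next to each other instead of alone before an 8-byte member removes such holes, which is what the hunk below does with the two spinlock_t fields and with uverbs_abi_ver/uverbs_cmd_mask; tools like pahole report the holes directly.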
Signed-off-by: Alex Chiang Signed-off-by: Roland Dreier --- include/rdma/ib_verbs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 09509edb1c5f..a585e0f92bc3 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -984,9 +984,9 @@ struct ib_device { struct list_head event_handler_list; spinlock_t event_handler_lock; + spinlock_t client_data_lock; struct list_head core_list; struct list_head client_data_list; - spinlock_t client_data_lock; struct ib_cache cache; int *pkey_tbl_len; @@ -1144,8 +1144,8 @@ struct ib_device { IB_DEV_UNREGISTERED } reg_state; - u64 uverbs_cmd_mask; int uverbs_abi_ver; + u64 uverbs_cmd_mask; char node_desc[64]; __be64 node_guid; -- cgit v1.2.3 From abfe5a01ef1e463cbafdae461b693db34e308c02 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sat, 20 Feb 2010 12:13:49 +0100 Subject: firewire: cdev: add more flexible cycle timer ioctl The system time from CLOCK_REALTIME is not monotonic, hence problematic for the main user of the FW_CDEV_IOC_GET_CYCLE_TIMER ioctl. This issue exists in its successor ABI, i.e. raw1394, too. http://subversion.ffado.org/ticket/242 We now offer an alternative ioctl which lets the caller choose between CLOCK_REALTIME, CLOCK_MONOTONIC, and CLOCK_MONOTONIC_RAW as source of the local time, very similar to the clock_gettime libc function. The format of the local time return value matches that of clock_gettime (seconds and nanoseconds, instead of a single microseconds value from the existing ioctl). Signed-off-by: Stefan Richter --- drivers/firewire/core-cdev.c | 38 ++++++++++++++++++++++++++++++++------ include/linux/firewire-cdev.h | 31 +++++++++++++++++++++++++++---- 2 files changed, 59 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c index 3c1ac0933d24..a4aa477b9b2c 100644 --- a/drivers/firewire/core-cdev.c +++ b/drivers/firewire/core-cdev.c @@ -1031,22 +1031,46 @@ static int ioctl_stop_iso(struct client *client, void *buffer) return fw_iso_context_stop(client->iso_context); } -static int ioctl_get_cycle_timer(struct client *client, void *buffer) +static int ioctl_get_cycle_timer2(struct client *client, void *buffer) { - struct fw_cdev_get_cycle_timer *request = buffer; + struct fw_cdev_get_cycle_timer2 *request = buffer; struct fw_card *card = client->device->card; - struct timeval tv; + struct timespec ts = {0, 0}; u32 cycle_time; + int ret = 0; local_irq_disable(); cycle_time = card->driver->get_cycle_time(card); - do_gettimeofday(&tv); + + switch (request->clk_id) { + case CLOCK_REALTIME: getnstimeofday(&ts); break; + case CLOCK_MONOTONIC: do_posix_clock_monotonic_gettime(&ts); break; + case CLOCK_MONOTONIC_RAW: getrawmonotonic(&ts); break; + default: + ret = -EINVAL; + } local_irq_enable(); - request->local_time = tv.tv_sec * 1000000ULL + tv.tv_usec; - request->cycle_timer = cycle_time; + request->tv_sec = ts.tv_sec; + request->tv_nsec = ts.tv_nsec; + request->cycle_timer = cycle_time; + + return ret; +} + +static int ioctl_get_cycle_timer(struct client *client, void *buffer) +{ + struct fw_cdev_get_cycle_timer *request = buffer; + struct fw_cdev_get_cycle_timer2 ct2; + + ct2.clk_id = CLOCK_REALTIME; + ioctl_get_cycle_timer2(client, &ct2); + + request->local_time = ct2.tv_sec * USEC_PER_SEC + + ct2.tv_nsec / NSEC_PER_USEC; + request->cycle_timer = ct2.cycle_timer; return 0; } @@ -1320,6 +1344,7 @@ static int (* const ioctl_handlers[])(struct 
client *client, void *buffer) = { ioctl_get_speed, ioctl_send_broadcast_request, ioctl_send_stream_packet, + ioctl_get_cycle_timer2, }; static int dispatch_ioctl(struct client *client, @@ -1341,6 +1366,7 @@ static int dispatch_ioctl(struct client *client, struct fw_cdev_get_cycle_timer _0c; struct fw_cdev_allocate_iso_resource _0d; struct fw_cdev_send_stream_packet _13; + struct fw_cdev_get_cycle_timer2 _14; })]; int ret; diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h index 520ecf86cbb3..baa8290c8416 100644 --- a/include/linux/firewire-cdev.h +++ b/include/linux/firewire-cdev.h @@ -248,6 +248,9 @@ union fw_cdev_event { #define FW_CDEV_IOC_SEND_BROADCAST_REQUEST _IOW('#', 0x12, struct fw_cdev_send_request) #define FW_CDEV_IOC_SEND_STREAM_PACKET _IOW('#', 0x13, struct fw_cdev_send_stream_packet) +/* available since kernel version 2.6.34 */ +#define FW_CDEV_IOC_GET_CYCLE_TIMER2 _IOWR('#', 0x14, struct fw_cdev_get_cycle_timer2) + /* * FW_CDEV_VERSION History * 1 (2.6.22) - initial version @@ -544,20 +547,40 @@ struct fw_cdev_stop_iso { /** * struct fw_cdev_get_cycle_timer - read cycle timer register * @local_time: system time, in microseconds since the Epoch - * @cycle_timer: isochronous cycle timer, as per OHCI 1.1 clause 5.13 + * @cycle_timer: Cycle Time register contents * * The %FW_CDEV_IOC_GET_CYCLE_TIMER ioctl reads the isochronous cycle timer - * and also the system clock. This allows to express the receive time of an - * isochronous packet as a system time with microsecond accuracy. + * and also the system clock (%CLOCK_REALTIME). This allows to express the + * receive time of an isochronous packet as a system time. * * @cycle_timer consists of 7 bits cycleSeconds, 13 bits cycleCount, and - * 12 bits cycleOffset, in host byte order. + * 12 bits cycleOffset, in host byte order. Cf. the Cycle Time register + * per IEEE 1394 or Isochronous Cycle Timer register per OHCI-1394. */ struct fw_cdev_get_cycle_timer { __u64 local_time; __u32 cycle_timer; }; +/** + * struct fw_cdev_get_cycle_timer2 - read cycle timer register + * @tv_sec: system time, seconds + * @tv_nsec: system time, sub-seconds part in nanoseconds + * @clk_id: input parameter, clock from which to get the system time + * @cycle_timer: Cycle Time register contents + * + * The %FW_CDEV_IOC_GET_CYCLE_TIMER2 works like + * %FW_CDEV_IOC_GET_CYCLE_TIMER but lets you choose a clock like with POSIX' + * clock_gettime function. Supported @clk_id values are POSIX' %CLOCK_REALTIME + * and %CLOCK_MONOTONIC and Linux' %CLOCK_MONOTONIC_RAW. + */ +struct fw_cdev_get_cycle_timer2 { + __s64 tv_sec; + __s32 tv_nsec; + __s32 clk_id; + __u32 cycle_timer; +}; + /** * struct fw_cdev_allocate_iso_resource - (De)allocate a channel or bandwidth * @closure: Passed back to userspace in correponding iso resource events -- cgit v1.2.3 From e94b6d7736107c07b1b089797651d02994d268c7 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sun, 21 Feb 2010 12:48:57 +0100 Subject: firewire: cdev: increment ABI version number so that clients can detect whether the FW_CDEV_IOC_GET_CYCLE_TIMER ioctl is reliable (on all tested controllers, especially the widely used VIA controllers, also NEC controllers, see commits b677532b and 1c1517ef). Also add a comment on the 2.6.32 iso xmit enhancement and on dual-buffer IR having been disabled in 2.6.33. 
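A minimal sketch of how a userspace client might use the new call, falling back to the old CLOCK_REALTIME-only ioctl on kernels that predate it. The device node path, the fallback policy, and treating a rejected command as "old ABI" are assumptions for illustration; the ioctl numbers and structures are the ones defined above:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <time.h>
#include <linux/firewire-cdev.h>

/* Prefer the new ioctl with a raw monotonic clock, fall back to the old one. */
static int sample_cycle_timer(int fd)
{
	struct fw_cdev_get_cycle_timer2 ct2 = { .clk_id = CLOCK_MONOTONIC_RAW };
	struct fw_cdev_get_cycle_timer ct;

	if (ioctl(fd, FW_CDEV_IOC_GET_CYCLE_TIMER2, &ct2) == 0) {
		printf("monotonic %lld.%09d s, cycle timer 0x%08x\n",
		       (long long)ct2.tv_sec, (int)ct2.tv_nsec, ct2.cycle_timer);
		return 0;
	}

	/* Kernels without ABI v3 reject the unknown command; use the old call. */
	if (ioctl(fd, FW_CDEV_IOC_GET_CYCLE_TIMER, &ct) < 0)
		return -1;

	printf("realtime %llu us, cycle timer 0x%08x\n",
	       (unsigned long long)ct.local_time, ct.cycle_timer);
	return 0;
}

int main(void)
{
	int fd = open("/dev/fw0", O_RDWR);	/* node name is an assumption */

	if (fd < 0 || sample_cycle_timer(fd) < 0) {
		perror("cycle timer");
		return 1;
	}
	return 0;
}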
Signed-off-by: Stefan Richter --- include/linux/firewire-cdev.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h index baa8290c8416..40b11013408e 100644 --- a/include/linux/firewire-cdev.h +++ b/include/linux/firewire-cdev.h @@ -256,8 +256,12 @@ union fw_cdev_event { * 1 (2.6.22) - initial version * 2 (2.6.30) - changed &fw_cdev_event_iso_interrupt.header if * &fw_cdev_create_iso_context.header_size is 8 or more + * (2.6.32) - added time stamp to xmit &fw_cdev_event_iso_interrupt + * (2.6.33) - IR has always packet-per-buffer semantics now, not one of + * dual-buffer or packet-per-buffer depending on hardware + * 3 (2.6.34) - made &fw_cdev_get_cycle_timer reliable */ -#define FW_CDEV_VERSION 2 +#define FW_CDEV_VERSION 3 /** * struct fw_cdev_get_info - General purpose information ioctl @@ -556,6 +560,9 @@ struct fw_cdev_stop_iso { * @cycle_timer consists of 7 bits cycleSeconds, 13 bits cycleCount, and * 12 bits cycleOffset, in host byte order. Cf. the Cycle Time register * per IEEE 1394 or Isochronous Cycle Timer register per OHCI-1394. + * + * In version 1 and 2 of the ABI, this ioctl returned unreliable (non- + * monotonic) @cycle_timer values on certain controllers. */ struct fw_cdev_get_cycle_timer { __u64 local_time; -- cgit v1.2.3 From 6498ba04aee69540f8f586438f90d58e5b8e6936 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sun, 21 Feb 2010 17:57:05 +0100 Subject: firewire: ohci: remove unused dualbuffer IR code This code was no longer used since 2.6.33, "firewire: ohci: always use packet-per-buffer mode for isochronous reception" commit 090699c0. If anybody needs this code in the future for special purposes, it can be brought back in. But it must not be re-enabled by default; drivers (kernelspace or userspace drivers) should only get this mode if they explicitly request it. 
Signed-off-by: Stefan Richter --- drivers/firewire/ohci.c | 184 +----------------------------------------------- include/linux/pci_ids.h | 1 - 2 files changed, 1 insertion(+), 184 deletions(-) (limited to 'include') diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index 0f7c4bb978e7..047331e59b31 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -72,20 +72,6 @@ struct descriptor { __le16 transfer_status; } __attribute__((aligned(16))); -struct db_descriptor { - __le16 first_size; - __le16 control; - __le16 second_req_count; - __le16 first_req_count; - __le32 branch_address; - __le16 second_res_count; - __le16 first_res_count; - __le32 reserved0; - __le32 first_buffer; - __le32 second_buffer; - __le32 reserved1; -} __attribute__((aligned(16))); - #define CONTROL_SET(regs) (regs) #define CONTROL_CLEAR(regs) ((regs) + 4) #define COMMAND_PTR(regs) ((regs) + 12) @@ -187,7 +173,6 @@ struct fw_ohci { int generation; int request_generation; /* for timestamping incoming requests */ - bool use_dualbuffer; bool old_uninorth; bool bus_reset_packet_quirk; bool iso_cycle_timer_quirk; @@ -1863,52 +1848,6 @@ static void copy_iso_headers(struct iso_context *ctx, void *p) ctx->header_length += ctx->base.header_size; } -static int handle_ir_dualbuffer_packet(struct context *context, - struct descriptor *d, - struct descriptor *last) -{ - struct iso_context *ctx = - container_of(context, struct iso_context, context); - struct db_descriptor *db = (struct db_descriptor *) d; - __le32 *ir_header; - size_t header_length; - void *p, *end; - - if (db->first_res_count != 0 && db->second_res_count != 0) { - if (ctx->excess_bytes <= le16_to_cpu(db->second_req_count)) { - /* This descriptor isn't done yet, stop iteration. */ - return 0; - } - ctx->excess_bytes -= le16_to_cpu(db->second_req_count); - } - - header_length = le16_to_cpu(db->first_req_count) - - le16_to_cpu(db->first_res_count); - - p = db + 1; - end = p + header_length; - while (p < end) { - copy_iso_headers(ctx, p); - ctx->excess_bytes += - (le32_to_cpu(*(__le32 *)(p + 4)) >> 16) & 0xffff; - p += max(ctx->base.header_size, (size_t)8); - } - - ctx->excess_bytes -= le16_to_cpu(db->second_req_count) - - le16_to_cpu(db->second_res_count); - - if (le16_to_cpu(db->control) & DESCRIPTOR_IRQ_ALWAYS) { - ir_header = (__le32 *) (db + 1); - ctx->base.callback(&ctx->base, - le32_to_cpu(ir_header[0]) & 0xffff, - ctx->header_length, ctx->header, - ctx->base.callback_data); - ctx->header_length = 0; - } - - return 1; -} - static int handle_ir_packet_per_buffer(struct context *context, struct descriptor *d, struct descriptor *last) @@ -1995,10 +1934,7 @@ static struct fw_iso_context *ohci_allocate_iso_context(struct fw_card *card, channels = &ohci->ir_context_channels; mask = &ohci->ir_context_mask; list = ohci->ir_context_list; - if (ohci->use_dualbuffer) - callback = handle_ir_dualbuffer_packet; - else - callback = handle_ir_packet_per_buffer; + callback = handle_ir_packet_per_buffer; } spin_lock_irqsave(&ohci->lock, flags); @@ -2061,8 +1997,6 @@ static int ohci_start_iso(struct fw_iso_context *base, } else { index = ctx - ohci->ir_context_list; control = IR_CONTEXT_ISOCH_HEADER; - if (ohci->use_dualbuffer) - control |= IR_CONTEXT_DUAL_BUFFER_MODE; match = (tags << 28) | (sync << 8) | ctx->base.channel; if (cycle >= 0) { match |= (cycle & 0x07fff) << 12; @@ -2223,92 +2157,6 @@ static int ohci_queue_iso_transmit(struct fw_iso_context *base, return 0; } -static int ohci_queue_iso_receive_dualbuffer(struct fw_iso_context *base, - struct 
fw_iso_packet *packet, - struct fw_iso_buffer *buffer, - unsigned long payload) -{ - struct iso_context *ctx = container_of(base, struct iso_context, base); - struct db_descriptor *db = NULL; - struct descriptor *d; - struct fw_iso_packet *p; - dma_addr_t d_bus, page_bus; - u32 z, header_z, length, rest; - int page, offset, packet_count, header_size; - - /* - * FIXME: Cycle lost behavior should be configurable: lose - * packet, retransmit or terminate.. - */ - - p = packet; - z = 2; - - /* - * The OHCI controller puts the isochronous header and trailer in the - * buffer, so we need at least 8 bytes. - */ - packet_count = p->header_length / ctx->base.header_size; - header_size = packet_count * max(ctx->base.header_size, (size_t)8); - - /* Get header size in number of descriptors. */ - header_z = DIV_ROUND_UP(header_size, sizeof(*d)); - page = payload >> PAGE_SHIFT; - offset = payload & ~PAGE_MASK; - rest = p->payload_length; - /* - * The controllers I've tested have not worked correctly when - * second_req_count is zero. Rather than do something we know won't - * work, return an error - */ - if (rest == 0) - return -EINVAL; - - while (rest > 0) { - d = context_get_descriptors(&ctx->context, - z + header_z, &d_bus); - if (d == NULL) - return -ENOMEM; - - db = (struct db_descriptor *) d; - db->control = cpu_to_le16(DESCRIPTOR_STATUS | - DESCRIPTOR_BRANCH_ALWAYS); - db->first_size = - cpu_to_le16(max(ctx->base.header_size, (size_t)8)); - if (p->skip && rest == p->payload_length) { - db->control |= cpu_to_le16(DESCRIPTOR_WAIT); - db->first_req_count = db->first_size; - } else { - db->first_req_count = cpu_to_le16(header_size); - } - db->first_res_count = db->first_req_count; - db->first_buffer = cpu_to_le32(d_bus + sizeof(*db)); - - if (p->skip && rest == p->payload_length) - length = 4; - else if (offset + rest < PAGE_SIZE) - length = rest; - else - length = PAGE_SIZE - offset; - - db->second_req_count = cpu_to_le16(length); - db->second_res_count = db->second_req_count; - page_bus = page_private(buffer->pages[page]); - db->second_buffer = cpu_to_le32(page_bus + offset); - - if (p->interrupt && length == rest) - db->control |= cpu_to_le16(DESCRIPTOR_IRQ_ALWAYS); - - context_append(&ctx->context, d, z, header_z); - offset = (offset + length) & ~PAGE_MASK; - rest -= length; - if (offset == 0) - page++; - } - - return 0; -} - static int ohci_queue_iso_receive_packet_per_buffer(struct fw_iso_context *base, struct fw_iso_packet *packet, struct fw_iso_buffer *buffer, @@ -2399,9 +2247,6 @@ static int ohci_queue_iso(struct fw_iso_context *base, spin_lock_irqsave(&ctx->context.ohci->lock, flags); if (base->type == FW_ISO_CONTEXT_TRANSMIT) ret = ohci_queue_iso_transmit(base, packet, buffer, payload); - else if (ctx->context.ohci->use_dualbuffer) - ret = ohci_queue_iso_receive_dualbuffer(base, packet, - buffer, payload); else ret = ohci_queue_iso_receive_packet_per_buffer(base, packet, buffer, payload); @@ -2456,10 +2301,6 @@ static void ohci_pmac_off(struct pci_dev *dev) #define ohci_pmac_off(dev) #endif /* CONFIG_PPC_PMAC */ -#define PCI_VENDOR_ID_AGERE PCI_VENDOR_ID_ATT -#define PCI_DEVICE_ID_AGERE_FW643 0x5901 -#define PCI_DEVICE_ID_TI_TSB43AB23 0x8024 - static int __devinit pci_probe(struct pci_dev *dev, const struct pci_device_id *ent) { @@ -2508,29 +2349,6 @@ static int __devinit pci_probe(struct pci_dev *dev, } version = reg_read(ohci, OHCI1394_Version) & 0x00ff00ff; -#if 0 - /* FIXME: make it a context option or remove dual-buffer mode */ - ohci->use_dualbuffer = version >= OHCI_VERSION_1_1; 
-#endif - - /* dual-buffer mode is broken if more than one IR context is active */ - if (dev->vendor == PCI_VENDOR_ID_AGERE && - dev->device == PCI_DEVICE_ID_AGERE_FW643) - ohci->use_dualbuffer = false; - - /* dual-buffer mode is broken */ - if (dev->vendor == PCI_VENDOR_ID_RICOH && - dev->device == PCI_DEVICE_ID_RICOH_R5C832) - ohci->use_dualbuffer = false; - -/* x86-32 currently doesn't use highmem for dma_alloc_coherent */ -#if !defined(CONFIG_X86_32) - /* dual-buffer mode is broken with descriptor addresses above 2G */ - if (dev->vendor == PCI_VENDOR_ID_TI && - (dev->device == PCI_DEVICE_ID_TI_TSB43AB22 || - dev->device == PCI_DEVICE_ID_TI_TSB43AB23)) - ohci->use_dualbuffer = false; -#endif #if defined(CONFIG_PPC_PMAC) && defined(CONFIG_PPC32) ohci->old_uninorth = dev->vendor == PCI_VENDOR_ID_APPLE && diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index cca8a044e2b6..a6f80a129ff6 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -770,7 +770,6 @@ #define PCI_VENDOR_ID_TI 0x104c #define PCI_DEVICE_ID_TI_TVP4020 0x3d07 #define PCI_DEVICE_ID_TI_4450 0x8011 -#define PCI_DEVICE_ID_TI_TSB43AB22 0x8023 #define PCI_DEVICE_ID_TI_XX21_XX11 0x8031 #define PCI_DEVICE_ID_TI_XX21_XX11_FM 0x8033 #define PCI_DEVICE_ID_TI_XX21_XX11_SD 0x8034 -- cgit v1.2.3 From 920d706c892e8f8cfff95f46aeb95fc6344f0bd5 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 8 Feb 2010 11:40:37 +0000 Subject: IB/core: Fix and clean up ib_ud_header_init() ib_ud_header_init() first clears header and then fills up the various fields. Later on, it tests header->immediate_present, which it has already cleared, so the condition is always false. Fix this by adding an immediate_present parameter and setting header->immediate_present as is done with grh_present. Also remove unused calculation of header_len. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/core/ud_header.c | 14 ++++---------- drivers/infiniband/hw/mlx4/qp.c | 2 +- drivers/infiniband/hw/mthca/mthca_qp.c | 2 +- include/rdma/ib_pack.h | 1 + 4 files changed, 7 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/ud_header.c b/drivers/infiniband/core/ud_header.c index 8ec7876bedcf..650b501eb142 100644 --- a/drivers/infiniband/core/ud_header.c +++ b/drivers/infiniband/core/ud_header.c @@ -181,6 +181,7 @@ static const struct ib_field deth_table[] = { * ib_ud_header_init - Initialize UD header structure * @payload_bytes:Length of packet payload * @grh_present:GRH flag (if non-zero, GRH will be included) + * @immediate_present: specify if immediate data should be used * @header:Structure to initialize * * ib_ud_header_init() initializes the lrh.link_version, lrh.link_next_header, @@ -191,21 +192,13 @@ static const struct ib_field deth_table[] = { */ void ib_ud_header_init(int payload_bytes, int grh_present, + int immediate_present, struct ib_ud_header *header) { - int header_len; u16 packet_length; memset(header, 0, sizeof *header); - header_len = - IB_LRH_BYTES + - IB_BTH_BYTES + - IB_DETH_BYTES; - if (grh_present) { - header_len += IB_GRH_BYTES; - } - header->lrh.link_version = 0; header->lrh.link_next_header = grh_present ? 
IB_LNH_IBA_GLOBAL : IB_LNH_IBA_LOCAL; @@ -231,7 +224,8 @@ void ib_ud_header_init(int payload_bytes, header->lrh.packet_length = cpu_to_be16(packet_length); - if (header->immediate_present) + header->immediate_present = immediate_present; + if (immediate_present) header->bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; else header->bth.opcode = IB_OPCODE_UD_SEND_ONLY; diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 2a97c964b9ef..a1823523d7a2 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1228,7 +1228,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, for (i = 0; i < wr->num_sge; ++i) send_size += wr->sg_list[i].length; - ib_ud_header_init(send_size, mlx4_ib_ah_grh_present(ah), &sqp->ud_header); + ib_ud_header_init(send_size, mlx4_ib_ah_grh_present(ah), 0, &sqp->ud_header); sqp->ud_header.lrh.service_level = be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28; diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index c10576fa60c1..d2d172e6289c 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -1494,7 +1494,7 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, u16 pkey; ib_ud_header_init(256, /* assume a MAD */ - mthca_ah_grh_present(to_mah(wr->wr.ud.ah)), + mthca_ah_grh_present(to_mah(wr->wr.ud.ah)), 0, &sqp->ud_header); err = mthca_read_ah(dev, to_mah(wr->wr.ud.ah), &sqp->ud_header); diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h index d7fc45c4eba9..cbb50f4da3dd 100644 --- a/include/rdma/ib_pack.h +++ b/include/rdma/ib_pack.h @@ -232,6 +232,7 @@ void ib_unpack(const struct ib_field *desc, void ib_ud_header_init(int payload_bytes, int grh_present, + int immediate_present, struct ib_ud_header *header); int ib_ud_header_pack(struct ib_ud_header *header, -- cgit v1.2.3 From 939461d59d6ac4e5142f767d24810c9b4b5caa38 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 14 Feb 2010 07:10:10 +0100 Subject: drm/radeon/kms: add support for square microtiles on r3xx-r5xx MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Marek Olšák Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/r300.c | 8 +++++++- drivers/gpu/drm/radeon/r300_reg.h | 2 ++ include/drm/radeon_drm.h | 1 + 3 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index dc32cd13a837..4cef90cd74e5 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -705,6 +705,8 @@ static int r300_packet0_check(struct radeon_cs_parser *p, tile_flags |= R300_TXO_MACRO_TILE; if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) tile_flags |= R300_TXO_MICRO_TILE; + else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO_SQUARE) + tile_flags |= R300_TXO_MICRO_TILE_SQUARE; tmp = idx_value + ((u32)reloc->lobj.gpu_offset); tmp |= tile_flags; @@ -755,6 +757,8 @@ static int r300_packet0_check(struct radeon_cs_parser *p, tile_flags |= R300_COLOR_TILE_ENABLE; if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) tile_flags |= R300_COLOR_MICROTILE_ENABLE; + else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO_SQUARE) + tile_flags |= R300_COLOR_MICROTILE_SQUARE_ENABLE; tmp = idx_value & ~(0x7 << 16); tmp |= tile_flags; @@ -826,7 +830,9 @@ static int r300_packet0_check(struct radeon_cs_parser *p, if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) tile_flags |= 
R300_DEPTHMACROTILE_ENABLE; if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) - tile_flags |= R300_DEPTHMICROTILE_TILED;; + tile_flags |= R300_DEPTHMICROTILE_TILED; + else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO_SQUARE) + tile_flags |= R300_DEPTHMICROTILE_TILED_SQUARE; tmp = idx_value & ~(0x7 << 16); tmp |= tile_flags; diff --git a/drivers/gpu/drm/radeon/r300_reg.h b/drivers/gpu/drm/radeon/r300_reg.h index 1735a2b69580..1a0d5362cd79 100644 --- a/drivers/gpu/drm/radeon/r300_reg.h +++ b/drivers/gpu/drm/radeon/r300_reg.h @@ -952,6 +952,7 @@ # define R300_TXO_ENDIAN_HALFDW_SWAP (3 << 0) # define R300_TXO_MACRO_TILE (1 << 2) # define R300_TXO_MICRO_TILE (1 << 3) +# define R300_TXO_MICRO_TILE_SQUARE (2 << 3) # define R300_TXO_OFFSET_MASK 0xffffffe0 # define R300_TXO_OFFSET_SHIFT 5 /* END: Guess from R200 */ @@ -1360,6 +1361,7 @@ # define R300_COLORPITCH_MASK 0x00001FF8 /* GUESS */ # define R300_COLOR_TILE_ENABLE (1 << 16) /* GUESS */ # define R300_COLOR_MICROTILE_ENABLE (1 << 17) /* GUESS */ +# define R300_COLOR_MICROTILE_SQUARE_ENABLE (2 << 17) # define R300_COLOR_ENDIAN_NO_SWAP (0 << 18) /* GUESS */ # define R300_COLOR_ENDIAN_WORD_SWAP (1 << 18) /* GUESS */ # define R300_COLOR_ENDIAN_DWORD_SWAP (2 << 18) /* GUESS */ diff --git a/include/drm/radeon_drm.h b/include/drm/radeon_drm.h index 39537f3cf98a..81e614bf2dc3 100644 --- a/include/drm/radeon_drm.h +++ b/include/drm/radeon_drm.h @@ -808,6 +808,7 @@ struct drm_radeon_gem_create { #define RADEON_TILING_SWAP_32BIT 0x8 #define RADEON_TILING_SURFACE 0x10 /* this object requires a surface * when mapped - i.e. front buffer */ +#define RADEON_TILING_MICRO_SQUARE 0x20 struct drm_radeon_gem_set_tiling { uint32_t handle; -- cgit v1.2.3 From a1606a9596e54da90ad6209071b357a4c1b0fa82 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Fri, 12 Feb 2010 10:27:35 +1000 Subject: drm/nouveau: new gem pushbuf interface, bump to 0.0.16 This commit breaks the userspace interface, and requires a new libdrm for nouveau to operate again. The multiple GEM_PUSHBUF ioctls that were present in 0.0.15 for compatibility purposes are now gone, and replaced with the new ioctl which allows for multiple push buffers to be submitted (necessary for hw index buffers in the nv50 3d driver) and relocations to be applied on any buffer. A number of other ioctls (CARD_INIT, GEM_PIN, GEM_UNPIN) that were needed for userspace modesetting have also been removed. 
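Under the new interface, userspace hands the kernel an array of buffer objects plus an array of push segments that index into it, instead of a flat dword stream. A rough sketch of a single-segment submission from the userspace side; the use of libdrm's drmCommandWriteRead, the domain choices, and the handle/channel values are illustrative assumptions, while the structures and DRM_NOUVEAU_GEM_PUSHBUF are the ones added to nouveau_drm.h further down:

#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <drm/nouveau_drm.h>

/* Submit one segment of commands that lives inside the GEM object 'handle'. */
static int submit_push(int fd, int channel, uint32_t handle,
		       uint64_t offset, uint64_t length_bytes)
{
	struct drm_nouveau_gem_pushbuf_bo bo;
	struct drm_nouveau_gem_pushbuf_push push;
	struct drm_nouveau_gem_pushbuf req;

	memset(&bo, 0, sizeof(bo));
	bo.handle = handle;
	bo.valid_domains = NOUVEAU_GEM_DOMAIN_VRAM | NOUVEAU_GEM_DOMAIN_GART;
	bo.read_domains = NOUVEAU_GEM_DOMAIN_GART;

	memset(&push, 0, sizeof(push));
	push.bo_index = 0;		/* index into the bo array above     */
	push.offset = offset;		/* byte offset of commands in the bo */
	push.length = length_bytes;	/* byte length of the command stream */

	memset(&req, 0, sizeof(req));
	req.channel = channel;
	req.nr_buffers = 1;
	req.buffers = (uintptr_t)&bo;
	req.nr_push = 1;
	req.push = (uintptr_t)&push;
	/* nr_relocs/relocs stay zero: no patching needed in this trivial case */

	return drmCommandWriteRead(fd, DRM_NOUVEAU_GEM_PUSHBUF,
				   &req, sizeof(req));
}

The kernel fills in vram_available and gart_available on the way back, which is why this is a write/read command.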
Signed-off-by: Ben Skeggs Signed-off-by: Francisco Jerez --- drivers/gpu/drm/nouveau/nouveau_channel.c | 13 +- drivers/gpu/drm/nouveau/nouveau_dma.c | 4 +- drivers/gpu/drm/nouveau/nouveau_dma.h | 4 +- drivers/gpu/drm/nouveau/nouveau_drv.h | 19 +- drivers/gpu/drm/nouveau/nouveau_gem.c | 479 ++++++++++-------------------- drivers/gpu/drm/nouveau/nouveau_state.c | 7 - include/drm/nouveau_drm.h | 86 ++---- 7 files changed, 208 insertions(+), 404 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/nouveau/nouveau_channel.c b/drivers/gpu/drm/nouveau/nouveau_channel.c index ceb83961b16f..6dfb425cbae9 100644 --- a/drivers/gpu/drm/nouveau/nouveau_channel.c +++ b/drivers/gpu/drm/nouveau/nouveau_channel.c @@ -385,6 +385,14 @@ nouveau_ioctl_fifo_alloc(struct drm_device *dev, void *data, return ret; init->channel = chan->id; + if (chan->dma.ib_max) + init->pushbuf_domains = NOUVEAU_GEM_DOMAIN_VRAM | + NOUVEAU_GEM_DOMAIN_GART; + else if (chan->pushbuf_bo->bo.mem.mem_type == TTM_PL_VRAM) + init->pushbuf_domains = NOUVEAU_GEM_DOMAIN_VRAM; + else + init->pushbuf_domains = NOUVEAU_GEM_DOMAIN_GART; + init->subchan[0].handle = NvM2MF; if (dev_priv->card_type < NV_50) init->subchan[0].grclass = 0x0039; @@ -424,7 +432,6 @@ nouveau_ioctl_fifo_free(struct drm_device *dev, void *data, ***********************************/ struct drm_ioctl_desc nouveau_ioctls[] = { - DRM_IOCTL_DEF(DRM_NOUVEAU_CARD_INIT, nouveau_ioctl_card_init, DRM_AUTH), DRM_IOCTL_DEF(DRM_NOUVEAU_GETPARAM, nouveau_ioctl_getparam, DRM_AUTH), DRM_IOCTL_DEF(DRM_NOUVEAU_SETPARAM, nouveau_ioctl_setparam, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF(DRM_NOUVEAU_CHANNEL_ALLOC, nouveau_ioctl_fifo_alloc, DRM_AUTH), @@ -434,13 +441,9 @@ struct drm_ioctl_desc nouveau_ioctls[] = { DRM_IOCTL_DEF(DRM_NOUVEAU_GPUOBJ_FREE, nouveau_ioctl_gpuobj_free, DRM_AUTH), DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_NEW, nouveau_gem_ioctl_new, DRM_AUTH), DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_PUSHBUF, nouveau_gem_ioctl_pushbuf, DRM_AUTH), - DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_PUSHBUF_CALL, nouveau_gem_ioctl_pushbuf_call, DRM_AUTH), - DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_PIN, nouveau_gem_ioctl_pin, DRM_AUTH), - DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_UNPIN, nouveau_gem_ioctl_unpin, DRM_AUTH), DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_CPU_PREP, nouveau_gem_ioctl_cpu_prep, DRM_AUTH), DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_CPU_FINI, nouveau_gem_ioctl_cpu_fini, DRM_AUTH), DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_INFO, nouveau_gem_ioctl_info, DRM_AUTH), - DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_PUSHBUF_CALL2, nouveau_gem_ioctl_pushbuf_call2, DRM_AUTH), }; int nouveau_max_ioctl = DRM_ARRAY_SIZE(nouveau_ioctls); diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.c b/drivers/gpu/drm/nouveau/nouveau_dma.c index 679b03c28df1..c8482a108a78 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dma.c +++ b/drivers/gpu/drm/nouveau/nouveau_dma.c @@ -179,7 +179,7 @@ READ_GET(struct nouveau_channel *chan, uint32_t *prev_get, uint32_t *timeout) void nv50_dma_push(struct nouveau_channel *chan, struct nouveau_bo *bo, - int delta, int dwords) + int delta, int length) { struct nouveau_bo *pb = chan->pushbuf_bo; uint64_t offset = bo->bo.offset + delta; @@ -187,7 +187,7 @@ nv50_dma_push(struct nouveau_channel *chan, struct nouveau_bo *bo, BUG_ON(chan->dma.ib_free < 1); nouveau_bo_wr32(pb, ip++, lower_32_bits(offset)); - nouveau_bo_wr32(pb, ip++, upper_32_bits(offset) | dwords << 10); + nouveau_bo_wr32(pb, ip++, upper_32_bits(offset) | length << 8); chan->dma.ib_put = (chan->dma.ib_put + 1) & chan->dma.ib_max; nvchan_wr32(chan, 0x8c, chan->dma.ib_put); diff --git 
a/drivers/gpu/drm/nouveau/nouveau_dma.h b/drivers/gpu/drm/nouveau/nouveau_dma.h index da6e16dafa4d..8b05c15866d5 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dma.h +++ b/drivers/gpu/drm/nouveau/nouveau_dma.h @@ -32,7 +32,7 @@ #endif void nv50_dma_push(struct nouveau_channel *, struct nouveau_bo *, - int delta, int dwords); + int delta, int length); /* * There's a hw race condition where you can't jump to your PUT offset, @@ -149,7 +149,7 @@ FIRE_RING(struct nouveau_channel *chan) if (chan->dma.ib_max) { nv50_dma_push(chan, chan->pushbuf_bo, chan->dma.put << 2, - chan->dma.cur - chan->dma.put); + (chan->dma.cur - chan->dma.put) << 2); } else { WRITE_PUT(chan->dma.cur); } diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index d221044e0793..a33423622860 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -34,7 +34,7 @@ #define DRIVER_MAJOR 0 #define DRIVER_MINOR 0 -#define DRIVER_PATCHLEVEL 15 +#define DRIVER_PATCHLEVEL 16 #define NOUVEAU_FAMILY 0x0000FFFF #define NOUVEAU_FLAGS 0xFFFF0000 @@ -83,6 +83,7 @@ struct nouveau_bo { struct drm_file *reserved_by; struct list_head entry; int pbbo_index; + bool validate_mapped; struct nouveau_channel *channel; @@ -704,12 +705,6 @@ extern bool nouveau_wait_until(struct drm_device *, uint64_t timeout, uint32_t reg, uint32_t mask, uint32_t val); extern bool nouveau_wait_for_idle(struct drm_device *); extern int nouveau_card_init(struct drm_device *); -extern int nouveau_ioctl_card_init(struct drm_device *, void *data, - struct drm_file *); -extern int nouveau_ioctl_suspend(struct drm_device *, void *data, - struct drm_file *); -extern int nouveau_ioctl_resume(struct drm_device *, void *data, - struct drm_file *); /* nouveau_mem.c */ extern int nouveau_mem_init_heap(struct mem_block **, uint64_t start, @@ -1160,16 +1155,6 @@ extern int nouveau_gem_ioctl_new(struct drm_device *, void *, struct drm_file *); extern int nouveau_gem_ioctl_pushbuf(struct drm_device *, void *, struct drm_file *); -extern int nouveau_gem_ioctl_pushbuf_call(struct drm_device *, void *, - struct drm_file *); -extern int nouveau_gem_ioctl_pushbuf_call2(struct drm_device *, void *, - struct drm_file *); -extern int nouveau_gem_ioctl_pin(struct drm_device *, void *, - struct drm_file *); -extern int nouveau_gem_ioctl_unpin(struct drm_device *, void *, - struct drm_file *); -extern int nouveau_gem_ioctl_tile(struct drm_device *, void *, - struct drm_file *); extern int nouveau_gem_ioctl_cpu_prep(struct drm_device *, void *, struct drm_file *); extern int nouveau_gem_ioctl_cpu_fini(struct drm_device *, void *, diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index df72cd847025..fee959c72f40 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -243,6 +243,11 @@ validate_fini_list(struct list_head *list, struct nouveau_fence *fence) nouveau_fence_unref((void *)&prev_fence); } + if (unlikely(nvbo->validate_mapped)) { + ttm_bo_kunmap(&nvbo->kmap); + nvbo->validate_mapped = false; + } + list_del(&nvbo->entry); nvbo->reserved_by = NULL; ttm_bo_unreserve(&nvbo->bo); @@ -302,11 +307,14 @@ retry: if (ret == -EAGAIN) ret = ttm_bo_wait_unreserved(&nvbo->bo, false); drm_gem_object_unreference(gem); - if (ret) + if (ret) { + NV_ERROR(dev, "fail reserve\n"); return ret; + } goto retry; } + b->user_priv = (uint64_t)(unsigned long)nvbo; nvbo->reserved_by = file_priv; nvbo->pbbo_index = i; if ((b->valid_domains & 
NOUVEAU_GEM_DOMAIN_VRAM) && @@ -336,8 +344,10 @@ retry: } ret = ttm_bo_wait_cpu(&nvbo->bo, false); - if (ret) + if (ret) { + NV_ERROR(dev, "fail wait_cpu\n"); return ret; + } goto retry; } } @@ -351,6 +361,7 @@ validate_list(struct nouveau_channel *chan, struct list_head *list, { struct drm_nouveau_gem_pushbuf_bo __user *upbbo = (void __force __user *)(uintptr_t)user_pbbo_ptr; + struct drm_device *dev = chan->dev; struct nouveau_bo *nvbo; int ret, relocs = 0; @@ -362,39 +373,46 @@ validate_list(struct nouveau_channel *chan, struct list_head *list, spin_lock(&nvbo->bo.lock); ret = ttm_bo_wait(&nvbo->bo, false, false, false); spin_unlock(&nvbo->bo.lock); - if (unlikely(ret)) + if (unlikely(ret)) { + NV_ERROR(dev, "fail wait other chan\n"); return ret; + } } ret = nouveau_gem_set_domain(nvbo->gem, b->read_domains, b->write_domains, b->valid_domains); - if (unlikely(ret)) + if (unlikely(ret)) { + NV_ERROR(dev, "fail set_domain\n"); return ret; + } nvbo->channel = chan; ret = ttm_bo_validate(&nvbo->bo, &nvbo->placement, false, false); nvbo->channel = NULL; - if (unlikely(ret)) + if (unlikely(ret)) { + NV_ERROR(dev, "fail ttm_validate\n"); return ret; + } - if (nvbo->bo.offset == b->presumed_offset && + if (nvbo->bo.offset == b->presumed.offset && ((nvbo->bo.mem.mem_type == TTM_PL_VRAM && - b->presumed_domain & NOUVEAU_GEM_DOMAIN_VRAM) || + b->presumed.domain & NOUVEAU_GEM_DOMAIN_VRAM) || (nvbo->bo.mem.mem_type == TTM_PL_TT && - b->presumed_domain & NOUVEAU_GEM_DOMAIN_GART))) + b->presumed.domain & NOUVEAU_GEM_DOMAIN_GART))) continue; if (nvbo->bo.mem.mem_type == TTM_PL_TT) - b->presumed_domain = NOUVEAU_GEM_DOMAIN_GART; + b->presumed.domain = NOUVEAU_GEM_DOMAIN_GART; else - b->presumed_domain = NOUVEAU_GEM_DOMAIN_VRAM; - b->presumed_offset = nvbo->bo.offset; - b->presumed_ok = 0; + b->presumed.domain = NOUVEAU_GEM_DOMAIN_VRAM; + b->presumed.offset = nvbo->bo.offset; + b->presumed.valid = 0; relocs++; - if (DRM_COPY_TO_USER(&upbbo[nvbo->pbbo_index], b, sizeof(*b))) + if (DRM_COPY_TO_USER(&upbbo[nvbo->pbbo_index].presumed, + &b->presumed, sizeof(b->presumed))) return -EFAULT; } @@ -408,6 +426,7 @@ nouveau_gem_pushbuf_validate(struct nouveau_channel *chan, uint64_t user_buffers, int nr_buffers, struct validate_op *op, int *apply_relocs) { + struct drm_device *dev = chan->dev; int ret, relocs = 0; INIT_LIST_HEAD(&op->vram_list); @@ -418,11 +437,14 @@ nouveau_gem_pushbuf_validate(struct nouveau_channel *chan, return 0; ret = validate_init(chan, file_priv, pbbo, nr_buffers, op); - if (unlikely(ret)) + if (unlikely(ret)) { + NV_ERROR(dev, "validate_init\n"); return ret; + } ret = validate_list(chan, &op->vram_list, pbbo, user_buffers); if (unlikely(ret < 0)) { + NV_ERROR(dev, "validate vram_list\n"); validate_fini(op, NULL); return ret; } @@ -430,6 +452,7 @@ nouveau_gem_pushbuf_validate(struct nouveau_channel *chan, ret = validate_list(chan, &op->gart_list, pbbo, user_buffers); if (unlikely(ret < 0)) { + NV_ERROR(dev, "validate gart_list\n"); validate_fini(op, NULL); return ret; } @@ -437,6 +460,7 @@ nouveau_gem_pushbuf_validate(struct nouveau_channel *chan, ret = validate_list(chan, &op->both_list, pbbo, user_buffers); if (unlikely(ret < 0)) { + NV_ERROR(dev, "validate both_list\n"); validate_fini(op, NULL); return ret; } @@ -465,59 +489,82 @@ u_memcpya(uint64_t user, unsigned nmemb, unsigned size) } static int -nouveau_gem_pushbuf_reloc_apply(struct nouveau_channel *chan, int nr_bo, - struct drm_nouveau_gem_pushbuf_bo *bo, - unsigned nr_relocs, uint64_t ptr_relocs, - unsigned nr_dwords, unsigned 
first_dword, - uint32_t *pushbuf, bool is_iomem) +nouveau_gem_pushbuf_reloc_apply(struct drm_device *dev, + struct drm_nouveau_gem_pushbuf *req, + struct drm_nouveau_gem_pushbuf_bo *bo) { struct drm_nouveau_gem_pushbuf_reloc *reloc = NULL; - struct drm_device *dev = chan->dev; int ret = 0; unsigned i; - reloc = u_memcpya(ptr_relocs, nr_relocs, sizeof(*reloc)); + reloc = u_memcpya(req->relocs, req->nr_relocs, sizeof(*reloc)); if (IS_ERR(reloc)) return PTR_ERR(reloc); - for (i = 0; i < nr_relocs; i++) { + for (i = 0; i < req->nr_relocs; i++) { struct drm_nouveau_gem_pushbuf_reloc *r = &reloc[i]; struct drm_nouveau_gem_pushbuf_bo *b; + struct nouveau_bo *nvbo; uint32_t data; - if (r->bo_index >= nr_bo || r->reloc_index < first_dword || - r->reloc_index >= first_dword + nr_dwords) { - NV_ERROR(dev, "Bad relocation %d\n", i); - NV_ERROR(dev, " bo: %d max %d\n", r->bo_index, nr_bo); - NV_ERROR(dev, " id: %d max %d\n", r->reloc_index, nr_dwords); + if (unlikely(r->bo_index > req->nr_buffers)) { + NV_ERROR(dev, "reloc bo index invalid\n"); ret = -EINVAL; break; } b = &bo[r->bo_index]; - if (b->presumed_ok) + if (b->presumed.valid) continue; + if (unlikely(r->reloc_bo_index > req->nr_buffers)) { + NV_ERROR(dev, "reloc container bo index invalid\n"); + ret = -EINVAL; + break; + } + nvbo = (void *)(unsigned long)bo[r->reloc_bo_index].user_priv; + + if (unlikely(r->reloc_bo_offset + 4 > + nvbo->bo.mem.num_pages << PAGE_SHIFT)) { + NV_ERROR(dev, "reloc outside of bo\n"); + ret = -EINVAL; + break; + } + + if (!nvbo->kmap.virtual) { + ret = ttm_bo_kmap(&nvbo->bo, 0, nvbo->bo.mem.num_pages, + &nvbo->kmap); + if (ret) { + NV_ERROR(dev, "failed kmap for reloc\n"); + break; + } + nvbo->validate_mapped = true; + } + if (r->flags & NOUVEAU_GEM_RELOC_LOW) - data = b->presumed_offset + r->data; + data = b->presumed.offset + r->data; else if (r->flags & NOUVEAU_GEM_RELOC_HIGH) - data = (b->presumed_offset + r->data) >> 32; + data = (b->presumed.offset + r->data) >> 32; else data = r->data; if (r->flags & NOUVEAU_GEM_RELOC_OR) { - if (b->presumed_domain == NOUVEAU_GEM_DOMAIN_GART) + if (b->presumed.domain == NOUVEAU_GEM_DOMAIN_GART) data |= r->tor; else data |= r->vor; } - if (is_iomem) - iowrite32_native(data, (void __force __iomem *) - &pushbuf[r->reloc_index]); - else - pushbuf[r->reloc_index] = data; + spin_lock(&nvbo->bo.lock); + ret = ttm_bo_wait(&nvbo->bo, false, false, false); + if (ret) { + NV_ERROR(dev, "reloc wait_idle failed: %d\n", ret); + break; + } + spin_unlock(&nvbo->bo.lock); + + nouveau_bo_wr32(nvbo, r->reloc_bo_offset >> 2, data); } kfree(reloc); @@ -528,125 +575,50 @@ int nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data, struct drm_file *file_priv) { + struct drm_nouveau_private *dev_priv = dev->dev_private; struct drm_nouveau_gem_pushbuf *req = data; - struct drm_nouveau_gem_pushbuf_bo *bo = NULL; + struct drm_nouveau_gem_pushbuf_push *push; + struct drm_nouveau_gem_pushbuf_bo *bo; struct nouveau_channel *chan; struct validate_op op; - struct nouveau_fence* fence = 0; - uint32_t *pushbuf = NULL; - int ret = 0, do_reloc = 0, i; + struct nouveau_fence *fence = 0; + int i, j, ret = 0, do_reloc = 0; NOUVEAU_CHECK_INITIALISED_WITH_RETURN; NOUVEAU_GET_USER_CHANNEL_WITH_RETURN(req->channel, file_priv, chan); - if (req->nr_dwords >= chan->dma.max || - req->nr_buffers > NOUVEAU_GEM_MAX_BUFFERS || - req->nr_relocs > NOUVEAU_GEM_MAX_RELOCS) { - NV_ERROR(dev, "Pushbuf config exceeds limits:\n"); - NV_ERROR(dev, " dwords : %d max %d\n", req->nr_dwords, - chan->dma.max - 1); - NV_ERROR(dev, " 
buffers: %d max %d\n", req->nr_buffers, - NOUVEAU_GEM_MAX_BUFFERS); - NV_ERROR(dev, " relocs : %d max %d\n", req->nr_relocs, - NOUVEAU_GEM_MAX_RELOCS); - return -EINVAL; - } - - pushbuf = u_memcpya(req->dwords, req->nr_dwords, sizeof(uint32_t)); - if (IS_ERR(pushbuf)) - return PTR_ERR(pushbuf); - - bo = u_memcpya(req->buffers, req->nr_buffers, sizeof(*bo)); - if (IS_ERR(bo)) { - kfree(pushbuf); - return PTR_ERR(bo); - } - - mutex_lock(&dev->struct_mutex); - - /* Validate buffer list */ - ret = nouveau_gem_pushbuf_validate(chan, file_priv, bo, req->buffers, - req->nr_buffers, &op, &do_reloc); - if (ret) - goto out; - - /* Apply any relocations that are required */ - if (do_reloc) { - ret = nouveau_gem_pushbuf_reloc_apply(chan, req->nr_buffers, - bo, req->nr_relocs, - req->relocs, - req->nr_dwords, 0, - pushbuf, false); - if (ret) - goto out; - } - - /* Emit push buffer to the hw - */ - ret = RING_SPACE(chan, req->nr_dwords); - if (ret) - goto out; - - OUT_RINGp(chan, pushbuf, req->nr_dwords); + req->vram_available = dev_priv->fb_aper_free; + req->gart_available = dev_priv->gart_info.aper_free; + if (unlikely(req->nr_push == 0)) + goto out_next; - ret = nouveau_fence_new(chan, &fence, true); - if (ret) { - NV_ERROR(dev, "error fencing pushbuf: %d\n", ret); - WIND_RING(chan); - goto out; + if (unlikely(req->nr_push > NOUVEAU_GEM_MAX_PUSH)) { + NV_ERROR(dev, "pushbuf push count exceeds limit: %d max %d\n", + req->nr_push, NOUVEAU_GEM_MAX_PUSH); + return -EINVAL; } - if (nouveau_gem_pushbuf_sync(chan)) { - ret = nouveau_fence_wait(fence, NULL, false, false); - if (ret) { - for (i = 0; i < req->nr_dwords; i++) - NV_ERROR(dev, "0x%08x\n", pushbuf[i]); - NV_ERROR(dev, "^^ above push buffer is fail :(\n"); - } + if (unlikely(req->nr_buffers > NOUVEAU_GEM_MAX_BUFFERS)) { + NV_ERROR(dev, "pushbuf bo count exceeds limit: %d max %d\n", + req->nr_buffers, NOUVEAU_GEM_MAX_BUFFERS); + return -EINVAL; } -out: - validate_fini(&op, fence); - nouveau_fence_unref((void**)&fence); - mutex_unlock(&dev->struct_mutex); - kfree(pushbuf); - kfree(bo); - return ret; -} - -int -nouveau_gem_ioctl_pushbuf_call(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_nouveau_private *dev_priv = dev->dev_private; - struct drm_nouveau_gem_pushbuf_call *req = data; - struct drm_nouveau_gem_pushbuf_bo *bo = NULL; - struct nouveau_channel *chan; - struct drm_gem_object *gem; - struct nouveau_bo *pbbo; - struct validate_op op; - struct nouveau_fence* fence = 0; - int i, ret = 0, do_reloc = 0; - - NOUVEAU_CHECK_INITIALISED_WITH_RETURN; - NOUVEAU_GET_USER_CHANNEL_WITH_RETURN(req->channel, file_priv, chan); - - if (unlikely(req->handle == 0)) - goto out_next; - - if (req->nr_buffers > NOUVEAU_GEM_MAX_BUFFERS || - req->nr_relocs > NOUVEAU_GEM_MAX_RELOCS) { - NV_ERROR(dev, "Pushbuf config exceeds limits:\n"); - NV_ERROR(dev, " buffers: %d max %d\n", req->nr_buffers, - NOUVEAU_GEM_MAX_BUFFERS); - NV_ERROR(dev, " relocs : %d max %d\n", req->nr_relocs, - NOUVEAU_GEM_MAX_RELOCS); + if (unlikely(req->nr_relocs > NOUVEAU_GEM_MAX_RELOCS)) { + NV_ERROR(dev, "pushbuf reloc count exceeds limit: %d max %d\n", + req->nr_relocs, NOUVEAU_GEM_MAX_RELOCS); return -EINVAL; } + push = u_memcpya(req->push, req->nr_push, sizeof(*push)); + if (IS_ERR(push)) + return PTR_ERR(push); + bo = u_memcpya(req->buffers, req->nr_buffers, sizeof(*bo)); - if (IS_ERR(bo)) + if (IS_ERR(bo)) { + kfree(push); return PTR_ERR(bo); + } mutex_lock(&dev->struct_mutex); @@ -658,94 +630,9 @@ nouveau_gem_ioctl_pushbuf_call(struct drm_device *dev, void 
*data, goto out; } - /* Validate DMA push buffer */ - gem = drm_gem_object_lookup(dev, file_priv, req->handle); - if (!gem) { - NV_ERROR(dev, "Unknown pb handle 0x%08x\n", req->handle); - ret = -EINVAL; - goto out; - } - pbbo = nouveau_gem_object(gem); - - if ((req->offset & 3) || req->nr_dwords < 2 || - (unsigned long)req->offset > (unsigned long)pbbo->bo.mem.size || - (unsigned long)req->nr_dwords > - ((unsigned long)(pbbo->bo.mem.size - req->offset ) >> 2)) { - NV_ERROR(dev, "pb call misaligned or out of bounds: " - "%d + %d * 4 > %ld\n", - req->offset, req->nr_dwords, pbbo->bo.mem.size); - ret = -EINVAL; - drm_gem_object_unreference(gem); - goto out; - } - - ret = ttm_bo_reserve(&pbbo->bo, false, false, true, - chan->fence.sequence); - if (ret) { - NV_ERROR(dev, "resv pb: %d\n", ret); - drm_gem_object_unreference(gem); - goto out; - } - - nouveau_bo_placement_set(pbbo, 1 << chan->pushbuf_bo->bo.mem.mem_type); - ret = ttm_bo_validate(&pbbo->bo, &pbbo->placement, false, false); - if (ret) { - NV_ERROR(dev, "validate pb: %d\n", ret); - ttm_bo_unreserve(&pbbo->bo); - drm_gem_object_unreference(gem); - goto out; - } - - list_add_tail(&pbbo->entry, &op.both_list); - - /* If presumed return address doesn't match, we need to map the - * push buffer and fix it.. - */ - if (dev_priv->card_type < NV_20) { - uint32_t retaddy; - - if (chan->dma.free < 4 + NOUVEAU_DMA_SKIPS) { - ret = nouveau_dma_wait(chan, 0, 4 + NOUVEAU_DMA_SKIPS); - if (ret) { - NV_ERROR(dev, "jmp_space: %d\n", ret); - goto out; - } - } - - retaddy = chan->pushbuf_base + ((chan->dma.cur + 2) << 2); - retaddy |= 0x20000000; - if (retaddy != req->suffix0) { - req->suffix0 = retaddy; - do_reloc = 1; - } - } - /* Apply any relocations that are required */ if (do_reloc) { - void *pbvirt; - bool is_iomem; - ret = ttm_bo_kmap(&pbbo->bo, 0, pbbo->bo.mem.num_pages, - &pbbo->kmap); - if (ret) { - NV_ERROR(dev, "kmap pb: %d\n", ret); - goto out; - } - - pbvirt = ttm_kmap_obj_virtual(&pbbo->kmap, &is_iomem); - ret = nouveau_gem_pushbuf_reloc_apply(chan, req->nr_buffers, bo, - req->nr_relocs, - req->relocs, - req->nr_dwords, - req->offset / 4, - pbvirt, is_iomem); - - if (dev_priv->card_type < NV_20) { - nouveau_bo_wr32(pbbo, - req->offset / 4 + req->nr_dwords - 2, - req->suffix0); - } - - ttm_bo_kunmap(&pbbo->kmap); + ret = nouveau_gem_pushbuf_reloc_apply(dev, req, bo); if (ret) { NV_ERROR(dev, "reloc apply: %d\n", ret); goto out; @@ -753,36 +640,74 @@ nouveau_gem_ioctl_pushbuf_call(struct drm_device *dev, void *data, } if (chan->dma.ib_max) { - ret = nouveau_dma_wait(chan, 2, 6); + ret = nouveau_dma_wait(chan, req->nr_push + 1, 6); if (ret) { NV_INFO(dev, "nv50cal_space: %d\n", ret); goto out; } - nv50_dma_push(chan, pbbo, req->offset, req->nr_dwords); + for (i = 0; i < req->nr_push; i++) { + struct nouveau_bo *nvbo = (void *)(unsigned long) + bo[push[i].bo_index].user_priv; + + nv50_dma_push(chan, nvbo, push[i].offset, + push[i].length); + } } else if (dev_priv->card_type >= NV_20) { - ret = RING_SPACE(chan, 2); + ret = RING_SPACE(chan, req->nr_push * 2); if (ret) { NV_ERROR(dev, "cal_space: %d\n", ret); goto out; } - OUT_RING(chan, ((pbbo->bo.mem.mm_node->start << PAGE_SHIFT) + - req->offset) | 2); - OUT_RING(chan, 0); + + for (i = 0; i < req->nr_push; i++) { + struct nouveau_bo *nvbo = (void *)(unsigned long) + bo[push[i].bo_index].user_priv; + struct drm_mm_node *mem = nvbo->bo.mem.mm_node; + + OUT_RING(chan, ((mem->start << PAGE_SHIFT) + + push[i].offset) | 2); + OUT_RING(chan, 0); + } } else { - ret = RING_SPACE(chan, 2 + 
NOUVEAU_DMA_SKIPS); + ret = RING_SPACE(chan, req->nr_push * (2 + NOUVEAU_DMA_SKIPS)); if (ret) { NV_ERROR(dev, "jmp_space: %d\n", ret); goto out; } - OUT_RING(chan, ((pbbo->bo.mem.mm_node->start << PAGE_SHIFT) + - req->offset) | 0x20000000); - OUT_RING(chan, 0); - /* Space the jumps apart with NOPs. */ - for (i = 0; i < NOUVEAU_DMA_SKIPS; i++) + for (i = 0; i < req->nr_push; i++) { + struct nouveau_bo *nvbo = (void *)(unsigned long) + bo[push[i].bo_index].user_priv; + struct drm_mm_node *mem = nvbo->bo.mem.mm_node; + uint32_t cmd; + + cmd = chan->pushbuf_base + ((chan->dma.cur + 2) << 2); + cmd |= 0x20000000; + if (unlikely(cmd != req->suffix0)) { + if (!nvbo->kmap.virtual) { + ret = ttm_bo_kmap(&nvbo->bo, 0, + nvbo->bo.mem. + num_pages, + &nvbo->kmap); + if (ret) { + WIND_RING(chan); + goto out; + } + nvbo->validate_mapped = true; + } + + nouveau_bo_wr32(nvbo, (push[i].offset + + push[i].length - 8) / 4, cmd); + } + + OUT_RING(chan, ((mem->start << PAGE_SHIFT) + + push[i].offset) | 0x20000000); OUT_RING(chan, 0); + for (j = 0; j < NOUVEAU_DMA_SKIPS; j++) + OUT_RING(chan, 0); + } } ret = nouveau_fence_new(chan, &fence, true); @@ -797,6 +722,7 @@ out: nouveau_fence_unref((void**)&fence); mutex_unlock(&dev->struct_mutex); kfree(bo); + kfree(push); out_next: if (chan->dma.ib_max) { @@ -815,19 +741,6 @@ out_next: return ret; } -int -nouveau_gem_ioctl_pushbuf_call2(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_nouveau_private *dev_priv = dev->dev_private; - struct drm_nouveau_gem_pushbuf_call *req = data; - - req->vram_available = dev_priv->fb_aper_free; - req->gart_available = dev_priv->gart_info.aper_free; - - return nouveau_gem_ioctl_pushbuf_call(dev, data, file_priv); -} - static inline uint32_t domain_to_ttm(struct nouveau_bo *nvbo, uint32_t domain) { @@ -841,74 +754,6 @@ domain_to_ttm(struct nouveau_bo *nvbo, uint32_t domain) return flags; } -int -nouveau_gem_ioctl_pin(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_nouveau_gem_pin *req = data; - struct drm_gem_object *gem; - struct nouveau_bo *nvbo; - int ret = 0; - - NOUVEAU_CHECK_INITIALISED_WITH_RETURN; - - if (drm_core_check_feature(dev, DRIVER_MODESET)) { - NV_ERROR(dev, "pin only allowed without kernel modesetting\n"); - return -EINVAL; - } - - if (!DRM_SUSER(DRM_CURPROC)) - return -EPERM; - - gem = drm_gem_object_lookup(dev, file_priv, req->handle); - if (!gem) - return -EINVAL; - nvbo = nouveau_gem_object(gem); - - ret = nouveau_bo_pin(nvbo, domain_to_ttm(nvbo, req->domain)); - if (ret) - goto out; - - req->offset = nvbo->bo.offset; - if (nvbo->bo.mem.mem_type == TTM_PL_TT) - req->domain = NOUVEAU_GEM_DOMAIN_GART; - else - req->domain = NOUVEAU_GEM_DOMAIN_VRAM; - -out: - mutex_lock(&dev->struct_mutex); - drm_gem_object_unreference(gem); - mutex_unlock(&dev->struct_mutex); - - return ret; -} - -int -nouveau_gem_ioctl_unpin(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - struct drm_nouveau_gem_pin *req = data; - struct drm_gem_object *gem; - int ret; - - NOUVEAU_CHECK_INITIALISED_WITH_RETURN; - - if (drm_core_check_feature(dev, DRIVER_MODESET)) - return -EINVAL; - - gem = drm_gem_object_lookup(dev, file_priv, req->handle); - if (!gem) - return -EINVAL; - - ret = nouveau_bo_unpin(nouveau_gem_object(gem)); - - mutex_lock(&dev->struct_mutex); - drm_gem_object_unreference(gem); - mutex_unlock(&dev->struct_mutex); - - return ret; -} - int nouveau_gem_ioctl_cpu_prep(struct drm_device *dev, void *data, struct drm_file *file_priv) diff --git 
a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c index ed5ac0b9a0ac..516a8d36cb10 100644 --- a/drivers/gpu/drm/nouveau/nouveau_state.c +++ b/drivers/gpu/drm/nouveau/nouveau_state.c @@ -777,13 +777,6 @@ int nouveau_unload(struct drm_device *dev) return 0; } -int -nouveau_ioctl_card_init(struct drm_device *dev, void *data, - struct drm_file *file_priv) -{ - return nouveau_card_init(dev); -} - int nouveau_ioctl_getparam(struct drm_device *dev, void *data, struct drm_file *file_priv) { diff --git a/include/drm/nouveau_drm.h b/include/drm/nouveau_drm.h index f745948b61e4..a6a9f4af5ebd 100644 --- a/include/drm/nouveau_drm.h +++ b/include/drm/nouveau_drm.h @@ -25,13 +25,14 @@ #ifndef __NOUVEAU_DRM_H__ #define __NOUVEAU_DRM_H__ -#define NOUVEAU_DRM_HEADER_PATCHLEVEL 15 +#define NOUVEAU_DRM_HEADER_PATCHLEVEL 16 struct drm_nouveau_channel_alloc { uint32_t fb_ctxdma_handle; uint32_t tt_ctxdma_handle; int channel; + uint32_t pushbuf_domains; /* Notifier memory */ uint32_t notifier_handle; @@ -109,68 +110,58 @@ struct drm_nouveau_gem_new { uint32_t align; }; +#define NOUVEAU_GEM_MAX_BUFFERS 1024 +struct drm_nouveau_gem_pushbuf_bo_presumed { + uint32_t valid; + uint32_t domain; + uint64_t offset; +}; + struct drm_nouveau_gem_pushbuf_bo { uint64_t user_priv; uint32_t handle; uint32_t read_domains; uint32_t write_domains; uint32_t valid_domains; - uint32_t presumed_ok; - uint32_t presumed_domain; - uint64_t presumed_offset; + struct drm_nouveau_gem_pushbuf_bo_presumed presumed; }; #define NOUVEAU_GEM_RELOC_LOW (1 << 0) #define NOUVEAU_GEM_RELOC_HIGH (1 << 1) #define NOUVEAU_GEM_RELOC_OR (1 << 2) +#define NOUVEAU_GEM_MAX_RELOCS 1024 struct drm_nouveau_gem_pushbuf_reloc { + uint32_t reloc_bo_index; + uint32_t reloc_bo_offset; uint32_t bo_index; - uint32_t reloc_index; uint32_t flags; uint32_t data; uint32_t vor; uint32_t tor; }; -#define NOUVEAU_GEM_MAX_BUFFERS 1024 -#define NOUVEAU_GEM_MAX_RELOCS 1024 +#define NOUVEAU_GEM_MAX_PUSH 512 +struct drm_nouveau_gem_pushbuf_push { + uint32_t bo_index; + uint32_t pad; + uint64_t offset; + uint64_t length; +}; struct drm_nouveau_gem_pushbuf { uint32_t channel; - uint32_t nr_dwords; uint32_t nr_buffers; - uint32_t nr_relocs; - uint64_t dwords; uint64_t buffers; - uint64_t relocs; -}; - -struct drm_nouveau_gem_pushbuf_call { - uint32_t channel; - uint32_t handle; - uint32_t offset; - uint32_t nr_buffers; uint32_t nr_relocs; - uint32_t nr_dwords; - uint64_t buffers; + uint32_t nr_push; uint64_t relocs; + uint64_t push; uint32_t suffix0; uint32_t suffix1; - /* below only accessed for CALL2 */ uint64_t vram_available; uint64_t gart_available; }; -struct drm_nouveau_gem_pin { - uint32_t handle; - uint32_t domain; - uint64_t offset; -}; - -struct drm_nouveau_gem_unpin { - uint32_t handle; -}; - #define NOUVEAU_GEM_CPU_PREP_NOWAIT 0x00000001 #define NOUVEAU_GEM_CPU_PREP_NOBLOCK 0x00000002 #define NOUVEAU_GEM_CPU_PREP_WRITE 0x00000004 @@ -183,14 +174,6 @@ struct drm_nouveau_gem_cpu_fini { uint32_t handle; }; -struct drm_nouveau_gem_tile { - uint32_t handle; - uint32_t offset; - uint32_t size; - uint32_t tile_mode; - uint32_t tile_flags; -}; - enum nouveau_bus_type { NV_AGP = 0, NV_PCI = 1, @@ -200,22 +183,17 @@ enum nouveau_bus_type { struct drm_nouveau_sarea { }; -#define DRM_NOUVEAU_CARD_INIT 0x00 -#define DRM_NOUVEAU_GETPARAM 0x01 -#define DRM_NOUVEAU_SETPARAM 0x02 -#define DRM_NOUVEAU_CHANNEL_ALLOC 0x03 -#define DRM_NOUVEAU_CHANNEL_FREE 0x04 -#define DRM_NOUVEAU_GROBJ_ALLOC 0x05 -#define DRM_NOUVEAU_NOTIFIEROBJ_ALLOC 0x06 -#define 
DRM_NOUVEAU_GPUOBJ_FREE 0x07 +#define DRM_NOUVEAU_GETPARAM 0x00 +#define DRM_NOUVEAU_SETPARAM 0x01 +#define DRM_NOUVEAU_CHANNEL_ALLOC 0x02 +#define DRM_NOUVEAU_CHANNEL_FREE 0x03 +#define DRM_NOUVEAU_GROBJ_ALLOC 0x04 +#define DRM_NOUVEAU_NOTIFIEROBJ_ALLOC 0x05 +#define DRM_NOUVEAU_GPUOBJ_FREE 0x06 #define DRM_NOUVEAU_GEM_NEW 0x40 #define DRM_NOUVEAU_GEM_PUSHBUF 0x41 -#define DRM_NOUVEAU_GEM_PUSHBUF_CALL 0x42 -#define DRM_NOUVEAU_GEM_PIN 0x43 /* !KMS only */ -#define DRM_NOUVEAU_GEM_UNPIN 0x44 /* !KMS only */ -#define DRM_NOUVEAU_GEM_CPU_PREP 0x45 -#define DRM_NOUVEAU_GEM_CPU_FINI 0x46 -#define DRM_NOUVEAU_GEM_INFO 0x47 -#define DRM_NOUVEAU_GEM_PUSHBUF_CALL2 0x48 +#define DRM_NOUVEAU_GEM_CPU_PREP 0x42 +#define DRM_NOUVEAU_GEM_CPU_FINI 0x43 +#define DRM_NOUVEAU_GEM_INFO 0x44 #endif /* __NOUVEAU_DRM_H__ */ -- cgit v1.2.3 From 3e58974027b04e84f68b964ef368a6cd758e2f84 Mon Sep 17 00:00:00 2001 From: Nikanth Karthikesan Date: Thu, 25 Feb 2010 14:44:56 +0530 Subject: doc: fix typo in comment explaining rb_tree usage Fix typo in comment explaining rb_tree usage. s/int/in Signed-off-by: Nikanth Karthikesan Signed-off-by: Jiri Kosina --- include/linux/rbtree.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index 9c295411d01f..5210a5c60877 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -25,10 +25,10 @@ Some example of insert and search follows here. The search is a plain normal search over an ordered tree. The insert instead must be implemented - int two steps: as first thing the code must insert the element in - order as a red leaf in the tree, then the support library function - rb_insert_color() must be called. Such function will do the - not trivial work to rebalance the rbtree if necessary. + in two steps: First, the code must insert the element in order as a red leaf + in the tree, and then the support library function rb_insert_color() must + be called. Such function will do the not trivial work to rebalance the + rbtree, if necessary. ----------------------------------------------------------------------- static inline struct page * rb_search_page_cache(struct inode * inode, -- cgit v1.2.3 From afd66255b9a48f5851326ddae50e2203fbf71dc9 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 25 Feb 2010 08:34:07 -0500 Subject: kprobes: Introduce kprobes jump optimization Introduce kprobes jump optimization arch-independent parts. Kprobes uses breakpoint instruction for interrupting execution flow, on some architectures, it can be replaced by a jump instruction and interruption emulation code. This gains kprobs' performance drastically. To enable this feature, set CONFIG_OPTPROBES=y (default y if the arch supports OPTPROBE). Changes in v9: - Fix a bug to optimize probe when enabling. - Check nearby probes can be optimize/unoptimize when disarming/arming kprobes, instead of registering/unregistering. This will help kprobe-tracer because most of probes on it are usually disabled. Changes in v6: - Cleanup coding style for readability. - Add comments around get/put_online_cpus(). Changes in v5: - Use get_online_cpus()/put_online_cpus() for avoiding text_mutex deadlock. 
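Nothing changes for users of the kprobes API: a probe is registered exactly as before, and when CONFIG_OPTPROBES is enabled the arming path decides behind the scenes whether the breakpoint at that address can be replaced by a jump. A minimal, illustrative module (the probed symbol and all names here are examples, not part of this patch):

#include <linux/module.h>
#include <linux/kprobes.h>

/* Probe target is only an example; any kprobe-able symbol will do. */
static struct kprobe kp = {
	.symbol_name = "do_fork",
};

static int handler_pre(struct kprobe *p, struct pt_regs *regs)
{
	pr_info("kprobe hit at %p\n", p->addr);
	return 0;
}

static int __init optdemo_init(void)
{
	kp.pre_handler = handler_pre;
	return register_kprobe(&kp);
}

static void __exit optdemo_exit(void)
{
	unregister_kprobe(&kp);
}

module_init(optdemo_init);
module_exit(optdemo_exit);
MODULE_LICENSE("GPL");

Whether the probe ends up optimized or stays a plain breakpoint, the pre_handler runs with the same arguments; optimized probes simply reach it through opt_pre_handler() from the detour code instead of through the breakpoint trap.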
Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Ananth N Mavinakayanahalli Cc: Jim Keniston Cc: Srikar Dronamraju Cc: Christoph Hellwig Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Anders Kaseorg Cc: Tim Abbott Cc: Andi Kleen Cc: Jason Baron Cc: Mathieu Desnoyers Cc: Frederic Weisbecker Cc: Ananth N Mavinakayanahalli LKML-Reference: <20100225133407.6725.81992.stgit@localhost6.localdomain6> Signed-off-by: Ingo Molnar --- arch/Kconfig | 13 ++ include/linux/kprobes.h | 36 ++++ kernel/kprobes.c | 461 ++++++++++++++++++++++++++++++++++++++++++------ 3 files changed, 459 insertions(+), 51 deletions(-) (limited to 'include') diff --git a/arch/Kconfig b/arch/Kconfig index 9d055b4f0585..e0ad3caf16d9 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -57,6 +57,17 @@ config KPROBES for kernel debugging, non-intrusive instrumentation and testing. If in doubt, say "N". +config OPTPROBES + bool "Kprobes jump optimization support (EXPERIMENTAL)" + default y + depends on KPROBES + depends on !PREEMPT + depends on HAVE_OPTPROBES + select KALLSYMS_ALL + help + This option will allow kprobes to optimize breakpoint to + a jump for reducing its overhead. + config HAVE_EFFICIENT_UNALIGNED_ACCESS bool help @@ -99,6 +110,8 @@ config HAVE_KPROBES config HAVE_KRETPROBES bool +config HAVE_OPTPROBES + bool # # An arch should select this if it provides all these things: # diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 1b672f74a32f..aed1f95c582f 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -122,6 +122,11 @@ struct kprobe { /* Kprobe status flags */ #define KPROBE_FLAG_GONE 1 /* breakpoint has already gone */ #define KPROBE_FLAG_DISABLED 2 /* probe is temporarily disabled */ +#define KPROBE_FLAG_OPTIMIZED 4 /* + * probe is really optimized. + * NOTE: + * this flag is only for optimized_kprobe. + */ /* Has this kprobe gone ? */ static inline int kprobe_gone(struct kprobe *p) @@ -134,6 +139,12 @@ static inline int kprobe_disabled(struct kprobe *p) { return p->flags & (KPROBE_FLAG_DISABLED | KPROBE_FLAG_GONE); } + +/* Is this kprobe really running optimized path ? 
*/ +static inline int kprobe_optimized(struct kprobe *p) +{ + return p->flags & KPROBE_FLAG_OPTIMIZED; +} /* * Special probe type that uses setjmp-longjmp type tricks to resume * execution at a specified entry with a matching prototype corresponding @@ -249,6 +260,31 @@ extern kprobe_opcode_t *get_insn_slot(void); extern void free_insn_slot(kprobe_opcode_t *slot, int dirty); extern void kprobes_inc_nmissed_count(struct kprobe *p); +#ifdef CONFIG_OPTPROBES +/* + * Internal structure for direct jump optimized probe + */ +struct optimized_kprobe { + struct kprobe kp; + struct list_head list; /* list for optimizing queue */ + struct arch_optimized_insn optinsn; +}; + +/* Architecture dependent functions for direct jump optimization */ +extern int arch_prepared_optinsn(struct arch_optimized_insn *optinsn); +extern int arch_check_optimized_kprobe(struct optimized_kprobe *op); +extern int arch_prepare_optimized_kprobe(struct optimized_kprobe *op); +extern void arch_remove_optimized_kprobe(struct optimized_kprobe *op); +extern int arch_optimize_kprobe(struct optimized_kprobe *op); +extern void arch_unoptimize_kprobe(struct optimized_kprobe *op); +extern kprobe_opcode_t *get_optinsn_slot(void); +extern void free_optinsn_slot(kprobe_opcode_t *slot, int dirty); +extern int arch_within_optimized_kprobe(struct optimized_kprobe *op, + unsigned long addr); + +extern void opt_pre_handler(struct kprobe *p, struct pt_regs *regs); +#endif /* CONFIG_OPTPROBES */ + /* Get the kprobe at this addr (if any) - called with preemption disabled */ struct kprobe *get_kprobe(void *addr); void kretprobe_hash_lock(struct task_struct *tsk, diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 78105623d739..612af2d61614 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -280,6 +281,33 @@ void __kprobes free_insn_slot(kprobe_opcode_t * slot, int dirty) __free_insn_slot(&kprobe_insn_slots, slot, dirty); mutex_unlock(&kprobe_insn_mutex); } +#ifdef CONFIG_OPTPROBES +/* For optimized_kprobe buffer */ +static DEFINE_MUTEX(kprobe_optinsn_mutex); /* Protects kprobe_optinsn_slots */ +static struct kprobe_insn_cache kprobe_optinsn_slots = { + .pages = LIST_HEAD_INIT(kprobe_optinsn_slots.pages), + /* .insn_size is initialized later */ + .nr_garbage = 0, +}; +/* Get a slot for optimized_kprobe buffer */ +kprobe_opcode_t __kprobes *get_optinsn_slot(void) +{ + kprobe_opcode_t *ret = NULL; + + mutex_lock(&kprobe_optinsn_mutex); + ret = __get_insn_slot(&kprobe_optinsn_slots); + mutex_unlock(&kprobe_optinsn_mutex); + + return ret; +} + +void __kprobes free_optinsn_slot(kprobe_opcode_t * slot, int dirty) +{ + mutex_lock(&kprobe_optinsn_mutex); + __free_insn_slot(&kprobe_optinsn_slots, slot, dirty); + mutex_unlock(&kprobe_optinsn_mutex); +} +#endif #endif /* We have preemption disabled.. 
so it is safe to use __ versions */ @@ -310,23 +338,324 @@ struct kprobe __kprobes *get_kprobe(void *addr) if (p->addr == addr) return p; } + return NULL; } +static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs); + +/* Return true if the kprobe is an aggregator */ +static inline int kprobe_aggrprobe(struct kprobe *p) +{ + return p->pre_handler == aggr_pre_handler; +} + +/* + * Keep all fields in the kprobe consistent + */ +static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p) +{ + memcpy(&p->opcode, &old_p->opcode, sizeof(kprobe_opcode_t)); + memcpy(&p->ainsn, &old_p->ainsn, sizeof(struct arch_specific_insn)); +} + +#ifdef CONFIG_OPTPROBES +/* + * Call all pre_handler on the list, but ignores its return value. + * This must be called from arch-dep optimized caller. + */ +void __kprobes opt_pre_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct kprobe *kp; + + list_for_each_entry_rcu(kp, &p->list, list) { + if (kp->pre_handler && likely(!kprobe_disabled(kp))) { + set_kprobe_instance(kp); + kp->pre_handler(kp, regs); + } + reset_kprobe_instance(); + } +} + +/* Return true(!0) if the kprobe is ready for optimization. */ +static inline int kprobe_optready(struct kprobe *p) +{ + struct optimized_kprobe *op; + + if (kprobe_aggrprobe(p)) { + op = container_of(p, struct optimized_kprobe, kp); + return arch_prepared_optinsn(&op->optinsn); + } + + return 0; +} + +/* + * Return an optimized kprobe whose optimizing code replaces + * instructions including addr (exclude breakpoint). + */ +struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr) +{ + int i; + struct kprobe *p = NULL; + struct optimized_kprobe *op; + + /* Don't check i == 0, since that is a breakpoint case. */ + for (i = 1; !p && i < MAX_OPTIMIZED_LENGTH; i++) + p = get_kprobe((void *)(addr - i)); + + if (p && kprobe_optready(p)) { + op = container_of(p, struct optimized_kprobe, kp); + if (arch_within_optimized_kprobe(op, addr)) + return p; + } + + return NULL; +} + +/* Optimization staging list, protected by kprobe_mutex */ +static LIST_HEAD(optimizing_list); + +static void kprobe_optimizer(struct work_struct *work); +static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer); +#define OPTIMIZE_DELAY 5 + +/* Kprobe jump optimizer */ +static __kprobes void kprobe_optimizer(struct work_struct *work) +{ + struct optimized_kprobe *op, *tmp; + + /* Lock modules while optimizing kprobes */ + mutex_lock(&module_mutex); + mutex_lock(&kprobe_mutex); + if (kprobes_all_disarmed) + goto end; + + /* + * Wait for quiesence period to ensure all running interrupts + * are done. Because optprobe may modify multiple instructions + * there is a chance that Nth instruction is interrupted. In that + * case, running interrupt can return to 2nd-Nth byte of jump + * instruction. This wait is for avoiding it. + */ + synchronize_sched(); + + /* + * The optimization/unoptimization refers online_cpus via + * stop_machine() and cpu-hotplug modifies online_cpus. + * And same time, text_mutex will be held in cpu-hotplug and here. + * This combination can cause a deadlock (cpu-hotplug try to lock + * text_mutex but stop_machine can not be done because online_cpus + * has been changed) + * To avoid this deadlock, we need to call get_online_cpus() + * for preventing cpu-hotplug outside of text_mutex locking. 
+ */ + get_online_cpus(); + mutex_lock(&text_mutex); + list_for_each_entry_safe(op, tmp, &optimizing_list, list) { + WARN_ON(kprobe_disabled(&op->kp)); + if (arch_optimize_kprobe(op) < 0) + op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; + list_del_init(&op->list); + } + mutex_unlock(&text_mutex); + put_online_cpus(); +end: + mutex_unlock(&kprobe_mutex); + mutex_unlock(&module_mutex); +} + +/* Optimize kprobe if p is ready to be optimized */ +static __kprobes void optimize_kprobe(struct kprobe *p) +{ + struct optimized_kprobe *op; + + /* Check if the kprobe is disabled or not ready for optimization. */ + if (!kprobe_optready(p) || + (kprobe_disabled(p) || kprobes_all_disarmed)) + return; + + /* Both of break_handler and post_handler are not supported. */ + if (p->break_handler || p->post_handler) + return; + + op = container_of(p, struct optimized_kprobe, kp); + + /* Check there is no other kprobes at the optimized instructions */ + if (arch_check_optimized_kprobe(op) < 0) + return; + + /* Check if it is already optimized. */ + if (op->kp.flags & KPROBE_FLAG_OPTIMIZED) + return; + + op->kp.flags |= KPROBE_FLAG_OPTIMIZED; + list_add(&op->list, &optimizing_list); + if (!delayed_work_pending(&optimizing_work)) + schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY); +} + +/* Unoptimize a kprobe if p is optimized */ +static __kprobes void unoptimize_kprobe(struct kprobe *p) +{ + struct optimized_kprobe *op; + + if ((p->flags & KPROBE_FLAG_OPTIMIZED) && kprobe_aggrprobe(p)) { + op = container_of(p, struct optimized_kprobe, kp); + if (!list_empty(&op->list)) + /* Dequeue from the optimization queue */ + list_del_init(&op->list); + else + /* Replace jump with break */ + arch_unoptimize_kprobe(op); + op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; + } +} + +/* Remove optimized instructions */ +static void __kprobes kill_optimized_kprobe(struct kprobe *p) +{ + struct optimized_kprobe *op; + + op = container_of(p, struct optimized_kprobe, kp); + if (!list_empty(&op->list)) { + /* Dequeue from the optimization queue */ + list_del_init(&op->list); + op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; + } + /* Don't unoptimize, because the target code will be freed. 
*/ + arch_remove_optimized_kprobe(op); +} + +/* Try to prepare optimized instructions */ +static __kprobes void prepare_optimized_kprobe(struct kprobe *p) +{ + struct optimized_kprobe *op; + + op = container_of(p, struct optimized_kprobe, kp); + arch_prepare_optimized_kprobe(op); +} + +/* Free optimized instructions and optimized_kprobe */ +static __kprobes void free_aggr_kprobe(struct kprobe *p) +{ + struct optimized_kprobe *op; + + op = container_of(p, struct optimized_kprobe, kp); + arch_remove_optimized_kprobe(op); + kfree(op); +} + +/* Allocate new optimized_kprobe and try to prepare optimized instructions */ +static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p) +{ + struct optimized_kprobe *op; + + op = kzalloc(sizeof(struct optimized_kprobe), GFP_KERNEL); + if (!op) + return NULL; + + INIT_LIST_HEAD(&op->list); + op->kp.addr = p->addr; + arch_prepare_optimized_kprobe(op); + + return &op->kp; +} + +static void __kprobes init_aggr_kprobe(struct kprobe *ap, struct kprobe *p); + +/* + * Prepare an optimized_kprobe and optimize it + * NOTE: p must be a normal registered kprobe + */ +static __kprobes void try_to_optimize_kprobe(struct kprobe *p) +{ + struct kprobe *ap; + struct optimized_kprobe *op; + + ap = alloc_aggr_kprobe(p); + if (!ap) + return; + + op = container_of(ap, struct optimized_kprobe, kp); + if (!arch_prepared_optinsn(&op->optinsn)) { + /* If failed to setup optimizing, fallback to kprobe */ + free_aggr_kprobe(ap); + return; + } + + init_aggr_kprobe(ap, p); + optimize_kprobe(ap); +} + +static void __kprobes __arm_kprobe(struct kprobe *p) +{ + struct kprobe *old_p; + + /* Check collision with other optimized kprobes */ + old_p = get_optimized_kprobe((unsigned long)p->addr); + if (unlikely(old_p)) + unoptimize_kprobe(old_p); /* Fallback to unoptimized kprobe */ + + arch_arm_kprobe(p); + optimize_kprobe(p); /* Try to optimize (add kprobe to a list) */ +} + +static void __kprobes __disarm_kprobe(struct kprobe *p) +{ + struct kprobe *old_p; + + unoptimize_kprobe(p); /* Try to unoptimize */ + arch_disarm_kprobe(p); + + /* If another kprobe was blocked, optimize it. */ + old_p = get_optimized_kprobe((unsigned long)p->addr); + if (unlikely(old_p)) + optimize_kprobe(old_p); +} + +#else /* !CONFIG_OPTPROBES */ + +#define optimize_kprobe(p) do {} while (0) +#define unoptimize_kprobe(p) do {} while (0) +#define kill_optimized_kprobe(p) do {} while (0) +#define prepare_optimized_kprobe(p) do {} while (0) +#define try_to_optimize_kprobe(p) do {} while (0) +#define __arm_kprobe(p) arch_arm_kprobe(p) +#define __disarm_kprobe(p) arch_disarm_kprobe(p) + +static __kprobes void free_aggr_kprobe(struct kprobe *p) +{ + kfree(p); +} + +static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p) +{ + return kzalloc(sizeof(struct kprobe), GFP_KERNEL); +} +#endif /* CONFIG_OPTPROBES */ + /* Arm a kprobe with text_mutex */ static void __kprobes arm_kprobe(struct kprobe *kp) { + /* + * Here, since __arm_kprobe() doesn't use stop_machine(), + * this doesn't cause deadlock on text_mutex. So, we don't + * need get_online_cpus(). 
+ */ mutex_lock(&text_mutex); - arch_arm_kprobe(kp); + __arm_kprobe(kp); mutex_unlock(&text_mutex); } /* Disarm a kprobe with text_mutex */ static void __kprobes disarm_kprobe(struct kprobe *kp) { + get_online_cpus(); /* For avoiding text_mutex deadlock */ mutex_lock(&text_mutex); - arch_disarm_kprobe(kp); + __disarm_kprobe(kp); mutex_unlock(&text_mutex); + put_online_cpus(); } /* @@ -395,7 +724,7 @@ static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs) void __kprobes kprobes_inc_nmissed_count(struct kprobe *p) { struct kprobe *kp; - if (p->pre_handler != aggr_pre_handler) { + if (!kprobe_aggrprobe(p)) { p->nmissed++; } else { list_for_each_entry_rcu(kp, &p->list, list) @@ -518,15 +847,6 @@ static void __kprobes cleanup_rp_inst(struct kretprobe *rp) free_rp_inst(rp); } -/* - * Keep all fields in the kprobe consistent - */ -static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p) -{ - memcpy(&p->opcode, &old_p->opcode, sizeof(kprobe_opcode_t)); - memcpy(&p->ainsn, &old_p->ainsn, sizeof(struct arch_specific_insn)); -} - /* * Add the new probe to ap->list. Fail if this is the * second jprobe at the address - two jprobes can't coexist @@ -534,6 +854,10 @@ static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p) static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p) { BUG_ON(kprobe_gone(ap) || kprobe_gone(p)); + + if (p->break_handler || p->post_handler) + unoptimize_kprobe(ap); /* Fall back to normal kprobe */ + if (p->break_handler) { if (ap->break_handler) return -EEXIST; @@ -548,7 +872,7 @@ static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p) ap->flags &= ~KPROBE_FLAG_DISABLED; if (!kprobes_all_disarmed) /* Arm the breakpoint again. */ - arm_kprobe(ap); + __arm_kprobe(ap); } return 0; } @@ -557,12 +881,13 @@ static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p) * Fill in the required fields of the "manager kprobe". Replace the * earlier kprobe in the hlist with the manager kprobe */ -static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p) +static void __kprobes init_aggr_kprobe(struct kprobe *ap, struct kprobe *p) { + /* Copy p's insn slot to ap */ copy_kprobe(p, ap); flush_insn_slot(ap); ap->addr = p->addr; - ap->flags = p->flags; + ap->flags = p->flags & ~KPROBE_FLAG_OPTIMIZED; ap->pre_handler = aggr_pre_handler; ap->fault_handler = aggr_fault_handler; /* We don't care the kprobe which has gone. */ @@ -572,8 +897,9 @@ static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p) ap->break_handler = aggr_break_handler; INIT_LIST_HEAD(&ap->list); - list_add_rcu(&p->list, &ap->list); + INIT_HLIST_NODE(&ap->hlist); + list_add_rcu(&p->list, &ap->list); hlist_replace_rcu(&p->hlist, &ap->hlist); } @@ -587,12 +913,12 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p, int ret = 0; struct kprobe *ap = old_p; - if (old_p->pre_handler != aggr_pre_handler) { - /* If old_p is not an aggr_probe, create new aggr_kprobe. */ - ap = kzalloc(sizeof(struct kprobe), GFP_KERNEL); + if (!kprobe_aggrprobe(old_p)) { + /* If old_p is not an aggr_kprobe, create new aggr_kprobe. */ + ap = alloc_aggr_kprobe(old_p); if (!ap) return -ENOMEM; - add_aggr_kprobe(ap, old_p); + init_aggr_kprobe(ap, old_p); } if (kprobe_gone(ap)) { @@ -611,6 +937,9 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p, */ return ret; + /* Prepare optimized instructions if possible. 
*/ + prepare_optimized_kprobe(ap); + /* * Clear gone flag to prevent allocating new slot again, and * set disabled flag because it is not armed yet. @@ -619,6 +948,7 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p, | KPROBE_FLAG_DISABLED; } + /* Copy ap's insn slot to p */ copy_kprobe(ap, p); return add_new_kprobe(ap, p); } @@ -769,27 +1099,34 @@ int __kprobes register_kprobe(struct kprobe *p) p->nmissed = 0; INIT_LIST_HEAD(&p->list); mutex_lock(&kprobe_mutex); + + get_online_cpus(); /* For avoiding text_mutex deadlock. */ + mutex_lock(&text_mutex); + old_p = get_kprobe(p->addr); if (old_p) { + /* Since this may unoptimize old_p, locking text_mutex. */ ret = register_aggr_kprobe(old_p, p); goto out; } - mutex_lock(&text_mutex); ret = arch_prepare_kprobe(p); if (ret) - goto out_unlock_text; + goto out; INIT_HLIST_NODE(&p->hlist); hlist_add_head_rcu(&p->hlist, &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]); if (!kprobes_all_disarmed && !kprobe_disabled(p)) - arch_arm_kprobe(p); + __arm_kprobe(p); + + /* Try to optimize kprobe */ + try_to_optimize_kprobe(p); -out_unlock_text: - mutex_unlock(&text_mutex); out: + mutex_unlock(&text_mutex); + put_online_cpus(); mutex_unlock(&kprobe_mutex); if (probed_mod) @@ -811,7 +1148,7 @@ static int __kprobes __unregister_kprobe_top(struct kprobe *p) return -EINVAL; if (old_p == p || - (old_p->pre_handler == aggr_pre_handler && + (kprobe_aggrprobe(old_p) && list_is_singular(&old_p->list))) { /* * Only probe on the hash list. Disarm only if kprobes are @@ -819,7 +1156,7 @@ static int __kprobes __unregister_kprobe_top(struct kprobe *p) * already have been removed. We save on flushing icache. */ if (!kprobes_all_disarmed && !kprobe_disabled(old_p)) - disarm_kprobe(p); + disarm_kprobe(old_p); hlist_del_rcu(&old_p->hlist); } else { if (p->break_handler && !kprobe_gone(p)) @@ -835,8 +1172,13 @@ noclean: list_del_rcu(&p->list); if (!kprobe_disabled(old_p)) { try_to_disable_aggr_kprobe(old_p); - if (!kprobes_all_disarmed && kprobe_disabled(old_p)) - disarm_kprobe(old_p); + if (!kprobes_all_disarmed) { + if (kprobe_disabled(old_p)) + disarm_kprobe(old_p); + else + /* Try to optimize this probe again */ + optimize_kprobe(old_p); + } } } return 0; @@ -853,7 +1195,7 @@ static void __kprobes __unregister_kprobe_bottom(struct kprobe *p) old_p = list_entry(p->list.next, struct kprobe, list); list_del(&p->list); arch_remove_kprobe(old_p); - kfree(old_p); + free_aggr_kprobe(old_p); } } @@ -1149,7 +1491,7 @@ static void __kprobes kill_kprobe(struct kprobe *p) struct kprobe *kp; p->flags |= KPROBE_FLAG_GONE; - if (p->pre_handler == aggr_pre_handler) { + if (kprobe_aggrprobe(p)) { /* * If this is an aggr_kprobe, we have to list all the * chained probes and mark them GONE. 
@@ -1158,6 +1500,7 @@ static void __kprobes kill_kprobe(struct kprobe *p) kp->flags |= KPROBE_FLAG_GONE; p->post_handler = NULL; p->break_handler = NULL; + kill_optimized_kprobe(p); } /* * Here, we can remove insn_slot safely, because no thread calls @@ -1267,6 +1610,11 @@ static int __init init_kprobes(void) } } +#if defined(CONFIG_OPTPROBES) && defined(__ARCH_WANT_KPROBES_INSN_SLOT) + /* Init kprobe_optinsn_slots */ + kprobe_optinsn_slots.insn_size = MAX_OPTINSN_SIZE; +#endif + /* By default, kprobes are armed */ kprobes_all_disarmed = false; @@ -1285,7 +1633,7 @@ static int __init init_kprobes(void) #ifdef CONFIG_DEBUG_FS static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p, - const char *sym, int offset,char *modname) + const char *sym, int offset, char *modname, struct kprobe *pp) { char *kprobe_type; @@ -1295,19 +1643,21 @@ static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p, kprobe_type = "j"; else kprobe_type = "k"; + if (sym) - seq_printf(pi, "%p %s %s+0x%x %s %s%s\n", + seq_printf(pi, "%p %s %s+0x%x %s ", p->addr, kprobe_type, sym, offset, - (modname ? modname : " "), - (kprobe_gone(p) ? "[GONE]" : ""), - ((kprobe_disabled(p) && !kprobe_gone(p)) ? - "[DISABLED]" : "")); + (modname ? modname : " ")); else - seq_printf(pi, "%p %s %p %s%s\n", - p->addr, kprobe_type, p->addr, - (kprobe_gone(p) ? "[GONE]" : ""), - ((kprobe_disabled(p) && !kprobe_gone(p)) ? - "[DISABLED]" : "")); + seq_printf(pi, "%p %s %p ", + p->addr, kprobe_type, p->addr); + + if (!pp) + pp = p; + seq_printf(pi, "%s%s%s\n", + (kprobe_gone(p) ? "[GONE]" : ""), + ((kprobe_disabled(p) && !kprobe_gone(p)) ? "[DISABLED]" : ""), + (kprobe_optimized(pp) ? "[OPTIMIZED]" : "")); } static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos) @@ -1343,11 +1693,11 @@ static int __kprobes show_kprobe_addr(struct seq_file *pi, void *v) hlist_for_each_entry_rcu(p, node, head, hlist) { sym = kallsyms_lookup((unsigned long)p->addr, NULL, &offset, &modname, namebuf); - if (p->pre_handler == aggr_pre_handler) { + if (kprobe_aggrprobe(p)) { list_for_each_entry_rcu(kp, &p->list, list) - report_probe(pi, kp, sym, offset, modname); + report_probe(pi, kp, sym, offset, modname, p); } else - report_probe(pi, p, sym, offset, modname); + report_probe(pi, p, sym, offset, modname, NULL); } preempt_enable(); return 0; @@ -1425,12 +1775,13 @@ int __kprobes enable_kprobe(struct kprobe *kp) goto out; } - if (!kprobes_all_disarmed && kprobe_disabled(p)) - arm_kprobe(p); - - p->flags &= ~KPROBE_FLAG_DISABLED; if (p != kp) kp->flags &= ~KPROBE_FLAG_DISABLED; + + if (!kprobes_all_disarmed && kprobe_disabled(p)) { + p->flags &= ~KPROBE_FLAG_DISABLED; + arm_kprobe(p); + } out: mutex_unlock(&kprobe_mutex); return ret; @@ -1450,12 +1801,13 @@ static void __kprobes arm_all_kprobes(void) if (!kprobes_all_disarmed) goto already_enabled; + /* Arming kprobes doesn't optimize kprobe itself */ mutex_lock(&text_mutex); for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; hlist_for_each_entry_rcu(p, node, head, hlist) if (!kprobe_disabled(p)) - arch_arm_kprobe(p); + __arm_kprobe(p); } mutex_unlock(&text_mutex); @@ -1482,16 +1834,23 @@ static void __kprobes disarm_all_kprobes(void) kprobes_all_disarmed = true; printk(KERN_INFO "Kprobes globally disabled\n"); + + /* + * Here we call get_online_cpus() for avoiding text_mutex deadlock, + * because disarming may also unoptimize kprobes. 
+ */ + get_online_cpus(); mutex_lock(&text_mutex); for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; hlist_for_each_entry_rcu(p, node, head, hlist) { if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p)) - arch_disarm_kprobe(p); + __disarm_kprobe(p); } } mutex_unlock(&text_mutex); + put_online_cpus(); mutex_unlock(&kprobe_mutex); /* Allow all currently running kprobes to complete */ synchronize_sched(); -- cgit v1.2.3 From b2be84df99ebc93599c69e931a3c4a5105abfabc Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 25 Feb 2010 08:34:15 -0500 Subject: kprobes: Jump optimization sysctl interface Add /proc/sys/debug/kprobes-optimization sysctl which enables and disables kprobes jump optimization on the fly for debugging. Changes in v7: - Remove ctl_name = CTL_UNNUMBERED for upstream compatibility. Changes in v6: - Update comments and coding style. Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Ananth N Mavinakayanahalli Cc: Jim Keniston Cc: Srikar Dronamraju Cc: Christoph Hellwig Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Anders Kaseorg Cc: Tim Abbott Cc: Andi Kleen Cc: Jason Baron Cc: Mathieu Desnoyers Cc: Frederic Weisbecker Cc: Ananth N Mavinakayanahalli LKML-Reference: <20100225133415.6725.8274.stgit@localhost6.localdomain6> Signed-off-by: Ingo Molnar --- include/linux/kprobes.h | 8 +++++ kernel/kprobes.c | 88 +++++++++++++++++++++++++++++++++++++++++++++++-- kernel/sysctl.c | 12 +++++++ 3 files changed, 105 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index aed1f95c582f..e7d1b2e0070d 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -283,6 +283,14 @@ extern int arch_within_optimized_kprobe(struct optimized_kprobe *op, unsigned long addr); extern void opt_pre_handler(struct kprobe *p, struct pt_regs *regs); + +#ifdef CONFIG_SYSCTL +extern int sysctl_kprobes_optimization; +extern int proc_kprobes_optimization_handler(struct ctl_table *table, + int write, void __user *buffer, + size_t *length, loff_t *ppos); +#endif + #endif /* CONFIG_OPTPROBES */ /* Get the kprobe at this addr (if any) - called with preemption disabled */ diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 612af2d61614..fa034d29cf73 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -360,6 +361,9 @@ static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p) } #ifdef CONFIG_OPTPROBES +/* NOTE: change this value only with kprobe_mutex held */ +static bool kprobes_allow_optimization; + /* * Call all pre_handler on the list, but ignores its return value. * This must be called from arch-dep optimized caller. @@ -428,7 +432,7 @@ static __kprobes void kprobe_optimizer(struct work_struct *work) /* Lock modules while optimizing kprobes */ mutex_lock(&module_mutex); mutex_lock(&kprobe_mutex); - if (kprobes_all_disarmed) + if (kprobes_all_disarmed || !kprobes_allow_optimization) goto end; /* @@ -471,7 +475,7 @@ static __kprobes void optimize_kprobe(struct kprobe *p) struct optimized_kprobe *op; /* Check if the kprobe is disabled or not ready for optimization. 
*/ - if (!kprobe_optready(p) || + if (!kprobe_optready(p) || !kprobes_allow_optimization || (kprobe_disabled(p) || kprobes_all_disarmed)) return; @@ -588,6 +592,80 @@ static __kprobes void try_to_optimize_kprobe(struct kprobe *p) optimize_kprobe(ap); } +#ifdef CONFIG_SYSCTL +static void __kprobes optimize_all_kprobes(void) +{ + struct hlist_head *head; + struct hlist_node *node; + struct kprobe *p; + unsigned int i; + + /* If optimization is already allowed, just return */ + if (kprobes_allow_optimization) + return; + + kprobes_allow_optimization = true; + mutex_lock(&text_mutex); + for (i = 0; i < KPROBE_TABLE_SIZE; i++) { + head = &kprobe_table[i]; + hlist_for_each_entry_rcu(p, node, head, hlist) + if (!kprobe_disabled(p)) + optimize_kprobe(p); + } + mutex_unlock(&text_mutex); + printk(KERN_INFO "Kprobes globally optimized\n"); +} + +static void __kprobes unoptimize_all_kprobes(void) +{ + struct hlist_head *head; + struct hlist_node *node; + struct kprobe *p; + unsigned int i; + + /* If optimization is already prohibited, just return */ + if (!kprobes_allow_optimization) + return; + + kprobes_allow_optimization = false; + printk(KERN_INFO "Kprobes globally unoptimized\n"); + get_online_cpus(); /* For avoiding text_mutex deadlock */ + mutex_lock(&text_mutex); + for (i = 0; i < KPROBE_TABLE_SIZE; i++) { + head = &kprobe_table[i]; + hlist_for_each_entry_rcu(p, node, head, hlist) { + if (!kprobe_disabled(p)) + unoptimize_kprobe(p); + } + } + + mutex_unlock(&text_mutex); + put_online_cpus(); + /* Allow all currently running kprobes to complete */ + synchronize_sched(); +} + +int sysctl_kprobes_optimization; +int proc_kprobes_optimization_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *length, + loff_t *ppos) +{ + int ret; + + mutex_lock(&kprobe_mutex); + sysctl_kprobes_optimization = kprobes_allow_optimization ? 1 : 0; + ret = proc_dointvec_minmax(table, write, buffer, length, ppos); + + if (sysctl_kprobes_optimization) + optimize_all_kprobes(); + else + unoptimize_all_kprobes(); + mutex_unlock(&kprobe_mutex); + + return ret; +} +#endif /* CONFIG_SYSCTL */ + static void __kprobes __arm_kprobe(struct kprobe *p) { struct kprobe *old_p; @@ -1610,10 +1688,14 @@ static int __init init_kprobes(void) } } -#if defined(CONFIG_OPTPROBES) && defined(__ARCH_WANT_KPROBES_INSN_SLOT) +#if defined(CONFIG_OPTPROBES) +#if defined(__ARCH_WANT_KPROBES_INSN_SLOT) /* Init kprobe_optinsn_slots */ kprobe_optinsn_slots.insn_size = MAX_OPTINSN_SIZE; #endif + /* By default, kprobes can be optimized */ + kprobes_allow_optimization = true; +#endif /* By default, kprobes are armed */ kprobes_all_disarmed = false; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 8a68b2448468..40d791d616b5 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include @@ -1449,6 +1450,17 @@ static struct ctl_table debug_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, +#endif +#if defined(CONFIG_OPTPROBES) + { + .procname = "kprobes-optimization", + .data = &sysctl_kprobes_optimization, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_kprobes_optimization_handler, + .extra1 = &zero, + .extra2 = &one, + }, #endif { } }; -- cgit v1.2.3 From fb90ef93df654f2678933efbbf864adac0ae490e Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 24 Feb 2010 18:36:53 -0800 Subject: early_res: Add free_early_partial() To free partial areas in pcpu_setup... 
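The intended call pattern is that a caller which reserved a region early can later hand back just the part it did not use; a minimal hypothetical sketch (names and sizes are made up; the real caller is pcpu_fc_free() in the diff below):

#include <linux/early_res.h>

/* Hypothetical boot-time helper: reserve a region, then release its unused tail. */
static void __init example_trim_early(u64 base, u64 total, u64 used)
{
	reserve_early(base, base + total, "example");

	/* ... only the first 'used' bytes end up being needed ... */

	/* Frees just [base + used, base + total); the head stays reserved. */
	free_early_partial(base + used, base + total);
}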
Reported-by: Peter Zijlstra Signed-off-by: Yinghai Lu Cc: Tejun Heo Cc: Christoph Lameter Cc: Stephen Rothwell Cc: Linus Torvalds Cc: Jesse Barnes Cc: Pekka Enberg LKML-Reference: <4B85E245.5030001@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup_percpu.c | 6 +++++ include/linux/early_res.h | 1 + kernel/early_res.c | 55 ++++++++++++++++++++++++++++++++++++++++++ mm/percpu.c | 3 --- 4 files changed, 62 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 35abcb8b00e9..ef6370b00e70 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -137,7 +137,13 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) static void __init pcpu_fc_free(void *ptr, size_t size) { +#ifdef CONFIG_NO_BOOTMEM + u64 start = __pa(ptr); + u64 end = start + size; + free_early_partial(start, end); +#else free_bootmem(__pa(ptr), size); +#endif } static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) diff --git a/include/linux/early_res.h b/include/linux/early_res.h index 50f7663bb8b1..29c09f57a13c 100644 --- a/include/linux/early_res.h +++ b/include/linux/early_res.h @@ -5,6 +5,7 @@ extern void reserve_early(u64 start, u64 end, char *name); extern void reserve_early_overlap_ok(u64 start, u64 end, char *name); extern void free_early(u64 start, u64 end); +void free_early_partial(u64 start, u64 end); extern void early_res_to_bootmem(u64 start, u64 end); void reserve_early_without_check(u64 start, u64 end, char *name); diff --git a/kernel/early_res.c b/kernel/early_res.c index aa5494ac4462..9ab11cd84853 100644 --- a/kernel/early_res.c +++ b/kernel/early_res.c @@ -61,6 +61,40 @@ static void __init drop_range(int i) early_res_count--; } +static void __init drop_range_partial(int i, u64 start, u64 end) +{ + u64 common_start, common_end; + u64 old_start, old_end; + + old_start = early_res[i].start; + old_end = early_res[i].end; + common_start = max(old_start, start); + common_end = min(old_end, end); + + /* no overlap ? */ + if (common_start >= common_end) + return; + + if (old_start < common_start) { + /* make head segment */ + early_res[i].end = common_start; + if (old_end > common_end) { + /* add another for left over on tail */ + reserve_early_without_check(common_end, old_end, + early_res[i].name); + } + return; + } else { + if (old_end > common_end) { + /* reuse the entry for tail left */ + early_res[i].start = common_end; + return; + } + /* all covered */ + drop_range(i); + } +} + /* * Split any existing ranges that: * 1) are marked 'overlap_ok', and @@ -284,6 +318,27 @@ void __init free_early(u64 start, u64 end) drop_range(i); } +void __init free_early_partial(u64 start, u64 end) +{ + struct early_res *r; + int i; + +try_next: + i = find_overlapped_early(start, end); + if (i >= max_early_res) + return; + + r = &early_res[i]; + /* hole ? */ + if (r->end >= end && r->start <= start) { + drop_range_partial(i, start, end); + return; + } + + drop_range_partial(i, start, end); + goto try_next; +} + #ifdef CONFIG_NO_BOOTMEM static void __init subtract_early_res(struct range *range, int az) { diff --git a/mm/percpu.c b/mm/percpu.c index 841defeeef86..083e7c91e5f6 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1929,10 +1929,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size, } /* copy and return the unused part */ memcpy(ptr, __per_cpu_load, ai->static_size); -#ifndef CONFIG_NO_BOOTMEM - /* fix partial free ! 
*/ free_fn(ptr + size_sum, ai->unit_size - size_sum); -#endif } } -- cgit v1.2.3 From 4c13dd3b48fcb6fbe44f241eb11a057ecd1cba75 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Fri, 26 Feb 2010 09:36:12 +0300 Subject: failslab: add ability to filter slab caches This patch allows injecting faults only for specific slabs. In order to preserve the default behavior, the cache filter is off by default (all caches are faulty). One may define a specific set of slabs like this: # mark skbuff_head_cache as faulty echo 1 > /sys/kernel/slab/skbuff_head_cache/failslab # Turn on cache filter (off by default) echo 1 > /sys/kernel/debug/failslab/cache-filter # Turn on fault injection echo 1 > /sys/kernel/debug/failslab/times echo 1 > /sys/kernel/debug/failslab/probability Acked-by: David Rientjes Acked-by: Akinobu Mita Acked-by: Christoph Lameter Signed-off-by: Dmitry Monakhov Signed-off-by: Pekka Enberg --- Documentation/vm/slub.txt | 1 + include/linux/fault-inject.h | 5 +++-- include/linux/slab.h | 5 +++++ mm/failslab.c | 18 +++++++++++++++--- mm/slab.c | 2 +- mm/slub.c | 29 +++++++++++++++++++++++++++-- 6 files changed, 52 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/Documentation/vm/slub.txt b/Documentation/vm/slub.txt index b37300edf27c..07375e73981a 100644 --- a/Documentation/vm/slub.txt +++ b/Documentation/vm/slub.txt @@ -41,6 +41,7 @@ Possible debug options are P Poisoning (object and padding) U User tracking (free and alloc) T Trace (please only use on single slabs) + A Toggle failslab filter mark for the cache O Switch debugging off for caches that would have caused higher minimum slab orders - Switch all debugging off (useful if the kernel is diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index 06ca9b21dad2..7b64ad40e4ce 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h @@ -82,9 +82,10 @@ static inline void cleanup_fault_attr_dentries(struct fault_attr *attr) #endif /* CONFIG_FAULT_INJECTION */ #ifdef CONFIG_FAILSLAB -extern bool should_failslab(size_t size, gfp_t gfpflags); +extern bool should_failslab(size_t size, gfp_t gfpflags, unsigned long flags); #else -static inline bool should_failslab(size_t size, gfp_t gfpflags) +static inline bool should_failslab(size_t size, gfp_t gfpflags, + unsigned long flags) { return false; } diff --git a/include/linux/slab.h b/include/linux/slab.h index 2da8372519f5..488446289cab 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -70,6 +70,11 @@ #else # define SLAB_NOTRACK 0x00000000UL #endif +#ifdef CONFIG_FAILSLAB +# define SLAB_FAILSLAB 0x02000000UL /* Fault injection mark */ +#else +# define SLAB_FAILSLAB 0x00000000UL +#endif /* The following flags affect the page allocator grouping pages by mobility */ #define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */ diff --git a/mm/failslab.c b/mm/failslab.c index 9339de5f0a91..bb41f98dd8b7 100644 --- a/mm/failslab.c +++ b/mm/failslab.c @@ -1,18 +1,22 @@ #include #include +#include static struct { struct fault_attr attr; u32 ignore_gfp_wait; + int cache_filter; #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS struct dentry *ignore_gfp_wait_file; + struct dentry *cache_filter_file; #endif } failslab = { .attr = FAULT_ATTR_INITIALIZER, .ignore_gfp_wait = 1, + .cache_filter = 0, }; -bool should_failslab(size_t size, gfp_t gfpflags) +bool should_failslab(size_t size, gfp_t gfpflags, unsigned long cache_flags) { if (gfpflags & __GFP_NOFAIL) return false; @@ -20,6 +24,9 @@ bool should_failslab(size_t size, gfp_t gfpflags) if
(failslab.ignore_gfp_wait && (gfpflags & __GFP_WAIT)) return false; + if (failslab.cache_filter && !(cache_flags & SLAB_FAILSLAB)) + return false; + return should_fail(&failslab.attr, size); } @@ -30,7 +37,6 @@ static int __init setup_failslab(char *str) __setup("failslab=", setup_failslab); #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS - static int __init failslab_debugfs_init(void) { mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; @@ -46,8 +52,14 @@ static int __init failslab_debugfs_init(void) debugfs_create_bool("ignore-gfp-wait", mode, dir, &failslab.ignore_gfp_wait); - if (!failslab.ignore_gfp_wait_file) { + failslab.cache_filter_file = + debugfs_create_bool("cache-filter", mode, dir, + &failslab.cache_filter); + + if (!failslab.ignore_gfp_wait_file || + !failslab.cache_filter_file) { err = -ENOMEM; + debugfs_remove(failslab.cache_filter_file); debugfs_remove(failslab.ignore_gfp_wait_file); cleanup_fault_attr_dentries(&failslab.attr); } diff --git a/mm/slab.c b/mm/slab.c index 7451bdacaf18..33496b704859 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3101,7 +3101,7 @@ static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags) if (cachep == &cache_cache) return false; - return should_failslab(obj_size(cachep), flags); + return should_failslab(obj_size(cachep), flags, cachep->flags); } static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) diff --git a/mm/slub.c b/mm/slub.c index 8d71aaf888d7..cab5288736c8 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -151,7 +151,8 @@ * Set of flags that will prevent slab merging */ #define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \ - SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE) + SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \ + SLAB_FAILSLAB) #define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \ SLAB_CACHE_DMA | SLAB_NOTRACK) @@ -1020,6 +1021,9 @@ static int __init setup_slub_debug(char *str) case 't': slub_debug |= SLAB_TRACE; break; + case 'a': + slub_debug |= SLAB_FAILSLAB; + break; default: printk(KERN_ERR "slub_debug option '%c' " "unknown. skipped\n", *str); @@ -1718,7 +1722,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, lockdep_trace_alloc(gfpflags); might_sleep_if(gfpflags & __GFP_WAIT); - if (should_failslab(s->objsize, gfpflags)) + if (should_failslab(s->objsize, gfpflags, s->flags)) return NULL; local_irq_save(flags); @@ -4171,6 +4175,23 @@ static ssize_t trace_store(struct kmem_cache *s, const char *buf, } SLAB_ATTR(trace); +#ifdef CONFIG_FAILSLAB +static ssize_t failslab_show(struct kmem_cache *s, char *buf) +{ + return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB)); +} + +static ssize_t failslab_store(struct kmem_cache *s, const char *buf, + size_t length) +{ + s->flags &= ~SLAB_FAILSLAB; + if (buf[0] == '1') + s->flags |= SLAB_FAILSLAB; + return length; +} +SLAB_ATTR(failslab); +#endif + static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf) { return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT)); @@ -4467,6 +4488,10 @@ static struct attribute *slab_attrs[] = { &deactivate_remote_frees_attr.attr, &order_fallback_attr.attr, #endif +#ifdef CONFIG_FAILSLAB + &failslab_attr.attr, +#endif + NULL }; -- cgit v1.2.3 From bad720ff3e8e47a04bd88d9bbc8317e7d7e049d3 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 22 Oct 2009 16:11:14 -0700 Subject: drm/i915: Add initial bits for VGA modesetting bringup on Sandybridge. 
Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_dma.c | 16 +++++++--- drivers/gpu/drm/i915/i915_drv.h | 26 +++++++++++++++- drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/i915/i915_gem_tiling.c | 2 +- drivers/gpu/drm/i915/i915_irq.c | 18 +++++------ drivers/gpu/drm/i915/intel_bios.c | 3 +- drivers/gpu/drm/i915/intel_crt.c | 14 ++++----- drivers/gpu/drm/i915/intel_display.c | 56 +++++++++++++++++----------------- drivers/gpu/drm/i915/intel_lvds.c | 2 +- drivers/gpu/drm/i915/intel_overlay.c | 2 +- include/drm/drm_pciids.h | 1 + 12 files changed, 88 insertions(+), 56 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 5eed46312442..1376dfe44c95 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -162,7 +162,7 @@ static int i915_interrupt_info(struct seq_file *m, void *data) struct drm_device *dev = node->minor->dev; drm_i915_private_t *dev_priv = dev->dev_private; - if (!IS_IRONLAKE(dev)) { + if (!HAS_PCH_SPLIT(dev)) { seq_printf(m, "Interrupt enable: %08x\n", I915_READ(IER)); seq_printf(m, "Interrupt identity: %08x\n", diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index dbfe07c90cbc..7bfded5e90bb 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1094,15 +1094,21 @@ static int i915_probe_agp(struct drm_device *dev, uint32_t *aperture_size, * Some of the preallocated space is taken by the GTT * and popup. GTT is 1K per MB of aperture size, and popup is 4K. */ - if (IS_G4X(dev) || IS_PINEVIEW(dev) || IS_IRONLAKE(dev)) + if (IS_G4X(dev) || IS_PINEVIEW(dev) || IS_IRONLAKE(dev) || IS_GEN6(dev)) overhead = 4096; else overhead = (*aperture_size / 1024) + 4096; switch (tmp & INTEL_GMCH_GMS_MASK) { case INTEL_855_GMCH_GMS_DISABLED: - DRM_ERROR("video memory is disabled\n"); - return -1; + /* XXX: This is what my A1 silicon has. 
*/ + if (IS_GEN6(dev)) { + stolen = 64 * 1024 * 1024; + } else { + DRM_ERROR("video memory is disabled\n"); + return -1; + } + break; case INTEL_855_GMCH_GMS_STOLEN_1M: stolen = 1 * 1024 * 1024; break; @@ -1180,7 +1186,7 @@ static unsigned long i915_gtt_to_phys(struct drm_device *dev, int gtt_offset, gtt_size; if (IS_I965G(dev)) { - if (IS_G4X(dev) || IS_IRONLAKE(dev)) { + if (IS_G4X(dev) || IS_IRONLAKE(dev) || IS_GEN6(dev)) { gtt_offset = 2*1024*1024; gtt_size = 2*1024*1024; } else { @@ -1563,7 +1569,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) dev->driver->get_vblank_counter = i915_get_vblank_counter; dev->max_vblank_count = 0xffffff; /* only 24 bits of frame count */ - if (IS_G4X(dev) || IS_IRONLAKE(dev)) { + if (IS_G4X(dev) || IS_IRONLAKE(dev) || IS_GEN6(dev)) { dev->max_vblank_count = 0xffffffff; /* full 32 bit counter */ dev->driver->get_vblank_counter = gm45_get_vblank_counter; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ec06d4865a5f..f97592609da4 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1065,7 +1065,7 @@ extern int i915_wait_ring(struct drm_device * dev, int n, const char *caller); #define IS_845G(dev) ((dev)->pci_device == 0x2562) #define IS_I85X(dev) ((dev)->pci_device == 0x3582) #define IS_I865G(dev) ((dev)->pci_device == 0x2572) -#define IS_I8XX(dev) (INTEL_INFO(dev)->is_i8xx) +#define IS_GEN2(dev) (INTEL_INFO(dev)->is_i8xx) #define IS_I915G(dev) (INTEL_INFO(dev)->is_i915g) #define IS_I915GM(dev) ((dev)->pci_device == 0x2592) #define IS_I945G(dev) ((dev)->pci_device == 0x2772) @@ -1084,8 +1084,29 @@ extern int i915_wait_ring(struct drm_device * dev, int n, const char *caller); #define IS_I9XX(dev) (INTEL_INFO(dev)->is_i9xx) #define IS_MOBILE(dev) (INTEL_INFO(dev)->is_mobile) +#define IS_GEN3(dev) (IS_I915G(dev) || \ + IS_I915GM(dev) || \ + IS_I945G(dev) || \ + IS_I945GM(dev) || \ + IS_G33(dev) || \ + IS_PINEVIEW(dev)) +#define IS_GEN4(dev) ((dev)->pci_device == 0x2972 || \ + (dev)->pci_device == 0x2982 || \ + (dev)->pci_device == 0x2992 || \ + (dev)->pci_device == 0x29A2 || \ + (dev)->pci_device == 0x2A02 || \ + (dev)->pci_device == 0x2A12 || \ + (dev)->pci_device == 0x2E02 || \ + (dev)->pci_device == 0x2E12 || \ + (dev)->pci_device == 0x2E22 || \ + (dev)->pci_device == 0x2E32 || \ + (dev)->pci_device == 0x2A42 || \ + (dev)->pci_device == 0x2E42) + #define I915_NEED_GFX_HWS(dev) (INTEL_INFO(dev)->need_gfx_hws) +#define IS_GEN6(dev) ((dev)->pci_device == 0x0102) + /* With the 945 and later, Y tiling got adjusted so that it was 32 128-byte * rows, which changed the alignment requirements and fence programming. 
*/ @@ -1106,6 +1127,9 @@ extern int i915_wait_ring(struct drm_device * dev, int n, const char *caller); #define I915_HAS_FBC(dev) (INTEL_INFO(dev)->has_fbc) #define I915_HAS_RC6(dev) (INTEL_INFO(dev)->has_rc6) +#define HAS_PCH_SPLIT(dev) (IS_IRONLAKE(dev) || \ + IS_GEN6(dev)) + #define PRIMARY_RINGBUFFER_SIZE (128*1024) #endif diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b5df30ca0fa2..a35dc8c0882b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1818,7 +1818,7 @@ i915_do_wait_request(struct drm_device *dev, uint32_t seqno, int interruptible) return -EIO; if (!i915_seqno_passed(i915_get_gem_seqno(dev), seqno)) { - if (IS_IRONLAKE(dev)) + if (HAS_PCH_SPLIT(dev)) ier = I915_READ(DEIER) | I915_READ(GTIER); else ier = I915_READ(IER); diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 20653776965a..b5c55d88ff76 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -92,7 +92,7 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev) uint32_t swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; - if (IS_IRONLAKE(dev)) { + if (IS_IRONLAKE(dev) || IS_GEN6(dev)) { /* On Ironlake whatever DRAM config, GPU always do * same swizzling setup. */ diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index ba1d8314c1ce..ef79d9423f0a 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -842,7 +842,7 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS) atomic_inc(&dev_priv->irq_received); - if (IS_IRONLAKE(dev)) + if (HAS_PCH_SPLIT(dev)) return ironlake_irq_handler(dev); iir = I915_READ(IIR); @@ -1003,7 +1003,7 @@ void i915_user_irq_get(struct drm_device *dev) spin_lock_irqsave(&dev_priv->user_irq_lock, irqflags); if (dev->irq_enabled && (++dev_priv->user_irq_refcount == 1)) { - if (IS_IRONLAKE(dev)) + if (HAS_PCH_SPLIT(dev)) ironlake_enable_graphics_irq(dev_priv, GT_USER_INTERRUPT); else i915_enable_irq(dev_priv, I915_USER_INTERRUPT); @@ -1019,7 +1019,7 @@ void i915_user_irq_put(struct drm_device *dev) spin_lock_irqsave(&dev_priv->user_irq_lock, irqflags); BUG_ON(dev->irq_enabled && dev_priv->user_irq_refcount <= 0); if (dev->irq_enabled && (--dev_priv->user_irq_refcount == 0)) { - if (IS_IRONLAKE(dev)) + if (HAS_PCH_SPLIT(dev)) ironlake_disable_graphics_irq(dev_priv, GT_USER_INTERRUPT); else i915_disable_irq(dev_priv, I915_USER_INTERRUPT); @@ -1127,7 +1127,7 @@ int i915_enable_vblank(struct drm_device *dev, int pipe) return -EINVAL; spin_lock_irqsave(&dev_priv->user_irq_lock, irqflags); - if (IS_IRONLAKE(dev)) + if (HAS_PCH_SPLIT(dev)) ironlake_enable_display_irq(dev_priv, (pipe == 0) ? DE_PIPEA_VBLANK: DE_PIPEB_VBLANK); else if (IS_I965G(dev)) @@ -1149,7 +1149,7 @@ void i915_disable_vblank(struct drm_device *dev, int pipe) unsigned long irqflags; spin_lock_irqsave(&dev_priv->user_irq_lock, irqflags); - if (IS_IRONLAKE(dev)) + if (HAS_PCH_SPLIT(dev)) ironlake_disable_display_irq(dev_priv, (pipe == 0) ? 
DE_PIPEA_VBLANK: DE_PIPEB_VBLANK); else @@ -1163,7 +1163,7 @@ void i915_enable_interrupt (struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - if (!IS_IRONLAKE(dev)) + if (!HAS_PCH_SPLIT(dev)) opregion_enable_asle(dev); dev_priv->irq_enabled = 1; } @@ -1349,7 +1349,7 @@ void i915_driver_irq_preinstall(struct drm_device * dev) INIT_WORK(&dev_priv->hotplug_work, i915_hotplug_work_func); INIT_WORK(&dev_priv->error_work, i915_error_work_func); - if (IS_IRONLAKE(dev)) { + if (HAS_PCH_SPLIT(dev)) { ironlake_irq_preinstall(dev); return; } @@ -1381,7 +1381,7 @@ int i915_driver_irq_postinstall(struct drm_device *dev) dev_priv->vblank_pipe = DRM_I915_VBLANK_PIPE_A | DRM_I915_VBLANK_PIPE_B; - if (IS_IRONLAKE(dev)) + if (HAS_PCH_SPLIT(dev)) return ironlake_irq_postinstall(dev); /* Unmask the interrupts that we always want on. */ @@ -1469,7 +1469,7 @@ void i915_driver_irq_uninstall(struct drm_device * dev) dev_priv->vblank_pipe = 0; - if (IS_IRONLAKE(dev)) { + if (HAS_PCH_SPLIT(dev)) { ironlake_irq_uninstall(dev); return; } diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index 15fbc1b5a83e..70c9d4ba7042 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -247,6 +247,7 @@ static void parse_general_features(struct drm_i915_private *dev_priv, struct bdb_header *bdb) { + struct drm_device *dev = dev_priv->dev; struct bdb_general_features *general; /* Set sensible defaults in case we can't find the general block */ @@ -263,7 +264,7 @@ parse_general_features(struct drm_i915_private *dev_priv, if (IS_I85X(dev_priv->dev)) dev_priv->lvds_ssc_freq = general->ssc_freq ? 66 : 48; - else if (IS_IRONLAKE(dev_priv->dev)) + else if (IS_IRONLAKE(dev_priv->dev) || IS_GEN6(dev)) dev_priv->lvds_ssc_freq = general->ssc_freq ? 100 : 120; else diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 79dd4026586f..fccf07470c8f 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -39,7 +39,7 @@ static void intel_crt_dpms(struct drm_encoder *encoder, int mode) struct drm_i915_private *dev_priv = dev->dev_private; u32 temp, reg; - if (IS_IRONLAKE(dev)) + if (HAS_PCH_SPLIT(dev)) reg = PCH_ADPA; else reg = ADPA; @@ -113,7 +113,7 @@ static void intel_crt_mode_set(struct drm_encoder *encoder, else dpll_md_reg = DPLL_B_MD; - if (IS_IRONLAKE(dev)) + if (HAS_PCH_SPLIT(dev)) adpa_reg = PCH_ADPA; else adpa_reg = ADPA; @@ -122,7 +122,7 @@ static void intel_crt_mode_set(struct drm_encoder *encoder, * Disable separate mode multiplier used when cloning SDVO to CRT * XXX this needs to be adjusted when we really are cloning */ - if (IS_I965G(dev) && !IS_IRONLAKE(dev)) { + if (IS_I965G(dev) && !HAS_PCH_SPLIT(dev)) { dpll_md = I915_READ(dpll_md_reg); I915_WRITE(dpll_md_reg, dpll_md & ~DPLL_MD_UDI_MULTIPLIER_MASK); @@ -136,11 +136,11 @@ static void intel_crt_mode_set(struct drm_encoder *encoder, if (intel_crtc->pipe == 0) { adpa |= ADPA_PIPE_A_SELECT; - if (!IS_IRONLAKE(dev)) + if (!HAS_PCH_SPLIT(dev)) I915_WRITE(BCLRPAT_A, 0); } else { adpa |= ADPA_PIPE_B_SELECT; - if (!IS_IRONLAKE(dev)) + if (!HAS_PCH_SPLIT(dev)) I915_WRITE(BCLRPAT_B, 0); } @@ -202,7 +202,7 @@ static bool intel_crt_detect_hotplug(struct drm_connector *connector) u32 hotplug_en; int i, tries = 0; - if (IS_IRONLAKE(dev)) + if (HAS_PCH_SPLIT(dev)) return intel_ironlake_crt_detect_hotplug(connector); /* @@ -524,7 +524,7 @@ void intel_crt_init(struct drm_device *dev) &intel_output->enc); /* Set up the DDC bus. 
*/ - if (IS_IRONLAKE(dev)) + if (HAS_PCH_SPLIT(dev)) i2c_reg = PCH_GPIOA; else { i2c_reg = GPIOA; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 1b5cd833bc70..ce28f18e0852 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -232,7 +232,7 @@ struct intel_limit { #define G4X_P2_DISPLAY_PORT_FAST 10 #define G4X_P2_DISPLAY_PORT_LIMIT 0 -/* Ironlake */ +/* Ironlake / Sandybridge */ /* as we calculate clock using (register_value + 2) for N/M1/M2, so here the range value for them is (actual_value-2). */ @@ -690,7 +690,7 @@ static const intel_limit_t *intel_limit(struct drm_crtc *crtc) struct drm_device *dev = crtc->dev; const intel_limit_t *limit; - if (IS_IRONLAKE(dev)) + if (HAS_PCH_SPLIT(dev)) limit = intel_ironlake_limit(crtc); else if (IS_G4X(dev)) { limit = intel_g4x_limit(crtc); @@ -1371,7 +1371,7 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y, dspcntr &= ~DISPPLANE_TILED; } - if (IS_IRONLAKE(dev)) + if (HAS_PCH_SPLIT(dev)) /* must disable */ dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE; @@ -1432,7 +1432,7 @@ static void i915_disable_vga (struct drm_device *dev) u8 sr1; u32 vga_reg; - if (IS_IRONLAKE(dev)) + if (HAS_PCH_SPLIT(dev)) vga_reg = CPU_VGACNTRL; else vga_reg = VGACNTRL; @@ -2116,7 +2116,7 @@ static bool intel_crtc_mode_fixup(struct drm_crtc *crtc, struct drm_display_mode *adjusted_mode) { struct drm_device *dev = crtc->dev; - if (IS_IRONLAKE(dev)) { + if (HAS_PCH_SPLIT(dev)) { /* FDI link clock is fixed at 2.7G */ if (mode->clock * 3 > 27000 * 4) return MODE_CLOCK_HIGH; @@ -2983,7 +2983,7 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, refclk / 1000); } else if (IS_I9XX(dev)) { refclk = 96000; - if (IS_IRONLAKE(dev)) + if (HAS_PCH_SPLIT(dev)) refclk = 120000; /* 120Mhz refclk */ } else { refclk = 48000; @@ -3041,7 +3041,7 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, } /* FDI link */ - if (IS_IRONLAKE(dev)) { + if (HAS_PCH_SPLIT(dev)) { int lane, link_bw, bpp; /* eDP doesn't require FDI link, so just set DP M/N according to current link config */ @@ -3118,7 +3118,7 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, * PCH B stepping, previous chipset stepping should be * ignoring this setting. 
*/ - if (IS_IRONLAKE(dev)) { + if (HAS_PCH_SPLIT(dev)) { temp = I915_READ(PCH_DREF_CONTROL); /* Always enable nonspread source */ temp &= ~DREF_NONSPREAD_SOURCE_MASK; @@ -3165,7 +3165,7 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, reduced_clock.m2; } - if (!IS_IRONLAKE(dev)) + if (!HAS_PCH_SPLIT(dev)) dpll = DPLL_VGA_MODE_DIS; if (IS_I9XX(dev)) { @@ -3178,7 +3178,7 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, sdvo_pixel_multiply = adjusted_mode->clock / mode->clock; if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) dpll |= (sdvo_pixel_multiply - 1) << SDVO_MULTIPLIER_SHIFT_HIRES; - else if (IS_IRONLAKE(dev)) + else if (HAS_PCH_SPLIT(dev)) dpll |= (sdvo_pixel_multiply - 1) << PLL_REF_SDVO_HDMI_MULTIPLIER_SHIFT; } if (is_dp) @@ -3190,7 +3190,7 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, else { dpll |= (1 << (clock.p1 - 1)) << DPLL_FPA01_P1_POST_DIV_SHIFT; /* also FPA1 */ - if (IS_IRONLAKE(dev)) + if (HAS_PCH_SPLIT(dev)) dpll |= (1 << (clock.p1 - 1)) << DPLL_FPA1_P1_POST_DIV_SHIFT; if (IS_G4X(dev) && has_reduced_clock) dpll |= (1 << (reduced_clock.p1 - 1)) << DPLL_FPA1_P1_POST_DIV_SHIFT; @@ -3209,7 +3209,7 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, dpll |= DPLLB_LVDS_P2_CLOCK_DIV_14; break; } - if (IS_I965G(dev) && !IS_IRONLAKE(dev)) + if (IS_I965G(dev) && !HAS_PCH_SPLIT(dev)) dpll |= (6 << PLL_LOAD_PULSE_PHASE_SHIFT); } else { if (is_lvds) { @@ -3243,7 +3243,7 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, /* Ironlake's plane is forced to pipe, bit 24 is to enable color space conversion */ - if (!IS_IRONLAKE(dev)) { + if (!HAS_PCH_SPLIT(dev)) { if (pipe == 0) dspcntr &= ~DISPPLANE_SEL_PIPE_MASK; else @@ -3270,14 +3270,14 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, /* Disable the panel fitter if it was on our pipe */ - if (!IS_IRONLAKE(dev) && intel_panel_fitter_pipe(dev) == pipe) + if (!HAS_PCH_SPLIT(dev) && intel_panel_fitter_pipe(dev) == pipe) I915_WRITE(PFIT_CONTROL, 0); DRM_DEBUG_KMS("Mode for pipe %c:\n", pipe == 0 ? 'A' : 'B'); drm_mode_debug_printmodeline(mode); /* assign to Ironlake registers */ - if (IS_IRONLAKE(dev)) { + if (HAS_PCH_SPLIT(dev)) { fp_reg = pch_fp_reg; dpll_reg = pch_dpll_reg; } @@ -3298,7 +3298,7 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, if (is_lvds) { u32 lvds; - if (IS_IRONLAKE(dev)) + if (HAS_PCH_SPLIT(dev)) lvds_reg = PCH_LVDS; lvds = I915_READ(lvds_reg); @@ -3344,7 +3344,7 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, /* Wait for the clocks to stabilize. */ udelay(150); - if (IS_I965G(dev) && !IS_IRONLAKE(dev)) { + if (IS_I965G(dev) && !HAS_PCH_SPLIT(dev)) { if (is_sdvo) { sdvo_pixel_multiply = adjusted_mode->clock / mode->clock; I915_WRITE(dpll_md_reg, (0 << DPLL_MD_UDI_DIVIDER_SHIFT) | @@ -3391,14 +3391,14 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, /* pipesrc and dspsize control the size that is scaled from, which should * always be the user's requested size. 
*/ - if (!IS_IRONLAKE(dev)) { + if (!HAS_PCH_SPLIT(dev)) { I915_WRITE(dspsize_reg, ((mode->vdisplay - 1) << 16) | (mode->hdisplay - 1)); I915_WRITE(dsppos_reg, 0); } I915_WRITE(pipesrc_reg, ((mode->hdisplay - 1) << 16) | (mode->vdisplay - 1)); - if (IS_IRONLAKE(dev)) { + if (HAS_PCH_SPLIT(dev)) { I915_WRITE(data_m1_reg, TU_SIZE(m_n.tu) | m_n.gmch_m); I915_WRITE(data_n1_reg, TU_SIZE(m_n.tu) | m_n.gmch_n); I915_WRITE(link_m1_reg, m_n.link_m); @@ -3419,7 +3419,7 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, intel_wait_for_vblank(dev); - if (IS_IRONLAKE(dev)) { + if (HAS_PCH_SPLIT(dev)) { /* enable address swizzle for tiling buffer */ temp = I915_READ(DISP_ARB_CTL); I915_WRITE(DISP_ARB_CTL, temp | DISP_TILE_SURFACE_SWIZZLING); @@ -3454,7 +3454,7 @@ void intel_crtc_load_lut(struct drm_crtc *crtc) return; /* use legacy palette for Ironlake */ - if (IS_IRONLAKE(dev)) + if (HAS_PCH_SPLIT(dev)) palreg = (intel_crtc->pipe == 0) ? LGC_PALETTE_A : LGC_PALETTE_B; @@ -3937,7 +3937,7 @@ static void intel_increase_pllclock(struct drm_crtc *crtc, bool schedule) int dpll_reg = (pipe == 0) ? DPLL_A : DPLL_B; int dpll = I915_READ(dpll_reg); - if (IS_IRONLAKE(dev)) + if (HAS_PCH_SPLIT(dev)) return; if (!dev_priv->lvds_downclock_avail) @@ -3976,7 +3976,7 @@ static void intel_decrease_pllclock(struct drm_crtc *crtc) int dpll_reg = (pipe == 0) ? DPLL_A : DPLL_B; int dpll = I915_READ(dpll_reg); - if (IS_IRONLAKE(dev)) + if (HAS_PCH_SPLIT(dev)) return; if (!dev_priv->lvds_downclock_avail) @@ -4418,7 +4418,7 @@ static void intel_setup_outputs(struct drm_device *dev) if (IS_MOBILE(dev) && !IS_I830(dev)) intel_lvds_init(dev); - if (IS_IRONLAKE(dev)) { + if (HAS_PCH_SPLIT(dev)) { int found; if (IS_MOBILE(dev) && (I915_READ(DP_A) & DP_DETECTED)) @@ -4487,7 +4487,7 @@ static void intel_setup_outputs(struct drm_device *dev) DRM_DEBUG_KMS("probing DP_D\n"); intel_dp_init(dev, DP_D); } - } else if (IS_I8XX(dev)) + } else if (IS_GEN2(dev)) intel_dvo_init(dev); if (SUPPORTS_TV(dev)) @@ -4716,7 +4716,7 @@ void intel_init_clock_gating(struct drm_device *dev) * Disable clock gating reported to work incorrectly according to the * specs, but enable as much else as we can. */ - if (IS_IRONLAKE(dev)) { + if (HAS_PCH_SPLIT(dev)) { return; } else if (IS_G4X(dev)) { uint32_t dspclk_gate; @@ -4789,7 +4789,7 @@ static void intel_init_display(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; /* We always want a DPMS function */ - if (IS_IRONLAKE(dev)) + if (HAS_PCH_SPLIT(dev)) dev_priv->display.dpms = ironlake_crtc_dpms; else dev_priv->display.dpms = i9xx_crtc_dpms; @@ -4832,7 +4832,7 @@ static void intel_init_display(struct drm_device *dev) i830_get_display_clock_speed; /* For FIFO watermark updates */ - if (IS_IRONLAKE(dev)) + if (HAS_PCH_SPLIT(dev)) dev_priv->display.update_wm = NULL; else if (IS_G4X(dev)) dev_priv->display.update_wm = g4x_update_wm; diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 93031a75d112..e91e81de5c71 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -661,7 +661,7 @@ static enum drm_connector_status intel_lvds_detect(struct drm_connector *connect /* ACPI lid methods were generally unreliable in this generation, so * don't even bother. 
*/ - if (IS_I8XX(dev)) + if (IS_GEN2(dev)) return connector_status_connected; if (!dmi_check_system(bad_lid_status) && !acpi_lid_open()) diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index c3fa406912b3..d355d1d527e7 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -172,7 +172,7 @@ struct overlay_registers { #define OFC_UPDATE 0x1 #define OVERLAY_NONPHYSICAL(dev) (IS_G33(dev) || IS_I965G(dev)) -#define OVERLAY_EXISTS(dev) (!IS_G4X(dev) && !IS_IRONLAKE(dev)) +#define OVERLAY_EXISTS(dev) (!IS_G4X(dev) && !IS_IRONLAKE(dev) && !IS_GEN6(dev)) static struct overlay_registers *intel_overlay_map_regs_atomic(struct intel_overlay *overlay) diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h index 403490c7b647..676104b7818c 100644 --- a/include/drm/drm_pciids.h +++ b/include/drm/drm_pciids.h @@ -593,4 +593,5 @@ {0x8086, 0x35e8, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ {0x8086, 0x0042, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ {0x8086, 0x0046, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ + {0x8086, 0x0102, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ {0, 0, 0} -- cgit v1.2.3 From 44ee63587dce85593c22497140db16f4e5027860 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 17 Feb 2010 10:50:50 +0900 Subject: percpu: Add __percpu sparse annotations to hw_breakpoint Add __percpu sparse annotations to hw_breakpoint. These annotations are to make sparse consider percpu variables to be in a different address space and warn if accessed without going through percpu accessors. This patch doesn't affect normal builds. In kernel/hw_breakpoint.c, per_cpu(nr_task_bp_pinned, cpu)'s will trigger spurious noderef related warnings from sparse. Changing it to &per_cpu(nr_task_bp_pinned[0], cpu) will work around the problem but was deemed too ugly by the maintainer. Leave it alone until a better solution can be found.
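For illustration only, not part of the patch: a minimal sketch of how a caller is expected to treat the annotated return value, modelled on the samples/hw_breakpoint hunk below. The function name example_wide_breakpoint() is hypothetical, and the __force casts mirror the ones the patch adds, since ERR_PTR() values live in the normal kernel address space rather than the percpu one.

#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hw_breakpoint.h>
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/perf_event.h>

/* Walk the per-CPU events returned by register_wide_hw_breakpoint().
 * Because cpu_events carries __percpu, sparse warns if it is dereferenced
 * directly instead of going through per_cpu_ptr()/this_cpu_ptr().
 */
static int example_wide_breakpoint(struct perf_event_attr *attr,
				   perf_overflow_handler_t triggered)
{
	struct perf_event * __percpu *cpu_events;
	int cpu;

	cpu_events = register_wide_hw_breakpoint(attr, triggered);
	if (IS_ERR((void __force *)cpu_events))
		return PTR_ERR((void __force *)cpu_events);

	get_online_cpus();
	for_each_online_cpu(cpu) {
		struct perf_event *bp = *per_cpu_ptr(cpu_events, cpu);

		pr_info("cpu %d: wide breakpoint at %p\n", cpu, bp);
	}
	put_online_cpus();

	unregister_wide_hw_breakpoint(cpu_events);
	return 0;
}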
Signed-off-by: Tejun Heo Cc: Stephen Rothwell Cc: K.Prasad LKML-Reference: <4B7B4B7A.9050902@kernel.org> Signed-off-by: Frederic Weisbecker --- include/linux/hw_breakpoint.h | 8 ++++---- kernel/hw_breakpoint.c | 10 +++++----- samples/hw_breakpoint/data_breakpoint.c | 6 +++--- 3 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index 5977b724f7c6..c70d27af03f9 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -66,14 +66,14 @@ register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr, perf_overflow_handler_t triggered, int cpu); -extern struct perf_event ** +extern struct perf_event * __percpu * register_wide_hw_breakpoint(struct perf_event_attr *attr, perf_overflow_handler_t triggered); extern int register_perf_hw_breakpoint(struct perf_event *bp); extern int __register_perf_hw_breakpoint(struct perf_event *bp); extern void unregister_hw_breakpoint(struct perf_event *bp); -extern void unregister_wide_hw_breakpoint(struct perf_event **cpu_events); +extern void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events); extern int dbg_reserve_bp_slot(struct perf_event *bp); extern int dbg_release_bp_slot(struct perf_event *bp); @@ -100,7 +100,7 @@ static inline struct perf_event * register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr, perf_overflow_handler_t triggered, int cpu) { return NULL; } -static inline struct perf_event ** +static inline struct perf_event * __percpu * register_wide_hw_breakpoint(struct perf_event_attr *attr, perf_overflow_handler_t triggered) { return NULL; } static inline int @@ -109,7 +109,7 @@ static inline int __register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; } static inline void unregister_hw_breakpoint(struct perf_event *bp) { } static inline void -unregister_wide_hw_breakpoint(struct perf_event **cpu_events) { } +unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events) { } static inline int reserve_bp_slot(struct perf_event *bp) {return -ENOSYS; } static inline void release_bp_slot(struct perf_event *bp) { } diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index 967e66143e11..6542eacb3fa5 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -413,17 +413,17 @@ EXPORT_SYMBOL_GPL(unregister_hw_breakpoint); * * @return a set of per_cpu pointers to perf events */ -struct perf_event ** +struct perf_event * __percpu * register_wide_hw_breakpoint(struct perf_event_attr *attr, perf_overflow_handler_t triggered) { - struct perf_event **cpu_events, **pevent, *bp; + struct perf_event * __percpu *cpu_events, **pevent, *bp; long err; int cpu; cpu_events = alloc_percpu(typeof(*cpu_events)); if (!cpu_events) - return ERR_PTR(-ENOMEM); + return (void __percpu __force *)ERR_PTR(-ENOMEM); get_online_cpus(); for_each_online_cpu(cpu) { @@ -451,7 +451,7 @@ fail: put_online_cpus(); free_percpu(cpu_events); - return ERR_PTR(err); + return (void __percpu __force *)ERR_PTR(err); } EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint); @@ -459,7 +459,7 @@ EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint); * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel * @cpu_events: the per cpu set of events to unregister */ -void unregister_wide_hw_breakpoint(struct perf_event **cpu_events) +void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events) { int cpu; struct perf_event **pevent; diff --git a/samples/hw_breakpoint/data_breakpoint.c 
b/samples/hw_breakpoint/data_breakpoint.c index c69cbe9b2426..bd0f337afcab 100644 --- a/samples/hw_breakpoint/data_breakpoint.c +++ b/samples/hw_breakpoint/data_breakpoint.c @@ -34,7 +34,7 @@ #include #include -struct perf_event **sample_hbp; +struct perf_event * __percpu *sample_hbp; static char ksym_name[KSYM_NAME_LEN] = "pid_max"; module_param_string(ksym, ksym_name, KSYM_NAME_LEN, S_IRUGO); @@ -61,8 +61,8 @@ static int __init hw_break_module_init(void) attr.bp_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R; sample_hbp = register_wide_hw_breakpoint(&attr, sample_hbp_handler); - if (IS_ERR(sample_hbp)) { - ret = PTR_ERR(sample_hbp); + if (IS_ERR((void __force *)sample_hbp)) { + ret = PTR_ERR((void __force *)sample_hbp); goto fail; } -- cgit v1.2.3 From dd8b1cf681eab40bc5afb67bdd06b2ca341f5669 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 27 Feb 2010 17:10:39 +0100 Subject: perf: Remove pointless breakpoint union Remove pointless union in the breakpoint field of hw_perf_event. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Paul Mackerras --- include/linux/perf_event.h | 5 ++--- lib/Kconfig.debug | 8 +++++--- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 7b18b4fd5df7..04f06b4be297 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -487,9 +487,8 @@ struct hw_perf_event { struct hrtimer hrtimer; }; #ifdef CONFIG_HAVE_HW_BREAKPOINT - union { /* breakpoint */ - struct arch_hw_breakpoint info; - }; + /* breakpoint */ + struct arch_hw_breakpoint info; #endif }; atomic64_t prev_count; diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 65f964e7fe78..4dc24cc13f5c 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -520,11 +520,13 @@ config LOCK_STAT For more details, see Documentation/lockstat.txt - You can analyze lock events with "perf lock", subcommand of perf. - If you want to use "perf lock", you need to turn on CONFIG_EVENT_TRACING. + This also enables lock events required by "perf lock", + subcommand of perf. + If you want to use "perf lock", you also need to turn on + CONFIG_EVENT_TRACING. CONFIG_LOCK_STAT defines "contended" and "acquired" lock events. - (CONFIG_LOCKDEP defines "acquire" and "release" events.) + (CONFIG_LOCKDEP defines "acquire" and "release" events.) config DEBUG_LOCKDEP bool "Lock dependency engine debugging" -- cgit v1.2.3 From 1883c79a57a5fe25309007590cccb1b2782c41b2 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 27 Feb 2010 14:53:08 -0800 Subject: rcu: Make task_subsys_state() RCU-lockdep checks handle boot-time use It is apparently legal to invoke task_subsys_state() without RCU protection during early boot time. After all, there are no concurrent tasks, so there can be no grace periods completing concurrently. But this does need an Acked-by from the cgroups folks. Located-by: Ingo Molnar Signed-off-by: Paul E. 
McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1267311188-16603-2-git-send-email-paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/cgroup.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index c9bbcb2a75ae..a73e1ced09b8 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -489,6 +489,7 @@ static inline struct cgroup_subsys_state *task_subsys_state( { return rcu_dereference_check(task->cgroups->subsys[subsys_id], rcu_read_lock_held() || + !rcu_scheduler_active || cgroup_lock_is_held()); } -- cgit v1.2.3 From 76bd061f5c7b7550cdaed68ad6219ea7cee288fc Mon Sep 17 00:00:00 2001 From: "Steven J. Magnani" Date: Sun, 28 Feb 2010 22:18:16 -0700 Subject: fsldma: Fix cookie issues fsl_dma_update_completed_cookie() appears to calculate the last completed cookie incorrectly in the corner case where DMA on cookie 1 is in progress just following a cookie wrap. Signed-off-by: Steven J. Magnani Acked-by: Ira W. Snyder [dan.j.williams@intel.com: fix an integer overflow warning with INT_MAX] Signed-off-by: Dan Williams --- drivers/dma/fsldma.c | 5 ++++- include/linux/dmaengine.h | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c index 6541ebf8bf63..bbb4be5a3ff4 100644 --- a/drivers/dma/fsldma.c +++ b/drivers/dma/fsldma.c @@ -819,8 +819,11 @@ static void fsl_dma_update_completed_cookie(struct fsldma_chan *chan) desc = to_fsl_desc(chan->ld_running.prev); if (dma_is_idle(chan)) cookie = desc->async_tx.cookie; - else + else { cookie = desc->async_tx.cookie - 1; + if (unlikely(cookie < DMA_MIN_COOKIE)) + cookie = DMA_MAX_COOKIE; + } chan->completed_cookie = cookie; diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 78784982b33e..4d8d619f28bc 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -31,6 +31,8 @@ * if dma_cookie_t is >0 it's a DMA request cookie, <0 it's an error code */ typedef s32 dma_cookie_t; +#define DMA_MIN_COOKIE 1 +#define DMA_MAX_COOKIE INT_MAX #define dma_submit_error(cookie) ((cookie) < 0 ? 1 : 0) -- cgit v1.2.3 From a55e8d452ed2f6bbecda1a3039e82cd05244be3d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 24 Feb 2010 14:29:14 -0800 Subject: drm/ttm: fix function prototype to match implementation Fix function prototype to match its actual usage and implementation. drivers/gpu/drm/ttm/ttm_bo_util.c:341:10: error: symbol 'ttm_io_prot' redeclared with different type (originally declared at include/drm/ttm/ttm_bo_driver.h:911) - incompatible argument 1 (different signedness) Signed-off-by: Randy Dunlap Cc: David Airlie Signed-off-by: Dave Airlie --- include/drm/ttm/ttm_bo_driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index 4c4e0f8375b3..e3f1b4a4b601 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -908,7 +908,7 @@ extern int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo, * Utility function that returns the pgprot_t that should be used for * setting up a PTE with the caching model indicated by @c_state. 
*/ -extern pgprot_t ttm_io_prot(enum ttm_caching_state c_state, pgprot_t tmp); +extern pgprot_t ttm_io_prot(uint32_t caching_flags, pgprot_t tmp); #if (defined(CONFIG_AGP) || (defined(CONFIG_AGP_MODULE) && defined(MODULE))) #define TTM_HAS_AGP -- cgit v1.2.3 From 6a9ee8af344e3bd7dbd61e67037096cdf7f83289 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 1 Feb 2010 15:38:10 +1000 Subject: vga_switcheroo: initial implementation (v15) Many new laptops now come with 2 gpus, one to be used for low power modes and one for gaming/on-ac applications. These GPUs are typically wired to the laptop panel and VGA ports via a multiplexer unit which is controlled via ACPI methods. 4 combinations of systems typically exist - with 2 ACPI methods. Intel/ATI - Lenovo W500/T500 - use ATPX ACPI method ATI/ATI - some ASUS - use ATPX ACPI Method Intel/Nvidia - - use _DSM ACPI method Nvidia/Nvidia - - use _DSM ACPI method. TODO: This patch adds support for the ATPX method and initial bits for the _DSM methods that need to written by someone with access to the hardware. Add a proper non-debugfs interface - need to get some proper testing first. v2: add power up/down support for both devices on W500 puts i915/radeon into D3 and cuts power to radeon. v3: redo probing methods, no DMI list, drm devices call to register with switcheroo, it tries to find an ATPX method on any device and once there is two devices + ATPX it inits the switcher. v4: ATPX msg handling using buffers - should work on more machines v5: rearchitect after more mjg59 discussion - move ATPX handling to radeon driver. v6: add file headers + initial nouveau bits (to be filled out). v7: merge delayed switcher code. v8: avoid suspend/resume of gpu that is off v9: rearchitect - mjg59 is always right. - move all ATPX code to radeon, should allow simpler DSM also proper ATRM handling v10: add ATRM support for radeon BIOS, add mutex to lock vgasr_priv v11: fix bug in resuming Intel for 2nd time. v12: start fixing up nvidia code blindly. v13: blindly guess at finishing nvidia code v14: remove radeon audio hacks - fix up intel resume more like upstream v15: clean up printks + remove unnecessary igd/dis pointers mount debugfs /sys/kernel/debug/vgaswitcheroo/switch - should exist if ATPX detected + 2 cards. 
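For illustration only, not part of the patch: a rough sketch of the driver-side client hookup that makes the switch file usable. The i915/nouveau/radeon hunks below are the real implementations; the mydrv_* names here are hypothetical.

#include <linux/pci.h>
#include <linux/vga_switcheroo.h>

/* Called by vga_switcheroo when this GPU should come up or go down.
 * The client only runs its own suspend/resume paths here; the mux and
 * the discrete power state are driven by the platform handler
 * (ATPX or _DSM), not by the client.
 */
static void mydrv_switcheroo_set_state(struct pci_dev *pdev,
				       enum vga_switcheroo_state state)
{
	if (state == VGA_SWITCHEROO_ON)
		dev_info(&pdev->dev, "switcheroo: bringing GPU back up\n");
	else
		dev_info(&pdev->dev, "switcheroo: putting GPU to sleep\n");
}

/* Veto a switch while the device is busy; the real drivers check
 * dev->open_count under the DRM count lock.
 */
static bool mydrv_switcheroo_can_switch(struct pci_dev *pdev)
{
	return true;
}

static int mydrv_enable_switcheroo(struct pci_dev *pdev)
{
	return vga_switcheroo_register_client(pdev,
					      mydrv_switcheroo_set_state,
					      mydrv_switcheroo_can_switch);
}

static void mydrv_disable_switcheroo(struct pci_dev *pdev)
{
	vga_switcheroo_unregister_client(pdev);
}

A client that drives a framebuffer also calls vga_switcheroo_client_fb_set() once its fb_info is set up, so fbcon can be remapped to the newly active GPU on a switch. The commands accepted by the switch file are: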
DIS - immediate change to discrete IGD - immediate change to IGD DDIS - delayed change to discrete DIGD - delayed change to IGD ON - turn on not in use OFF - turn off not in use Tested on W500 (Intel/ATI) and T500 (Intel/ATI) Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/i915_dma.c | 35 +++ drivers/gpu/drm/i915/i915_drv.c | 4 +- drivers/gpu/drm/i915/i915_drv.h | 2 + drivers/gpu/drm/i915/intel_fb.c | 2 + drivers/gpu/drm/nouveau/nouveau_acpi.c | 160 +++++++--- drivers/gpu/drm/nouveau/nouveau_drv.c | 9 +- drivers/gpu/drm/nouveau/nouveau_drv.h | 19 +- drivers/gpu/drm/nouveau/nouveau_fbcon.c | 2 + drivers/gpu/drm/nouveau/nouveau_state.c | 32 +- drivers/gpu/drm/radeon/Makefile | 3 +- drivers/gpu/drm/radeon/radeon.h | 8 + drivers/gpu/drm/radeon/radeon_atpx_handler.c | 258 +++++++++++++++ drivers/gpu/drm/radeon/radeon_bios.c | 44 ++- drivers/gpu/drm/radeon/radeon_device.c | 40 +++ drivers/gpu/drm/radeon/radeon_drv.c | 2 + drivers/gpu/drm/radeon/radeon_drv.h | 3 + drivers/gpu/drm/radeon/radeon_fb.c | 3 + drivers/gpu/drm/radeon/radeon_kms.c | 3 + drivers/gpu/vga/Kconfig | 13 + drivers/gpu/vga/Makefile | 1 + drivers/gpu/vga/vga_switcheroo.c | 453 +++++++++++++++++++++++++++ drivers/video/console/fbcon.c | 18 ++ include/linux/fb.h | 2 + include/linux/vga_switcheroo.h | 58 ++++ 24 files changed, 1104 insertions(+), 70 deletions(-) create mode 100644 drivers/gpu/drm/radeon/radeon_atpx_handler.c create mode 100644 drivers/gpu/vga/vga_switcheroo.c create mode 100644 include/linux/vga_switcheroo.h (limited to 'include') diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 2307f98349f7..42ca07f04a21 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -35,6 +35,7 @@ #include "i915_drv.h" #include "i915_trace.h" #include +#include /* Really want an OS-independent resettable timer. 
Would like to have * this loop run for (eg) 3 sec, but have the timer reset every time @@ -1199,6 +1200,32 @@ static unsigned int i915_vga_set_decode(void *cookie, bool state) return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM; } +static void i915_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state) +{ + struct drm_device *dev = pci_get_drvdata(pdev); + pm_message_t pmm = { .event = PM_EVENT_SUSPEND }; + if (state == VGA_SWITCHEROO_ON) { + printk(KERN_INFO "i915: switched off\n"); + /* i915 resume handler doesn't set to D0 */ + pci_set_power_state(dev->pdev, PCI_D0); + i915_resume(dev); + } else { + printk(KERN_ERR "i915: switched off\n"); + i915_suspend(dev, pmm); + } +} + +static bool i915_switcheroo_can_switch(struct pci_dev *pdev) +{ + struct drm_device *dev = pci_get_drvdata(pdev); + bool can_switch; + + spin_lock(&dev->count_lock); + can_switch = (dev->open_count == 0); + spin_unlock(&dev->count_lock); + return can_switch; +} + static int i915_load_modeset_init(struct drm_device *dev, unsigned long prealloc_start, unsigned long prealloc_size, @@ -1260,6 +1287,12 @@ static int i915_load_modeset_init(struct drm_device *dev, if (ret) goto destroy_ringbuffer; + ret = vga_switcheroo_register_client(dev->pdev, + i915_switcheroo_set_state, + i915_switcheroo_can_switch); + if (ret) + goto destroy_ringbuffer; + intel_modeset_init(dev); ret = drm_irq_install(dev); @@ -1544,6 +1577,7 @@ int i915_driver_unload(struct drm_device *dev) dev_priv->child_dev_num = 0; } drm_irq_uninstall(dev); + vga_switcheroo_unregister_client(dev->pdev); vga_client_register(dev->pdev, NULL, NULL, NULL); } @@ -1611,6 +1645,7 @@ void i915_driver_lastclose(struct drm_device * dev) if (!dev_priv || drm_core_check_feature(dev, DRIVER_MODESET)) { drm_fb_helper_restore(); + vga_switcheroo_process_delayed_switch(); return; } diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index cf4cb3e9a0c2..fd739efe73ce 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -201,7 +201,7 @@ static int i915_drm_freeze(struct drm_device *dev) return 0; } -static int i915_suspend(struct drm_device *dev, pm_message_t state) +int i915_suspend(struct drm_device *dev, pm_message_t state) { int error; @@ -255,7 +255,7 @@ static int i915_drm_thaw(struct drm_device *dev) return error; } -static int i915_resume(struct drm_device *dev) +int i915_resume(struct drm_device *dev) { if (pci_enable_device(dev->pdev)) return -EIO; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b99b6a841d95..d77e56651352 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -736,6 +736,8 @@ extern unsigned int i915_fbpercrtc; extern unsigned int i915_powersave; extern unsigned int i915_lvds_downclock; +extern int i915_suspend(struct drm_device *dev, pm_message_t state); +extern int i915_resume(struct drm_device *dev); extern void i915_save_display(struct drm_device *dev); extern void i915_restore_display(struct drm_device *dev); extern int i915_master_create(struct drm_device *dev, struct drm_master *master); diff --git a/drivers/gpu/drm/i915/intel_fb.c b/drivers/gpu/drm/i915/intel_fb.c index aaabbcbe5905..8cd791dc5b29 100644 --- a/drivers/gpu/drm/i915/intel_fb.c +++ b/drivers/gpu/drm/i915/intel_fb.c @@ -35,6 +35,7 @@ #include #include #include +#include #include "drmP.h" #include "drm.h" @@ -235,6 +236,7 @@ static int intelfb_create(struct drm_device *dev, uint32_t fb_width, obj_priv->gtt_offset, fbo); 
mutex_unlock(&dev->struct_mutex); + vga_switcheroo_client_fb_set(dev->pdev, info); return 0; out_unpin: diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c index 48227e744753..0e0730a53137 100644 --- a/drivers/gpu/drm/nouveau/nouveau_acpi.c +++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c @@ -11,6 +11,8 @@ #include "nouveau_drm.h" #include "nv50_display.h" +#include + #define NOUVEAU_DSM_SUPPORTED 0x00 #define NOUVEAU_DSM_SUPPORTED_FUNCTIONS 0x00 @@ -28,31 +30,30 @@ #define NOUVEAU_DSM_POWER_SPEED 0x01 #define NOUVEAU_DSM_POWER_STAMINA 0x02 -static int nouveau_dsm(struct drm_device *dev, int func, int arg, int *result) -{ - static char muid[] = { - 0xA0, 0xA0, 0x95, 0x9D, 0x60, 0x00, 0x48, 0x4D, - 0xB3, 0x4D, 0x7E, 0x5F, 0xEA, 0x12, 0x9F, 0xD4, - }; +static struct nouveau_dsm_priv { + bool dsm_detected; + acpi_handle dhandle; + acpi_handle dsm_handle; +} nouveau_dsm_priv; + +static const char nouveau_dsm_muid[] = { + 0xA0, 0xA0, 0x95, 0x9D, 0x60, 0x00, 0x48, 0x4D, + 0xB3, 0x4D, 0x7E, 0x5F, 0xEA, 0x12, 0x9F, 0xD4, +}; - struct pci_dev *pdev = dev->pdev; - struct acpi_handle *handle; +static int nouveau_dsm(acpi_handle handle, int func, int arg, int *result) +{ struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL }; struct acpi_object_list input; union acpi_object params[4]; union acpi_object *obj; int err; - handle = DEVICE_ACPI_HANDLE(&pdev->dev); - - if (!handle) - return -ENODEV; - input.count = 4; input.pointer = params; params[0].type = ACPI_TYPE_BUFFER; - params[0].buffer.length = sizeof(muid); - params[0].buffer.pointer = (char *)muid; + params[0].buffer.length = sizeof(nouveau_dsm_muid); + params[0].buffer.pointer = (char *)nouveau_dsm_muid; params[1].type = ACPI_TYPE_INTEGER; params[1].integer.value = 0x00000102; params[2].type = ACPI_TYPE_INTEGER; @@ -62,7 +63,7 @@ static int nouveau_dsm(struct drm_device *dev, int func, int arg, int *result) err = acpi_evaluate_object(handle, "_DSM", &input, &output); if (err) { - NV_INFO(dev, "failed to evaluate _DSM: %d\n", err); + printk(KERN_INFO "failed to evaluate _DSM: %d\n", err); return err; } @@ -86,40 +87,119 @@ static int nouveau_dsm(struct drm_device *dev, int func, int arg, int *result) return 0; } -int nouveau_hybrid_setup(struct drm_device *dev) +static int nouveau_dsm_switch_mux(acpi_handle handle, int mux_id) { - int result; - - if (nouveau_dsm(dev, NOUVEAU_DSM_POWER, NOUVEAU_DSM_POWER_STATE, - &result)) - return -ENODEV; - - NV_INFO(dev, "_DSM hardware status gave 0x%x\n", result); - - if (result) { /* Ensure that the external GPU is enabled */ - nouveau_dsm(dev, NOUVEAU_DSM_LED, NOUVEAU_DSM_LED_SPEED, NULL); - nouveau_dsm(dev, NOUVEAU_DSM_POWER, NOUVEAU_DSM_POWER_SPEED, - NULL); - } else { /* Stamina mode - disable the external GPU */ - nouveau_dsm(dev, NOUVEAU_DSM_LED, NOUVEAU_DSM_LED_STAMINA, - NULL); - nouveau_dsm(dev, NOUVEAU_DSM_POWER, NOUVEAU_DSM_POWER_STAMINA, - NULL); - } + return nouveau_dsm(handle, NOUVEAU_DSM_LED, mux_id, NULL); +} + +static int nouveau_dsm_set_discrete_state(acpi_handle handle, enum vga_switcheroo_state state) +{ + int arg; + if (state == VGA_SWITCHEROO_ON) + arg = NOUVEAU_DSM_POWER_SPEED; + else + arg = NOUVEAU_DSM_POWER_STAMINA; + nouveau_dsm(handle, NOUVEAU_DSM_POWER, arg, NULL); + return 0; +} + +static int nouveau_dsm_switchto(enum vga_switcheroo_client_id id) +{ + if (id == VGA_SWITCHEROO_IGD) + return nouveau_dsm_switch_mux(nouveau_dsm_priv.dsm_handle, NOUVEAU_DSM_LED_STAMINA); + else + return nouveau_dsm_switch_mux(nouveau_dsm_priv.dsm_handle, 
NOUVEAU_DSM_LED_SPEED); +} +static int nouveau_dsm_power_state(enum vga_switcheroo_client_id id, + enum vga_switcheroo_state state) +{ + if (id == VGA_SWITCHEROO_IGD) + return 0; + + return nouveau_dsm_set_discrete_state(nouveau_dsm_priv.dsm_handle, state); +} + +static int nouveau_dsm_init(void) +{ return 0; } -bool nouveau_dsm_probe(struct drm_device *dev) +static int nouveau_dsm_get_client_id(struct pci_dev *pdev) { - int support = 0; + if (nouveau_dsm_priv.dhandle == DEVICE_ACPI_HANDLE(&pdev->dev)) + return VGA_SWITCHEROO_IGD; + else + return VGA_SWITCHEROO_DIS; +} + +static struct vga_switcheroo_handler nouveau_dsm_handler = { + .switchto = nouveau_dsm_switchto, + .power_state = nouveau_dsm_power_state, + .init = nouveau_dsm_init, + .get_client_id = nouveau_dsm_get_client_id, +}; - if (nouveau_dsm(dev, NOUVEAU_DSM_SUPPORTED, - NOUVEAU_DSM_SUPPORTED_FUNCTIONS, &support)) +static bool nouveau_dsm_pci_probe(struct pci_dev *pdev) +{ + acpi_handle dhandle, nvidia_handle; + acpi_status status; + int ret; + uint32_t result; + + dhandle = DEVICE_ACPI_HANDLE(&pdev->dev); + if (!dhandle) + return false; + status = acpi_get_handle(dhandle, "_DSM", &nvidia_handle); + if (ACPI_FAILURE(status)) { return false; + } - if (!support) + ret= nouveau_dsm(nvidia_handle, NOUVEAU_DSM_SUPPORTED, + NOUVEAU_DSM_SUPPORTED_FUNCTIONS, &result); + if (ret < 0) return false; + nouveau_dsm_priv.dhandle = dhandle; + nouveau_dsm_priv.dsm_handle = nvidia_handle; return true; } + +static bool nouveau_dsm_detect(void) +{ + char acpi_method_name[255] = { 0 }; + struct acpi_buffer buffer = {sizeof(acpi_method_name), acpi_method_name}; + struct pci_dev *pdev = NULL; + int has_dsm = 0; + int vga_count = 0; + while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) { + vga_count++; + + has_dsm |= (nouveau_dsm_pci_probe(pdev) == true); + } + + if (vga_count == 2 && has_dsm) { + acpi_get_name(nouveau_dsm_priv.dsm_handle, ACPI_FULL_PATHNAME, &buffer); + printk(KERN_INFO "VGA switcheroo: detected DSM switching method %s handle\n", + acpi_method_name); + nouveau_dsm_priv.dsm_detected = true; + return true; + } + return false; +} + +void nouveau_register_dsm_handler(void) +{ + bool r; + + r = nouveau_dsm_detect(); + if (!r) + return; + + vga_switcheroo_register_handler(&nouveau_dsm_handler); +} + +void nouveau_unregister_dsm_handler(void) +{ + vga_switcheroo_unregister_handler(); +} diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.c b/drivers/gpu/drm/nouveau/nouveau_drv.c index da3b93b84502..f83ec65addba 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.c +++ b/drivers/gpu/drm/nouveau/nouveau_drv.c @@ -135,7 +135,7 @@ nouveau_pci_remove(struct pci_dev *pdev) drm_put_dev(dev); } -static int +int nouveau_pci_suspend(struct pci_dev *pdev, pm_message_t pm_state) { struct drm_device *dev = pci_get_drvdata(pdev); @@ -233,7 +233,7 @@ out_abort: return ret; } -static int +int nouveau_pci_resume(struct pci_dev *pdev) { struct drm_device *dev = pci_get_drvdata(pdev); @@ -402,8 +402,10 @@ static int __init nouveau_init(void) nouveau_modeset = 1; } - if (nouveau_modeset == 1) + if (nouveau_modeset == 1) { driver.driver_features |= DRIVER_MODESET; + nouveau_register_dsm_handler(); + } return drm_init(&driver); } @@ -411,6 +413,7 @@ static int __init nouveau_init(void) static void __exit nouveau_exit(void) { drm_exit(&driver); + nouveau_unregister_dsm_handler(); } module_init(nouveau_init); diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index 1c15ef37b71c..85c05feab4f0 100644 --- 
a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -614,7 +614,6 @@ struct drm_nouveau_private { } susres; struct backlight_device *backlight; - bool acpi_dsm; struct nouveau_channel *evo; @@ -682,6 +681,9 @@ extern int nouveau_ignorelid; extern int nouveau_nofbaccel; extern int nouveau_noaccel; +extern int nouveau_pci_suspend(struct pci_dev *pdev, pm_message_t pm_state); +extern int nouveau_pci_resume(struct pci_dev *pdev); + /* nouveau_state.c */ extern void nouveau_preclose(struct drm_device *dev, struct drm_file *); extern int nouveau_load(struct drm_device *, unsigned long flags); @@ -848,19 +850,8 @@ extern int nouveau_dma_init(struct nouveau_channel *); extern int nouveau_dma_wait(struct nouveau_channel *, int size); /* nouveau_acpi.c */ -#ifdef CONFIG_ACPI -extern int nouveau_hybrid_setup(struct drm_device *dev); -extern bool nouveau_dsm_probe(struct drm_device *dev); -#else -static inline int nouveau_hybrid_setup(struct drm_device *dev) -{ - return 0; -} -static inline bool nouveau_dsm_probe(struct drm_device *dev) -{ - return false; -} -#endif +void nouveau_register_dsm_handler(void); +void nouveau_unregister_dsm_handler(void); /* nouveau_backlight.c */ #ifdef CONFIG_DRM_NOUVEAU_BACKLIGHT diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c index ea879a2efef3..1ebf22b664dd 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "drmP.h" #include "drm.h" @@ -370,6 +371,7 @@ nouveau_fbcon_create(struct drm_device *dev, uint32_t fb_width, nvbo->bo.offset, nvbo); mutex_unlock(&dev->struct_mutex); + vga_switcheroo_client_fb_set(dev->pdev, info); return 0; out_unref: diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c index a4851af5b05e..85d65b91389c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_state.c +++ b/drivers/gpu/drm/nouveau/nouveau_state.c @@ -29,6 +29,7 @@ #include "drm_sarea.h" #include "drm_crtc_helper.h" #include +#include #include "nouveau_drv.h" #include "nouveau_drm.h" @@ -371,6 +372,30 @@ out_err: return ret; } +static void nouveau_switcheroo_set_state(struct pci_dev *pdev, + enum vga_switcheroo_state state) +{ + pm_message_t pmm = { .event = PM_EVENT_SUSPEND }; + if (state == VGA_SWITCHEROO_ON) { + printk(KERN_ERR "VGA switcheroo: switched nouveau on\n"); + nouveau_pci_resume(pdev); + } else { + printk(KERN_ERR "VGA switcheroo: switched nouveau off\n"); + nouveau_pci_suspend(pdev, pmm); + } +} + +static bool nouveau_switcheroo_can_switch(struct pci_dev *pdev) +{ + struct drm_device *dev = pci_get_drvdata(pdev); + bool can_switch; + + spin_lock(&dev->count_lock); + can_switch = (dev->open_count == 0); + spin_unlock(&dev->count_lock); + return can_switch; +} + int nouveau_card_init(struct drm_device *dev) { @@ -384,6 +409,8 @@ nouveau_card_init(struct drm_device *dev) return 0; vga_client_register(dev->pdev, dev, NULL, nouveau_vga_set_decode); + vga_switcheroo_register_client(dev->pdev, nouveau_switcheroo_set_state, + nouveau_switcheroo_can_switch); /* Initialise internal driver API hooks */ ret = nouveau_init_engine_ptrs(dev); @@ -617,11 +644,6 @@ int nouveau_load(struct drm_device *dev, unsigned long flags) NV_DEBUG(dev, "vendor: 0x%X device: 0x%X class: 0x%X\n", dev->pci_vendor, dev->pci_device, dev->pdev->class); - dev_priv->acpi_dsm = nouveau_dsm_probe(dev); - - if (dev_priv->acpi_dsm) - nouveau_hybrid_setup(dev); - dev_priv->wq = 
create_workqueue("nouveau"); if (!dev_priv->wq) return -EINVAL; diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile index 1cc7b937b1ea..8e62fe13e31c 100644 --- a/drivers/gpu/drm/radeon/Makefile +++ b/drivers/gpu/drm/radeon/Makefile @@ -54,7 +54,8 @@ radeon-y += radeon_device.o radeon_kms.o \ radeon_cs.o radeon_bios.o radeon_benchmark.o r100.o r300.o r420.o \ rs400.o rs600.o rs690.o rv515.o r520.o r600.o rv770.o radeon_test.o \ r200.o radeon_legacy_tv.o r600_cs.o r600_blit.o r600_blit_shaders.o \ - r600_blit_kms.o radeon_pm.o atombios_dp.o r600_audio.o r600_hdmi.o + r600_blit_kms.o radeon_pm.o atombios_dp.o r600_audio.o r600_hdmi.o \ + radeon_atpx_handler.o radeon-$(CONFIG_COMPAT) += radeon_ioc32.o diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index c0356bb193e5..a5dfb1557d3e 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -118,6 +118,10 @@ struct radeon_device; /* * BIOS. */ +#define ATRM_BIOS_PAGE 4096 + +bool radeon_atrm_supported(struct pci_dev *pdev); +int radeon_atrm_get_bios_chunk(uint8_t *bios, int offset, int len); bool radeon_get_bios(struct radeon_device *rdev); @@ -838,6 +842,8 @@ struct radeon_device { int audio_bits_per_sample; uint8_t audio_status_bits; uint8_t audio_category_code; + + bool powered_down; }; int radeon_device_init(struct radeon_device *rdev, @@ -1042,6 +1048,8 @@ extern void radeon_legacy_set_clock_gating(struct radeon_device *rdev, int enabl extern void radeon_atom_set_clock_gating(struct radeon_device *rdev, int enable); extern void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain); extern bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo); +extern int radeon_resume_kms(struct drm_device *dev); +extern int radeon_suspend_kms(struct drm_device *dev, pm_message_t state); /* r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280 */ struct r100_mc_save { diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c new file mode 100644 index 000000000000..0ae52f19071d --- /dev/null +++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c @@ -0,0 +1,258 @@ +/* + * Copyright (c) 2010 Red Hat Inc. 
+ * Author : Dave Airlie + * + * Licensed under GPLv2 + * + * ATPX support for both Intel/ATI + */ + +#include +#include +#include +#include + +#define ATPX_VERSION 0 +#define ATPX_GPU_PWR 2 +#define ATPX_MUX_SELECT 3 + +#define ATPX_INTEGRATED 0 +#define ATPX_DISCRETE 1 + +#define ATPX_MUX_IGD 0 +#define ATPX_MUX_DISCRETE 1 + +static struct radeon_atpx_priv { + bool atpx_detected; + /* handle for device - and atpx */ + acpi_handle dhandle; + acpi_handle atpx_handle; + acpi_handle atrm_handle; +} radeon_atpx_priv; + +/* retrieve the ROM in 4k blocks */ +static int radeon_atrm_call(acpi_handle atrm_handle, uint8_t *bios, + int offset, int len) +{ + acpi_status status; + union acpi_object atrm_arg_elements[2], *obj; + struct acpi_object_list atrm_arg; + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL}; + + atrm_arg.count = 2; + atrm_arg.pointer = &atrm_arg_elements[0]; + + atrm_arg_elements[0].type = ACPI_TYPE_INTEGER; + atrm_arg_elements[0].integer.value = offset; + + atrm_arg_elements[1].type = ACPI_TYPE_INTEGER; + atrm_arg_elements[1].integer.value = len; + + status = acpi_evaluate_object(atrm_handle, NULL, &atrm_arg, &buffer); + if (ACPI_FAILURE(status)) { + printk("failed to evaluate ATRM got %s\n", acpi_format_exception(status)); + return -ENODEV; + } + + obj = (union acpi_object *)buffer.pointer; + memcpy(bios+offset, obj->buffer.pointer, len); + kfree(buffer.pointer); + return len; +} + +bool radeon_atrm_supported(struct pci_dev *pdev) +{ + /* get the discrete ROM only via ATRM */ + if (!radeon_atpx_priv.atpx_detected) + return false; + + if (radeon_atpx_priv.dhandle == DEVICE_ACPI_HANDLE(&pdev->dev)) + return false; + return true; +} + + +int radeon_atrm_get_bios_chunk(uint8_t *bios, int offset, int len) +{ + return radeon_atrm_call(radeon_atpx_priv.atrm_handle, bios, offset, len); +} + +static int radeon_atpx_get_version(acpi_handle handle) +{ + acpi_status status; + union acpi_object atpx_arg_elements[2], *obj; + struct acpi_object_list atpx_arg; + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + + atpx_arg.count = 2; + atpx_arg.pointer = &atpx_arg_elements[0]; + + atpx_arg_elements[0].type = ACPI_TYPE_INTEGER; + atpx_arg_elements[0].integer.value = ATPX_VERSION; + + atpx_arg_elements[1].type = ACPI_TYPE_INTEGER; + atpx_arg_elements[1].integer.value = ATPX_VERSION; + + status = acpi_evaluate_object(handle, NULL, &atpx_arg, &buffer); + if (ACPI_FAILURE(status)) { + printk("%s: failed to call ATPX: %s\n", __func__, acpi_format_exception(status)); + return -ENOSYS; + } + obj = (union acpi_object *)buffer.pointer; + if (obj && (obj->type == ACPI_TYPE_BUFFER)) + printk(KERN_INFO "radeon atpx: version is %d\n", *((u8 *)(obj->buffer.pointer) + 2)); + kfree(buffer.pointer); + return 0; +} + +static int radeon_atpx_execute(acpi_handle handle, int cmd_id, u16 value) +{ + acpi_status status; + union acpi_object atpx_arg_elements[2]; + struct acpi_object_list atpx_arg; + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + uint8_t buf[4] = {0}; + + if (!handle) + return -EINVAL; + + atpx_arg.count = 2; + atpx_arg.pointer = &atpx_arg_elements[0]; + + atpx_arg_elements[0].type = ACPI_TYPE_INTEGER; + atpx_arg_elements[0].integer.value = cmd_id; + + buf[2] = value & 0xff; + buf[3] = (value >> 8) & 0xff; + + atpx_arg_elements[1].type = ACPI_TYPE_BUFFER; + atpx_arg_elements[1].buffer.length = 4; + atpx_arg_elements[1].buffer.pointer = buf; + + status = acpi_evaluate_object(handle, NULL, &atpx_arg, &buffer); + if (ACPI_FAILURE(status)) { + printk("%s: failed to call 
ATPX: %s\n", __func__, acpi_format_exception(status)); + return -ENOSYS; + } + kfree(buffer.pointer); + + return 0; +} + +static int radeon_atpx_set_discrete_state(acpi_handle handle, int state) +{ + return radeon_atpx_execute(handle, ATPX_GPU_PWR, state); +} + +static int radeon_atpx_switch_mux(acpi_handle handle, int mux_id) +{ + return radeon_atpx_execute(handle, ATPX_MUX_SELECT, mux_id); +} + + +static int radeon_atpx_switchto(enum vga_switcheroo_client_id id) +{ + if (id == VGA_SWITCHEROO_IGD) + radeon_atpx_switch_mux(radeon_atpx_priv.atpx_handle, 0); + else + radeon_atpx_switch_mux(radeon_atpx_priv.atpx_handle, 1); + return 0; +} + +static int radeon_atpx_power_state(enum vga_switcheroo_client_id id, + enum vga_switcheroo_state state) +{ + /* on w500 ACPI can't change intel gpu state */ + if (id == VGA_SWITCHEROO_IGD) + return 0; + + radeon_atpx_set_discrete_state(radeon_atpx_priv.atpx_handle, state); + return 0; +} + +static bool radeon_atpx_pci_probe_handle(struct pci_dev *pdev) +{ + acpi_handle dhandle, atpx_handle, atrm_handle; + acpi_status status; + + dhandle = DEVICE_ACPI_HANDLE(&pdev->dev); + if (!dhandle) + return false; + + status = acpi_get_handle(dhandle, "ATPX", &atpx_handle); + if (ACPI_FAILURE(status)) + return false; + + status = acpi_get_handle(dhandle, "ATRM", &atrm_handle); + if (ACPI_FAILURE(status)) + return false; + + radeon_atpx_priv.dhandle = dhandle; + radeon_atpx_priv.atpx_handle = atpx_handle; + radeon_atpx_priv.atrm_handle = atrm_handle; + return true; +} + +static int radeon_atpx_init(void) +{ + /* set up the ATPX handle */ + + radeon_atpx_get_version(radeon_atpx_priv.atpx_handle); + return 0; +} + +static int radeon_atpx_get_client_id(struct pci_dev *pdev) +{ + if (radeon_atpx_priv.dhandle == DEVICE_ACPI_HANDLE(&pdev->dev)) + return VGA_SWITCHEROO_IGD; + else + return VGA_SWITCHEROO_DIS; +} + +static struct vga_switcheroo_handler radeon_atpx_handler = { + .switchto = radeon_atpx_switchto, + .power_state = radeon_atpx_power_state, + .init = radeon_atpx_init, + .get_client_id = radeon_atpx_get_client_id, +}; + +static bool radeon_atpx_detect(void) +{ + char acpi_method_name[255] = { 0 }; + struct acpi_buffer buffer = {sizeof(acpi_method_name), acpi_method_name}; + struct pci_dev *pdev = NULL; + bool has_atpx = false; + int vga_count = 0; + + while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) { + vga_count++; + + has_atpx |= (radeon_atpx_pci_probe_handle(pdev) == true); + } + + if (has_atpx && vga_count == 2) { + acpi_get_name(radeon_atpx_priv.atpx_handle, ACPI_FULL_PATHNAME, &buffer); + printk(KERN_INFO "VGA switcheroo: detected switching method %s handle\n", + acpi_method_name); + radeon_atpx_priv.atpx_detected = true; + return true; + } + return false; +} + +void radeon_register_atpx_handler(void) +{ + bool r; + + /* detect if we have any ATPX + 2 VGA in the system */ + r = radeon_atpx_detect(); + if (!r) + return; + + vga_switcheroo_register_handler(&radeon_atpx_handler); +} + +void radeon_unregister_atpx_handler(void) +{ + vga_switcheroo_unregister_handler(); +} diff --git a/drivers/gpu/drm/radeon/radeon_bios.c b/drivers/gpu/drm/radeon/radeon_bios.c index 906921740c60..a34b909485b8 100644 --- a/drivers/gpu/drm/radeon/radeon_bios.c +++ b/drivers/gpu/drm/radeon/radeon_bios.c @@ -30,6 +30,7 @@ #include "radeon.h" #include "atom.h" +#include /* * BIOS. 
*/ @@ -62,7 +63,7 @@ static bool igp_read_bios_from_vram(struct radeon_device *rdev) iounmap(bios); return false; } - memcpy(rdev->bios, bios, size); + memcpy_fromio(rdev->bios, bios, size); iounmap(bios); return true; } @@ -93,6 +94,38 @@ static bool radeon_read_bios(struct radeon_device *rdev) return true; } +/* ATRM is used to get the BIOS on the discrete cards in + * dual-gpu systems. + */ +static bool radeon_atrm_get_bios(struct radeon_device *rdev) +{ + int ret; + int size = 64 * 1024; + int i; + + if (!radeon_atrm_supported(rdev->pdev)) + return false; + + rdev->bios = kmalloc(size, GFP_KERNEL); + if (!rdev->bios) { + DRM_ERROR("Unable to allocate bios\n"); + return false; + } + + for (i = 0; i < size / ATRM_BIOS_PAGE; i++) { + ret = radeon_atrm_get_bios_chunk(rdev->bios, + (i * ATRM_BIOS_PAGE), + ATRM_BIOS_PAGE); + if (ret <= 0) + break; + } + + if (i == 0 || rdev->bios[0] != 0x55 || rdev->bios[1] != 0xaa) { + kfree(rdev->bios); + return false; + } + return true; +} static bool r700_read_disabled_bios(struct radeon_device *rdev) { uint32_t viph_control; @@ -388,16 +421,16 @@ static bool radeon_read_disabled_bios(struct radeon_device *rdev) return legacy_read_disabled_bios(rdev); } + bool radeon_get_bios(struct radeon_device *rdev) { bool r; uint16_t tmp; - if (rdev->flags & RADEON_IS_IGP) { + r = radeon_atrm_get_bios(rdev); + if (r == false) r = igp_read_bios_from_vram(rdev); - if (r == false) - r = radeon_read_bios(rdev); - } else + if (r == false) r = radeon_read_bios(rdev); if (r == false) { r = radeon_read_disabled_bios(rdev); @@ -408,6 +441,7 @@ bool radeon_get_bios(struct radeon_device *rdev) return false; } if (rdev->bios[0] != 0x55 || rdev->bios[1] != 0xaa) { + printk("BIOS signature incorrect %x %x\n", rdev->bios[0], rdev->bios[1]); goto free_bios; } diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 768b1509fa03..cb8d9a1dd69c 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "radeon_reg.h" #include "radeon.h" #include "radeon_asic.h" @@ -613,6 +614,36 @@ void radeon_check_arguments(struct radeon_device *rdev) } } +static void radeon_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state) +{ + struct drm_device *dev = pci_get_drvdata(pdev); + struct radeon_device *rdev = dev->dev_private; + pm_message_t pmm = { .event = PM_EVENT_SUSPEND }; + if (state == VGA_SWITCHEROO_ON) { + printk(KERN_INFO "radeon: switched on\n"); + /* don't suspend or resume card normally */ + rdev->powered_down = false; + radeon_resume_kms(dev); + } else { + printk(KERN_INFO "radeon: switched off\n"); + radeon_suspend_kms(dev, pmm); + /* don't suspend or resume card normally */ + rdev->powered_down = true; + } +} + +static bool radeon_switcheroo_can_switch(struct pci_dev *pdev) +{ + struct drm_device *dev = pci_get_drvdata(pdev); + bool can_switch; + + spin_lock(&dev->count_lock); + can_switch = (dev->open_count == 0); + spin_unlock(&dev->count_lock); + return can_switch; +} + + int radeon_device_init(struct radeon_device *rdev, struct drm_device *ddev, struct pci_dev *pdev, @@ -692,6 +723,9 @@ int radeon_device_init(struct radeon_device *rdev, /* this will fail for cards that aren't VGA class devices, just * ignore it */ vga_client_register(rdev->pdev, rdev, NULL, radeon_vga_set_decode); + vga_switcheroo_register_client(rdev->pdev, + radeon_switcheroo_set_state, + radeon_switcheroo_can_switch); r = radeon_init(rdev); if 
(r) @@ -723,6 +757,7 @@ void radeon_device_fini(struct radeon_device *rdev) rdev->shutdown = true; radeon_fini(rdev); destroy_workqueue(rdev->wq); + vga_switcheroo_unregister_client(rdev->pdev); vga_client_register(rdev->pdev, NULL, NULL, NULL); iounmap(rdev->rmmio); rdev->rmmio = NULL; @@ -746,6 +781,8 @@ int radeon_suspend_kms(struct drm_device *dev, pm_message_t state) } rdev = dev->dev_private; + if (rdev->powered_down) + return 0; /* unpin the front buffers */ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { struct radeon_framebuffer *rfb = to_radeon_framebuffer(crtc->fb); @@ -791,6 +828,9 @@ int radeon_resume_kms(struct drm_device *dev) { struct radeon_device *rdev = dev->dev_private; + if (rdev->powered_down) + return 0; + acquire_console_sem(); pci_set_power_state(dev->pdev, PCI_D0); pci_restore_state(dev->pdev); diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 8ba3de7994d4..4ab53aa163b2 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -339,6 +339,7 @@ static int __init radeon_init(void) driver = &kms_driver; driver->driver_features |= DRIVER_MODESET; driver->num_ioctls = radeon_max_kms_ioctl; + radeon_register_atpx_handler(); } /* if the vga console setting is enabled still * let modprobe override it */ @@ -348,6 +349,7 @@ static int __init radeon_init(void) static void __exit radeon_exit(void) { drm_exit(driver); + radeon_unregister_atpx_handler(); } module_init(radeon_init); diff --git a/drivers/gpu/drm/radeon/radeon_drv.h b/drivers/gpu/drm/radeon/radeon_drv.h index c57ad606504d..736237195143 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.h +++ b/drivers/gpu/drm/radeon/radeon_drv.h @@ -455,6 +455,9 @@ extern void r600_blit_swap(struct drm_device *dev, int sx, int sy, int dx, int dy, int w, int h, int src_pitch, int dst_pitch, int cpp); +/* atpx handler */ +void radeon_register_atpx_handler(void); +void radeon_unregister_atpx_handler(void); /* Flags for stats.boxes */ #define RADEON_BOX_DMA_IDLE 0x1 diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c index d71e346e9ab5..561719223988 100644 --- a/drivers/gpu/drm/radeon/radeon_fb.c +++ b/drivers/gpu/drm/radeon/radeon_fb.c @@ -39,6 +39,8 @@ #include "drm_fb_helper.h" +#include + struct radeon_fb_device { struct drm_fb_helper helper; struct radeon_framebuffer *rfb; @@ -291,6 +293,7 @@ int radeonfb_create(struct drm_device *dev, rfbdev->rdev = rdev; mutex_unlock(&rdev->ddev->struct_mutex); + vga_switcheroo_client_fb_set(rdev->ddev->pdev, info); return 0; out_unref: diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index f23b05606eb5..5db7af6b91f4 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -30,6 +30,8 @@ #include "radeon.h" #include "radeon_drm.h" +#include + int radeon_driver_unload_kms(struct drm_device *dev) { struct radeon_device *rdev = dev->dev_private; @@ -136,6 +138,7 @@ int radeon_driver_firstopen_kms(struct drm_device *dev) void radeon_driver_lastclose_kms(struct drm_device *dev) { + vga_switcheroo_process_delayed_switch(); } int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) diff --git a/drivers/gpu/vga/Kconfig b/drivers/gpu/vga/Kconfig index 790e675b13eb..6116a0196214 100644 --- a/drivers/gpu/vga/Kconfig +++ b/drivers/gpu/vga/Kconfig @@ -8,3 +8,16 @@ config VGA_ARB are accessed at same time they need some kind of coordination. 
Please see Documentation/vgaarbiter.txt for more details. Select this to enable VGA arbiter. + +config VGA_SWITCHEROO + bool "Laptop Hybrid Grapics - GPU switching support" + default y + depends on X86 + depends on ACPI + help + Many laptops released in 2008/9/10 have two gpus with a multiplxer + to switch between them. This adds support for dynamic switching when + X isn't running and delayed switching until the next logoff. This + features is called hybrid graphics, ATI PowerXpress, and Nvidia + HybridPower. + diff --git a/drivers/gpu/vga/Makefile b/drivers/gpu/vga/Makefile index 7cc8c1ed645b..14ca30b75d0a 100644 --- a/drivers/gpu/vga/Makefile +++ b/drivers/gpu/vga/Makefile @@ -1 +1,2 @@ obj-$(CONFIG_VGA_ARB) += vgaarb.o +obj-$(CONFIG_VGA_SWITCHEROO) += vga_switcheroo.o diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c new file mode 100644 index 000000000000..a3f587a0aba9 --- /dev/null +++ b/drivers/gpu/vga/vga_switcheroo.c @@ -0,0 +1,453 @@ +/* + * Copyright (c) 2010 Red Hat Inc. + * Author : Dave Airlie + * + * + * Licensed under GPLv2 + * + * vga_switcheroo.c - Support for laptop with dual GPU using one set of outputs + + Switcher interface - methods require for ATPX and DCM + - switchto - this throws the output MUX switch + - discrete_set_power - sets the power state for the discrete card + + GPU driver interface + - set_gpu_state - this should do the equiv of s/r for the card + - this should *not* set the discrete power state + - switch_check - check if the device is in a position to switch now + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +struct vga_switcheroo_client { + struct pci_dev *pdev; + struct fb_info *fb_info; + int pwr_state; + void (*set_gpu_state)(struct pci_dev *pdev, enum vga_switcheroo_state); + bool (*can_switch)(struct pci_dev *pdev); + int id; + bool active; +}; + +static DEFINE_MUTEX(vgasr_mutex); + +struct vgasr_priv { + + bool active; + bool delayed_switch_active; + enum vga_switcheroo_client_id delayed_client_id; + + struct dentry *debugfs_root; + struct dentry *switch_file; + + int registered_clients; + struct vga_switcheroo_client clients[VGA_SWITCHEROO_MAX_CLIENTS]; + + struct vga_switcheroo_handler *handler; +}; + +static int vga_switcheroo_debugfs_init(struct vgasr_priv *priv); +static void vga_switcheroo_debugfs_fini(struct vgasr_priv *priv); + +/* only one switcheroo per system */ +static struct vgasr_priv vgasr_priv; + +int vga_switcheroo_register_handler(struct vga_switcheroo_handler *handler) +{ + mutex_lock(&vgasr_mutex); + if (vgasr_priv.handler) { + mutex_unlock(&vgasr_mutex); + return -EINVAL; + } + + vgasr_priv.handler = handler; + mutex_unlock(&vgasr_mutex); + return 0; +} +EXPORT_SYMBOL(vga_switcheroo_register_handler); + +void vga_switcheroo_unregister_handler(void) +{ + mutex_lock(&vgasr_mutex); + vgasr_priv.handler = NULL; + mutex_unlock(&vgasr_mutex); +} +EXPORT_SYMBOL(vga_switcheroo_unregister_handler); + +static void vga_switcheroo_enable(void) +{ + int i; + int ret; + /* call the handler to init */ + vgasr_priv.handler->init(); + + for (i = 0; i < VGA_SWITCHEROO_MAX_CLIENTS; i++) { + ret = vgasr_priv.handler->get_client_id(vgasr_priv.clients[i].pdev); + if (ret < 0) + return; + + vgasr_priv.clients[i].id = ret; + } + vga_switcheroo_debugfs_init(&vgasr_priv); + vgasr_priv.active = true; +} + +int vga_switcheroo_register_client(struct pci_dev *pdev, + void (*set_gpu_state)(struct pci_dev *pdev, enum vga_switcheroo_state), + bool 
(*can_switch)(struct pci_dev *pdev)) +{ + int index; + + mutex_lock(&vgasr_mutex); + /* don't do IGD vs DIS here */ + if (vgasr_priv.registered_clients & 1) + index = 1; + else + index = 0; + + vgasr_priv.clients[index].pwr_state = VGA_SWITCHEROO_ON; + vgasr_priv.clients[index].pdev = pdev; + vgasr_priv.clients[index].set_gpu_state = set_gpu_state; + vgasr_priv.clients[index].can_switch = can_switch; + vgasr_priv.clients[index].id = -1; + if (pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW) + vgasr_priv.clients[index].active = true; + + vgasr_priv.registered_clients |= (1 << index); + + /* if we get two clients + handler */ + if (vgasr_priv.registered_clients == 0x3 && vgasr_priv.handler) { + printk(KERN_INFO "vga_switcheroo: enabled\n"); + vga_switcheroo_enable(); + } + mutex_unlock(&vgasr_mutex); + return 0; +} +EXPORT_SYMBOL(vga_switcheroo_register_client); + +void vga_switcheroo_unregister_client(struct pci_dev *pdev) +{ + int i; + + mutex_lock(&vgasr_mutex); + for (i = 0; i < VGA_SWITCHEROO_MAX_CLIENTS; i++) { + if (vgasr_priv.clients[i].pdev == pdev) { + vgasr_priv.registered_clients &= ~(1 << i); + break; + } + } + + printk(KERN_INFO "vga_switcheroo: disabled\n"); + vga_switcheroo_debugfs_fini(&vgasr_priv); + vgasr_priv.active = false; + mutex_unlock(&vgasr_mutex); +} +EXPORT_SYMBOL(vga_switcheroo_unregister_client); + +void vga_switcheroo_client_fb_set(struct pci_dev *pdev, + struct fb_info *info) +{ + int i; + + mutex_lock(&vgasr_mutex); + for (i = 0; i < VGA_SWITCHEROO_MAX_CLIENTS; i++) { + if (vgasr_priv.clients[i].pdev == pdev) { + vgasr_priv.clients[i].fb_info = info; + break; + } + } + mutex_unlock(&vgasr_mutex); +} +EXPORT_SYMBOL(vga_switcheroo_client_fb_set); + +static int vga_switcheroo_show(struct seq_file *m, void *v) +{ + int i; + mutex_lock(&vgasr_mutex); + for (i = 0; i < VGA_SWITCHEROO_MAX_CLIENTS; i++) { + seq_printf(m, "%d:%c:%s:%s\n", i, + vgasr_priv.clients[i].active ? '+' : ' ', + vgasr_priv.clients[i].pwr_state ? 
"Pwr" : "Off", + pci_name(vgasr_priv.clients[i].pdev)); + } + mutex_unlock(&vgasr_mutex); + return 0; +} + +static int vga_switcheroo_debugfs_open(struct inode *inode, struct file *file) +{ + return single_open(file, vga_switcheroo_show, NULL); +} + +static int vga_switchon(struct vga_switcheroo_client *client) +{ + int ret; + + ret = vgasr_priv.handler->power_state(client->id, VGA_SWITCHEROO_ON); + /* call the driver callback to turn on device */ + client->set_gpu_state(client->pdev, VGA_SWITCHEROO_ON); + client->pwr_state = VGA_SWITCHEROO_ON; + return 0; +} + +static int vga_switchoff(struct vga_switcheroo_client *client) +{ + /* call the driver callback to turn off device */ + client->set_gpu_state(client->pdev, VGA_SWITCHEROO_OFF); + vgasr_priv.handler->power_state(client->id, VGA_SWITCHEROO_OFF); + client->pwr_state = VGA_SWITCHEROO_OFF; + return 0; +} + +static int vga_switchto(struct vga_switcheroo_client *new_client) +{ + int ret; + int i; + struct vga_switcheroo_client *active = NULL; + + if (new_client->active == true) + return 0; + + for (i = 0; i < VGA_SWITCHEROO_MAX_CLIENTS; i++) { + if (vgasr_priv.clients[i].active == true) { + active = &vgasr_priv.clients[i]; + break; + } + } + if (!active) + return 0; + + /* power up the first device */ + ret = pci_enable_device(new_client->pdev); + if (ret) + return ret; + + if (new_client->pwr_state == VGA_SWITCHEROO_OFF) + vga_switchon(new_client); + + /* swap shadow resource to denote boot VGA device has changed so X starts on new device */ + active->active = false; + + active->pdev->resource[PCI_ROM_RESOURCE].flags &= ~IORESOURCE_ROM_SHADOW; + new_client->pdev->resource[PCI_ROM_RESOURCE].flags |= IORESOURCE_ROM_SHADOW; + + if (new_client->fb_info) { + struct fb_event event; + event.info = new_client->fb_info; + fb_notifier_call_chain(FB_EVENT_REMAP_ALL_CONSOLE, &event); + } + + ret = vgasr_priv.handler->switchto(new_client->id); + if (ret) + return ret; + + if (active->pwr_state == VGA_SWITCHEROO_ON) + vga_switchoff(active); + + new_client->active = true; + return 0; +} + +static ssize_t +vga_switcheroo_debugfs_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char usercmd[64]; + const char *pdev_name; + int i, ret; + bool delay = false, can_switch; + int client_id = -1; + struct vga_switcheroo_client *client = NULL; + + if (cnt > 63) + cnt = 63; + + if (copy_from_user(usercmd, ubuf, cnt)) + return -EFAULT; + + mutex_lock(&vgasr_mutex); + + if (!vgasr_priv.active) + return -EINVAL; + + /* pwr off the device not in use */ + if (strncmp(usercmd, "OFF", 3) == 0) { + for (i = 0; i < VGA_SWITCHEROO_MAX_CLIENTS; i++) { + if (vgasr_priv.clients[i].active) + continue; + if (vgasr_priv.clients[i].pwr_state == VGA_SWITCHEROO_ON) + vga_switchoff(&vgasr_priv.clients[i]); + } + goto out; + } + /* pwr on the device not in use */ + if (strncmp(usercmd, "ON", 2) == 0) { + for (i = 0; i < VGA_SWITCHEROO_MAX_CLIENTS; i++) { + if (vgasr_priv.clients[i].active) + continue; + if (vgasr_priv.clients[i].pwr_state == VGA_SWITCHEROO_OFF) + vga_switchon(&vgasr_priv.clients[i]); + } + goto out; + } + + /* request a delayed switch - test can we switch now */ + if (strncmp(usercmd, "DIGD", 4) == 0) { + client_id = VGA_SWITCHEROO_IGD; + delay = true; + } + + if (strncmp(usercmd, "DDIS", 4) == 0) { + client_id = VGA_SWITCHEROO_DIS; + delay = true; + } + + if (strncmp(usercmd, "IGD", 3) == 0) + client_id = VGA_SWITCHEROO_IGD; + + if (strncmp(usercmd, "DIS", 3) == 0) + client_id = VGA_SWITCHEROO_DIS; + + if (client_id == -1) + goto 
out; + + for (i = 0; i < VGA_SWITCHEROO_MAX_CLIENTS; i++) { + if (vgasr_priv.clients[i].id == client_id) { + client = &vgasr_priv.clients[i]; + break; + } + } + + vgasr_priv.delayed_switch_active = false; + /* okay we want a switch - test if devices are willing to switch */ + can_switch = true; + for (i = 0; i < VGA_SWITCHEROO_MAX_CLIENTS; i++) { + can_switch = vgasr_priv.clients[i].can_switch(vgasr_priv.clients[i].pdev); + if (can_switch == false) { + printk(KERN_ERR "vga_switcheroo: client %d refused switch\n", i); + break; + } + } + + if (can_switch == false && delay == false) + goto out; + + if (can_switch == true) { + pdev_name = pci_name(client->pdev); + ret = vga_switchto(client); + if (ret) + printk(KERN_ERR "vga_switcheroo: switching failed %d\n", ret); + } else { + printk(KERN_INFO "vga_switcheroo: setting delayed switch to client %d\n", client->id); + vgasr_priv.delayed_switch_active = true; + vgasr_priv.delayed_client_id = client_id; + + /* we should at least power up the card to + make the switch faster */ + if (client->pwr_state == VGA_SWITCHEROO_OFF) + vga_switchon(client); + } + +out: + mutex_unlock(&vgasr_mutex); + return cnt; +} + +static const struct file_operations vga_switcheroo_debugfs_fops = { + .owner = THIS_MODULE, + .open = vga_switcheroo_debugfs_open, + .write = vga_switcheroo_debugfs_write, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void vga_switcheroo_debugfs_fini(struct vgasr_priv *priv) +{ + if (priv->switch_file) { + debugfs_remove(priv->switch_file); + priv->switch_file = NULL; + } + if (priv->debugfs_root) { + debugfs_remove(priv->debugfs_root); + priv->debugfs_root = NULL; + } +} + +static int vga_switcheroo_debugfs_init(struct vgasr_priv *priv) +{ + /* already initialised */ + if (priv->debugfs_root) + return 0; + priv->debugfs_root = debugfs_create_dir("vgaswitcheroo", NULL); + + if (!priv->debugfs_root) { + printk(KERN_ERR "vga_switcheroo: Cannot create /sys/kernel/debug/vgaswitcheroo\n"); + goto fail; + } + + priv->switch_file = debugfs_create_file("switch", 0644, + priv->debugfs_root, NULL, &vga_switcheroo_debugfs_fops); + if (!priv->switch_file) { + printk(KERN_ERR "vga_switcheroo: cannot create /sys/kernel/debug/vgaswitcheroo/switch\n"); + goto fail; + } + return 0; +fail: + vga_switcheroo_debugfs_fini(priv); + return -1; +} + +int vga_switcheroo_process_delayed_switch(void) +{ + struct vga_switcheroo_client *client = NULL; + const char *pdev_name; + bool can_switch = true; + int i; + int ret; + int err = -EINVAL; + + mutex_lock(&vgasr_mutex); + if (!vgasr_priv.delayed_switch_active) + goto err; + + printk(KERN_INFO "vga_switcheroo: processing delayed switch to %d\n", vgasr_priv.delayed_client_id); + + for (i = 0; i < VGA_SWITCHEROO_MAX_CLIENTS; i++) { + if (vgasr_priv.clients[i].id == vgasr_priv.delayed_client_id) + client = &vgasr_priv.clients[i]; + can_switch = vgasr_priv.clients[i].can_switch(vgasr_priv.clients[i].pdev); + if (can_switch == false) { + printk(KERN_ERR "vga_switcheroo: client %d refused switch\n", i); + break; + } + } + + if (can_switch == false || client == NULL) + goto err; + + pdev_name = pci_name(client->pdev); + ret = vga_switchto(client); + if (ret) + printk(KERN_ERR "vga_switcheroo: delayed switching failed %d\n", ret); + + vgasr_priv.delayed_switch_active = false; + err = 0; +err: + mutex_unlock(&vgasr_mutex); + return err; +} +EXPORT_SYMBOL(vga_switcheroo_process_delayed_switch); + diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c index 
3681c6a88212..b0a3fa00706d 100644 --- a/drivers/video/console/fbcon.c +++ b/drivers/video/console/fbcon.c @@ -3025,6 +3025,20 @@ static int fbcon_fb_unregistered(struct fb_info *info) return 0; } +static void fbcon_remap_all(int idx) +{ + int i; + for (i = first_fb_vc; i <= last_fb_vc; i++) + set_con2fb_map(i, idx, 0); + + if (con_is_bound(&fb_con)) { + printk(KERN_INFO "fbcon: Remapping primary device, " + "fb%i, to tty %i-%i\n", idx, + first_fb_vc + 1, last_fb_vc + 1); + info_idx = idx; + } +} + #ifdef CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY static void fbcon_select_primary(struct fb_info *info) { @@ -3225,6 +3239,10 @@ static int fbcon_event_notify(struct notifier_block *self, caps = event->data; fbcon_get_requirement(info, caps); break; + case FB_EVENT_REMAP_ALL_CONSOLE: + idx = info->node; + fbcon_remap_all(idx); + break; } done: return ret; diff --git a/include/linux/fb.h b/include/linux/fb.h index 369767bd873e..c10163b4c40e 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -543,6 +543,8 @@ struct fb_cursor_user { #define FB_EVENT_GET_REQ 0x0D /* Unbind from the console if possible */ #define FB_EVENT_FB_UNBIND 0x0E +/* CONSOLE-SPECIFIC: remap all consoles to new fb - for vga switcheroo */ +#define FB_EVENT_REMAP_ALL_CONSOLE 0x0F struct fb_event { struct fb_info *info; diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h new file mode 100644 index 000000000000..4b58ab1e8612 --- /dev/null +++ b/include/linux/vga_switcheroo.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2010 Red Hat Inc. + * Author : Dave Airlie + * + * Licensed under GPLv2 + * + * vga_switcheroo.h - Support for laptop with dual GPU using one set of outputs + */ + +#include +#include + +enum vga_switcheroo_state { + VGA_SWITCHEROO_OFF, + VGA_SWITCHEROO_ON, +}; + +enum vga_switcheroo_client_id { + VGA_SWITCHEROO_IGD, + VGA_SWITCHEROO_DIS, + VGA_SWITCHEROO_MAX_CLIENTS, +}; + +struct vga_switcheroo_handler { + int (*switchto)(enum vga_switcheroo_client_id id); + int (*power_state)(enum vga_switcheroo_client_id id, + enum vga_switcheroo_state state); + int (*init)(void); + int (*get_client_id)(struct pci_dev *pdev); +}; + + +#if defined(CONFIG_VGA_SWITCHEROO) +void vga_switcheroo_unregister_client(struct pci_dev *dev); +int vga_switcheroo_register_client(struct pci_dev *dev, + void (*set_gpu_state)(struct pci_dev *dev, enum vga_switcheroo_state), + bool (*can_switch)(struct pci_dev *dev)); + +void vga_switcheroo_client_fb_set(struct pci_dev *dev, + struct fb_info *info); + +int vga_switcheroo_register_handler(struct vga_switcheroo_handler *handler); +void vga_switcheroo_unregister_handler(void); + +int vga_switcheroo_process_delayed_switch(void); + +#else + +static inline void vga_switcheroo_unregister_client(struct pci_dev *dev) {} +static inline int vga_switcheroo_register_client(struct pci_dev *dev, + void (*set_gpu_state)(struct pci_dev *dev, enum vga_switcheroo_state), + bool (*can_switch)(struct pci_dev *dev)) { return 0; } +static inline void vga_switcheroo_client_fb_set(struct pci_dev *dev, struct fb_info *info) {} +static inline int vga_switcheroo_register_handler(struct vga_switcheroo_handler *handler) { return 0; } +static inline void vga_switcheroo_unregister_handler(void) {} +static inline int vga_switcheroo_process_delayed_switch(void) { return 0; } + +#endif -- cgit v1.2.3 From 8edb381d6705811b278527907a5ae2a9c4db8074 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 1 Mar 2010 21:50:01 +1100 Subject: vga_switcheroo: fix build on platforms with no ACPI radeon was always 
including the atpx code unnecessarily, also core switcheroo was including acpi headers. Signed-off-by: Dave Airlie --- drivers/gpu/drm/nouveau/nouveau_drv.h | 5 +++++ drivers/gpu/drm/radeon/Makefile | 3 ++- drivers/gpu/drm/radeon/radeon.h | 11 +++++++++++ drivers/gpu/drm/radeon/radeon_atpx_handler.c | 1 - drivers/gpu/drm/radeon/radeon_drv.h | 6 ++++++ drivers/gpu/vga/vga_switcheroo.c | 3 --- include/linux/vga_switcheroo.h | 1 - 7 files changed, 24 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index a2e24f252e84..f5b3cbe7dc80 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -852,8 +852,13 @@ extern int nouveau_dma_init(struct nouveau_channel *); extern int nouveau_dma_wait(struct nouveau_channel *, int slots, int size); /* nouveau_acpi.c */ +#if defined(CONFIG_VGA_SWITCHEROO) void nouveau_register_dsm_handler(void); void nouveau_unregister_dsm_handler(void); +#else +static inline void nouveau_register_dsm_handler(void) {} +static inline void nouveau_unregister_dsm_handler(void) {} +#endif /* nouveau_backlight.c */ #ifdef CONFIG_DRM_NOUVEAU_BACKLIGHT diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile index 0a4d526e4f44..0adf49eea7fa 100644 --- a/drivers/gpu/drm/radeon/Makefile +++ b/drivers/gpu/drm/radeon/Makefile @@ -60,8 +60,9 @@ radeon-y += radeon_device.o radeon_kms.o \ rs400.o rs600.o rs690.o rv515.o r520.o r600.o rv770.o radeon_test.o \ r200.o radeon_legacy_tv.o r600_cs.o r600_blit.o r600_blit_shaders.o \ r600_blit_kms.o radeon_pm.o atombios_dp.o r600_audio.o r600_hdmi.o \ - evergreen.o radeon_atpx_handler.o + evergreen.o radeon-$(CONFIG_COMPAT) += radeon_ioc32.o +radeon-$(CONFIG_VGA_SWITCHEROO) += radone_atpx_handler.o obj-$(CONFIG_DRM_RADEON)+= radeon.o diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index ad9d55f94398..829e26e8a4bb 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -121,8 +121,19 @@ struct radeon_device; */ #define ATRM_BIOS_PAGE 4096 +#if defined(CONFIG_VGA_SWITCHEROO) bool radeon_atrm_supported(struct pci_dev *pdev); int radeon_atrm_get_bios_chunk(uint8_t *bios, int offset, int len); +#else +static inline bool radeon_atrm_supported(struct pci_dev *pdev) +{ + return false; +} + +static inline int radeon_atrm_get_bios_chunk(uint8_t *bios, int offset, int len){ + return -EINVAL; +} +#endif bool radeon_get_bios(struct radeon_device *rdev); diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c index 0ae52f19071d..3f557c4151e0 100644 --- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c +++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c @@ -6,7 +6,6 @@ * * ATPX support for both Intel/ATI */ - #include #include #include diff --git a/drivers/gpu/drm/radeon/radeon_drv.h b/drivers/gpu/drm/radeon/radeon_drv.h index 4fe16461bb1b..ec55f2b23c22 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.h +++ b/drivers/gpu/drm/radeon/radeon_drv.h @@ -463,8 +463,14 @@ extern void r600_blit_swap(struct drm_device *dev, int w, int h, int src_pitch, int dst_pitch, int cpp); /* atpx handler */ +#if defined(CONFIG_VGA_SWITCHEROO) void radeon_register_atpx_handler(void); void radeon_unregister_atpx_handler(void); +#else +static inline void radeon_register_atpx_handler(void) {} +static inline void radeon_unregister_atpx_handler(void) {} +#endif + /* Flags for stats.boxes */ #define RADEON_BOX_DMA_IDLE 0x1 diff 
--git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c index a3f587a0aba9..d6d1149d525d 100644 --- a/drivers/gpu/vga/vga_switcheroo.c +++ b/drivers/gpu/vga/vga_switcheroo.c @@ -25,9 +25,6 @@ #include #include -#include -#include - #include #include diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h index 4b58ab1e8612..ae9ab13b963d 100644 --- a/include/linux/vga_switcheroo.h +++ b/include/linux/vga_switcheroo.h @@ -7,7 +7,6 @@ * vga_switcheroo.h - Support for laptop with dual GPU using one set of outputs */ -#include #include enum vga_switcheroo_state { -- cgit v1.2.3 From 1ccaba3056796ab1f933736d763ffcd1958866cd Mon Sep 17 00:00:00 2001 From: Abhijith Das Date: Thu, 10 Dec 2009 18:52:54 -0500 Subject: GFS2: Remove old, unused linked list code from quota This is the kernel portion of the patch-set for upstream gfs2, to remove the quota-linked-list stuff and replace it with fiemap-based traversal of the quota file. The corresponding userland fixes have been pushed to STABLE3 and master branches of cluster.git and gfs2-utils.git respectively (Refer Red Hat bug #536902). Signed-off-by: Abhi Das Signed-off-by: Steven Whitehouse --- include/linux/gfs2_ondisk.h | 30 +----------------------------- 1 file changed, 1 insertion(+), 29 deletions(-) (limited to 'include') diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index 81f90a59cda6..4f4462974c14 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -179,33 +179,6 @@ struct gfs2_rgrp { __u8 rg_reserved[80]; /* Several fields from gfs1 now reserved */ }; -/* - * quota linked list: user quotas and group quotas form two separate - * singly linked lists. ll_next stores uids or gids of next quotas in the - * linked list. - -Given the uid/gid, how to calculate the quota file offsets for the corresponding -gfs2_quota structures on disk: - -for user quotas, given uid, -offset = uid * sizeof(struct gfs2_quota); - -for group quotas, given gid, -offset = (gid * sizeof(struct gfs2_quota)) + sizeof(struct gfs2_quota); - - - uid:0 gid:0 uid:12 gid:12 uid:17 gid:17 uid:5142 gid:5142 -+-------+-------+ +-------+-------+ +-------+- - - -+ +- - - -+-------+ -| valid | valid | :: | valid | valid | :: | valid | inval | :: | inval | valid | -+-------+-------+ +-------+-------+ +-------+- - - -+ +- - - -+-------+ -next:12 next:12 next:17 next:5142 next:NULL next:NULL - | | | | |<-- user quota list | - \______|___________/ \______|___________/ group quota list -->| - | | | - \__________________/ \_______________________________________/ - -*/ - /* * quota structure */ @@ -214,8 +187,7 @@ struct gfs2_quota { __be64 qu_limit; __be64 qu_warn; __be64 qu_value; - __be32 qu_ll_next; /* location of next quota in list */ - __u8 qu_reserved[60]; + __u8 qu_reserved[64]; }; /* -- cgit v1.2.3 From 46a26bf55714c1e2f17e34683292a389acb8e601 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Wed, 23 Dec 2009 14:35:16 -0200 Subject: KVM: modify memslots layout in struct kvm Have a pointer to an allocated region inside struct kvm. 
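For reference, a minimal user-space model of this layout change (simplified stand-in types and names, not kernel code) shows the slot array moving out of struct kvm into a separately allocated object reached through a single pointer, which is what later makes an atomic replace of the whole slot set possible:

/* User-space model of the new layout; the fields here are simplified
 * stand-ins for the real struct kvm_memory_slot members. */
#include <stdio.h>
#include <stdlib.h>

#define KVM_MEMORY_SLOTS 32

struct kvm_memory_slot { unsigned long base_gfn, npages; };

struct kvm_memslots {                 /* new: one allocated object ... */
	int nmemslots;
	struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS];
};

struct kvm {                          /* ... referenced by pointer */
	struct kvm_memslots *memslots;
};

int main(void)
{
	struct kvm kvm = { .memslots = calloc(1, sizeof(struct kvm_memslots)) };

	kvm.memslots->memslots[0] =
		(struct kvm_memory_slot){ .base_gfn = 0, .npages = 256 };
	kvm.memslots->nmemslots = 1;

	/* readers now always go through the pointer, as in
	 * kvm->memslots->memslots[i] in the patch below */
	printf("slot 0: gfn %lu, %lu pages\n",
	       kvm.memslots->memslots[0].base_gfn,
	       kvm.memslots->memslots[0].npages);

	free(kvm.memslots);
	return 0;
}

Because every reader goes through kvm->memslots, a later patch can swap that one pointer instead of rewriting slots in place.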
[alex: fix ppc book 3s] Signed-off-by: Alexander Graf Signed-off-by: Marcelo Tosatti --- arch/ia64/kvm/kvm-ia64.c | 10 ++++++---- arch/powerpc/kvm/book3s.c | 2 +- arch/x86/kvm/mmu.c | 11 ++++++----- arch/x86/kvm/vmx.c | 4 ++-- arch/x86/kvm/x86.c | 4 ++-- include/linux/kvm_host.h | 12 ++++++++---- virt/kvm/iommu.c | 18 ++++++++++++------ virt/kvm/kvm_main.c | 36 +++++++++++++++++++++++------------- 8 files changed, 60 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 5fdeec5fddcf..1ca1dbf48117 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1377,12 +1377,14 @@ static void free_kvm(struct kvm *kvm) static void kvm_release_vm_pages(struct kvm *kvm) { + struct kvm_memslots *slots; struct kvm_memory_slot *memslot; int i, j; unsigned long base_gfn; - for (i = 0; i < kvm->nmemslots; i++) { - memslot = &kvm->memslots[i]; + slots = kvm->memslots; + for (i = 0; i < slots->nmemslots; i++) { + memslot = &slots->memslots[i]; base_gfn = memslot->base_gfn; for (j = 0; j < memslot->npages; j++) { @@ -1802,7 +1804,7 @@ static int kvm_ia64_sync_dirty_log(struct kvm *kvm, if (log->slot >= KVM_MEMORY_SLOTS) goto out; - memslot = &kvm->memslots[log->slot]; + memslot = &kvm->memslots->memslots[log->slot]; r = -ENOENT; if (!memslot->dirty_bitmap) goto out; @@ -1840,7 +1842,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, /* If nothing is dirty, don't bother messing with page tables. */ if (is_dirty) { kvm_flush_remote_tlbs(kvm); - memslot = &kvm->memslots[log->slot]; + memslot = &kvm->memslots->memslots[log->slot]; n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; memset(memslot->dirty_bitmap, 0, n); } diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 803505d3e455..bb8873dcb20f 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -865,7 +865,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, /* If nothing is dirty, don't bother messing with page tables. */ if (is_dirty) { - memslot = &kvm->memslots[log->slot]; + memslot = &kvm->memslots->memslots[log->slot]; ga = memslot->base_gfn << PAGE_SHIFT; ga_end = ga + (memslot->npages << PAGE_SHIFT); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 4f499d7f7106..81f84d326a84 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -807,13 +807,14 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, { int i, j; int retval = 0; + struct kvm_memslots *slots = kvm->memslots; /* * If mmap_sem isn't taken, we can look the memslots with only * the mmu_lock by skipping over the slots with userspace_addr == 0. 
*/ - for (i = 0; i < kvm->nmemslots; i++) { - struct kvm_memory_slot *memslot = &kvm->memslots[i]; + for (i = 0; i < slots->nmemslots; i++) { + struct kvm_memory_slot *memslot = &slots->memslots[i]; unsigned long start = memslot->userspace_addr; unsigned long end; @@ -3021,8 +3022,8 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm) unsigned int nr_mmu_pages; unsigned int nr_pages = 0; - for (i = 0; i < kvm->nmemslots; i++) - nr_pages += kvm->memslots[i].npages; + for (i = 0; i < kvm->memslots->nmemslots; i++) + nr_pages += kvm->memslots->memslots[i].npages; nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000; nr_mmu_pages = max(nr_mmu_pages, @@ -3295,7 +3296,7 @@ static int count_rmaps(struct kvm_vcpu *vcpu) int i, j, k; for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { - struct kvm_memory_slot *m = &vcpu->kvm->memslots[i]; + struct kvm_memory_slot *m = &vcpu->kvm->memslots->memslots[i]; struct kvm_rmap_desc *d; for (j = 0; j < m->npages; ++j) { diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 74a66f0c00b4..18698799e365 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1503,8 +1503,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu) static gva_t rmode_tss_base(struct kvm *kvm) { if (!kvm->arch.tss_addr) { - gfn_t base_gfn = kvm->memslots[0].base_gfn + - kvm->memslots[0].npages - 3; + gfn_t base_gfn = kvm->memslots->memslots[0].base_gfn + + kvm->memslots->memslots[0].npages - 3; return base_gfn << PAGE_SHIFT; } return kvm->arch.tss_addr; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8798504ace11..3b81cb9da8b8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2427,7 +2427,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, spin_lock(&kvm->mmu_lock); kvm_mmu_slot_remove_write_access(kvm, log->slot); spin_unlock(&kvm->mmu_lock); - memslot = &kvm->memslots[log->slot]; + memslot = &kvm->memslots->memslots[log->slot]; n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; memset(memslot->dirty_bitmap, 0, n); } @@ -5223,7 +5223,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm, int user_alloc) { int npages = mem->memory_size >> PAGE_SHIFT; - struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot]; + struct kvm_memory_slot *memslot = &kvm->memslots->memslots[mem->slot]; /*To keep backward compatibility with older userspace, *x86 needs to hanlde !user_alloc case. 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index bd5a616d9373..782bfb185f8a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -150,14 +150,18 @@ struct kvm_irq_routing_table {}; #endif +struct kvm_memslots { + int nmemslots; + struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS + + KVM_PRIVATE_MEM_SLOTS]; +}; + struct kvm { spinlock_t mmu_lock; spinlock_t requests_lock; struct rw_semaphore slots_lock; struct mm_struct *mm; /* userspace tied to this vm */ - int nmemslots; - struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS + - KVM_PRIVATE_MEM_SLOTS]; + struct kvm_memslots *memslots; #ifdef CONFIG_KVM_APIC_ARCHITECTURE u32 bsp_vcpu_id; struct kvm_vcpu *bsp_vcpu; @@ -482,7 +486,7 @@ static inline void kvm_guest_exit(void) static inline int memslot_id(struct kvm *kvm, struct kvm_memory_slot *slot) { - return slot - kvm->memslots; + return slot - kvm->memslots->memslots; } static inline gpa_t gfn_to_gpa(gfn_t gfn) diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index 15147583abd1..bc697a66a883 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c @@ -76,10 +76,13 @@ unmap_pages: static int kvm_iommu_map_memslots(struct kvm *kvm) { int i, r = 0; + struct kvm_memslots *slots; - for (i = 0; i < kvm->nmemslots; i++) { - r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn, - kvm->memslots[i].npages); + slots = kvm->memslots; + + for (i = 0; i < slots->nmemslots; i++) { + r = kvm_iommu_map_pages(kvm, slots->memslots[i].base_gfn, + slots->memslots[i].npages); if (r) break; } @@ -210,10 +213,13 @@ static void kvm_iommu_put_pages(struct kvm *kvm, static int kvm_iommu_unmap_memslots(struct kvm *kvm) { int i; + struct kvm_memslots *slots; + + slots = kvm->memslots; - for (i = 0; i < kvm->nmemslots; i++) { - kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn, - kvm->memslots[i].npages); + for (i = 0; i < slots->nmemslots; i++) { + kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn, + slots->memslots[i].npages); } return 0; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index bc23b8e0609b..86dd8f3d29c9 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -375,12 +375,16 @@ static struct kvm *kvm_create_vm(void) INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list); #endif + r = -ENOMEM; + kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); + if (!kvm->memslots) + goto out_err; + #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET page = alloc_page(GFP_KERNEL | __GFP_ZERO); - if (!page) { - r = -ENOMEM; + if (!page) goto out_err; - } + kvm->coalesced_mmio_ring = (struct kvm_coalesced_mmio_ring *)page_address(page); #endif @@ -416,6 +420,7 @@ out: out_err: hardware_disable_all(); out_err_nodisable: + kfree(kvm->memslots); kfree(kvm); return ERR_PTR(r); } @@ -450,9 +455,12 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free, void kvm_free_physmem(struct kvm *kvm) { int i; + struct kvm_memslots *slots = kvm->memslots; + + for (i = 0; i < slots->nmemslots; ++i) + kvm_free_physmem_slot(&slots->memslots[i], NULL); - for (i = 0; i < kvm->nmemslots; ++i) - kvm_free_physmem_slot(&kvm->memslots[i], NULL); + kfree(kvm->memslots); } static void kvm_destroy_vm(struct kvm *kvm) @@ -533,7 +541,7 @@ int __kvm_set_memory_region(struct kvm *kvm, if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) goto out; - memslot = &kvm->memslots[mem->slot]; + memslot = &kvm->memslots->memslots[mem->slot]; base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; npages = mem->memory_size >> PAGE_SHIFT; @@ -554,7 +562,7 @@ int __kvm_set_memory_region(struct kvm *kvm, 
/* Check for overlaps */ r = -EEXIST; for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { - struct kvm_memory_slot *s = &kvm->memslots[i]; + struct kvm_memory_slot *s = &kvm->memslots->memslots[i]; if (s == memslot || !s->npages) continue; @@ -656,8 +664,8 @@ skip_lpage: kvm_arch_flush_shadow(kvm); spin_lock(&kvm->mmu_lock); - if (mem->slot >= kvm->nmemslots) - kvm->nmemslots = mem->slot + 1; + if (mem->slot >= kvm->memslots->nmemslots) + kvm->memslots->nmemslots = mem->slot + 1; *memslot = new; spin_unlock(&kvm->mmu_lock); @@ -727,7 +735,7 @@ int kvm_get_dirty_log(struct kvm *kvm, if (log->slot >= KVM_MEMORY_SLOTS) goto out; - memslot = &kvm->memslots[log->slot]; + memslot = &kvm->memslots->memslots[log->slot]; r = -ENOENT; if (!memslot->dirty_bitmap) goto out; @@ -781,9 +789,10 @@ EXPORT_SYMBOL_GPL(kvm_is_error_hva); struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn) { int i; + struct kvm_memslots *slots = kvm->memslots; - for (i = 0; i < kvm->nmemslots; ++i) { - struct kvm_memory_slot *memslot = &kvm->memslots[i]; + for (i = 0; i < slots->nmemslots; ++i) { + struct kvm_memory_slot *memslot = &slots->memslots[i]; if (gfn >= memslot->base_gfn && gfn < memslot->base_gfn + memslot->npages) @@ -802,10 +811,11 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) { int i; + struct kvm_memslots *slots = kvm->memslots; gfn = unalias_gfn(kvm, gfn); for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { - struct kvm_memory_slot *memslot = &kvm->memslots[i]; + struct kvm_memory_slot *memslot = &slots->memslots[i]; if (gfn >= memslot->base_gfn && gfn < memslot->base_gfn + memslot->npages) -- cgit v1.2.3 From f7784b8ec9b6a041fa828cfbe9012fe51933f5ac Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Wed, 23 Dec 2009 14:35:18 -0200 Subject: KVM: split kvm_arch_set_memory_region into prepare and commit Required for SRCU convertion later. 
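A rough sketch of the calling convention this split introduces (a user-space model under assumed simplifications, not the real arch API): prepare runs before the new slot is published and may fail, commit runs after publication and must not fail.

#include <stdio.h>

struct memslot { unsigned long npages; };

static int arch_prepare(struct memslot *new)
{
	if (new->npages == 0)
		return -1;            /* reject before anything is published */
	return 0;
}

static void arch_commit(struct memslot *new)
{
	printf("committed slot with %lu pages\n", new->npages);
}

static int set_memory_region(struct memslot *new)
{
	if (arch_prepare(new))        /* step 1: validate/allocate, can fail */
		return -1;
	/* step 2: publish the new slot (pointer swap in the real code) */
	arch_commit(new);             /* step 3: arch side effects, no failure path */
	return 0;
}

int main(void)
{
	struct memslot s = { .npages = 16 };
	return set_memory_region(&s);
}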
Signed-off-by: Marcelo Tosatti --- arch/ia64/kvm/kvm-ia64.c | 16 +++++++++++---- arch/powerpc/kvm/powerpc.c | 18 ++++++++++++---- arch/s390/kvm/kvm-s390.c | 25 +++++++++++++++-------- arch/x86/kvm/x86.c | 51 ++++++++++++++++++++++++++-------------------- include/linux/kvm_host.h | 7 ++++++- virt/kvm/kvm_main.c | 12 +++++------ 6 files changed, 82 insertions(+), 47 deletions(-) (limited to 'include') diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 1ca1dbf48117..0757c7027986 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1578,15 +1578,15 @@ out: return r; } -int kvm_arch_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, +int kvm_arch_prepare_memory_region(struct kvm *kvm, + struct kvm_memory_slot *memslot, struct kvm_memory_slot old, + struct kvm_userspace_memory_region *mem, int user_alloc) { unsigned long i; unsigned long pfn; - int npages = mem->memory_size >> PAGE_SHIFT; - struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot]; + int npages = memslot->npages; unsigned long base_gfn = memslot->base_gfn; if (base_gfn + npages > (KVM_MAX_MEM_SIZE >> PAGE_SHIFT)) @@ -1610,6 +1610,14 @@ int kvm_arch_set_memory_region(struct kvm *kvm, return 0; } +void kvm_arch_commit_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot old, + int user_alloc) +{ + return; +} + void kvm_arch_flush_shadow(struct kvm *kvm) { kvm_flush_remote_tlbs(kvm); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index f06cf93b178e..4633e7850dd2 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -165,14 +165,24 @@ long kvm_arch_dev_ioctl(struct file *filp, return -EINVAL; } -int kvm_arch_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old, - int user_alloc) +int kvm_arch_prepare_memory_region(struct kvm *kvm, + struct kvm_memory_slot *memslot, + struct kvm_memory_slot old, + struct kvm_userspace_memory_region *mem, + int user_alloc) { return 0; } +void kvm_arch_commit_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot old, + int user_alloc) +{ + return; +} + + void kvm_arch_flush_shadow(struct kvm *kvm) { } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 3fa0a10e4668..c8002193d9d4 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -690,14 +690,12 @@ long kvm_arch_vcpu_ioctl(struct file *filp, } /* Section: memory related */ -int kvm_arch_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - struct kvm_memory_slot old, - int user_alloc) +int kvm_arch_prepare_memory_region(struct kvm *kvm, + struct kvm_memory_slot *memslot, + struct kvm_memory_slot old, + struct kvm_userspace_memory_region *mem, + int user_alloc) { - int i; - struct kvm_vcpu *vcpu; - /* A few sanity checks. We can have exactly one memory slot which has to start at guest virtual zero and which has to be located at a page boundary in userland and which has to end at a page boundary. 
@@ -720,14 +718,23 @@ int kvm_arch_set_memory_region(struct kvm *kvm, if (!user_alloc) return -EINVAL; + return 0; +} + +void kvm_arch_commit_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot old, + int user_alloc) +{ + int i; + struct kvm_vcpu *vcpu; + /* request update of sie control block for all available vcpus */ kvm_for_each_vcpu(i, vcpu, kvm) { if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) continue; kvm_s390_inject_sigp_stop(vcpu, ACTION_RELOADVCPU_ON_STOP); } - - return 0; } void kvm_arch_flush_shadow(struct kvm *kvm) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1ce833191430..43da65feed49 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5228,13 +5228,13 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kfree(kvm); } -int kvm_arch_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, +int kvm_arch_prepare_memory_region(struct kvm *kvm, + struct kvm_memory_slot *memslot, struct kvm_memory_slot old, + struct kvm_userspace_memory_region *mem, int user_alloc) { - int npages = mem->memory_size >> PAGE_SHIFT; - struct kvm_memory_slot *memslot = &kvm->memslots->memslots[mem->slot]; + int npages = memslot->npages; /*To keep backward compatibility with older userspace, *x86 needs to hanlde !user_alloc case. @@ -5254,26 +5254,35 @@ int kvm_arch_set_memory_region(struct kvm *kvm, if (IS_ERR((void *)userspace_addr)) return PTR_ERR((void *)userspace_addr); - /* set userspace_addr atomically for kvm_hva_to_rmapp */ - spin_lock(&kvm->mmu_lock); memslot->userspace_addr = userspace_addr; - spin_unlock(&kvm->mmu_lock); - } else { - if (!old.user_alloc && old.rmap) { - int ret; - - down_write(¤t->mm->mmap_sem); - ret = do_munmap(current->mm, old.userspace_addr, - old.npages * PAGE_SIZE); - up_write(¤t->mm->mmap_sem); - if (ret < 0) - printk(KERN_WARNING - "kvm_vm_ioctl_set_memory_region: " - "failed to munmap memory\n"); - } } } + + return 0; +} + +void kvm_arch_commit_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot old, + int user_alloc) +{ + + int npages = mem->memory_size >> PAGE_SHIFT; + + if (!user_alloc && !old.user_alloc && old.rmap && !npages) { + int ret; + + down_write(¤t->mm->mmap_sem); + ret = do_munmap(current->mm, old.userspace_addr, + old.npages * PAGE_SIZE); + up_write(¤t->mm->mmap_sem); + if (ret < 0) + printk(KERN_WARNING + "kvm_vm_ioctl_set_memory_region: " + "failed to munmap memory\n"); + } + spin_lock(&kvm->mmu_lock); if (!kvm->arch.n_requested_mmu_pages) { unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); @@ -5282,8 +5291,6 @@ int kvm_arch_set_memory_region(struct kvm *kvm, kvm_mmu_slot_remove_write_access(kvm, mem->slot); spin_unlock(&kvm->mmu_lock); - - return 0; } void kvm_arch_flush_shadow(struct kvm *kvm) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 782bfb185f8a..3c44687b3425 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -253,7 +253,12 @@ int kvm_set_memory_region(struct kvm *kvm, int __kvm_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, int user_alloc); -int kvm_arch_set_memory_region(struct kvm *kvm, +int kvm_arch_prepare_memory_region(struct kvm *kvm, + struct kvm_memory_slot *memslot, + struct kvm_memory_slot old, + struct kvm_userspace_memory_region *mem, + int user_alloc); +void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, struct kvm_memory_slot old, int user_alloc); diff --git 
a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 86dd8f3d29c9..c9f6cfe83120 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -663,6 +663,10 @@ skip_lpage: if (!npages) kvm_arch_flush_shadow(kvm); + r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc); + if (r) + goto out_free; + spin_lock(&kvm->mmu_lock); if (mem->slot >= kvm->memslots->nmemslots) kvm->memslots->nmemslots = mem->slot + 1; @@ -670,13 +674,7 @@ skip_lpage: *memslot = new; spin_unlock(&kvm->mmu_lock); - r = kvm_arch_set_memory_region(kvm, mem, old, user_alloc); - if (r) { - spin_lock(&kvm->mmu_lock); - *memslot = old; - spin_unlock(&kvm->mmu_lock); - goto out_free; - } + kvm_arch_commit_memory_region(kvm, mem, old, user_alloc); kvm_free_physmem_slot(&old, npages ? &new : NULL); /* Slot deletion case: we have to update the current slot */ -- cgit v1.2.3 From 506f0d6f9c40ae7d9634acf3c26358810f42c24a Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Wed, 23 Dec 2009 14:35:19 -0200 Subject: KVM: introduce gfn_to_pfn_memslot Which takes a memslot pointer instead of using kvm->memslots. To be used by SRCU convertion later. Signed-off-by: Marcelo Tosatti --- include/linux/kvm_host.h | 2 ++ virt/kvm/kvm_main.c | 33 +++++++++++++++++++++++++-------- 2 files changed, 27 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 3c44687b3425..f1f78deece10 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -273,6 +273,8 @@ void kvm_set_page_dirty(struct page *page); void kvm_set_page_accessed(struct page *page); pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn); +pfn_t gfn_to_pfn_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot, gfn_t gfn); void kvm_release_pfn_dirty(pfn_t); void kvm_release_pfn_clean(pfn_t pfn); void kvm_set_pfn_dirty(pfn_t pfn); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index c9f6cfe83120..4e2321c733f7 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -835,21 +835,14 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) } EXPORT_SYMBOL_GPL(gfn_to_hva); -pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) +static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr) { struct page *page[1]; - unsigned long addr; int npages; pfn_t pfn; might_sleep(); - addr = gfn_to_hva(kvm, gfn); - if (kvm_is_error_hva(addr)) { - get_page(bad_page); - return page_to_pfn(bad_page); - } - npages = get_user_pages_fast(addr, 1, 1, page); if (unlikely(npages != 1)) { @@ -874,8 +867,32 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) return pfn; } +pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) +{ + unsigned long addr; + + addr = gfn_to_hva(kvm, gfn); + if (kvm_is_error_hva(addr)) { + get_page(bad_page); + return page_to_pfn(bad_page); + } + + return hva_to_pfn(kvm, addr); +} EXPORT_SYMBOL_GPL(gfn_to_pfn); +static unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn) +{ + return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE); +} + +pfn_t gfn_to_pfn_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot, gfn_t gfn) +{ + unsigned long addr = gfn_to_hva_memslot(slot, gfn); + return hva_to_pfn(kvm, addr); +} + struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) { pfn_t pfn; -- cgit v1.2.3 From 3ad26d8139a82b0510b1e0435ee82ae461d33401 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Wed, 23 Dec 2009 14:35:20 -0200 Subject: KVM: use gfn_to_pfn_memslot in kvm_iommu_map_pages So its possible to iommu map a memslot before making it visible to kvm. 
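A small user-space model of the per-slot translation used here (the arithmetic mirrors gfn_to_hva_memslot() from the previous patch; PAGE_SIZE and the slot fields are simplified) shows why it works on a slot that is not yet visible in kvm->memslots:

#include <stdio.h>

#define PAGE_SIZE 4096UL

struct memslot {
	unsigned long base_gfn;
	unsigned long npages;
	unsigned long userspace_addr;
};

/* same arithmetic as gfn_to_hva_memslot() in the patch */
static unsigned long gfn_to_hva_memslot(const struct memslot *slot,
					unsigned long gfn)
{
	return slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE;
}

int main(void)
{
	struct memslot slot = { .base_gfn = 0x100, .npages = 16,
				.userspace_addr = 0x7f0000000000UL };

	/* works even if 'slot' is not yet published in a global table */
	printf("gfn 0x105 -> hva 0x%lx\n", gfn_to_hva_memslot(&slot, 0x105));
	return 0;
}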
Signed-off-by: Marcelo Tosatti --- include/linux/kvm_host.h | 3 +-- virt/kvm/iommu.c | 13 ++++++------- virt/kvm/kvm_main.c | 2 +- 3 files changed, 8 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index f1f78deece10..9af240387fe6 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -440,8 +440,7 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); #define KVM_IOMMU_CACHE_COHERENCY 0x1 #ifdef CONFIG_IOMMU_API -int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn, - unsigned long npages); +int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot); int kvm_iommu_map_guest(struct kvm *kvm); int kvm_iommu_unmap_guest(struct kvm *kvm); int kvm_assign_device(struct kvm *kvm, diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index bc697a66a883..cf567d8033db 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c @@ -32,10 +32,10 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm); static void kvm_iommu_put_pages(struct kvm *kvm, gfn_t base_gfn, unsigned long npages); -int kvm_iommu_map_pages(struct kvm *kvm, - gfn_t base_gfn, unsigned long npages) +int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) { - gfn_t gfn = base_gfn; + gfn_t gfn = slot->base_gfn; + unsigned long npages = slot->npages; pfn_t pfn; int i, r = 0; struct iommu_domain *domain = kvm->arch.iommu_domain; @@ -54,7 +54,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn))) continue; - pfn = gfn_to_pfn(kvm, gfn); + pfn = gfn_to_pfn_memslot(kvm, slot, gfn); r = iommu_map_range(domain, gfn_to_gpa(gfn), pfn_to_hpa(pfn), @@ -69,7 +69,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, return 0; unmap_pages: - kvm_iommu_put_pages(kvm, base_gfn, i); + kvm_iommu_put_pages(kvm, slot->base_gfn, i); return r; } @@ -81,8 +81,7 @@ static int kvm_iommu_map_memslots(struct kvm *kvm) slots = kvm->memslots; for (i = 0; i < slots->nmemslots; i++) { - r = kvm_iommu_map_pages(kvm, slots->memslots[i].base_gfn, - slots->memslots[i].npages); + r = kvm_iommu_map_pages(kvm, &slots->memslots[i]); if (r) break; } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 4e2321c733f7..87d296d8b270 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -684,7 +684,7 @@ skip_lpage: spin_unlock(&kvm->mmu_lock); #ifdef CONFIG_DMAR /* map the pages in iommu page table */ - r = kvm_iommu_map_pages(kvm, base_gfn, npages); + r = kvm_iommu_map_pages(kvm, memslot); if (r) goto out; #endif -- cgit v1.2.3 From bc6678a33d9b952981a8e44a4f876c3ad64ca4d8 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Wed, 23 Dec 2009 14:35:21 -0200 Subject: KVM: introduce kvm->srcu and convert kvm_set_memory_region to SRCU update Use two steps for memslot deletion: mark the slot invalid (which stops instantiation of new shadow pages for that slot, but allows destruction), then instantiate the new empty slot. Also simplifies kvm_handle_hva locking. 
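The update pattern the patch adopts can be sketched in user space (no real SRCU here; in the kernel the swap and the wait are rcu_assign_pointer() and synchronize_srcu_expedited(&kvm->srcu)): copy the slot array, modify the copy, publish it with a pointer swap, wait for readers, then free the old copy.

#include <stdlib.h>
#include <string.h>

#define KVM_MEMORY_SLOTS 32

struct memslot { unsigned long base_gfn, npages, flags; };
struct memslots { int nmemslots; struct memslot memslots[KVM_MEMORY_SLOTS]; };

struct kvm { struct memslots *memslots; };

static int mark_slot_invalid(struct kvm *kvm, int slot_id)
{
	struct memslots *new, *old = kvm->memslots;

	new = malloc(sizeof(*new));
	if (!new)
		return -1;
	memcpy(new, old, sizeof(*new));           /* step 1: copy            */
	new->memslots[slot_id].flags |= 1UL << 1; /* step 2: mark invalid    */

	kvm->memslots = new;                      /* step 3: publish (rcu_assign_pointer) */
	/* step 4: wait for readers (synchronize_srcu_expedited in the kernel) */
	free(old);                                /* step 5: reclaim old copy */
	return 0;
}

int main(void)
{
	struct kvm kvm = { .memslots = calloc(1, sizeof(struct memslots)) };
	int ret = mark_slot_invalid(&kvm, 0);

	free(kvm.memslots);
	return ret;
}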
Signed-off-by: Marcelo Tosatti --- arch/ia64/kvm/kvm-ia64.c | 4 +- arch/x86/kvm/mmu.c | 28 +++++----- arch/x86/kvm/vmx.c | 6 +- include/linux/kvm.h | 2 +- include/linux/kvm_host.h | 7 +-- virt/kvm/assigned-dev.c | 8 +-- virt/kvm/iommu.c | 4 +- virt/kvm/kvm_main.c | 141 +++++++++++++++++++++++++++++++++++------------ 8 files changed, 136 insertions(+), 64 deletions(-) (limited to 'include') diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 0757c7027986..b2e4d16dd39e 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1382,7 +1382,7 @@ static void kvm_release_vm_pages(struct kvm *kvm) int i, j; unsigned long base_gfn; - slots = kvm->memslots; + slots = rcu_dereference(kvm->memslots); for (i = 0; i < slots->nmemslots; i++) { memslot = &slots->memslots[i]; base_gfn = memslot->base_gfn; @@ -1837,6 +1837,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot; int is_dirty = 0; + down_write(&kvm->slots_lock); spin_lock(&kvm->arch.dirty_log_lock); r = kvm_ia64_sync_dirty_log(kvm, log); @@ -1856,6 +1857,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, } r = 0; out: + up_write(&kvm->slots_lock); spin_unlock(&kvm->arch.dirty_log_lock); return r; } diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 81f84d326a84..f8bf42a25995 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -807,21 +808,15 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, { int i, j; int retval = 0; - struct kvm_memslots *slots = kvm->memslots; + struct kvm_memslots *slots; + + slots = rcu_dereference(kvm->memslots); - /* - * If mmap_sem isn't taken, we can look the memslots with only - * the mmu_lock by skipping over the slots with userspace_addr == 0. 
- */ for (i = 0; i < slots->nmemslots; i++) { struct kvm_memory_slot *memslot = &slots->memslots[i]; unsigned long start = memslot->userspace_addr; unsigned long end; - /* mmu_lock protects userspace_addr */ - if (!start) - continue; - end = start + (memslot->npages << PAGE_SHIFT); if (hva >= start && hva < end) { gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; @@ -1617,7 +1612,7 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn) static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn) { - int slot = memslot_id(kvm, gfn_to_memslot(kvm, gfn)); + int slot = memslot_id(kvm, gfn); struct kvm_mmu_page *sp = page_header(__pa(pte)); __set_bit(slot, sp->slot_bitmap); @@ -3021,9 +3016,11 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm) int i; unsigned int nr_mmu_pages; unsigned int nr_pages = 0; + struct kvm_memslots *slots; - for (i = 0; i < kvm->memslots->nmemslots; i++) - nr_pages += kvm->memslots->memslots[i].npages; + slots = rcu_dereference(kvm->memslots); + for (i = 0; i < slots->nmemslots; i++) + nr_pages += slots->memslots[i].npages; nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000; nr_mmu_pages = max(nr_mmu_pages, @@ -3293,10 +3290,12 @@ static void audit_mappings(struct kvm_vcpu *vcpu) static int count_rmaps(struct kvm_vcpu *vcpu) { int nmaps = 0; - int i, j, k; + int i, j, k, idx; + idx = srcu_read_lock(&kvm->srcu); + slots = rcu_dereference(kvm->memslots); for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { - struct kvm_memory_slot *m = &vcpu->kvm->memslots->memslots[i]; + struct kvm_memory_slot *m = &slots->memslots[i]; struct kvm_rmap_desc *d; for (j = 0; j < m->npages; ++j) { @@ -3319,6 +3318,7 @@ static int count_rmaps(struct kvm_vcpu *vcpu) } } } + srcu_read_unlock(&kvm->srcu, idx); return nmaps; } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 18698799e365..f1cae7d6113d 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1503,7 +1503,11 @@ static void enter_pmode(struct kvm_vcpu *vcpu) static gva_t rmode_tss_base(struct kvm *kvm) { if (!kvm->arch.tss_addr) { - gfn_t base_gfn = kvm->memslots->memslots[0].base_gfn + + struct kvm_memslots *slots; + gfn_t base_gfn; + + slots = rcu_dereference(kvm->memslots); + base_gfn = kvm->memslots->memslots[0].base_gfn + kvm->memslots->memslots[0].npages - 3; return base_gfn << PAGE_SHIFT; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index a24de0b1858e..f2feef68ffd6 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -103,7 +103,7 @@ struct kvm_userspace_memory_region { /* for kvm_memory_region::flags */ #define KVM_MEM_LOG_DIRTY_PAGES 1UL - +#define KVM_MEMSLOT_INVALID (1UL << 1) /* for KVM_IRQ_LINE */ struct kvm_irq_level { diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9af240387fe6..93bd30701ca7 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -162,6 +162,7 @@ struct kvm { struct rw_semaphore slots_lock; struct mm_struct *mm; /* userspace tied to this vm */ struct kvm_memslots *memslots; + struct srcu_struct srcu; #ifdef CONFIG_KVM_APIC_ARCHITECTURE u32 bsp_vcpu_id; struct kvm_vcpu *bsp_vcpu; @@ -275,6 +276,7 @@ void kvm_set_page_accessed(struct page *page); pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn); pfn_t gfn_to_pfn_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn); +int memslot_id(struct kvm *kvm, gfn_t gfn); void kvm_release_pfn_dirty(pfn_t); void kvm_release_pfn_clean(pfn_t pfn); void kvm_set_pfn_dirty(pfn_t pfn); @@ -490,11 +492,6 @@ static inline void kvm_guest_exit(void) current->flags &= ~PF_VCPU; 
} -static inline int memslot_id(struct kvm *kvm, struct kvm_memory_slot *slot) -{ - return slot - kvm->memslots->memslots; -} - static inline gpa_t gfn_to_gpa(gfn_t gfn) { return (gpa_t)gfn << PAGE_SHIFT; diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c index f73de631e3ee..f51e684dd238 100644 --- a/virt/kvm/assigned-dev.c +++ b/virt/kvm/assigned-dev.c @@ -504,12 +504,12 @@ out: static int kvm_vm_ioctl_assign_device(struct kvm *kvm, struct kvm_assigned_pci_dev *assigned_dev) { - int r = 0; + int r = 0, idx; struct kvm_assigned_dev_kernel *match; struct pci_dev *dev; mutex_lock(&kvm->lock); - down_read(&kvm->slots_lock); + idx = srcu_read_lock(&kvm->srcu); match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, assigned_dev->assigned_dev_id); @@ -573,7 +573,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, } out: - up_read(&kvm->slots_lock); + srcu_read_unlock(&kvm->srcu, idx); mutex_unlock(&kvm->lock); return r; out_list_del: @@ -585,7 +585,7 @@ out_put: pci_dev_put(dev); out_free: kfree(match); - up_read(&kvm->slots_lock); + srcu_read_unlock(&kvm->srcu, idx); mutex_unlock(&kvm->lock); return r; } diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index cf567d8033db..65a51432c8e5 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c @@ -78,7 +78,7 @@ static int kvm_iommu_map_memslots(struct kvm *kvm) int i, r = 0; struct kvm_memslots *slots; - slots = kvm->memslots; + slots = rcu_dereference(kvm->memslots); for (i = 0; i < slots->nmemslots; i++) { r = kvm_iommu_map_pages(kvm, &slots->memslots[i]); @@ -214,7 +214,7 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm) int i; struct kvm_memslots *slots; - slots = kvm->memslots; + slots = rcu_dereference(kvm->memslots); for (i = 0; i < slots->nmemslots; i++) { kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn, diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 87d296d8b270..2bb24a814fdf 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -213,7 +214,7 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, unsigned long address) { struct kvm *kvm = mmu_notifier_to_kvm(mn); - int need_tlb_flush; + int need_tlb_flush, idx; /* * When ->invalidate_page runs, the linux pte has been zapped @@ -233,10 +234,12 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, * pte after kvm_unmap_hva returned, without noticing the page * is going to be freed. 
*/ + idx = srcu_read_lock(&kvm->srcu); spin_lock(&kvm->mmu_lock); kvm->mmu_notifier_seq++; need_tlb_flush = kvm_unmap_hva(kvm, address); spin_unlock(&kvm->mmu_lock); + srcu_read_unlock(&kvm->srcu, idx); /* we've to flush the tlb before the pages can be freed */ if (need_tlb_flush) @@ -250,11 +253,14 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn, pte_t pte) { struct kvm *kvm = mmu_notifier_to_kvm(mn); + int idx; + idx = srcu_read_lock(&kvm->srcu); spin_lock(&kvm->mmu_lock); kvm->mmu_notifier_seq++; kvm_set_spte_hva(kvm, address, pte); spin_unlock(&kvm->mmu_lock); + srcu_read_unlock(&kvm->srcu, idx); } static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, @@ -263,8 +269,9 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, unsigned long end) { struct kvm *kvm = mmu_notifier_to_kvm(mn); - int need_tlb_flush = 0; + int need_tlb_flush = 0, idx; + idx = srcu_read_lock(&kvm->srcu); spin_lock(&kvm->mmu_lock); /* * The count increase must become visible at unlock time as no @@ -275,6 +282,7 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, for (; start < end; start += PAGE_SIZE) need_tlb_flush |= kvm_unmap_hva(kvm, start); spin_unlock(&kvm->mmu_lock); + srcu_read_unlock(&kvm->srcu, idx); /* we've to flush the tlb before the pages can be freed */ if (need_tlb_flush) @@ -312,11 +320,13 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, unsigned long address) { struct kvm *kvm = mmu_notifier_to_kvm(mn); - int young; + int young, idx; + idx = srcu_read_lock(&kvm->srcu); spin_lock(&kvm->mmu_lock); young = kvm_age_hva(kvm, address); spin_unlock(&kvm->mmu_lock); + srcu_read_unlock(&kvm->srcu, idx); if (young) kvm_flush_remote_tlbs(kvm); @@ -379,11 +389,15 @@ static struct kvm *kvm_create_vm(void) kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); if (!kvm->memslots) goto out_err; + if (init_srcu_struct(&kvm->srcu)) + goto out_err; #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET page = alloc_page(GFP_KERNEL | __GFP_ZERO); - if (!page) + if (!page) { + cleanup_srcu_struct(&kvm->srcu); goto out_err; + } kvm->coalesced_mmio_ring = (struct kvm_coalesced_mmio_ring *)page_address(page); @@ -391,6 +405,7 @@ static struct kvm *kvm_create_vm(void) r = kvm_init_mmu_notifier(kvm); if (r) { + cleanup_srcu_struct(&kvm->srcu); #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET put_page(page); #endif @@ -480,6 +495,7 @@ static void kvm_destroy_vm(struct kvm *kvm) #else kvm_arch_flush_shadow(kvm); #endif + cleanup_srcu_struct(&kvm->srcu); kvm_arch_destroy_vm(kvm); hardware_disable_all(); mmdrop(mm); @@ -521,12 +537,13 @@ int __kvm_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, int user_alloc) { - int r; + int r, flush_shadow = 0; gfn_t base_gfn; unsigned long npages; unsigned long i; struct kvm_memory_slot *memslot; struct kvm_memory_slot old, new; + struct kvm_memslots *slots, *old_memslots; r = -EINVAL; /* General sanity checks */ @@ -588,15 +605,7 @@ int __kvm_set_memory_region(struct kvm *kvm, memset(new.rmap, 0, npages * sizeof(*new.rmap)); new.user_alloc = user_alloc; - /* - * hva_to_rmmap() serialzies with the mmu_lock and to be - * safe it has to ignore memslots with !user_alloc && - * !userspace_addr. 
- */ - if (user_alloc) - new.userspace_addr = mem->userspace_addr; - else - new.userspace_addr = 0; + new.userspace_addr = mem->userspace_addr; } if (!npages) goto skip_lpage; @@ -651,8 +660,9 @@ skip_lpage: if (!new.dirty_bitmap) goto out_free; memset(new.dirty_bitmap, 0, dirty_bytes); + /* destroy any largepage mappings for dirty tracking */ if (old.npages) - kvm_arch_flush_shadow(kvm); + flush_shadow = 1; } #else /* not defined CONFIG_S390 */ new.user_alloc = user_alloc; @@ -660,34 +670,72 @@ skip_lpage: new.userspace_addr = mem->userspace_addr; #endif /* not defined CONFIG_S390 */ - if (!npages) + if (!npages) { + r = -ENOMEM; + slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); + if (!slots) + goto out_free; + memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); + if (mem->slot >= slots->nmemslots) + slots->nmemslots = mem->slot + 1; + slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID; + + old_memslots = kvm->memslots; + rcu_assign_pointer(kvm->memslots, slots); + synchronize_srcu_expedited(&kvm->srcu); + /* From this point no new shadow pages pointing to a deleted + * memslot will be created. + * + * validation of sp->gfn happens in: + * - gfn_to_hva (kvm_read_guest, gfn_to_pfn) + * - kvm_is_visible_gfn (mmu_check_roots) + */ kvm_arch_flush_shadow(kvm); + kfree(old_memslots); + } r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc); if (r) goto out_free; - spin_lock(&kvm->mmu_lock); - if (mem->slot >= kvm->memslots->nmemslots) - kvm->memslots->nmemslots = mem->slot + 1; +#ifdef CONFIG_DMAR + /* map the pages in iommu page table */ + if (npages) { + r = kvm_iommu_map_pages(kvm, &new); + if (r) + goto out_free; + } +#endif - *memslot = new; - spin_unlock(&kvm->mmu_lock); + r = -ENOMEM; + slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); + if (!slots) + goto out_free; + memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); + if (mem->slot >= slots->nmemslots) + slots->nmemslots = mem->slot + 1; + + /* actual memory is freed via old in kvm_free_physmem_slot below */ + if (!npages) { + new.rmap = NULL; + new.dirty_bitmap = NULL; + for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) + new.lpage_info[i] = NULL; + } + + slots->memslots[mem->slot] = new; + old_memslots = kvm->memslots; + rcu_assign_pointer(kvm->memslots, slots); + synchronize_srcu_expedited(&kvm->srcu); kvm_arch_commit_memory_region(kvm, mem, old, user_alloc); - kvm_free_physmem_slot(&old, npages ? 
&new : NULL); - /* Slot deletion case: we have to update the current slot */ - spin_lock(&kvm->mmu_lock); - if (!npages) - *memslot = old; - spin_unlock(&kvm->mmu_lock); -#ifdef CONFIG_DMAR - /* map the pages in iommu page table */ - r = kvm_iommu_map_pages(kvm, memslot); - if (r) - goto out; -#endif + kvm_free_physmem_slot(&old, &new); + kfree(old_memslots); + + if (flush_shadow) + kvm_arch_flush_shadow(kvm); + return 0; out_free: @@ -787,7 +835,7 @@ EXPORT_SYMBOL_GPL(kvm_is_error_hva); struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn) { int i; - struct kvm_memslots *slots = kvm->memslots; + struct kvm_memslots *slots = rcu_dereference(kvm->memslots); for (i = 0; i < slots->nmemslots; ++i) { struct kvm_memory_slot *memslot = &slots->memslots[i]; @@ -809,12 +857,15 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) { int i; - struct kvm_memslots *slots = kvm->memslots; + struct kvm_memslots *slots = rcu_dereference(kvm->memslots); gfn = unalias_gfn(kvm, gfn); for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { struct kvm_memory_slot *memslot = &slots->memslots[i]; + if (memslot->flags & KVM_MEMSLOT_INVALID) + continue; + if (gfn >= memslot->base_gfn && gfn < memslot->base_gfn + memslot->npages) return 1; @@ -823,13 +874,31 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) } EXPORT_SYMBOL_GPL(kvm_is_visible_gfn); +int memslot_id(struct kvm *kvm, gfn_t gfn) +{ + int i; + struct kvm_memslots *slots = rcu_dereference(kvm->memslots); + struct kvm_memory_slot *memslot = NULL; + + gfn = unalias_gfn(kvm, gfn); + for (i = 0; i < slots->nmemslots; ++i) { + memslot = &slots->memslots[i]; + + if (gfn >= memslot->base_gfn + && gfn < memslot->base_gfn + memslot->npages) + break; + } + + return memslot - slots->memslots; +} + unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) { struct kvm_memory_slot *slot; gfn = unalias_gfn(kvm, gfn); slot = gfn_to_memslot_unaliased(kvm, gfn); - if (!slot) + if (!slot || slot->flags & KVM_MEMSLOT_INVALID) return bad_hva(); return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE); } -- cgit v1.2.3 From a983fb238728e1123177e8058d4f644b949a7d05 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Wed, 23 Dec 2009 14:35:23 -0200 Subject: KVM: x86: switch kvm_set_memory_alias to SRCU update Using a similar two-step procedure as for memslots. 
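The reader side of that two-step procedure can be sketched as follows (a user-space model; in the kernel the table pointer is fetched with rcu_dereference() under srcu_read_lock(), and the logic mirrors unalias_gfn_instantiation() in the patch): translation walks the alias table and skips entries marked invalid, so a deletion can be made visible in two steps without readers seeing a half-updated alias.

#include <stdio.h>

#define KVM_ALIAS_SLOTS    4
#define KVM_ALIAS_INVALID  1UL

struct mem_alias { unsigned long base_gfn, npages, target_gfn, flags; };
struct mem_aliases { int naliases; struct mem_alias aliases[KVM_ALIAS_SLOTS]; };

static unsigned long unalias_gfn(const struct mem_aliases *a, unsigned long gfn)
{
	for (int i = 0; i < a->naliases; ++i) {
		const struct mem_alias *alias = &a->aliases[i];

		if (alias->flags & KVM_ALIAS_INVALID)
			continue;                 /* being deleted: ignore */
		if (gfn >= alias->base_gfn &&
		    gfn < alias->base_gfn + alias->npages)
			return alias->target_gfn + gfn - alias->base_gfn;
	}
	return gfn;
}

int main(void)
{
	struct mem_aliases a = { .naliases = 1,
		.aliases = { { .base_gfn = 0x10, .npages = 4, .target_gfn = 0x100 } } };

	printf("gfn 0x12 -> 0x%lx\n", unalias_gfn(&a, 0x12));
	return 0;
}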
Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 4 +++ arch/x86/kvm/x86.c | 60 ++++++++++++++++++++++++++++++++++------- include/linux/kvm_host.h | 6 +++++ virt/kvm/kvm_main.c | 4 +-- 4 files changed, 63 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 7cdcb3d0f770..6c8c7c578c46 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -368,8 +368,12 @@ struct kvm_mem_alias { gfn_t base_gfn; unsigned long npages; gfn_t target_gfn; +#define KVM_ALIAS_INVALID 1UL + unsigned long flags; }; +#define KVM_ARCH_HAS_UNALIAS_INSTANTIATION + struct kvm_mem_aliases { struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS]; int naliases; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e7488350ca16..28127c936c3b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #undef TRACE_INCLUDE_FILE #define CREATE_TRACE_POINTS @@ -2223,11 +2224,32 @@ static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm) return kvm->arch.n_alloc_mmu_pages; } +gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn) +{ + int i; + struct kvm_mem_alias *alias; + struct kvm_mem_aliases *aliases; + + aliases = rcu_dereference(kvm->arch.aliases); + + for (i = 0; i < aliases->naliases; ++i) { + alias = &aliases->aliases[i]; + if (alias->flags & KVM_ALIAS_INVALID) + continue; + if (gfn >= alias->base_gfn + && gfn < alias->base_gfn + alias->npages) + return alias->target_gfn + gfn - alias->base_gfn; + } + return gfn; +} + gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) { int i; struct kvm_mem_alias *alias; - struct kvm_mem_aliases *aliases = kvm->arch.aliases; + struct kvm_mem_aliases *aliases; + + aliases = rcu_dereference(kvm->arch.aliases); for (i = 0; i < aliases->naliases; ++i) { alias = &aliases->aliases[i]; @@ -2248,7 +2270,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, { int r, n; struct kvm_mem_alias *p; - struct kvm_mem_aliases *aliases; + struct kvm_mem_aliases *aliases, *old_aliases; r = -EINVAL; /* General sanity checks */ @@ -2265,28 +2287,48 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, < alias->target_phys_addr) goto out; + r = -ENOMEM; + aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL); + if (!aliases) + goto out; + down_write(&kvm->slots_lock); - spin_lock(&kvm->mmu_lock); - aliases = kvm->arch.aliases; + /* invalidate any gfn reference in case of deletion/shrinking */ + memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases)); + aliases->aliases[alias->slot].flags |= KVM_ALIAS_INVALID; + old_aliases = kvm->arch.aliases; + rcu_assign_pointer(kvm->arch.aliases, aliases); + synchronize_srcu_expedited(&kvm->srcu); + kvm_mmu_zap_all(kvm); + kfree(old_aliases); + + r = -ENOMEM; + aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL); + if (!aliases) + goto out_unlock; + + memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases)); p = &aliases->aliases[alias->slot]; p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; p->npages = alias->memory_size >> PAGE_SHIFT; p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT; + p->flags &= ~(KVM_ALIAS_INVALID); for (n = KVM_ALIAS_SLOTS; n > 0; --n) if (aliases->aliases[n - 1].npages) break; aliases->naliases = n; - spin_unlock(&kvm->mmu_lock); - kvm_mmu_zap_all(kvm); + old_aliases = kvm->arch.aliases; + rcu_assign_pointer(kvm->arch.aliases, aliases); + synchronize_srcu_expedited(&kvm->srcu); + 
kfree(old_aliases); + r = 0; +out_unlock: up_write(&kvm->slots_lock); - - return 0; - out: return r; } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 93bd30701ca7..20941c0f4045 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -266,6 +266,8 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, void kvm_disable_largepages(void); void kvm_arch_flush_shadow(struct kvm *kvm); gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn); +gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn); + struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); void kvm_release_page_clean(struct page *page); @@ -539,6 +541,10 @@ static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_se } #endif +#ifndef KVM_ARCH_HAS_UNALIAS_INSTANTIATION +#define unalias_gfn_instantiation unalias_gfn +#endif + #ifdef CONFIG_HAVE_KVM_IRQCHIP #define KVM_MAX_IRQ_ROUTES 1024 diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 2bb24a814fdf..c680f7b64c6f 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -859,7 +859,7 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) int i; struct kvm_memslots *slots = rcu_dereference(kvm->memslots); - gfn = unalias_gfn(kvm, gfn); + gfn = unalias_gfn_instantiation(kvm, gfn); for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { struct kvm_memory_slot *memslot = &slots->memslots[i]; @@ -896,7 +896,7 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) { struct kvm_memory_slot *slot; - gfn = unalias_gfn(kvm, gfn); + gfn = unalias_gfn_instantiation(kvm, gfn); slot = gfn_to_memslot_unaliased(kvm, gfn); if (!slot || slot->flags & KVM_MEMSLOT_INVALID) return bad_hva(); -- cgit v1.2.3 From e93f8a0f821e290ac5149830110a5f704db7a1fc Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Wed, 23 Dec 2009 14:35:24 -0200 Subject: KVM: convert io_bus to SRCU Signed-off-by: Marcelo Tosatti --- arch/ia64/kvm/kvm-ia64.c | 4 +- arch/x86/kvm/i8254.c | 6 +-- arch/x86/kvm/i8259.c | 4 +- arch/x86/kvm/x86.c | 13 +++--- include/linux/kvm_host.h | 27 ++++++------ virt/kvm/coalesced_mmio.c | 4 +- virt/kvm/eventfd.c | 8 ++-- virt/kvm/ioapic.c | 4 +- virt/kvm/kvm_main.c | 106 +++++++++++++++++++++++++++------------------- 9 files changed, 101 insertions(+), 75 deletions(-) (limited to 'include') diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index b2e4d16dd39e..d0ad538f0083 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -241,10 +241,10 @@ static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 0; mmio: if (p->dir) - r = kvm_io_bus_read(&vcpu->kvm->mmio_bus, p->addr, + r = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, p->addr, p->size, &p->data); else - r = kvm_io_bus_write(&vcpu->kvm->mmio_bus, p->addr, + r = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, p->addr, p->size, &p->data); if (r) printk(KERN_ERR"kvm: No iodevice found! 
addr:%lx\n", p->addr); diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 15578f180e59..4b433de02e5b 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -645,13 +645,13 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier); kvm_iodevice_init(&pit->dev, &pit_dev_ops); - ret = __kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev); + ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &pit->dev); if (ret < 0) goto fail; if (flags & KVM_PIT_SPEAKER_DUMMY) { kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops); - ret = __kvm_io_bus_register_dev(&kvm->pio_bus, + ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &pit->speaker_dev); if (ret < 0) goto fail_unregister; @@ -660,7 +660,7 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) return pit; fail_unregister: - __kvm_io_bus_unregister_dev(&kvm->pio_bus, &pit->dev); + kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->dev); fail: if (pit->irq_source_id >= 0) diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index d057c0cbd245..b7d145b20953 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -533,7 +533,9 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm) * Initialize PIO device */ kvm_iodevice_init(&s->dev, &picdev_ops); - ret = kvm_io_bus_register_dev(kvm, &kvm->pio_bus, &s->dev); + down_write(&kvm->slots_lock); + ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &s->dev); + up_write(&kvm->slots_lock); if (ret < 0) { kfree(s); return NULL; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 28127c936c3b..9b42673df4af 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2851,7 +2851,7 @@ static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v)) return 0; - return kvm_io_bus_write(&vcpu->kvm->mmio_bus, addr, len, v); + return kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); } static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) @@ -2860,7 +2860,7 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v)) return 0; - return kvm_io_bus_read(&vcpu->kvm->mmio_bus, addr, len, v); + return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); } static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes, @@ -3345,11 +3345,12 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) int r; if (vcpu->arch.pio.in) - r = kvm_io_bus_read(&vcpu->kvm->pio_bus, vcpu->arch.pio.port, + r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port, vcpu->arch.pio.size, pd); else - r = kvm_io_bus_write(&vcpu->kvm->pio_bus, vcpu->arch.pio.port, - vcpu->arch.pio.size, pd); + r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, + vcpu->arch.pio.port, vcpu->arch.pio.size, + pd); return r; } @@ -3360,7 +3361,7 @@ static int pio_string_write(struct kvm_vcpu *vcpu) int i, r = 0; for (i = 0; i < io->cur_count; i++) { - if (kvm_io_bus_write(&vcpu->kvm->pio_bus, + if (kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, io->port, io->size, pd)) { r = -EOPNOTSUPP; break; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 20941c0f4045..5e9cb902550b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -57,20 +57,20 @@ struct kvm_io_bus { struct kvm_io_device *devs[NR_IOBUS_DEVS]; }; -void kvm_io_bus_init(struct kvm_io_bus *bus); -void kvm_io_bus_destroy(struct kvm_io_bus *bus); -int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr, 
int len, - const void *val); -int kvm_io_bus_read(struct kvm_io_bus *bus, gpa_t addr, int len, +enum kvm_bus { + KVM_MMIO_BUS, + KVM_PIO_BUS, + KVM_NR_BUSES +}; + +int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, + int len, const void *val); +int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, void *val); -int __kvm_io_bus_register_dev(struct kvm_io_bus *bus, - struct kvm_io_device *dev); -int kvm_io_bus_register_dev(struct kvm *kvm, struct kvm_io_bus *bus, +int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, struct kvm_io_device *dev); -void __kvm_io_bus_unregister_dev(struct kvm_io_bus *bus, - struct kvm_io_device *dev); -void kvm_io_bus_unregister_dev(struct kvm *kvm, struct kvm_io_bus *bus, - struct kvm_io_device *dev); +int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, + struct kvm_io_device *dev); struct kvm_vcpu { struct kvm *kvm; @@ -171,8 +171,7 @@ struct kvm { atomic_t online_vcpus; struct list_head vm_list; struct mutex lock; - struct kvm_io_bus mmio_bus; - struct kvm_io_bus pio_bus; + struct kvm_io_bus *buses[KVM_NR_BUSES]; #ifdef CONFIG_HAVE_KVM_EVENTFD struct { spinlock_t lock; diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c index d68e6c68e0ff..a736a93ca7b7 100644 --- a/virt/kvm/coalesced_mmio.c +++ b/virt/kvm/coalesced_mmio.c @@ -110,7 +110,9 @@ int kvm_coalesced_mmio_init(struct kvm *kvm) dev->kvm = kvm; kvm->coalesced_mmio_dev = dev; - ret = kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &dev->dev); + down_write(&kvm->slots_lock); + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &dev->dev); + up_write(&kvm->slots_lock); if (ret < 0) goto out_free_dev; diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index a9d3fc6c681c..315a586ec4d5 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -463,7 +463,7 @@ static int kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) { int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO; - struct kvm_io_bus *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus; + enum kvm_bus bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS; struct _ioeventfd *p; struct eventfd_ctx *eventfd; int ret; @@ -518,7 +518,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) kvm_iodevice_init(&p->dev, &ioeventfd_ops); - ret = __kvm_io_bus_register_dev(bus, &p->dev); + ret = kvm_io_bus_register_dev(kvm, bus_idx, &p->dev); if (ret < 0) goto unlock_fail; @@ -542,7 +542,7 @@ static int kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) { int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO; - struct kvm_io_bus *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus; + enum kvm_bus bus_idx = pio ? 
KVM_PIO_BUS : KVM_MMIO_BUS; struct _ioeventfd *p, *tmp; struct eventfd_ctx *eventfd; int ret = -ENOENT; @@ -565,7 +565,7 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) if (!p->wildcard && p->datamatch != args->datamatch) continue; - __kvm_io_bus_unregister_dev(bus, &p->dev); + kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev); ioeventfd_release(p); ret = 0; break; diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 38a2d20b89de..f326a6f301cc 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -372,7 +372,9 @@ int kvm_ioapic_init(struct kvm *kvm) kvm_ioapic_reset(ioapic); kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); ioapic->kvm = kvm; - ret = kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &ioapic->dev); + down_write(&kvm->slots_lock); + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &ioapic->dev); + up_write(&kvm->slots_lock); if (ret < 0) kfree(ioapic); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index c680f7b64c6f..659bc12ad16a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -85,6 +85,8 @@ static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, static int hardware_enable_all(void); static void hardware_disable_all(void); +static void kvm_io_bus_destroy(struct kvm_io_bus *bus); + static bool kvm_rebooting; static bool largepages_enabled = true; @@ -367,7 +369,7 @@ static int kvm_init_mmu_notifier(struct kvm *kvm) static struct kvm *kvm_create_vm(void) { - int r = 0; + int r = 0, i; struct kvm *kvm = kvm_arch_create_vm(); #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET struct page *page; @@ -391,6 +393,14 @@ static struct kvm *kvm_create_vm(void) goto out_err; if (init_srcu_struct(&kvm->srcu)) goto out_err; + for (i = 0; i < KVM_NR_BUSES; i++) { + kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus), + GFP_KERNEL); + if (!kvm->buses[i]) { + cleanup_srcu_struct(&kvm->srcu); + goto out_err; + } + } #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET page = alloc_page(GFP_KERNEL | __GFP_ZERO); @@ -416,11 +426,9 @@ static struct kvm *kvm_create_vm(void) atomic_inc(&kvm->mm->mm_count); spin_lock_init(&kvm->mmu_lock); spin_lock_init(&kvm->requests_lock); - kvm_io_bus_init(&kvm->pio_bus); kvm_eventfd_init(kvm); mutex_init(&kvm->lock); mutex_init(&kvm->irq_lock); - kvm_io_bus_init(&kvm->mmio_bus); init_rwsem(&kvm->slots_lock); atomic_set(&kvm->users_count, 1); spin_lock(&kvm_lock); @@ -435,6 +443,8 @@ out: out_err: hardware_disable_all(); out_err_nodisable: + for (i = 0; i < KVM_NR_BUSES; i++) + kfree(kvm->buses[i]); kfree(kvm->memslots); kfree(kvm); return ERR_PTR(r); @@ -480,6 +490,7 @@ void kvm_free_physmem(struct kvm *kvm) static void kvm_destroy_vm(struct kvm *kvm) { + int i; struct mm_struct *mm = kvm->mm; kvm_arch_sync_events(kvm); @@ -487,8 +498,8 @@ static void kvm_destroy_vm(struct kvm *kvm) list_del(&kvm->vm_list); spin_unlock(&kvm_lock); kvm_free_irq_routing(kvm); - kvm_io_bus_destroy(&kvm->pio_bus); - kvm_io_bus_destroy(&kvm->mmio_bus); + for (i = 0; i < KVM_NR_BUSES; i++) + kvm_io_bus_destroy(kvm->buses[i]); kvm_coalesced_mmio_free(kvm); #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); @@ -1949,12 +1960,7 @@ static struct notifier_block kvm_reboot_notifier = { .priority = 0, }; -void kvm_io_bus_init(struct kvm_io_bus *bus) -{ - memset(bus, 0, sizeof(*bus)); -} - -void kvm_io_bus_destroy(struct kvm_io_bus *bus) +static void kvm_io_bus_destroy(struct kvm_io_bus *bus) { int i; @@ -1963,13 +1969,15 @@ void kvm_io_bus_destroy(struct kvm_io_bus *bus) 
kvm_iodevice_destructor(pos); } + kfree(bus); } /* kvm_io_bus_write - called under kvm->slots_lock */ -int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr, +int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, const void *val) { int i; + struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]); for (i = 0; i < bus->dev_count; i++) if (!kvm_iodevice_write(bus->devs[i], addr, len, val)) return 0; @@ -1977,59 +1985,71 @@ int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr, } /* kvm_io_bus_read - called under kvm->slots_lock */ -int kvm_io_bus_read(struct kvm_io_bus *bus, gpa_t addr, int len, void *val) +int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, + int len, void *val) { int i; + struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]); + for (i = 0; i < bus->dev_count; i++) if (!kvm_iodevice_read(bus->devs[i], addr, len, val)) return 0; return -EOPNOTSUPP; } -int kvm_io_bus_register_dev(struct kvm *kvm, struct kvm_io_bus *bus, - struct kvm_io_device *dev) +/* Caller must have write lock on slots_lock. */ +int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, + struct kvm_io_device *dev) { - int ret; - - down_write(&kvm->slots_lock); - ret = __kvm_io_bus_register_dev(bus, dev); - up_write(&kvm->slots_lock); + struct kvm_io_bus *new_bus, *bus; - return ret; -} - -/* An unlocked version. Caller must have write lock on slots_lock. */ -int __kvm_io_bus_register_dev(struct kvm_io_bus *bus, - struct kvm_io_device *dev) -{ + bus = kvm->buses[bus_idx]; if (bus->dev_count > NR_IOBUS_DEVS-1) return -ENOSPC; - bus->devs[bus->dev_count++] = dev; + new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL); + if (!new_bus) + return -ENOMEM; + memcpy(new_bus, bus, sizeof(struct kvm_io_bus)); + new_bus->devs[new_bus->dev_count++] = dev; + rcu_assign_pointer(kvm->buses[bus_idx], new_bus); + synchronize_srcu_expedited(&kvm->srcu); + kfree(bus); return 0; } -void kvm_io_bus_unregister_dev(struct kvm *kvm, - struct kvm_io_bus *bus, - struct kvm_io_device *dev) +/* Caller must have write lock on slots_lock. */ +int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, + struct kvm_io_device *dev) { - down_write(&kvm->slots_lock); - __kvm_io_bus_unregister_dev(bus, dev); - up_write(&kvm->slots_lock); -} + int i, r; + struct kvm_io_bus *new_bus, *bus; -/* An unlocked version. Caller must have write lock on slots_lock. 
*/ -void __kvm_io_bus_unregister_dev(struct kvm_io_bus *bus, - struct kvm_io_device *dev) -{ - int i; + new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL); + if (!new_bus) + return -ENOMEM; - for (i = 0; i < bus->dev_count; i++) - if (bus->devs[i] == dev) { - bus->devs[i] = bus->devs[--bus->dev_count]; + bus = kvm->buses[bus_idx]; + memcpy(new_bus, bus, sizeof(struct kvm_io_bus)); + + r = -ENOENT; + for (i = 0; i < new_bus->dev_count; i++) + if (new_bus->devs[i] == dev) { + r = 0; + new_bus->devs[i] = new_bus->devs[--new_bus->dev_count]; break; } + + if (r) { + kfree(new_bus); + return r; + } + + rcu_assign_pointer(kvm->buses[bus_idx], new_bus); + synchronize_srcu_expedited(&kvm->srcu); + kfree(bus); + return r; } static struct notifier_block kvm_cpu_notifier = { -- cgit v1.2.3 From f656ce0185cabbbb0cf96877306879661297c7ad Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Wed, 23 Dec 2009 14:35:25 -0200 Subject: KVM: switch vcpu context to use SRCU Signed-off-by: Marcelo Tosatti --- arch/ia64/kvm/kvm-ia64.c | 15 ++++++--------- arch/s390/kvm/kvm-s390.h | 10 +++++++--- arch/x86/kvm/mmu.c | 7 +++---- arch/x86/kvm/vmx.c | 6 +++--- arch/x86/kvm/x86.c | 43 ++++++++++++++++++++++++------------------- include/linux/kvm_host.h | 2 ++ 6 files changed, 45 insertions(+), 38 deletions(-) (limited to 'include') diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index d0ad538f0083..d5e384641275 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -636,12 +636,9 @@ static void kvm_vcpu_post_transition(struct kvm_vcpu *vcpu) static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { union context *host_ctx, *guest_ctx; - int r; + int r, idx; - /* - * down_read() may sleep and return with interrupts enabled - */ - down_read(&vcpu->kvm->slots_lock); + idx = srcu_read_lock(&vcpu->kvm->srcu); again: if (signal_pending(current)) { @@ -663,7 +660,7 @@ again: if (r < 0) goto vcpu_run_fail; - up_read(&vcpu->kvm->slots_lock); + srcu_read_unlock(&vcpu->kvm->srcu, idx); kvm_guest_enter(); /* @@ -687,7 +684,7 @@ again: kvm_guest_exit(); preempt_enable(); - down_read(&vcpu->kvm->slots_lock); + idx = srcu_read_lock(&vcpu->kvm->srcu); r = kvm_handle_exit(kvm_run, vcpu); @@ -697,10 +694,10 @@ again: } out: - up_read(&vcpu->kvm->slots_lock); + srcu_read_unlock(&vcpu->kvm->srcu, idx); if (r > 0) { kvm_resched(vcpu); - down_read(&vcpu->kvm->slots_lock); + idx = srcu_read_lock(&vcpu->kvm->srcu); goto again; } diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 06cce8285ba0..60f09ab3672c 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -67,10 +67,14 @@ static inline long kvm_s390_vcpu_get_memsize(struct kvm_vcpu *vcpu) static inline void kvm_s390_vcpu_set_mem(struct kvm_vcpu *vcpu) { + int idx; struct kvm_memory_slot *mem; + struct kvm_memslots *memslots; - down_read(&vcpu->kvm->slots_lock); - mem = &vcpu->kvm->memslots[0]; + idx = srcu_read_lock(&vcpu->kvm->srcu); + memslots = rcu_dereference(vcpu->kvm->memslots); + + mem = &memslots->memslots[0]; vcpu->arch.sie_block->gmsor = mem->userspace_addr; vcpu->arch.sie_block->gmslm = @@ -78,7 +82,7 @@ static inline void kvm_s390_vcpu_set_mem(struct kvm_vcpu *vcpu) (mem->npages << PAGE_SHIFT) + VIRTIODESCSPACE - 1ul; - up_read(&vcpu->kvm->slots_lock); + srcu_read_unlock(&vcpu->kvm->srcu, idx); } /* implemented in priv.c */ diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index f8bf42a25995..25aabd00aa01 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2933,10 +2933,9 @@ 
static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { - int npages; + int npages, idx; - if (!down_read_trylock(&kvm->slots_lock)) - continue; + idx = srcu_read_lock(&kvm->srcu); spin_lock(&kvm->mmu_lock); npages = kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages; @@ -2949,7 +2948,7 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) nr_to_scan--; spin_unlock(&kvm->mmu_lock); - up_read(&kvm->slots_lock); + srcu_read_unlock(&kvm->srcu, idx); } if (kvm_freed) list_move_tail(&kvm_freed->vm_list, &vm_list); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f1cae7d6113d..22ab7137d1d0 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2478,10 +2478,10 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); u64 msr; - int ret; + int ret, idx; vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)); - down_read(&vcpu->kvm->slots_lock); + idx = srcu_read_lock(&vcpu->kvm->srcu); if (!init_rmode(vmx->vcpu.kvm)) { ret = -ENOMEM; goto out; @@ -2589,7 +2589,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmx->emulation_required = 0; out: - up_read(&vcpu->kvm->slots_lock); + srcu_read_unlock(&vcpu->kvm->srcu, idx); return ret; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9b42673df4af..53bc06a68105 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1306,15 +1306,15 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs, int (*do_msr)(struct kvm_vcpu *vcpu, unsigned index, u64 *data)) { - int i; + int i, idx; vcpu_load(vcpu); - down_read(&vcpu->kvm->slots_lock); + idx = srcu_read_lock(&vcpu->kvm->srcu); for (i = 0; i < msrs->nmsrs; ++i) if (do_msr(vcpu, entries[i].index, &entries[i].data)) break; - up_read(&vcpu->kvm->slots_lock); + srcu_read_unlock(&vcpu->kvm->srcu, idx); vcpu_put(vcpu); @@ -3900,14 +3900,15 @@ static void vapic_enter(struct kvm_vcpu *vcpu) static void vapic_exit(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; + int idx; if (!apic || !apic->vapic_addr) return; - down_read(&vcpu->kvm->slots_lock); + idx = srcu_read_lock(&vcpu->kvm->srcu); kvm_release_page_dirty(apic->vapic_page); mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); - up_read(&vcpu->kvm->slots_lock); + srcu_read_unlock(&vcpu->kvm->srcu, idx); } static void update_cr8_intercept(struct kvm_vcpu *vcpu) @@ -4036,7 +4037,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_lapic_sync_to_vapic(vcpu); } - up_read(&vcpu->kvm->slots_lock); + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); kvm_guest_enter(); @@ -4078,7 +4079,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) preempt_enable(); - down_read(&vcpu->kvm->slots_lock); + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); /* * Profile KVM exit RIPs: @@ -4100,6 +4101,7 @@ out: static int __vcpu_run(struct kvm_vcpu *vcpu) { int r; + struct kvm *kvm = vcpu->kvm; if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) { pr_debug("vcpu %d received sipi with vector # %x\n", @@ -4111,7 +4113,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; } - down_read(&vcpu->kvm->slots_lock); + vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); vapic_enter(vcpu); r = 1; @@ -4119,9 +4121,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) r = vcpu_enter_guest(vcpu); else { - up_read(&vcpu->kvm->slots_lock); + srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); 
kvm_vcpu_block(vcpu); - down_read(&vcpu->kvm->slots_lock); + vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests)) { switch(vcpu->arch.mp_state) { @@ -4156,13 +4158,13 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) ++vcpu->stat.signal_exits; } if (need_resched()) { - up_read(&vcpu->kvm->slots_lock); + srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); kvm_resched(vcpu); - down_read(&vcpu->kvm->slots_lock); + vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); } } - up_read(&vcpu->kvm->slots_lock); + srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); post_kvm_run_save(vcpu); vapic_exit(vcpu); @@ -4201,10 +4203,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) vcpu->mmio_read_completed = 1; vcpu->mmio_needed = 0; - down_read(&vcpu->kvm->slots_lock); + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); r = emulate_instruction(vcpu, vcpu->arch.mmio_fault_cr2, 0, EMULTYPE_NO_DECODE); - up_read(&vcpu->kvm->slots_lock); + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); if (r == EMULATE_DO_MMIO) { /* * Read-modify-write. Back to userspace. @@ -4967,11 +4969,12 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, { unsigned long vaddr = tr->linear_address; gpa_t gpa; + int idx; vcpu_load(vcpu); - down_read(&vcpu->kvm->slots_lock); + idx = srcu_read_lock(&vcpu->kvm->srcu); gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, vaddr); - up_read(&vcpu->kvm->slots_lock); + srcu_read_unlock(&vcpu->kvm->srcu, idx); tr->physical_address = gpa; tr->valid = gpa != UNMAPPED_GVA; tr->writeable = 1; @@ -5223,11 +5226,13 @@ fail: void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) { + int idx; + kfree(vcpu->arch.mce_banks); kvm_free_lapic(vcpu); - down_read(&vcpu->kvm->slots_lock); + idx = srcu_read_lock(&vcpu->kvm->srcu); kvm_mmu_destroy(vcpu); - up_read(&vcpu->kvm->slots_lock); + srcu_read_unlock(&vcpu->kvm->srcu, idx); free_page((unsigned long)vcpu->arch.pio_data); } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5e9cb902550b..0bb9aa295e6c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -83,6 +83,8 @@ struct kvm_vcpu { struct kvm_run *run; unsigned long requests; unsigned long guest_debug; + int srcu_idx; + int fpu_active; int guest_fpu_loaded; wait_queue_head_t wq; -- cgit v1.2.3 From 79fac95ecfa3969aab8119d37ccd7226165f933a Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Wed, 23 Dec 2009 14:35:26 -0200 Subject: KVM: convert slots_lock to a mutex Signed-off-by: Marcelo Tosatti --- arch/ia64/kvm/kvm-ia64.c | 4 ++-- arch/powerpc/kvm/book3s.c | 4 ++-- arch/x86/kvm/i8254.c | 2 +- arch/x86/kvm/i8259.c | 4 ++-- arch/x86/kvm/vmx.c | 8 ++++---- arch/x86/kvm/x86.c | 16 ++++++++-------- include/linux/kvm_host.h | 2 +- virt/kvm/coalesced_mmio.c | 14 +++++++------- virt/kvm/eventfd.c | 10 +++++----- virt/kvm/ioapic.c | 4 ++-- virt/kvm/kvm_main.c | 10 +++++----- 11 files changed, 39 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index d5e384641275..e6ac549f8d55 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1834,7 +1834,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot; int is_dirty = 0; - down_write(&kvm->slots_lock); + mutex_lock(&kvm->slots_lock); spin_lock(&kvm->arch.dirty_log_lock); r = kvm_ia64_sync_dirty_log(kvm, log); @@ -1854,7 +1854,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, } r = 0; out: - up_write(&kvm->slots_lock); + mutex_unlock(&kvm->slots_lock); 
spin_unlock(&kvm->arch.dirty_log_lock); return r; } diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index bb8873dcb20f..492dcc198dd3 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -857,7 +857,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, int is_dirty = 0; int r, n; - down_write(&kvm->slots_lock); + mutex_lock(&kvm->slots_lock); r = kvm_get_dirty_log(kvm, log, &is_dirty); if (r) @@ -879,7 +879,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, r = 0; out: - up_write(&kvm->slots_lock); + mutex_unlock(&kvm->slots_lock); return r; } diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 4b433de02e5b..6a74246f80c6 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -605,7 +605,7 @@ static const struct kvm_io_device_ops speaker_dev_ops = { .write = speaker_ioport_write, }; -/* Caller must have writers lock on slots_lock */ +/* Caller must hold slots_lock */ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) { struct kvm_pit *pit; diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index b7d145b20953..d5753a75d58c 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -533,9 +533,9 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm) * Initialize PIO device */ kvm_iodevice_init(&s->dev, &picdev_ops); - down_write(&kvm->slots_lock); + mutex_lock(&kvm->slots_lock); ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &s->dev); - up_write(&kvm->slots_lock); + mutex_unlock(&kvm->slots_lock); if (ret < 0) { kfree(s); return NULL; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 22ab7137d1d0..f04e2ff21383 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2223,7 +2223,7 @@ static int alloc_apic_access_page(struct kvm *kvm) struct kvm_userspace_memory_region kvm_userspace_mem; int r = 0; - down_write(&kvm->slots_lock); + mutex_lock(&kvm->slots_lock); if (kvm->arch.apic_access_page) goto out; kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT; @@ -2236,7 +2236,7 @@ static int alloc_apic_access_page(struct kvm *kvm) kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00); out: - up_write(&kvm->slots_lock); + mutex_unlock(&kvm->slots_lock); return r; } @@ -2245,7 +2245,7 @@ static int alloc_identity_pagetable(struct kvm *kvm) struct kvm_userspace_memory_region kvm_userspace_mem; int r = 0; - down_write(&kvm->slots_lock); + mutex_lock(&kvm->slots_lock); if (kvm->arch.ept_identity_pagetable) goto out; kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT; @@ -2260,7 +2260,7 @@ static int alloc_identity_pagetable(struct kvm *kvm) kvm->arch.ept_identity_pagetable = gfn_to_page(kvm, kvm->arch.ept_identity_map_addr >> PAGE_SHIFT); out: - up_write(&kvm->slots_lock); + mutex_unlock(&kvm->slots_lock); return r; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 53bc06a68105..aff3479867a8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2208,14 +2208,14 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm, if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES) return -EINVAL; - down_write(&kvm->slots_lock); + mutex_lock(&kvm->slots_lock); spin_lock(&kvm->mmu_lock); kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages; spin_unlock(&kvm->mmu_lock); - up_write(&kvm->slots_lock); + mutex_unlock(&kvm->slots_lock); return 0; } @@ -2292,7 +2292,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, if (!aliases) goto out; - down_write(&kvm->slots_lock); + mutex_lock(&kvm->slots_lock); /* invalidate any gfn reference in case of 
deletion/shrinking */ memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases)); @@ -2328,7 +2328,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, r = 0; out_unlock: - up_write(&kvm->slots_lock); + mutex_unlock(&kvm->slots_lock); out: return r; } @@ -2462,7 +2462,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, unsigned long is_dirty = 0; unsigned long *dirty_bitmap = NULL; - down_write(&kvm->slots_lock); + mutex_lock(&kvm->slots_lock); r = -EINVAL; if (log->slot >= KVM_MEMORY_SLOTS) @@ -2512,7 +2512,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, out_free: vfree(dirty_bitmap); out: - up_write(&kvm->slots_lock); + mutex_unlock(&kvm->slots_lock); return r; } @@ -2625,7 +2625,7 @@ long kvm_arch_vm_ioctl(struct file *filp, sizeof(struct kvm_pit_config))) goto out; create_pit: - down_write(&kvm->slots_lock); + mutex_lock(&kvm->slots_lock); r = -EEXIST; if (kvm->arch.vpit) goto create_pit_unlock; @@ -2634,7 +2634,7 @@ long kvm_arch_vm_ioctl(struct file *filp, if (kvm->arch.vpit) r = 0; create_pit_unlock: - up_write(&kvm->slots_lock); + mutex_unlock(&kvm->slots_lock); break; case KVM_IRQ_LINE_STATUS: case KVM_IRQ_LINE: { diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 0bb9aa295e6c..bb0314ea9267 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -161,7 +161,7 @@ struct kvm_memslots { struct kvm { spinlock_t mmu_lock; spinlock_t requests_lock; - struct rw_semaphore slots_lock; + struct mutex slots_lock; struct mm_struct *mm; /* userspace tied to this vm */ struct kvm_memslots *memslots; struct srcu_struct srcu; diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c index a736a93ca7b7..5de6594260cb 100644 --- a/virt/kvm/coalesced_mmio.c +++ b/virt/kvm/coalesced_mmio.c @@ -110,9 +110,9 @@ int kvm_coalesced_mmio_init(struct kvm *kvm) dev->kvm = kvm; kvm->coalesced_mmio_dev = dev; - down_write(&kvm->slots_lock); + mutex_lock(&kvm->slots_lock); ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &dev->dev); - up_write(&kvm->slots_lock); + mutex_unlock(&kvm->slots_lock); if (ret < 0) goto out_free_dev; @@ -140,16 +140,16 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, if (dev == NULL) return -EINVAL; - down_write(&kvm->slots_lock); + mutex_lock(&kvm->slots_lock); if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) { - up_write(&kvm->slots_lock); + mutex_unlock(&kvm->slots_lock); return -ENOBUFS; } dev->zone[dev->nb_zones] = *zone; dev->nb_zones++; - up_write(&kvm->slots_lock); + mutex_unlock(&kvm->slots_lock); return 0; } @@ -163,7 +163,7 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, if (dev == NULL) return -EINVAL; - down_write(&kvm->slots_lock); + mutex_lock(&kvm->slots_lock); i = dev->nb_zones; while(i) { @@ -181,7 +181,7 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, i--; } - up_write(&kvm->slots_lock); + mutex_unlock(&kvm->slots_lock); return 0; } diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 315a586ec4d5..486c604365d9 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -508,7 +508,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) else p->wildcard = true; - down_write(&kvm->slots_lock); + mutex_lock(&kvm->slots_lock); /* Verify that there isnt a match already */ if (ioeventfd_check_collision(kvm, p)) { @@ -524,12 +524,12 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) list_add_tail(&p->list, &kvm->ioeventfds); - up_write(&kvm->slots_lock); + mutex_unlock(&kvm->slots_lock); return 0; unlock_fail: - 
up_write(&kvm->slots_lock); + mutex_unlock(&kvm->slots_lock); fail: kfree(p); @@ -551,7 +551,7 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) if (IS_ERR(eventfd)) return PTR_ERR(eventfd); - down_write(&kvm->slots_lock); + mutex_lock(&kvm->slots_lock); list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) { bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH); @@ -571,7 +571,7 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) break; } - up_write(&kvm->slots_lock); + mutex_unlock(&kvm->slots_lock); eventfd_ctx_put(eventfd); diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index f326a6f301cc..f01392f51e86 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -372,9 +372,9 @@ int kvm_ioapic_init(struct kvm *kvm) kvm_ioapic_reset(ioapic); kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); ioapic->kvm = kvm; - down_write(&kvm->slots_lock); + mutex_lock(&kvm->slots_lock); ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &ioapic->dev); - up_write(&kvm->slots_lock); + mutex_unlock(&kvm->slots_lock); if (ret < 0) kfree(ioapic); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 659bc12ad16a..2b7cd6c0d9ca 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -429,7 +429,7 @@ static struct kvm *kvm_create_vm(void) kvm_eventfd_init(kvm); mutex_init(&kvm->lock); mutex_init(&kvm->irq_lock); - init_rwsem(&kvm->slots_lock); + mutex_init(&kvm->slots_lock); atomic_set(&kvm->users_count, 1); spin_lock(&kvm_lock); list_add(&kvm->vm_list, &vm_list); @@ -763,9 +763,9 @@ int kvm_set_memory_region(struct kvm *kvm, { int r; - down_write(&kvm->slots_lock); + mutex_lock(&kvm->slots_lock); r = __kvm_set_memory_region(kvm, mem, user_alloc); - up_write(&kvm->slots_lock); + mutex_unlock(&kvm->slots_lock); return r; } EXPORT_SYMBOL_GPL(kvm_set_memory_region); @@ -1997,7 +1997,7 @@ int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, return -EOPNOTSUPP; } -/* Caller must have write lock on slots_lock. */ +/* Caller must hold slots_lock. */ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, struct kvm_io_device *dev) { @@ -2019,7 +2019,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, return 0; } -/* Caller must have write lock on slots_lock. */ +/* Caller must hold slots_lock. */ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, struct kvm_io_device *dev) { -- cgit v1.2.3 From 02daab21d94dc4cf01b2fd09863d59a436900322 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 30 Dec 2009 12:40:26 +0200 Subject: KVM: Lazify fpu activation and deactivation Defer fpu deactivation as much as possible - if the guest fpu is loaded, keep it loaded until the next heavyweight exit (where we are forced to unload it). This reduces unnecessary exits. We also defer fpu activation on clts; while clts signals the intent to use the fpu, we can't be sure the guest will actually use it. 
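A condensed sketch of the deferral this patch introduces, pulled together from the hunks below. kvm_check_fpu_request() is an invented name for logic the patch actually open-codes in vcpu_enter_guest(); the rest mirrors the diff.

	/* Unloading the guest FPU no longer deactivates it right away;
	 * it only records a request that is honoured at the next entry. */
	void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
	{
		if (!vcpu->guest_fpu_loaded)
			return;

		vcpu->guest_fpu_loaded = 0;
		kvm_fx_save(&vcpu->arch.guest_fx_image);
		kvm_fx_restore(&vcpu->arch.host_fx_image);
		++vcpu->stat.fpu_reload;
		/* defer the exception-bitmap/CR0.TS work to the next heavyweight exit */
		set_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests);
	}

	/* Run from vcpu_enter_guest() before re-entering the guest. */
	static void kvm_check_fpu_request(struct kvm_vcpu *vcpu)
	{
		if (test_and_clear_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests)) {
			vcpu->fpu_active = 0;
			kvm_x86_ops->fpu_deactivate(vcpu);	/* svm/vmx hook added below */
		}
	}

The net effect is that a guest which keeps using the FPU never toggles the intercept on every exit; deactivation only happens once a heavyweight exit has already forced the guest FPU to be unloaded.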
Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/svm.c | 35 +++++++++++++++++++++-------------- arch/x86/kvm/vmx.c | 25 +++++++++---------------- arch/x86/kvm/x86.c | 7 ++++++- include/linux/kvm_host.h | 1 + 5 files changed, 38 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 693046a7a12d..93bee7abb71c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -506,6 +506,7 @@ struct kvm_x86_ops { void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); + void (*fpu_deactivate)(struct kvm_vcpu *vcpu); void (*tlb_flush)(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 3899c2d19830..5b336a80f31e 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -984,17 +984,11 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) if (npt_enabled) goto set; - if (kvm_read_cr0_bits(vcpu, X86_CR0_TS) && !(cr0 & X86_CR0_TS)) { - svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); - vcpu->fpu_active = 1; - } - vcpu->arch.cr0 = cr0; cr0 |= X86_CR0_PG | X86_CR0_WP; - if (!vcpu->fpu_active) { - svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR); + + if (!vcpu->fpu_active) cr0 |= X86_CR0_TS; - } set: /* * re-enable caching here because the QEMU bios @@ -1250,6 +1244,8 @@ static int nm_interception(struct vcpu_svm *svm) svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); if (!kvm_read_cr0_bits(&svm->vcpu, X86_CR0_TS)) svm->vmcb->save.cr0 &= ~X86_CR0_TS; + else + svm->vmcb->save.cr0 |= X86_CR0_TS; svm->vcpu.fpu_active = 1; return 1; @@ -2586,6 +2582,8 @@ static void svm_flush_tlb(struct kvm_vcpu *vcpu) static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu) { + if (npt_enabled) + vcpu->fpu_active = 1; } static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu) @@ -2805,12 +2803,6 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) svm->vmcb->save.cr3 = root; force_new_asid(vcpu); - - if (vcpu->fpu_active) { - svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR); - svm->vmcb->save.cr0 |= X86_CR0_TS; - vcpu->fpu_active = 0; - } } static int is_disabled(void) @@ -2926,6 +2918,20 @@ static bool svm_rdtscp_supported(void) return false; } +static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + if (npt_enabled) { + /* hack: npt requires active fpu at this time */ + vcpu->fpu_active = 1; + return; + } + + svm->vmcb->control.intercept_exceptions |= 1 << NM_VECTOR; + svm->vmcb->save.cr0 |= X86_CR0_TS; +} + static struct kvm_x86_ops svm_x86_ops = { .cpu_has_kvm_support = has_svm, .disabled_by_bios = is_disabled, @@ -2967,6 +2973,7 @@ static struct kvm_x86_ops svm_x86_ops = { .cache_reg = svm_cache_reg, .get_rflags = svm_get_rflags, .set_rflags = svm_set_rflags, + .fpu_deactivate = svm_fpu_deactivate, .tlb_flush = svm_flush_tlb, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index dbcdb55094f7..d11be3fb7c80 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -66,7 +66,7 @@ module_param(emulate_invalid_guest_state, bool, S_IRUGO); #define KVM_GUEST_CR0_MASK \ (KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST \ - (X86_CR0_WP | X86_CR0_NE | X86_CR0_TS | X86_CR0_MP) + (X86_CR0_WP | X86_CR0_NE | X86_CR0_MP) #define 
KVM_VM_CR0_ALWAYS_ON \ (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) #define KVM_CR4_GUEST_OWNED_BITS \ @@ -579,9 +579,8 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) { u32 eb; - eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR); - if (!vcpu->fpu_active) - eb |= 1u << NM_VECTOR; + eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) + | (1u << NM_VECTOR); /* * Unconditionally intercept #DB so we can maintain dr6 without * reading it every exit. @@ -595,6 +594,8 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) eb = ~0; if (enable_ept) eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */ + if (vcpu->fpu_active) + eb &= ~(1u << NM_VECTOR); vmcs_write32(EXCEPTION_BITMAP, eb); } @@ -806,9 +807,6 @@ static void vmx_fpu_activate(struct kvm_vcpu *vcpu) static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu) { - if (!vcpu->fpu_active) - return; - vcpu->fpu_active = 0; vmcs_set_bits(GUEST_CR0, X86_CR0_TS); update_exception_bitmap(vcpu); } @@ -1737,8 +1735,6 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) else hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON; - vmx_fpu_deactivate(vcpu); - if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE)) enter_pmode(vcpu); @@ -1757,12 +1753,12 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) if (enable_ept) ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu); + if (!vcpu->fpu_active) + hw_cr0 |= X86_CR0_TS; + vmcs_writel(CR0_READ_SHADOW, cr0); vmcs_writel(GUEST_CR0, hw_cr0); vcpu->arch.cr0 = cr0; - - if (!(cr0 & X86_CR0_TS) || !(cr0 & X86_CR0_PE)) - vmx_fpu_activate(vcpu); } static u64 construct_eptp(unsigned long root_hpa) @@ -1793,8 +1789,6 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) vmx_flush_tlb(vcpu); vmcs_writel(GUEST_CR3, guest_cr3); - if (kvm_read_cr0_bits(vcpu, X86_CR0_PE)) - vmx_fpu_deactivate(vcpu); } static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) @@ -3002,11 +2996,9 @@ static int handle_cr(struct kvm_vcpu *vcpu) }; break; case 2: /* clts */ - vmx_fpu_deactivate(vcpu); vcpu->arch.cr0 &= ~X86_CR0_TS; vmcs_writel(CR0_READ_SHADOW, kvm_read_cr0(vcpu)); trace_kvm_cr_write(0, kvm_read_cr0(vcpu)); - vmx_fpu_activate(vcpu); skip_emulated_instruction(vcpu); return 1; case 1: /*mov from cr*/ @@ -4127,6 +4119,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .cache_reg = vmx_cache_reg, .get_rflags = vmx_get_rflags, .set_rflags = vmx_set_rflags, + .fpu_deactivate = vmx_fpu_deactivate, .tlb_flush = vmx_flush_tlb, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 748b15d8e46d..1de2ad7a004d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1509,8 +1509,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { - kvm_x86_ops->vcpu_put(vcpu); kvm_put_guest_fpu(vcpu); + kvm_x86_ops->vcpu_put(vcpu); } static int is_efer_nx(void) @@ -4006,6 +4006,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) r = 0; goto out; } + if (test_and_clear_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests)) { + vcpu->fpu_active = 0; + kvm_x86_ops->fpu_deactivate(vcpu); + } } preempt_disable(); @@ -5075,6 +5079,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) kvm_fx_save(&vcpu->arch.guest_fx_image); kvm_fx_restore(&vcpu->arch.host_fx_image); ++vcpu->stat.fpu_reload; + set_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests); } EXPORT_SYMBOL_GPL(kvm_put_guest_fpu); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index bb0314ea9267..dfde04b0d453 100644 --- 
a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -38,6 +38,7 @@ #define KVM_REQ_MMU_SYNC 7 #define KVM_REQ_KVMCLOCK_UPDATE 8 #define KVM_REQ_KICK 9 +#define KVM_REQ_DEACTIVATE_FPU 10 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 -- cgit v1.2.3 From 55cd8e5a4edb8e235163ffe8264b9aaa8d7c050f Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 17 Jan 2010 15:51:22 +0200 Subject: KVM: Implement bare minimum of HYPER-V MSRs Minimum HYPER-V implementation should have GUEST_OS_ID, HYPERCALL and VP_INDEX MSRs. [avi: fix build on i386] Signed-off-by: Gleb Natapov Signed-off-by: Vadim Rozenfeld Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 4 + arch/x86/include/asm/kvm_para.h | 1 + arch/x86/kvm/trace.h | 32 +++++++ arch/x86/kvm/x86.c | 193 +++++++++++++++++++++++++++++++++++++++- include/linux/kvm.h | 1 + 5 files changed, 230 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 93bee7abb71c..67d19e422006 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -413,6 +413,10 @@ struct kvm_arch { s64 kvmclock_offset; struct kvm_xen_hvm_config xen_hvm_config; + + /* fields used by HYPER-V emulation */ + u64 hv_guest_os_id; + u64 hv_hypercall; }; struct kvm_vm_stat { diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index c584076a47f4..ffae1420e7d7 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -2,6 +2,7 @@ #define _ASM_X86_KVM_PARA_H #include +#include /* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx. It * should be used to determine that a VM is running under KVM. diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 816e0449db0b..1cb3d0e990f3 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -55,6 +55,38 @@ TRACE_EVENT(kvm_hypercall, __entry->a3) ); +/* + * Tracepoint for hypercall. + */ +TRACE_EVENT(kvm_hv_hypercall, + TP_PROTO(__u16 code, bool fast, __u16 rep_cnt, __u16 rep_idx, + __u64 ingpa, __u64 outgpa), + TP_ARGS(code, fast, rep_cnt, rep_idx, ingpa, outgpa), + + TP_STRUCT__entry( + __field( __u16, code ) + __field( bool, fast ) + __field( __u16, rep_cnt ) + __field( __u16, rep_idx ) + __field( __u64, ingpa ) + __field( __u64, outgpa ) + ), + + TP_fast_assign( + __entry->code = code; + __entry->fast = fast; + __entry->rep_cnt = rep_cnt; + __entry->rep_idx = rep_idx; + __entry->ingpa = ingpa; + __entry->outgpa = outgpa; + ), + + TP_printk("code 0x%x %s cnt 0x%x idx 0x%x in 0x%llx out 0x%llx", + __entry->code, __entry->fast ? "fast" : "slow", + __entry->rep_cnt, __entry->rep_idx, __entry->ingpa, + __entry->outgpa) +); + /* * Tracepoint for PIO. */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1ad34d185da9..480137db4770 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -622,9 +622,10 @@ static inline u32 bit(int bitno) * kvm-specific. Those are put in the beginning of the list. 
*/ -#define KVM_SAVE_MSRS_BEGIN 2 +#define KVM_SAVE_MSRS_BEGIN 4 static u32 msrs_to_save[] = { MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, + HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, MSR_K6_STAR, #ifdef CONFIG_X86_64 @@ -1004,6 +1005,74 @@ out: return r; } +static bool kvm_hv_hypercall_enabled(struct kvm *kvm) +{ + return kvm->arch.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE; +} + +static bool kvm_hv_msr_partition_wide(u32 msr) +{ + bool r = false; + switch (msr) { + case HV_X64_MSR_GUEST_OS_ID: + case HV_X64_MSR_HYPERCALL: + r = true; + break; + } + + return r; +} + +static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) +{ + struct kvm *kvm = vcpu->kvm; + + switch (msr) { + case HV_X64_MSR_GUEST_OS_ID: + kvm->arch.hv_guest_os_id = data; + /* setting guest os id to zero disables hypercall page */ + if (!kvm->arch.hv_guest_os_id) + kvm->arch.hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE; + break; + case HV_X64_MSR_HYPERCALL: { + u64 gfn; + unsigned long addr; + u8 instructions[4]; + + /* if guest os id is not set hypercall should remain disabled */ + if (!kvm->arch.hv_guest_os_id) + break; + if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) { + kvm->arch.hv_hypercall = data; + break; + } + gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT; + addr = gfn_to_hva(kvm, gfn); + if (kvm_is_error_hva(addr)) + return 1; + kvm_x86_ops->patch_hypercall(vcpu, instructions); + ((unsigned char *)instructions)[3] = 0xc3; /* ret */ + if (copy_to_user((void __user *)addr, instructions, 4)) + return 1; + kvm->arch.hv_hypercall = data; + break; + } + default: + pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x " + "data 0x%llx\n", msr, data); + return 1; + } + return 0; +} + +static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data) +{ + pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x data 0x%llx\n", + msr, data); + + return 1; +} + int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) { switch (msr) { @@ -1118,6 +1187,16 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) pr_unimpl(vcpu, "unimplemented perfctr wrmsr: " "0x%x data 0x%llx\n", msr, data); break; + case HV_X64_MSR_GUEST_OS_ID ... 
HV_X64_MSR_SINT15: + if (kvm_hv_msr_partition_wide(msr)) { + int r; + mutex_lock(&vcpu->kvm->lock); + r = set_msr_hyperv_pw(vcpu, msr, data); + mutex_unlock(&vcpu->kvm->lock); + return r; + } else + return set_msr_hyperv(vcpu, msr, data); + break; default: if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) return xen_hvm_config(vcpu, data); @@ -1217,6 +1296,48 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) return 0; } +static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) +{ + u64 data = 0; + struct kvm *kvm = vcpu->kvm; + + switch (msr) { + case HV_X64_MSR_GUEST_OS_ID: + data = kvm->arch.hv_guest_os_id; + break; + case HV_X64_MSR_HYPERCALL: + data = kvm->arch.hv_hypercall; + break; + default: + pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); + return 1; + } + + *pdata = data; + return 0; +} + +static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) +{ + u64 data = 0; + + switch (msr) { + case HV_X64_MSR_VP_INDEX: { + int r; + struct kvm_vcpu *v; + kvm_for_each_vcpu(r, v, vcpu->kvm) + if (v == vcpu) + data = r; + break; + } + default: + pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); + return 1; + } + *pdata = data; + return 0; +} + int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) { u64 data; @@ -1283,6 +1404,16 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case MSR_IA32_MCG_STATUS: case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: return get_msr_mce(vcpu, msr, pdata); + case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: + if (kvm_hv_msr_partition_wide(msr)) { + int r; + mutex_lock(&vcpu->kvm->lock); + r = get_msr_hyperv_pw(vcpu, msr, pdata); + mutex_unlock(&vcpu->kvm->lock); + return r; + } else + return get_msr_hyperv(vcpu, msr, pdata); + break; default: if (!ignore_msrs) { pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); @@ -1398,6 +1529,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_XEN_HVM: case KVM_CAP_ADJUST_CLOCK: case KVM_CAP_VCPU_EVENTS: + case KVM_CAP_HYPERV: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -3618,11 +3750,70 @@ static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0, return a0 | ((gpa_t)a1 << 32); } +int kvm_hv_hypercall(struct kvm_vcpu *vcpu) +{ + u64 param, ingpa, outgpa, ret; + uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0; + bool fast, longmode; + int cs_db, cs_l; + + /* + * hypercall generates UD from non zero cpl and real mode + * per HYPER-V spec + */ + if (kvm_x86_ops->get_cpl(vcpu) != 0 || + !kvm_read_cr0_bits(vcpu, X86_CR0_PE)) { + kvm_queue_exception(vcpu, UD_VECTOR); + return 0; + } + + kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); + longmode = is_long_mode(vcpu) && cs_l == 1; + + if (!longmode) { + param = (kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) | + (kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffff); + ingpa = (kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) | + (kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffff); + outgpa = (kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) | + (kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffff); + } +#ifdef CONFIG_X86_64 + else { + param = kvm_register_read(vcpu, VCPU_REGS_RCX); + ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX); + outgpa = kvm_register_read(vcpu, VCPU_REGS_R8); + } +#endif + + code = param & 0xffff; + fast = (param >> 16) & 0x1; + rep_cnt = (param >> 32) & 0xfff; + rep_idx = (param >> 48) & 0xfff; + + trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa); + + res = 
HV_STATUS_INVALID_HYPERCALL_CODE; + + ret = res | (((u64)rep_done & 0xfff) << 32); + if (longmode) { + kvm_register_write(vcpu, VCPU_REGS_RAX, ret); + } else { + kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32); + kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff); + } + + return 1; +} + int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) { unsigned long nr, a0, a1, a2, a3, ret; int r = 1; + if (kvm_hv_hypercall_enabled(vcpu->kvm)) + return kvm_hv_hypercall(vcpu); + nr = kvm_register_read(vcpu, VCPU_REGS_RAX); a0 = kvm_register_read(vcpu, VCPU_REGS_RBX); a1 = kvm_register_read(vcpu, VCPU_REGS_RCX); diff --git a/include/linux/kvm.h b/include/linux/kvm.h index f2feef68ffd6..e227cbae70ad 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -497,6 +497,7 @@ struct kvm_ioeventfd { #endif #define KVM_CAP_S390_PSW 42 #define KVM_CAP_PPC_SEGSTATE 43 +#define KVM_CAP_HYPERV 44 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 10388a07164c1512b3a3d0273b9adc230f82790e Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 17 Jan 2010 15:51:23 +0200 Subject: KVM: Add HYPER-V apic access MSRs Implement HYPER-V apic MSRs. Spec defines three MSRs that speed-up access to EOI/TPR/ICR apic registers for PV guests. Signed-off-by: Gleb Natapov Signed-off-by: Vadim Rozenfeld Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/lapic.c | 31 ++++++++++++++++++++++++++++++ arch/x86/kvm/lapic.h | 8 ++++++++ arch/x86/kvm/x86.c | 42 +++++++++++++++++++++++++++++++++++++---- include/linux/kvm.h | 1 + 5 files changed, 80 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 67d19e422006..a1f0b5dd7d75 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -363,6 +363,8 @@ struct kvm_vcpu_arch { /* used for guest single stepping over the given code position */ u16 singlestep_cs; unsigned long singlestep_rip; + /* fields used by HYPER-V emulation */ + u64 hv_vapic; }; struct kvm_mem_alias { diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index ba8c045da782..4b224f90087b 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1246,3 +1246,34 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data) return 0; } + +int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + + if (!irqchip_in_kernel(vcpu->kvm)) + return 1; + + /* if this is ICR write vector before command */ + if (reg == APIC_ICR) + apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32)); + return apic_reg_write(apic, reg, (u32)data); +} + +int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + u32 low, high = 0; + + if (!irqchip_in_kernel(vcpu->kvm)) + return 1; + + if (apic_reg_read(apic, reg, 4, &low)) + return 1; + if (reg == APIC_ICR) + apic_reg_read(apic, APIC_ICR2, 4, &high); + + *data = (((u64)high) << 32) | low; + + return 0; +} diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 40010b09c4aa..f5fe32c5edad 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -48,4 +48,12 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu); int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data); int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data); + +int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data); +int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data); + +static inline bool 
kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE; +} #endif diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 480137db4770..552be51e4d84 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -622,10 +622,11 @@ static inline u32 bit(int bitno) * kvm-specific. Those are put in the beginning of the list. */ -#define KVM_SAVE_MSRS_BEGIN 4 +#define KVM_SAVE_MSRS_BEGIN 5 static u32 msrs_to_save[] = { MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, + HV_X64_MSR_APIC_ASSIST_PAGE, MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, MSR_K6_STAR, #ifdef CONFIG_X86_64 @@ -1067,10 +1068,36 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data) { - pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x data 0x%llx\n", - msr, data); + switch (msr) { + case HV_X64_MSR_APIC_ASSIST_PAGE: { + unsigned long addr; - return 1; + if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) { + vcpu->arch.hv_vapic = data; + break; + } + addr = gfn_to_hva(vcpu->kvm, data >> + HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT); + if (kvm_is_error_hva(addr)) + return 1; + if (clear_user((void __user *)addr, PAGE_SIZE)) + return 1; + vcpu->arch.hv_vapic = data; + break; + } + case HV_X64_MSR_EOI: + return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data); + case HV_X64_MSR_ICR: + return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data); + case HV_X64_MSR_TPR: + return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data); + default: + pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x " + "data 0x%llx\n", msr, data); + return 1; + } + + return 0; } int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) @@ -1330,6 +1357,12 @@ static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) data = r; break; } + case HV_X64_MSR_EOI: + return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata); + case HV_X64_MSR_ICR: + return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata); + case HV_X64_MSR_TPR: + return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata); default: pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); return 1; @@ -1530,6 +1563,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_ADJUST_CLOCK: case KVM_CAP_VCPU_EVENTS: case KVM_CAP_HYPERV: + case KVM_CAP_HYPERV_VAPIC: r = 1; break; case KVM_CAP_COALESCED_MMIO: diff --git a/include/linux/kvm.h b/include/linux/kvm.h index e227cbae70ad..5ce61738dc30 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -498,6 +498,7 @@ struct kvm_ioeventfd { #define KVM_CAP_S390_PSW 42 #define KVM_CAP_PPC_SEGSTATE 43 #define KVM_CAP_HYPERV 44 +#define KVM_CAP_HYPERV_VAPIC 45 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From c25bc1638a1211f57cccbabdd8b732813b852340 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 17 Jan 2010 15:51:24 +0200 Subject: KVM: Implement NotifyLongSpinWait HYPER-V hypercall Windows issues this hypercall after guest was spinning on a spinlock for too many iterations. 
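For reference, a condensed sketch of the hypercall path the Hyper-V patches in this series build up: the input-value decode is taken from the "bare minimum of HYPER-V MSRs" patch above, and the spin-wait case is the one added here. hv_hypercall_dispatch() is an invented name; in the patches this logic lives in kvm_hv_hypercall(), which also fetches the guest registers and writes the result back.

	static u64 hv_hypercall_dispatch(struct kvm_vcpu *vcpu, u64 param)
	{
		u16 code     = param & 0xffff;		/* hypercall code             */
		bool fast    = (param >> 16) & 0x1;	/* args in regs (unused here) */
		u16 rep_cnt  = (param >> 32) & 0xfff;	/* rep calls: count           */
		u16 rep_idx  = (param >> 48) & 0xfff;	/* rep calls: start index     */
		u16 res      = HV_STATUS_SUCCESS;
		u16 rep_done = 0;

		switch (code) {
		case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT:
			/* The guest has spun "too long" on a lock: let the
			 * host scheduler give another vcpu a chance to run. */
			kvm_vcpu_on_spin(vcpu);
			break;
		default:
			res = HV_STATUS_INVALID_HYPERCALL_CODE;
			break;
		}

		/* result: status in bits 15:0, completed rep count in bits 43:32 */
		return (u64)res | (((u64)rep_done & 0xfff) << 32);
	}

HV_X64_HV_NOTIFY_LONG_SPIN_WAIT and the HV_STATUS_* constants come from the Hyper-V definitions header these patches reference but which is not shown in the hunks here.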
Signed-off-by: Gleb Natapov Signed-off-by: Vadim Rozenfeld Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 10 +++++++++- include/linux/kvm.h | 1 + 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 552be51e4d84..9f72a443455b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1564,6 +1564,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_VCPU_EVENTS: case KVM_CAP_HYPERV: case KVM_CAP_HYPERV_VAPIC: + case KVM_CAP_HYPERV_SPIN: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -3827,7 +3828,14 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa); - res = HV_STATUS_INVALID_HYPERCALL_CODE; + switch (code) { + case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT: + kvm_vcpu_on_spin(vcpu); + break; + default: + res = HV_STATUS_INVALID_HYPERCALL_CODE; + break; + } ret = res | (((u64)rep_done & 0xfff) << 32); if (longmode) { diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 5ce61738dc30..4c4937e7f65f 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -499,6 +499,7 @@ struct kvm_ioeventfd { #define KVM_CAP_PPC_SEGSTATE 43 #define KVM_CAP_HYPERV 44 #define KVM_CAP_HYPERV_VAPIC 45 +#define KVM_CAP_HYPERV_SPIN 46 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 0c04851c0c093ce98ab4ca69556480d779292418 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 21 Jan 2010 15:31:52 +0200 Subject: KVM: trace guest fpu loads and unloads Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 2 ++ include/trace/events/kvm.h | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+) (limited to 'include') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 27af6e353b06..3b90298fb980 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5292,6 +5292,7 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) vcpu->guest_fpu_loaded = 1; kvm_fx_save(&vcpu->arch.host_fx_image); kvm_fx_restore(&vcpu->arch.guest_fx_image); + trace_kvm_fpu(1); } void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) @@ -5304,6 +5305,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) kvm_fx_restore(&vcpu->arch.host_fx_image); ++vcpu->stat.fpu_reload; set_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests); + trace_kvm_fpu(0); } void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index dbe108455275..8abdc1230143 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -145,6 +145,25 @@ TRACE_EVENT(kvm_mmio, __entry->len, __entry->gpa, __entry->val) ); +#define kvm_fpu_load_symbol \ + {0, "unload"}, \ + {1, "load"} + +TRACE_EVENT(kvm_fpu, + TP_PROTO(int load), + TP_ARGS(load), + + TP_STRUCT__entry( + __field( u32, load ) + ), + + TP_fast_assign( + __entry->load = load; + ), + + TP_printk("%s", __print_symbolic(__entry->load, kvm_fpu_load_symbol)) +); + #endif /* _TRACE_KVM_MAIN_H */ /* This part must be outside protection */ -- cgit v1.2.3 From ab9f4ecbb6d39a18e300a0d10a4968c37404aa76 Mon Sep 17 00:00:00 2001 From: "Zhai, Edwin" Date: Fri, 29 Jan 2010 14:38:44 +0800 Subject: KVM: enable PCI multiple-segments for pass-through device Enable optional parameter (default 0) - PCI segment (or domain) besides BDF, when assigning PCI device to guest. 
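A hypothetical userspace caller of the extended ABI. The field names come from the struct kvm_assigned_pci_dev change below; the KVM_ASSIGN_PCI_DEVICE ioctl and the KVM_DEV_ASSIGN_ENABLE_IOMMU flag are assumed from the pre-existing device-assignment interface, and assigned_dev_id is simply an identifier chosen by userspace. Leaving segnr at zero keeps the old single-segment behaviour.

	#include <linux/kvm.h>
	#include <string.h>
	#include <sys/ioctl.h>

	static int assign_host_device(int vm_fd, unsigned int seg,
				      unsigned int bus, unsigned int devfn)
	{
		struct kvm_assigned_pci_dev dev;

		memset(&dev, 0, sizeof(dev));
		dev.assigned_dev_id = (bus << 8) | devfn;	/* userspace-chosen id   */
		dev.segnr = seg;				/* new field: PCI domain */
		dev.busnr = bus;
		dev.devfn = devfn;
		dev.flags = KVM_DEV_ASSIGN_ENABLE_IOMMU;

		/* e.g. seg=1, bus=0x05, devfn=PCI_DEVFN(0x10, 0) for 0001:05:10.0 */
		return ioctl(vm_fd, KVM_ASSIGN_PCI_DEVICE, &dev);
	}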
Signed-off-by: Zhai Edwin Acked-by: Chris Wright Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 1 + include/linux/kvm.h | 4 +++- include/linux/kvm_host.h | 1 + virt/kvm/assigned-dev.c | 4 +++- virt/kvm/iommu.c | 9 ++++++--- 5 files changed, 14 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d47ceda7a928..0bf3df527afc 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1569,6 +1569,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_HYPERV: case KVM_CAP_HYPERV_VAPIC: case KVM_CAP_HYPERV_SPIN: + case KVM_CAP_PCI_SEGMENT: r = 1; break; case KVM_CAP_COALESCED_MMIO: diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 4c4937e7f65f..dfa54be881f4 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -500,6 +500,7 @@ struct kvm_ioeventfd { #define KVM_CAP_HYPERV 44 #define KVM_CAP_HYPERV_VAPIC 45 #define KVM_CAP_HYPERV_SPIN 46 +#define KVM_CAP_PCI_SEGMENT 47 #ifdef KVM_CAP_IRQ_ROUTING @@ -694,8 +695,9 @@ struct kvm_assigned_pci_dev { __u32 busnr; __u32 devfn; __u32 flags; + __u32 segnr; union { - __u32 reserved[12]; + __u32 reserved[11]; }; }; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index dfde04b0d453..665c37063f30 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -400,6 +400,7 @@ struct kvm_assigned_dev_kernel { struct work_struct interrupt_work; struct list_head list; int assigned_dev_id; + int host_segnr; int host_busnr; int host_devfn; unsigned int entries_nr; diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c index f51e684dd238..057e2cca6af5 100644 --- a/virt/kvm/assigned-dev.c +++ b/virt/kvm/assigned-dev.c @@ -526,7 +526,8 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, r = -ENOMEM; goto out; } - dev = pci_get_bus_and_slot(assigned_dev->busnr, + dev = pci_get_domain_bus_and_slot(assigned_dev->segnr, + assigned_dev->busnr, assigned_dev->devfn); if (!dev) { printk(KERN_INFO "%s: host device not found\n", __func__); @@ -548,6 +549,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, pci_reset_function(dev); match->assigned_dev_id = assigned_dev->assigned_dev_id; + match->host_segnr = assigned_dev->segnr; match->host_busnr = assigned_dev->busnr; match->host_devfn = assigned_dev->devfn; match->flags = assigned_dev->flags; diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index 65a51432c8e5..80fd3ad3b2de 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c @@ -106,7 +106,8 @@ int kvm_assign_device(struct kvm *kvm, r = iommu_attach_device(domain, &pdev->dev); if (r) { - printk(KERN_ERR "assign device %x:%x.%x failed", + printk(KERN_ERR "assign device %x:%x:%x.%x failed", + pci_domain_nr(pdev->bus), pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); @@ -127,7 +128,8 @@ int kvm_assign_device(struct kvm *kvm, goto out_unmap; } - printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n", + printk(KERN_DEBUG "assign device %x:%x:%x.%x\n", + assigned_dev->host_segnr, assigned_dev->host_busnr, PCI_SLOT(assigned_dev->host_devfn), PCI_FUNC(assigned_dev->host_devfn)); @@ -154,7 +156,8 @@ int kvm_deassign_device(struct kvm *kvm, iommu_detach_device(domain, &pdev->dev); - printk(KERN_DEBUG "deassign device: host bdf = %x:%x:%x\n", + printk(KERN_DEBUG "deassign device %x:%x:%x.%x\n", + assigned_dev->host_segnr, assigned_dev->host_busnr, PCI_SLOT(assigned_dev->host_devfn), PCI_FUNC(assigned_dev->host_devfn)); -- cgit v1.2.3 From 8f0b1ab6fb045a1324d9435ba00c2940783b0041 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: 
Thu, 28 Jan 2010 12:37:56 +0100 Subject: KVM: Introduce kvm_host_page_size This patch introduces a generic function to find out the host page size for a given gfn. This function is needed by the kvm iommu code. This patch also simplifies the x86 host_mapping_level function. Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 18 ++---------------- include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 25 +++++++++++++++++++++++++ 3 files changed, 28 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index dc4d954efacd..913ef4b7939a 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -468,24 +468,10 @@ static int has_wrprotected_page(struct kvm *kvm, static int host_mapping_level(struct kvm *kvm, gfn_t gfn) { - unsigned long page_size = PAGE_SIZE; - struct vm_area_struct *vma; - unsigned long addr; + unsigned long page_size; int i, ret = 0; - addr = gfn_to_hva(kvm, gfn); - if (kvm_is_error_hva(addr)) - return PT_PAGE_TABLE_LEVEL; - - down_read(¤t->mm->mmap_sem); - vma = find_vma(current->mm, addr); - if (!vma) - goto out; - - page_size = vma_kernel_pagesize(vma); - -out: - up_read(¤t->mm->mmap_sem); + page_size = kvm_host_page_size(kvm, gfn); for (i = PT_PAGE_TABLE_LEVEL; i < (PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES); ++i) { diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 665c37063f30..3145b281de9d 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -300,6 +300,7 @@ int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); +unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn); void mark_page_dirty(struct kvm *kvm, gfn_t gfn); void kvm_vcpu_block(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 2b0974a14835..0a360c26cc34 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -867,6 +868,30 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) } EXPORT_SYMBOL_GPL(kvm_is_visible_gfn); +unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn) +{ + struct vm_area_struct *vma; + unsigned long addr, size; + + size = PAGE_SIZE; + + addr = gfn_to_hva(kvm, gfn); + if (kvm_is_error_hva(addr)) + return PAGE_SIZE; + + down_read(¤t->mm->mmap_sem); + vma = find_vma(current->mm, addr); + if (!vma) + goto out; + + size = vma_kernel_pagesize(vma); + +out: + up_read(¤t->mm->mmap_sem); + + return size; +} + int memslot_id(struct kvm *kvm, gfn_t gfn) { int i; -- cgit v1.2.3 From 90bb6fc556ab255abd798bcf4ff5769690ab2eea Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 31 Dec 2009 12:10:16 +0200 Subject: KVM: MMU: Add tracepoint for guest page aging Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 11 ++++++++--- include/trace/events/kvm.h | 22 ++++++++++++++++++++++ 2 files changed, 30 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index b8da6715d08b..739793240d1d 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -151,6 +151,9 @@ module_param(oos_shadow, bool, 0644); #define ACC_USER_MASK PT_USER_MASK #define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK) +#include + +#undef TRACE_INCLUDE_FILE #define CREATE_TRACE_POINTS #include "mmutrace.h" @@ -792,6 +795,7 @@ static int 
kvm_handle_hva(struct kvm *kvm, unsigned long hva, unsigned long data)) { int i, j; + int ret; int retval = 0; struct kvm_memslots *slots; @@ -806,16 +810,17 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, if (hva >= start && hva < end) { gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; - retval |= handler(kvm, &memslot->rmap[gfn_offset], - data); + ret = handler(kvm, &memslot->rmap[gfn_offset], data); for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) { int idx = gfn_offset; idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j); - retval |= handler(kvm, + ret |= handler(kvm, &memslot->lpage_info[j][idx].rmap_pde, data); } + trace_kvm_age_page(hva, memslot, ret); + retval |= ret; } } diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 8abdc1230143..b17d49dfc3ef 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -164,6 +164,28 @@ TRACE_EVENT(kvm_fpu, TP_printk("%s", __print_symbolic(__entry->load, kvm_fpu_load_symbol)) ); +TRACE_EVENT(kvm_age_page, + TP_PROTO(ulong hva, struct kvm_memory_slot *slot, int ref), + TP_ARGS(hva, slot, ref), + + TP_STRUCT__entry( + __field( u64, hva ) + __field( u64, gfn ) + __field( u8, referenced ) + ), + + TP_fast_assign( + __entry->hva = hva; + __entry->gfn = + slot->base_gfn + ((hva - slot->userspace_addr) >> PAGE_SHIFT); + __entry->referenced = ref; + ), + + TP_printk("hva %llx gfn %llx %s", + __entry->hva, __entry->gfn, + __entry->referenced ? "YOUNG" : "OLD") +); + #endif /* _TRACE_KVM_MAIN_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 70e335e16882df5b5d6971022e63c3603a1e8c23 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 18 Feb 2010 11:25:22 +0200 Subject: KVM: Convert kvm->requests_lock to raw_spinlock_t The code relies on kvm->requests_lock inhibiting preemption. Noted by Jan Kiszka. 
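For reference, a minimal sketch of the pattern this conversion protects (the preempt-rt angle is an assumption: on such configurations spinlock_t becomes a sleeping lock and no longer disables preemption, whereas raw_spinlock_t always does):

	/* Illustrative sketch only; mirrors make_all_cpus_request() below. */
	raw_spin_lock(&kvm->requests_lock);
	/* raw_spin_lock() disables preemption on every configuration, so the
	 * CPU id read here stays stable until the unlock. */
	me = smp_processor_id();
	/* ... mark vcpu requests and collect the CPUs to IPI ... */
	raw_spin_unlock(&kvm->requests_lock);
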
Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 2 +- virt/kvm/kvm_main.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 3145b281de9d..a3fd0f91d943 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -161,7 +161,7 @@ struct kvm_memslots { struct kvm { spinlock_t mmu_lock; - spinlock_t requests_lock; + raw_spinlock_t requests_lock; struct mutex slots_lock; struct mm_struct *mm; /* userspace tied to this vm */ struct kvm_memslots *memslots; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 0a360c26cc34..548f9253c195 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -138,7 +138,7 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req) zalloc_cpumask_var(&cpus, GFP_ATOMIC); - spin_lock(&kvm->requests_lock); + raw_spin_lock(&kvm->requests_lock); me = smp_processor_id(); kvm_for_each_vcpu(i, vcpu, kvm) { if (test_and_set_bit(req, &vcpu->requests)) @@ -153,7 +153,7 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req) smp_call_function_many(cpus, ack_flush, NULL, 1); else called = false; - spin_unlock(&kvm->requests_lock); + raw_spin_unlock(&kvm->requests_lock); free_cpumask_var(cpus); return called; } @@ -409,7 +409,7 @@ static struct kvm *kvm_create_vm(void) kvm->mm = current->mm; atomic_inc(&kvm->mm->mm_count); spin_lock_init(&kvm->mmu_lock); - spin_lock_init(&kvm->requests_lock); + raw_spin_lock_init(&kvm->requests_lock); kvm_eventfd_init(kvm); mutex_init(&kvm->lock); mutex_init(&kvm->irq_lock); -- cgit v1.2.3 From d2be1651b736002e0c76d7095d6c0ba77b4a897c Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Tue, 23 Feb 2010 17:47:57 +0100 Subject: KVM: x86: Add KVM_CAP_X86_ROBUST_SINGLESTEP This marks the guest single-step API improvement of 94fe45da and 91586a3b with a capability flag to allow reliable detection by user space. Signed-off-by: Jan Kiszka Cc: stable@kernel.org (2.6.33) Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 1 + include/linux/kvm.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c3d2acbbb91b..e46282a56565 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1570,6 +1570,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_HYPERV_VAPIC: case KVM_CAP_HYPERV_SPIN: case KVM_CAP_PCI_SEGMENT: + case KVM_CAP_X86_ROBUST_SINGLESTEP: r = 1; break; case KVM_CAP_COALESCED_MMIO: diff --git a/include/linux/kvm.h b/include/linux/kvm.h index dfa54be881f4..60df9c84ecae 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -501,6 +501,7 @@ struct kvm_ioeventfd { #define KVM_CAP_HYPERV_VAPIC 45 #define KVM_CAP_HYPERV_SPIN 46 #define KVM_CAP_PCI_SEGMENT 47 +#define KVM_CAP_X86_ROBUST_SINGLESTEP 51 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 73a19e4c0301908ce6346715fd08a74308451f5a Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Tue, 2 Mar 2010 11:39:15 +0900 Subject: serial: sh-sci: Add DMA support. Support using DMA for sending and receiving data over SCI(F) interfaces of various SH SoCs. 
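As a rough illustration of how a board would opt in to the new DMA path (hypothetical values throughout: the register base, IRQ numbers and DMA controller device below are invented; only the dma_dev/dma_slave_tx/dma_slave_rx fields and the SHDMA_SLAVE_SCIF0_* slave IDs come from this patch and the shdma driver), the platform data could look roughly like:

	/* Hypothetical board file fragment, not a real board. */
	static struct plat_sci_port scif0_platform_data = {
		.mapbase	= 0xffe00000,		/* invented base address */
		.flags		= UPF_BOOT_AUTOCONF,
		.type		= PORT_SCIF,
		.irqs		= { 80, 80, 80, 80 },	/* invented IRQ numbers */
		.dma_dev	= &dma_device.dev,	/* assumed: the SoC's DMAC platform device */
		.dma_slave_tx	= SHDMA_SLAVE_SCIF0_TX,
		.dma_slave_rx	= SHDMA_SLAVE_SCIF0_RX,
	};

With CONFIG_SERIAL_SH_SCI_DMA enabled, sci_request_dma() uses these slave IDs to request TX and RX channels, and the driver falls back to PIO whenever channel or buffer setup fails.
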
Signed-off-by: Guennadi Liakhovetski Signed-off-by: Paul Mundt --- drivers/serial/Kconfig | 4 + drivers/serial/sh-sci.c | 618 +++++++++++++++++++++++++++++++++++++++++---- include/linux/serial_sci.h | 6 + 3 files changed, 582 insertions(+), 46 deletions(-) (limited to 'include') diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig index 888a0ce91c4b..11ebe862457b 100644 --- a/drivers/serial/Kconfig +++ b/drivers/serial/Kconfig @@ -1009,6 +1009,10 @@ config SERIAL_SH_SCI_CONSOLE depends on SERIAL_SH_SCI=y select SERIAL_CORE_CONSOLE +config SERIAL_SH_SCI_DMA + bool "DMA support" + depends on SERIAL_SH_SCI && SH_DMAE && EXPERIMENTAL + config SERIAL_PNX8XXX bool "Enable PNX8XXX SoCs' UART Support" depends on MIPS && (SOC_PNX8550 || SOC_PNX833X) diff --git a/drivers/serial/sh-sci.c b/drivers/serial/sh-sci.c index 42f3333c4ad0..f3841cd8fc5d 100644 --- a/drivers/serial/sh-sci.c +++ b/drivers/serial/sh-sci.c @@ -48,6 +48,9 @@ #include #include #include +#include +#include +#include #ifdef CONFIG_SUPERH #include @@ -84,6 +87,27 @@ struct sci_port { struct clk *dclk; struct list_head node; + struct dma_chan *chan_tx; + struct dma_chan *chan_rx; +#ifdef CONFIG_SERIAL_SH_SCI_DMA + struct device *dma_dev; + enum sh_dmae_slave_chan_id slave_tx; + enum sh_dmae_slave_chan_id slave_rx; + struct dma_async_tx_descriptor *desc_tx; + struct dma_async_tx_descriptor *desc_rx[2]; + dma_cookie_t cookie_tx; + dma_cookie_t cookie_rx[2]; + dma_cookie_t active_rx; + struct scatterlist sg_tx; + unsigned int sg_len_tx; + struct scatterlist sg_rx[2]; + size_t buf_len_rx; + struct sh_dmae_slave param_tx; + struct sh_dmae_slave param_rx; + struct work_struct work_tx; + struct work_struct work_rx; + struct timer_list rx_timer; +#endif }; struct sh_sci_priv { @@ -269,29 +293,44 @@ static inline void sci_init_pins(struct uart_port *port, unsigned int cflag) defined(CONFIG_CPU_SUBTYPE_SH7780) || \ defined(CONFIG_CPU_SUBTYPE_SH7785) || \ defined(CONFIG_CPU_SUBTYPE_SH7786) -static inline int scif_txroom(struct uart_port *port) +static int scif_txfill(struct uart_port *port) { - return SCIF_TXROOM_MAX - (sci_in(port, SCTFDR) & 0xff); + return sci_in(port, SCTFDR) & 0xff; } -static inline int scif_rxroom(struct uart_port *port) +static int scif_txroom(struct uart_port *port) +{ + return SCIF_TXROOM_MAX - scif_txfill(port); +} + +static int scif_rxfill(struct uart_port *port) { return sci_in(port, SCRFDR) & 0xff; } #elif defined(CONFIG_CPU_SUBTYPE_SH7763) -static inline int scif_txroom(struct uart_port *port) +static int scif_txfill(struct uart_port *port) { - if ((port->mapbase == 0xffe00000) || - (port->mapbase == 0xffe08000)) { + if (port->mapbase == 0xffe00000 || + port->mapbase == 0xffe08000) /* SCIF0/1*/ - return SCIF_TXROOM_MAX - (sci_in(port, SCTFDR) & 0xff); - } else { + return sci_in(port, SCTFDR) & 0xff; + else /* SCIF2 */ - return SCIF2_TXROOM_MAX - (sci_in(port, SCFDR) >> 8); - } + return sci_in(port, SCFDR) >> 8; +} + +static int scif_txroom(struct uart_port *port) +{ + if (port->mapbase == 0xffe00000 || + port->mapbase == 0xffe08000) + /* SCIF0/1*/ + return SCIF_TXROOM_MAX - scif_txfill(port); + else + /* SCIF2 */ + return SCIF2_TXROOM_MAX - scif_txfill(port); } -static inline int scif_rxroom(struct uart_port *port) +static int scif_rxfill(struct uart_port *port) { if ((port->mapbase == 0xffe00000) || (port->mapbase == 0xffe08000)) { @@ -303,23 +342,33 @@ static inline int scif_rxroom(struct uart_port *port) } } #else -static inline int scif_txroom(struct uart_port *port) +static int scif_txfill(struct 
uart_port *port) { - return SCIF_TXROOM_MAX - (sci_in(port, SCFDR) >> 8); + return sci_in(port, SCFDR) >> 8; } -static inline int scif_rxroom(struct uart_port *port) +static int scif_txroom(struct uart_port *port) +{ + return SCIF_TXROOM_MAX - scif_txfill(port); +} + +static int scif_rxfill(struct uart_port *port) { return sci_in(port, SCFDR) & SCIF_RFDC_MASK; } #endif -static inline int sci_txroom(struct uart_port *port) +static int sci_txfill(struct uart_port *port) { - return (sci_in(port, SCxSR) & SCI_TDRE) != 0; + return !(sci_in(port, SCxSR) & SCI_TDRE); } -static inline int sci_rxroom(struct uart_port *port) +static int sci_txroom(struct uart_port *port) +{ + return !sci_txfill(port); +} + +static int sci_rxfill(struct uart_port *port) { return (sci_in(port, SCxSR) & SCxSR_RDxF(port)) != 0; } @@ -406,9 +455,9 @@ static inline void sci_receive_chars(struct uart_port *port) while (1) { if (port->type == PORT_SCI) - count = sci_rxroom(port); + count = sci_rxfill(port); else - count = scif_rxroom(port); + count = scif_rxfill(port); /* Don't copy more bytes than there is room for in the buffer */ count = tty_buffer_request_room(tty, count); @@ -453,10 +502,10 @@ static inline void sci_receive_chars(struct uart_port *port) } /* Store data and status */ - if (status&SCxSR_FER(port)) { + if (status & SCxSR_FER(port)) { flag = TTY_FRAME; dev_notice(port->dev, "frame error\n"); - } else if (status&SCxSR_PER(port)) { + } else if (status & SCxSR_PER(port)) { flag = TTY_PARITY; dev_notice(port->dev, "parity error\n"); } else @@ -618,13 +667,39 @@ static inline int sci_handle_breaks(struct uart_port *port) return copied; } -static irqreturn_t sci_rx_interrupt(int irq, void *port) +static irqreturn_t sci_rx_interrupt(int irq, void *ptr) { +#ifdef CONFIG_SERIAL_SH_SCI_DMA + struct uart_port *port = ptr; + struct sci_port *s = to_sci_port(port); + + if (s->chan_rx) { + unsigned long tout; + u16 scr = sci_in(port, SCSCR); + u16 ssr = sci_in(port, SCxSR); + + /* Disable future Rx interrupts */ + sci_out(port, SCSCR, scr & ~SCI_CTRL_FLAGS_RIE); + /* Clear current interrupt */ + sci_out(port, SCxSR, ssr & ~(1 | SCxSR_RDxF(port))); + /* Calculate delay for 1.5 DMA buffers */ + tout = (port->timeout - HZ / 50) * s->buf_len_rx * 3 / + port->fifosize / 2; + dev_dbg(port->dev, "Rx IRQ: setup timeout in %u ms\n", + tout * 1000 / HZ); + if (tout < 2) + tout = 2; + mod_timer(&s->rx_timer, jiffies + tout); + + return IRQ_HANDLED; + } +#endif + /* I think sci_receive_chars has to be called irrespective * of whether the I_IXOFF is set, otherwise, how is the interrupt * to be disabled? 
*/ - sci_receive_chars(port); + sci_receive_chars(ptr); return IRQ_HANDLED; } @@ -680,6 +755,7 @@ static irqreturn_t sci_mpxed_interrupt(int irq, void *ptr) { unsigned short ssr_status, scr_status, err_enabled; struct uart_port *port = ptr; + struct sci_port *s = to_sci_port(port); irqreturn_t ret = IRQ_NONE; ssr_status = sci_in(port, SCxSR); @@ -687,10 +763,15 @@ static irqreturn_t sci_mpxed_interrupt(int irq, void *ptr) err_enabled = scr_status & (SCI_CTRL_FLAGS_REIE | SCI_CTRL_FLAGS_RIE); /* Tx Interrupt */ - if ((ssr_status & SCxSR_TDxE(port)) && (scr_status & SCI_CTRL_FLAGS_TIE)) + if ((ssr_status & SCxSR_TDxE(port)) && (scr_status & SCI_CTRL_FLAGS_TIE) && + !s->chan_tx) ret = sci_tx_interrupt(irq, ptr); - /* Rx Interrupt */ - if ((ssr_status & SCxSR_RDxF(port)) && (scr_status & SCI_CTRL_FLAGS_RIE)) + /* + * Rx Interrupt: if we're using DMA, the DMA controller clears RDF / + * DR flags + */ + if (((ssr_status & SCxSR_RDxF(port)) || s->chan_rx) && + (scr_status & SCI_CTRL_FLAGS_RIE)) ret = sci_rx_interrupt(irq, ptr); /* Error Interrupt */ if ((ssr_status & SCxSR_ERRORS(port)) && err_enabled) @@ -699,6 +780,10 @@ static irqreturn_t sci_mpxed_interrupt(int irq, void *ptr) if ((ssr_status & SCxSR_BRK(port)) && err_enabled) ret = sci_br_interrupt(irq, ptr); + WARN_ONCE(ret == IRQ_NONE, + "%s: %d IRQ %d, status %x, control %x\n", __func__, + irq, port->line, ssr_status, scr_status); + return ret; } @@ -800,7 +885,9 @@ static void sci_free_irq(struct sci_port *port) static unsigned int sci_tx_empty(struct uart_port *port) { unsigned short status = sci_in(port, SCxSR); - return status & SCxSR_TEND(port) ? TIOCSER_TEMT : 0; + unsigned short in_tx_fifo = scif_txfill(port); + + return (status & SCxSR_TEND(port)) && !in_tx_fifo ? TIOCSER_TEMT : 0; } static void sci_set_mctrl(struct uart_port *port, unsigned int mctrl) @@ -812,16 +899,299 @@ static void sci_set_mctrl(struct uart_port *port, unsigned int mctrl) static unsigned int sci_get_mctrl(struct uart_port *port) { - /* This routine is used for geting signals of: DTR, DCD, DSR, RI, + /* This routine is used for getting signals of: DTR, DCD, DSR, RI, and CTS/RTS */ return TIOCM_DTR | TIOCM_RTS | TIOCM_DSR; } +#ifdef CONFIG_SERIAL_SH_SCI_DMA +static void sci_dma_tx_complete(void *arg) +{ + struct sci_port *s = arg; + struct uart_port *port = &s->port; + struct circ_buf *xmit = &port->state->xmit; + unsigned long flags; + + dev_dbg(port->dev, "%s(%d)\n", __func__, port->line); + + spin_lock_irqsave(&port->lock, flags); + + xmit->tail += s->sg_tx.length; + xmit->tail &= UART_XMIT_SIZE - 1; + + port->icount.tx += s->sg_tx.length; + + async_tx_ack(s->desc_tx); + s->cookie_tx = -EINVAL; + s->desc_tx = NULL; + + spin_unlock_irqrestore(&port->lock, flags); + + if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) + uart_write_wakeup(port); + + if (uart_circ_chars_pending(xmit)) + schedule_work(&s->work_tx); +} + +/* Locking: called with port lock held */ +static int sci_dma_rx_push(struct sci_port *s, struct tty_struct *tty, + size_t count) +{ + struct uart_port *port = &s->port; + int i, active, room; + + room = tty_buffer_request_room(tty, count); + + if (s->active_rx == s->cookie_rx[0]) { + active = 0; + } else if (s->active_rx == s->cookie_rx[1]) { + active = 1; + } else { + dev_err(port->dev, "cookie %d not found!\n", s->active_rx); + return 0; + } + + if (room < count) + dev_warn(port->dev, "Rx overrun: dropping %u bytes\n", + count - room); + if (!room) + return room; + + for (i = 0; i < room; i++) + tty_insert_flip_char(tty, ((u8 
*)sg_virt(&s->sg_rx[active]))[i], + TTY_NORMAL); + + port->icount.rx += room; + + return room; +} + +static void sci_dma_rx_complete(void *arg) +{ + struct sci_port *s = arg; + struct uart_port *port = &s->port; + struct tty_struct *tty = port->state->port.tty; + unsigned long flags; + int count; + + dev_dbg(port->dev, "%s(%d)\n", __func__, port->line); + + spin_lock_irqsave(&port->lock, flags); + + count = sci_dma_rx_push(s, tty, s->buf_len_rx); + + mod_timer(&s->rx_timer, jiffies + msecs_to_jiffies(5)); + + spin_unlock_irqrestore(&port->lock, flags); + + if (count) + tty_flip_buffer_push(tty); + + schedule_work(&s->work_rx); +} + +static void sci_start_rx(struct uart_port *port); +static void sci_start_tx(struct uart_port *port); + +static void sci_rx_dma_release(struct sci_port *s, bool enable_pio) +{ + struct dma_chan *chan = s->chan_rx; + struct uart_port *port = &s->port; + unsigned long flags; + + s->chan_rx = NULL; + s->cookie_rx[0] = s->cookie_rx[1] = -EINVAL; + dma_release_channel(chan); + dma_free_coherent(port->dev, s->buf_len_rx * 2, + sg_virt(&s->sg_rx[0]), sg_dma_address(&s->sg_rx[0])); + if (enable_pio) + sci_start_rx(port); +} + +static void sci_tx_dma_release(struct sci_port *s, bool enable_pio) +{ + struct dma_chan *chan = s->chan_tx; + struct uart_port *port = &s->port; + unsigned long flags; + + s->chan_tx = NULL; + s->cookie_tx = -EINVAL; + dma_release_channel(chan); + if (enable_pio) + sci_start_tx(port); +} + +static void sci_submit_rx(struct sci_port *s) +{ + struct dma_chan *chan = s->chan_rx; + int i; + + for (i = 0; i < 2; i++) { + struct scatterlist *sg = &s->sg_rx[i]; + struct dma_async_tx_descriptor *desc; + + desc = chan->device->device_prep_slave_sg(chan, + sg, 1, DMA_FROM_DEVICE, DMA_PREP_INTERRUPT); + + if (desc) { + s->desc_rx[i] = desc; + desc->callback = sci_dma_rx_complete; + desc->callback_param = s; + s->cookie_rx[i] = desc->tx_submit(desc); + } + + if (!desc || s->cookie_rx[i] < 0) { + if (i) { + async_tx_ack(s->desc_rx[0]); + s->cookie_rx[0] = -EINVAL; + } + if (desc) { + async_tx_ack(desc); + s->cookie_rx[i] = -EINVAL; + } + dev_warn(s->port.dev, + "failed to re-start DMA, using PIO\n"); + sci_rx_dma_release(s, true); + return; + } + } + + s->active_rx = s->cookie_rx[0]; + + dma_async_issue_pending(chan); +} + +static void work_fn_rx(struct work_struct *work) +{ + struct sci_port *s = container_of(work, struct sci_port, work_rx); + struct uart_port *port = &s->port; + struct dma_async_tx_descriptor *desc; + int new; + + if (s->active_rx == s->cookie_rx[0]) { + new = 0; + } else if (s->active_rx == s->cookie_rx[1]) { + new = 1; + } else { + dev_err(port->dev, "cookie %d not found!\n", s->active_rx); + return; + } + desc = s->desc_rx[new]; + + if (dma_async_is_tx_complete(s->chan_rx, s->active_rx, NULL, NULL) != + DMA_SUCCESS) { + /* Handle incomplete DMA receive */ + struct tty_struct *tty = port->state->port.tty; + struct dma_chan *chan = s->chan_rx; + struct sh_desc *sh_desc = container_of(desc, struct sh_desc, + async_tx); + unsigned long flags; + int count; + + chan->device->device_terminate_all(chan); + dev_dbg(port->dev, "Read %u bytes with cookie %d\n", + sh_desc->partial, sh_desc->cookie); + + spin_lock_irqsave(&port->lock, flags); + count = sci_dma_rx_push(s, tty, sh_desc->partial); + spin_unlock_irqrestore(&port->lock, flags); + + if (count) + tty_flip_buffer_push(tty); + + sci_submit_rx(s); + + return; + } + + s->cookie_rx[new] = desc->tx_submit(desc); + if (s->cookie_rx[new] < 0) { + dev_warn(port->dev, "Failed submitting Rx DMA 
descriptor\n"); + sci_rx_dma_release(s, true); + return; + } + + dev_dbg(port->dev, "%s: cookie %d #%d\n", __func__, + s->cookie_rx[new], new); + + s->active_rx = s->cookie_rx[!new]; +} + +static void work_fn_tx(struct work_struct *work) +{ + struct sci_port *s = container_of(work, struct sci_port, work_tx); + struct dma_async_tx_descriptor *desc; + struct dma_chan *chan = s->chan_tx; + struct uart_port *port = &s->port; + struct circ_buf *xmit = &port->state->xmit; + struct scatterlist *sg = &s->sg_tx; + + /* + * DMA is idle now. + * Port xmit buffer is already mapped, and it is one page... Just adjust + * offsets and lengths. Since it is a circular buffer, we have to + * transmit till the end, and then the rest. Take the port lock to get a + * consistent xmit buffer state. + */ + spin_lock_irq(&port->lock); + sg->offset = xmit->tail & (UART_XMIT_SIZE - 1); + sg->dma_address = (sg_dma_address(sg) & ~(UART_XMIT_SIZE - 1)) + + sg->offset; + sg->length = min((int)CIRC_CNT(xmit->head, xmit->tail, UART_XMIT_SIZE), + CIRC_CNT_TO_END(xmit->head, xmit->tail, UART_XMIT_SIZE)); + sg->dma_length = sg->length; + spin_unlock_irq(&port->lock); + + BUG_ON(!sg->length); + + desc = chan->device->device_prep_slave_sg(chan, + sg, s->sg_len_tx, DMA_TO_DEVICE, + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + if (!desc) { + /* switch to PIO */ + sci_tx_dma_release(s, true); + return; + } + + dma_sync_sg_for_device(port->dev, sg, 1, DMA_TO_DEVICE); + + spin_lock_irq(&port->lock); + s->desc_tx = desc; + desc->callback = sci_dma_tx_complete; + desc->callback_param = s; + spin_unlock_irq(&port->lock); + s->cookie_tx = desc->tx_submit(desc); + if (s->cookie_tx < 0) { + dev_warn(port->dev, "Failed submitting Tx DMA descriptor\n"); + /* switch to PIO */ + sci_tx_dma_release(s, true); + return; + } + + dev_dbg(port->dev, "%s: %p: %d...%d, cookie %d\n", __func__, + xmit->buf, xmit->tail, xmit->head, s->cookie_tx); + + dma_async_issue_pending(chan); +} +#endif + static void sci_start_tx(struct uart_port *port) { unsigned short ctrl; +#ifdef CONFIG_SERIAL_SH_SCI_DMA + struct sci_port *s = to_sci_port(port); + + if (s->chan_tx) { + if (!uart_circ_empty(&s->port.state->xmit) && s->cookie_tx < 0) + schedule_work(&s->work_tx); + + return; + } +#endif + /* Set TIE (Transmit Interrupt Enable) bit in SCSCR */ ctrl = sci_in(port, SCSCR); ctrl |= SCI_CTRL_FLAGS_TIE; @@ -838,13 +1208,12 @@ static void sci_stop_tx(struct uart_port *port) sci_out(port, SCSCR, ctrl); } -static void sci_start_rx(struct uart_port *port, unsigned int tty_start) +static void sci_start_rx(struct uart_port *port) { - unsigned short ctrl; + unsigned short ctrl = SCI_CTRL_FLAGS_RIE | SCI_CTRL_FLAGS_REIE; /* Set RIE (Receive Interrupt Enable) bit in SCSCR */ - ctrl = sci_in(port, SCSCR); - ctrl |= SCI_CTRL_FLAGS_RIE | SCI_CTRL_FLAGS_REIE; + ctrl |= sci_in(port, SCSCR); sci_out(port, SCSCR, ctrl); } @@ -868,16 +1237,154 @@ static void sci_break_ctl(struct uart_port *port, int break_state) /* Nothing here yet .. 
*/ } +#ifdef CONFIG_SERIAL_SH_SCI_DMA +static bool filter(struct dma_chan *chan, void *slave) +{ + struct sh_dmae_slave *param = slave; + + dev_dbg(chan->device->dev, "%s: slave ID %d\n", __func__, + param->slave_id); + + if (param->dma_dev == chan->device->dev) { + chan->private = param; + return true; + } else { + return false; + } +} + +static void rx_timer_fn(unsigned long arg) +{ + struct sci_port *s = (struct sci_port *)arg; + struct uart_port *port = &s->port; + + u16 scr = sci_in(port, SCSCR); + sci_out(port, SCSCR, scr | SCI_CTRL_FLAGS_RIE); + dev_dbg(port->dev, "DMA Rx timed out\n"); + schedule_work(&s->work_rx); +} + +static void sci_request_dma(struct uart_port *port) +{ + struct sci_port *s = to_sci_port(port); + struct sh_dmae_slave *param; + struct dma_chan *chan; + dma_cap_mask_t mask; + int nent; + + dev_dbg(port->dev, "%s: port %d DMA %p\n", __func__, + port->line, s->dma_dev); + + if (!s->dma_dev) + return; + + dma_cap_zero(mask); + dma_cap_set(DMA_SLAVE, mask); + + param = &s->param_tx; + + /* Slave ID, e.g., SHDMA_SLAVE_SCIF0_TX */ + param->slave_id = s->slave_tx; + param->dma_dev = s->dma_dev; + + s->cookie_tx = -EINVAL; + chan = dma_request_channel(mask, filter, param); + dev_dbg(port->dev, "%s: TX: got channel %p\n", __func__, chan); + if (chan) { + s->chan_tx = chan; + sg_init_table(&s->sg_tx, 1); + /* UART circular tx buffer is an aligned page. */ + BUG_ON((int)port->state->xmit.buf & ~PAGE_MASK); + sg_set_page(&s->sg_tx, virt_to_page(port->state->xmit.buf), + UART_XMIT_SIZE, (int)port->state->xmit.buf & ~PAGE_MASK); + nent = dma_map_sg(port->dev, &s->sg_tx, 1, DMA_TO_DEVICE); + if (!nent) + sci_tx_dma_release(s, false); + else + dev_dbg(port->dev, "%s: mapped %d@%p to %x\n", __func__, + sg_dma_len(&s->sg_tx), + port->state->xmit.buf, sg_dma_address(&s->sg_tx)); + + s->sg_len_tx = nent; + + INIT_WORK(&s->work_tx, work_fn_tx); + } + + param = &s->param_rx; + + /* Slave ID, e.g., SHDMA_SLAVE_SCIF0_RX */ + param->slave_id = s->slave_rx; + param->dma_dev = s->dma_dev; + + chan = dma_request_channel(mask, filter, param); + dev_dbg(port->dev, "%s: RX: got channel %p\n", __func__, chan); + if (chan) { + dma_addr_t dma[2]; + void *buf[2]; + int i; + + s->chan_rx = chan; + + s->buf_len_rx = 2 * max(16, (int)port->fifosize); + buf[0] = dma_alloc_coherent(port->dev, s->buf_len_rx * 2, + &dma[0], GFP_KERNEL); + + if (!buf[0]) { + dev_warn(port->dev, + "failed to allocate dma buffer, using PIO\n"); + sci_rx_dma_release(s, true); + return; + } + + buf[1] = buf[0] + s->buf_len_rx; + dma[1] = dma[0] + s->buf_len_rx; + + for (i = 0; i < 2; i++) { + struct scatterlist *sg = &s->sg_rx[i]; + + sg_init_table(sg, 1); + sg_set_page(sg, virt_to_page(buf[i]), s->buf_len_rx, + (int)buf[i] & ~PAGE_MASK); + sg->dma_address = dma[i]; + sg->dma_length = sg->length; + } + + INIT_WORK(&s->work_rx, work_fn_rx); + setup_timer(&s->rx_timer, rx_timer_fn, (unsigned long)s); + + sci_submit_rx(s); + } +} + +static void sci_free_dma(struct uart_port *port) +{ + struct sci_port *s = to_sci_port(port); + + if (!s->dma_dev) + return; + + if (s->chan_tx) + sci_tx_dma_release(s, false); + if (s->chan_rx) + sci_rx_dma_release(s, false); +} +#endif + static int sci_startup(struct uart_port *port) { struct sci_port *s = to_sci_port(port); + dev_dbg(port->dev, "%s(%d)\n", __func__, port->line); + if (s->enable) s->enable(port); sci_request_irq(s); +#ifdef CONFIG_SERIAL_SH_SCI_DMA + sci_request_dma(port); +#endif sci_start_tx(port); - sci_start_rx(port, 1); + sci_start_rx(port); return 0; } @@ -886,8 +1393,13 @@ 
static void sci_shutdown(struct uart_port *port) { struct sci_port *s = to_sci_port(port); + dev_dbg(port->dev, "%s(%d)\n", __func__, port->line); + sci_stop_rx(port); sci_stop_tx(port); +#ifdef CONFIG_SERIAL_SH_SCI_DMA + sci_free_dma(port); +#endif sci_free_irq(s); if (s->disable) @@ -937,6 +1449,9 @@ static void sci_set_termios(struct uart_port *port, struct ktermios *termios, sci_out(port, SCSMR, smr_val); + dev_dbg(port->dev, "%s: SMR %x, t %x, SCSCR %x\n", __func__, smr_val, t, + SCSCR_INIT(port)); + if (t > 0) { if (t >= 256) { sci_out(port, SCSMR, (sci_in(port, SCSMR) & ~3) | 1); @@ -954,7 +1469,7 @@ static void sci_set_termios(struct uart_port *port, struct ktermios *termios, sci_out(port, SCSCR, SCSCR_INIT(port)); if ((termios->c_cflag & CREAD) != 0) - sci_start_rx(port, 0); + sci_start_rx(port); } static const char *sci_type(struct uart_port *port) @@ -1049,19 +1564,21 @@ static void __devinit sci_init_single(struct platform_device *dev, unsigned int index, struct plat_sci_port *p) { - sci_port->port.ops = &sci_uart_ops; - sci_port->port.iotype = UPIO_MEM; - sci_port->port.line = index; + struct uart_port *port = &sci_port->port; + + port->ops = &sci_uart_ops; + port->iotype = UPIO_MEM; + port->line = index; switch (p->type) { case PORT_SCIFA: - sci_port->port.fifosize = 64; + port->fifosize = 64; break; case PORT_SCIF: - sci_port->port.fifosize = 16; + port->fifosize = 16; break; default: - sci_port->port.fifosize = 1; + port->fifosize = 1; break; } @@ -1070,19 +1587,28 @@ static void __devinit sci_init_single(struct platform_device *dev, sci_port->dclk = clk_get(&dev->dev, "peripheral_clk"); sci_port->enable = sci_clk_enable; sci_port->disable = sci_clk_disable; - sci_port->port.dev = &dev->dev; + port->dev = &dev->dev; } sci_port->break_timer.data = (unsigned long)sci_port; sci_port->break_timer.function = sci_break_timer; init_timer(&sci_port->break_timer); - sci_port->port.mapbase = p->mapbase; - sci_port->port.membase = p->membase; + port->mapbase = p->mapbase; + port->membase = p->membase; - sci_port->port.irq = p->irqs[SCIx_TXI_IRQ]; - sci_port->port.flags = p->flags; - sci_port->type = sci_port->port.type = p->type; + port->irq = p->irqs[SCIx_TXI_IRQ]; + port->flags = p->flags; + sci_port->type = port->type = p->type; + +#ifdef CONFIG_SERIAL_SH_SCI_DMA + sci_port->dma_dev = p->dma_dev; + sci_port->slave_tx = p->dma_slave_tx; + sci_port->slave_rx = p->dma_slave_rx; + + dev_dbg(port->dev, "%s: DMA device %p, tx %d, rx %d\n", __func__, + p->dma_dev, p->dma_slave_tx, p->dma_slave_rx); +#endif memcpy(&sci_port->irqs, &p->irqs, sizeof(p->irqs)); } diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h index 1c297ddc9d5a..1b177d29a7f0 100644 --- a/include/linux/serial_sci.h +++ b/include/linux/serial_sci.h @@ -2,6 +2,7 @@ #define __LINUX_SERIAL_SCI_H #include +#include /* * Generic header for SuperH SCI(F) (used by sh/sh64/h8300 and related parts) @@ -16,6 +17,8 @@ enum { SCIx_NR_IRQS, }; +struct device; + /* * Platform device specific platform_data struct */ @@ -26,6 +29,9 @@ struct plat_sci_port { unsigned int type; /* SCI / SCIF / IRDA */ upf_t flags; /* UPF_* flags */ char *clk; /* clock string */ + struct device *dma_dev; + enum sh_dmae_slave_chan_id dma_slave_tx; + enum sh_dmae_slave_chan_id dma_slave_rx; }; #endif /* __LINUX_SERIAL_SCI_H */ -- cgit v1.2.3 From b5527a7766f0505dc72efe3cefe5e9dea826f611 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 2 Mar 2010 12:23:42 +0100 Subject: i2c: Add SMBus alert support SMBus alert support. 
The SMBus alert protocol allows several SMBus slave devices to share a single interrupt pin on the SMBus master, while still allowing the master to know which slave triggered the interrupt. This is based on preliminary work by David Brownell. The key difference between David's implementation and mine is that his was part of i2c-core, while mine is split into a separate, standalone module named i2c-smbus. The i2c-smbus module is meant to include support for all SMBus extensions to the I2C protocol in the future. The benefit of this approach is a zero cost for I2C bus segments which do not need SMBus alert support. Where David's implementation increased the size of struct i2c_adapter by 7% (40 bytes on i386), mine doesn't touch it. Where David's implementation added over 150 lines of code to i2c-core (+10%), mine doesn't touch it. The only change that touches all the users of the i2c subsystem is a new callback in struct i2c_driver (common to both implementations.) I seem to remember Trent was worried about the footprint of David'd implementation, hopefully mine addresses the issue. Signed-off-by: Jean Delvare Acked-by: Jonathan Cameron Cc: David Brownell Cc: Trent Piepho --- Documentation/i2c/smbus-protocol | 16 +++ drivers/i2c/Makefile | 2 +- drivers/i2c/i2c-smbus.c | 263 +++++++++++++++++++++++++++++++++++++++ include/linux/i2c-smbus.h | 50 ++++++++ include/linux/i2c.h | 7 ++ 5 files changed, 337 insertions(+), 1 deletion(-) create mode 100644 drivers/i2c/i2c-smbus.c create mode 100644 include/linux/i2c-smbus.h (limited to 'include') diff --git a/Documentation/i2c/smbus-protocol b/Documentation/i2c/smbus-protocol index 9df47441f0e7..7c19d1a2bea0 100644 --- a/Documentation/i2c/smbus-protocol +++ b/Documentation/i2c/smbus-protocol @@ -185,6 +185,22 @@ the protocol. All ARP communications use slave address 0x61 and require PEC checksums. +SMBus Alert +=========== + +SMBus Alert was introduced in Revision 1.0 of the specification. + +The SMBus alert protocol allows several SMBus slave devices to share a +single interrupt pin on the SMBus master, while still allowing the master +to know which slave triggered the interrupt. + +This is implemented the following way in the Linux kernel: +* I2C bus drivers which support SMBus alert should call + i2c_setup_smbus_alert() to setup SMBus alert support. +* I2C drivers for devices which can trigger SMBus alerts should implement + the optional alert() callback. + + I2C Block Transactions ====================== diff --git a/drivers/i2c/Makefile b/drivers/i2c/Makefile index ba26e6cbe74e..7111c93bd3e6 100644 --- a/drivers/i2c/Makefile +++ b/drivers/i2c/Makefile @@ -3,7 +3,7 @@ # obj-$(CONFIG_I2C_BOARDINFO) += i2c-boardinfo.o -obj-$(CONFIG_I2C) += i2c-core.o +obj-$(CONFIG_I2C) += i2c-core.o i2c-smbus.o obj-$(CONFIG_I2C_CHARDEV) += i2c-dev.o obj-y += busses/ chips/ algos/ diff --git a/drivers/i2c/i2c-smbus.c b/drivers/i2c/i2c-smbus.c new file mode 100644 index 000000000000..421278221243 --- /dev/null +++ b/drivers/i2c/i2c-smbus.c @@ -0,0 +1,263 @@ +/* + * i2c-smbus.c - SMBus extensions to the I2C protocol + * + * Copyright (C) 2008 David Brownell + * Copyright (C) 2010 Jean Delvare + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct i2c_smbus_alert { + unsigned int alert_edge_triggered:1; + int irq; + struct work_struct alert; + struct i2c_client *ara; /* Alert response address */ +}; + +struct alert_data { + unsigned short addr; + u8 flag:1; +}; + +/* If this is the alerting device, notify its driver */ +static int smbus_do_alert(struct device *dev, void *addrp) +{ + struct i2c_client *client = i2c_verify_client(dev); + struct alert_data *data = addrp; + + if (!client || client->addr != data->addr) + return 0; + if (client->flags & I2C_CLIENT_TEN) + return 0; + + /* + * Drivers should either disable alerts, or provide at least + * a minimal handler. Lock so client->driver won't change. + */ + down(&dev->sem); + if (client->driver) { + if (client->driver->alert) + client->driver->alert(client, data->flag); + else + dev_warn(&client->dev, "no driver alert()!\n"); + } else + dev_dbg(&client->dev, "alert with no driver\n"); + up(&dev->sem); + + /* Stop iterating after we find the device */ + return -EBUSY; +} + +/* + * The alert IRQ handler needs to hand work off to a task which can issue + * SMBus calls, because those sleeping calls can't be made in IRQ context. + */ +static void smbus_alert(struct work_struct *work) +{ + struct i2c_smbus_alert *alert; + struct i2c_client *ara; + unsigned short prev_addr = 0; /* Not a valid address */ + + alert = container_of(work, struct i2c_smbus_alert, alert); + ara = alert->ara; + + for (;;) { + s32 status; + struct alert_data data; + + /* + * Devices with pending alerts reply in address order, low + * to high, because of slave transmit arbitration. After + * responding, an SMBus device stops asserting SMBALERT#. + * + * Note that SMBus 2.0 reserves 10-bit addresess for future + * use. We neither handle them, nor try to use PEC here. 
+ */ + status = i2c_smbus_read_byte(ara); + if (status < 0) + break; + + data.flag = status & 1; + data.addr = status >> 1; + + if (data.addr == prev_addr) { + dev_warn(&ara->dev, "Duplicate SMBALERT# from dev " + "0x%02x, skipping\n", data.addr); + break; + } + dev_dbg(&ara->dev, "SMBALERT# from dev 0x%02x, flag %d\n", + data.addr, data.flag); + + /* Notify driver for the device which issued the alert */ + device_for_each_child(&ara->adapter->dev, &data, + smbus_do_alert); + prev_addr = data.addr; + } + + /* We handled all alerts; re-enable level-triggered IRQs */ + if (!alert->alert_edge_triggered) + enable_irq(alert->irq); +} + +static irqreturn_t smbalert_irq(int irq, void *d) +{ + struct i2c_smbus_alert *alert = d; + + /* Disable level-triggered IRQs until we handle them */ + if (!alert->alert_edge_triggered) + disable_irq_nosync(irq); + + schedule_work(&alert->alert); + return IRQ_HANDLED; +} + +/* Setup SMBALERT# infrastructure */ +static int smbalert_probe(struct i2c_client *ara, + const struct i2c_device_id *id) +{ + struct i2c_smbus_alert_setup *setup = ara->dev.platform_data; + struct i2c_smbus_alert *alert; + struct i2c_adapter *adapter = ara->adapter; + int res; + + alert = kzalloc(sizeof(struct i2c_smbus_alert), GFP_KERNEL); + if (!alert) + return -ENOMEM; + + alert->alert_edge_triggered = setup->alert_edge_triggered; + alert->irq = setup->irq; + INIT_WORK(&alert->alert, smbus_alert); + alert->ara = ara; + + if (setup->irq > 0) { + res = devm_request_irq(&ara->dev, setup->irq, smbalert_irq, + 0, "smbus_alert", alert); + if (res) { + kfree(alert); + return res; + } + } + + i2c_set_clientdata(ara, alert); + dev_info(&adapter->dev, "supports SMBALERT#, %s trigger\n", + setup->alert_edge_triggered ? "edge" : "level"); + + return 0; +} + +/* IRQ resource is managed so it is freed automatically */ +static int smbalert_remove(struct i2c_client *ara) +{ + struct i2c_smbus_alert *alert = i2c_get_clientdata(ara); + + cancel_work_sync(&alert->alert); + + i2c_set_clientdata(ara, NULL); + kfree(alert); + return 0; +} + +static const struct i2c_device_id smbalert_ids[] = { + { "smbus_alert", 0 }, + { /* LIST END */ } +}; +MODULE_DEVICE_TABLE(i2c, smbalert_ids); + +static struct i2c_driver smbalert_driver = { + .driver = { + .name = "smbus_alert", + }, + .probe = smbalert_probe, + .remove = smbalert_remove, + .id_table = smbalert_ids, +}; + +/** + * i2c_setup_smbus_alert - Setup SMBus alert support + * @adapter: the target adapter + * @setup: setup data for the SMBus alert handler + * Context: can sleep + * + * Setup handling of the SMBus alert protocol on a given I2C bus segment. + * + * Handling can be done either through our IRQ handler, or by the + * adapter (from its handler, periodic polling, or whatever). + * + * NOTE that if we manage the IRQ, we *MUST* know if it's level or + * edge triggered in order to hand it to the workqueue correctly. + * If triggering the alert seems to wedge the system, you probably + * should have said it's level triggered. + * + * This returns the ara client, which should be saved for later use with + * i2c_handle_smbus_alert() and ultimately i2c_unregister_device(); or NULL + * to indicate an error. 
+ */ +struct i2c_client *i2c_setup_smbus_alert(struct i2c_adapter *adapter, + struct i2c_smbus_alert_setup *setup) +{ + struct i2c_board_info ara_board_info = { + I2C_BOARD_INFO("smbus_alert", 0x0c), + .platform_data = setup, + }; + + return i2c_new_device(adapter, &ara_board_info); +} +EXPORT_SYMBOL_GPL(i2c_setup_smbus_alert); + +/** + * i2c_handle_smbus_alert - Handle an SMBus alert + * @ara: the ARA client on the relevant adapter + * Context: can't sleep + * + * Helper function to be called from an I2C bus driver's interrupt + * handler. It will schedule the alert work, in turn calling the + * corresponding I2C device driver's alert function. + * + * It is assumed that ara is a valid i2c client previously returned by + * i2c_setup_smbus_alert(). + */ +int i2c_handle_smbus_alert(struct i2c_client *ara) +{ + struct i2c_smbus_alert *alert = i2c_get_clientdata(ara); + + return schedule_work(&alert->alert); +} +EXPORT_SYMBOL_GPL(i2c_handle_smbus_alert); + +static int __init i2c_smbus_init(void) +{ + return i2c_add_driver(&smbalert_driver); +} + +static void __exit i2c_smbus_exit(void) +{ + i2c_del_driver(&smbalert_driver); +} + +module_init(i2c_smbus_init); +module_exit(i2c_smbus_exit); + +MODULE_AUTHOR("Jean Delvare "); +MODULE_DESCRIPTION("SMBus protocol extensions support"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/i2c-smbus.h b/include/linux/i2c-smbus.h new file mode 100644 index 000000000000..63f57a8c8b31 --- /dev/null +++ b/include/linux/i2c-smbus.h @@ -0,0 +1,50 @@ +/* + * i2c-smbus.h - SMBus extensions to the I2C protocol + * + * Copyright (C) 2010 Jean Delvare + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef _LINUX_I2C_SMBUS_H +#define _LINUX_I2C_SMBUS_H + +#include + + +/** + * i2c_smbus_alert_setup - platform data for the smbus_alert i2c client + * @alert_edge_triggered: whether the alert interrupt is edge (1) or level (0) + * triggered + * @irq: IRQ number, if the smbus_alert driver should take care of interrupt + * handling + * + * If irq is not specified, the smbus_alert driver doesn't take care of + * interrupt handling. In that case it is up to the I2C bus driver to either + * handle the interrupts or to poll for alerts. + * + * If irq is specified then it it crucial that alert_edge_triggered is + * properly set. 
+ */ +struct i2c_smbus_alert_setup { + unsigned int alert_edge_triggered:1; + int irq; +}; + +struct i2c_client *i2c_setup_smbus_alert(struct i2c_adapter *adapter, + struct i2c_smbus_alert_setup *setup); +int i2c_handle_smbus_alert(struct i2c_client *ara); + +#endif /* _LINUX_I2C_SMBUS_H */ diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 02fc617782ef..476abd09c921 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -152,6 +152,13 @@ struct i2c_driver { int (*suspend)(struct i2c_client *, pm_message_t mesg); int (*resume)(struct i2c_client *); + /* Alert callback, for example for the SMBus alert protocol. + * The format and meaning of the data value depends on the protocol. + * For the SMBus alert protocol, there is a single bit of data passed + * as the alert response's low bit ("event flag"). + */ + void (*alert)(struct i2c_client *, unsigned int data); + /* a ioctl like command that can be used to perform specific functions * with the device. */ -- cgit v1.2.3 From 0c43ea544c1086fbbed5a6c99ea58eb64674ea8f Mon Sep 17 00:00:00 2001 From: Zhangfei Gao Date: Tue, 2 Mar 2010 12:23:49 +0100 Subject: i2c: Document the message size limit i2c_master_send & i2c_master_recv do not support more than 64 kb transfer, since msg.len is u16. Signed-off-by: Zhangfei Gao Signed-off-by: Jean Delvare --- Documentation/i2c/writing-clients | 5 +++-- drivers/i2c/i2c-core.c | 4 ++-- include/linux/i2c.h | 1 + 3 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/Documentation/i2c/writing-clients b/Documentation/i2c/writing-clients index 0a74603eb671..3219ee0dbfef 100644 --- a/Documentation/i2c/writing-clients +++ b/Documentation/i2c/writing-clients @@ -318,8 +318,9 @@ Plain I2C communication These routines read and write some bytes from/to a client. The client contains the i2c address, so you do not have to include it. The second parameter contains the bytes to read/write, the third the number of bytes -to read/write (must be less than the length of the buffer.) Returned is -the actual number of bytes read/written. +to read/write (must be less than the length of the buffer, also should be +less than 64k since msg.len is u16.) Returned is the actual number of bytes +read/written. int i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msg, int num); diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index 4131698008b9..3202a86f420e 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -1183,7 +1183,7 @@ EXPORT_SYMBOL(i2c_transfer); * i2c_master_send - issue a single I2C message in master transmit mode * @client: Handle to slave device * @buf: Data that will be written to the slave - * @count: How many bytes to write + * @count: How many bytes to write, must be less than 64k since msg.len is u16 * * Returns negative errno, or else the number of bytes written. */ @@ -1210,7 +1210,7 @@ EXPORT_SYMBOL(i2c_master_send); * i2c_master_recv - issue a single I2C message in master receive mode * @client: Handle to slave device * @buf: Where to store data read from slave - * @count: How many bytes to read + * @count: How many bytes to read, must be less than 64k since msg.len is u16 * * Returns negative errno, or else the number of bytes read. */ diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 476abd09c921..0a5da639b327 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -53,6 +53,7 @@ struct i2c_board_info; * on a bus (or read from them). 
Apart from two basic transfer functions to * transmit one message at a time, a more complex version can be used to * transmit an arbitrary number of messages without interruption. + * @count must be be less than 64k since msg.len is u16. */ extern int i2c_master_send(struct i2c_client *client, const char *buf, int count); -- cgit v1.2.3 From 320ebf09cbb6d01954c9a060266aa8e0d27f4638 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 2 Mar 2010 12:35:37 +0100 Subject: perf, x86: Restrict the ANY flag The ANY flag can show SMT data of another task (like 'top'), so we want to disable it when system-wide profiling is disabled. Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 3 +++ include/linux/perf_event.h | 15 +++++++++++++++ kernel/perf_event.c | 15 --------------- 3 files changed, 18 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 6531b4bdb22d..aab2e1ce9dee 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -503,6 +503,9 @@ static int __hw_perf_event_init(struct perf_event *event) */ if (attr->type == PERF_TYPE_RAW) { hwc->config |= x86_pmu.raw_event(attr->config); + if ((hwc->config & ARCH_PERFMON_EVENTSEL_ANY) && + perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) + return -EACCES; return 0; } diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 04f06b4be297..90e0521b1690 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -857,6 +857,21 @@ extern int sysctl_perf_event_paranoid; extern int sysctl_perf_event_mlock; extern int sysctl_perf_event_sample_rate; +static inline bool perf_paranoid_tracepoint_raw(void) +{ + return sysctl_perf_event_paranoid > -1; +} + +static inline bool perf_paranoid_cpu(void) +{ + return sysctl_perf_event_paranoid > 0; +} + +static inline bool perf_paranoid_kernel(void) +{ + return sysctl_perf_event_paranoid > 1; +} + extern void perf_event_init(void); extern void perf_tp_event(int event_id, u64 addr, u64 count, void *record, int entry_size); extern void perf_bp_event(struct perf_event *event, void *data); diff --git a/kernel/perf_event.c b/kernel/perf_event.c index a661e7991865..482d5e1d3764 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -56,21 +56,6 @@ static atomic_t nr_task_events __read_mostly; */ int sysctl_perf_event_paranoid __read_mostly = 1; -static inline bool perf_paranoid_tracepoint_raw(void) -{ - return sysctl_perf_event_paranoid > -1; -} - -static inline bool perf_paranoid_cpu(void) -{ - return sysctl_perf_event_paranoid > 0; -} - -static inline bool perf_paranoid_kernel(void) -{ - return sysctl_perf_event_paranoid > 1; -} - int sysctl_perf_event_mlock __read_mostly = 512; /* 'free' kb per user */ /* -- cgit v1.2.3 From 3082a2b7b1af1b1508c1c3fa589566064f926f40 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Tue, 16 Feb 2010 16:36:25 -0500 Subject: rfkill: Add support for KEY_RFKILL Add support for handling KEY_RFKILL in the rfkill input module. This simply toggles the state of all rfkill devices. The comment in rfkill.h is also updated to reflect that RFKILL_TYPE_ALL may be used inside the kernel. Signed-off-by: Matthew Garrett Acked-by: Marcel Holtmann Signed-off-by: John W. 
Linville --- include/linux/rfkill.h | 2 +- net/rfkill/input.c | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index 97059d08a626..4f82326eb294 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -29,7 +29,7 @@ /** * enum rfkill_type - type of rfkill switch. * - * @RFKILL_TYPE_ALL: toggles all switches (userspace only) + * @RFKILL_TYPE_ALL: toggles all switches (requests only - not a switch type) * @RFKILL_TYPE_WLAN: switch is on a 802.11 wireless network device. * @RFKILL_TYPE_BLUETOOTH: switch is on a bluetooth device. * @RFKILL_TYPE_UWB: switch is on a ultra wideband device. diff --git a/net/rfkill/input.c b/net/rfkill/input.c index a7295ad5f9cb..3713d7ecab96 100644 --- a/net/rfkill/input.c +++ b/net/rfkill/input.c @@ -212,6 +212,9 @@ static void rfkill_event(struct input_handle *handle, unsigned int type, case KEY_WIMAX: rfkill_schedule_toggle(RFKILL_TYPE_WIMAX); break; + case KEY_RFKILL: + rfkill_schedule_toggle(RFKILL_TYPE_ALL); + break; } } else if (type == EV_SW && code == SW_RFKILL_ALL) rfkill_schedule_evsw_rfkillall(data); @@ -294,6 +297,11 @@ static const struct input_device_id rfkill_ids[] = { .evbit = { BIT_MASK(EV_KEY) }, .keybit = { [BIT_WORD(KEY_WIMAX)] = BIT_MASK(KEY_WIMAX) }, }, + { + .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT, + .evbit = { BIT_MASK(EV_KEY) }, + .keybit = { [BIT_WORD(KEY_RFKILL)] = BIT_MASK(KEY_RFKILL) }, + }, { .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_SWBIT, .evbit = { BIT(EV_SW) }, -- cgit v1.2.3 From ac6ec5b1de5d1d5afcbe88d73c05df71dca0ac39 Mon Sep 17 00:00:00 2001 From: "Ira W. Snyder" Date: Mon, 21 Dec 2009 16:26:45 -0800 Subject: serial: 8250_pci: add support for MCS9865 / SYBA 6x Serial Port Card This patch is heavily based on an earlier patch found on the linux-serial mailing list [1], written by Darius Augulis. The previous incarnation of this patch only supported a 2x serial port card. I have added support for my SYBA 6x serial port card, and tested on x86. [1]: http://marc.info/?l=linux-serial&m=124975806304760 Signed-off-by: Ira W. 
Snyder Cc: Darius Augulis Cc: Greg KH Cc: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- drivers/parport/parport_pc.c | 6 ++++++ drivers/serial/8250_pci.c | 22 +++++++++++++++++++++- include/linux/pci_ids.h | 1 + 3 files changed, 28 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c index ad113b0f62db..0950fa40684f 100644 --- a/drivers/parport/parport_pc.c +++ b/drivers/parport/parport_pc.c @@ -2908,6 +2908,7 @@ enum parport_pc_pci_cards { netmos_9805, netmos_9815, netmos_9901, + netmos_9865, quatech_sppxp100, }; @@ -2989,6 +2990,7 @@ static struct parport_pc_pci { /* netmos_9805 */ { 1, { { 0, -1 }, } }, /* netmos_9815 */ { 2, { { 0, -1 }, { 2, -1 }, } }, /* netmos_9901 */ { 1, { { 0, -1 }, } }, + /* netmos_9865 */ { 1, { { 0, -1 }, } }, /* quatech_sppxp100 */ { 1, { { 0, 1 }, } }, }; @@ -3092,6 +3094,10 @@ static const struct pci_device_id parport_pc_pci_tbl[] = { PCI_ANY_ID, PCI_ANY_ID, 0, 0, netmos_9815 }, { PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9901, 0xA000, 0x2000, 0, 0, netmos_9901 }, + { PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9865, + 0xA000, 0x1000, 0, 0, netmos_9865 }, + { PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9865, + 0xA000, 0x2000, 0, 0, netmos_9865 }, /* Quatech SPPXP-100 Parallel port PCI ExpressCard */ { PCI_VENDOR_ID_QUATECH, PCI_DEVICE_ID_QUATECH_SPPXP_100, PCI_ANY_ID, PCI_ANY_ID, 0, 0, quatech_sppxp100 }, diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c index b28af13c45a1..8b18c3ce3898 100644 --- a/drivers/serial/8250_pci.c +++ b/drivers/serial/8250_pci.c @@ -760,7 +760,8 @@ static int pci_netmos_init(struct pci_dev *dev) /* subdevice 0x00PS means
parallel, serial */ unsigned int num_serial = dev->subsystem_device & 0xf; - if (dev->device == PCI_DEVICE_ID_NETMOS_9901) + if ((dev->device == PCI_DEVICE_ID_NETMOS_9901) || + (dev->device == PCI_DEVICE_ID_NETMOS_9865)) return 0; if (dev->subsystem_vendor == PCI_VENDOR_ID_IBM && dev->subsystem_device == 0x0299) @@ -1479,6 +1480,7 @@ enum pci_board_num_t { pbn_b0_bt_1_115200, pbn_b0_bt_2_115200, + pbn_b0_bt_4_115200, pbn_b0_bt_8_115200, pbn_b0_bt_1_460800, @@ -1703,6 +1705,12 @@ static struct pciserial_board pci_boards[] __devinitdata = { .base_baud = 115200, .uart_offset = 8, }, + [pbn_b0_bt_4_115200] = { + .flags = FL_BASE0|FL_BASE_BARS, + .num_ports = 4, + .base_baud = 115200, + .uart_offset = 8, + }, [pbn_b0_bt_8_115200] = { .flags = FL_BASE0|FL_BASE_BARS, .num_ports = 8, @@ -3648,6 +3656,18 @@ static struct pci_device_id serial_pci_tbl[] = { 0xA000, 0x1000, 0, 0, pbn_b0_1_115200 }, + /* + * Best Connectivity PCI Multi I/O cards + */ + + { PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9865, + 0xA000, 0x1000, + 0, 0, pbn_b0_1_115200 }, + + { PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9865, + 0xA000, 0x3004, + 0, 0, pbn_b0_bt_4_115200 }, + /* * These entries match devices with class COMMUNICATION_SERIAL, * COMMUNICATION_MODEM or COMMUNICATION_MULTISERIAL diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 0be824320580..3ec4003f5e64 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2697,6 +2697,7 @@ #define PCI_DEVICE_ID_NETMOS_9835 0x9835 #define PCI_DEVICE_ID_NETMOS_9845 0x9845 #define PCI_DEVICE_ID_NETMOS_9855 0x9855 +#define PCI_DEVICE_ID_NETMOS_9865 0x9865 #define PCI_DEVICE_ID_NETMOS_9901 0x9901 #define PCI_VENDOR_ID_3COM_2 0xa727 -- cgit v1.2.3 From 2a52fcb54fdf4b557730022aefcc794d567591fb Mon Sep 17 00:00:00 2001 From: Kiros Yeh Date: Mon, 21 Dec 2009 16:26:48 -0800 Subject: serial: add support for Korenix JetCard Add different model (with a different PCI ID) to support Korenix JetCard. 
Signed-off-by: Kiros Yeh Acked-by: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- drivers/serial/8250_pci.c | 9 +++++++++ include/linux/pci_ids.h | 2 ++ 2 files changed, 11 insertions(+) (limited to 'include') diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c index 8b18c3ce3898..01c012da4e26 100644 --- a/drivers/serial/8250_pci.c +++ b/drivers/serial/8250_pci.c @@ -3199,6 +3199,15 @@ static struct pci_device_id serial_pci_tbl[] = { 0x1208, 0x0004, 0, 0, pbn_b0_4_921600 }, + { PCI_VENDOR_ID_KORENIX, PCI_DEVICE_ID_KORENIX_JETCARDF2, + 0x1204, 0x0004, 0, 0, + pbn_b0_4_921600 }, + { PCI_VENDOR_ID_KORENIX, PCI_DEVICE_ID_KORENIX_JETCARDF2, + 0x1208, 0x0004, 0, 0, + pbn_b0_4_921600 }, + { PCI_VENDOR_ID_KORENIX, PCI_DEVICE_ID_KORENIX_JETCARDF3, + 0x1208, 0x0004, 0, 0, + pbn_b0_4_921600 }, /* * Dell Remote Access Card 4 - Tim_T_Murphy@Dell.com */ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 3ec4003f5e64..e91b1fc03e7a 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2333,6 +2333,8 @@ #define PCI_VENDOR_ID_KORENIX 0x1982 #define PCI_DEVICE_ID_KORENIX_JETCARDF0 0x1600 #define PCI_DEVICE_ID_KORENIX_JETCARDF1 0x16ff +#define PCI_DEVICE_ID_KORENIX_JETCARDF2 0x1700 +#define PCI_DEVICE_ID_KORENIX_JETCARDF3 0x17ff #define PCI_VENDOR_ID_QMI 0x1a32 -- cgit v1.2.3 From d9661adfb8e53a7647360140af3b92284cbe52d4 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 18 Feb 2010 16:43:47 +0000 Subject: tty: Keep the default buffering to sub-page units We allocate during interrupts so while our buffering is normally diced up small anyway on some hardware at speed we can pressure the VM excessively for page pairs. We don't really need big buffers to be linear so don't try so hard. In order to make this work well we will tidy up excess callers to request_room, which cannot itself enforce this break up. Signed-off-by: Alan Cox Signed-off-by: Greg Kroah-Hartman --- drivers/char/tty_buffer.c | 6 ++++-- include/linux/tty.h | 10 ++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/char/tty_buffer.c b/drivers/char/tty_buffer.c index 66fa4e10d76b..f27c4d6d956e 100644 --- a/drivers/char/tty_buffer.c +++ b/drivers/char/tty_buffer.c @@ -247,7 +247,8 @@ int tty_insert_flip_string(struct tty_struct *tty, const unsigned char *chars, { int copied = 0; do { - int space = tty_buffer_request_room(tty, size - copied); + int goal = min(size - copied, TTY_BUFFER_PAGE); + int space = tty_buffer_request_room(tty, goal); struct tty_buffer *tb = tty->buf.tail; /* If there is no space then tb may be NULL */ if (unlikely(space == 0)) @@ -283,7 +284,8 @@ int tty_insert_flip_string_flags(struct tty_struct *tty, { int copied = 0; do { - int space = tty_buffer_request_room(tty, size - copied); + int goal = min(size - copied, TTY_BUFFER_PAGE); + int space = tty_buffer_request_room(tty, goal); struct tty_buffer *tb = tty->buf.tail; /* If there is no space then tb may be NULL */ if (unlikely(space == 0)) diff --git a/include/linux/tty.h b/include/linux/tty.h index 6abfcf5b5887..d96e5882f129 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -68,6 +68,16 @@ struct tty_buffer { unsigned long data[0]; }; +/* + * We default to dicing tty buffer allocations to this many characters + * in order to avoid multiple page allocations. We assume tty_buffer itself + * is under 256 bytes. 
See tty_buffer_find for the allocation logic this + * must match + */ + +#define TTY_BUFFER_PAGE ((PAGE_SIZE - 256) / 2) + + struct tty_bufhead { struct delayed_work work; spinlock_t lock; -- cgit v1.2.3 From eec9fe7d1ab4a0dfac4cb43047a7657fffd0002f Mon Sep 17 00:00:00 2001 From: Ari Entlich Date: Fri, 19 Feb 2010 09:37:55 -0500 Subject: tty: Add a new VT mode which is like VT_PROCESS but doesn't require a VT_RELDISP ioctl call This new VT mode (VT_PROCESS_AUTO) does everything that VT_PROCESS does except that it doesn't wait for a VT_RELDISP ioctl before switching away from a VT with that mode. If the X server eventually uses this new mode, debugging and crash recovery should become easier. This is because even when currently in the VT of a frozen X server it would still be possible to switch out by doing SysRq-r and then CTRL-, sshing in and doing chvt , or any other method of VT switching. The general concensus on #xorg-devel seems to be that it should be safe to use this with X now that we have KMS. This also moves the VT_ACKACQ define to a more appropriate place, for clarity's sake. Signed-off-by: Ari Entlich Acked-by: Alan Cox Signed-off-by: Greg Kroah-Hartman --- drivers/char/vt_ioctl.c | 39 ++++++++++++++++++++------------------- include/linux/vt.h | 3 ++- 2 files changed, 22 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/drivers/char/vt_ioctl.c b/drivers/char/vt_ioctl.c index 6aa10284104a..87778dcf8727 100644 --- a/drivers/char/vt_ioctl.c +++ b/drivers/char/vt_ioctl.c @@ -888,7 +888,7 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, ret = -EFAULT; goto out; } - if (tmp.mode != VT_AUTO && tmp.mode != VT_PROCESS) { + if (tmp.mode != VT_AUTO && tmp.mode != VT_PROCESS && tmp.mode != VT_PROCESS_AUTO) { ret = -EINVAL; goto out; } @@ -1622,7 +1622,7 @@ static void complete_change_console(struct vc_data *vc) * telling it that it has acquired. Also check if it has died and * clean up (similar to logic employed in change_console()) */ - if (vc->vt_mode.mode == VT_PROCESS) { + if (vc->vt_mode.mode == VT_PROCESS || vc->vt_mode.mode == VT_PROCESS_AUTO) { /* * Send the signal as privileged - kill_pid() will * tell us if the process has gone or something else @@ -1682,7 +1682,7 @@ void change_console(struct vc_data *new_vc) * vt to auto control. */ vc = vc_cons[fg_console].d; - if (vc->vt_mode.mode == VT_PROCESS) { + if (vc->vt_mode.mode == VT_PROCESS || vc->vt_mode.mode == VT_PROCESS_AUTO) { /* * Send the signal as privileged - kill_pid() will * tell us if the process has gone or something else @@ -1693,27 +1693,28 @@ void change_console(struct vc_data *new_vc) */ vc->vt_newvt = new_vc->vc_num; if (kill_pid(vc->vt_pid, vc->vt_mode.relsig, 1) == 0) { + if(vc->vt_mode.mode == VT_PROCESS) + /* + * It worked. Mark the vt to switch to and + * return. The process needs to send us a + * VT_RELDISP ioctl to complete the switch. + */ + return; + } else { /* - * It worked. Mark the vt to switch to and - * return. The process needs to send us a - * VT_RELDISP ioctl to complete the switch. + * The controlling process has died, so we revert back to + * normal operation. In this case, we'll also change back + * to KD_TEXT mode. I'm not sure if this is strictly correct + * but it saves the agony when the X server dies and the screen + * remains blanked due to KD_GRAPHICS! It would be nice to do + * this outside of VT_PROCESS but there is no single process + * to account for and tracking tty count may be undesirable. 
*/ - return; + reset_vc(vc); } /* - * The controlling process has died, so we revert back to - * normal operation. In this case, we'll also change back - * to KD_TEXT mode. I'm not sure if this is strictly correct - * but it saves the agony when the X server dies and the screen - * remains blanked due to KD_GRAPHICS! It would be nice to do - * this outside of VT_PROCESS but there is no single process - * to account for and tracking tty count may be undesirable. - */ - reset_vc(vc); - - /* - * Fall through to normal (VT_AUTO) handling of the switch... + * Fall through to normal (VT_AUTO and VT_PROCESS_AUTO) handling of the switch... */ } diff --git a/include/linux/vt.h b/include/linux/vt.h index d5dd0bc408fd..778b7b2a47d4 100644 --- a/include/linux/vt.h +++ b/include/linux/vt.h @@ -27,7 +27,7 @@ struct vt_mode { #define VT_SETMODE 0x5602 /* set mode of active vt */ #define VT_AUTO 0x00 /* auto vt switching */ #define VT_PROCESS 0x01 /* process controls switching */ -#define VT_ACKACQ 0x02 /* acknowledge switch */ +#define VT_PROCESS_AUTO 0x02 /* process is notified of switching */ struct vt_stat { unsigned short v_active; /* active vt */ @@ -38,6 +38,7 @@ struct vt_stat { #define VT_SENDSIG 0x5604 /* signal to send to bitmask of vts */ #define VT_RELDISP 0x5605 /* release display */ +#define VT_ACKACQ 0x02 /* acknowledge switch */ #define VT_ACTIVATE 0x5606 /* make vt active */ #define VT_WAITACTIVE 0x5607 /* wait for vt active */ -- cgit v1.2.3 From e9a20171dfa0aa134d2211126d1310f2daea52cf Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Thu, 17 Dec 2009 13:01:36 +0200 Subject: USB: otg: add notifier support The notifier will be used to communicate usb events to other drivers like the charger chip. This can be used as source of information to kick usb charger detection as described by the USB Battery Charging Specification 1.1 and/or to pass bMaxPower field of selected usb_configuration to charger chip in order to use that information as input current on the charging profile setup. 
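A minimal, hypothetical sketch (not part of this patch) of the consumer side described above: a charger driver registers a notifier block on the transceiver and reacts to the new usb_xceiv_events. The charger_* names are invented, and obtaining the transceiver through otg_get_transceiver() is an assumption here, since that path lies outside this hunk.

#include <linux/notifier.h>
#include <linux/platform_device.h>
#include <linux/usb/otg.h>

static int charger_usb_notify(struct notifier_block *nb,
                              unsigned long event, void *data)
{
        switch (event) {
        case USB_EVENT_VBUS:            /* VBUS valid: kick charger detection */
        case USB_EVENT_CHARGER:         /* dedicated charger detected */
        case USB_EVENT_ENUMERATED:      /* bMaxPower of chosen config is known */
                /* (re)program the input current limit here */
                break;
        case USB_EVENT_NONE:            /* cable gone: stop charging */
        default:
                break;
        }
        return NOTIFY_OK;
}

static struct notifier_block charger_usb_nb = {
        .notifier_call  = charger_usb_notify,
};

static int charger_probe(struct platform_device *pdev)
{
        /* assumed helper; matching otg_put_transceiver() in remove() omitted */
        struct otg_transceiver *otg = otg_get_transceiver();

        if (!otg)
                return -ENODEV;
        return otg_register_notifier(otg, &charger_usb_nb);
}

Because the head added by this patch is a blocking notifier chain, callbacks run in process context and may sleep, so the transceiver driver is expected to post these events from non-atomic context.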
Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/otg.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include') diff --git a/include/linux/usb/otg.h b/include/linux/usb/otg.h index 52bb917641f0..6c0b676b27d8 100644 --- a/include/linux/usb/otg.h +++ b/include/linux/usb/otg.h @@ -9,6 +9,8 @@ #ifndef __LINUX_USB_OTG_H #define __LINUX_USB_OTG_H +#include + /* OTG defines lots of enumeration states before device reset */ enum usb_otg_state { OTG_STATE_UNDEFINED = 0, @@ -33,6 +35,14 @@ enum usb_otg_state { OTG_STATE_A_VBUS_ERR, }; +enum usb_xceiv_events { + USB_EVENT_NONE, /* no events or cable disconnected */ + USB_EVENT_VBUS, /* vbus valid event */ + USB_EVENT_ID, /* id was grounded */ + USB_EVENT_CHARGER, /* usb dedicated charger */ + USB_EVENT_ENUMERATED, /* gadget driver enumerated */ +}; + #define USB_OTG_PULLUP_ID (1 << 0) #define USB_OTG_PULLDOWN_DP (1 << 1) #define USB_OTG_PULLDOWN_DM (1 << 2) @@ -70,6 +80,9 @@ struct otg_transceiver { struct otg_io_access_ops *io_ops; void __iomem *io_priv; + /* for notification of usb_xceiv_events */ + struct blocking_notifier_head notifier; + /* to pass extra port status to the root hub */ u16 port_status; u16 port_change; @@ -203,6 +216,18 @@ otg_start_srp(struct otg_transceiver *otg) return otg->start_srp(otg); } +/* notifiers */ +static inline int +otg_register_notifier(struct otg_transceiver *otg, struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&otg->notifier, nb); +} + +static inline void +otg_unregister_notifier(struct otg_transceiver *otg, struct notifier_block *nb) +{ + blocking_notifier_chain_unregister(&otg->notifier, nb); +} /* for OTG controller drivers (and maybe other stuff) */ extern int usb_bus_start_enum(struct usb_bus *bus, unsigned port_num); -- cgit v1.2.3 From 5d3987796c7a747e5ed3ded1eb64a9632d52a1a4 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Fri, 18 Dec 2009 12:14:21 +0100 Subject: USB: storage: Never reset devices that will morph to an old mode Some devices must be switched to a new mode to fully use them. A reset would make them revert to the old mode. Therefore a reset must not be used for error handling with such devices. Signed-off-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 ++- drivers/usb/storage/transport.c | 6 ++++++ include/linux/usb/quirks.h | 3 +++ 3 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index ab93918d9207..0b689224394b 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -120,6 +120,7 @@ void usb_detect_quirks(struct usb_device *udev) * for all devices. It will affect things like hub resets * and EMF-related port disables. 
*/ - udev->persist_enabled = 1; + if (!(udev->quirks & USB_QUIRK_RESET_MORPHS)) + udev->persist_enabled = 1; #endif /* CONFIG_PM */ } diff --git a/drivers/usb/storage/transport.c b/drivers/usb/storage/transport.c index cc313d16d727..468038126e5e 100644 --- a/drivers/usb/storage/transport.c +++ b/drivers/usb/storage/transport.c @@ -47,6 +47,8 @@ #include #include +#include + #include #include #include @@ -1297,6 +1299,10 @@ int usb_stor_port_reset(struct us_data *us) { int result; + /*for these devices we must use the class specific method */ + if (us->pusb_dev->quirks & USB_QUIRK_RESET_MORPHS) + return -EPERM; + result = usb_lock_device_for_reset(us->pusb_dev, us->pusb_intf); if (result < 0) US_DEBUGP("unable to lock device for reset: %d\n", result); diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h index 2526f3bbd273..0a555dd131fc 100644 --- a/include/linux/usb/quirks.h +++ b/include/linux/usb/quirks.h @@ -19,4 +19,7 @@ /* device can't handle its Configuration or Interface strings */ #define USB_QUIRK_CONFIG_INTF_STRINGS 0x00000008 +/*device will morph if reset, don't use reset for handling errors */ +#define USB_QUIRK_RESET_MORPHS 0x00000010 + #endif /* __LINUX_USB_QUIRKS_H */ -- cgit v1.2.3 From 551cdbbeb118bd5ed301f8749aef69219284399b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 14 Jan 2010 11:08:04 -0800 Subject: USB: rename USB_SPEED_VARIABLE to USB_SPEED_WIRELESS It's really the wireless speed, so rename the thing to make more sense. Based on a recommendation from David Vrabel Cc: David Vrabel Signed-off-by: Greg Kroah-Hartman --- drivers/staging/usbip/vhci_sysfs.c | 2 +- drivers/usb/core/hub.c | 6 +++--- drivers/usb/core/sysfs.c | 2 +- drivers/usb/core/urb.c | 6 +++--- drivers/usb/host/xhci-mem.c | 4 ++-- drivers/usb/wusbcore/devconnect.c | 2 +- include/linux/usb/ch9.h | 2 +- 7 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/drivers/staging/usbip/vhci_sysfs.c b/drivers/staging/usbip/vhci_sysfs.c index d8992d10d555..f6e34e03c8e4 100644 --- a/drivers/staging/usbip/vhci_sysfs.c +++ b/drivers/staging/usbip/vhci_sysfs.c @@ -144,7 +144,7 @@ static int valid_args(__u32 rhport, enum usb_device_speed speed) case USB_SPEED_LOW: case USB_SPEED_FULL: case USB_SPEED_HIGH: - case USB_SPEED_VARIABLE: + case USB_SPEED_WIRELESS: break; default: usbip_uerr("speed %d\n", speed); diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 6f84d383ecee..4986ff628465 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -1990,7 +1990,7 @@ static int hub_port_wait_reset(struct usb_hub *hub, int port1, if (!(portstatus & USB_PORT_STAT_RESET) && (portstatus & USB_PORT_STAT_ENABLE)) { if (hub_is_wusb(hub)) - udev->speed = USB_SPEED_VARIABLE; + udev->speed = USB_SPEED_WIRELESS; else if (portstatus & USB_PORT_STAT_HIGH_SPEED) udev->speed = USB_SPEED_HIGH; else if (portstatus & USB_PORT_STAT_LOW_SPEED) @@ -2689,7 +2689,7 @@ hub_port_init (struct usb_hub *hub, struct usb_device *udev, int port1, */ switch (udev->speed) { case USB_SPEED_SUPER: - case USB_SPEED_VARIABLE: /* fixed at 512 */ + case USB_SPEED_WIRELESS: /* fixed at 512 */ udev->ep0.desc.wMaxPacketSize = cpu_to_le16(512); break; case USB_SPEED_HIGH: /* fixed at 64 */ @@ -2717,7 +2717,7 @@ hub_port_init (struct usb_hub *hub, struct usb_device *udev, int port1, case USB_SPEED_SUPER: speed = "super"; break; - case USB_SPEED_VARIABLE: + case USB_SPEED_WIRELESS: speed = "variable"; type = "Wireless "; break; diff --git a/drivers/usb/core/sysfs.c 
b/drivers/usb/core/sysfs.c index b1725abf6c7b..1b3c00b3ca3f 100644 --- a/drivers/usb/core/sysfs.c +++ b/drivers/usb/core/sysfs.c @@ -115,7 +115,7 @@ show_speed(struct device *dev, struct device_attribute *attr, char *buf) case USB_SPEED_HIGH: speed = "480"; break; - case USB_SPEED_VARIABLE: + case USB_SPEED_WIRELESS: speed = "480"; break; case USB_SPEED_SUPER: diff --git a/drivers/usb/core/urb.c b/drivers/usb/core/urb.c index e2bd153cbd89..27080561a1c2 100644 --- a/drivers/usb/core/urb.c +++ b/drivers/usb/core/urb.c @@ -437,7 +437,7 @@ int usb_submit_urb(struct urb *urb, gfp_t mem_flags) case USB_ENDPOINT_XFER_INT: /* too small? */ switch (dev->speed) { - case USB_SPEED_VARIABLE: + case USB_SPEED_WIRELESS: if (urb->interval < 6) return -EINVAL; break; @@ -453,7 +453,7 @@ int usb_submit_urb(struct urb *urb, gfp_t mem_flags) if (urb->interval > (1 << 15)) return -EINVAL; max = 1 << 15; - case USB_SPEED_VARIABLE: + case USB_SPEED_WIRELESS: if (urb->interval > 16) return -EINVAL; break; @@ -480,7 +480,7 @@ int usb_submit_urb(struct urb *urb, gfp_t mem_flags) default: return -EINVAL; } - if (dev->speed != USB_SPEED_VARIABLE) { + if (dev->speed != USB_SPEED_WIRELESS) { /* Round down to a power of 2, no more than max */ urb->interval = min(max, 1 << ilog2(urb->interval)); } diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 8045bc69083d..49f7d72f8b1b 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -454,7 +454,7 @@ int xhci_setup_addressable_virt_dev(struct xhci_hcd *xhci, struct usb_device *ud case USB_SPEED_LOW: slot_ctx->dev_info |= (u32) SLOT_SPEED_LS; break; - case USB_SPEED_VARIABLE: + case USB_SPEED_WIRELESS: xhci_dbg(xhci, "FIXME xHCI doesn't support wireless speeds\n"); return -EINVAL; break; @@ -498,7 +498,7 @@ int xhci_setup_addressable_virt_dev(struct xhci_hcd *xhci, struct usb_device *ud case USB_SPEED_LOW: ep0_ctx->ep_info2 |= MAX_PACKET(8); break; - case USB_SPEED_VARIABLE: + case USB_SPEED_WIRELESS: xhci_dbg(xhci, "FIXME xHCI doesn't support wireless speeds\n"); return -EINVAL; break; diff --git a/drivers/usb/wusbcore/devconnect.c b/drivers/usb/wusbcore/devconnect.c index dced419f7aba..1c918286159c 100644 --- a/drivers/usb/wusbcore/devconnect.c +++ b/drivers/usb/wusbcore/devconnect.c @@ -868,7 +868,7 @@ static struct usb_wireless_cap_descriptor wusb_cap_descr_default = { * reference that we'll drop. * * First we need to determine if the device is a WUSB device (else we - * ignore it). For that we use the speed setting (USB_SPEED_VARIABLE) + * ignore it). For that we use the speed setting (USB_SPEED_WIRELESS) * [FIXME: maybe we'd need something more definitive]. If so, we track * it's usb_busd and from there, the WUSB HC. * diff --git a/include/linux/usb/ch9.h b/include/linux/usb/ch9.h index 94012e649d86..e58369ff8168 100644 --- a/include/linux/usb/ch9.h +++ b/include/linux/usb/ch9.h @@ -775,7 +775,7 @@ enum usb_device_speed { USB_SPEED_UNKNOWN = 0, /* enumerating */ USB_SPEED_LOW, USB_SPEED_FULL, /* usb 1.1 */ USB_SPEED_HIGH, /* usb 2.0 */ - USB_SPEED_VARIABLE, /* wireless (usb 2.5) */ + USB_SPEED_WIRELESS, /* wireless (usb 2.5) */ USB_SPEED_SUPER, /* usb 3.0 */ }; -- cgit v1.2.3 From 5fc4e77911f457b6aa910c704eebe3a58d334116 Mon Sep 17 00:00:00 2001 From: Ajay Kumar Gupta Date: Mon, 28 Dec 2009 13:40:42 +0200 Subject: usb: musb: Add 'extvbus' in musb_hdrc_platform_data Some of the board might use external Vbus power supply on musb interface which would require to program ULPI_BUSCONTROL register. 
Adding 'extvbus' flag which can be set from such boards which will be checked at musb driver files before programming ULPI_BUSCONTROL. Signed-off-by: Ajay Kumar Gupta Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_core.c | 8 ++++++++ drivers/usb/musb/musb_regs.h | 5 +++++ include/linux/usb/musb.h | 3 +++ 3 files changed, 16 insertions(+) (limited to 'include') diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index 4c8962f976b2..074d380bf883 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -2031,6 +2031,7 @@ bad_config: /* host side needs more setup */ if (is_host_enabled(musb)) { struct usb_hcd *hcd = musb_to_hcd(musb); + u8 busctl; otg_set_host(musb->xceiv, &hcd->self); @@ -2038,6 +2039,13 @@ bad_config: hcd->self.otg_port = 1; musb->xceiv->host = &hcd->self; hcd->power_budget = 2 * (plat->power ? : 250); + + /* program PHY to use external vBus if required */ + if (plat->extvbus) { + busctl = musb_readb(musb->mregs, MUSB_ULPI_BUSCONTROL); + busctl |= MUSB_ULPI_USE_EXTVBUS; + musb_writeb(musb->mregs, MUSB_ULPI_BUSCONTROL, busctl); + } } /* For the host-only role, we can activate right away. diff --git a/drivers/usb/musb/musb_regs.h b/drivers/usb/musb/musb_regs.h index 473a94ef905f..9a8621ac5ac2 100644 --- a/drivers/usb/musb/musb_regs.h +++ b/drivers/usb/musb/musb_regs.h @@ -72,6 +72,10 @@ #define MUSB_DEVCTL_HR 0x02 #define MUSB_DEVCTL_SESSION 0x01 +/* MUSB ULPI VBUSCONTROL */ +#define MUSB_ULPI_USE_EXTVBUS 0x01 +#define MUSB_ULPI_USE_EXTVBUSIND 0x02 + /* TESTMODE */ #define MUSB_TEST_FORCE_HOST 0x80 #define MUSB_TEST_FIFO_ACCESS 0x40 @@ -246,6 +250,7 @@ /* REVISIT: vctrl/vstatus: optional vendor utmi+phy register at 0x68 */ #define MUSB_HWVERS 0x6C /* 8 bit */ +#define MUSB_ULPI_BUSCONTROL 0x70 /* 8 bit */ #define MUSB_EPINFO 0x78 /* 8 bit */ #define MUSB_RAMINFO 0x79 /* 8 bit */ diff --git a/include/linux/usb/musb.h b/include/linux/usb/musb.h index d43755669261..4b7f8fa252f0 100644 --- a/include/linux/usb/musb.h +++ b/include/linux/usb/musb.h @@ -76,6 +76,9 @@ struct musb_hdrc_platform_data { /* (HOST or OTG) msec/2 after VBUS on till power good */ u8 potpgt; + /* (HOST or OTG) program PHY for external Vbus */ + unsigned extvbus:1; + /* Power the device on or off */ int (*set_power)(int state); -- cgit v1.2.3 From 088f7fec8a0e683db72fd8826c5d3ab914e197b1 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 8 Jan 2010 12:56:54 -0500 Subject: USB: implement usb_enable_autosuspend This patch (as1326) adds usb_enable_autosuspend() and usb_disable_autosuspend() routines for use by drivers. If a driver knows that its device can handle suspends and resumes correctly, it can enable autosuspend all by itself. This is equivalent to the user writing "auto" to the device's power/level attribute. The implementation differs slightly from what it used to be. Now autosuspend is disabled simply by doing usb_autoresume_device() (to increment the usage counter) and enabled by doing usb_autosuspend_device() (to decrement the usage counter). The set_level() attribute method is updated to use the new routines, and the USB Power-Management documentation is updated. The patch adds a usb_enable_autosuspend() call to the hub driver's probe routine, allowing the special-case code for hubs in quirks.c to be removed. 
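A minimal sketch of the driver-side opt-in this patch documents (the example_* names are hypothetical; usb_enable_autosuspend() and interface_to_usbdev() are the real interfaces): a driver that knows its device suspends and resumes correctly enables autosuspend from its probe() routine, which is equivalent to the user writing "auto" to the device's power/level attribute.

static int example_probe(struct usb_interface *intf,
                         const struct usb_device_id *id)
{
        struct usb_device *udev = interface_to_usbdev(intf);

        /* ... usual endpoint and URB setup ... */

        /* this device is known to suspend/resume cleanly: allow autosuspend */
        usb_enable_autosuspend(udev);

        return 0;
}

This mirrors what the patch itself does for hubs in hub_probe(); a driver that later changes its mind can call usb_disable_autosuspend() to keep the device awake again.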
Signed-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- Documentation/usb/power-management.txt | 18 +++++++++++++++ drivers/usb/core/driver.c | 42 ++++++++++++++++++++++++++++++++++ drivers/usb/core/hub.c | 3 +++ drivers/usb/core/quirks.c | 9 ++++---- drivers/usb/core/sysfs.c | 23 ++++++------------- include/linux/usb.h | 8 +++++++ 6 files changed, 82 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/Documentation/usb/power-management.txt b/Documentation/usb/power-management.txt index 3bf6818c8cf5..e3fa189c257a 100644 --- a/Documentation/usb/power-management.txt +++ b/Documentation/usb/power-management.txt @@ -229,6 +229,11 @@ necessary operations by hand or add them to a udev script. You can also change the idle-delay time; 2 seconds is not the best choice for every device. +If a driver knows that its device has proper suspend/resume support, +it can enable autosuspend all by itself. For example, the video +driver for a laptop's webcam might do this, since these devices are +rarely used and so should normally be autosuspended. + Sometimes it turns out that even when a device does work okay with autosuspend there are still problems. For example, there are experimental patches adding autosuspend support to the usbhid driver, @@ -384,6 +389,19 @@ autosuspend, there's no delay for an autoresume. Other parts of the driver interface ----------------------------------- +Drivers can enable autosuspend for their devices by calling + + usb_enable_autosuspend(struct usb_device *udev); + +in their probe() routine, if they know that the device is capable of +suspending and resuming correctly. This is exactly equivalent to +writing "auto" to the device's power/level attribute. Likewise, +drivers can disable autosuspend by calling + + usb_disable_autosuspend(struct usb_device *udev); + +This is exactly the same as writing "on" to the power/level attribute. + Sometimes a driver needs to make sure that remote wakeup is enabled during autosuspend. For example, there's not much point autosuspending a keyboard if the user can't cause the keyboard to do a diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index 2b39583040d0..057eeab06004 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -1415,6 +1415,48 @@ static int usb_resume_both(struct usb_device *udev, pm_message_t msg) #ifdef CONFIG_USB_SUSPEND +/** + * usb_enable_autosuspend - allow a USB device to be autosuspended + * @udev: the USB device which may be autosuspended + * + * This routine allows @udev to be autosuspended. An autosuspend won't + * take place until the autosuspend_delay has elapsed and all the other + * necessary conditions are satisfied. + * + * The caller must hold @udev's device lock. + */ +int usb_enable_autosuspend(struct usb_device *udev) +{ + if (udev->autosuspend_disabled) { + udev->autosuspend_disabled = 0; + usb_autosuspend_device(udev); + } + return 0; +} +EXPORT_SYMBOL_GPL(usb_enable_autosuspend); + +/** + * usb_disable_autosuspend - prevent a USB device from being autosuspended + * @udev: the USB device which may not be autosuspended + * + * This routine prevents @udev from being autosuspended and wakes it up + * if it is already autosuspended. + * + * The caller must hold @udev's device lock. 
+ */ +int usb_disable_autosuspend(struct usb_device *udev) +{ + int rc = 0; + + if (!udev->autosuspend_disabled) { + rc = usb_autoresume_device(udev); + if (rc == 0) + udev->autosuspend_disabled = 1; + } + return rc; +} +EXPORT_SYMBOL_GPL(usb_disable_autosuspend); + /* Internal routine to adjust a device's usage counter and change * its autosuspend state. */ diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index bfa6123bbdb5..746f26f222ab 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -1224,6 +1224,9 @@ static int hub_probe(struct usb_interface *intf, const struct usb_device_id *id) desc = intf->cur_altsetting; hdev = interface_to_usbdev(intf); + /* Hubs have proper suspend/resume support */ + usb_enable_autosuspend(hdev); + if (hdev->level == MAX_TOPO_LEVEL) { dev_err(&intf->dev, "Unsupported bus topology: hub nested too deep\n"); diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 0b689224394b..4314f259524b 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -103,11 +103,10 @@ void usb_detect_quirks(struct usb_device *udev) dev_dbg(&udev->dev, "USB quirks for this device: %x\n", udev->quirks); - /* By default, disable autosuspend for all non-hubs */ -#ifdef CONFIG_USB_SUSPEND - if (udev->descriptor.bDeviceClass != USB_CLASS_HUB) - udev->autosuspend_disabled = 1; -#endif + /* By default, disable autosuspend for all devices. The hub driver + * will enable it for hubs. + */ + usb_disable_autosuspend(udev); /* For the present, all devices default to USB-PERSIST enabled */ #if 0 /* was: #ifdef CONFIG_PM */ diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c index 5a1a0e2b6474..313e241f5ccc 100644 --- a/drivers/usb/core/sysfs.c +++ b/drivers/usb/core/sysfs.c @@ -389,34 +389,25 @@ set_level(struct device *dev, struct device_attribute *attr, struct usb_device *udev = to_usb_device(dev); int len = count; char *cp; - int rc = 0; - int old_autosuspend_disabled; + int rc; cp = memchr(buf, '\n', count); if (cp) len = cp - buf; usb_lock_device(udev); - old_autosuspend_disabled = udev->autosuspend_disabled; - /* Setting the flags without calling usb_pm_lock is a subject to - * races, but who cares... - */ if (len == sizeof on_string - 1 && - strncmp(buf, on_string, len) == 0) { - udev->autosuspend_disabled = 1; - rc = usb_external_resume_device(udev, PMSG_USER_RESUME); + strncmp(buf, on_string, len) == 0) + rc = usb_disable_autosuspend(udev); - } else if (len == sizeof auto_string - 1 && - strncmp(buf, auto_string, len) == 0) { - udev->autosuspend_disabled = 0; - rc = usb_external_resume_device(udev, PMSG_USER_RESUME); + else if (len == sizeof auto_string - 1 && + strncmp(buf, auto_string, len) == 0) + rc = usb_enable_autosuspend(udev); - } else + else rc = -EINVAL; - if (rc) - udev->autosuspend_disabled = old_autosuspend_disabled; usb_unlock_device(udev); return (rc < 0 ? 
rc : count); } diff --git a/include/linux/usb.h b/include/linux/usb.h index 332eaea61021..e6419ac89ea2 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -542,6 +542,9 @@ extern struct usb_device *usb_find_device(u16 vendor_id, u16 product_id); /* USB autosuspend and autoresume */ #ifdef CONFIG_USB_SUSPEND +extern int usb_enable_autosuspend(struct usb_device *udev); +extern int usb_disable_autosuspend(struct usb_device *udev); + extern int usb_autopm_get_interface(struct usb_interface *intf); extern void usb_autopm_put_interface(struct usb_interface *intf); extern int usb_autopm_get_interface_async(struct usb_interface *intf); @@ -565,6 +568,11 @@ static inline void usb_mark_last_busy(struct usb_device *udev) #else +static inline int usb_enable_autosuspend(struct usb_device *udev) +{ return 0; } +static inline int usb_disable_autosuspend(struct usb_device *udev) +{ return 0; } + static inline int usb_autopm_get_interface(struct usb_interface *intf) { return 0; } static inline int usb_autopm_get_interface_async(struct usb_interface *intf) -- cgit v1.2.3 From 9bbdf1e0afe771ca7650f9f476769310bee9d8f3 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 8 Jan 2010 12:57:28 -0500 Subject: USB: convert to the runtime PM framework This patch (as1329) converts the USB stack over to the PM core's runtime PM framework. This involves numerous changes throughout usbcore, especially to hub.c and driver.c. Perhaps the most notable change is that CONFIG_USB_SUSPEND now depends on CONFIG_PM_RUNTIME instead of CONFIG_PM. Several fields in the usb_device and usb_interface structures are no longer needed. Some code which used to depend on CONFIG_USB_PM now depends on CONFIG_USB_SUSPEND (requiring some rearrangement of header files). The only visible change in behavior should be that following a system sleep (resume from RAM or resume from hibernation), autosuspended USB devices will be resumed just like everything else. They won't remain suspended. But if they aren't in use then they will naturally autosuspend again in a few seconds. Signed-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- Documentation/usb/power-management.txt | 217 +++------ drivers/usb/core/Kconfig | 4 +- drivers/usb/core/driver.c | 845 ++++++++++++++------------------- drivers/usb/core/hcd.c | 13 +- drivers/usb/core/hcd.h | 10 +- drivers/usb/core/hub.c | 65 +-- drivers/usb/core/message.c | 1 - drivers/usb/core/usb.c | 35 +- drivers/usb/core/usb.h | 49 +- drivers/usb/misc/usbtest.c | 4 - include/linux/usb.h | 31 +- 11 files changed, 490 insertions(+), 784 deletions(-) (limited to 'include') diff --git a/Documentation/usb/power-management.txt b/Documentation/usb/power-management.txt index e3fa189c257a..2790ad48cfc2 100644 --- a/Documentation/usb/power-management.txt +++ b/Documentation/usb/power-management.txt @@ -2,7 +2,7 @@ Alan Stern - November 10, 2009 + December 11, 2009 @@ -29,9 +29,9 @@ covered to some extent (see Documentation/power/*.txt for more information about system PM). Note: Dynamic PM support for USB is present only if the kernel was -built with CONFIG_USB_SUSPEND enabled. System PM support is present -only if the kernel was built with CONFIG_SUSPEND or CONFIG_HIBERNATION -enabled. +built with CONFIG_USB_SUSPEND enabled (which depends on +CONFIG_PM_RUNTIME). System PM support is present only if the kernel +was built with CONFIG_SUSPEND or CONFIG_HIBERNATION enabled. What is Remote Wakeup? 
@@ -326,64 +326,63 @@ driver does so by calling these six functions: void usb_autopm_get_interface_no_resume(struct usb_interface *intf); void usb_autopm_put_interface_no_suspend(struct usb_interface *intf); -The functions work by maintaining a counter in the usb_interface -structure. When intf->pm_usage_count is > 0 then the interface is -deemed to be busy, and the kernel will not autosuspend the interface's -device. When intf->pm_usage_count is <= 0 then the interface is -considered to be idle, and the kernel may autosuspend the device. +The functions work by maintaining a usage counter in the +usb_interface's embedded device structure. When the counter is > 0 +then the interface is deemed to be busy, and the kernel will not +autosuspend the interface's device. When the usage counter is = 0 +then the interface is considered to be idle, and the kernel may +autosuspend the device. -(There is a similar pm_usage_count field in struct usb_device, +(There is a similar usage counter field in struct usb_device, associated with the device itself rather than any of its interfaces. -This field is used only by the USB core.) - -Drivers must not modify intf->pm_usage_count directly; its value -should be changed only be using the functions listed above. Drivers -are responsible for insuring that the overall change to pm_usage_count -during their lifetime balances out to 0 (it may be necessary for the -disconnect method to call usb_autopm_put_interface() one or more times -to fulfill this requirement). The first two routines use the PM mutex -in struct usb_device for mutual exclusion; drivers using the async -routines are responsible for their own synchronization and mutual -exclusion. - - usb_autopm_get_interface() increments pm_usage_count and - attempts an autoresume if the new value is > 0 and the - device is suspended. - - usb_autopm_put_interface() decrements pm_usage_count and - attempts an autosuspend if the new value is <= 0 and the - device isn't suspended. +This counter is used only by the USB core.) + +Drivers need not be concerned about balancing changes to the usage +counter; the USB core will undo any remaining "get"s when a driver +is unbound from its interface. As a corollary, drivers must not call +any of the usb_autopm_* functions after their diconnect() routine has +returned. + +Drivers using the async routines are responsible for their own +synchronization and mutual exclusion. + + usb_autopm_get_interface() increments the usage counter and + does an autoresume if the device is suspended. If the + autoresume fails, the counter is decremented back. + + usb_autopm_put_interface() decrements the usage counter and + attempts an autosuspend if the new value is = 0. usb_autopm_get_interface_async() and usb_autopm_put_interface_async() do almost the same things as - their non-async counterparts. The differences are: they do - not acquire the PM mutex, and they use a workqueue to do their + their non-async counterparts. The big difference is that they + use a workqueue to do the resume or suspend part of their jobs. As a result they can be called in an atomic context, such as an URB's completion handler, but when they return the - device will not generally not yet be in the desired state. + device will generally not yet be in the desired state. usb_autopm_get_interface_no_resume() and usb_autopm_put_interface_no_suspend() merely increment or - decrement the pm_usage_count value; they do not attempt to - carry out an autoresume or an autosuspend. 
Hence they can be - called in an atomic context. + decrement the usage counter; they do not attempt to carry out + an autoresume or an autosuspend. Hence they can be called in + an atomic context. -The conventional usage pattern is that a driver calls +The simplest usage pattern is that a driver calls usb_autopm_get_interface() in its open routine and -usb_autopm_put_interface() in its close or release routine. But -other patterns are possible. +usb_autopm_put_interface() in its close or release routine. But other +patterns are possible. The autosuspend attempts mentioned above will often fail for one reason or another. For example, the power/level attribute might be set to "on", or another interface in the same device might not be idle. This is perfectly normal. If the reason for failure was that -the device hasn't been idle for long enough, a delayed workqueue -routine is automatically set up to carry out the operation when the -autosuspend idle-delay has expired. +the device hasn't been idle for long enough, a timer is scheduled to +carry out the operation automatically when the autosuspend idle-delay +has expired. Autoresume attempts also can fail, although failure would mean that the device is no longer present or operating properly. Unlike -autosuspend, there's no delay for an autoresume. +autosuspend, there's no idle-delay for an autoresume. Other parts of the driver interface @@ -413,26 +412,27 @@ though, setting this flag won't cause the kernel to autoresume it. Normally a driver would set this flag in its probe method, at which time the device is guaranteed not to be autosuspended.) -The synchronous usb_autopm_* routines have to run in a sleepable -process context; they must not be called from an interrupt handler or -while holding a spinlock. In fact, the entire autosuspend mechanism -is not well geared toward interrupt-driven operation. However there -is one thing a driver can do in an interrupt handler: +If a driver does its I/O asynchronously in interrupt context, it +should call usb_autopm_get_interface_async() before starting output and +usb_autopm_put_interface_async() when the output queue drains. When +it receives an input event, it should call usb_mark_last_busy(struct usb_device *udev); -This sets udev->last_busy to the current time. udev->last_busy is the -field used for idle-delay calculations; updating it will cause any -pending autosuspend to be moved back. The usb_autopm_* routines will -also set the last_busy field to the current time. - -Calling urb_mark_last_busy() from within an URB completion handler is -subject to races: The kernel may have just finished deciding the -device has been idle for long enough but not yet gotten around to -calling the driver's suspend method. The driver would have to be -responsible for synchronizing its suspend method with its URB -completion handler and causing the autosuspend to fail with -EBUSY if -an URB had completed too recently. +in the event handler. This sets udev->last_busy to the current time. +udev->last_busy is the field used for idle-delay calculations; +updating it will cause any pending autosuspend to be moved back. Most +of the usb_autopm_* routines will also set the last_busy field to the +current time. + +Asynchronous operation is always subject to races. For example, a +driver may call one of the usb_autopm_*_interface_async() routines at +a time when the core has just finished deciding the device has been +idle for long enough but not yet gotten around to calling the driver's +suspend method. 
The suspend method must be responsible for +synchronizing with the output request routine and the URB completion +handler; it should cause autosuspends to fail with -EBUSY if the +driver needs to use the device. External suspend calls should never be allowed to fail in this way, only autosuspend calls. The driver can tell them apart by checking @@ -440,75 +440,23 @@ the PM_EVENT_AUTO bit in the message.event argument to the suspend method; this bit will be set for internal PM events (autosuspend) and clear for external PM events. -Many of the ingredients in the autosuspend framework are oriented -towards interfaces: The usb_interface structure contains the -pm_usage_cnt field, and the usb_autopm_* routines take an interface -pointer as their argument. But somewhat confusingly, a few of the -pieces (i.e., usb_mark_last_busy()) use the usb_device structure -instead. Drivers need to keep this straight; they can call -interface_to_usbdev() to find the device structure for a given -interface. + Mutual exclusion + ---------------- - Locking requirements - -------------------- - -All three suspend/resume methods are always called while holding the -usb_device's PM mutex. For external events -- but not necessarily for -autosuspend or autoresume -- the device semaphore (udev->dev.sem) will -also be held. This implies that external suspend/resume events are -mutually exclusive with calls to probe, disconnect, pre_reset, and -post_reset; the USB core guarantees that this is true of internal -suspend/resume events as well. +For external events -- but not necessarily for autosuspend or +autoresume -- the device semaphore (udev->dev.sem) will be held when a +suspend or resume method is called. This implies that external +suspend/resume events are mutually exclusive with calls to probe, +disconnect, pre_reset, and post_reset; the USB core guarantees that +this is true of autosuspend/autoresume events as well. If a driver wants to block all suspend/resume calls during some -critical section, it can simply acquire udev->pm_mutex. Note that -calls to resume may be triggered indirectly. Block IO due to memory -allocations can make the vm subsystem resume a device. Thus while -holding this lock you must not allocate memory with GFP_KERNEL or -GFP_NOFS. - -Alternatively, if the critical section might call some of the -usb_autopm_* routines, the driver can avoid deadlock by doing: - - down(&udev->dev.sem); - rc = usb_autopm_get_interface(intf); - -and at the end of the critical section: - - if (!rc) - usb_autopm_put_interface(intf); - up(&udev->dev.sem); - -Holding the device semaphore will block all external PM calls, and the -usb_autopm_get_interface() will prevent any internal PM calls, even if -it fails. (Exercise: Why?) - -The rules for locking order are: - - Never acquire any device semaphore while holding any PM mutex. - - Never acquire udev->pm_mutex while holding the PM mutex for - a device that isn't a descendant of udev. - -In other words, PM mutexes should only be acquired going up the device -tree, and they should be acquired only after locking all the device -semaphores you need to hold. These rules don't matter to drivers very -much; they usually affect just the USB core. - -Still, drivers do need to be careful. For example, many drivers use a -private mutex to synchronize their normal I/O activities with their -disconnect method. Now if the driver supports autosuspend then it -must call usb_autopm_put_interface() from somewhere -- maybe from its -close method. 
It should make the call while holding the private mutex, -since a driver shouldn't call any of the usb_autopm_* functions for an -interface from which it has been unbound. - -But the usb_autpm_* routines always acquire the device's PM mutex, and -consequently the locking order has to be: private mutex first, PM -mutex second. Since the suspend method is always called with the PM -mutex held, it mustn't try to acquire the private mutex. It has to -synchronize with the driver's I/O activities in some other way. +critical section, the best way is to lock the device and call +usb_autopm_get_interface() (and do the reverse at the end of the +critical section). Holding the device semaphore will block all +external PM calls, and the usb_autopm_get_interface() will prevent any +internal PM calls, even if it fails. (Exercise: Why?) Interaction between dynamic PM and system PM @@ -517,22 +465,11 @@ synchronize with the driver's I/O activities in some other way. Dynamic power management and system power management can interact in a couple of ways. -Firstly, a device may already be manually suspended or autosuspended -when a system suspend occurs. Since system suspends are supposed to -be as transparent as possible, the device should remain suspended -following the system resume. The 2.6.23 kernel obeys this principle -for manually suspended devices but not for autosuspended devices; they -do get resumed when the system wakes up. (Presumably they will be -autosuspended again after their idle-delay time expires.) In later -kernels this behavior will be fixed. - -(There is an exception. If a device would undergo a reset-resume -instead of a normal resume, and the device is enabled for remote -wakeup, then the reset-resume takes place even if the device was -already suspended when the system suspend began. The justification is -that a reset-resume is a kind of remote-wakeup event. Or to put it -another way, a device which needs a reset won't be able to generate -normal remote-wakeup signals, so it ought to be resumed immediately.) +Firstly, a device may already be autosuspended when a system suspend +occurs. Since system suspends are supposed to be as transparent as +possible, the device should remain suspended following the system +resume. But this theory may not work out well in practice; over time +the kernel's behavior in this regard has changed. Secondly, a dynamic power-management event may occur as a system suspend is underway. The window for this is short, since system diff --git a/drivers/usb/core/Kconfig b/drivers/usb/core/Kconfig index ad925946f869..97a819c23ef3 100644 --- a/drivers/usb/core/Kconfig +++ b/drivers/usb/core/Kconfig @@ -91,8 +91,8 @@ config USB_DYNAMIC_MINORS If you are unsure about this, say N here. 
config USB_SUSPEND - bool "USB selective suspend/resume and wakeup" - depends on USB && PM + bool "USB runtime power management (suspend/resume and wakeup)" + depends on USB && PM_RUNTIME help If you say Y here, you can use driver calls or the sysfs "power/level" file to suspend or resume individual USB diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index 638d54693a1c..6850ec6576f8 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include "hcd.h" #include "usb.h" @@ -221,7 +221,7 @@ static int usb_probe_device(struct device *dev) { struct usb_device_driver *udriver = to_usb_device_driver(dev->driver); struct usb_device *udev = to_usb_device(dev); - int error = -ENODEV; + int error = 0; dev_dbg(dev, "%s\n", __func__); @@ -230,18 +230,23 @@ static int usb_probe_device(struct device *dev) /* The device should always appear to be in use * unless the driver suports autosuspend. */ - udev->pm_usage_cnt = !(udriver->supports_autosuspend); + if (!udriver->supports_autosuspend) + error = usb_autoresume_device(udev); - error = udriver->probe(udev); + if (!error) + error = udriver->probe(udev); return error; } /* called from driver core with dev locked */ static int usb_unbind_device(struct device *dev) { + struct usb_device *udev = to_usb_device(dev); struct usb_device_driver *udriver = to_usb_device_driver(dev->driver); - udriver->disconnect(to_usb_device(dev)); + udriver->disconnect(udev); + if (!udriver->supports_autosuspend) + usb_autosuspend_device(udev); return 0; } @@ -293,17 +298,16 @@ static int usb_probe_interface(struct device *dev) if (error) return error; - /* Interface "power state" doesn't correspond to any hardware - * state whatsoever. We use it to record when it's bound to - * a driver that may start I/0: it's not frozen/quiesced. - */ - mark_active(intf); intf->condition = USB_INTERFACE_BINDING; - /* The interface should always appear to be in use - * unless the driver suports autosuspend. + /* Bound interfaces are initially active. They are + * runtime-PM-enabled only if the driver has autosuspend support. + * They are sensitive to their children's power states. 
*/ - atomic_set(&intf->pm_usage_cnt, !driver->supports_autosuspend); + pm_runtime_set_active(dev); + pm_suspend_ignore_children(dev, false); + if (driver->supports_autosuspend) + pm_runtime_enable(dev); /* Carry out a deferred switch to altsetting 0 */ if (intf->needs_altsetting0) { @@ -323,10 +327,14 @@ static int usb_probe_interface(struct device *dev) return error; err: - mark_quiesced(intf); intf->needs_remote_wakeup = 0; intf->condition = USB_INTERFACE_UNBOUND; usb_cancel_queued_reset(intf); + + /* Unbound interfaces are always runtime-PM-disabled and -suspended */ + pm_runtime_disable(dev); + pm_runtime_set_suspended(dev); + usb_autosuspend_device(udev); return error; } @@ -376,9 +384,17 @@ static int usb_unbind_interface(struct device *dev) usb_set_intfdata(intf, NULL); intf->condition = USB_INTERFACE_UNBOUND; - mark_quiesced(intf); intf->needs_remote_wakeup = 0; + /* Unbound interfaces are always runtime-PM-disabled and -suspended */ + pm_runtime_disable(dev); + pm_runtime_set_suspended(dev); + + /* Undo any residual pm_autopm_get_interface_* calls */ + for (r = atomic_read(&intf->pm_usage_cnt); r > 0; --r) + usb_autopm_put_interface_no_suspend(intf); + atomic_set(&intf->pm_usage_cnt, 0); + if (!error) usb_autosuspend_device(udev); @@ -409,7 +425,6 @@ int usb_driver_claim_interface(struct usb_driver *driver, struct usb_interface *iface, void *priv) { struct device *dev = &iface->dev; - struct usb_device *udev = interface_to_usbdev(iface); int retval = 0; if (dev->driver) @@ -419,11 +434,16 @@ int usb_driver_claim_interface(struct usb_driver *driver, usb_set_intfdata(iface, priv); iface->needs_binding = 0; - usb_pm_lock(udev); iface->condition = USB_INTERFACE_BOUND; - mark_active(iface); - atomic_set(&iface->pm_usage_cnt, !driver->supports_autosuspend); - usb_pm_unlock(udev); + + /* Bound interfaces are initially active. They are + * runtime-PM-enabled only if the driver has autosuspend support. + * They are sensitive to their children's power states. 
+ */ + pm_runtime_set_active(dev); + pm_suspend_ignore_children(dev, false); + if (driver->supports_autosuspend) + pm_runtime_enable(dev); /* if interface was already added, bind now; else let * the future device_add() bind it, bypassing probe() @@ -982,7 +1002,6 @@ static void do_unbind_rebind(struct usb_device *udev, int action) } } -/* Caller has locked udev's pm_mutex */ static int usb_suspend_device(struct usb_device *udev, pm_message_t msg) { struct usb_device_driver *udriver; @@ -1006,7 +1025,6 @@ static int usb_suspend_device(struct usb_device *udev, pm_message_t msg) return status; } -/* Caller has locked udev's pm_mutex */ static int usb_resume_device(struct usb_device *udev, pm_message_t msg) { struct usb_device_driver *udriver; @@ -1040,27 +1058,20 @@ static int usb_resume_device(struct usb_device *udev, pm_message_t msg) return status; } -/* Caller has locked intf's usb_device's pm mutex */ static int usb_suspend_interface(struct usb_device *udev, struct usb_interface *intf, pm_message_t msg) { struct usb_driver *driver; int status = 0; - /* with no hardware, USB interfaces only use FREEZE and ON states */ - if (udev->state == USB_STATE_NOTATTACHED || !is_active(intf)) - goto done; - - /* This can happen; see usb_driver_release_interface() */ - if (intf->condition == USB_INTERFACE_UNBOUND) + if (udev->state == USB_STATE_NOTATTACHED || + intf->condition == USB_INTERFACE_UNBOUND) goto done; driver = to_usb_driver(intf->dev.driver); if (driver->suspend) { status = driver->suspend(intf, msg); - if (status == 0) - mark_quiesced(intf); - else if (!(msg.event & PM_EVENT_AUTO)) + if (status && !(msg.event & PM_EVENT_AUTO)) dev_err(&intf->dev, "%s error %d\n", "suspend", status); } else { @@ -1068,7 +1079,6 @@ static int usb_suspend_interface(struct usb_device *udev, intf->needs_binding = 1; dev_warn(&intf->dev, "no %s for driver %s?\n", "suspend", driver->name); - mark_quiesced(intf); } done: @@ -1076,14 +1086,13 @@ static int usb_suspend_interface(struct usb_device *udev, return status; } -/* Caller has locked intf's usb_device's pm_mutex */ static int usb_resume_interface(struct usb_device *udev, struct usb_interface *intf, pm_message_t msg, int reset_resume) { struct usb_driver *driver; int status = 0; - if (udev->state == USB_STATE_NOTATTACHED || is_active(intf)) + if (udev->state == USB_STATE_NOTATTACHED) goto done; /* Don't let autoresume interfere with unbinding */ @@ -1134,90 +1143,11 @@ static int usb_resume_interface(struct usb_device *udev, done: dev_vdbg(&intf->dev, "%s: status %d\n", __func__, status); - if (status == 0 && intf->condition == USB_INTERFACE_BOUND) - mark_active(intf); /* Later we will unbind the driver and/or reprobe, if necessary */ return status; } -#ifdef CONFIG_USB_SUSPEND - -/* Internal routine to check whether we may autosuspend a device. */ -static int autosuspend_check(struct usb_device *udev, int reschedule) -{ - int i; - struct usb_interface *intf; - unsigned long suspend_time, j; - - /* For autosuspend, fail fast if anything is in use or autosuspend - * is disabled. Also fail if any interfaces require remote wakeup - * but it isn't available. 
- */ - if (udev->pm_usage_cnt > 0) - return -EBUSY; - if (udev->autosuspend_delay < 0 || udev->autosuspend_disabled) - return -EPERM; - - suspend_time = udev->last_busy + udev->autosuspend_delay; - if (udev->actconfig) { - for (i = 0; i < udev->actconfig->desc.bNumInterfaces; i++) { - intf = udev->actconfig->interface[i]; - if (!is_active(intf)) - continue; - if (atomic_read(&intf->pm_usage_cnt) > 0) - return -EBUSY; - if (intf->needs_remote_wakeup && - !udev->do_remote_wakeup) { - dev_dbg(&udev->dev, "remote wakeup needed " - "for autosuspend\n"); - return -EOPNOTSUPP; - } - - /* Don't allow autosuspend if the device will need - * a reset-resume and any of its interface drivers - * doesn't include support. - */ - if (udev->quirks & USB_QUIRK_RESET_RESUME) { - struct usb_driver *driver; - - driver = to_usb_driver(intf->dev.driver); - if (!driver->reset_resume || - intf->needs_remote_wakeup) - return -EOPNOTSUPP; - } - } - } - - /* If everything is okay but the device hasn't been idle for long - * enough, queue a delayed autosuspend request. If the device - * _has_ been idle for long enough and the reschedule flag is set, - * likewise queue a delayed (1 second) autosuspend request. - */ - j = jiffies; - if (time_before(j, suspend_time)) - reschedule = 1; - else - suspend_time = j + HZ; - if (reschedule) { - if (!timer_pending(&udev->autosuspend.timer)) { - queue_delayed_work(ksuspend_usb_wq, &udev->autosuspend, - round_jiffies_up_relative(suspend_time - j)); - } - return -EAGAIN; - } - return 0; -} - -#else - -static inline int autosuspend_check(struct usb_device *udev, int reschedule) -{ - return 0; -} - -#endif /* CONFIG_USB_SUSPEND */ - /** * usb_suspend_both - suspend a USB device and its interfaces * @udev: the usb_device to suspend @@ -1229,27 +1159,12 @@ static inline int autosuspend_check(struct usb_device *udev, int reschedule) * all the interfaces which were suspended are resumed so that they remain * in the same state as the device. * - * If an autosuspend is in progress the routine checks first to make sure - * that neither the device itself or any of its active interfaces is in use - * (pm_usage_cnt is greater than 0). If they are, the autosuspend fails. - * - * If the suspend succeeds, the routine recursively queues an autosuspend - * request for @udev's parent device, thereby propagating the change up - * the device tree. If all of the parent's children are now suspended, - * the parent will autosuspend in turn. - * - * The suspend method calls are subject to mutual exclusion under control - * of @udev's pm_mutex. Many of these calls are also under the protection - * of @udev's device lock (including all requests originating outside the - * USB subsystem), but autosuspend requests generated by a child device or - * interface driver may not be. Usbcore will insure that the method calls - * do not arrive during bind, unbind, or reset operations. However, drivers - * must be prepared to handle suspend calls arriving at unpredictable times. - * The only way to block such calls is to do an autoresume (preventing - * autosuspends) while holding @udev's device lock (preventing outside - * suspends). - * - * The caller must hold @udev->pm_mutex. + * Autosuspend requests originating from a child device or an interface + * driver may be made without the protection of @udev's device lock, but + * all other suspend calls will hold the lock. Usbcore will insure that + * method calls do not arrive during bind, unbind, or reset operations. 
+ * However drivers must be prepared to handle suspend calls arriving at + * unpredictable times. * * This routine can run only in process context. */ @@ -1258,20 +1173,11 @@ static int usb_suspend_both(struct usb_device *udev, pm_message_t msg) int status = 0; int i = 0; struct usb_interface *intf; - struct usb_device *parent = udev->parent; if (udev->state == USB_STATE_NOTATTACHED || udev->state == USB_STATE_SUSPENDED) goto done; - udev->do_remote_wakeup = device_may_wakeup(&udev->dev); - - if (msg.event & PM_EVENT_AUTO) { - status = autosuspend_check(udev, 0); - if (status < 0) - goto done; - } - /* Suspend all the interfaces and then udev itself */ if (udev->actconfig) { for (; i < udev->actconfig->desc.bNumInterfaces; i++) { @@ -1286,35 +1192,21 @@ static int usb_suspend_both(struct usb_device *udev, pm_message_t msg) /* If the suspend failed, resume interfaces that did get suspended */ if (status != 0) { - pm_message_t msg2; - - msg2.event = msg.event ^ (PM_EVENT_SUSPEND | PM_EVENT_RESUME); + msg.event ^= (PM_EVENT_SUSPEND | PM_EVENT_RESUME); while (--i >= 0) { intf = udev->actconfig->interface[i]; - usb_resume_interface(udev, intf, msg2, 0); + usb_resume_interface(udev, intf, msg, 0); } - /* Try another autosuspend when the interfaces aren't busy */ - if (msg.event & PM_EVENT_AUTO) - autosuspend_check(udev, status == -EBUSY); - - /* If the suspend succeeded then prevent any more URB submissions, - * flush any outstanding URBs, and propagate the suspend up the tree. + /* If the suspend succeeded then prevent any more URB submissions + * and flush any outstanding URBs. */ } else { - cancel_delayed_work(&udev->autosuspend); udev->can_submit = 0; for (i = 0; i < 16; ++i) { usb_hcd_flush_endpoint(udev, udev->ep_out[i]); usb_hcd_flush_endpoint(udev, udev->ep_in[i]); } - - /* If this is just a FREEZE or a PRETHAW, udev might - * not really be suspended. Only true suspends get - * propagated up the device tree. - */ - if (parent && udev->state == USB_STATE_SUSPENDED) - usb_autosuspend_device(parent); } done: @@ -1331,23 +1223,12 @@ static int usb_suspend_both(struct usb_device *udev, pm_message_t msg) * the resume method for @udev and then calls the resume methods for all * the interface drivers in @udev. * - * Before starting the resume, the routine calls itself recursively for - * the parent device of @udev, thereby propagating the change up the device - * tree and assuring that @udev will be able to resume. If the parent is - * unable to resume successfully, the routine fails. - * - * The resume method calls are subject to mutual exclusion under control - * of @udev's pm_mutex. Many of these calls are also under the protection - * of @udev's device lock (including all requests originating outside the - * USB subsystem), but autoresume requests generated by a child device or - * interface driver may not be. Usbcore will insure that the method calls - * do not arrive during bind, unbind, or reset operations. However, drivers - * must be prepared to handle resume calls arriving at unpredictable times. - * The only way to block such calls is to do an autoresume (preventing - * other autoresumes) while holding @udev's device lock (preventing outside - * resumes). - * - * The caller must hold @udev->pm_mutex. + * Autoresume requests originating from a child device or an interface + * driver may be made without the protection of @udev's device lock, but + * all other resume calls will hold the lock. 
Usbcore will insure that + * method calls do not arrive during bind, unbind, or reset operations. + * However drivers must be prepared to handle resume calls arriving at + * unpredictable times. * * This routine can run only in process context. */ @@ -1356,48 +1237,18 @@ static int usb_resume_both(struct usb_device *udev, pm_message_t msg) int status = 0; int i; struct usb_interface *intf; - struct usb_device *parent = udev->parent; - cancel_delayed_work(&udev->autosuspend); if (udev->state == USB_STATE_NOTATTACHED) { status = -ENODEV; goto done; } udev->can_submit = 1; - /* Propagate the resume up the tree, if necessary */ - if (udev->state == USB_STATE_SUSPENDED) { - if (parent) { - status = usb_autoresume_device(parent); - if (status == 0) { - status = usb_resume_device(udev, msg); - if (status || udev->state == - USB_STATE_NOTATTACHED) { - usb_autosuspend_device(parent); - - /* It's possible usb_resume_device() - * failed after the port was - * unsuspended, causing udev to be - * logically disconnected. We don't - * want usb_disconnect() to autosuspend - * the parent again, so tell it that - * udev disconnected while still - * suspended. */ - if (udev->state == - USB_STATE_NOTATTACHED) - udev->discon_suspended = 1; - } - } - } else { - - /* We can't progagate beyond the USB subsystem, - * so if a root hub's controller is suspended - * then we're stuck. */ - status = usb_resume_device(udev, msg); - } - } else if (udev->reset_resume) + /* Resume the device */ + if (udev->state == USB_STATE_SUSPENDED || udev->reset_resume) status = usb_resume_device(udev, msg); + /* Resume the interfaces */ if (status == 0 && udev->actconfig) { for (i = 0; i < udev->actconfig->desc.bNumInterfaces; i++) { intf = udev->actconfig->interface[i]; @@ -1413,104 +1264,46 @@ static int usb_resume_both(struct usb_device *udev, pm_message_t msg) return status; } -/** - * usb_external_suspend_device - external suspend of a USB device and its interfaces - * @udev: the usb_device to suspend - * @msg: Power Management message describing this state transition - * - * This routine handles external suspend requests: ones not generated - * internally by a USB driver (autosuspend) but rather coming from the user - * (via sysfs) or the PM core (system sleep). The suspend will be carried - * out regardless of @udev's usage counter or those of its interfaces, - * and regardless of whether or not remote wakeup is enabled. Of course, - * interface drivers still have the option of failing the suspend (if - * there are unsuspended children, for example). - * - * The caller must hold @udev's device lock. - */ -int usb_external_suspend_device(struct usb_device *udev, pm_message_t msg) -{ - int status; - - do_unbind_rebind(udev, DO_UNBIND); - usb_pm_lock(udev); - status = usb_suspend_both(udev, msg); - usb_pm_unlock(udev); - return status; -} - -/** - * usb_external_resume_device - external resume of a USB device and its interfaces - * @udev: the usb_device to resume - * @msg: Power Management message describing this state transition - * - * This routine handles external resume requests: ones not generated - * internally by a USB driver (autoresume) but rather coming from the user - * (via sysfs), the PM core (system resume), or the device itself (remote - * wakeup). @udev's usage counter is unaffected. - * - * The caller must hold @udev's device lock. 
- */ -int usb_external_resume_device(struct usb_device *udev, pm_message_t msg) -{ - int status; - - usb_pm_lock(udev); - status = usb_resume_both(udev, msg); - udev->last_busy = jiffies; - usb_pm_unlock(udev); - if (status == 0) - do_unbind_rebind(udev, DO_REBIND); - - /* Now that the device is awake, we can start trying to autosuspend - * it again. */ - if (status == 0) - usb_try_autosuspend_device(udev); - return status; -} - +/* The device lock is held by the PM core */ int usb_suspend(struct device *dev, pm_message_t msg) { - struct usb_device *udev; - - udev = to_usb_device(dev); + struct usb_device *udev = to_usb_device(dev); - /* If udev is already suspended, we can skip this suspend and - * we should also skip the upcoming system resume. High-speed - * root hubs are an exception; they need to resume whenever the - * system wakes up in order for USB-PERSIST port handover to work - * properly. - */ - if (udev->state == USB_STATE_SUSPENDED) { - if (udev->parent || udev->speed != USB_SPEED_HIGH) - udev->skip_sys_resume = 1; - return 0; - } - - udev->skip_sys_resume = 0; - return usb_external_suspend_device(udev, msg); + do_unbind_rebind(udev, DO_UNBIND); + udev->do_remote_wakeup = device_may_wakeup(&udev->dev); + return usb_suspend_both(udev, msg); } +/* The device lock is held by the PM core */ int usb_resume(struct device *dev, pm_message_t msg) { - struct usb_device *udev; + struct usb_device *udev = to_usb_device(dev); int status; - udev = to_usb_device(dev); + /* For PM complete calls, all we do is rebind interfaces */ + if (msg.event == PM_EVENT_ON) { + if (udev->state != USB_STATE_NOTATTACHED) + do_unbind_rebind(udev, DO_REBIND); + status = 0; - /* If udev->skip_sys_resume is set then udev was already suspended - * when the system sleep started, so we don't want to resume it - * during this system wakeup. + /* For all other calls, take the device back to full power and + * tell the PM core in case it was autosuspended previously. */ - if (udev->skip_sys_resume) - return 0; - status = usb_external_resume_device(udev, msg); + } else { + status = usb_resume_both(udev, msg); + if (status == 0) { + pm_runtime_disable(dev); + pm_runtime_set_active(dev); + pm_runtime_enable(dev); + udev->last_busy = jiffies; + } + } /* Avoid PM error messages for devices disconnected while suspended * as we'll display regular disconnect messages just a bit later. */ if (status == -ENODEV) - return 0; + status = 0; return status; } @@ -1560,54 +1353,6 @@ int usb_disable_autosuspend(struct usb_device *udev) } EXPORT_SYMBOL_GPL(usb_disable_autosuspend); -/* Internal routine to adjust a device's usage counter and change - * its autosuspend state. 
- */ -static int usb_autopm_do_device(struct usb_device *udev, int inc_usage_cnt) -{ - int status = 0; - - usb_pm_lock(udev); - udev->pm_usage_cnt += inc_usage_cnt; - WARN_ON(udev->pm_usage_cnt < 0); - if (inc_usage_cnt) - udev->last_busy = jiffies; - if (inc_usage_cnt >= 0 && udev->pm_usage_cnt > 0) { - if (udev->state == USB_STATE_SUSPENDED) - status = usb_resume_both(udev, PMSG_AUTO_RESUME); - if (status != 0) - udev->pm_usage_cnt -= inc_usage_cnt; - else if (inc_usage_cnt) - udev->last_busy = jiffies; - } else if (inc_usage_cnt <= 0 && udev->pm_usage_cnt <= 0) { - status = usb_suspend_both(udev, PMSG_AUTO_SUSPEND); - } - usb_pm_unlock(udev); - return status; -} - -/* usb_autosuspend_work - callback routine to autosuspend a USB device */ -void usb_autosuspend_work(struct work_struct *work) -{ - struct usb_device *udev = - container_of(work, struct usb_device, autosuspend.work); - - usb_autopm_do_device(udev, 0); -} - -/* usb_autoresume_work - callback routine to autoresume a USB device */ -void usb_autoresume_work(struct work_struct *work) -{ - struct usb_device *udev = - container_of(work, struct usb_device, autoresume); - - /* Wake it up, let the drivers do their thing, and then put it - * back to sleep. - */ - if (usb_autopm_do_device(udev, 1) == 0) - usb_autopm_do_device(udev, -1); -} - /** * usb_autosuspend_device - delayed autosuspend of a USB device and its interfaces * @udev: the usb_device to autosuspend @@ -1616,12 +1361,9 @@ void usb_autoresume_work(struct work_struct *work) * @udev and wants to allow it to autosuspend. Examples would be when * @udev's device file in usbfs is closed or after a configuration change. * - * @udev's usage counter is decremented. If it or any of the usage counters - * for an active interface is greater than 0, no autosuspend request will be - * queued. (If an interface driver does not support autosuspend then its - * usage counter is permanently positive.) Furthermore, if an interface - * driver requires remote-wakeup capability during autosuspend but remote - * wakeup is disabled, the autosuspend will fail. + * @udev's usage counter is decremented; if it drops to 0 and all the + * interfaces are inactive then a delayed autosuspend will be attempted. + * The attempt may fail (see autosuspend_check()). * * The caller must hold @udev's device lock. * @@ -1631,9 +1373,11 @@ void usb_autosuspend_device(struct usb_device *udev) { int status; - status = usb_autopm_do_device(udev, -1); - dev_vdbg(&udev->dev, "%s: cnt %d\n", - __func__, udev->pm_usage_cnt); + udev->last_busy = jiffies; + status = pm_runtime_put_sync(&udev->dev); + dev_vdbg(&udev->dev, "%s: cnt %d -> %d\n", + __func__, atomic_read(&udev->dev.power.usage_count), + status); } /** @@ -1643,9 +1387,9 @@ void usb_autosuspend_device(struct usb_device *udev) * This routine should be called when a core subsystem thinks @udev may * be ready to autosuspend. * - * @udev's usage counter left unchanged. If it or any of the usage counters - * for an active interface is greater than 0, or autosuspend is not allowed - * for any other reason, no autosuspend request will be queued. + * @udev's usage counter left unchanged. If it is 0 and all the interfaces + * are inactive then an autosuspend will be attempted. The attempt may + * fail or be delayed. * * The caller must hold @udev's device lock. 
* @@ -1653,9 +1397,12 @@ void usb_autosuspend_device(struct usb_device *udev) */ void usb_try_autosuspend_device(struct usb_device *udev) { - usb_autopm_do_device(udev, 0); - dev_vdbg(&udev->dev, "%s: cnt %d\n", - __func__, udev->pm_usage_cnt); + int status; + + status = pm_runtime_idle(&udev->dev); + dev_vdbg(&udev->dev, "%s: cnt %d -> %d\n", + __func__, atomic_read(&udev->dev.power.usage_count), + status); } /** @@ -1664,9 +1411,9 @@ void usb_try_autosuspend_device(struct usb_device *udev) * * This routine should be called when a core subsystem wants to use @udev * and needs to guarantee that it is not suspended. No autosuspend will - * occur until usb_autosuspend_device is called. (Note that this will not - * prevent suspend events originating in the PM core.) Examples would be - * when @udev's device file in usbfs is opened or when a remote-wakeup + * occur until usb_autosuspend_device() is called. (Note that this will + * not prevent suspend events originating in the PM core.) Examples would + * be when @udev's device file in usbfs is opened or when a remote-wakeup * request is received. * * @udev's usage counter is incremented to prevent subsequent autosuspends. @@ -1680,42 +1427,14 @@ int usb_autoresume_device(struct usb_device *udev) { int status; - status = usb_autopm_do_device(udev, 1); - dev_vdbg(&udev->dev, "%s: status %d cnt %d\n", - __func__, status, udev->pm_usage_cnt); - return status; -} - -/* Internal routine to adjust an interface's usage counter and change - * its device's autosuspend state. - */ -static int usb_autopm_do_interface(struct usb_interface *intf, - int inc_usage_cnt) -{ - struct usb_device *udev = interface_to_usbdev(intf); - int status = 0; - - usb_pm_lock(udev); - if (intf->condition == USB_INTERFACE_UNBOUND) - status = -ENODEV; - else { - atomic_add(inc_usage_cnt, &intf->pm_usage_cnt); - udev->last_busy = jiffies; - if (inc_usage_cnt >= 0 && - atomic_read(&intf->pm_usage_cnt) > 0) { - if (udev->state == USB_STATE_SUSPENDED) - status = usb_resume_both(udev, - PMSG_AUTO_RESUME); - if (status != 0) - atomic_sub(inc_usage_cnt, &intf->pm_usage_cnt); - else - udev->last_busy = jiffies; - } else if (inc_usage_cnt <= 0 && - atomic_read(&intf->pm_usage_cnt) <= 0) { - status = usb_suspend_both(udev, PMSG_AUTO_SUSPEND); - } - } - usb_pm_unlock(udev); + status = pm_runtime_get_sync(&udev->dev); + if (status < 0) + pm_runtime_put_sync(&udev->dev); + dev_vdbg(&udev->dev, "%s: cnt %d -> %d\n", + __func__, atomic_read(&udev->dev.power.usage_count), + status); + if (status > 0) + status = 0; return status; } @@ -1729,34 +1448,25 @@ static int usb_autopm_do_interface(struct usb_interface *intf, * closed. * * The routine decrements @intf's usage counter. When the counter reaches - * 0, a delayed autosuspend request for @intf's device is queued. When - * the delay expires, if @intf->pm_usage_cnt is still <= 0 along with all - * the other usage counters for the sibling interfaces and @intf's - * usb_device, the device and all its interfaces will be autosuspended. - * - * Note that @intf->pm_usage_cnt is owned by the interface driver. The - * core will not change its value other than the increment and decrement - * in usb_autopm_get_interface and usb_autopm_put_interface. The driver - * may use this simple counter-oriented discipline or may set the value - * any way it likes. + * 0, a delayed autosuspend request for @intf's device is attempted. The + * attempt may fail (see autosuspend_check()). 
* * If the driver has set @intf->needs_remote_wakeup then autosuspend will * take place only if the device's remote-wakeup facility is enabled. * - * Suspend method calls queued by this routine can arrive at any time - * while @intf is resumed and its usage counter is equal to 0. They are - * not protected by the usb_device's lock but only by its pm_mutex. - * Drivers must provide their own synchronization. - * * This routine can run only in process context. */ void usb_autopm_put_interface(struct usb_interface *intf) { - int status; + struct usb_device *udev = interface_to_usbdev(intf); + int status; - status = usb_autopm_do_interface(intf, -1); - dev_vdbg(&intf->dev, "%s: status %d cnt %d\n", - __func__, status, atomic_read(&intf->pm_usage_cnt)); + udev->last_busy = jiffies; + atomic_dec(&intf->pm_usage_cnt); + status = pm_runtime_put_sync(&intf->dev); + dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n", + __func__, atomic_read(&intf->dev.power.usage_count), + status); } EXPORT_SYMBOL_GPL(usb_autopm_put_interface); @@ -1764,11 +1474,11 @@ EXPORT_SYMBOL_GPL(usb_autopm_put_interface); * usb_autopm_put_interface_async - decrement a USB interface's PM-usage counter * @intf: the usb_interface whose counter should be decremented * - * This routine does essentially the same thing as - * usb_autopm_put_interface(): it decrements @intf's usage counter and - * queues a delayed autosuspend request if the counter is <= 0. The - * difference is that it does not acquire the device's pm_mutex; - * callers must handle all synchronization issues themselves. + * This routine does much the same thing as usb_autopm_put_interface(): + * It decrements @intf's usage counter and schedules a delayed + * autosuspend request if the counter is <= 0. The difference is that it + * does not perform any synchronization; callers should hold a private + * lock and handle all synchronization issues themselves. * * Typically a driver would call this routine during an URB's completion * handler, if no more URBs were pending. @@ -1778,27 +1488,57 @@ EXPORT_SYMBOL_GPL(usb_autopm_put_interface); void usb_autopm_put_interface_async(struct usb_interface *intf) { struct usb_device *udev = interface_to_usbdev(intf); + unsigned long last_busy; int status = 0; - if (intf->condition == USB_INTERFACE_UNBOUND) { - status = -ENODEV; - } else { - udev->last_busy = jiffies; - atomic_dec(&intf->pm_usage_cnt); - if (udev->autosuspend_disabled || udev->autosuspend_delay < 0) - status = -EPERM; - else if (atomic_read(&intf->pm_usage_cnt) <= 0 && - !timer_pending(&udev->autosuspend.timer)) { - queue_delayed_work(ksuspend_usb_wq, &udev->autosuspend, + last_busy = udev->last_busy; + udev->last_busy = jiffies; + atomic_dec(&intf->pm_usage_cnt); + pm_runtime_put_noidle(&intf->dev); + + if (!udev->autosuspend_disabled) { + /* Optimization: Don't schedule a delayed autosuspend if + * the timer is already running and the expiration time + * wouldn't change. + * + * We have to use the interface's timer. Attempts to + * schedule a suspend for the device would fail because + * the interface is still active. 
+ */ + if (intf->dev.power.timer_expires == 0 || + round_jiffies_up(last_busy) != + round_jiffies_up(jiffies)) { + status = pm_schedule_suspend(&intf->dev, + jiffies_to_msecs( round_jiffies_up_relative( - udev->autosuspend_delay)); + udev->autosuspend_delay))); } } - dev_vdbg(&intf->dev, "%s: status %d cnt %d\n", - __func__, status, atomic_read(&intf->pm_usage_cnt)); + dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n", + __func__, atomic_read(&intf->dev.power.usage_count), + status); } EXPORT_SYMBOL_GPL(usb_autopm_put_interface_async); +/** + * usb_autopm_put_interface_no_suspend - decrement a USB interface's PM-usage counter + * @intf: the usb_interface whose counter should be decremented + * + * This routine decrements @intf's usage counter but does not carry out an + * autosuspend. + * + * This routine can run in atomic context. + */ +void usb_autopm_put_interface_no_suspend(struct usb_interface *intf) +{ + struct usb_device *udev = interface_to_usbdev(intf); + + udev->last_busy = jiffies; + atomic_dec(&intf->pm_usage_cnt); + pm_runtime_put_noidle(&intf->dev); +} +EXPORT_SYMBOL_GPL(usb_autopm_put_interface_no_suspend); + /** * usb_autopm_get_interface - increment a USB interface's PM-usage counter * @intf: the usb_interface whose counter should be incremented @@ -1811,25 +1551,8 @@ EXPORT_SYMBOL_GPL(usb_autopm_put_interface_async); * or @intf is unbound. A typical example would be a character-device * driver when its device file is opened. * - * - * The routine increments @intf's usage counter. (However if the - * autoresume fails then the counter is re-decremented.) So long as the - * counter is greater than 0, autosuspend will not be allowed for @intf - * or its usb_device. When the driver is finished using @intf it should - * call usb_autopm_put_interface() to decrement the usage counter and - * queue a delayed autosuspend request (if the counter is <= 0). - * - * - * Note that @intf->pm_usage_cnt is owned by the interface driver. The - * core will not change its value other than the increment and decrement - * in usb_autopm_get_interface and usb_autopm_put_interface. The driver - * may use this simple counter-oriented discipline or may set the value - * any way it likes. - * - * Resume method calls generated by this routine can arrive at any time - * while @intf is suspended. They are not protected by the usb_device's - * lock but only by its pm_mutex. Drivers must provide their own - * synchronization. + * @intf's usage counter is incremented to prevent subsequent autosuspends. + * However if the autoresume fails then the counter is re-decremented. * * This routine can run only in process context. */ @@ -1837,9 +1560,16 @@ int usb_autopm_get_interface(struct usb_interface *intf) { int status; - status = usb_autopm_do_interface(intf, 1); - dev_vdbg(&intf->dev, "%s: status %d cnt %d\n", - __func__, status, atomic_read(&intf->pm_usage_cnt)); + status = pm_runtime_get_sync(&intf->dev); + if (status < 0) + pm_runtime_put_sync(&intf->dev); + else + atomic_inc(&intf->pm_usage_cnt); + dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n", + __func__, atomic_read(&intf->dev.power.usage_count), + status); + if (status > 0) + status = 0; return status; } EXPORT_SYMBOL_GPL(usb_autopm_get_interface); @@ -1849,41 +1579,201 @@ EXPORT_SYMBOL_GPL(usb_autopm_get_interface); * @intf: the usb_interface whose counter should be incremented * * This routine does much the same thing as - * usb_autopm_get_interface(): it increments @intf's usage counter and - * queues an autoresume request if the result is > 0. 
The differences - * are that it does not acquire the device's pm_mutex (callers must - * handle all synchronization issues themselves), and it does not - * autoresume the device directly (it only queues a request). After a - * successful call, the device will generally not yet be resumed. + * usb_autopm_get_interface(): It increments @intf's usage counter and + * queues an autoresume request if the device is suspended. The + * differences are that it does not perform any synchronization (callers + * should hold a private lock and handle all synchronization issues + * themselves), and it does not autoresume the device directly (it only + * queues a request). After a successful call, the device may not yet be + * resumed. * * This routine can run in atomic context. */ int usb_autopm_get_interface_async(struct usb_interface *intf) { - struct usb_device *udev = interface_to_usbdev(intf); - int status = 0; + int status = 0; + enum rpm_status s; - if (intf->condition == USB_INTERFACE_UNBOUND) - status = -ENODEV; - else { + /* Don't request a resume unless the interface is already suspending + * or suspended. Doing so would force a running suspend timer to be + * cancelled. + */ + pm_runtime_get_noresume(&intf->dev); + s = ACCESS_ONCE(intf->dev.power.runtime_status); + if (s == RPM_SUSPENDING || s == RPM_SUSPENDED) + status = pm_request_resume(&intf->dev); + + if (status < 0 && status != -EINPROGRESS) + pm_runtime_put_noidle(&intf->dev); + else atomic_inc(&intf->pm_usage_cnt); - if (atomic_read(&intf->pm_usage_cnt) > 0 && - udev->state == USB_STATE_SUSPENDED) - queue_work(ksuspend_usb_wq, &udev->autoresume); - } - dev_vdbg(&intf->dev, "%s: status %d cnt %d\n", - __func__, status, atomic_read(&intf->pm_usage_cnt)); + dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n", + __func__, atomic_read(&intf->dev.power.usage_count), + status); + if (status > 0) + status = 0; return status; } EXPORT_SYMBOL_GPL(usb_autopm_get_interface_async); -#else +/** + * usb_autopm_get_interface_no_resume - increment a USB interface's PM-usage counter + * @intf: the usb_interface whose counter should be incremented + * + * This routine increments @intf's usage counter but does not carry out an + * autoresume. + * + * This routine can run in atomic context. + */ +void usb_autopm_get_interface_no_resume(struct usb_interface *intf) +{ + struct usb_device *udev = interface_to_usbdev(intf); + + udev->last_busy = jiffies; + atomic_inc(&intf->pm_usage_cnt); + pm_runtime_get_noresume(&intf->dev); +} +EXPORT_SYMBOL_GPL(usb_autopm_get_interface_no_resume); + +/* Internal routine to check whether we may autosuspend a device. */ +static int autosuspend_check(struct usb_device *udev) +{ + int i; + struct usb_interface *intf; + unsigned long suspend_time, j; + + /* Fail if autosuspend is disabled, or any interfaces are in use, or + * any interface drivers require remote wakeup but it isn't available. + */ + udev->do_remote_wakeup = device_may_wakeup(&udev->dev); + if (udev->actconfig) { + for (i = 0; i < udev->actconfig->desc.bNumInterfaces; i++) { + intf = udev->actconfig->interface[i]; + + /* We don't need to check interfaces that are + * disabled for runtime PM. Either they are unbound + * or else their drivers don't support autosuspend + * and so they are permanently active. 
+ */ + if (intf->dev.power.disable_depth) + continue; + if (atomic_read(&intf->dev.power.usage_count) > 0) + return -EBUSY; + if (intf->needs_remote_wakeup && + !udev->do_remote_wakeup) { + dev_dbg(&udev->dev, "remote wakeup needed " + "for autosuspend\n"); + return -EOPNOTSUPP; + } + + /* Don't allow autosuspend if the device will need + * a reset-resume and any of its interface drivers + * doesn't include support or needs remote wakeup. + */ + if (udev->quirks & USB_QUIRK_RESET_RESUME) { + struct usb_driver *driver; + + driver = to_usb_driver(intf->dev.driver); + if (!driver->reset_resume || + intf->needs_remote_wakeup) + return -EOPNOTSUPP; + } + } + } + + /* If everything is okay but the device hasn't been idle for long + * enough, queue a delayed autosuspend request. + */ + j = ACCESS_ONCE(jiffies); + suspend_time = udev->last_busy + udev->autosuspend_delay; + if (time_before(j, suspend_time)) { + pm_schedule_suspend(&udev->dev, jiffies_to_msecs( + round_jiffies_up_relative(suspend_time - j))); + return -EAGAIN; + } + return 0; +} + +static int usb_runtime_suspend(struct device *dev) +{ + int status = 0; -void usb_autosuspend_work(struct work_struct *work) -{} + /* A USB device can be suspended if it passes the various autosuspend + * checks. Runtime suspend for a USB device means suspending all the + * interfaces and then the device itself. + */ + if (is_usb_device(dev)) { + struct usb_device *udev = to_usb_device(dev); + + if (autosuspend_check(udev) != 0) + return -EAGAIN; + + status = usb_suspend_both(udev, PMSG_AUTO_SUSPEND); + + /* If an interface fails the suspend, adjust the last_busy + * time so that we don't get another suspend attempt right + * away. + */ + if (status) { + udev->last_busy = jiffies + + (udev->autosuspend_delay == 0 ? + HZ/2 : 0); + } + + /* Prevent the parent from suspending immediately after */ + else if (udev->parent) { + udev->parent->last_busy = jiffies; + } + } + + /* Runtime suspend for a USB interface doesn't mean anything. */ + return status; +} + +static int usb_runtime_resume(struct device *dev) +{ + /* Runtime resume for a USB device means resuming both the device + * and all its interfaces. + */ + if (is_usb_device(dev)) { + struct usb_device *udev = to_usb_device(dev); + int status; + + status = usb_resume_both(udev, PMSG_AUTO_RESUME); + udev->last_busy = jiffies; + return status; + } + + /* Runtime resume for a USB interface doesn't mean anything. */ + return 0; +} + +static int usb_runtime_idle(struct device *dev) +{ + /* An idle USB device can be suspended if it passes the various + * autosuspend checks. An idle interface can be suspended at + * any time. 
+ */ + if (is_usb_device(dev)) { + struct usb_device *udev = to_usb_device(dev); + + if (autosuspend_check(udev) != 0) + return 0; + } + + pm_runtime_suspend(dev); + return 0; +} + +static struct dev_pm_ops usb_bus_pm_ops = { + .runtime_suspend = usb_runtime_suspend, + .runtime_resume = usb_runtime_resume, + .runtime_idle = usb_runtime_idle, +}; + +#else -void usb_autoresume_work(struct work_struct *work) -{} +#define usb_bus_pm_ops (*(struct dev_pm_ops *) NULL) #endif /* CONFIG_USB_SUSPEND */ @@ -1891,4 +1781,5 @@ struct bus_type usb_bus_type = { .name = "usb", .match = usb_device_match, .uevent = usb_uevent, + .pm = &usb_bus_pm_ops, }; diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index fc4290b6691c..b07ba051118d 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -39,6 +39,7 @@ #include #include #include +#include #include @@ -1858,6 +1859,10 @@ int hcd_bus_resume(struct usb_device *rhdev, pm_message_t msg) return status; } +#endif /* CONFIG_PM */ + +#ifdef CONFIG_USB_SUSPEND + /* Workqueue routine for root-hub remote wakeup */ static void hcd_resume_work(struct work_struct *work) { @@ -1884,12 +1889,12 @@ void usb_hcd_resume_root_hub (struct usb_hcd *hcd) spin_lock_irqsave (&hcd_root_hub_lock, flags); if (hcd->rh_registered) - queue_work(ksuspend_usb_wq, &hcd->wakeup_work); + queue_work(pm_wq, &hcd->wakeup_work); spin_unlock_irqrestore (&hcd_root_hub_lock, flags); } EXPORT_SYMBOL_GPL(usb_hcd_resume_root_hub); -#endif +#endif /* CONFIG_USB_SUSPEND */ /*-------------------------------------------------------------------------*/ @@ -2034,7 +2039,7 @@ struct usb_hcd *usb_create_hcd (const struct hc_driver *driver, init_timer(&hcd->rh_timer); hcd->rh_timer.function = rh_timer_func; hcd->rh_timer.data = (unsigned long) hcd; -#ifdef CONFIG_PM +#ifdef CONFIG_USB_SUSPEND INIT_WORK(&hcd->wakeup_work, hcd_resume_work); #endif mutex_init(&hcd->bandwidth_mutex); @@ -2234,7 +2239,7 @@ void usb_remove_hcd(struct usb_hcd *hcd) hcd->rh_registered = 0; spin_unlock_irq (&hcd_root_hub_lock); -#ifdef CONFIG_PM +#ifdef CONFIG_USB_SUSPEND cancel_work_sync(&hcd->wakeup_work); #endif diff --git a/drivers/usb/core/hcd.h b/drivers/usb/core/hcd.h index 70a7e490f81b..8953ded69541 100644 --- a/drivers/usb/core/hcd.h +++ b/drivers/usb/core/hcd.h @@ -80,7 +80,7 @@ struct usb_hcd { struct timer_list rh_timer; /* drives root-hub polling */ struct urb *status_urb; /* the current status urb */ -#ifdef CONFIG_PM +#ifdef CONFIG_USB_SUSPEND struct work_struct wakeup_work; /* for remote wakeup */ #endif @@ -464,16 +464,20 @@ extern int usb_find_interface_driver(struct usb_device *dev, #define usb_endpoint_out(ep_dir) (!((ep_dir) & USB_DIR_IN)) #ifdef CONFIG_PM -extern void usb_hcd_resume_root_hub(struct usb_hcd *hcd); extern void usb_root_hub_lost_power(struct usb_device *rhdev); extern int hcd_bus_suspend(struct usb_device *rhdev, pm_message_t msg); extern int hcd_bus_resume(struct usb_device *rhdev, pm_message_t msg); +#endif /* CONFIG_PM */ + +#ifdef CONFIG_USB_SUSPEND +extern void usb_hcd_resume_root_hub(struct usb_hcd *hcd); #else static inline void usb_hcd_resume_root_hub(struct usb_hcd *hcd) { return; } -#endif /* CONFIG_PM */ +#endif /* CONFIG_USB_SUSPEND */ + /* * USB device fs stuff diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 746f26f222ab..0e0a190bbd00 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -71,7 +72,6 @@ struct usb_hub { unsigned mA_per_port; /* current for 
each child */ - unsigned init_done:1; unsigned limited_power:1; unsigned quiescing:1; unsigned disconnected:1; @@ -820,7 +820,6 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) } init3: hub->quiescing = 0; - hub->init_done = 1; status = usb_submit_urb(hub->urb, GFP_NOIO); if (status < 0) @@ -861,11 +860,6 @@ static void hub_quiesce(struct usb_hub *hub, enum hub_quiescing_type type) int i; cancel_delayed_work_sync(&hub->init_work); - if (!hub->init_done) { - hub->init_done = 1; - usb_autopm_put_interface_no_suspend( - to_usb_interface(hub->intfdev)); - } /* khubd and related activity won't re-trigger */ hub->quiescing = 1; @@ -1405,10 +1399,8 @@ static void recursively_mark_NOTATTACHED(struct usb_device *udev) if (udev->children[i]) recursively_mark_NOTATTACHED(udev->children[i]); } - if (udev->state == USB_STATE_SUSPENDED) { - udev->discon_suspended = 1; + if (udev->state == USB_STATE_SUSPENDED) udev->active_duration -= jiffies; - } udev->state = USB_STATE_NOTATTACHED; } @@ -1532,31 +1524,6 @@ static void update_address(struct usb_device *udev, int devnum) udev->devnum = devnum; } -#ifdef CONFIG_USB_SUSPEND - -static void usb_stop_pm(struct usb_device *udev) -{ - /* Synchronize with the ksuspend thread to prevent any more - * autosuspend requests from being submitted, and decrement - * the parent's count of unsuspended children. - */ - usb_pm_lock(udev); - if (udev->parent && !udev->discon_suspended) - usb_autosuspend_device(udev->parent); - usb_pm_unlock(udev); - - /* Stop any autosuspend or autoresume requests already submitted */ - cancel_delayed_work_sync(&udev->autosuspend); - cancel_work_sync(&udev->autoresume); -} - -#else - -static inline void usb_stop_pm(struct usb_device *udev) -{ } - -#endif - /** * usb_disconnect - disconnect a device (usbcore-internal) * @pdev: pointer to device being disconnected @@ -1625,8 +1592,6 @@ void usb_disconnect(struct usb_device **pdev) *pdev = NULL; spin_unlock_irq(&device_state_lock); - usb_stop_pm(udev); - put_device(&udev->dev); } @@ -1803,9 +1768,6 @@ int usb_new_device(struct usb_device *udev) int err; if (udev->parent) { - /* Increment the parent's count of unsuspended children */ - usb_autoresume_device(udev->parent); - /* Initialize non-root-hub device wakeup to disabled; * device (un)configuration controls wakeup capable * sysfs power/wakeup controls wakeup enabled/disabled @@ -1814,6 +1776,10 @@ int usb_new_device(struct usb_device *udev) device_set_wakeup_enable(&udev->dev, 1); } + /* Tell the runtime-PM framework the device is active */ + pm_runtime_set_active(&udev->dev); + pm_runtime_enable(&udev->dev); + usb_detect_quirks(udev); err = usb_enumerate_device(udev); /* Read descriptors */ if (err < 0) @@ -1844,7 +1810,8 @@ int usb_new_device(struct usb_device *udev) fail: usb_set_device_state(udev, USB_STATE_NOTATTACHED); - usb_stop_pm(udev); + pm_runtime_disable(&udev->dev); + pm_runtime_set_suspended(&udev->dev); return err; } @@ -2408,8 +2375,11 @@ int usb_remote_wakeup(struct usb_device *udev) if (udev->state == USB_STATE_SUSPENDED) { dev_dbg(&udev->dev, "usb %sresume\n", "wakeup-"); - usb_mark_last_busy(udev); - status = usb_external_resume_device(udev, PMSG_REMOTE_RESUME); + status = usb_autoresume_device(udev); + if (status == 0) { + /* Let the drivers do their thing, then... 
*/ + usb_autosuspend_device(udev); + } } return status; } @@ -2446,11 +2416,6 @@ int usb_port_resume(struct usb_device *udev, pm_message_t msg) return status; } -int usb_remote_wakeup(struct usb_device *udev) -{ - return 0; -} - #endif static int hub_suspend(struct usb_interface *intf, pm_message_t msg) @@ -3268,7 +3233,7 @@ static void hub_events(void) * disconnected while waiting for the lock to succeed. */ usb_lock_device(hdev); if (unlikely(hub->disconnected)) - goto loop2; + goto loop_disconnected; /* If the hub has died, clean up after it */ if (hdev->state == USB_STATE_NOTATTACHED) { @@ -3428,7 +3393,7 @@ static void hub_events(void) * kick_khubd() and allow autosuspend. */ usb_autopm_put_interface(intf); - loop2: + loop_disconnected: usb_unlock_device(hdev); kref_put(&hub->kref, hub_release); diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index df73574a9cc9..73de41bb2546 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -1843,7 +1843,6 @@ free_interfaces: intf->dev.dma_mask = dev->dev.dma_mask; INIT_WORK(&intf->reset_ws, __usb_queue_reset_device); device_initialize(&intf->dev); - mark_quiesced(intf); dev_set_name(&intf->dev, "%d-%s:%d.%d", dev->bus->busnum, dev->devpath, configuration, alt->desc.bInterfaceNumber); diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index 0daff0d968ba..32966ccdff63 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -49,9 +49,6 @@ const char *usbcore_name = "usbcore"; static int nousb; /* Disable USB when built into kernel image */ -/* Workqueue for autosuspend and for remote wakeup of root hubs */ -struct workqueue_struct *ksuspend_usb_wq; - #ifdef CONFIG_USB_SUSPEND static int usb_autosuspend_delay = 2; /* Default delay value, * in seconds */ @@ -264,23 +261,6 @@ static int usb_dev_uevent(struct device *dev, struct kobj_uevent_env *env) #ifdef CONFIG_PM -static int ksuspend_usb_init(void) -{ - /* This workqueue is supposed to be both freezable and - * singlethreaded. Its job doesn't justify running on more - * than one CPU. - */ - ksuspend_usb_wq = create_freezeable_workqueue("ksuspend_usbd"); - if (!ksuspend_usb_wq) - return -ENOMEM; - return 0; -} - -static void ksuspend_usb_cleanup(void) -{ - destroy_workqueue(ksuspend_usb_wq); -} - /* USB device Power-Management thunks. 
* There's no need to distinguish here between quiescing a USB device * and powering it down; the generic_suspend() routine takes care of @@ -296,7 +276,7 @@ static int usb_dev_prepare(struct device *dev) static void usb_dev_complete(struct device *dev) { /* Currently used only for rebinding interfaces */ - usb_resume(dev, PMSG_RESUME); /* Message event is meaningless */ + usb_resume(dev, PMSG_ON); /* FIXME: change to PMSG_COMPLETE */ } static int usb_dev_suspend(struct device *dev) @@ -342,9 +322,7 @@ static const struct dev_pm_ops usb_device_pm_ops = { #else -#define ksuspend_usb_init() 0 -#define ksuspend_usb_cleanup() do {} while (0) -#define usb_device_pm_ops (*(struct dev_pm_ops *)0) +#define usb_device_pm_ops (*(struct dev_pm_ops *) NULL) #endif /* CONFIG_PM */ @@ -472,9 +450,6 @@ struct usb_device *usb_alloc_dev(struct usb_device *parent, INIT_LIST_HEAD(&dev->filelist); #ifdef CONFIG_PM - mutex_init(&dev->pm_mutex); - INIT_DELAYED_WORK(&dev->autosuspend, usb_autosuspend_work); - INIT_WORK(&dev->autoresume, usb_autoresume_work); dev->autosuspend_delay = usb_autosuspend_delay * HZ; dev->connect_time = jiffies; dev->active_duration = -jiffies; @@ -1117,9 +1092,6 @@ static int __init usb_init(void) if (retval) goto out; - retval = ksuspend_usb_init(); - if (retval) - goto out; retval = bus_register(&usb_bus_type); if (retval) goto bus_register_failed; @@ -1159,7 +1131,7 @@ major_init_failed: bus_notifier_failed: bus_unregister(&usb_bus_type); bus_register_failed: - ksuspend_usb_cleanup(); + usb_debugfs_cleanup(); out: return retval; } @@ -1181,7 +1153,6 @@ static void __exit usb_exit(void) usb_hub_cleanup(); bus_unregister_notifier(&usb_bus_type, &usb_bus_nb); bus_unregister(&usb_bus_type); - ksuspend_usb_cleanup(); usb_debugfs_cleanup(); } diff --git a/drivers/usb/core/usb.h b/drivers/usb/core/usb.h index 2b74a7f99c41..cd882203ad34 100644 --- a/drivers/usb/core/usb.h +++ b/drivers/usb/core/usb.h @@ -55,25 +55,8 @@ extern void usb_major_cleanup(void); extern int usb_suspend(struct device *dev, pm_message_t msg); extern int usb_resume(struct device *dev, pm_message_t msg); -extern void usb_autosuspend_work(struct work_struct *work); -extern void usb_autoresume_work(struct work_struct *work); extern int usb_port_suspend(struct usb_device *dev, pm_message_t msg); extern int usb_port_resume(struct usb_device *dev, pm_message_t msg); -extern int usb_external_suspend_device(struct usb_device *udev, - pm_message_t msg); -extern int usb_external_resume_device(struct usb_device *udev, - pm_message_t msg); -extern int usb_remote_wakeup(struct usb_device *dev); - -static inline void usb_pm_lock(struct usb_device *udev) -{ - mutex_lock_nested(&udev->pm_mutex, udev->level); -} - -static inline void usb_pm_unlock(struct usb_device *udev) -{ - mutex_unlock(&udev->pm_mutex); -} #else @@ -87,14 +70,6 @@ static inline int usb_port_resume(struct usb_device *udev, pm_message_t msg) return 0; } -static inline int usb_remote_wakeup(struct usb_device *udev) -{ - return 0; -} - -static inline void usb_pm_lock(struct usb_device *udev) {} -static inline void usb_pm_unlock(struct usb_device *udev) {} - #endif #ifdef CONFIG_USB_SUSPEND @@ -102,6 +77,7 @@ static inline void usb_pm_unlock(struct usb_device *udev) {} extern void usb_autosuspend_device(struct usb_device *udev); extern void usb_try_autosuspend_device(struct usb_device *udev); extern int usb_autoresume_device(struct usb_device *udev); +extern int usb_remote_wakeup(struct usb_device *dev); #else @@ -112,9 +88,13 @@ static inline int 
usb_autoresume_device(struct usb_device *udev) return 0; } +static inline int usb_remote_wakeup(struct usb_device *udev) +{ + return 0; +} + #endif -extern struct workqueue_struct *ksuspend_usb_wq; extern struct bus_type usb_bus_type; extern struct device_type usb_device_type; extern struct device_type usb_if_device_type; @@ -144,23 +124,6 @@ static inline int is_usb_device_driver(struct device_driver *drv) for_devices; } -/* Interfaces and their "power state" are owned by usbcore */ - -static inline void mark_active(struct usb_interface *f) -{ - f->is_active = 1; -} - -static inline void mark_quiesced(struct usb_interface *f) -{ - f->is_active = 0; -} - -static inline int is_active(const struct usb_interface *f) -{ - return f->is_active; -} - /* for labeling diagnostics */ extern const char *usbcore_name; diff --git a/drivers/usb/misc/usbtest.c b/drivers/usb/misc/usbtest.c index 3dab0c0b196f..707a87da77f8 100644 --- a/drivers/usb/misc/usbtest.c +++ b/drivers/usb/misc/usbtest.c @@ -1580,10 +1580,6 @@ usbtest_ioctl (struct usb_interface *intf, unsigned int code, void *buf) return -ERESTARTSYS; /* FIXME: What if a system sleep starts while a test is running? */ - if (!intf->is_active) { - mutex_unlock(&dev->lock); - return -EHOSTUNREACH; - } /* some devices, like ez-usb default devices, need a non-default * altsetting to have any active endpoints. some tests change diff --git a/include/linux/usb.h b/include/linux/usb.h index e6419ac89ea2..ad50fc8a7ad3 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -122,7 +122,6 @@ enum usb_interface_condition { * number from the USB core by calling usb_register_dev(). * @condition: binding state of the interface: not bound, binding * (in probe()), bound to a driver, or unbinding (in disconnect()) - * @is_active: flag set when the interface is bound and not suspended. * @sysfs_files_created: sysfs attributes exist * @ep_devs_created: endpoint child pseudo-devices exist * @unregistering: flag set when the interface is being unregistered @@ -135,8 +134,7 @@ enum usb_interface_condition { * @dev: driver model's view of this device * @usb_dev: if an interface is bound to the USB major, this will point * to the sysfs representation for that device. - * @pm_usage_cnt: PM usage counter for this interface; autosuspend is not - * allowed unless the counter is 0. + * @pm_usage_cnt: PM usage counter for this interface * @reset_ws: Used for scheduling resets from atomic context. 
* @reset_running: set to 1 if the interface is currently running a * queued reset so that usb_cancel_queued_reset() doesn't try to @@ -184,7 +182,6 @@ struct usb_interface { int minor; /* minor number this interface is * bound to */ enum usb_interface_condition condition; /* state of binding */ - unsigned is_active:1; /* the interface is not suspended */ unsigned sysfs_files_created:1; /* the sysfs attributes exist */ unsigned ep_devs_created:1; /* endpoint "devices" exist */ unsigned unregistering:1; /* unregistration is in progress */ @@ -401,7 +398,6 @@ struct usb_tt; * @portnum: parent port number (origin 1) * @level: number of USB hub ancestors * @can_submit: URBs may be submitted - * @discon_suspended: disconnected while suspended * @persist_enabled: USB_PERSIST enabled for this device * @have_langid: whether string_langid is valid * @authorized: policy has said we can use it; @@ -421,20 +417,15 @@ struct usb_tt; * @usbfs_dentry: usbfs dentry entry for the device * @maxchild: number of ports if hub * @children: child devices - USB devices that are attached to this hub - * @pm_usage_cnt: usage counter for autosuspend * @quirks: quirks of the whole device * @urbnum: number of URBs submitted for the whole device * @active_duration: total time device is not suspended - * @autosuspend: for delayed autosuspends - * @autoresume: for autoresumes requested while in_interrupt - * @pm_mutex: protects PM operations * @last_busy: time of last use * @autosuspend_delay: in jiffies * @connect_time: time device was first connected * @do_remote_wakeup: remote wakeup should be enabled * @reset_resume: needs reset instead of resume * @autosuspend_disabled: autosuspend disabled by the user - * @skip_sys_resume: skip the next system resume * @wusb_dev: if this is a Wireless USB device, link to the WUSB * specific data for the device. 
* @slot_id: Slot ID assigned by xHCI @@ -475,7 +466,6 @@ struct usb_device { u8 level; unsigned can_submit:1;
- unsigned discon_suspended:1;
 unsigned persist_enabled:1; unsigned have_langid:1; unsigned authorized:1; @@ -499,17 +489,12 @@ struct usb_device { int maxchild; struct usb_device *children[USB_MAXCHILDREN];
- int pm_usage_cnt;
 u32 quirks; atomic_t urbnum; unsigned long active_duration; #ifdef CONFIG_PM
- struct delayed_work autosuspend;
- struct work_struct autoresume;
- struct mutex pm_mutex;
-
 unsigned long last_busy; int autosuspend_delay; unsigned long connect_time; @@ -517,7 +502,6 @@ struct usb_device { unsigned do_remote_wakeup:1; unsigned reset_resume:1; unsigned autosuspend_disabled:1;
- unsigned skip_sys_resume:1;
 #endif struct wusb_dev *wusb_dev; int slot_id; @@ -549,17 +533,8 @@ extern int usb_autopm_get_interface(struct usb_interface *intf); extern void usb_autopm_put_interface(struct usb_interface *intf); extern int usb_autopm_get_interface_async(struct usb_interface *intf); extern void usb_autopm_put_interface_async(struct usb_interface *intf);
-
-static inline void usb_autopm_get_interface_no_resume(
- struct usb_interface *intf)
-{
- atomic_inc(&intf->pm_usage_cnt);
-}
-static inline void usb_autopm_put_interface_no_suspend(
- struct usb_interface *intf)
-{
- atomic_dec(&intf->pm_usage_cnt);
-}
+extern void usb_autopm_get_interface_no_resume(struct usb_interface *intf);
+extern void usb_autopm_put_interface_no_suspend(struct usb_interface *intf);
 static inline void usb_mark_last_busy(struct usb_device *udev) { -- cgit v1.2.3 From c58bfa6b97731590e42cba6bd13829c4e480992f Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Thu, 21 Jan 2010 15:33:55 +0200 Subject: USB: musb: deprecate what we don't use After 2.6.34 those fields will be removed from struct musb_hdrc_platform_data; it's expected that other architectures will be fixed by then.
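For illustration, __deprecated expands to the compiler's "deprecated" attribute, so a build that still touches one of these fields gets a warning pointing at the offending code. A minimal stand-alone sketch of that mechanism, using made-up names rather than the real musb fields:

/* Stand-alone sketch (not musb code): a __deprecated struct field makes
 * the compiler warn wherever that field is still read or written.
 */
#define __deprecated __attribute__((deprecated))

struct example_cfg {
	unsigned old_flag:1 __deprecated;	/* scheduled for removal */
	unsigned new_flag:1;
};

static void example_user(struct example_cfg *cfg)
{
	cfg->old_flag = 1;	/* warning: 'old_flag' is deprecated */
	cfg->new_flag = 1;	/* no warning */
}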
Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/musb.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/usb/musb.h b/include/linux/usb/musb.h index 4b7f8fa252f0..8e0f2bc8d8c9 100644 --- a/include/linux/usb/musb.h +++ b/include/linux/usb/musb.h @@ -30,26 +30,26 @@ struct musb_hdrc_eps_bits { struct musb_hdrc_config { /* MUSB configuration-specific details */ unsigned multipoint:1; /* multipoint device */
- unsigned dyn_fifo:1; /* supports dynamic fifo sizing */
- unsigned soft_con:1; /* soft connect required */
- unsigned utm_16:1; /* utm data witdh is 16 bits */
+ unsigned dyn_fifo:1 __deprecated; /* supports dynamic fifo sizing */
+ unsigned soft_con:1 __deprecated; /* soft connect required */
+ unsigned utm_16:1 __deprecated; /* utm data witdh is 16 bits */
 unsigned big_endian:1; /* true if CPU uses big-endian */ unsigned mult_bulk_tx:1; /* Tx ep required for multbulk pkts */ unsigned mult_bulk_rx:1; /* Rx ep required for multbulk pkts */ unsigned high_iso_tx:1; /* Tx ep required for HB iso */ unsigned high_iso_rx:1; /* Rx ep required for HD iso */
- unsigned dma:1; /* supports DMA */
- unsigned vendor_req:1; /* vendor registers required */
+ unsigned dma:1 __deprecated; /* supports DMA */
+ unsigned vendor_req:1 __deprecated; /* vendor registers required */
 u8 num_eps; /* number of endpoints _with_ ep0 */
- u8 dma_channels; /* number of dma channels */
+ u8 dma_channels __deprecated; /* number of dma channels */
 u8 dyn_fifo_size; /* dynamic size in bytes */
- u8 vendor_ctrl; /* vendor control reg width */
- u8 vendor_stat; /* vendor status reg witdh */
- u8 dma_req_chan; /* bitmask for required dma channels */
+ u8 vendor_ctrl __deprecated; /* vendor control reg width */
+ u8 vendor_stat __deprecated; /* vendor status reg witdh */
+ u8 dma_req_chan __deprecated; /* bitmask for required dma channels */
 u8 ram_bits; /* ram address size */
- struct musb_hdrc_eps_bits *eps_bits;
+ struct musb_hdrc_eps_bits *eps_bits __deprecated;
 #ifdef CONFIG_BLACKFIN /* A GPIO controlling VRSEL in Blackfin */ unsigned int gpio_vrsel; -- cgit v1.2.3 From 640e95abdfae9fef5949084c92e80c8f2f8b5ec5 Mon Sep 17 00:00:00 2001 From: Eirik Aanonsen Date: Fri, 5 Feb 2010 09:49:25 +0100 Subject: USB: atmel usba: Adding invert vbus_pin Add vbus_pin_inverted so that the USB detect pin can be active high or active low, depending on the hardware implementation. Also replace the direct gpio_get_value(udc->vbus_pin) call with a call to vbus_is_present(udc). This allows the driver to be loaded and saves about 0.15 W of power consumption.
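The detection itself reduces to XOR-ing the raw GPIO level with the board's polarity flag; a simplified sketch of the idea (illustrative only, not the exact atmel_usba_udc code):

/* Illustrative only: with an active-low detect pin, vbus_pin_inverted is
 * set and the XOR turns a low GPIO level into "VBUS present".
 */
static int vbus_level_present(int gpio_level, int vbus_pin_inverted)
{
	return gpio_level ^ vbus_pin_inverted;
}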
Signed-off-by: Eirik Aanonsen Signed-off-by: Greg Kroah-Hartman --- arch/avr32/mach-at32ap/at32ap700x.c | 7 +++++-- drivers/usb/gadget/atmel_usba_udc.c | 5 +++-- drivers/usb/gadget/atmel_usba_udc.h | 1 + include/linux/usb/atmel_usba_udc.h | 1 + 4 files changed, 10 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/arch/avr32/mach-at32ap/at32ap700x.c b/arch/avr32/mach-at32ap/at32ap700x.c index b13d1879e51b..3a4bc1a18433 100644 --- a/arch/avr32/mach-at32ap/at32ap700x.c +++ b/arch/avr32/mach-at32ap/at32ap700x.c @@ -1770,10 +1770,13 @@ at32_add_device_usba(unsigned int id, struct usba_platform_data *data) ARRAY_SIZE(usba0_resource))) goto out_free_pdev; - if (data) + if (data) { usba_data.pdata.vbus_pin = data->vbus_pin; - else + usba_data.pdata.vbus_pin_inverted = data->vbus_pin_inverted; + } else { usba_data.pdata.vbus_pin = -EINVAL; + usba_data.pdata.vbus_pin_inverted = -EINVAL; + } data = &usba_data.pdata; data->num_ep = ARRAY_SIZE(at32_usba_ep); diff --git a/drivers/usb/gadget/atmel_usba_udc.c b/drivers/usb/gadget/atmel_usba_udc.c index 976822f50c70..f79bdfe4bed9 100644 --- a/drivers/usb/gadget/atmel_usba_udc.c +++ b/drivers/usb/gadget/atmel_usba_udc.c @@ -320,7 +320,7 @@ static inline void usba_cleanup_debugfs(struct usba_udc *udc) static int vbus_is_present(struct usba_udc *udc) { if (gpio_is_valid(udc->vbus_pin)) - return gpio_get_value(udc->vbus_pin); + return gpio_get_value(udc->vbus_pin) ^ udc->vbus_pin_inverted; /* No Vbus detection: Assume always present */ return 1; @@ -1763,7 +1763,7 @@ static irqreturn_t usba_vbus_irq(int irq, void *devid) if (!udc->driver) goto out; - vbus = gpio_get_value(udc->vbus_pin); + vbus = vbus_is_present(udc); if (vbus != udc->vbus_prev) { if (vbus) { toggle_bias(1); @@ -2000,6 +2000,7 @@ static int __init usba_udc_probe(struct platform_device *pdev) if (gpio_is_valid(pdata->vbus_pin)) { if (!gpio_request(pdata->vbus_pin, "atmel_usba_udc")) { udc->vbus_pin = pdata->vbus_pin; + udc->vbus_pin_inverted = pdata->vbus_pin_inverted; ret = request_irq(gpio_to_irq(udc->vbus_pin), usba_vbus_irq, 0, diff --git a/drivers/usb/gadget/atmel_usba_udc.h b/drivers/usb/gadget/atmel_usba_udc.h index f7baea307f0d..88a2e07a11a8 100644 --- a/drivers/usb/gadget/atmel_usba_udc.h +++ b/drivers/usb/gadget/atmel_usba_udc.h @@ -323,6 +323,7 @@ struct usba_udc { struct platform_device *pdev; int irq; int vbus_pin; + int vbus_pin_inverted; struct clk *pclk; struct clk *hclk; diff --git a/include/linux/usb/atmel_usba_udc.h b/include/linux/usb/atmel_usba_udc.h index 6311fa2d9f82..baf41c8616e9 100644 --- a/include/linux/usb/atmel_usba_udc.h +++ b/include/linux/usb/atmel_usba_udc.h @@ -15,6 +15,7 @@ struct usba_ep_data { struct usba_platform_data { int vbus_pin; + int vbus_pin_inverted; int num_ep; struct usba_ep_data ep[0]; }; -- cgit v1.2.3 From efcbd3df079a6f8a8a2d5207c4e8429e02356c79 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 5 Feb 2010 18:09:49 -0800 Subject: USB: Extend and neaten dbg macros Add format/argument validation for #ifndef DEBUG dbg macro Neaten dbg macro definitions Signed-off-by: Joe Perches Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 14 +++++++++----- include/linux/usb/serial.h | 13 +++++-------- 2 files changed, 14 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/usb.h b/include/linux/usb.h index ad50fc8a7ad3..3492abf82e75 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1566,14 +1566,18 @@ extern void usb_register_notify(struct notifier_block *nb); extern void 
usb_unregister_notify(struct notifier_block *nb); #ifdef DEBUG -#define dbg(format, arg...) printk(KERN_DEBUG "%s: " format "\n" , \ - __FILE__ , ## arg) +#define dbg(format, arg...) \ + printk(KERN_DEBUG "%s: " format "\n", __FILE__, ##arg) #else -#define dbg(format, arg...) do {} while (0) +#define dbg(format, arg...) \ +do { \ + if (0) \ + printk(KERN_DEBUG "%s: " format "\n", __FILE__, ##arg); \ +} while (0) #endif -#define err(format, arg...) printk(KERN_ERR KBUILD_MODNAME ": " \ - format "\n" , ## arg) +#define err(format, arg...) \ + printk(KERN_ERR KBUILD_MODNAME ": " format "\n", ##arg) /* debugfs stuff */ extern struct dentry *usb_debug_root; diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index 1819396ed501..0a458b861933 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -351,14 +351,11 @@ static inline void usb_serial_debug_data(int debug, /* Use our own dbg macro */ #undef dbg -#define dbg(format, arg...) \ - do { \ - if (debug) \ - printk(KERN_DEBUG "%s: " format "\n" , __FILE__ , \ - ## arg); \ - } while (0) - - +#define dbg(format, arg...) \ +do { \ + if (debug) \ + printk(KERN_DEBUG "%s: " format "\n", __FILE__, ##arg); \ +} while (0) #endif /* __LINUX_USB_SERIAL_H */ -- cgit v1.2.3 From 6d61ae9112960a2b3ed3360602dfb3bfd357954f Mon Sep 17 00:00:00 2001 From: Dennis O'Brien Date: Mon, 15 Feb 2010 08:50:38 -0800 Subject: USB: vstusb.c: removal of driver for Vernier Software & Technology, Inc., devices and spectrometers This patch removes the vstusb driver and support from the Linux tree. This driver provided support for Vernier Software & Technology devices and spectrometers (Ocean Optics). This driver is being replaced by a user space - libusb - implementation. Signed-off-by: Jim Collar Signed-off-by: Greg Kroah-Hartman --- Documentation/ioctl/ioctl-number.txt | 1 - drivers/usb/misc/Kconfig | 14 - drivers/usb/misc/Makefile | 1 - drivers/usb/misc/vstusb.c | 783 ----------------------------------- include/linux/usb/Kbuild | 1 - include/linux/usb/vstusb.h | 71 ---- 6 files changed, 871 deletions(-) delete mode 100644 drivers/usb/misc/vstusb.c delete mode 100644 include/linux/usb/vstusb.h (limited to 'include') diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt index 35cf64d4436d..35c9b51d20ea 100644 --- a/Documentation/ioctl/ioctl-number.txt +++ b/Documentation/ioctl/ioctl-number.txt @@ -139,7 +139,6 @@ Code Seq#(hex) Include File Comments 'K' all linux/kd.h 'L' 00-1F linux/loop.h conflict! 'L' 10-1F drivers/scsi/mpt2sas/mpt2sas_ctl.h conflict! -'L' 20-2F linux/usb/vstusb.h 'L' E0-FF linux/ppdd.h encrypted disk device driver 'M' all linux/soundcard.h conflict! diff --git a/drivers/usb/misc/Kconfig b/drivers/usb/misc/Kconfig index ef9bbef7a88b..55660eaf947c 100644 --- a/drivers/usb/misc/Kconfig +++ b/drivers/usb/misc/Kconfig @@ -231,17 +231,3 @@ config USB_ISIGHTFW driver beforehand. Tools for doing so are available at http://bersace03.free.fr -config USB_VST - tristate "USB VST driver" - depends on USB - help - This driver is intended for Vernier Software Technologies - bulk usb devices such as their Ocean-Optics spectrometers or - Labquest. - It is a bulk channel driver with configurable read and write - timeouts. - - To compile this driver as a module, choose M here: the - module will be called vstusb. 
- - diff --git a/drivers/usb/misc/Makefile b/drivers/usb/misc/Makefile index 36dd40dda1b3..717703e81425 100644 --- a/drivers/usb/misc/Makefile +++ b/drivers/usb/misc/Makefile @@ -22,7 +22,6 @@ obj-$(CONFIG_USB_TEST) += usbtest.o obj-$(CONFIG_USB_TRANCEVIBRATOR) += trancevibrator.o obj-$(CONFIG_USB_USS720) += uss720.o obj-$(CONFIG_USB_SEVSEG) += usbsevseg.o -obj-$(CONFIG_USB_VST) += vstusb.o obj-$(CONFIG_USB_SISUSBVGA) += sisusbvga/ diff --git a/drivers/usb/misc/vstusb.c b/drivers/usb/misc/vstusb.c deleted file mode 100644 index 874c81bb27b9..000000000000 --- a/drivers/usb/misc/vstusb.c +++ /dev/null @@ -1,783 +0,0 @@ -/***************************************************************************** - * File: drivers/usb/misc/vstusb.c - * - * Purpose: Support for the bulk USB Vernier Spectrophotometers - * - * Author: Johnnie Peters - * Axian Consulting - * Beaverton, OR, USA 97005 - * - * Modified by: EQware Engineering, Inc. - * Oregon City, OR, USA 97045 - * - * Copyright: 2007, 2008 - * Vernier Software & Technology - * Beaverton, OR, USA 97005 - * - * Web: www.vernier.com - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - *****************************************************************************/ -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#define DRIVER_VERSION "VST USB Driver Version 1.5" -#define DRIVER_DESC "Vernier Software Technology Bulk USB Driver" - -#ifdef CONFIG_USB_DYNAMIC_MINORS - #define VSTUSB_MINOR_BASE 0 -#else - #define VSTUSB_MINOR_BASE 199 -#endif - -#define USB_VENDOR_OCEANOPTICS 0x2457 -#define USB_VENDOR_VERNIER 0x08F7 /* Vernier Software & Technology */ - -#define USB_PRODUCT_USB2000 0x1002 -#define USB_PRODUCT_ADC1000_FW 0x1003 /* firmware download (renumerates) */ -#define USB_PRODUCT_ADC1000 0x1004 -#define USB_PRODUCT_HR2000_FW 0x1009 /* firmware download (renumerates) */ -#define USB_PRODUCT_HR2000 0x100A -#define USB_PRODUCT_HR4000_FW 0x1011 /* firmware download (renumerates) */ -#define USB_PRODUCT_HR4000 0x1012 -#define USB_PRODUCT_USB650 0x1014 /* "Red Tide" */ -#define USB_PRODUCT_QE65000 0x1018 -#define USB_PRODUCT_USB4000 0x1022 -#define USB_PRODUCT_USB325 0x1024 /* "Vernier Spectrometer" */ - -#define USB_PRODUCT_LABPRO 0x0001 -#define USB_PRODUCT_LABQUEST 0x0005 - -#define VST_MAXBUFFER (64*1024) - -static const struct usb_device_id id_table[] = { - { USB_DEVICE(USB_VENDOR_OCEANOPTICS, USB_PRODUCT_USB2000)}, - { USB_DEVICE(USB_VENDOR_OCEANOPTICS, USB_PRODUCT_HR4000)}, - { USB_DEVICE(USB_VENDOR_OCEANOPTICS, USB_PRODUCT_USB650)}, - { USB_DEVICE(USB_VENDOR_OCEANOPTICS, USB_PRODUCT_USB4000)}, - { USB_DEVICE(USB_VENDOR_OCEANOPTICS, USB_PRODUCT_USB325)}, - { USB_DEVICE(USB_VENDOR_VERNIER, USB_PRODUCT_LABQUEST)}, - { USB_DEVICE(USB_VENDOR_VERNIER, USB_PRODUCT_LABPRO)}, - {}, -}; - -MODULE_DEVICE_TABLE(usb, id_table); - -struct vstusb_device { - struct kref kref; - struct mutex lock; - struct usb_device *usb_dev; - char present; - char isopen; - struct usb_anchor submitted; - int rd_pipe; - int rd_timeout_ms; - int wr_pipe; - int wr_timeout_ms; -}; -#define to_vst_dev(d) container_of(d, struct vstusb_device, kref) - -static struct usb_driver vstusb_driver; - -static void vstusb_delete(struct kref *kref) -{ - struct vstusb_device *vstdev = to_vst_dev(kref); - - usb_put_dev(vstdev->usb_dev); - kfree(vstdev); -} - -static int vstusb_open(struct inode *inode, 
struct file *file) -{ - struct vstusb_device *vstdev; - struct usb_interface *interface; - - interface = usb_find_interface(&vstusb_driver, iminor(inode)); - - if (!interface) { - printk(KERN_ERR KBUILD_MODNAME - ": %s - error, can't find device for minor %d\n", - __func__, iminor(inode)); - return -ENODEV; - } - - vstdev = usb_get_intfdata(interface); - - if (!vstdev) - return -ENODEV; - - /* lock this device */ - mutex_lock(&vstdev->lock); - - /* can only open one time */ - if ((!vstdev->present) || (vstdev->isopen)) { - mutex_unlock(&vstdev->lock); - return -EBUSY; - } - - /* increment our usage count */ - kref_get(&vstdev->kref); - - vstdev->isopen = 1; - - /* save device in the file's private structure */ - file->private_data = vstdev; - - dev_dbg(&vstdev->usb_dev->dev, "%s: opened\n", __func__); - - mutex_unlock(&vstdev->lock); - - return 0; -} - -static int vstusb_release(struct inode *inode, struct file *file) -{ - struct vstusb_device *vstdev; - - vstdev = file->private_data; - - if (vstdev == NULL) - return -ENODEV; - - mutex_lock(&vstdev->lock); - - vstdev->isopen = 0; - - dev_dbg(&vstdev->usb_dev->dev, "%s: released\n", __func__); - - mutex_unlock(&vstdev->lock); - - kref_put(&vstdev->kref, vstusb_delete); - - return 0; -} - -static void usb_api_blocking_completion(struct urb *urb) -{ - struct completion *completeit = urb->context; - - complete(completeit); -} - -static int vstusb_fill_and_send_urb(struct urb *urb, - struct usb_device *usb_dev, - unsigned int pipe, void *data, - unsigned int len, struct completion *done) -{ - struct usb_host_endpoint *ep; - struct usb_host_endpoint **hostep; - unsigned int pipend; - - int status; - - hostep = usb_pipein(pipe) ? usb_dev->ep_in : usb_dev->ep_out; - pipend = usb_pipeendpoint(pipe); - ep = hostep[pipend]; - - if (!ep || (len == 0)) - return -EINVAL; - - if ((ep->desc.bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) - == USB_ENDPOINT_XFER_INT) { - pipe = (pipe & ~(3 << 30)) | (PIPE_INTERRUPT << 30); - usb_fill_int_urb(urb, usb_dev, pipe, data, len, - (usb_complete_t)usb_api_blocking_completion, - NULL, ep->desc.bInterval); - } else - usb_fill_bulk_urb(urb, usb_dev, pipe, data, len, - (usb_complete_t)usb_api_blocking_completion, - NULL); - - init_completion(done); - urb->context = done; - urb->actual_length = 0; - status = usb_submit_urb(urb, GFP_KERNEL); - - return status; -} - -static int vstusb_complete_urb(struct urb *urb, struct completion *done, - int timeout, int *actual_length) -{ - unsigned long expire; - int status; - - expire = timeout ? msecs_to_jiffies(timeout) : MAX_SCHEDULE_TIMEOUT; - if (!wait_for_completion_interruptible_timeout(done, expire)) { - usb_kill_urb(urb); - status = urb->status == -ENOENT ? -ETIMEDOUT : urb->status; - - dev_dbg(&urb->dev->dev, - "%s timed out on ep%d%s len=%d/%d, urb status = %d\n", - current->comm, - usb_pipeendpoint(urb->pipe), - usb_pipein(urb->pipe) ? "in" : "out", - urb->actual_length, - urb->transfer_buffer_length, - urb->status); - - } else { - if (signal_pending(current)) { - /* if really an error */ - if (urb->status && !((urb->status == -ENOENT) || - (urb->status == -ECONNRESET) || - (urb->status == -ESHUTDOWN))) { - status = -EINTR; - usb_kill_urb(urb); - } else { - status = 0; - } - - dev_dbg(&urb->dev->dev, - "%s: signal pending on ep%d%s len=%d/%d," - "urb status = %d\n", - current->comm, - usb_pipeendpoint(urb->pipe), - usb_pipein(urb->pipe) ? 
"in" : "out", - urb->actual_length, - urb->transfer_buffer_length, - urb->status); - - } else { - status = urb->status; - } - } - - if (actual_length) - *actual_length = urb->actual_length; - - return status; -} - -static ssize_t vstusb_read(struct file *file, char __user *buffer, - size_t count, loff_t *ppos) -{ - struct vstusb_device *vstdev; - int cnt = -1; - void *buf; - int retval = 0; - - struct urb *urb; - struct usb_device *dev; - unsigned int pipe; - int timeout; - - DECLARE_COMPLETION_ONSTACK(done); - - vstdev = file->private_data; - - if (vstdev == NULL) - return -ENODEV; - - /* verify that we actually want to read some data */ - if ((count == 0) || (count > VST_MAXBUFFER)) - return -EINVAL; - - /* lock this object */ - if (mutex_lock_interruptible(&vstdev->lock)) - return -ERESTARTSYS; - - /* anyone home */ - if (!vstdev->present) { - mutex_unlock(&vstdev->lock); - printk(KERN_ERR KBUILD_MODNAME - ": %s: device not present\n", __func__); - return -ENODEV; - } - - /* pull out the necessary data */ - dev = vstdev->usb_dev; - pipe = usb_rcvbulkpipe(dev, vstdev->rd_pipe); - timeout = vstdev->rd_timeout_ms; - - buf = kmalloc(count, GFP_KERNEL); - if (buf == NULL) { - mutex_unlock(&vstdev->lock); - return -ENOMEM; - } - - urb = usb_alloc_urb(0, GFP_KERNEL); - if (!urb) { - kfree(buf); - mutex_unlock(&vstdev->lock); - return -ENOMEM; - } - - usb_anchor_urb(urb, &vstdev->submitted); - retval = vstusb_fill_and_send_urb(urb, dev, pipe, buf, count, &done); - mutex_unlock(&vstdev->lock); - if (retval) { - usb_unanchor_urb(urb); - dev_err(&dev->dev, "%s: error %d filling and sending urb %d\n", - __func__, retval, pipe); - goto exit; - } - - retval = vstusb_complete_urb(urb, &done, timeout, &cnt); - if (retval) { - dev_err(&dev->dev, "%s: error %d completing urb %d\n", - __func__, retval, pipe); - goto exit; - } - - if (copy_to_user(buffer, buf, cnt)) { - dev_err(&dev->dev, "%s: can't copy_to_user\n", __func__); - retval = -EFAULT; - } else { - retval = cnt; - dev_dbg(&dev->dev, "%s: read %d bytes from pipe %d\n", - __func__, cnt, pipe); - } - -exit: - usb_free_urb(urb); - kfree(buf); - return retval; -} - -static ssize_t vstusb_write(struct file *file, const char __user *buffer, - size_t count, loff_t *ppos) -{ - struct vstusb_device *vstdev; - int cnt = -1; - void *buf; - int retval = 0; - - struct urb *urb; - struct usb_device *dev; - unsigned int pipe; - int timeout; - - DECLARE_COMPLETION_ONSTACK(done); - - vstdev = file->private_data; - - if (vstdev == NULL) - return -ENODEV; - - /* verify that we actually have some data to write */ - if ((count == 0) || (count > VST_MAXBUFFER)) - return retval; - - /* lock this object */ - if (mutex_lock_interruptible(&vstdev->lock)) - return -ERESTARTSYS; - - /* anyone home */ - if (!vstdev->present) { - mutex_unlock(&vstdev->lock); - printk(KERN_ERR KBUILD_MODNAME - ": %s: device not present\n", __func__); - return -ENODEV; - } - - /* pull out the necessary data */ - dev = vstdev->usb_dev; - pipe = usb_sndbulkpipe(dev, vstdev->wr_pipe); - timeout = vstdev->wr_timeout_ms; - - buf = kmalloc(count, GFP_KERNEL); - if (buf == NULL) { - mutex_unlock(&vstdev->lock); - return -ENOMEM; - } - - urb = usb_alloc_urb(0, GFP_KERNEL); - if (!urb) { - kfree(buf); - mutex_unlock(&vstdev->lock); - return -ENOMEM; - } - - if (copy_from_user(buf, buffer, count)) { - mutex_unlock(&vstdev->lock); - dev_err(&dev->dev, "%s: can't copy_from_user\n", __func__); - retval = -EFAULT; - goto exit; - } - - usb_anchor_urb(urb, &vstdev->submitted); - retval = 
vstusb_fill_and_send_urb(urb, dev, pipe, buf, count, &done); - mutex_unlock(&vstdev->lock); - if (retval) { - usb_unanchor_urb(urb); - dev_err(&dev->dev, "%s: error %d filling and sending urb %d\n", - __func__, retval, pipe); - goto exit; - } - - retval = vstusb_complete_urb(urb, &done, timeout, &cnt); - if (retval) { - dev_err(&dev->dev, "%s: error %d completing urb %d\n", - __func__, retval, pipe); - goto exit; - } else { - retval = cnt; - dev_dbg(&dev->dev, "%s: sent %d bytes to pipe %d\n", - __func__, cnt, pipe); - } - -exit: - usb_free_urb(urb); - kfree(buf); - return retval; -} - -static long vstusb_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -{ - int retval = 0; - int cnt = -1; - void __user *data = (void __user *)arg; - struct vstusb_args usb_data; - - struct vstusb_device *vstdev; - void *buffer = NULL; /* must be initialized. buffer is - * referenced on exit but not all - * ioctls allocate it */ - - struct urb *urb = NULL; /* must be initialized. urb is - * referenced on exit but not all - * ioctls allocate it */ - struct usb_device *dev; - unsigned int pipe; - int timeout; - - DECLARE_COMPLETION_ONSTACK(done); - - vstdev = file->private_data; - - if (_IOC_TYPE(cmd) != VST_IOC_MAGIC) { - dev_warn(&vstdev->usb_dev->dev, - "%s: ioctl command %x, bad ioctl magic %x, " - "expected %x\n", __func__, cmd, - _IOC_TYPE(cmd), VST_IOC_MAGIC); - return -EINVAL; - } - - if (vstdev == NULL) - return -ENODEV; - - if (copy_from_user(&usb_data, data, sizeof(struct vstusb_args))) { - dev_err(&vstdev->usb_dev->dev, "%s: can't copy_from_user\n", - __func__); - return -EFAULT; - } - - /* lock this object */ - if (mutex_lock_interruptible(&vstdev->lock)) { - retval = -ERESTARTSYS; - goto exit; - } - - /* anyone home */ - if (!vstdev->present) { - mutex_unlock(&vstdev->lock); - dev_err(&vstdev->usb_dev->dev, "%s: device not present\n", - __func__); - retval = -ENODEV; - goto exit; - } - - /* pull out the necessary data */ - dev = vstdev->usb_dev; - - switch (cmd) { - - case IOCTL_VSTUSB_CONFIG_RW: - - vstdev->rd_pipe = usb_data.rd_pipe; - vstdev->rd_timeout_ms = usb_data.rd_timeout_ms; - vstdev->wr_pipe = usb_data.wr_pipe; - vstdev->wr_timeout_ms = usb_data.wr_timeout_ms; - - mutex_unlock(&vstdev->lock); - - dev_dbg(&dev->dev, "%s: setting pipes/timeouts, " - "rdpipe = %d, rdtimeout = %d, " - "wrpipe = %d, wrtimeout = %d\n", __func__, - vstdev->rd_pipe, vstdev->rd_timeout_ms, - vstdev->wr_pipe, vstdev->wr_timeout_ms); - break; - - case IOCTL_VSTUSB_SEND_PIPE: - - if ((usb_data.count == 0) || (usb_data.count > VST_MAXBUFFER)) { - mutex_unlock(&vstdev->lock); - retval = -EINVAL; - goto exit; - } - - buffer = kmalloc(usb_data.count, GFP_KERNEL); - if (buffer == NULL) { - mutex_unlock(&vstdev->lock); - retval = -ENOMEM; - goto exit; - } - - urb = usb_alloc_urb(0, GFP_KERNEL); - if (!urb) { - mutex_unlock(&vstdev->lock); - retval = -ENOMEM; - goto exit; - } - - timeout = usb_data.timeout_ms; - - pipe = usb_sndbulkpipe(dev, usb_data.pipe); - - if (copy_from_user(buffer, usb_data.buffer, usb_data.count)) { - dev_err(&dev->dev, "%s: can't copy_from_user\n", - __func__); - mutex_unlock(&vstdev->lock); - retval = -EFAULT; - goto exit; - } - - usb_anchor_urb(urb, &vstdev->submitted); - retval = vstusb_fill_and_send_urb(urb, dev, pipe, buffer, - usb_data.count, &done); - mutex_unlock(&vstdev->lock); - if (retval) { - usb_unanchor_urb(urb); - dev_err(&dev->dev, - "%s: error %d filling and sending urb %d\n", - __func__, retval, pipe); - goto exit; - } - - retval = vstusb_complete_urb(urb, &done, 
timeout, &cnt); - if (retval) { - dev_err(&dev->dev, "%s: error %d completing urb %d\n", - __func__, retval, pipe); - } - - break; - case IOCTL_VSTUSB_RECV_PIPE: - - if ((usb_data.count == 0) || (usb_data.count > VST_MAXBUFFER)) { - mutex_unlock(&vstdev->lock); - retval = -EINVAL; - goto exit; - } - - buffer = kmalloc(usb_data.count, GFP_KERNEL); - if (buffer == NULL) { - mutex_unlock(&vstdev->lock); - retval = -ENOMEM; - goto exit; - } - - urb = usb_alloc_urb(0, GFP_KERNEL); - if (!urb) { - mutex_unlock(&vstdev->lock); - retval = -ENOMEM; - goto exit; - } - - timeout = usb_data.timeout_ms; - - pipe = usb_rcvbulkpipe(dev, usb_data.pipe); - - usb_anchor_urb(urb, &vstdev->submitted); - retval = vstusb_fill_and_send_urb(urb, dev, pipe, buffer, - usb_data.count, &done); - mutex_unlock(&vstdev->lock); - if (retval) { - usb_unanchor_urb(urb); - dev_err(&dev->dev, - "%s: error %d filling and sending urb %d\n", - __func__, retval, pipe); - goto exit; - } - - retval = vstusb_complete_urb(urb, &done, timeout, &cnt); - if (retval) { - dev_err(&dev->dev, "%s: error %d completing urb %d\n", - __func__, retval, pipe); - goto exit; - } - - if (copy_to_user(usb_data.buffer, buffer, cnt)) { - dev_err(&dev->dev, "%s: can't copy_to_user\n", - __func__); - retval = -EFAULT; - goto exit; - } - - usb_data.count = cnt; - if (copy_to_user(data, &usb_data, sizeof(struct vstusb_args))) { - dev_err(&dev->dev, "%s: can't copy_to_user\n", - __func__); - retval = -EFAULT; - } else { - dev_dbg(&dev->dev, "%s: recv %zd bytes from pipe %d\n", - __func__, usb_data.count, usb_data.pipe); - } - - break; - - default: - mutex_unlock(&vstdev->lock); - dev_warn(&dev->dev, "ioctl_vstusb: invalid ioctl cmd %x\n", - cmd); - return -EINVAL; - break; - } -exit: - usb_free_urb(urb); - kfree(buffer); - return retval; -} - -static const struct file_operations vstusb_fops = { - .owner = THIS_MODULE, - .read = vstusb_read, - .write = vstusb_write, - .unlocked_ioctl = vstusb_ioctl, - .compat_ioctl = vstusb_ioctl, - .open = vstusb_open, - .release = vstusb_release, -}; - -static struct usb_class_driver usb_vstusb_class = { - .name = "usb/vstusb%d", - .fops = &vstusb_fops, - .minor_base = VSTUSB_MINOR_BASE, -}; - -static int vstusb_probe(struct usb_interface *intf, - const struct usb_device_id *id) -{ - struct usb_device *dev = interface_to_usbdev(intf); - struct vstusb_device *vstdev; - int i; - int retval = 0; - - /* allocate memory for our device state and intialize it */ - - vstdev = kzalloc(sizeof(*vstdev), GFP_KERNEL); - if (vstdev == NULL) - return -ENOMEM; - - /* must do usb_get_dev() prior to kref_init() since the kref_put() - * release function will do a usb_put_dev() */ - usb_get_dev(dev); - kref_init(&vstdev->kref); - mutex_init(&vstdev->lock); - - i = dev->descriptor.bcdDevice; - - dev_dbg(&intf->dev, "Version %1d%1d.%1d%1d found at address %d\n", - (i & 0xF000) >> 12, (i & 0xF00) >> 8, - (i & 0xF0) >> 4, (i & 0xF), dev->devnum); - - vstdev->present = 1; - vstdev->isopen = 0; - vstdev->usb_dev = dev; - init_usb_anchor(&vstdev->submitted); - - usb_set_intfdata(intf, vstdev); - retval = usb_register_dev(intf, &usb_vstusb_class); - if (retval) { - dev_err(&intf->dev, - "%s: Not able to get a minor for this device.\n", - __func__); - usb_set_intfdata(intf, NULL); - kref_put(&vstdev->kref, vstusb_delete); - return retval; - } - - /* let the user know what node this device is now attached to */ - dev_info(&intf->dev, - "VST USB Device #%d now attached to major %d minor %d\n", - (intf->minor - VSTUSB_MINOR_BASE), USB_MAJOR, 
intf->minor); - - dev_info(&intf->dev, "%s, %s\n", DRIVER_DESC, DRIVER_VERSION); - - return retval; -} - -static void vstusb_disconnect(struct usb_interface *intf) -{ - struct vstusb_device *vstdev = usb_get_intfdata(intf); - - usb_deregister_dev(intf, &usb_vstusb_class); - usb_set_intfdata(intf, NULL); - - if (vstdev) { - - mutex_lock(&vstdev->lock); - vstdev->present = 0; - - usb_kill_anchored_urbs(&vstdev->submitted); - - mutex_unlock(&vstdev->lock); - - kref_put(&vstdev->kref, vstusb_delete); - } - -} - -static int vstusb_suspend(struct usb_interface *intf, pm_message_t message) -{ - struct vstusb_device *vstdev = usb_get_intfdata(intf); - int time; - if (!vstdev) - return 0; - - mutex_lock(&vstdev->lock); - time = usb_wait_anchor_empty_timeout(&vstdev->submitted, 1000); - if (!time) - usb_kill_anchored_urbs(&vstdev->submitted); - mutex_unlock(&vstdev->lock); - - return 0; -} - -static int vstusb_resume(struct usb_interface *intf) -{ - return 0; -} - -static struct usb_driver vstusb_driver = { - .name = "vstusb", - .probe = vstusb_probe, - .disconnect = vstusb_disconnect, - .suspend = vstusb_suspend, - .resume = vstusb_resume, - .id_table = id_table, -}; - -static int __init vstusb_init(void) -{ - int rc; - - rc = usb_register(&vstusb_driver); - if (rc) - printk(KERN_ERR "%s: failed to register (%d)", __func__, rc); - - return rc; -} - -static void __exit vstusb_exit(void) -{ - usb_deregister(&vstusb_driver); -} - -module_init(vstusb_init); -module_exit(vstusb_exit); - -MODULE_AUTHOR("Dennis O'Brien/Stephen Ware"); -MODULE_DESCRIPTION(DRIVER_VERSION); -MODULE_LICENSE("GPL"); diff --git a/include/linux/usb/Kbuild b/include/linux/usb/Kbuild index 54c446309a2a..29fd73b0bffc 100644 --- a/include/linux/usb/Kbuild +++ b/include/linux/usb/Kbuild @@ -5,4 +5,3 @@ header-y += gadgetfs.h header-y += midi.h header-y += g_printer.h header-y += tmc.h -header-y += vstusb.h diff --git a/include/linux/usb/vstusb.h b/include/linux/usb/vstusb.h deleted file mode 100644 index 1cfac67191ff..000000000000 --- a/include/linux/usb/vstusb.h +++ /dev/null @@ -1,71 +0,0 @@ -/***************************************************************************** - * File: drivers/usb/misc/vstusb.h - * - * Purpose: Support for the bulk USB Vernier Spectrophotometers - * - * Author: EQware Engineering, Inc. - * Oregon City, OR, USA 97045 - * - * Copyright: 2007, 2008 - * Vernier Software & Technology - * Beaverton, OR, USA 97005 - * - * Web: www.vernier.com - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - *****************************************************************************/ -/***************************************************************************** - * - * The vstusb module is a standard usb 'client' driver running on top of the - * standard usb host controller stack. - * - * In general, vstusb supports standard bulk usb pipes. It supports multiple - * devices and multiple pipes per device. - * - * The vstusb driver supports two interfaces: - * 1 - ioctl SEND_PIPE/RECV_PIPE - a general bulk write/read msg - * interface to any pipe with timeout support; - * 2 - standard read/write with ioctl config - offers standard read/write - * interface with ioctl configured pipes and timeouts. - * - * Both interfaces can be signal from other process and will abort its i/o - * operation. - * - * A timeout of 0 means NO timeout. 
The user can still terminate the read via - * signal. - * - * If using multiple threads with this driver, the user should ensure that - * any reads, writes, or ioctls are complete before closing the device. - * Changing read/write timeouts or pipes takes effect on next read/write. - * - *****************************************************************************/ - -struct vstusb_args { - union { - /* this struct is used for IOCTL_VSTUSB_SEND_PIPE, * - * IOCTL_VSTUSB_RECV_PIPE, and read()/write() fops */ - struct { - void __user *buffer; - size_t count; - unsigned int timeout_ms; - int pipe; - }; - - /* this one is used for IOCTL_VSTUSB_CONFIG_RW */ - struct { - int rd_pipe; - int rd_timeout_ms; - int wr_pipe; - int wr_timeout_ms; - }; - }; -}; - -#define VST_IOC_MAGIC 'L' -#define VST_IOC_FIRST 0x20 -#define IOCTL_VSTUSB_SEND_PIPE _IO(VST_IOC_MAGIC, VST_IOC_FIRST) -#define IOCTL_VSTUSB_RECV_PIPE _IO(VST_IOC_MAGIC, VST_IOC_FIRST + 1) -#define IOCTL_VSTUSB_CONFIG_RW _IO(VST_IOC_MAGIC, VST_IOC_FIRST + 2) -- cgit v1.2.3 From 2832fc11f1360668482beec06dbcd631ae5f0cf1 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 18 Feb 2010 16:43:54 +0000 Subject: USB: tty: Add a function to insert a string of characters with the same flag The USB drivers often want to insert a series of bytes all with the same flag set - provide a helper for this case. Signed-off-by: Alan Cox Signed-off-by: Greg Kroah-Hartman --- drivers/char/tty_buffer.c | 11 ++++++----- include/linux/tty_flip.h | 7 ++++++- 2 files changed, 12 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/char/tty_buffer.c b/drivers/char/tty_buffer.c index 66fa4e10d76b..4c133459ab7e 100644 --- a/drivers/char/tty_buffer.c +++ b/drivers/char/tty_buffer.c @@ -231,9 +231,10 @@ int tty_buffer_request_room(struct tty_struct *tty, size_t size) EXPORT_SYMBOL_GPL(tty_buffer_request_room); /** - * tty_insert_flip_string - Add characters to the tty buffer + * tty_insert_flip_string_fixed_flag - Add characters to the tty buffer * @tty: tty structure * @chars: characters + * @flag: flag value for each character * @size: size * * Queue a series of bytes to the tty buffering. 
All the characters @@ -242,8 +243,8 @@ EXPORT_SYMBOL_GPL(tty_buffer_request_room); * Locking: Called functions may take tty->buf.lock */ -int tty_insert_flip_string(struct tty_struct *tty, const unsigned char *chars, - size_t size) +int tty_insert_flip_string_fixed_flag(struct tty_struct *tty, + const unsigned char *chars, char flag, size_t size) { int copied = 0; do { @@ -253,7 +254,7 @@ int tty_insert_flip_string(struct tty_struct *tty, const unsigned char *chars, if (unlikely(space == 0)) break; memcpy(tb->char_buf_ptr + tb->used, chars, space); - memset(tb->flag_buf_ptr + tb->used, TTY_NORMAL, space); + memset(tb->flag_buf_ptr + tb->used, flag, space); tb->used += space; copied += space; chars += space; @@ -262,7 +263,7 @@ int tty_insert_flip_string(struct tty_struct *tty, const unsigned char *chars, } while (unlikely(size > copied)); return copied; } -EXPORT_SYMBOL(tty_insert_flip_string); +EXPORT_SYMBOL(tty_insert_flip_string_fixed_flag); /** * tty_insert_flip_string_flags - Add characters to the tty buffer diff --git a/include/linux/tty_flip.h b/include/linux/tty_flip.h index eb677cf56106..9239d033a0a3 100644 --- a/include/linux/tty_flip.h +++ b/include/linux/tty_flip.h @@ -2,8 +2,8 @@ #define _LINUX_TTY_FLIP_H extern int tty_buffer_request_room(struct tty_struct *tty, size_t size); -extern int tty_insert_flip_string(struct tty_struct *tty, const unsigned char *chars, size_t size); extern int tty_insert_flip_string_flags(struct tty_struct *tty, const unsigned char *chars, const char *flags, size_t size); +extern int tty_insert_flip_string_fixed_flag(struct tty_struct *tty, const unsigned char *chars, char flag, size_t size); extern int tty_prepare_flip_string(struct tty_struct *tty, unsigned char **chars, size_t size); extern int tty_prepare_flip_string_flags(struct tty_struct *tty, unsigned char **chars, char **flags, size_t size); void tty_schedule_flip(struct tty_struct *tty); @@ -20,4 +20,9 @@ static inline int tty_insert_flip_char(struct tty_struct *tty, return tty_insert_flip_string_flags(tty, &ch, &flag, 1); } +static inline int tty_insert_flip_string(struct tty_struct *tty, const unsigned char *chars, size_t size) +{ + return tty_insert_flip_string_fixed_flag(tty, chars, TTY_NORMAL, size); +} + #endif /* _LINUX_TTY_FLIP_H */ -- cgit v1.2.3 From 87c1e12b5eeb7b30b4b41291bef8e0b41fc3dde9 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 2 Mar 2010 02:51:56 +0000 Subject: ipsec: Fix bogus bundle flowi When I merged the bundle creation code, I introduced a bogus flowi value in the bundle. Instead of getting from the caller, it was instead set to the flow in the route object, which is totally different. The end result is that the bundles we created never match, and we instead end up with an ever growing bundle list. Thanks to Jamal for find this problem. Reported-by: Jamal Hadi Salim Signed-off-by: Herbert Xu Acked-by: Steffen Klassert Acked-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- include/net/xfrm.h | 3 ++- net/ipv4/xfrm4_policy.c | 5 +++-- net/ipv6/xfrm6_policy.c | 3 ++- net/xfrm/xfrm_policy.c | 7 ++++--- 4 files changed, 11 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index a7df3275b860..d74e080ba6c9 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -275,7 +275,8 @@ struct xfrm_policy_afinfo { struct dst_entry *dst, int nfheader_len); int (*fill_dst)(struct xfrm_dst *xdst, - struct net_device *dev); + struct net_device *dev, + struct flowi *fl); }; extern int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 67107d63c1cd..e4a1483fba77 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -91,11 +91,12 @@ static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst, return 0; } -static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev) +static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, + struct flowi *fl) { struct rtable *rt = (struct rtable *)xdst->route; - xdst->u.rt.fl = rt->fl; + xdst->u.rt.fl = *fl; xdst->u.dst.dev = dev; dev_hold(dev); diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index dbdc696f5fc5..ae181651c75a 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -116,7 +116,8 @@ static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst, return 0; } -static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev) +static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, + struct flowi *fl) { struct rt6_info *rt = (struct rt6_info*)xdst->route; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 34a5ef8316e7..843e066649cb 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1372,7 +1372,8 @@ static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst, return err; } -static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev) +static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, + struct flowi *fl) { struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(xdst->u.dst.ops->family); @@ -1381,7 +1382,7 @@ static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev) if (!afinfo) return -EINVAL; - err = afinfo->fill_dst(xdst, dev); + err = afinfo->fill_dst(xdst, dev, fl); xfrm_policy_put_afinfo(afinfo); @@ -1486,7 +1487,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev; - err = xfrm_fill_dst(xdst, dev); + err = xfrm_fill_dst(xdst, dev, fl); if (err) goto free_dst; -- cgit v1.2.3 From 309ce156aa27f29338438011d292a8d6496623d3 Mon Sep 17 00:00:00 2001 From: Jayamohan Kallickal Date: Sat, 20 Feb 2010 08:02:10 +0530 Subject: [SCSI] libiscsi: Make iscsi_eh_target_reset start with session reset The iscsi_eh_target_reset has been modified to attempt target reset only. If it fails, then iscsi_eh_session_reset will be called. 
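For illustration, a low-level iSCSI driver choosing between the resulting handlers would wire up its SCSI host template roughly as below; the "example" names are invented and only the error-handling fields are shown:

#include <linux/module.h>
#include <scsi/scsi_host.h>
#include <scsi/libiscsi.h>

/* Sketch of a hypothetical LLD host template after this change. */
static struct scsi_host_template example_iscsi_sht = {
	.module			= THIS_MODULE,
	.name			= "example_iscsi",
	.eh_abort_handler	= iscsi_eh_abort,
	.eh_device_reset_handler = iscsi_eh_device_reset,
	/* warm target reset first, dropping the session only on failure: */
	.eh_target_reset_handler = iscsi_eh_recover_target,
	/*
	 * Alternatively, a driver that cannot use the target reset TMF can
	 * point this straight at iscsi_eh_session_reset, as be2iscsi does
	 * in this patch.
	 */
};
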
Signed-off-by: Mike Christie Signed-off-by: Jayamohan Kallickal Signed-off-by: James Bottomley --- drivers/infiniband/ulp/iser/iscsi_iser.c | 2 +- drivers/scsi/be2iscsi/be_main.c | 2 +- drivers/scsi/bnx2i/bnx2i_iscsi.c | 2 +- drivers/scsi/cxgb3i/cxgb3i_iscsi.c | 2 +- drivers/scsi/iscsi_tcp.c | 2 +- drivers/scsi/libiscsi.c | 23 +++++++++++++++++++---- include/scsi/libiscsi.h | 3 ++- 7 files changed, 26 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 5f7a6fca0a4d..5472b7e9abdc 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -596,7 +596,7 @@ static struct scsi_host_template iscsi_iser_sht = { .cmd_per_lun = ISER_DEF_CMD_PER_LUN, .eh_abort_handler = iscsi_eh_abort, .eh_device_reset_handler= iscsi_eh_device_reset, - .eh_target_reset_handler= iscsi_eh_target_reset, + .eh_target_reset_handler = iscsi_eh_recover_target, .target_alloc = iscsi_target_alloc, .use_clustering = DISABLE_CLUSTERING, .proc_name = "iscsi_iser", diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c index 7c22616ab141..4d269b434a78 100644 --- a/drivers/scsi/be2iscsi/be_main.c +++ b/drivers/scsi/be2iscsi/be_main.c @@ -79,7 +79,7 @@ static struct scsi_host_template beiscsi_sht = { .slave_configure = beiscsi_slave_configure, .target_alloc = iscsi_target_alloc, .eh_device_reset_handler = iscsi_eh_device_reset, - .eh_target_reset_handler = iscsi_eh_target_reset, + .eh_target_reset_handler = iscsi_eh_session_reset, .sg_tablesize = BEISCSI_SGLIST_ELEMENTS, .can_queue = BE2_IO_DEPTH, .this_id = -1, diff --git a/drivers/scsi/bnx2i/bnx2i_iscsi.c b/drivers/scsi/bnx2i/bnx2i_iscsi.c index 1c4d1215769d..cb71dc984797 100644 --- a/drivers/scsi/bnx2i/bnx2i_iscsi.c +++ b/drivers/scsi/bnx2i/bnx2i_iscsi.c @@ -1989,7 +1989,7 @@ static struct scsi_host_template bnx2i_host_template = { .queuecommand = iscsi_queuecommand, .eh_abort_handler = iscsi_eh_abort, .eh_device_reset_handler = iscsi_eh_device_reset, - .eh_target_reset_handler = iscsi_eh_target_reset, + .eh_target_reset_handler = iscsi_eh_recover_target, .change_queue_depth = iscsi_change_queue_depth, .can_queue = 1024, .max_sectors = 127, diff --git a/drivers/scsi/cxgb3i/cxgb3i_iscsi.c b/drivers/scsi/cxgb3i/cxgb3i_iscsi.c index 412853c65372..b7c30585dadd 100644 --- a/drivers/scsi/cxgb3i/cxgb3i_iscsi.c +++ b/drivers/scsi/cxgb3i/cxgb3i_iscsi.c @@ -915,7 +915,7 @@ static struct scsi_host_template cxgb3i_host_template = { .cmd_per_lun = ISCSI_DEF_CMD_PER_LUN, .eh_abort_handler = iscsi_eh_abort, .eh_device_reset_handler = iscsi_eh_device_reset, - .eh_target_reset_handler = iscsi_eh_target_reset, + .eh_target_reset_handler = iscsi_eh_recover_target, .target_alloc = iscsi_target_alloc, .use_clustering = DISABLE_CLUSTERING, .this_id = -1, diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index 8a89ba900588..249053a9d4fa 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -874,7 +874,7 @@ static struct scsi_host_template iscsi_sw_tcp_sht = { .cmd_per_lun = ISCSI_DEF_CMD_PER_LUN, .eh_abort_handler = iscsi_eh_abort, .eh_device_reset_handler= iscsi_eh_device_reset, - .eh_target_reset_handler= iscsi_eh_target_reset, + .eh_target_reset_handler = iscsi_eh_recover_target, .use_clustering = DISABLE_CLUSTERING, .slave_alloc = iscsi_sw_tcp_slave_alloc, .slave_configure = iscsi_sw_tcp_slave_configure, diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 
703eb6a88790..685eaec53218 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -2338,7 +2338,7 @@ EXPORT_SYMBOL_GPL(iscsi_session_recovery_timedout); * This function will wait for a relogin, session termination from * userspace, or a recovery/replacement timeout. */ -static int iscsi_eh_session_reset(struct scsi_cmnd *sc) +int iscsi_eh_session_reset(struct scsi_cmnd *sc) { struct iscsi_cls_session *cls_session; struct iscsi_session *session; @@ -2389,6 +2389,7 @@ failed: mutex_unlock(&session->eh_mutex); return SUCCESS; } +EXPORT_SYMBOL_GPL(iscsi_eh_session_reset); static void iscsi_prep_tgt_reset_pdu(struct scsi_cmnd *sc, struct iscsi_tm *hdr) { @@ -2403,8 +2404,7 @@ static void iscsi_prep_tgt_reset_pdu(struct scsi_cmnd *sc, struct iscsi_tm *hdr) * iscsi_eh_target_reset - reset target * @sc: scsi command * - * This will attempt to send a warm target reset. If that fails - * then we will drop the session and attempt ERL0 recovery. + * This will attempt to send a warm target reset. */ int iscsi_eh_target_reset(struct scsi_cmnd *sc) { @@ -2476,12 +2476,27 @@ done: ISCSI_DBG_EH(session, "tgt %s reset result = %s\n", session->targetname, rc == SUCCESS ? "SUCCESS" : "FAILED"); mutex_unlock(&session->eh_mutex); + return rc; +} +EXPORT_SYMBOL_GPL(iscsi_eh_target_reset); +/** + * iscsi_eh_recover_target - reset target and possibly the session + * @sc: scsi command + * + * This will attempt to send a warm target reset. If that fails, + * we will escalate to ERL0 session recovery. + */ +int iscsi_eh_recover_target(struct scsi_cmnd *sc) +{ + int rc; + + rc = iscsi_eh_target_reset(sc); if (rc == FAILED) rc = iscsi_eh_session_reset(sc); return rc; } -EXPORT_SYMBOL_GPL(iscsi_eh_target_reset); +EXPORT_SYMBOL_GPL(iscsi_eh_recover_target); /* * Pre-allocate a pool of @max items of @item_size. By default, the pool diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index ff92b46f5153..ae5196aae1a5 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -338,7 +338,8 @@ struct iscsi_host { extern int iscsi_change_queue_depth(struct scsi_device *sdev, int depth, int reason); extern int iscsi_eh_abort(struct scsi_cmnd *sc); -extern int iscsi_eh_target_reset(struct scsi_cmnd *sc); +extern int iscsi_eh_recover_target(struct scsi_cmnd *sc); +extern int iscsi_eh_session_reset(struct scsi_cmnd *sc); extern int iscsi_eh_device_reset(struct scsi_cmnd *sc); extern int iscsi_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *)); -- cgit v1.2.3 From 84b6826306119dc3c41ef9d7ed6c408112f63301 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 1 Dec 2009 21:12:27 +0000 Subject: regulator: Add notifier event on regulator disable The intended use case is for drivers which disable regulators to save power but need to do some work to restore the hardware state when restarting. If the supplies are not actually disabled due to board limits or sharing with other active devices this notifier allows the driver to avoid unneeded reinitialisation, particularly when used with runtime PM. 
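As a usage sketch, a consumer driver interested in the new event might register a notifier along these lines; the example_* names and the helper they call are assumptions, and the regulator handle is presumed to come from regulator_get():

#include <linux/notifier.h>
#include <linux/regulator/consumer.h>

/* Invented helper: flag that the device must be reprogrammed on next use. */
static void example_mark_hw_state_lost(void)
{
}

/* Hypothetical consumer: only redo hardware setup if the supply really
 * dropped while the device was idle. */
static int example_supply_event(struct notifier_block *nb,
				unsigned long event, void *data)
{
	if (event & (REGULATOR_EVENT_DISABLE | REGULATOR_EVENT_FORCE_DISABLE))
		example_mark_hw_state_lost();
	return NOTIFY_OK;
}

static struct notifier_block example_supply_nb = {
	.notifier_call = example_supply_event,
};

/* At probe time, with "supply" already obtained:
 *	regulator_register_notifier(supply, &example_supply_nb);
 */
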
Signed-off-by: Mark Brown --- drivers/regulator/core.c | 7 +++++-- include/linux/regulator/consumer.h | 4 +++- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index b60a4c9f8f16..6d2ce8a05331 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1341,6 +1341,9 @@ static int _regulator_disable(struct regulator_dev *rdev) __func__, rdev_get_name(rdev)); return ret; } + + _notifier_call_chain(rdev, REGULATOR_EVENT_DISABLE, + NULL); } /* decrease our supplies ref count and disable if required */ @@ -1399,8 +1402,8 @@ static int _regulator_force_disable(struct regulator_dev *rdev) return ret; } /* notify other consumers that power has been forced off */ - _notifier_call_chain(rdev, REGULATOR_EVENT_FORCE_DISABLE, - NULL); + _notifier_call_chain(rdev, REGULATOR_EVENT_FORCE_DISABLE | + REGULATOR_EVENT_DISABLE, NULL); } /* decrease our supplies ref count and disable if required */ diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index 030d92255c7a..28c9fd020d39 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -89,8 +89,9 @@ * REGULATION_OUT Regulator output is out of regulation. * FAIL Regulator output has failed. * OVER_TEMP Regulator over temp. - * FORCE_DISABLE Regulator shut down by software. + * FORCE_DISABLE Regulator forcibly shut down by software. * VOLTAGE_CHANGE Regulator voltage changed. + * DISABLE Regulator was disabled. * * NOTE: These events can be OR'ed together when passed into handler. */ @@ -102,6 +103,7 @@ #define REGULATOR_EVENT_OVER_TEMP 0x10 #define REGULATOR_EVENT_FORCE_DISABLE 0x20 #define REGULATOR_EVENT_VOLTAGE_CHANGE 0x40 +#define REGULATOR_EVENT_DISABLE 0x80 struct regulator; -- cgit v1.2.3 From 31aae2beeb3d601d556b6a8c39085940ad1e9f42 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 21 Dec 2009 12:21:52 +0000 Subject: regulator: Allow regulators to specify the time taken to ramp on enable Regulators may sometimes take longer to enable than the control operation used to do so, either because the regulator has ramp rate control used to limit inrush current or because the control operation is very fast (GPIO being the most common example of this). In order to ensure that consumers do not rely on the regulator before it is enabled provide an enable_time() operation and have the core delay for that time before returning to the caller. This is implemented as a function since the ramp rate may be specified in voltage per unit time and therefore the time depend on the configuration. In future it would be desirable to allow the bulk operations to run the delays for multiple enables in parallel but this is not currently supported. 
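To make the new callback concrete, a driver whose hardware ramps at a fixed rate could report its settling time roughly as follows; the example_* structure and its fields are assumptions (the max8649 driver added further down in this log does a similar voltage/ramp-rate division):

#include <linux/regulator/driver.h>

/* Hypothetical per-regulator driver data. */
struct example_reg {
	int volt_uV;		/* currently programmed output voltage */
	int ramp_uV_per_us;	/* hardware ramp rate */
};

/* Return the ramp time in microseconds; the core delays for this long
 * inside _regulator_enable() before returning to the consumer. */
static int example_enable_time(struct regulator_dev *rdev)
{
	struct example_reg *reg = rdev_get_drvdata(rdev);

	return reg->volt_uV / reg->ramp_uV_per_us;
}

static struct regulator_ops example_ops = {
	/* .enable, .disable, .is_enabled, ... omitted */
	.enable_time	= example_enable_time,
};
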
Signed-off-by: Mark Brown --- drivers/regulator/core.c | 41 ++++++++++++++++++++++++++++++++++------ include/linux/regulator/driver.h | 6 ++++++ 2 files changed, 41 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 6d2ce8a05331..ca8e1642538b 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -1084,6 +1085,13 @@ overflow_err: return NULL; } +static int _regulator_get_enable_time(struct regulator_dev *rdev) +{ + if (!rdev->desc->ops->enable_time) + return 0; + return rdev->desc->ops->enable_time(rdev); +} + /* Internal regulator request function */ static struct regulator *_regulator_get(struct device *dev, const char *id, int exclusive) @@ -1251,7 +1259,7 @@ static int _regulator_can_change_status(struct regulator_dev *rdev) /* locks held by regulator_enable() */ static int _regulator_enable(struct regulator_dev *rdev) { - int ret; + int ret, delay; /* do we need to enable the supply regulator first */ if (rdev->supply) { @@ -1275,13 +1283,34 @@ static int _regulator_enable(struct regulator_dev *rdev) if (!_regulator_can_change_status(rdev)) return -EPERM; - if (rdev->desc->ops->enable) { - ret = rdev->desc->ops->enable(rdev); - if (ret < 0) - return ret; - } else { + if (!rdev->desc->ops->enable) return -EINVAL; + + /* Query before enabling in case configuration + * dependant. */ + ret = _regulator_get_enable_time(rdev); + if (ret >= 0) { + delay = ret; + } else { + printk(KERN_WARNING + "%s: enable_time() failed for %s: %d\n", + __func__, rdev_get_name(rdev), + ret); + delay = 0; } + + /* Allow the regulator to ramp; it would be useful + * to extend this for bulk operations so that the + * regulators can ramp together. */ + ret = rdev->desc->ops->enable(rdev); + if (ret < 0) + return ret; + + if (delay >= 1000) + mdelay(delay / 1000); + else if (delay) + udelay(delay); + } else if (ret < 0) { printk(KERN_ERR "%s: is_enabled() failed for %s: %d\n", __func__, rdev_get_name(rdev), ret); diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 31f2055eae28..592cd7c642c2 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -58,6 +58,9 @@ enum regulator_status { * @get_optimum_mode: Get the most efficient operating mode for the regulator * when running with the specified parameters. * + * @enable_time: Time taken for the regulator voltage output voltage to + * stabalise after being enabled, in microseconds. + * * @set_suspend_voltage: Set the voltage for the regulator when the system * is suspended. * @set_suspend_enable: Mark the regulator as enabled when the system is @@ -93,6 +96,9 @@ struct regulator_ops { int (*set_mode) (struct regulator_dev *, unsigned int mode); unsigned int (*get_mode) (struct regulator_dev *); + /* Time taken to enable the regulator */ + int (*enable_time) (struct regulator_dev *); + /* report regulator status ... most other accessors report * control inputs, this reports results of combining inputs * from Linux (and other sources) with the actual load. -- cgit v1.2.3 From eda79a3041a2cada0d4ee9491c99c3874b322356 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 12 Jan 2010 12:25:13 +0200 Subject: regulator: Add 'start-up time' to fixed voltage regulators Add a field to specify a delay for the start-up time of a fixed voltage regulator. 
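As a hedged sketch of how board code would use the new field, a platform might describe a GPIO-gated 3.3 V supply that needs half a millisecond to come up as below; the board-level names, the GPIO number and the delay value are made up for illustration:

#include <linux/platform_device.h>
#include <linux/regulator/machine.h>
#include <linux/regulator/fixed.h>

static struct regulator_init_data example_vmmc_init_data = {
	/* constraints and consumer supplies omitted */
};

static struct fixed_voltage_config example_vmmc_config = {
	.supply_name	= "vmmc",
	.microvolts	= 3300000,
	.gpio		= 42,		/* invented GPIO number */
	.startup_delay	= 500,		/* the new field, in microseconds */
	.enable_high	= 1,
	.init_data	= &example_vmmc_init_data,
};

static struct platform_device example_vmmc_device = {
	.name	= "reg-fixed-voltage",
	.id	= 0,
	.dev	= {
		.platform_data = &example_vmmc_config,
	},
};
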
Signed-off-by: Adrian Hunter Acked-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/fixed.c | 5 +++++ include/linux/regulator/fixed.h | 2 ++ 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/drivers/regulator/fixed.c b/drivers/regulator/fixed.c index f9f516a3028a..ee3e7eb97b1c 100644 --- a/drivers/regulator/fixed.c +++ b/drivers/regulator/fixed.c @@ -24,12 +24,14 @@ #include #include #include +#include struct fixed_voltage_data { struct regulator_desc desc; struct regulator_dev *dev; int microvolts; int gpio; + unsigned startup_delay; unsigned enable_high:1; unsigned is_enabled:1; }; @@ -48,6 +50,8 @@ static int fixed_voltage_enable(struct regulator_dev *dev) if (gpio_is_valid(data->gpio)) { gpio_set_value_cansleep(data->gpio, data->enable_high); data->is_enabled = 1; + if (data->startup_delay) + udelay(data->startup_delay); } return 0; @@ -117,6 +121,7 @@ static int regulator_fixed_voltage_probe(struct platform_device *pdev) drvdata->microvolts = config->microvolts; drvdata->gpio = config->gpio; + drvdata->startup_delay = config->startup_delay; if (gpio_is_valid(config->gpio)) { drvdata->enable_high = config->enable_high; diff --git a/include/linux/regulator/fixed.h b/include/linux/regulator/fixed.h index e94a4a1c7c8a..ffd7d508e726 100644 --- a/include/linux/regulator/fixed.h +++ b/include/linux/regulator/fixed.h @@ -25,6 +25,7 @@ struct regulator_init_data; * @microvolts: Output voltage of regulator * @gpio: GPIO to use for enable control * set to -EINVAL if not used + * @startup_delay: Start-up time in microseconds * @enable_high: Polarity of enable GPIO * 1 = Active high, 0 = Active low * @enabled_at_boot: Whether regulator has been enabled at @@ -41,6 +42,7 @@ struct fixed_voltage_config { const char *supply_name; int microvolts; int gpio; + unsigned startup_delay; unsigned enable_high:1; unsigned enabled_at_boot:1; struct regulator_init_data *init_data; -- cgit v1.2.3 From f4b97b36b7c6b2d4455f27d6371869f915cbe8fd Mon Sep 17 00:00:00 2001 From: Alberto Panizzo Date: Tue, 19 Jan 2010 12:48:54 +0100 Subject: regulator: mc13783: consider Power Gates as digital regulators. GPO regulators are digital outputs that can be enabled or disabled by a dedicated bit in mc13783 POWERMISC register. In this family can be count in also Power Gates (PWGT1 and 2): enabled by a dedicated pin a Power Gate is an hardware driven supply where the output (PWGTnDRV) follow this law: Bit PWGTxSPIEN | Pin PWGTxEN | PWGTxDRV | Read Back 0 = default | | | PWGTxSPIEN ---------------+-------------+----------+------------ 1 | x | Low | 0 0 | 0 | High | 1 0 | 1 | Low | 0 As read back value of control bit reflects the PWGTxDRV state (not the control value previously written) and mc13783 POWERMISC register contain only regulator related bits, a dedicated function to manage these bits is created here with the aim of tracing the real value of PWGTxSPIEN bits and reproduce it on next writes. All POWERMISC users _must_ use the new function to not accidentally disable Power Gates supplies. v2 changes: -Better utilization of abstraction layers. -Voltage query support. GPO's and PWGTxDRV are fixed voltage regulator with voltage value of 3.1V and 5.5V respectively. 
Signed-off-by: Alberto Panizzo Acked-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/mc13783-regulator.c | 132 ++++++++++++++++++++++++++++++++-- include/linux/mfd/mc13783.h | 2 + 2 files changed, 128 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/regulator/mc13783-regulator.c b/drivers/regulator/mc13783-regulator.c index a40e35ab8555..f7b81845a196 100644 --- a/drivers/regulator/mc13783-regulator.c +++ b/drivers/regulator/mc13783-regulator.c @@ -82,6 +82,11 @@ #define MC13783_REG_POWERMISC_GPO2EN (1 << 8) #define MC13783_REG_POWERMISC_GPO3EN (1 << 10) #define MC13783_REG_POWERMISC_GPO4EN (1 << 12) +#define MC13783_REG_POWERMISC_PWGT1SPIEN (1 << 15) +#define MC13783_REG_POWERMISC_PWGT2SPIEN (1 << 16) + +#define MC13783_REG_POWERMISC_PWGTSPI_M (3 << 15) + struct mc13783_regulator { struct regulator_desc desc; @@ -161,8 +166,17 @@ static const int const mc13783_vrf_val[] = { 1500000, 1875000, 2700000, 2775000, }; +static const int const mc13783_gpo_val[] = { + 3100000, +}; + +static const int const mc13783_pwgtdrv_val[] = { + 5500000, +}; + static struct regulator_ops mc13783_regulator_ops; static struct regulator_ops mc13783_fixed_regulator_ops; +static struct regulator_ops mc13783_gpo_regulator_ops; #define MC13783_DEFINE(prefix, _name, _reg, _vsel_reg, _voltages) \ [MC13783_ ## prefix ## _ ## _name] = { \ @@ -197,17 +211,19 @@ static struct regulator_ops mc13783_fixed_regulator_ops; .voltages = _voltages, \ } -#define MC13783_GPO_DEFINE(prefix, _name, _reg) \ +#define MC13783_GPO_DEFINE(prefix, _name, _reg, _voltages) \ [MC13783_ ## prefix ## _ ## _name] = { \ .desc = { \ .name = #prefix "_" #_name, \ - .ops = &mc13783_regulator_ops, \ + .n_voltages = ARRAY_SIZE(_voltages), \ + .ops = &mc13783_gpo_regulator_ops, \ .type = REGULATOR_VOLTAGE, \ .id = MC13783_ ## prefix ## _ ## _name, \ .owner = THIS_MODULE, \ }, \ .reg = MC13783_REG_ ## _reg, \ .enable_bit = MC13783_REG_ ## _reg ## _ ## _name ## EN, \ + .voltages = _voltages, \ } #define MC13783_DEFINE_SW(_name, _reg, _vsel_reg, _voltages) \ @@ -249,14 +265,17 @@ static struct mc13783_regulator mc13783_regulators[] = { mc13783_vmmc_val), MC13783_DEFINE_REGU(VMMC2, REGULATORMODE1, REGULATORSETTING1, \ mc13783_vmmc_val), - MC13783_GPO_DEFINE(REGU, GPO1, POWERMISC), - MC13783_GPO_DEFINE(REGU, GPO2, POWERMISC), - MC13783_GPO_DEFINE(REGU, GPO3, POWERMISC), - MC13783_GPO_DEFINE(REGU, GPO4, POWERMISC), + MC13783_GPO_DEFINE(REGU, GPO1, POWERMISC, mc13783_gpo_val), + MC13783_GPO_DEFINE(REGU, GPO2, POWERMISC, mc13783_gpo_val), + MC13783_GPO_DEFINE(REGU, GPO3, POWERMISC, mc13783_gpo_val), + MC13783_GPO_DEFINE(REGU, GPO4, POWERMISC, mc13783_gpo_val), + MC13783_GPO_DEFINE(REGU, PWGT1SPI, POWERMISC, mc13783_pwgtdrv_val), + MC13783_GPO_DEFINE(REGU, PWGT2SPI, POWERMISC, mc13783_pwgtdrv_val), }; struct mc13783_regulator_priv { struct mc13783 *mc13783; + u32 powermisc_pwgt_state; struct regulator_dev *regulators[]; }; @@ -445,6 +464,107 @@ static struct regulator_ops mc13783_fixed_regulator_ops = { .get_voltage = mc13783_fixed_regulator_get_voltage, }; +int mc13783_powermisc_rmw(struct mc13783_regulator_priv *priv, u32 mask, + u32 val) +{ + struct mc13783 *mc13783 = priv->mc13783; + int ret; + u32 valread; + + BUG_ON(val & ~mask); + + ret = mc13783_reg_read(mc13783, MC13783_REG_POWERMISC, &valread); + if (ret) + return ret; + + /* Update the stored state for Power Gates. 
*/ + priv->powermisc_pwgt_state = + (priv->powermisc_pwgt_state & ~mask) | val; + priv->powermisc_pwgt_state &= MC13783_REG_POWERMISC_PWGTSPI_M; + + /* Construct the new register value */ + valread = (valread & ~mask) | val; + /* Overwrite the PWGTxEN with the stored version */ + valread = (valread & ~MC13783_REG_POWERMISC_PWGTSPI_M) | + priv->powermisc_pwgt_state; + + return mc13783_reg_write(mc13783, MC13783_REG_POWERMISC, valread); +} + +static int mc13783_gpo_regulator_enable(struct regulator_dev *rdev) +{ + struct mc13783_regulator_priv *priv = rdev_get_drvdata(rdev); + int id = rdev_get_id(rdev); + int ret; + u32 en_val = mc13783_regulators[id].enable_bit; + + dev_dbg(rdev_get_dev(rdev), "%s id: %d\n", __func__, id); + + /* Power Gate enable value is 0 */ + if (id == MC13783_REGU_PWGT1SPI || + id == MC13783_REGU_PWGT2SPI) + en_val = 0; + + mc13783_lock(priv->mc13783); + ret = mc13783_powermisc_rmw(priv, mc13783_regulators[id].enable_bit, + en_val); + mc13783_unlock(priv->mc13783); + + return ret; +} + +static int mc13783_gpo_regulator_disable(struct regulator_dev *rdev) +{ + struct mc13783_regulator_priv *priv = rdev_get_drvdata(rdev); + int id = rdev_get_id(rdev); + int ret; + u32 dis_val = 0; + + dev_dbg(rdev_get_dev(rdev), "%s id: %d\n", __func__, id); + + /* Power Gate disable value is 1 */ + if (id == MC13783_REGU_PWGT1SPI || + id == MC13783_REGU_PWGT2SPI) + dis_val = mc13783_regulators[id].enable_bit; + + mc13783_lock(priv->mc13783); + ret = mc13783_powermisc_rmw(priv, mc13783_regulators[id].enable_bit, + dis_val); + mc13783_unlock(priv->mc13783); + + return ret; +} + +static int mc13783_gpo_regulator_is_enabled(struct regulator_dev *rdev) +{ + struct mc13783_regulator_priv *priv = rdev_get_drvdata(rdev); + int ret, id = rdev_get_id(rdev); + unsigned int val; + + mc13783_lock(priv->mc13783); + ret = mc13783_reg_read(priv->mc13783, mc13783_regulators[id].reg, &val); + mc13783_unlock(priv->mc13783); + + if (ret) + return ret; + + /* Power Gates state is stored in powermisc_pwgt_state + * where the meaning of bits is negated */ + val = (val & ~MC13783_REG_POWERMISC_PWGTSPI_M) | + (priv->powermisc_pwgt_state ^ MC13783_REG_POWERMISC_PWGTSPI_M); + + return (val & mc13783_regulators[id].enable_bit) != 0; +} + +static struct regulator_ops mc13783_gpo_regulator_ops = { + .enable = mc13783_gpo_regulator_enable, + .disable = mc13783_gpo_regulator_disable, + .is_enabled = mc13783_gpo_regulator_is_enabled, + .list_voltage = mc13783_regulator_list_voltage, + .set_voltage = mc13783_fixed_regulator_set_voltage, + .get_voltage = mc13783_fixed_regulator_get_voltage, +}; + static int __devinit mc13783_regulator_probe(struct platform_device *pdev) { struct mc13783_regulator_priv *priv; diff --git a/include/linux/mfd/mc13783.h b/include/linux/mfd/mc13783.h index 35680409b8cf..94cb51a64037 100644 --- a/include/linux/mfd/mc13783.h +++ b/include/linux/mfd/mc13783.h @@ -108,6 +108,8 @@ int mc13783_adc_do_conversion(struct mc13783 *mc13783, unsigned int mode, #define MC13783_REGU_V2 28 #define MC13783_REGU_V3 29 #define MC13783_REGU_V4 30 +#define MC13783_REGU_PWGT1SPI 31 +#define MC13783_REGU_PWGT2SPI 32 #define MC13783_IRQ_ADCDONE 0 #define MC13783_IRQ_ADCBISDONE 1 -- cgit v1.2.3 From a71b797fdc672714bfff1fdc142042a95e97d7ba Mon Sep 17 00:00:00 2001 From: Haojian Zhuang Date: Mon, 25 Jan 2010 10:24:09 -0500 Subject: regulator: enable max8649 regulator driver Signed-off-by: Haojian Zhuang Acked-by: Mark Brown Signed-off-by: Liam Girdwood --- drivers/regulator/Kconfig | 7 + drivers/regulator/Makefile | 
1 + drivers/regulator/max8649.c | 408 ++++++++++++++++++++++++++++++++++++++ include/linux/regulator/max8649.h | 44 ++++ 4 files changed, 460 insertions(+) create mode 100644 drivers/regulator/max8649.c create mode 100644 include/linux/regulator/max8649.h (limited to 'include') diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index c565e0d87287..2bc01ee9d9f2 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -69,6 +69,13 @@ config REGULATOR_MAX1586 regulator via I2C bus. The provided regulator is suitable for PXA27x chips to control VCC_CORE and VCC_USIM voltages. +config REGULATOR_MAX8649 + tristate "Maxim 8649 voltage regulator" + depends on I2C + help + This driver controls a Maxim 8649 voltage output regulator via + I2C bus. + config REGULATOR_MAX8660 tristate "Maxim 8660/8661 voltage regulator" depends on I2C diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile index b3c806c79415..075835be4396 100644 --- a/drivers/regulator/Makefile +++ b/drivers/regulator/Makefile @@ -12,6 +12,7 @@ obj-$(CONFIG_REGULATOR_BQ24022) += bq24022.o obj-$(CONFIG_REGULATOR_LP3971) += lp3971.o obj-$(CONFIG_REGULATOR_MAX1586) += max1586.o obj-$(CONFIG_REGULATOR_TWL4030) += twl-regulator.o +obj-$(CONFIG_REGULATOR_MAX8649) += max8649.o obj-$(CONFIG_REGULATOR_MAX8660) += max8660.o obj-$(CONFIG_REGULATOR_WM831X) += wm831x-dcdc.o obj-$(CONFIG_REGULATOR_WM831X) += wm831x-isink.o diff --git a/drivers/regulator/max8649.c b/drivers/regulator/max8649.c new file mode 100644 index 000000000000..3ebdf698c648 --- /dev/null +++ b/drivers/regulator/max8649.c @@ -0,0 +1,408 @@ +/* + * Regulators driver for Maxim max8649 + * + * Copyright (C) 2009-2010 Marvell International Ltd. + * Haojian Zhuang + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include +#include +#include +#include +#include +#include +#include + +#define MAX8649_DCDC_VMIN 750000 /* uV */ +#define MAX8649_DCDC_VMAX 1380000 /* uV */ +#define MAX8649_DCDC_STEP 10000 /* uV */ +#define MAX8649_VOL_MASK 0x3f + +/* Registers */ +#define MAX8649_MODE0 0x00 +#define MAX8649_MODE1 0x01 +#define MAX8649_MODE2 0x02 +#define MAX8649_MODE3 0x03 +#define MAX8649_CONTROL 0x04 +#define MAX8649_SYNC 0x05 +#define MAX8649_RAMP 0x06 +#define MAX8649_CHIP_ID1 0x08 +#define MAX8649_CHIP_ID2 0x09 + +/* Bits */ +#define MAX8649_EN_PD (1 << 7) +#define MAX8649_VID0_PD (1 << 6) +#define MAX8649_VID1_PD (1 << 5) +#define MAX8649_VID_MASK (3 << 5) + +#define MAX8649_FORCE_PWM (1 << 7) +#define MAX8649_SYNC_EXTCLK (1 << 6) + +#define MAX8649_EXT_MASK (3 << 6) + +#define MAX8649_RAMP_MASK (7 << 5) +#define MAX8649_RAMP_DOWN (1 << 1) + +struct max8649_regulator_info { + struct regulator_dev *regulator; + struct i2c_client *i2c; + struct device *dev; + struct mutex io_lock; + + int vol_reg; + unsigned mode:2; /* bit[1:0] = VID1, VID0 */ + unsigned extclk_freq:2; + unsigned extclk:1; + unsigned ramp_timing:3; + unsigned ramp_down:1; +}; + +/* I2C operations */ + +static inline int max8649_read_device(struct i2c_client *i2c, + int reg, int bytes, void *dest) +{ + unsigned char data; + int ret; + + data = (unsigned char)reg; + ret = i2c_master_send(i2c, &data, 1); + if (ret < 0) + return ret; + ret = i2c_master_recv(i2c, dest, bytes); + if (ret < 0) + return ret; + return 0; +} + +static inline int max8649_write_device(struct i2c_client *i2c, + int reg, int bytes, void *src) +{ + unsigned char buf[bytes + 1]; + int ret; + + buf[0] = (unsigned char)reg; + memcpy(&buf[1], src, bytes); + + ret = i2c_master_send(i2c, buf, bytes + 1); + if (ret < 0) + return ret; + return 0; +} + +static int max8649_reg_read(struct i2c_client *i2c, int reg) +{ + struct max8649_regulator_info *info = i2c_get_clientdata(i2c); + unsigned char data; + int ret; + + mutex_lock(&info->io_lock); + ret = max8649_read_device(i2c, reg, 1, &data); + mutex_unlock(&info->io_lock); + + if (ret < 0) + return ret; + return (int)data; +} + +static int max8649_set_bits(struct i2c_client *i2c, int reg, + unsigned char mask, unsigned char data) +{ + struct max8649_regulator_info *info = i2c_get_clientdata(i2c); + unsigned char value; + int ret; + + mutex_lock(&info->io_lock); + ret = max8649_read_device(i2c, reg, 1, &value); + if (ret < 0) + goto out; + value &= ~mask; + value |= data; + ret = max8649_write_device(i2c, reg, 1, &value); +out: + mutex_unlock(&info->io_lock); + return ret; +} + +static inline int check_range(int min_uV, int max_uV) +{ + if ((min_uV < MAX8649_DCDC_VMIN) || (max_uV > MAX8649_DCDC_VMAX) + || (min_uV > max_uV)) + return -EINVAL; + return 0; +} + +static int max8649_list_voltage(struct regulator_dev *rdev, unsigned index) +{ + return (MAX8649_DCDC_VMIN + index * MAX8649_DCDC_STEP); +} + +static int max8649_get_voltage(struct regulator_dev *rdev) +{ + struct max8649_regulator_info *info = rdev_get_drvdata(rdev); + unsigned char data; + int ret; + + ret = max8649_reg_read(info->i2c, info->vol_reg); + if (ret < 0) + return ret; + data = (unsigned char)ret & MAX8649_VOL_MASK; + return max8649_list_voltage(rdev, data); +} + +static int max8649_set_voltage(struct regulator_dev *rdev, + int min_uV, int max_uV) +{ + struct max8649_regulator_info *info = rdev_get_drvdata(rdev); + unsigned char data, mask; + + if (check_range(min_uV, max_uV)) { + dev_err(info->dev, "invalid voltage range (%d, %d) uV\n", + min_uV, 
max_uV); + return -EINVAL; + } + data = (min_uV - MAX8649_DCDC_VMIN + MAX8649_DCDC_STEP - 1) + / MAX8649_DCDC_STEP; + mask = MAX8649_VOL_MASK; + + return max8649_set_bits(info->i2c, info->vol_reg, mask, data); +} + +/* EN_PD means pulldown on EN input */ +static int max8649_enable(struct regulator_dev *rdev) +{ + struct max8649_regulator_info *info = rdev_get_drvdata(rdev); + return max8649_set_bits(info->i2c, MAX8649_CONTROL, MAX8649_EN_PD, 0); +} + +/* + * Applied internal pulldown resistor on EN input pin. + * If pulldown EN pin outside, it would be better. + */ +static int max8649_disable(struct regulator_dev *rdev) +{ + struct max8649_regulator_info *info = rdev_get_drvdata(rdev); + return max8649_set_bits(info->i2c, MAX8649_CONTROL, MAX8649_EN_PD, + MAX8649_EN_PD); +} + +static int max8649_is_enabled(struct regulator_dev *rdev) +{ + struct max8649_regulator_info *info = rdev_get_drvdata(rdev); + int ret; + + ret = max8649_reg_read(info->i2c, MAX8649_CONTROL); + if (ret < 0) + return ret; + return !((unsigned char)ret & MAX8649_EN_PD); +} + +static int max8649_enable_time(struct regulator_dev *rdev) +{ + struct max8649_regulator_info *info = rdev_get_drvdata(rdev); + int voltage, rate, ret; + + /* get voltage */ + ret = max8649_reg_read(info->i2c, info->vol_reg); + if (ret < 0) + return ret; + ret &= MAX8649_VOL_MASK; + voltage = max8649_list_voltage(rdev, (unsigned char)ret); /* uV */ + + /* get rate */ + ret = max8649_reg_read(info->i2c, MAX8649_RAMP); + if (ret < 0) + return ret; + ret = (ret & MAX8649_RAMP_MASK) >> 5; + rate = (32 * 1000) >> ret; /* uV/uS */ + + return (voltage / rate); +} + +static int max8649_set_mode(struct regulator_dev *rdev, unsigned int mode) +{ + struct max8649_regulator_info *info = rdev_get_drvdata(rdev); + + switch (mode) { + case REGULATOR_MODE_FAST: + max8649_set_bits(info->i2c, info->vol_reg, MAX8649_FORCE_PWM, + MAX8649_FORCE_PWM); + break; + case REGULATOR_MODE_NORMAL: + max8649_set_bits(info->i2c, info->vol_reg, + MAX8649_FORCE_PWM, 0); + break; + default: + return -EINVAL; + } + return 0; +} + +static unsigned int max8649_get_mode(struct regulator_dev *rdev) +{ + struct max8649_regulator_info *info = rdev_get_drvdata(rdev); + int ret; + + ret = max8649_reg_read(info->i2c, info->vol_reg); + if (ret & MAX8649_FORCE_PWM) + return REGULATOR_MODE_FAST; + return REGULATOR_MODE_NORMAL; +} + +static struct regulator_ops max8649_dcdc_ops = { + .set_voltage = max8649_set_voltage, + .get_voltage = max8649_get_voltage, + .list_voltage = max8649_list_voltage, + .enable = max8649_enable, + .disable = max8649_disable, + .is_enabled = max8649_is_enabled, + .enable_time = max8649_enable_time, + .set_mode = max8649_set_mode, + .get_mode = max8649_get_mode, + +}; + +static struct regulator_desc dcdc_desc = { + .name = "max8649", + .ops = &max8649_dcdc_ops, + .type = REGULATOR_VOLTAGE, + .n_voltages = 1 << 6, + .owner = THIS_MODULE, +}; + +static int __devinit max8649_regulator_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct max8649_platform_data *pdata = client->dev.platform_data; + struct max8649_regulator_info *info = NULL; + unsigned char data; + int ret; + + info = kzalloc(sizeof(struct max8649_regulator_info), GFP_KERNEL); + if (!info) { + dev_err(&client->dev, "No enough memory\n"); + return -ENOMEM; + } + + info->i2c = client; + info->dev = &client->dev; + mutex_init(&info->io_lock); + i2c_set_clientdata(client, info); + + info->mode = pdata->mode; + switch (info->mode) { + case 0: + info->vol_reg = MAX8649_MODE0; + break; + 
case 1: + info->vol_reg = MAX8649_MODE1; + break; + case 2: + info->vol_reg = MAX8649_MODE2; + break; + case 3: + info->vol_reg = MAX8649_MODE3; + break; + default: + break; + } + + ret = max8649_reg_read(info->i2c, MAX8649_CHIP_ID1); + if (ret < 0) { + dev_err(info->dev, "Failed to detect ID of MAX8649:%d\n", + ret); + goto out; + } + dev_info(info->dev, "Detected MAX8649 (ID:%x)\n", ret); + + /* enable VID0 & VID1 */ + max8649_set_bits(info->i2c, MAX8649_CONTROL, MAX8649_VID_MASK, 0); + + /* enable/disable external clock synchronization */ + info->extclk = pdata->extclk; + data = (info->extclk) ? MAX8649_SYNC_EXTCLK : 0; + max8649_set_bits(info->i2c, info->vol_reg, MAX8649_SYNC_EXTCLK, data); + if (info->extclk) { + /* set external clock frequency */ + info->extclk_freq = pdata->extclk_freq; + max8649_set_bits(info->i2c, MAX8649_SYNC, MAX8649_EXT_MASK, + info->extclk_freq); + } + + if (pdata->ramp_timing) { + info->ramp_timing = pdata->ramp_timing; + max8649_set_bits(info->i2c, MAX8649_RAMP, MAX8649_RAMP_MASK, + info->ramp_timing << 5); + } + + info->ramp_down = pdata->ramp_down; + if (info->ramp_down) { + max8649_set_bits(info->i2c, MAX8649_RAMP, MAX8649_RAMP_DOWN, + MAX8649_RAMP_DOWN); + } + + info->regulator = regulator_register(&dcdc_desc, &client->dev, + pdata->regulator, info); + if (IS_ERR(info->regulator)) { + dev_err(info->dev, "failed to register regulator %s\n", + dcdc_desc.name); + ret = PTR_ERR(info->regulator); + goto out; + } + + dev_info(info->dev, "Max8649 regulator device is detected.\n"); + return 0; +out: + kfree(info); + return ret; +} + +static int __devexit max8649_regulator_remove(struct i2c_client *client) +{ + struct max8649_regulator_info *info = i2c_get_clientdata(client); + + if (info) { + if (info->regulator) + regulator_unregister(info->regulator); + kfree(info); + } + i2c_set_clientdata(client, NULL); + + return 0; +} + +static const struct i2c_device_id max8649_id[] = { + { "max8649", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, max8649_id); + +static struct i2c_driver max8649_driver = { + .probe = max8649_regulator_probe, + .remove = __devexit_p(max8649_regulator_remove), + .driver = { + .name = "max8649", + }, + .id_table = max8649_id, +}; + +static int __init max8649_init(void) +{ + return i2c_add_driver(&max8649_driver); +} +subsys_initcall(max8649_init); + +static void __exit max8649_exit(void) +{ + i2c_del_driver(&max8649_driver); +} +module_exit(max8649_exit); + +/* Module information */ +MODULE_DESCRIPTION("MAXIM 8649 voltage regulator driver"); +MODULE_AUTHOR("Haojian Zhuang "); +MODULE_LICENSE("GPL"); + diff --git a/include/linux/regulator/max8649.h b/include/linux/regulator/max8649.h new file mode 100644 index 000000000000..417d14ecd5cb --- /dev/null +++ b/include/linux/regulator/max8649.h @@ -0,0 +1,44 @@ +/* + * Interface of Maxim max8649 + * + * Copyright (C) 2009-2010 Marvell International Ltd. + * Haojian Zhuang + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef __LINUX_REGULATOR_MAX8649_H +#define __LINUX_REGULATOR_MAX8649_H + +#include + +enum { + MAX8649_EXTCLK_26MHZ = 0, + MAX8649_EXTCLK_13MHZ, + MAX8649_EXTCLK_19MHZ, /* 19.2MHz */ +}; + +enum { + MAX8649_RAMP_32MV = 0, + MAX8649_RAMP_16MV, + MAX8649_RAMP_8MV, + MAX8649_RAMP_4MV, + MAX8649_RAMP_2MV, + MAX8649_RAMP_1MV, + MAX8649_RAMP_0_5MV, + MAX8649_RAMP_0_25MV, +}; + +struct max8649_platform_data { + struct regulator_init_data *regulator; + + unsigned mode:2; /* bit[1:0] = VID1,VID0 */ + unsigned extclk_freq:2; + unsigned extclk:1; + unsigned ramp_timing:3; + unsigned ramp_down:1; +}; + +#endif /* __LINUX_REGULATOR_MAX8649_H */ -- cgit v1.2.3 From 193cf4b99113a4550598ba9e8343e591fc062e23 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Tue, 12 Jan 2010 16:18:08 +0200 Subject: libfs: Unexport and kill simple_prepare_write Remove the EXPORT_UNUSED_SYMBOL of simple_prepare_write Collapse simple_prepare_write into it's only caller, though making it simpler and clearer to understand. Signed-off-by: Boaz Harrosh Signed-off-by: Al Viro --- fs/libfs.c | 22 ++++++---------------- include/linux/fs.h | 2 -- 2 files changed, 6 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/fs/libfs.c b/fs/libfs.c index cd88abdcb436..9e50bcf55857 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -338,28 +338,14 @@ int simple_readpage(struct file *file, struct page *page) return 0; } -int simple_prepare_write(struct file *file, struct page *page, - unsigned from, unsigned to) -{ - if (!PageUptodate(page)) { - if (to - from != PAGE_CACHE_SIZE) - zero_user_segments(page, - 0, from, - to, PAGE_CACHE_SIZE); - } - return 0; -} - int simple_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { struct page *page; pgoff_t index; - unsigned from; index = pos >> PAGE_CACHE_SHIFT; - from = pos & (PAGE_CACHE_SIZE - 1); page = grab_cache_page_write_begin(mapping, index, flags); if (!page) @@ -367,7 +353,12 @@ int simple_write_begin(struct file *file, struct address_space *mapping, *pagep = page; - return simple_prepare_write(file, page, from, from+len); + if (!PageUptodate(page) && (len != PAGE_CACHE_SIZE)) { + unsigned from = pos & (PAGE_CACHE_SIZE - 1); + + zero_user_segments(page, 0, from, from + len, PAGE_CACHE_SIZE); + } + return 0; } /** @@ -864,7 +855,6 @@ EXPORT_SYMBOL(simple_getattr); EXPORT_SYMBOL(simple_link); EXPORT_SYMBOL(simple_lookup); EXPORT_SYMBOL(simple_pin_fs); -EXPORT_UNUSED_SYMBOL(simple_prepare_write); EXPORT_SYMBOL(simple_readpage); EXPORT_SYMBOL(simple_release_fs); EXPORT_SYMBOL(simple_rename); diff --git a/include/linux/fs.h b/include/linux/fs.h index ebb1cd5bc241..2b124c825e38 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2340,8 +2340,6 @@ extern int simple_rename(struct inode *, struct dentry *, struct inode *, struct extern int simple_sync_file(struct file *, struct dentry *, int); extern int simple_empty(struct dentry *); extern int simple_readpage(struct file *file, struct page *page); -extern int simple_prepare_write(struct file *file, struct page *page, - unsigned offset, unsigned to); extern int simple_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata); -- cgit v1.2.3 From 270ba5f7c5dac0bfb564aa35a536fb31ad4075bd Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Tue, 26 Jan 2010 14:12:43 +0000 Subject: fs: re-order super_block to remove 16 bytes of padding on 64bit builds re-order 
structure super_block to remove 16 bytes of alignment padding on 64bit builds. This shrinks the size of super_block from 712 to 696 bytes so requiring one fewer 64 byte cache lines. Signed-off-by: Richard Kennedy ----- patch against 2.6.33-rc5 compiled & tested on x86_64 AMDX2 desktop machine. I've been running with this patch applied for several weeks with no problems. regards Richard Signed-off-by: Al Viro --- include/linux/fs.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 2b124c825e38..aa76dae673eb 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1314,9 +1314,9 @@ extern spinlock_t sb_lock; struct super_block { struct list_head s_list; /* Keep this first */ dev_t s_dev; /* search index; _not_ kdev_t */ - unsigned long s_blocksize; - unsigned char s_blocksize_bits; unsigned char s_dirt; + unsigned char s_blocksize_bits; + unsigned long s_blocksize; loff_t s_maxbytes; /* Max file size */ struct file_system_type *s_type; const struct super_operations *s_op; @@ -1357,16 +1357,16 @@ struct super_block { void *s_fs_info; /* Filesystem private info */ fmode_t s_mode; + /* Granularity of c/m/atime in ns. + Cannot be worse than a second */ + u32 s_time_gran; + /* * The next field is for VFS *only*. No filesystems have any business * even looking at it. You had been warned. */ struct mutex s_vfs_rename_mutex; /* Kludge */ - /* Granularity of c/m/atime in ns. - Cannot be worse than a second */ - u32 s_time_gran; - /* * Filesystem subtype. If non-empty the filesystem type field * in /proc/mounts will be "type.subtype" -- cgit v1.2.3 From 2ecdc82ef0b03e67ce5ecee79d0d108177a704df Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 26 Jan 2010 17:27:20 +0100 Subject: kill unused invalidate_inode_pages helper No one is calling this anymore as everyone has switched to invalidate_mapping_pages long time ago. Also update a few references to it in comments. nfs has two more, but I can't easily figure what they are actually referring to, so I left them as-is. 
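For any code still carrying the removed wrapper, the conversion is mechanical; a minimal sketch modelled on the fsg_lun hunks below (the helper name here is made up for illustration, not part of the patch):

#include <linux/fs.h>

/* Hypothetical helper: drop as many clean, unlocked pages as possible. */
static unsigned long drop_cached_pages(struct address_space *mapping)
{
        /* was: invalidate_inode_pages(mapping); */
        return invalidate_mapping_pages(mapping, 0, -1);
}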
Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- drivers/usb/gadget/f_mass_storage.c | 2 +- drivers/usb/gadget/file_storage.c | 2 +- include/linux/fs.h | 6 ------ mm/filemap.c | 2 +- 4 files changed, 3 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/usb/gadget/f_mass_storage.c b/drivers/usb/gadget/f_mass_storage.c index a37640eba434..77fcd1b697e8 100644 --- a/drivers/usb/gadget/f_mass_storage.c +++ b/drivers/usb/gadget/f_mass_storage.c @@ -1041,7 +1041,7 @@ static void invalidate_sub(struct fsg_lun *curlun) unsigned long rc; rc = invalidate_mapping_pages(inode->i_mapping, 0, -1); - VLDBG(curlun, "invalidate_inode_pages -> %ld\n", rc); + VLDBG(curlun, "invalidate_mapping_pages -> %ld\n", rc); } static int do_verify(struct fsg_common *common) diff --git a/drivers/usb/gadget/file_storage.c b/drivers/usb/gadget/file_storage.c index 29dfb0277ffb..7dcdbda49cac 100644 --- a/drivers/usb/gadget/file_storage.c +++ b/drivers/usb/gadget/file_storage.c @@ -1448,7 +1448,7 @@ static void invalidate_sub(struct fsg_lun *curlun) unsigned long rc; rc = invalidate_mapping_pages(inode->i_mapping, 0, -1); - VLDBG(curlun, "invalidate_inode_pages -> %ld\n", rc); + VLDBG(curlun, "invalidate_mapping_pages -> %ld\n", rc); } static int do_verify(struct fsg_dev *fsg) diff --git a/include/linux/fs.h b/include/linux/fs.h index aa76dae673eb..d443c9dd3caa 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2058,12 +2058,6 @@ extern int invalidate_inodes(struct super_block *); unsigned long invalidate_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t end); -static inline unsigned long __deprecated -invalidate_inode_pages(struct address_space *mapping) -{ - return invalidate_mapping_pages(mapping, 0, ~0UL); -} - static inline void invalidate_remote_inode(struct inode *inode) { if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || diff --git a/mm/filemap.c b/mm/filemap.c index 698ea80f2102..148b52a5bb7e 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1117,7 +1117,7 @@ readpage: if (!PageUptodate(page)) { if (page->mapping == NULL) { /* - * invalidate_inode_pages got it + * invalidate_mapping_pages got it */ unlock_page(page); page_cache_release(page); -- cgit v1.2.3 From 495d6c9c6595ec7b37910dfd42634839431d21fd Mon Sep 17 00:00:00 2001 From: Valerie Aurora Date: Tue, 26 Jan 2010 14:20:47 -0500 Subject: VFS: Clean up shared mount flag propagation The handling of mount flags in set_mnt_shared() got a little tangled up during previous cleanups, with the following problems: * MNT_PNODE_MASK is defined as a literal constant when it should be a bitwise xor of other MNT_* flags * set_mnt_shared() clears and then sets MNT_SHARED (part of MNT_PNODE_MASK) * MNT_PNODE_MASK could use a comment in mount.h * MNT_PNODE_MASK is a terrible name, change to MNT_SHARED_MASK This patch fixes these problems. 
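To make the intended use of the two masks concrete, a condensed sketch (the EX_* constants and helper functions are illustrative stand-ins, not the kernel definitions):

/* Stand-ins for the real MNT_* bits; only the mask coverage matters. */
#define EX_SHARED               0x1000
#define EX_UNBINDABLE           0x2000
#define EX_SHARED_MASK          (EX_UNBINDABLE)
#define EX_PROPAGATION_MASK     (EX_SHARED | EX_UNBINDABLE)

/* Making a mount shared: clear only the conflicting flag, then mark it. */
static unsigned int make_shared(unsigned int mnt_flags)
{
        mnt_flags &= ~EX_SHARED_MASK;
        return mnt_flags | EX_SHARED;
}

/* Remount: carry the existing propagation state over into the new flags. */
static unsigned int remount_flags(unsigned int old_flags, unsigned int new_flags)
{
        return new_flags | (old_flags & EX_PROPAGATION_MASK);
}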
Signed-off-by: Al Viro --- fs/namespace.c | 2 +- fs/pnode.h | 2 +- include/linux/mount.h | 11 ++++++++++- 3 files changed, 12 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/fs/namespace.c b/fs/namespace.c index 25c1dcf9e9eb..d25d4602ab50 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1538,7 +1538,7 @@ static int do_remount(struct path *path, int flags, int mnt_flags, err = do_remount_sb(sb, flags, data, 0); if (!err) { spin_lock(&vfsmount_lock); - mnt_flags |= path->mnt->mnt_flags & MNT_PNODE_MASK; + mnt_flags |= path->mnt->mnt_flags & MNT_PROPAGATION_MASK; path->mnt->mnt_flags = mnt_flags; spin_unlock(&vfsmount_lock); } diff --git a/fs/pnode.h b/fs/pnode.h index 6c7ef3252a26..1ea4ae1efcd3 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -25,7 +25,7 @@ static inline void set_mnt_shared(struct vfsmount *mnt) { - mnt->mnt_flags &= ~MNT_PNODE_MASK; + mnt->mnt_flags &= ~MNT_SHARED_MASK; mnt->mnt_flags |= MNT_SHARED; } diff --git a/include/linux/mount.h b/include/linux/mount.h index 5d5275364867..375d43a5d802 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -34,7 +34,16 @@ struct mnt_namespace; #define MNT_SHARED 0x1000 /* if the vfsmount is a shared mount */ #define MNT_UNBINDABLE 0x2000 /* if the vfsmount is a unbindable mount */ -#define MNT_PNODE_MASK 0x3000 /* propagation flag mask */ +/* + * MNT_SHARED_MASK is the set of flags that should be cleared when a + * mount becomes shared. Currently, this is only the flag that says a + * mount cannot be bind mounted, since this is how we create a mount + * that shares events with another mount. If you add a new MNT_* + * flag, consider how it interacts with shared mounts. + */ +#define MNT_SHARED_MASK (MNT_UNBINDABLE) +#define MNT_PROPAGATION_MASK (MNT_SHARED | MNT_UNBINDABLE) + struct vfsmount { struct list_head mnt_hash; -- cgit v1.2.3 From 2096f759abcb42200a81d776f597362fd9265024 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 30 Jan 2010 13:16:21 -0500 Subject: New helper: path_is_under(path1, path2) Analog of is_subdir for vfsmount,dentry pairs, moved from audit_tree.c Signed-off-by: Al Viro --- fs/dcache.c | 24 ++++++++++++++++++++++++ include/linux/fs.h | 1 + kernel/audit_tree.c | 51 ++++++++++++--------------------------------------- 3 files changed, 37 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/fs/dcache.c b/fs/dcache.c index 4365998b8df4..74da947b160b 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2191,6 +2191,30 @@ int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry) return result; } +int path_is_under(struct path *path1, struct path *path2) +{ + struct vfsmount *mnt = path1->mnt; + struct dentry *dentry = path1->dentry; + int res; + spin_lock(&vfsmount_lock); + if (mnt != path2->mnt) { + for (;;) { + if (mnt->mnt_parent == mnt) { + spin_unlock(&vfsmount_lock); + return 0; + } + if (mnt->mnt_parent == path2->mnt) + break; + mnt = mnt->mnt_parent; + } + dentry = mnt->mnt_mountpoint; + } + res = is_subdir(dentry, path2->dentry); + spin_unlock(&vfsmount_lock); + return res; +} +EXPORT_SYMBOL(path_is_under); + void d_genocide(struct dentry *root) { struct dentry *this_parent = root; diff --git a/include/linux/fs.h b/include/linux/fs.h index d443c9dd3caa..8d53bc17f93f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2126,6 +2126,7 @@ extern struct file * open_exec(const char *); /* fs/dcache.c -- generic fs support functions */ extern int is_subdir(struct dentry *, struct dentry *); +extern int path_is_under(struct path *, struct path *); extern ino_t 
find_inode_number(struct dentry *, struct qstr *); #include diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 4b05bd9479db..f09b42d9c32d 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -603,22 +603,6 @@ skip_it: mutex_unlock(&audit_filter_mutex); } -static int is_under(struct vfsmount *mnt, struct dentry *dentry, - struct path *path) -{ - if (mnt != path->mnt) { - for (;;) { - if (mnt->mnt_parent == mnt) - return 0; - if (mnt->mnt_parent == path->mnt) - break; - mnt = mnt->mnt_parent; - } - dentry = mnt->mnt_mountpoint; - } - return is_subdir(dentry, path->dentry); -} - int audit_make_tree(struct audit_krule *rule, char *pathname, u32 op) { @@ -714,29 +698,24 @@ int audit_tag_tree(char *old, char *new) { struct list_head cursor, barrier; int failed = 0; - struct path path; + struct path path1, path2; struct vfsmount *tagged; struct list_head list; - struct vfsmount *mnt; - struct dentry *dentry; int err; - err = kern_path(new, 0, &path); + err = kern_path(new, 0, &path2); if (err) return err; - tagged = collect_mounts(&path); - path_put(&path); + tagged = collect_mounts(&path2); + path_put(&path2); if (!tagged) return -ENOMEM; - err = kern_path(old, 0, &path); + err = kern_path(old, 0, &path1); if (err) { drop_collected_mounts(tagged); return err; } - mnt = mntget(path.mnt); - dentry = dget(path.dentry); - path_put(&path); list_add_tail(&list, &tagged->mnt_list); @@ -747,6 +726,7 @@ int audit_tag_tree(char *old, char *new) while (cursor.next != &tree_list) { struct audit_tree *tree; struct vfsmount *p; + int good_one = 0; tree = container_of(cursor.next, struct audit_tree, list); get_tree(tree); @@ -754,23 +734,17 @@ int audit_tag_tree(char *old, char *new) list_add(&cursor, &tree->list); mutex_unlock(&audit_filter_mutex); - err = kern_path(tree->pathname, 0, &path); - if (err) { - put_tree(tree); - mutex_lock(&audit_filter_mutex); - continue; + err = kern_path(tree->pathname, 0, &path2); + if (!err) { + good_one = path_is_under(&path1, &path2); + path_put(&path2); } - spin_lock(&vfsmount_lock); - if (!is_under(mnt, dentry, &path)) { - spin_unlock(&vfsmount_lock); - path_put(&path); + if (!good_one) { put_tree(tree); mutex_lock(&audit_filter_mutex); continue; } - spin_unlock(&vfsmount_lock); - path_put(&path); list_for_each_entry(p, &list, mnt_list) { failed = tag_chunk(p->mnt_root->d_inode, tree); @@ -820,8 +794,7 @@ int audit_tag_tree(char *old, char *new) list_del(&cursor); list_del(&list); mutex_unlock(&audit_filter_mutex); - dput(dentry); - mntput(mnt); + path_put(&path1); drop_collected_mounts(tagged); return failed; } -- cgit v1.2.3 From 1f707137b55764740981d022d29c622832a61880 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 30 Jan 2010 22:51:25 -0500 Subject: new helper: iterate_mounts() apply function to vfsmounts in set returned by collect_mounts(), stop if it returns non-zero. 
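The intended calling pattern, sketched with a hypothetical callback (the in-tree users converted below are compare_root() and tag_mount() in audit_tree.c):

#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/path.h>

/* Hypothetical callback: count the mounts in the collected set. */
static int count_one(struct vfsmount *mnt, void *arg)
{
        (*(int *)arg)++;
        return 0;               /* returning non-zero stops the walk */
}

/* Hypothetical caller showing the collect/iterate/drop pattern. */
static int count_collected_mounts(struct path *path)
{
        struct vfsmount *root = collect_mounts(path);
        int n = 0;

        if (!root)
                return -ENOMEM;
        iterate_mounts(count_one, &n, root);
        drop_collected_mounts(root);
        return n;
}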
Signed-off-by: Al Viro --- fs/namespace.c | 15 +++++++++++++++ include/linux/fs.h | 3 ++- kernel/audit_tree.c | 49 ++++++++++++++++--------------------------------- 3 files changed, 33 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/fs/namespace.c b/fs/namespace.c index d25d4602ab50..d5906c19e08e 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1246,6 +1246,21 @@ void drop_collected_mounts(struct vfsmount *mnt) release_mounts(&umount_list); } +int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg, + struct vfsmount *root) +{ + struct vfsmount *mnt; + int res = f(root, arg); + if (res) + return res; + list_for_each_entry(mnt, &root->mnt_list, mnt_list) { + res = f(mnt, arg); + if (res) + return res; + } + return 0; +} + static void cleanup_group_ids(struct vfsmount *mnt, struct vfsmount *end) { struct vfsmount *p; diff --git a/include/linux/fs.h b/include/linux/fs.h index 8d53bc17f93f..e764f247d0ab 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1794,7 +1794,8 @@ extern int may_umount(struct vfsmount *); extern long do_mount(char *, char *, char *, unsigned long, void *); extern struct vfsmount *collect_mounts(struct path *); extern void drop_collected_mounts(struct vfsmount *); - +extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *, + struct vfsmount *); extern int vfs_statfs(struct dentry *, struct kstatfs *); extern int current_umask(void); diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index f09b42d9c32d..028e85663f27 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -548,6 +548,11 @@ int audit_remove_tree_rule(struct audit_krule *rule) return 0; } +static int compare_root(struct vfsmount *mnt, void *arg) +{ + return mnt->mnt_root->d_inode == arg; +} + void audit_trim_trees(void) { struct list_head cursor; @@ -559,7 +564,6 @@ void audit_trim_trees(void) struct path path; struct vfsmount *root_mnt; struct node *node; - struct list_head list; int err; tree = container_of(cursor.next, struct audit_tree, list); @@ -577,24 +581,16 @@ void audit_trim_trees(void) if (!root_mnt) goto skip_it; - list_add_tail(&list, &root_mnt->mnt_list); spin_lock(&hash_lock); list_for_each_entry(node, &tree->chunks, list) { - struct audit_chunk *chunk = find_chunk(node); - struct inode *inode = chunk->watch.inode; - struct vfsmount *mnt; + struct inode *inode = find_chunk(node)->watch.inode; node->index |= 1U<<31; - list_for_each_entry(mnt, &list, mnt_list) { - if (mnt->mnt_root->d_inode == inode) { - node->index &= ~(1U<<31); - break; - } - } + if (iterate_mounts(compare_root, inode, root_mnt)) + node->index &= ~(1U<<31); } spin_unlock(&hash_lock); trim_marked(tree); put_tree(tree); - list_del_init(&list); drop_collected_mounts(root_mnt); skip_it: mutex_lock(&audit_filter_mutex); @@ -622,13 +618,17 @@ void audit_put_tree(struct audit_tree *tree) put_tree(tree); } +static int tag_mount(struct vfsmount *mnt, void *arg) +{ + return tag_chunk(mnt->mnt_root->d_inode, arg); +} + /* called with audit_filter_mutex */ int audit_add_tree_rule(struct audit_krule *rule) { struct audit_tree *seed = rule->tree, *tree; struct path path; - struct vfsmount *mnt, *p; - struct list_head list; + struct vfsmount *mnt; int err; list_for_each_entry(tree, &tree_list, list) { @@ -654,16 +654,9 @@ int audit_add_tree_rule(struct audit_krule *rule) err = -ENOMEM; goto Err; } - list_add_tail(&list, &mnt->mnt_list); get_tree(tree); - list_for_each_entry(p, &list, mnt_list) { - err = tag_chunk(p->mnt_root->d_inode, tree); - if (err) - break; - } - - 
list_del(&list); + err = iterate_mounts(tag_mount, tree, mnt); drop_collected_mounts(mnt); if (!err) { @@ -700,7 +693,6 @@ int audit_tag_tree(char *old, char *new) int failed = 0; struct path path1, path2; struct vfsmount *tagged; - struct list_head list; int err; err = kern_path(new, 0, &path2); @@ -717,15 +709,12 @@ int audit_tag_tree(char *old, char *new) return err; } - list_add_tail(&list, &tagged->mnt_list); - mutex_lock(&audit_filter_mutex); list_add(&barrier, &tree_list); list_add(&cursor, &barrier); while (cursor.next != &tree_list) { struct audit_tree *tree; - struct vfsmount *p; int good_one = 0; tree = container_of(cursor.next, struct audit_tree, list); @@ -746,12 +735,7 @@ int audit_tag_tree(char *old, char *new) continue; } - list_for_each_entry(p, &list, mnt_list) { - failed = tag_chunk(p->mnt_root->d_inode, tree); - if (failed) - break; - } - + failed = iterate_mounts(tag_mount, tree, tagged); if (failed) { put_tree(tree); mutex_lock(&audit_filter_mutex); @@ -792,7 +776,6 @@ int audit_tag_tree(char *old, char *new) } list_del(&barrier); list_del(&cursor); - list_del(&list); mutex_unlock(&audit_filter_mutex); path_put(&path1); drop_collected_mounts(tagged); -- cgit v1.2.3 From 9f5596af44514f99e3a654a4f7cb813354b9e516 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 5 Feb 2010 00:40:25 -0500 Subject: take check for new events in namespace (guts of mounts_poll()) to namespace.c Signed-off-by: Al Viro --- fs/namespace.c | 15 +++++++++++++++ fs/proc/base.c | 10 ++-------- include/linux/mnt_namespace.h | 1 + 3 files changed, 18 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/fs/namespace.c b/fs/namespace.c index d5906c19e08e..970fe79d7867 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -737,6 +737,21 @@ static void m_stop(struct seq_file *m, void *v) up_read(&namespace_sem); } +int mnt_had_events(struct proc_mounts *p) +{ + struct mnt_namespace *ns = p->ns; + int res = 0; + + spin_lock(&vfsmount_lock); + if (p->event != ns->event) { + p->event = ns->event; + res = 1; + } + spin_unlock(&vfsmount_lock); + + return res; +} + struct proc_fs_info { int flag; const char *str; diff --git a/fs/proc/base.c b/fs/proc/base.c index 58324c299165..746895ddfda1 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -647,17 +647,11 @@ static int mounts_release(struct inode *inode, struct file *file) static unsigned mounts_poll(struct file *file, poll_table *wait) { struct proc_mounts *p = file->private_data; - struct mnt_namespace *ns = p->ns; unsigned res = POLLIN | POLLRDNORM; - poll_wait(file, &ns->poll, wait); - - spin_lock(&vfsmount_lock); - if (p->event != ns->event) { - p->event = ns->event; + poll_wait(file, &p->ns->poll, wait); + if (mnt_had_events(p)) res |= POLLERR | POLLPRI; - } - spin_unlock(&vfsmount_lock); return res; } diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h index d74785c2393a..0b89efc6f215 100644 --- a/include/linux/mnt_namespace.h +++ b/include/linux/mnt_namespace.h @@ -35,6 +35,7 @@ static inline void get_mnt_ns(struct mnt_namespace *ns) extern const struct seq_operations mounts_op; extern const struct seq_operations mountinfo_op; extern const struct seq_operations mountstats_op; +extern int mnt_had_events(struct proc_mounts *); #endif #endif -- cgit v1.2.3 From 47cd813f2984569570021ce3d34cdf9cb20aa6a2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 5 Feb 2010 02:01:14 -0500 Subject: Take vfsmount_lock to fs/internal.h no more users left outside of fs/*.c (and very few outside of fs/namespace.c, actually) Signed-off-by: 
Al Viro --- fs/internal.h | 2 ++ include/linux/mount.h | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/fs/internal.h b/fs/internal.h index e96a1667d749..8a03a5447bdf 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -70,6 +70,8 @@ extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int); extern void __init mnt_init(void); +extern spinlock_t vfsmount_lock; + /* * fs_struct.c */ diff --git a/include/linux/mount.h b/include/linux/mount.h index 375d43a5d802..163896137ab5 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -132,7 +132,6 @@ extern int do_add_mount(struct vfsmount *newmnt, struct path *path, extern void mark_mounts_for_expiry(struct list_head *mounts); -extern spinlock_t vfsmount_lock; extern dev_t name_to_dev_t(char *name); #endif /* _LINUX_MOUNT_H */ -- cgit v1.2.3 From 8089352a13b785d4e0df63d87bd2b71c76bb9aee Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 5 Feb 2010 09:30:46 -0500 Subject: Mirror MS_KERNMOUNT in ->mnt_flags Signed-off-by: Al Viro --- fs/namespace.c | 2 +- fs/super.c | 3 +++ include/linux/mount.h | 2 ++ 3 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/fs/namespace.c b/fs/namespace.c index b0b15cc2117c..ffa3843404e0 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1701,7 +1701,7 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path, { int err; - mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD); + mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL); down_write(&namespace_sem); /* Something was mounted here while we slept */ diff --git a/fs/super.c b/fs/super.c index 903896ec7c73..f35ac6022109 100644 --- a/fs/super.c +++ b/fs/super.c @@ -937,6 +937,9 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void if (!mnt) goto out; + if (flags & MS_KERNMOUNT) + mnt->mnt_flags = MNT_INTERNAL; + if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) { secdata = alloc_secdata(); if (!secdata) diff --git a/include/linux/mount.h b/include/linux/mount.h index 163896137ab5..ca726ebf50a3 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -45,6 +45,8 @@ struct mnt_namespace; #define MNT_PROPAGATION_MASK (MNT_SHARED | MNT_UNBINDABLE) +#define MNT_INTERNAL 0x4000 + struct vfsmount { struct list_head mnt_hash; struct vfsmount *mnt_parent; /* fs we are mounted on */ -- cgit v1.2.3 From db1f05bb85d7966b9176e293f3ceead1cb8b5d79 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 10 Feb 2010 12:15:53 +0100 Subject: vfs: add NOFOLLOW flag to umount(2) Add a new UMOUNT_NOFOLLOW flag to umount(2). This is needed to prevent symlink attacks in unprivileged unmounts (fuse, samba, ncpfs). Additionally, return -EINVAL if an unknown flag is used (and specify an explicitly unused flag: UMOUNT_UNUSED). This makes it possible for the caller to determine if a flag is supported or not. 
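A userspace sketch of how a caller might take advantage of this (illustrative only; the probe below exploits the new EINVAL behaviour and is an assumed usage pattern, not part of the patch):

#include <errno.h>
#include <sys/mount.h>

#ifndef UMOUNT_NOFOLLOW
#define UMOUNT_NOFOLLOW 0x00000008
#endif
#ifndef UMOUNT_UNUSED
#define UMOUNT_UNUSED   0x80000000
#endif

/* Returns non-zero if the kernel validates umount2() flags at all. */
static int umount_flags_are_checked(void)
{
        /* An empty path never resolves; only errno is interesting here. */
        return umount2("", UMOUNT_UNUSED) == -1 && errno == EINVAL;
}

static int umount_untrusted(const char *target)
{
        if (!umount_flags_are_checked()) {
                /* Older kernel: UMOUNT_NOFOLLOW would be silently ignored. */
                errno = ENOSYS;
                return -1;
        }
        return umount2(target, UMOUNT_NOFOLLOW);
}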
CC: Eugene Teo CC: Michael Kerrisk Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/namespace.c | 9 ++++++++- include/linux/fs.h | 2 ++ 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/fs/namespace.c b/fs/namespace.c index ffa3843404e0..8174c8ab5c70 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1136,8 +1136,15 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags) { struct path path; int retval; + int lookup_flags = 0; - retval = user_path(name, &path); + if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW)) + return -EINVAL; + + if (!(flags & UMOUNT_NOFOLLOW)) + lookup_flags |= LOOKUP_FOLLOW; + + retval = user_path_at(AT_FDCWD, name, lookup_flags, &path); if (retval) goto out; retval = -EINVAL; diff --git a/include/linux/fs.h b/include/linux/fs.h index e764f247d0ab..5b3182c7eb5f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1305,6 +1305,8 @@ extern int send_sigurg(struct fown_struct *fown); #define MNT_FORCE 0x00000001 /* Attempt to forcibily umount */ #define MNT_DETACH 0x00000002 /* Just detach from the tree */ #define MNT_EXPIRE 0x00000004 /* Mark for expiry */ +#define UMOUNT_NOFOLLOW 0x00000008 /* Don't follow symlink on umount */ +#define UMOUNT_UNUSED 0x80000000 /* Flag guaranteed to be unused */ extern struct list_head super_blocks; extern spinlock_t sb_lock; -- cgit v1.2.3 From 4fa004373133ece3d9b1c0a7e243b0e53760b165 Mon Sep 17 00:00:00 2001 From: Sujith Date: Mon, 1 Mar 2010 14:42:57 +0530 Subject: mac80211: Fix HT rate control configuration Handling HT configuration changes involved setting the channel with the new HT parameters and then issuing a rate_update() notification to the driver. This behavior changed after the off-channel changes. Now, the channel is not updated with the new HT params in enable_ht() - instead, it is now done when the scan work terminates. This results in the driver depending on stale information, defaulting to non-HT mode always. Fix this by passing the new channel type to the driver. Cc: stable@kernel.org Signed-off-by: Sujith Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/rc.c | 6 +++--- include/net/mac80211.h | 3 ++- net/mac80211/mlme.c | 3 ++- net/mac80211/rate.h | 5 +++-- 4 files changed, 10 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/ath/ath9k/rc.c b/drivers/net/wireless/ath/ath9k/rc.c index ac34a055c713..0e79e58cf4c9 100644 --- a/drivers/net/wireless/ath/ath9k/rc.c +++ b/drivers/net/wireless/ath/ath9k/rc.c @@ -1323,7 +1323,7 @@ static void ath_rate_init(void *priv, struct ieee80211_supported_band *sband, static void ath_rate_update(void *priv, struct ieee80211_supported_band *sband, struct ieee80211_sta *sta, void *priv_sta, - u32 changed) + u32 changed, enum nl80211_channel_type oper_chan_type) { struct ath_softc *sc = priv; struct ath_rate_priv *ath_rc_priv = priv_sta; @@ -1340,8 +1340,8 @@ static void ath_rate_update(void *priv, struct ieee80211_supported_band *sband, if (sc->sc_ah->opmode != NL80211_IFTYPE_STATION) return; - if (sc->hw->conf.channel_type == NL80211_CHAN_HT40MINUS || - sc->hw->conf.channel_type == NL80211_CHAN_HT40PLUS) + if (oper_chan_type == NL80211_CHAN_HT40MINUS || + oper_chan_type == NL80211_CHAN_HT40PLUS) oper_cw40 = true; oper_sgi40 = (sta->ht_cap.cap & IEEE80211_HT_CAP_SGI_40) ? 
diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 80eb7cc42ce9..45d7d44d7cbe 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2426,7 +2426,8 @@ struct rate_control_ops { struct ieee80211_sta *sta, void *priv_sta); void (*rate_update)(void *priv, struct ieee80211_supported_band *sband, struct ieee80211_sta *sta, - void *priv_sta, u32 changed); + void *priv_sta, u32 changed, + enum nl80211_channel_type oper_chan_type); void (*free_sta)(void *priv, struct ieee80211_sta *sta, void *priv_sta); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 5a268761e4c5..0ab284c32135 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -177,7 +177,8 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, sta = sta_info_get(sdata, bssid); if (sta) rate_control_rate_update(local, sband, sta, - IEEE80211_RC_HT_CHANGED); + IEEE80211_RC_HT_CHANGED, + local->oper_channel_type); rcu_read_unlock(); } diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index b6108bca96d4..065a96190e32 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -66,7 +66,8 @@ static inline void rate_control_rate_init(struct sta_info *sta) static inline void rate_control_rate_update(struct ieee80211_local *local, struct ieee80211_supported_band *sband, - struct sta_info *sta, u32 changed) + struct sta_info *sta, u32 changed, + enum nl80211_channel_type oper_chan_type) { struct rate_control_ref *ref = local->rate_ctrl; struct ieee80211_sta *ista = &sta->sta; @@ -74,7 +75,7 @@ static inline void rate_control_rate_update(struct ieee80211_local *local, if (ref && ref->ops->rate_update) ref->ops->rate_update(ref->priv, sband, ista, - priv_sta, changed); + priv_sta, changed, oper_chan_type); } static inline void *rate_control_alloc_sta(struct rate_control_ref *ref, -- cgit v1.2.3 From b30477d5e2961bfd90ad4146c517871ca8a6bebc Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Wed, 3 Mar 2010 11:05:55 +0100 Subject: ALSA: timer - pass real event in snd_timer_notify1() to instance callback Do not use hardcoded SNDRV_TIMER_EVENT_START value. Signed-off-by: Jaroslav Kysela Signed-off-by: Takashi Iwai --- include/sound/asound.h | 2 +- sound/core/timer.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/sound/asound.h b/include/sound/asound.h index 1f57bb92eb5a..098595500632 100644 --- a/include/sound/asound.h +++ b/include/sound/asound.h @@ -544,7 +544,7 @@ struct snd_rawmidi_status { * Timer section - /dev/snd/timer */ -#define SNDRV_TIMER_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 5) +#define SNDRV_TIMER_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 6) enum { SNDRV_TIMER_CLASS_NONE = -1, diff --git a/sound/core/timer.c b/sound/core/timer.c index 8f8b17ac074d..73943651caed 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -393,7 +393,7 @@ static void snd_timer_notify1(struct snd_timer_instance *ti, int event) event == SNDRV_TIMER_EVENT_CONTINUE) resolution = snd_timer_resolution(ti); if (ti->ccallback) - ti->ccallback(ti, SNDRV_TIMER_EVENT_START, &tstamp, resolution); + ti->ccallback(ti, event, &tstamp, resolution); if (ti->flags & SNDRV_TIMER_IFLG_SLAVE) return; timer = ti->timer; -- cgit v1.2.3 From c839d30a41dd92eb32d7fcfa2b4e99042fc64bf2 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Wed, 3 Mar 2010 04:46:50 +0000 Subject: net: add scheduler sync hint to tcp_prequeue(). Decreases the odds wakee will suffer from frequent cache misses. Signed-off-by: Mike Galbraith Signed-off-by: David S. 
Miller --- include/net/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 56f0aec40ed6..75be5a28815d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -939,7 +939,7 @@ static inline int tcp_prequeue(struct sock *sk, struct sk_buff *skb) tp->ucopy.memory = 0; } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { - wake_up_interruptible_poll(sk->sk_sleep, + wake_up_interruptible_sync_poll(sk->sk_sleep, POLLIN | POLLRDNORM | POLLRDBAND); if (!inet_csk_ack_scheduled(sk)) inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, -- cgit v1.2.3 From db1466b3e1bd1727375cdbfcbea4bcce2f860f61 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 Mar 2010 07:46:56 -0800 Subject: rcu: Use wrapper function instead of exporting tasklist_lock Lockdep-RCU commit d11c563d exported tasklist_lock, which is not a good thing. This patch instead exports a function that uses lockdep to check whether tasklist_lock is held. Suggested-by: Christoph Hellwig Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com Cc: Christoph Hellwig LKML-Reference: <1267631219-8713-1-git-send-email-paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/cred.h | 2 +- include/linux/sched.h | 4 ++++ kernel/exit.c | 2 +- kernel/fork.c | 9 ++++++++- kernel/pid.c | 4 +++- 5 files changed, 17 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/cred.h b/include/linux/cred.h index 4db09f89b637..52507c3e1387 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -280,7 +280,7 @@ static inline void put_cred(const struct cred *_cred) * task or by holding tasklist_lock to prevent it from being unlinked. 
*/ #define __task_cred(task) \ - ((const struct cred *)(rcu_dereference_check((task)->real_cred, rcu_read_lock_held() || lockdep_is_held(&tasklist_lock)))) + ((const struct cred *)(rcu_dereference_check((task)->real_cred, rcu_read_lock_held() || lockdep_tasklist_lock_is_held()))) /** * get_task_cred - Get another task's objective credentials diff --git a/include/linux/sched.h b/include/linux/sched.h index 0eef87b58ea5..a47af2064dcc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -258,6 +258,10 @@ extern spinlock_t mmlist_lock; struct task_struct; +#ifdef CONFIG_PROVE_RCU +extern int lockdep_tasklist_lock_is_held(void); +#endif /* #ifdef CONFIG_PROVE_RCU */ + extern void sched_init(void); extern void sched_init_smp(void); extern asmlinkage void schedule_tail(struct task_struct *prev); diff --git a/kernel/exit.c b/kernel/exit.c index 45ed043b8bf5..fed3a4db6f04 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -87,7 +87,7 @@ static void __exit_signal(struct task_struct *tsk) sighand = rcu_dereference_check(tsk->sighand, rcu_read_lock_held() || - lockdep_is_held(&tasklist_lock)); + lockdep_tasklist_lock_is_held()); spin_lock(&sighand->siglock); posix_cpu_timers_exit(tsk); diff --git a/kernel/fork.c b/kernel/fork.c index 17bbf093356d..8691c540a470 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -86,7 +86,14 @@ int max_threads; /* tunable limit on nr_threads */ DEFINE_PER_CPU(unsigned long, process_counts) = 0; __cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ -EXPORT_SYMBOL_GPL(tasklist_lock); + +#ifdef CONFIG_PROVE_RCU +int lockdep_tasklist_lock_is_held(void) +{ + return lockdep_is_held(&tasklist_lock); +} +EXPORT_SYMBOL_GPL(lockdep_tasklist_lock_is_held); +#endif /* #ifdef CONFIG_PROVE_RCU */ int nr_processes(void) { diff --git a/kernel/pid.c b/kernel/pid.c index b08e697cd83f..b6064405f367 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -367,7 +367,9 @@ struct task_struct *pid_task(struct pid *pid, enum pid_type type) struct task_struct *result = NULL; if (pid) { struct hlist_node *first; - first = rcu_dereference_check(pid->tasks[type].first, rcu_read_lock_held() || lockdep_is_held(&tasklist_lock)); + first = rcu_dereference_check(pid->tasks[type].first, + rcu_read_lock_held() || + lockdep_tasklist_lock_is_held()); if (first) result = hlist_entry(first, struct task_struct, pids[(type)].node); } -- cgit v1.2.3 From 5ed42b8113667c06a6ff2c72717395b5044d30a1 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 Mar 2010 07:46:58 -0800 Subject: rcu, cgroup: Relax the check in task_subsys_state() as early boot is now handled by lockdep-RCU This patch removes the check for !rcu_scheduler_active because this check has been incorporated into rcu_dereference_check(). Signed-off-by: Paul E. 
McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1267631219-8713-3-git-send-email-paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/cgroup.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index a73e1ced09b8..c9bbcb2a75ae 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -489,7 +489,6 @@ static inline struct cgroup_subsys_state *task_subsys_state( { return rcu_dereference_check(task->cgroups->subsys[subsys_id], rcu_read_lock_held() || - !rcu_scheduler_active || cgroup_lock_is_held()); } -- cgit v1.2.3 From e6033e3b307fcfae08408e0673266db38392bda4 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 Mar 2010 17:50:16 -0800 Subject: rcu: Make rcu_read_lock_sched_held() handle !PREEMPT The rcu_read_lock_sched_held() needs to unconditionally return the value "1" in a !PREEMPT kernel, because under !PREEMPT, -all- kernel code is implicitly preempt-disabled. This patch makes this happen. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1267667418-32233-1-git-send-email-paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/rcupdate.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index c84373626336..e22960ecb71a 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -136,6 +136,7 @@ static inline int rcu_read_lock_bh_held(void) * can prove otherwise. Note that disabling of preemption (including * disabling irqs) counts as an RCU-sched read-side critical section. */ +#ifdef CONFIG_PREEMPT static inline int rcu_read_lock_sched_held(void) { int lockdep_opinion = 0; @@ -144,6 +145,12 @@ static inline int rcu_read_lock_sched_held(void) lockdep_opinion = lock_is_held(&rcu_sched_lock_map); return lockdep_opinion || preempt_count() != 0 || !rcu_scheduler_active; } +#else /* #ifdef CONFIG_PREEMPT */ +static inline int rcu_read_lock_sched_held(void) +{ + return 1; +} +#endif /* #else #ifdef CONFIG_PREEMPT */ #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ @@ -164,10 +171,17 @@ static inline int rcu_read_lock_bh_held(void) return 1; } +#ifdef CONFIG_PREEMPT static inline int rcu_read_lock_sched_held(void) { return preempt_count() != 0 || !rcu_scheduler_active; } +#else /* #ifdef CONFIG_PREEMPT */ +static inline int rcu_read_lock_sched_held(void) +{ + return 1; +} +#endif /* #else #ifdef CONFIG_PREEMPT */ #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ -- cgit v1.2.3 From 8d53dd546f36073e0d29b0cfc24c665db301e3e7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 Mar 2010 17:50:18 -0800 Subject: rcu, ftrace: Fix RCU lockdep splat in ftrace_perf_buf_prepare() Change the pair of rcu_dereference() calls in ftrace_perf_buf_prepare() to rcu_dereference_sched(). Signed-off-by: Paul E. 
McKenney Acked-by: Frederic Weisbecker Cc: Steven Rostedt Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com Cc: Frederic Weisbecker LKML-Reference: <1267667418-32233-3-git-send-email-paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar --- include/trace/ftrace.h | 4 ++-- kernel/trace/trace_event_profile.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 0804cd594803..601ad7744247 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -699,9 +699,9 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ * __cpu = smp_processor_id(); * * if (in_nmi()) - * trace_buf = rcu_dereference(perf_trace_buf_nmi); + * trace_buf = rcu_dereference_sched(perf_trace_buf_nmi); * else - * trace_buf = rcu_dereference(perf_trace_buf); + * trace_buf = rcu_dereference_sched(perf_trace_buf); * * if (!trace_buf) * goto end; diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c index f0d693005075..c1cc3ab633de 100644 --- a/kernel/trace/trace_event_profile.c +++ b/kernel/trace/trace_event_profile.c @@ -138,9 +138,9 @@ __kprobes void *ftrace_perf_buf_prepare(int size, unsigned short type, cpu = smp_processor_id(); if (in_nmi()) - trace_buf = rcu_dereference(perf_trace_buf_nmi); + trace_buf = rcu_dereference_sched(perf_trace_buf_nmi); else - trace_buf = rcu_dereference(perf_trace_buf); + trace_buf = rcu_dereference_sched(perf_trace_buf); if (!trace_buf) goto err; -- cgit v1.2.3 From 54dbf96c921513bf98484a20ef366d51944a4c4d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 Mar 2010 07:46:57 -0800 Subject: rcu: Suppress RCU lockdep warnings during early boot RCU is used during very early boot, before RCU and lockdep have been initialized. So make the underlying primitives (rcu_read_lock_held(), rcu_read_lock_bh_held(), rcu_read_lock_sched_held(), and rcu_dereference_check()) check for early boot via the rcu_scheduler_active flag. This will suppress false positives. Also introduce a debug_lockdep_rcu_enabled() static inline helper function, which tags the CONTINUE_PROVE_RCU case as likely(), as suggested by Ingo Molnar. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1267631219-8713-2-git-send-email-paulmck@linux.vnet.ibm.com> [ v2: removed incomplete debug_lockdep_rcu_update() bits ] Signed-off-by: Ingo Molnar --- include/linux/rcupdate.h | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index e22960ecb71a..75921b83c0ab 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -97,6 +97,11 @@ extern struct lockdep_map rcu_sched_lock_map; # define rcu_read_release_sched() \ lock_release(&rcu_sched_lock_map, 1, _THIS_IP_) +static inline int debug_lockdep_rcu_enabled(void) +{ + return likely(rcu_scheduler_active && debug_locks); +} + /** * rcu_read_lock_held - might we be in RCU read-side critical section? 
* @@ -104,12 +109,14 @@ extern struct lockdep_map rcu_sched_lock_map; * an RCU read-side critical section. In absence of CONFIG_PROVE_LOCKING, * this assumes we are in an RCU read-side critical section unless it can * prove otherwise. + * + * Check rcu_scheduler_active to prevent false positives during boot. */ static inline int rcu_read_lock_held(void) { - if (debug_locks) - return lock_is_held(&rcu_lock_map); - return 1; + if (!debug_lockdep_rcu_enabled()) + return 1; + return lock_is_held(&rcu_lock_map); } /** @@ -119,12 +126,14 @@ static inline int rcu_read_lock_held(void) * an RCU-bh read-side critical section. In absence of CONFIG_PROVE_LOCKING, * this assumes we are in an RCU-bh read-side critical section unless it can * prove otherwise. + * + * Check rcu_scheduler_active to prevent false positives during boot. */ static inline int rcu_read_lock_bh_held(void) { - if (debug_locks) - return lock_is_held(&rcu_bh_lock_map); - return 1; + if (!debug_lockdep_rcu_enabled()) + return 1; + return lock_is_held(&rcu_bh_lock_map); } /** @@ -135,15 +144,19 @@ static inline int rcu_read_lock_bh_held(void) * this assumes we are in an RCU-sched read-side critical section unless it * can prove otherwise. Note that disabling of preemption (including * disabling irqs) counts as an RCU-sched read-side critical section. + * + * Check rcu_scheduler_active to prevent false positives during boot. */ #ifdef CONFIG_PREEMPT static inline int rcu_read_lock_sched_held(void) { int lockdep_opinion = 0; + if (!debug_lockdep_rcu_enabled()) + return 1; if (debug_locks) lockdep_opinion = lock_is_held(&rcu_sched_lock_map); - return lockdep_opinion || preempt_count() != 0 || !rcu_scheduler_active; + return lockdep_opinion || preempt_count() != 0; } #else /* #ifdef CONFIG_PREEMPT */ static inline int rcu_read_lock_sched_held(void) @@ -174,7 +187,7 @@ static inline int rcu_read_lock_bh_held(void) #ifdef CONFIG_PREEMPT static inline int rcu_read_lock_sched_held(void) { - return preempt_count() != 0 || !rcu_scheduler_active; + return !rcu_scheduler_active || preempt_count() != 0; } #else /* #ifdef CONFIG_PREEMPT */ static inline int rcu_read_lock_sched_held(void) @@ -198,7 +211,7 @@ static inline int rcu_read_lock_sched_held(void) */ #define rcu_dereference_check(p, c) \ ({ \ - if (debug_locks && !(c)) \ + if (debug_lockdep_rcu_enabled() && !(c)) \ lockdep_rcu_dereference(__FILE__, __LINE__); \ rcu_dereference_raw(p); \ }) -- cgit v1.2.3 From 9df93939b735dd273e49cbee290b9f4738500ef4 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 6 Jan 2010 21:58:48 +0100 Subject: ext3: Use bitops to read/modify EXT3_I(inode)->i_state At several places we modify EXT3_I(inode)->i_state without holding i_mutex (ext3_release_file, ext3_bmap, ext3_journalled_writepage, ext3_do_update_inode, ...). These modifications are racy and we can lose updates to i_state. So convert handling of i_state to use bitops which are atomic. 
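A stripped-down illustration of the kind of update that can be lost, and of the atomic replacement (the variable and helpers are demo code, not ext3's):

#include <linux/bitops.h>

static unsigned long demo_state;

/* Racy: load, modify, store -- a concurrent update can be overwritten. */
static void racy_set(int bit)
{
        demo_state |= 1UL << bit;
}

/* Safe: set_bit()/clear_bit()/test_bit() touch one bit atomically. */
static void safe_set(int bit)
{
        set_bit(bit, &demo_state);
}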
Signed-off-by: Jan Kara --- fs/ext3/file.c | 4 ++-- fs/ext3/inode.c | 18 +++++++++--------- fs/ext3/xattr.c | 14 +++++++------- include/linux/ext3_fs.h | 33 +++++++++++++++++++++++++-------- include/linux/ext3_fs_i.h | 2 +- 5 files changed, 44 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/fs/ext3/file.c b/fs/ext3/file.c index 388bbdfa0b4e..a86d3302cdc2 100644 --- a/fs/ext3/file.c +++ b/fs/ext3/file.c @@ -33,9 +33,9 @@ */ static int ext3_release_file (struct inode * inode, struct file * filp) { - if (EXT3_I(inode)->i_state & EXT3_STATE_FLUSH_ON_CLOSE) { + if (ext3_test_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE)) { filemap_flush(inode->i_mapping); - EXT3_I(inode)->i_state &= ~EXT3_STATE_FLUSH_ON_CLOSE; + ext3_clear_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE); } /* if we are the last writer on the inode, drop the block reservation */ if ((filp->f_mode & FMODE_WRITE) && diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 455e6e6e5cb9..44b53386ab8b 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1378,7 +1378,7 @@ static int ext3_journalled_write_end(struct file *file, */ if (pos + len > inode->i_size && ext3_can_truncate(inode)) ext3_orphan_add(handle, inode); - EXT3_I(inode)->i_state |= EXT3_STATE_JDATA; + ext3_set_inode_state(inode, EXT3_STATE_JDATA); if (inode->i_size > EXT3_I(inode)->i_disksize) { EXT3_I(inode)->i_disksize = inode->i_size; ret2 = ext3_mark_inode_dirty(handle, inode); @@ -1417,7 +1417,7 @@ static sector_t ext3_bmap(struct address_space *mapping, sector_t block) journal_t *journal; int err; - if (EXT3_I(inode)->i_state & EXT3_STATE_JDATA) { + if (ext3_test_inode_state(inode, EXT3_STATE_JDATA)) { /* * This is a REALLY heavyweight approach, but the use of * bmap on dirty files is expected to be extremely rare: @@ -1436,7 +1436,7 @@ static sector_t ext3_bmap(struct address_space *mapping, sector_t block) * everything they get. */ - EXT3_I(inode)->i_state &= ~EXT3_STATE_JDATA; + ext3_clear_inode_state(inode, EXT3_STATE_JDATA); journal = EXT3_JOURNAL(inode); journal_lock_updates(journal); err = journal_flush(journal); @@ -1670,7 +1670,7 @@ static int ext3_journalled_writepage(struct page *page, PAGE_CACHE_SIZE, NULL, write_end_fn); if (ret == 0) ret = err; - EXT3_I(inode)->i_state |= EXT3_STATE_JDATA; + ext3_set_inode_state(inode, EXT3_STATE_JDATA); unlock_page(page); } else { /* @@ -2402,7 +2402,7 @@ void ext3_truncate(struct inode *inode) goto out_notrans; if (inode->i_size == 0 && ext3_should_writeback_data(inode)) - ei->i_state |= EXT3_STATE_FLUSH_ON_CLOSE; + ext3_set_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE); /* * We have to lock the EOF page here, because lock_page() nests @@ -2721,7 +2721,7 @@ int ext3_get_inode_loc(struct inode *inode, struct ext3_iloc *iloc) { /* We have all inode data except xattrs in memory here. */ return __ext3_get_inode_loc(inode, iloc, - !(EXT3_I(inode)->i_state & EXT3_STATE_XATTR)); + !ext3_test_inode_state(inode, EXT3_STATE_XATTR)); } void ext3_set_inode_flags(struct inode *inode) @@ -2893,7 +2893,7 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino) EXT3_GOOD_OLD_INODE_SIZE + ei->i_extra_isize; if (*magic == cpu_to_le32(EXT3_XATTR_MAGIC)) - ei->i_state |= EXT3_STATE_XATTR; + ext3_set_inode_state(inode, EXT3_STATE_XATTR); } } else ei->i_extra_isize = 0; @@ -2955,7 +2955,7 @@ again: /* For fields not not tracking in the in-memory inode, * initialise them to zero for new inodes. 
*/ - if (ei->i_state & EXT3_STATE_NEW) + if (ext3_test_inode_state(inode, EXT3_STATE_NEW)) memset(raw_inode, 0, EXT3_SB(inode->i_sb)->s_inode_size); ext3_get_inode_flags(ei); @@ -3052,7 +3052,7 @@ again: rc = ext3_journal_dirty_metadata(handle, bh); if (!err) err = rc; - ei->i_state &= ~EXT3_STATE_NEW; + ext3_clear_inode_state(inode, EXT3_STATE_NEW); atomic_set(&ei->i_sync_tid, handle->h_transaction->t_tid); out_brelse: diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c index 66895ccf76c7..2d2fb2a85961 100644 --- a/fs/ext3/xattr.c +++ b/fs/ext3/xattr.c @@ -274,7 +274,7 @@ ext3_xattr_ibody_get(struct inode *inode, int name_index, const char *name, void *end; int error; - if (!(EXT3_I(inode)->i_state & EXT3_STATE_XATTR)) + if (!ext3_test_inode_state(inode, EXT3_STATE_XATTR)) return -ENODATA; error = ext3_get_inode_loc(inode, &iloc); if (error) @@ -403,7 +403,7 @@ ext3_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size) void *end; int error; - if (!(EXT3_I(inode)->i_state & EXT3_STATE_XATTR)) + if (!ext3_test_inode_state(inode, EXT3_STATE_XATTR)) return 0; error = ext3_get_inode_loc(inode, &iloc); if (error) @@ -882,7 +882,7 @@ ext3_xattr_ibody_find(struct inode *inode, struct ext3_xattr_info *i, is->s.base = is->s.first = IFIRST(header); is->s.here = is->s.first; is->s.end = (void *)raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; - if (EXT3_I(inode)->i_state & EXT3_STATE_XATTR) { + if (ext3_test_inode_state(inode, EXT3_STATE_XATTR)) { error = ext3_xattr_check_names(IFIRST(header), is->s.end); if (error) return error; @@ -914,10 +914,10 @@ ext3_xattr_ibody_set(handle_t *handle, struct inode *inode, header = IHDR(inode, ext3_raw_inode(&is->iloc)); if (!IS_LAST_ENTRY(s->first)) { header->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC); - EXT3_I(inode)->i_state |= EXT3_STATE_XATTR; + ext3_set_inode_state(inode, EXT3_STATE_XATTR); } else { header->h_magic = cpu_to_le32(0); - EXT3_I(inode)->i_state &= ~EXT3_STATE_XATTR; + ext3_clear_inode_state(inode, EXT3_STATE_XATTR); } return 0; } @@ -967,10 +967,10 @@ ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, if (error) goto cleanup; - if (EXT3_I(inode)->i_state & EXT3_STATE_NEW) { + if (ext3_test_inode_state(inode, EXT3_STATE_NEW)) { struct ext3_inode *raw_inode = ext3_raw_inode(&is.iloc); memset(raw_inode, 0, EXT3_SB(inode->i_sb)->s_inode_size); - EXT3_I(inode)->i_state &= ~EXT3_STATE_NEW; + ext3_clear_inode_state(inode, EXT3_STATE_NEW); } error = ext3_xattr_ibody_find(inode, &i, &is); diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index 6b049030fbe6..e6590f8f0b3c 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -202,14 +202,6 @@ static inline __u32 ext3_mask_flags(umode_t mode, __u32 flags) return flags & EXT3_OTHER_FLMASK; } -/* - * Inode dynamic state flags - */ -#define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */ -#define EXT3_STATE_NEW 0x00000002 /* inode is newly created */ -#define EXT3_STATE_XATTR 0x00000004 /* has in-inode xattrs */ -#define EXT3_STATE_FLUSH_ON_CLOSE 0x00000008 - /* Used to pass group descriptor data when online resize is done */ struct ext3_new_group_input { __u32 group; /* Group number for this data */ @@ -560,6 +552,31 @@ static inline int ext3_valid_inum(struct super_block *sb, unsigned long ino) (ino >= EXT3_FIRST_INO(sb) && ino <= le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count)); } + +/* + * Inode dynamic state flags + */ +enum { + EXT3_STATE_JDATA, /* journaled data exists */ + EXT3_STATE_NEW, /* inode is newly created */ + 
EXT3_STATE_XATTR, /* has in-inode xattrs */ + EXT3_STATE_FLUSH_ON_CLOSE, /* flush dirty pages on close */ +}; + +static inline int ext3_test_inode_state(struct inode *inode, int bit) +{ + return test_bit(bit, &EXT3_I(inode)->i_state); +} + +static inline void ext3_set_inode_state(struct inode *inode, int bit) +{ + set_bit(bit, &EXT3_I(inode)->i_state); +} + +static inline void ext3_clear_inode_state(struct inode *inode, int bit) +{ + clear_bit(bit, &EXT3_I(inode)->i_state); +} #else /* Assume that user mode programs are passing in an ext3fs superblock, not * a kernel struct super_block. This will allow us to call the feature-test diff --git a/include/linux/ext3_fs_i.h b/include/linux/ext3_fs_i.h index 93e7428156ba..7679acdb519a 100644 --- a/include/linux/ext3_fs_i.h +++ b/include/linux/ext3_fs_i.h @@ -87,7 +87,7 @@ struct ext3_inode_info { * near to their parent directory's inode. */ __u32 i_block_group; - __u32 i_state; /* Dynamic state flags for ext3 */ + unsigned long i_state; /* Dynamic state flags for ext3 */ /* block reservation info */ struct ext3_block_alloc_info *i_block_alloc_info; -- cgit v1.2.3 From c7e8d4d6dceeb6fd236991f590d3fa6f97c59874 Mon Sep 17 00:00:00 2001 From: Christoph Egger Date: Fri, 5 Feb 2010 14:13:33 +0100 Subject: jbd[2]: remove references to BUFFER_DEBUG CONFIG_BUFFER_DEBUG seems to have been removed from the documentation somewhere around 2.4.15 and seemingly hasn't been available even longer. It is, however, still referenced at one place from the jbd code (one is a copy of the other header). Time to clean it up Signed-off-by: Christoph Egger Signed-off-by: Jan Kara --- include/linux/jbd.h | 11 ----------- include/linux/jbd2.h | 11 ----------- 2 files changed, 22 deletions(-) (limited to 'include') diff --git a/include/linux/jbd.h b/include/linux/jbd.h index 331530cd3cc6..f3aa59cb675d 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -246,19 +246,8 @@ typedef struct journal_superblock_s #define J_ASSERT(assert) BUG_ON(!(assert)) -#if defined(CONFIG_BUFFER_DEBUG) -void buffer_assertion_failure(struct buffer_head *bh); -#define J_ASSERT_BH(bh, expr) \ - do { \ - if (!(expr)) \ - buffer_assertion_failure(bh); \ - J_ASSERT(expr); \ - } while (0) -#define J_ASSERT_JH(jh, expr) J_ASSERT_BH(jh2bh(jh), expr) -#else #define J_ASSERT_BH(bh, expr) J_ASSERT(expr) #define J_ASSERT_JH(jh, expr) J_ASSERT(expr) -#endif #if defined(JBD_PARANOID_IOFAIL) #define J_EXPECT(expr, why...) J_ASSERT(expr) diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 638ce4554c76..4cf619161ed0 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -284,19 +284,8 @@ typedef struct journal_superblock_s #define J_ASSERT(assert) BUG_ON(!(assert)) -#if defined(CONFIG_BUFFER_DEBUG) -void buffer_assertion_failure(struct buffer_head *bh); -#define J_ASSERT_BH(bh, expr) \ - do { \ - if (!(expr)) \ - buffer_assertion_failure(bh); \ - J_ASSERT(expr); \ - } while (0) -#define J_ASSERT_JH(jh, expr) J_ASSERT_BH(jh2bh(jh), expr) -#else #define J_ASSERT_BH(bh, expr) J_ASSERT(expr) #define J_ASSERT_JH(jh, expr) J_ASSERT(expr) -#endif #if defined(JBD2_PARANOID_IOFAIL) #define J_EXPECT(expr, why...) 
J_ASSERT(expr) -- cgit v1.2.3 From c469070aea5a0ada45a836937c776fd3083dae2b Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Tue, 9 Feb 2010 17:53:36 +0100 Subject: quota: manage reserved space when quota is not active [v2] Since we implemented generic reserved space management interface, then it is possible to account reserved space even when quota is not active (similar to i_blocks/i_bytes). Without this patch following testcase result in massive comlain from WARN_ON in dquot_claim_space() TEST_CASE: mount /dev/sdb /mnt -oquota dd if=/dev/zero of=/mnt/test bs=1M count=1 quotaon /mnt # fs_reserved_spave == 1Mb # quota_reserved_space == 0, because quota was disabled dd if=/dev/zero of=/mnt/test seek=1 bs=1M count=1 # fs_reserved_spave == 2Mb # quota_reserved_space == 1Mb sync # ->dquot_claim_space() -> WARN_ON Signed-off-by: Dmitry Monakhov Signed-off-by: Jan Kara --- fs/quota/dquot.c | 10 ++++++---- include/linux/quotaops.h | 11 +++++++++-- 2 files changed, 15 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index f6eaf0d8fd6a..f11255b18b58 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -1354,28 +1354,30 @@ static qsize_t *inode_reserved_space(struct inode * inode) return inode->i_sb->dq_op->get_reserved_space(inode); } -static void inode_add_rsv_space(struct inode *inode, qsize_t number) +void inode_add_rsv_space(struct inode *inode, qsize_t number) { spin_lock(&inode->i_lock); *inode_reserved_space(inode) += number; spin_unlock(&inode->i_lock); } +EXPORT_SYMBOL(inode_add_rsv_space); - -static void inode_claim_rsv_space(struct inode *inode, qsize_t number) +void inode_claim_rsv_space(struct inode *inode, qsize_t number) { spin_lock(&inode->i_lock); *inode_reserved_space(inode) -= number; __inode_add_bytes(inode, number); spin_unlock(&inode->i_lock); } +EXPORT_SYMBOL(inode_claim_rsv_space); -static void inode_sub_rsv_space(struct inode *inode, qsize_t number) +void inode_sub_rsv_space(struct inode *inode, qsize_t number) { spin_lock(&inode->i_lock); *inode_reserved_space(inode) -= number; spin_unlock(&inode->i_lock); } +EXPORT_SYMBOL(inode_sub_rsv_space); static qsize_t inode_get_rsv_space(struct inode *inode) { diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 3ebb23153640..a529d86e7e73 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -26,6 +26,10 @@ static inline void writeout_quota_sb(struct super_block *sb, int type) sb->s_qcop->quota_sync(sb, type); } +void inode_add_rsv_space(struct inode *inode, qsize_t number); +void inode_claim_rsv_space(struct inode *inode, qsize_t number); +void inode_sub_rsv_space(struct inode *inode, qsize_t number); + int dquot_initialize(struct inode *inode, int type); int dquot_drop(struct inode *inode); struct dquot *dqget(struct super_block *sb, unsigned int id, int type); @@ -42,7 +46,6 @@ int dquot_alloc_inode(const struct inode *inode, qsize_t number); int dquot_reserve_space(struct inode *inode, qsize_t number, int prealloc); int dquot_claim_space(struct inode *inode, qsize_t number); void dquot_release_reserved_space(struct inode *inode, qsize_t number); -qsize_t dquot_get_reserved_space(struct inode *inode); int dquot_free_space(struct inode *inode, qsize_t number); int dquot_free_inode(const struct inode *inode, qsize_t number); @@ -199,6 +202,8 @@ static inline int vfs_dq_reserve_space(struct inode *inode, qsize_t nr) if (inode->i_sb->dq_op->reserve_space(inode, nr, 0) == NO_QUOTA) return 1; } + else + inode_add_rsv_space(inode, nr); 
return 0; } @@ -221,7 +226,7 @@ static inline int vfs_dq_claim_space(struct inode *inode, qsize_t nr) if (inode->i_sb->dq_op->claim_space(inode, nr) == NO_QUOTA) return 1; } else - inode_add_bytes(inode, nr); + inode_claim_rsv_space(inode, nr); mark_inode_dirty(inode); return 0; } @@ -235,6 +240,8 @@ void vfs_dq_release_reservation_space(struct inode *inode, qsize_t nr) { if (sb_any_quota_active(inode->i_sb)) inode->i_sb->dq_op->release_rsv(inode, nr); + else + inode_sub_rsv_space(inode, nr); } static inline void vfs_dq_free_space_nodirty(struct inode *inode, qsize_t nr) -- cgit v1.2.3 From 8c4e4acd660a09e571a71583b5bbe1eee700c9ad Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 16 Feb 2010 03:44:51 -0500 Subject: quota: clean up Q_XQUOTASYNC Currently Q_XQUOTASYNC calls into the quota_sync method, but XFS does something entirely different in it than the rest of the filesystems. xfs_quota, which calls Q_XQUOTASYNC, expects an asynchronous data writeout to flush delayed allocations, while the "VFS" quota support wants to flush changes to the quota file. So make Q_XQUOTASYNC call into the writeback code directly and make the quota_sync method optional, as XFS doesn't need it in the sense expected by the rest of the quota code. GFS2 was using limited XFS-style quota and has a quota_sync method fitting neither the style used by vfs_quota_sync nor xfs_fs_quota_sync. I left it in for now since, per discussion with Steve, it expects to be called from the sync path this way. Signed-off-by: Christoph Hellwig Signed-off-by: Jan Kara --- fs/quota/quota.c | 11 +++++++---- fs/xfs/linux-2.6/xfs_quotaops.c | 15 --------------- include/linux/quotaops.h | 2 +- 3 files changed, 8 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/fs/quota/quota.c b/fs/quota/quota.c index d0efe302b1c1..3d31228082ea 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -52,7 +53,7 @@ void sync_quota_sb(struct super_block *sb, int type) { int cnt; - if (!sb->s_qcop->quota_sync) + if (!sb->s_qcop || !sb->s_qcop->quota_sync) return; sb->s_qcop->quota_sync(sb, type); @@ -318,9 +319,11 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, case Q_XGETQUOTA: return quota_getxquota(sb, type, id, addr); case Q_XQUOTASYNC: - if (!sb->s_qcop->quota_sync) - return -ENOSYS; - return sb->s_qcop->quota_sync(sb, type); + /* caller already holds s_umount */ + if (sb->s_flags & MS_RDONLY) + return -EROFS; + writeback_inodes_sb(sb); + return 0; default: return -EINVAL; } diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c index 3d4a0c84d634..07d67c624922 100644 --- a/fs/xfs/linux-2.6/xfs_quotaops.c +++ b/fs/xfs/linux-2.6/xfs_quotaops.c @@ -43,20 +43,6 @@ xfs_quota_type(int type) } } -STATIC int -xfs_fs_quota_sync( - struct super_block *sb, - int type) -{ - struct xfs_mount *mp = XFS_M(sb); - - if (sb->s_flags & MS_RDONLY) - return -EROFS; - if (!XFS_IS_QUOTA_RUNNING(mp)) - return -ENOSYS; - return -xfs_sync_data(mp, 0); -} - STATIC int xfs_fs_get_xstate( struct super_block *sb, @@ -151,7 +137,6 @@ xfs_fs_set_xquota( } const struct quotactl_ops xfs_quotactl_operations = { - .quota_sync = xfs_fs_quota_sync, .get_xstate = xfs_fs_get_xstate, .set_xstate = xfs_fs_set_xstate, .get_xquota = xfs_fs_get_xquota, diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index a529d86e7e73..69d26bc0f884 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -22,7 +22,7 @@ static inline struct
quota_info *sb_dqopt(struct super_block *sb) void sync_quota_sb(struct super_block *sb, int type); static inline void writeout_quota_sb(struct super_block *sb, int type) { - if (sb->s_qcop->quota_sync) + if (sb->s_qcop && sb->s_qcop->quota_sync) sb->s_qcop->quota_sync(sb, type); } -- cgit v1.2.3 From 5fb324ad24febe57a8a2e62903dcb7bad546ea71 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 16 Feb 2010 03:44:52 -0500 Subject: quota: move code from sync_quota_sb into vfs_quota_sync Currently sync_quota_sb does a lot of sync and truncate action that only applies to "VFS" style quotas and is actively harmful for the sync performance in XFS. Move it into vfs_quota_sync and add a wait parameter to ->quota_sync to tell if we need it or not. My audit of the GFS2 code says it's also not needed given the way GFS2 implements quotas, but I'd be happy if this can get a detailed review. Signed-off-by: Christoph Hellwig Signed-off-by: Jan Kara --- fs/gfs2/quota.c | 9 +++++++-- fs/gfs2/quota.h | 2 +- fs/gfs2/super.c | 2 +- fs/gfs2/sys.c | 2 +- fs/quota/dquot.c | 29 ++++++++++++++++++++++++++++- fs/quota/quota.c | 46 +++++----------------------------------------- fs/sync.c | 14 +++++++------- include/linux/quota.h | 2 +- include/linux/quotaops.h | 17 +---------------- 9 files changed, 52 insertions(+), 71 deletions(-) (limited to 'include') diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index e3bf6eab8750..6dbcbad6ab17 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1083,7 +1083,7 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change, } } -int gfs2_quota_sync(struct super_block *sb, int type) +int gfs2_quota_sync(struct super_block *sb, int type, int wait) { struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_quota_data **qda; @@ -1127,6 +1127,11 @@ int gfs2_quota_sync(struct super_block *sb, int type) return error; } +static int gfs2_quota_sync_timeo(struct super_block *sb, int type) +{ + return gfs2_quota_sync(sb, type, 0); +} + int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id) { struct gfs2_quota_data *qd; @@ -1382,7 +1387,7 @@ int gfs2_quotad(void *data) &tune->gt_statfs_quantum); /* Update quota file */ - quotad_check_timeo(sdp, "sync", gfs2_quota_sync, t, + quotad_check_timeo(sdp, "sync", gfs2_quota_sync_timeo, t, &quotad_timeo, &tune->gt_quota_quantum); /* Check for & recover partially truncated inodes */ diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index e271fa07ad02..195f60c8bd14 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h @@ -25,7 +25,7 @@ extern int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid); extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change, u32 uid, u32 gid); -extern int gfs2_quota_sync(struct super_block *sb, int type); +extern int gfs2_quota_sync(struct super_block *sb, int type, int wait); extern int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id); extern int gfs2_quota_init(struct gfs2_sbd *sdp); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index b9dd3da22c0a..a8c2bcd0fcc8 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -764,7 +764,7 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) int error; flush_workqueue(gfs2_delete_workqueue); - gfs2_quota_sync(sdp->sd_vfs, 0); + gfs2_quota_sync(sdp->sd_vfs, 0, 1); gfs2_statfs_sync(sdp->sd_vfs, 0); error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE, diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 0dc34621f6a6..4496cc37a0fa 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -167,7 +167,7 @@ static ssize_t quota_sync_store(struct gfs2_sbd *sdp,
const char *buf, if (simple_strtol(buf, NULL, 0) != 1) return -EINVAL; - gfs2_quota_sync(sdp->sd_vfs, 0); + gfs2_quota_sync(sdp->sd_vfs, 0, 1); return len; } diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 6c849de5dc8f..4c2213f7ed36 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -570,7 +570,7 @@ out: } EXPORT_SYMBOL(dquot_scan_active); -int vfs_quota_sync(struct super_block *sb, int type) +int vfs_quota_sync(struct super_block *sb, int type, int wait) { struct list_head *dirty; struct dquot *dquot; @@ -615,6 +615,33 @@ int vfs_quota_sync(struct super_block *sb, int type) spin_unlock(&dq_list_lock); mutex_unlock(&dqopt->dqonoff_mutex); + if (!wait || (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE)) + return 0; + + /* This is not very clever (and fast) but currently I don't know about + * any other simple way of getting quota data to disk and we must get + * them there for userspace to be visible... */ + if (sb->s_op->sync_fs) + sb->s_op->sync_fs(sb, 1); + sync_blockdev(sb->s_bdev); + + /* + * Now when everything is written we can discard the pagecache so + * that userspace sees the changes. + */ + mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); + for (cnt = 0; cnt < MAXQUOTAS; cnt++) { + if (type != -1 && cnt != type) + continue; + if (!sb_has_quota_active(sb, cnt)) + continue; + mutex_lock_nested(&sb_dqopt(sb)->files[cnt]->i_mutex, + I_MUTEX_QUOTA); + truncate_inode_pages(&sb_dqopt(sb)->files[cnt]->i_data, 0); + mutex_unlock(&sb_dqopt(sb)->files[cnt]->i_mutex); + } + mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); + return 0; } EXPORT_SYMBOL(vfs_quota_sync); diff --git a/fs/quota/quota.c b/fs/quota/quota.c index 3d31228082ea..0593b229656c 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -48,44 +48,6 @@ static int check_quotactl_permission(struct super_block *sb, int type, int cmd, return security_quotactl(cmd, type, id, sb); } -#ifdef CONFIG_QUOTA -void sync_quota_sb(struct super_block *sb, int type) -{ - int cnt; - - if (!sb->s_qcop || !sb->s_qcop->quota_sync) - return; - - sb->s_qcop->quota_sync(sb, type); - - if (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE) - return; - /* This is not very clever (and fast) but currently I don't know about - * any other simple way of getting quota data to disk and we must get - * them there for userspace to be visible... */ - if (sb->s_op->sync_fs) - sb->s_op->sync_fs(sb, 1); - sync_blockdev(sb->s_bdev); - - /* - * Now when everything is written we can discard the pagecache so - * that userspace sees the changes. - */ - mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); - for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (type != -1 && cnt != type) - continue; - if (!sb_has_quota_active(sb, cnt)) - continue; - mutex_lock_nested(&sb_dqopt(sb)->files[cnt]->i_mutex, - I_MUTEX_QUOTA); - truncate_inode_pages(&sb_dqopt(sb)->files[cnt]->i_data, 0); - mutex_unlock(&sb_dqopt(sb)->files[cnt]->i_mutex); - } - mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); -} -#endif - static int quota_sync_all(int type) { struct super_block *sb; @@ -101,6 +63,9 @@ static int quota_sync_all(int type) spin_lock(&sb_lock); restart: list_for_each_entry(sb, &super_blocks, s_list) { + if (!sb->s_qcop || !sb->s_qcop->quota_sync) + continue; + /* This test just improves performance so it needn't be * reliable... 
*/ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { @@ -119,7 +84,7 @@ restart: spin_unlock(&sb_lock); down_read(&sb->s_umount); if (sb->s_root) - sync_quota_sb(sb, type); + sb->s_qcop->quota_sync(sb, type, 1); up_read(&sb->s_umount); spin_lock(&sb_lock); if (__put_super_and_need_restart(sb)) @@ -306,8 +271,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, case Q_SYNC: if (!sb->s_qcop->quota_sync) return -ENOSYS; - sync_quota_sb(sb, type); - return 0; + return sb->s_qcop->quota_sync(sb, type, 1); case Q_XQUOTAON: case Q_XQUOTAOFF: case Q_XQUOTARM: diff --git a/fs/sync.c b/fs/sync.c index 418727a2a239..f557d71cb097 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -34,14 +34,14 @@ static int __sync_filesystem(struct super_block *sb, int wait) if (!sb->s_bdi) return 0; - /* Avoid doing twice syncing and cache pruning for quota sync */ - if (!wait) { - writeout_quota_sb(sb, -1); - writeback_inodes_sb(sb); - } else { - sync_quota_sb(sb, -1); + if (sb->s_qcop && sb->s_qcop->quota_sync) + sb->s_qcop->quota_sync(sb, -1, wait); + + if (wait) sync_inodes_sb(sb); - } + else + writeback_inodes_sb(sb); + if (sb->s_op->sync_fs) sb->s_op->sync_fs(sb, wait); return __sync_blockdev(sb->s_bdev, wait); diff --git a/include/linux/quota.h b/include/linux/quota.h index a6861f117480..570348cbccb1 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -324,7 +324,7 @@ struct dquot_operations { struct quotactl_ops { int (*quota_on)(struct super_block *, int, int, char *, int); int (*quota_off)(struct super_block *, int, int); - int (*quota_sync)(struct super_block *, int); + int (*quota_sync)(struct super_block *, int, int); int (*get_info)(struct super_block *, int, struct if_dqinfo *); int (*set_info)(struct super_block *, int, struct if_dqinfo *); int (*get_dqblk)(struct super_block *, int, qid_t, struct if_dqblk *); diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 69d26bc0f884..8cfd0d44c994 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -19,13 +19,6 @@ static inline struct quota_info *sb_dqopt(struct super_block *sb) /* * declaration of quota_function calls in kernel. 
*/ -void sync_quota_sb(struct super_block *sb, int type); -static inline void writeout_quota_sb(struct super_block *sb, int type) -{ - if (sb->s_qcop && sb->s_qcop->quota_sync) - sb->s_qcop->quota_sync(sb, type); -} - void inode_add_rsv_space(struct inode *inode, qsize_t number); void inode_claim_rsv_space(struct inode *inode, qsize_t number); void inode_sub_rsv_space(struct inode *inode, qsize_t number); @@ -67,7 +60,7 @@ int vfs_quota_on_mount(struct super_block *sb, char *qf_name, int format_id, int type); int vfs_quota_off(struct super_block *sb, int type, int remount); int vfs_quota_disable(struct super_block *sb, int type, unsigned int flags); -int vfs_quota_sync(struct super_block *sb, int type); +int vfs_quota_sync(struct super_block *sb, int type, int wait); int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di); @@ -340,14 +333,6 @@ static inline void vfs_dq_free_inode(struct inode *inode) { } -static inline void sync_quota_sb(struct super_block *sb, int type) -{ -} - -static inline void writeout_quota_sb(struct super_block *sb, int type) -{ -} - static inline int vfs_dq_off(struct super_block *sb, int remount) { return 0; -- cgit v1.2.3 From ad1e6e8da9fe8cb7ecfde8eabacedc3b50fceae4 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Tue, 16 Feb 2010 08:31:49 +0300 Subject: quota: sb_quota state flags cleanup - remove hardcoded USRQUOTA/GRPQUOTA flags - convert int to bool for appropriate functions Signed-off-by: Dmitry Monakhov Signed-off-by: Jan Kara --- fs/quota/dquot.c | 3 ++- include/linux/quota.h | 15 +++++++-------- include/linux/quotaops.h | 31 +++++++++++++++++-------------- 3 files changed, 26 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 4c2213f7ed36..5a831dc5ab28 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -1301,7 +1301,7 @@ int dquot_initialize(struct inode *inode, int type) { unsigned int id = 0; int cnt, ret = 0; - struct dquot *got[MAXQUOTAS] = { NULL, NULL }; + struct dquot *got[MAXQUOTAS]; struct super_block *sb = inode->i_sb; qsize_t rsv; @@ -1312,6 +1312,7 @@ int dquot_initialize(struct inode *inode, int type) /* First get references to structures we might need. */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { + got[cnt] = NULL; if (type != -1 && cnt != type) continue; switch (cnt) { diff --git a/include/linux/quota.h b/include/linux/quota.h index 570348cbccb1..92547a57e25a 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -357,26 +357,25 @@ enum { #define DQUOT_STATE_FLAGS (DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED | \ DQUOT_SUSPENDED) /* Other quota flags */ -#define DQUOT_QUOTA_SYS_FILE (1 << 6) /* Quota file is a special +#define DQUOT_STATE_LAST (_DQUOT_STATE_FLAGS * MAXQUOTAS) +#define DQUOT_QUOTA_SYS_FILE (1 << DQUOT_STATE_LAST) + /* Quota file is a special * system file and user cannot * touch it. 
Filesystem is * responsible for setting * S_NOQUOTA, S_NOATIME flags */ -#define DQUOT_NEGATIVE_USAGE (1 << 7) /* Allow negative quota usage */ +#define DQUOT_NEGATIVE_USAGE (1 << (DQUOT_STATE_LAST + 1)) + /* Allow negative quota usage */ static inline unsigned int dquot_state_flag(unsigned int flags, int type) { - if (type == USRQUOTA) - return flags; - return flags << _DQUOT_STATE_FLAGS; + return flags << _DQUOT_STATE_FLAGS * type; } static inline unsigned int dquot_generic_flag(unsigned int flags, int type) { - if (type == USRQUOTA) - return flags; - return flags >> _DQUOT_STATE_FLAGS; + return (flags >> _DQUOT_STATE_FLAGS * type) & DQUOT_STATE_FLAGS; } #ifdef CONFIG_QUOTA_NETLINK_INTERFACE diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 8cfd0d44c994..e563a20cff4f 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -79,53 +79,56 @@ static inline struct mem_dqinfo *sb_dqinfo(struct super_block *sb, int type) * Functions for checking status of quota */ -static inline int sb_has_quota_usage_enabled(struct super_block *sb, int type) +static inline bool sb_has_quota_usage_enabled(struct super_block *sb, int type) { return sb_dqopt(sb)->flags & dquot_state_flag(DQUOT_USAGE_ENABLED, type); } -static inline int sb_has_quota_limits_enabled(struct super_block *sb, int type) +static inline bool sb_has_quota_limits_enabled(struct super_block *sb, int type) { return sb_dqopt(sb)->flags & dquot_state_flag(DQUOT_LIMITS_ENABLED, type); } -static inline int sb_has_quota_suspended(struct super_block *sb, int type) +static inline bool sb_has_quota_suspended(struct super_block *sb, int type) { return sb_dqopt(sb)->flags & dquot_state_flag(DQUOT_SUSPENDED, type); } -static inline int sb_any_quota_suspended(struct super_block *sb) +static inline unsigned sb_any_quota_suspended(struct super_block *sb) { - return sb_has_quota_suspended(sb, USRQUOTA) || - sb_has_quota_suspended(sb, GRPQUOTA); + unsigned type, tmsk = 0; + for (type = 0; type < MAXQUOTAS; type++) + tmsk |= sb_has_quota_suspended(sb, type) << type; + return tmsk; } /* Does kernel know about any quota information for given sb + type? */ -static inline int sb_has_quota_loaded(struct super_block *sb, int type) +static inline bool sb_has_quota_loaded(struct super_block *sb, int type) { /* Currently if anything is on, then quota usage is on as well */ return sb_has_quota_usage_enabled(sb, type); } -static inline int sb_any_quota_loaded(struct super_block *sb) +static inline unsigned sb_any_quota_loaded(struct super_block *sb) { - return sb_has_quota_loaded(sb, USRQUOTA) || - sb_has_quota_loaded(sb, GRPQUOTA); + unsigned type, tmsk = 0; + for (type = 0; type < MAXQUOTAS; type++) + tmsk |= sb_has_quota_loaded(sb, type) << type; + return tmsk; } -static inline int sb_has_quota_active(struct super_block *sb, int type) +static inline bool sb_has_quota_active(struct super_block *sb, int type) { return sb_has_quota_loaded(sb, type) && !sb_has_quota_suspended(sb, type); } -static inline int sb_any_quota_active(struct super_block *sb) +static inline unsigned sb_any_quota_active(struct super_block *sb) { - return sb_has_quota_active(sb, USRQUOTA) || - sb_has_quota_active(sb, GRPQUOTA); + return sb_any_quota_loaded(sb) & ~sb_any_quota_suspended(sb); } /* -- cgit v1.2.3 From 8ddd69d6df4758bf0cab981481af24cc84419567 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Tue, 16 Feb 2010 08:31:50 +0300 Subject: quota: generalize quota transfer interface The current quota transfer interface supports only uid/gid.
This patch extends the interface to support various quota types. The goal is accomplished without changes to the most frequently used vfs_dq_transfer() function. Signed-off-by: Dmitry Monakhov Signed-off-by: Jan Kara --- fs/quota/dquot.c | 32 ++++++++++++++++++++------------ include/linux/quota.h | 2 +- include/linux/quotaops.h | 2 +- 3 files changed, 22 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 5a831dc5ab28..4d2041fddefc 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -1695,15 +1695,13 @@ EXPORT_SYMBOL(dquot_free_inode); * This operation can block, but only after everything is updated * A transaction must be started when entering this function. */ -int dquot_transfer(struct inode *inode, struct iattr *iattr) +int dquot_transfer(struct inode *inode, qid_t *chid, unsigned long mask) { qsize_t space, cur_space; qsize_t rsv_space = 0; struct dquot *transfer_from[MAXQUOTAS]; struct dquot *transfer_to[MAXQUOTAS]; int cnt, ret = QUOTA_OK; - int chuid = iattr->ia_valid & ATTR_UID && inode->i_uid != iattr->ia_uid, - chgid = iattr->ia_valid & ATTR_GID && inode->i_gid != iattr->ia_gid; char warntype_to[MAXQUOTAS]; char warntype_from_inodes[MAXQUOTAS], warntype_from_space[MAXQUOTAS]; @@ -1717,13 +1715,10 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr) transfer_to[cnt] = NULL; warntype_to[cnt] = QUOTA_NL_NOWARN; } - if (chuid) - transfer_to[USRQUOTA] = dqget(inode->i_sb, iattr->ia_uid, - USRQUOTA); - if (chgid) - transfer_to[GRPQUOTA] = dqget(inode->i_sb, iattr->ia_gid, - GRPQUOTA); - + for (cnt = 0; cnt < MAXQUOTAS; cnt++) { + if (mask & (1 << cnt)) + transfer_to[cnt] = dqget(inode->i_sb, chid[cnt], cnt); + } down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); if (IS_NOQUOTA(inode)) { /* File without quota accounting?
*/ up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); @@ -1799,12 +1794,25 @@ over_quota: } EXPORT_SYMBOL(dquot_transfer); -/* Wrapper for transferring ownership of an inode */ +/* Wrapper for transferring ownership of an inode for uid/gid only + * Called from FSXXX_setattr() + */ int vfs_dq_transfer(struct inode *inode, struct iattr *iattr) { + qid_t chid[MAXQUOTAS]; + unsigned long mask = 0; + + if (iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) { + mask |= 1 << USRQUOTA; + chid[USRQUOTA] = iattr->ia_uid; + } + if (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid) { + mask |= 1 << GRPQUOTA; + chid[GRPQUOTA] = iattr->ia_gid; + } if (sb_any_quota_active(inode->i_sb) && !IS_NOQUOTA(inode)) { vfs_dq_init(inode); - if (inode->i_sb->dq_op->transfer(inode, iattr) == NO_QUOTA) + if (inode->i_sb->dq_op->transfer(inode, chid, mask) == NO_QUOTA) return 1; } return 0; } diff --git a/include/linux/quota.h b/include/linux/quota.h index 92547a57e25a..edf34f2fe87d 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -301,7 +301,7 @@ struct dquot_operations { int (*alloc_inode) (const struct inode *, qsize_t); int (*free_space) (struct inode *, qsize_t); int (*free_inode) (const struct inode *, qsize_t); - int (*transfer) (struct inode *, struct iattr *); + int (*transfer) (struct inode *, qid_t *, unsigned long); int (*write_dquot) (struct dquot *); /* Ordinary dquot write */ struct dquot *(*alloc_dquot)(struct super_block *, int); /* Allocate memory for new dquot */ void (*destroy_dquot)(struct dquot *); /* Free memory for dquot */ diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index e563a20cff4f..e1cae204b5d9 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -43,7 +43,7 @@ void dquot_release_reserved_space(struct inode *inode, qsize_t number); int dquot_free_space(struct inode *inode, qsize_t number); int dquot_free_inode(const struct inode *inode, qsize_t number); -int dquot_transfer(struct inode *inode, struct iattr *iattr); +int dquot_transfer(struct inode *inode, qid_t *chid, unsigned long mask); int dquot_commit(struct dquot *dquot); int dquot_acquire(struct dquot *dquot); int dquot_release(struct dquot *dquot); -- cgit v1.2.3 From 5dd4056db84387975140ff2568eaa0406f07985e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Mar 2010 09:05:00 -0500 Subject: dquot: cleanup space allocation / freeing routines Get rid of the alloc_space, free_space, reserve_space, claim_space and release_rsv dquot operations - they are always called from the filesystem and if a filesystem really needs its own (which none currently does) it can just call into its own routine directly. Move shared logic into the common __dquot_alloc_space, dquot_claim_space_nodirty and __dquot_free_space low-level methods, and rationalize the wrappers around it to move as much code as possible into the common block for CONFIG_QUOTA vs not. Also rename all these helpers to be named dquot_* instead of vfs_dq_*.
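For illustration only, a minimal sketch (not part of this patch) of how a filesystem block-allocation path would call the renamed helpers; example_alloc_blocks() and example_allocate_on_disk() are hypothetical names, while dquot_alloc_block() and dquot_free_block() are the helpers used throughout the diffs below:

/*
 * Minimal sketch, not from this patch: shows the new calling convention.
 * dquot_alloc_block() returns 0 or a negative errno (typically -EDQUOT),
 * where the old vfs_dq_alloc_block() returned a non-zero flag, and a
 * failed on-disk allocation rolls its quota charge back with
 * dquot_free_block().
 */
#include <linux/fs.h>
#include <linux/quotaops.h>

/* Hypothetical stand-in for the filesystem's real block allocator. */
static int example_allocate_on_disk(struct inode *inode, qsize_t nr)
{
	return 0;
}

static int example_alloc_blocks(struct inode *inode, qsize_t nr)
{
	int err;

	/* Charge the blocks to quota before touching on-disk structures. */
	err = dquot_alloc_block(inode, nr);
	if (err)
		return err;

	err = example_allocate_on_disk(inode, nr);
	if (err) {
		/* Undo the quota charge if the allocation itself failed. */
		dquot_free_block(inode, nr);
		return err;
	}
	return 0;
}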
Signed-off-by: Christoph Hellwig Signed-off-by: Jan Kara --- Documentation/filesystems/Locking | 6 +- fs/ext2/balloc.c | 12 ++- fs/ext2/xattr.c | 10 +- fs/ext3/balloc.c | 11 +- fs/ext3/inode.c | 2 +- fs/ext3/super.c | 2 - fs/ext3/xattr.c | 8 +- fs/ext4/inode.c | 20 ++-- fs/ext4/mballoc.c | 6 +- fs/ext4/super.c | 5 - fs/ext4/xattr.c | 8 +- fs/jfs/jfs_dtree.c | 28 ++--- fs/jfs/jfs_extent.c | 16 +-- fs/jfs/jfs_xtree.c | 21 ++-- fs/jfs/xattr.c | 17 ++-- fs/ocfs2/alloc.c | 13 ++- fs/ocfs2/aops.c | 11 +- fs/ocfs2/dir.c | 37 +++---- fs/ocfs2/file.c | 11 +- fs/ocfs2/namei.c | 9 +- fs/ocfs2/quota_global.c | 2 - fs/quota/dquot.c | 79 +++++---------- fs/reiserfs/bitmap.c | 10 +- fs/reiserfs/stree.c | 20 ++-- fs/reiserfs/super.c | 2 - fs/udf/balloc.c | 35 ++++--- fs/ufs/balloc.c | 24 +++-- include/linux/quota.h | 8 -- include/linux/quotaops.h | 208 +++++++++++--------------------------- 29 files changed, 258 insertions(+), 383 deletions(-) (limited to 'include') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 18b9d0ca0630..1192fde11638 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -462,9 +462,7 @@ in sys_read() and friends. prototypes: int (*initialize) (struct inode *, int); int (*drop) (struct inode *); - int (*alloc_space) (struct inode *, qsize_t, int); int (*alloc_inode) (const struct inode *, unsigned long); - int (*free_space) (struct inode *, qsize_t); int (*free_inode) (const struct inode *, unsigned long); int (*transfer) (struct inode *, struct iattr *); int (*write_dquot) (struct dquot *); @@ -481,9 +479,7 @@ What filesystem should expect from the generic quota functions: FS recursion Held locks when called initialize: yes maybe dqonoff_sem drop: yes - -alloc_space: ->mark_dirty() - alloc_inode: ->mark_dirty() - -free_space: ->mark_dirty() - free_inode: ->mark_dirty() - transfer: yes - write_dquot: yes dqonoff_sem or dqptr_sem @@ -495,7 +491,7 @@ write_info: yes dqonoff_sem FS recursion means calling ->quota_read() and ->quota_write() from superblock operations. -->alloc_space(), ->alloc_inode(), ->free_space(), ->free_inode() are called +->alloc_inode(), ->free_inode() are called only directly by the filesystem and do not call any fs functions only the ->mark_dirty() operation. diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index 7f8d2e5a7ea6..1d081f0cfec2 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c @@ -570,7 +570,7 @@ do_more: error_return: brelse(bitmap_bh); release_blocks(sb, freed); - vfs_dq_free_block(inode, freed); + dquot_free_block(inode, freed); } /** @@ -1236,6 +1236,7 @@ ext2_fsblk_t ext2_new_blocks(struct inode *inode, ext2_fsblk_t goal, unsigned short windowsz = 0; unsigned long ngroups; unsigned long num = *count; + int ret; *errp = -ENOSPC; sb = inode->i_sb; @@ -1247,8 +1248,9 @@ ext2_fsblk_t ext2_new_blocks(struct inode *inode, ext2_fsblk_t goal, /* * Check quota for allocation of this block. 
*/ - if (vfs_dq_alloc_block(inode, num)) { - *errp = -EDQUOT; + ret = dquot_alloc_block(inode, num); + if (ret) { + *errp = ret; return 0; } @@ -1409,7 +1411,7 @@ allocated: *errp = 0; brelse(bitmap_bh); - vfs_dq_free_block(inode, *count-num); + dquot_free_block(inode, *count-num); *count = num; return ret_block; @@ -1420,7 +1422,7 @@ out: * Undo the block allocation */ if (!performed_allocation) - vfs_dq_free_block(inode, *count); + dquot_free_block(inode, *count); brelse(bitmap_bh); return 0; } diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index 904f00642f84..e44dc92609be 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -644,8 +644,8 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, the inode. */ ea_bdebug(new_bh, "reusing block"); - error = -EDQUOT; - if (vfs_dq_alloc_block(inode, 1)) { + error = dquot_alloc_block(inode, 1); + if (error) { unlock_buffer(new_bh); goto cleanup; } @@ -702,7 +702,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, * as if nothing happened and cleanup the unused block */ if (error && error != -ENOSPC) { if (new_bh && new_bh != old_bh) - vfs_dq_free_block(inode, 1); + dquot_free_block(inode, 1); goto cleanup; } } else @@ -734,7 +734,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, le32_add_cpu(&HDR(old_bh)->h_refcount, -1); if (ce) mb_cache_entry_release(ce); - vfs_dq_free_block(inode, 1); + dquot_free_block(inode, 1); mark_buffer_dirty(old_bh); ea_bdebug(old_bh, "refcount now=%d", le32_to_cpu(HDR(old_bh)->h_refcount)); @@ -797,7 +797,7 @@ ext2_xattr_delete_inode(struct inode *inode) mark_buffer_dirty(bh); if (IS_SYNC(inode)) sync_dirty_buffer(bh); - vfs_dq_free_block(inode, 1); + dquot_free_block(inode, 1); } EXT2_I(inode)->i_file_acl = 0; diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index 27967f92e820..161da2d3f890 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c @@ -676,7 +676,7 @@ void ext3_free_blocks(handle_t *handle, struct inode *inode, } ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks); if (dquot_freed_blocks) - vfs_dq_free_block(inode, dquot_freed_blocks); + dquot_free_block(inode, dquot_freed_blocks); return; } @@ -1502,8 +1502,9 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode, /* * Check quota for allocation of this block. */ - if (vfs_dq_alloc_block(inode, num)) { - *errp = -EDQUOT; + err = dquot_alloc_block(inode, num); + if (err) { + *errp = err; return 0; } @@ -1713,7 +1714,7 @@ allocated: *errp = 0; brelse(bitmap_bh); - vfs_dq_free_block(inode, *count-num); + dquot_free_block(inode, *count-num); *count = num; return ret_block; @@ -1728,7 +1729,7 @@ out: * Undo the block allocation */ if (!performed_allocation) - vfs_dq_free_block(inode, *count); + dquot_free_block(inode, *count); brelse(bitmap_bh); return 0; } diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index eda9121d7d57..20f02d69365c 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -3336,7 +3336,7 @@ int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode) * i_size has been changed by generic_commit_write() and we thus need * to include the updated inode in the current transaction. * - * Also, vfs_dq_alloc_space() will always dirty the inode when blocks + * Also, dquot_alloc_space() will always dirty the inode when blocks * are allocated to the file. 
* * If the inode is marked synchronous, we don't honour that here - doing diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 5c54e5f685d4..8c13910a3782 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -752,9 +752,7 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type, static const struct dquot_operations ext3_quota_operations = { .initialize = dquot_initialize, .drop = dquot_drop, - .alloc_space = dquot_alloc_space, .alloc_inode = dquot_alloc_inode, - .free_space = dquot_free_space, .free_inode = dquot_free_inode, .transfer = dquot_transfer, .write_dquot = ext3_write_dquot, diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c index 2d2fb2a85961..534a94c3a933 100644 --- a/fs/ext3/xattr.c +++ b/fs/ext3/xattr.c @@ -500,7 +500,7 @@ ext3_xattr_release_block(handle_t *handle, struct inode *inode, error = ext3_journal_dirty_metadata(handle, bh); if (IS_SYNC(inode)) handle->h_sync = 1; - vfs_dq_free_block(inode, 1); + dquot_free_block(inode, 1); ea_bdebug(bh, "refcount now=%d; releasing", le32_to_cpu(BHDR(bh)->h_refcount)); if (ce) @@ -775,8 +775,8 @@ inserted: else { /* The old block is released after updating the inode. */ - error = -EDQUOT; - if (vfs_dq_alloc_block(inode, 1)) + error = dquot_alloc_block(inode, 1); + if (error) goto cleanup; error = ext3_journal_get_write_access(handle, new_bh); @@ -850,7 +850,7 @@ cleanup: return error; cleanup_dquot: - vfs_dq_free_block(inode, 1); + dquot_free_block(inode, 1); goto cleanup; bad_block: diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index e11952404e02..9f607ea411c8 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1093,9 +1093,9 @@ void ext4_da_update_reserve_space(struct inode *inode, /* Update quota subsystem */ if (quota_claim) { - vfs_dq_claim_block(inode, used); + dquot_claim_block(inode, used); if (mdb_free) - vfs_dq_release_reservation_block(inode, mdb_free); + dquot_release_reservation_block(inode, mdb_free); } else { /* * We did fallocate with an offset that is already delayed @@ -1106,8 +1106,8 @@ void ext4_da_update_reserve_space(struct inode *inode, * that */ if (allocated_meta_blocks) - vfs_dq_claim_block(inode, allocated_meta_blocks); - vfs_dq_release_reservation_block(inode, mdb_free + used); + dquot_claim_block(inode, allocated_meta_blocks); + dquot_release_reservation_block(inode, mdb_free + used); } /* @@ -1836,6 +1836,7 @@ static int ext4_da_reserve_space(struct inode *inode, sector_t lblock) struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct ext4_inode_info *ei = EXT4_I(inode); unsigned long md_needed, md_reserved; + int ret; /* * recalculate the amount of metadata blocks to reserve @@ -1853,11 +1854,12 @@ repeat: * later. Real quota accounting is done at pages writeout * time. 
*/ - if (vfs_dq_reserve_block(inode, md_needed + 1)) - return -EDQUOT; + ret = dquot_reserve_block(inode, md_needed + 1); + if (ret) + return ret; if (ext4_claim_free_blocks(sbi, md_needed + 1)) { - vfs_dq_release_reservation_block(inode, md_needed + 1); + dquot_release_reservation_block(inode, md_needed + 1); if (ext4_should_retry_alloc(inode->i_sb, &retries)) { yield(); goto repeat; @@ -1914,7 +1916,7 @@ static void ext4_da_release_space(struct inode *inode, int to_free) spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); - vfs_dq_release_reservation_block(inode, to_free); + dquot_release_reservation_block(inode, to_free); } static void ext4_da_page_release_reservation(struct page *page, @@ -5641,7 +5643,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) * i_size has been changed by generic_commit_write() and we thus need * to include the updated inode in the current transaction. * - * Also, vfs_dq_alloc_block() will always dirty the inode when blocks + * Also, dquot_alloc_block() will always dirty the inode when blocks * are allocated to the file. * * If the inode is marked synchronous, we don't honour that here - doing diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index d34afad3e137..0b905781e8e6 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4254,7 +4254,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, return 0; } reserv_blks = ar->len; - while (ar->len && vfs_dq_alloc_block(ar->inode, ar->len)) { + while (ar->len && dquot_alloc_block(ar->inode, ar->len)) { ar->flags |= EXT4_MB_HINT_NOPREALLOC; ar->len--; } @@ -4331,7 +4331,7 @@ out2: kmem_cache_free(ext4_ac_cachep, ac); out1: if (inquota && ar->len < inquota) - vfs_dq_free_block(ar->inode, inquota - ar->len); + dquot_free_block(ar->inode, inquota - ar->len); out3: if (!ar->len) { if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) @@ -4646,7 +4646,7 @@ do_more: sb->s_dirt = 1; error_return: if (freed) - vfs_dq_free_block(inode, freed); + dquot_free_block(inode, freed); brelse(bitmap_bh); ext4_std_error(sb, err); if (ac) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 735c20d5fd56..fa8f4deda652 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1014,15 +1014,10 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, static const struct dquot_operations ext4_quota_operations = { .initialize = dquot_initialize, .drop = dquot_drop, - .alloc_space = dquot_alloc_space, - .reserve_space = dquot_reserve_space, - .claim_space = dquot_claim_space, - .release_rsv = dquot_release_reserved_space, #ifdef CONFIG_QUOTA .get_reserved_space = ext4_get_reserved_space, #endif .alloc_inode = dquot_alloc_inode, - .free_space = dquot_free_space, .free_inode = dquot_free_inode, .transfer = dquot_transfer, .write_dquot = ext4_write_dquot, diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index f3a2f7ed45aa..ab3a95ee5e7e 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -494,7 +494,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, error = ext4_handle_dirty_metadata(handle, inode, bh); if (IS_SYNC(inode)) ext4_handle_sync(handle); - vfs_dq_free_block(inode, 1); + dquot_free_block(inode, 1); ea_bdebug(bh, "refcount now=%d; releasing", le32_to_cpu(BHDR(bh)->h_refcount)); if (ce) @@ -787,8 +787,8 @@ inserted: else { /* The old block is released after updating the inode. 
*/ - error = -EDQUOT; - if (vfs_dq_alloc_block(inode, 1)) + error = dquot_alloc_block(inode, 1); + if (error) goto cleanup; error = ext4_journal_get_write_access(handle, new_bh); @@ -876,7 +876,7 @@ cleanup: return error; cleanup_dquot: - vfs_dq_free_block(inode, 1); + dquot_free_block(inode, 1); goto cleanup; bad_block: diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c index 925871e9887b..0e4623be70ce 100644 --- a/fs/jfs/jfs_dtree.c +++ b/fs/jfs/jfs_dtree.c @@ -381,10 +381,10 @@ static u32 add_index(tid_t tid, struct inode *ip, s64 bn, int slot) * It's time to move the inline table to an external * page and begin to build the xtree */ - if (vfs_dq_alloc_block(ip, sbi->nbperpage)) + if (dquot_alloc_block(ip, sbi->nbperpage)) goto clean_up; if (dbAlloc(ip, 0, sbi->nbperpage, &xaddr)) { - vfs_dq_free_block(ip, sbi->nbperpage); + dquot_free_block(ip, sbi->nbperpage); goto clean_up; } @@ -408,7 +408,7 @@ static u32 add_index(tid_t tid, struct inode *ip, s64 bn, int slot) memcpy(&jfs_ip->i_dirtable, temp_table, sizeof (temp_table)); dbFree(ip, xaddr, sbi->nbperpage); - vfs_dq_free_block(ip, sbi->nbperpage); + dquot_free_block(ip, sbi->nbperpage); goto clean_up; } ip->i_size = PSIZE; @@ -1027,10 +1027,9 @@ static int dtSplitUp(tid_t tid, n = xlen; /* Allocate blocks to quota. */ - if (vfs_dq_alloc_block(ip, n)) { - rc = -EDQUOT; + rc = dquot_alloc_block(ip, n); + if (rc) goto extendOut; - } quota_allocation += n; if ((rc = dbReAlloc(sbi->ipbmap, xaddr, (s64) xlen, @@ -1308,7 +1307,7 @@ static int dtSplitUp(tid_t tid, /* Rollback quota allocation */ if (rc && quota_allocation) - vfs_dq_free_block(ip, quota_allocation); + dquot_free_block(ip, quota_allocation); dtSplitUp_Exit: @@ -1369,9 +1368,10 @@ static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split, return -EIO; /* Allocate blocks to quota. */ - if (vfs_dq_alloc_block(ip, lengthPXD(pxd))) { + rc = dquot_alloc_block(ip, lengthPXD(pxd)); + if (rc) { release_metapage(rmp); - return -EDQUOT; + return rc; } jfs_info("dtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp); @@ -1892,6 +1892,7 @@ static int dtSplitRoot(tid_t tid, struct dt_lock *dtlck; struct tlock *tlck; struct lv *lv; + int rc; /* get split root page */ smp = split->mp; @@ -1916,9 +1917,10 @@ static int dtSplitRoot(tid_t tid, rp = rmp->data; /* Allocate blocks to quota. */ - if (vfs_dq_alloc_block(ip, lengthPXD(pxd))) { + rc = dquot_alloc_block(ip, lengthPXD(pxd)); + if (rc) { release_metapage(rmp); - return -EDQUOT; + return rc; } BT_MARK_DIRTY(rmp, ip); @@ -2287,7 +2289,7 @@ static int dtDeleteUp(tid_t tid, struct inode *ip, xlen = lengthPXD(&fp->header.self); /* Free quota allocation. */ - vfs_dq_free_block(ip, xlen); + dquot_free_block(ip, xlen); /* free/invalidate its buffer page */ discard_metapage(fmp); @@ -2363,7 +2365,7 @@ static int dtDeleteUp(tid_t tid, struct inode *ip, xlen = lengthPXD(&p->header.self); /* Free quota allocation */ - vfs_dq_free_block(ip, xlen); + dquot_free_block(ip, xlen); /* free/invalidate its buffer page */ discard_metapage(mp); diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c index 41d6045dbeb0..5d3bbd10f8db 100644 --- a/fs/jfs/jfs_extent.c +++ b/fs/jfs/jfs_extent.c @@ -141,10 +141,11 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr) } /* Allocate blocks to quota. 
*/ - if (vfs_dq_alloc_block(ip, nxlen)) { + rc = dquot_alloc_block(ip, nxlen); + if (rc) { dbFree(ip, nxaddr, (s64) nxlen); mutex_unlock(&JFS_IP(ip)->commit_mutex); - return -EDQUOT; + return rc; } /* determine the value of the extent flag */ @@ -164,7 +165,7 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr) */ if (rc) { dbFree(ip, nxaddr, nxlen); - vfs_dq_free_block(ip, nxlen); + dquot_free_block(ip, nxlen); mutex_unlock(&JFS_IP(ip)->commit_mutex); return (rc); } @@ -256,10 +257,11 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, bool abnr) goto exit; /* Allocat blocks to quota. */ - if (vfs_dq_alloc_block(ip, nxlen)) { + rc = dquot_alloc_block(ip, nxlen); + if (rc) { dbFree(ip, nxaddr, (s64) nxlen); mutex_unlock(&JFS_IP(ip)->commit_mutex); - return -EDQUOT; + return rc; } delta = nxlen - xlen; @@ -297,7 +299,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, bool abnr) /* extend the extent */ if ((rc = xtExtend(0, ip, xoff + xlen, (int) nextend, 0))) { dbFree(ip, xaddr + xlen, delta); - vfs_dq_free_block(ip, nxlen); + dquot_free_block(ip, nxlen); goto exit; } } else { @@ -308,7 +310,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, bool abnr) */ if ((rc = xtTailgate(0, ip, xoff, (int) ntail, nxaddr, 0))) { dbFree(ip, nxaddr, nxlen); - vfs_dq_free_block(ip, nxlen); + dquot_free_block(ip, nxlen); goto exit; } } diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c index d654a6458648..6c50871e6220 100644 --- a/fs/jfs/jfs_xtree.c +++ b/fs/jfs/jfs_xtree.c @@ -585,10 +585,10 @@ int xtInsert(tid_t tid, /* transaction id */ hint = addressXAD(xad) + lengthXAD(xad) - 1; } else hint = 0; - if ((rc = vfs_dq_alloc_block(ip, xlen))) + if ((rc = dquot_alloc_block(ip, xlen))) goto out; if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) { - vfs_dq_free_block(ip, xlen); + dquot_free_block(ip, xlen); goto out; } } @@ -617,7 +617,7 @@ int xtInsert(tid_t tid, /* transaction id */ /* undo data extent allocation */ if (*xaddrp == 0) { dbFree(ip, xaddr, (s64) xlen); - vfs_dq_free_block(ip, xlen); + dquot_free_block(ip, xlen); } return rc; } @@ -985,10 +985,9 @@ xtSplitPage(tid_t tid, struct inode *ip, rbn = addressPXD(pxd); /* Allocate blocks to quota. */ - if (vfs_dq_alloc_block(ip, lengthPXD(pxd))) { - rc = -EDQUOT; + rc = dquot_alloc_block(ip, lengthPXD(pxd)); + if (rc) goto clean_up; - } quota_allocation += lengthPXD(pxd); @@ -1195,7 +1194,7 @@ xtSplitPage(tid_t tid, struct inode *ip, /* Rollback quota allocation. */ if (quota_allocation) - vfs_dq_free_block(ip, quota_allocation); + dquot_free_block(ip, quota_allocation); return (rc); } @@ -1235,6 +1234,7 @@ xtSplitRoot(tid_t tid, struct pxdlist *pxdlist; struct tlock *tlck; struct xtlock *xtlck; + int rc; sp = &JFS_IP(ip)->i_xtroot; @@ -1252,9 +1252,10 @@ xtSplitRoot(tid_t tid, return -EIO; /* Allocate blocks to quota. 
*/ - if (vfs_dq_alloc_block(ip, lengthPXD(pxd))) { + rc = dquot_alloc_block(ip, lengthPXD(pxd)); + if (rc) { release_metapage(rmp); - return -EDQUOT; + return rc; } jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp); @@ -3680,7 +3681,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag) ip->i_size = newsize; /* update quota allocation to reflect freed blocks */ - vfs_dq_free_block(ip, nfreed); + dquot_free_block(ip, nfreed); /* * free tlock of invalidated pages diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index fad364548bc9..1f594ab21895 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c @@ -260,14 +260,14 @@ static int ea_write(struct inode *ip, struct jfs_ea_list *ealist, int size, nblocks = (size + (sb->s_blocksize - 1)) >> sb->s_blocksize_bits; /* Allocate new blocks to quota. */ - if (vfs_dq_alloc_block(ip, nblocks)) { - return -EDQUOT; - } + rc = dquot_alloc_block(ip, nblocks); + if (rc) + return rc; rc = dbAlloc(ip, INOHINT(ip), nblocks, &blkno); if (rc) { /*Rollback quota allocation. */ - vfs_dq_free_block(ip, nblocks); + dquot_free_block(ip, nblocks); return rc; } @@ -332,7 +332,7 @@ static int ea_write(struct inode *ip, struct jfs_ea_list *ealist, int size, failed: /* Rollback quota allocation. */ - vfs_dq_free_block(ip, nblocks); + dquot_free_block(ip, nblocks); dbFree(ip, blkno, nblocks); return rc; @@ -538,7 +538,8 @@ static int ea_get(struct inode *inode, struct ea_buffer *ea_buf, int min_size) if (blocks_needed > current_blocks) { /* Allocate new blocks to quota. */ - if (vfs_dq_alloc_block(inode, blocks_needed)) + rc = dquot_alloc_block(inode, blocks_needed); + if (rc) return -EDQUOT; quota_allocation = blocks_needed; @@ -602,7 +603,7 @@ static int ea_get(struct inode *inode, struct ea_buffer *ea_buf, int min_size) clean_up: /* Rollback quota allocation */ if (quota_allocation) - vfs_dq_free_block(inode, quota_allocation); + dquot_free_block(inode, quota_allocation); return (rc); } @@ -677,7 +678,7 @@ static int ea_put(tid_t tid, struct inode *inode, struct ea_buffer *ea_buf, /* If old blocks exist, they must be removed from quota allocation. 
*/ if (old_blocks) - vfs_dq_free_block(inode, old_blocks); + dquot_free_block(inode, old_blocks); inode->i_ctime = CURRENT_TIME; diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index d17bdc718f74..20538dd832a4 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -5712,7 +5712,7 @@ int ocfs2_remove_btree_range(struct inode *inode, goto out; } - vfs_dq_free_space_nodirty(inode, + dquot_free_space_nodirty(inode, ocfs2_clusters_to_bytes(inode->i_sb, len)); ret = ocfs2_remove_extent(handle, et, cpos, len, meta_ac, dealloc); @@ -6935,7 +6935,7 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb, goto bail; } - vfs_dq_free_space_nodirty(inode, + dquot_free_space_nodirty(inode, ocfs2_clusters_to_bytes(osb->sb, clusters_to_del)); spin_lock(&OCFS2_I(inode)->ip_lock); OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters) - @@ -7300,11 +7300,10 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, unsigned int page_end; u64 phys; - if (vfs_dq_alloc_space_nodirty(inode, - ocfs2_clusters_to_bytes(osb->sb, 1))) { - ret = -EDQUOT; + ret = dquot_alloc_space_nodirty(inode, + ocfs2_clusters_to_bytes(osb->sb, 1)); + if (ret) goto out_commit; - } did_quota = 1; ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, @@ -7380,7 +7379,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, out_commit: if (ret < 0 && did_quota) - vfs_dq_free_space_nodirty(inode, + dquot_free_space_nodirty(inode, ocfs2_clusters_to_bytes(osb->sb, 1)); ocfs2_commit_trans(osb, handle); diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 7e9df11260f4..7d04c171567d 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -1763,10 +1763,11 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, wc->w_handle = handle; - if (clusters_to_alloc && vfs_dq_alloc_space_nodirty(inode, - ocfs2_clusters_to_bytes(osb->sb, clusters_to_alloc))) { - ret = -EDQUOT; - goto out_commit; + if (clusters_to_alloc) { + ret = dquot_alloc_space_nodirty(inode, + ocfs2_clusters_to_bytes(osb->sb, clusters_to_alloc)); + if (ret) + goto out_commit; } /* * We don't want this to fail in ocfs2_write_end(), so do it @@ -1809,7 +1810,7 @@ success: return 0; out_quota: if (clusters_to_alloc) - vfs_dq_free_space(inode, + dquot_free_space(inode, ocfs2_clusters_to_bytes(osb->sb, clusters_to_alloc)); out_commit: ocfs2_commit_trans(osb, handle); diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 28c3ec238796..a63ea4d74e67 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -2964,12 +2964,10 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, goto out; } - if (vfs_dq_alloc_space_nodirty(dir, - ocfs2_clusters_to_bytes(osb->sb, - alloc + dx_alloc))) { - ret = -EDQUOT; + ret = dquot_alloc_space_nodirty(dir, + ocfs2_clusters_to_bytes(osb->sb, alloc + dx_alloc)); + if (ret) goto out_commit; - } did_quota = 1; if (ocfs2_supports_indexed_dirs(osb) && !dx_inline) { @@ -3178,7 +3176,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, out_commit: if (ret < 0 && did_quota) - vfs_dq_free_space_nodirty(dir, bytes_allocated); + dquot_free_space_nodirty(dir, bytes_allocated); ocfs2_commit_trans(osb, handle); @@ -3221,11 +3219,10 @@ static int ocfs2_do_extend_dir(struct super_block *sb, if (extend) { u32 offset = OCFS2_I(dir)->ip_clusters; - if (vfs_dq_alloc_space_nodirty(dir, - ocfs2_clusters_to_bytes(sb, 1))) { - status = -EDQUOT; + status = dquot_alloc_space_nodirty(dir, + ocfs2_clusters_to_bytes(sb, 1)); + if (status) goto bail; - } did_quota = 1; status = 
ocfs2_add_inode_data(OCFS2_SB(sb), dir, &offset, @@ -3254,7 +3251,7 @@ static int ocfs2_do_extend_dir(struct super_block *sb, status = 0; bail: if (did_quota && status < 0) - vfs_dq_free_space_nodirty(dir, ocfs2_clusters_to_bytes(sb, 1)); + dquot_free_space_nodirty(dir, ocfs2_clusters_to_bytes(sb, 1)); mlog_exit(status); return status; } @@ -3889,11 +3886,10 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir, goto out; } - if (vfs_dq_alloc_space_nodirty(dir, - ocfs2_clusters_to_bytes(dir->i_sb, 1))) { - ret = -EDQUOT; + ret = dquot_alloc_space_nodirty(dir, + ocfs2_clusters_to_bytes(dir->i_sb, 1)); + if (ret) goto out_commit; - } did_quota = 1; ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), dx_leaf_bh, @@ -3983,7 +3979,7 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir, out_commit: if (ret < 0 && did_quota) - vfs_dq_free_space_nodirty(dir, + dquot_free_space_nodirty(dir, ocfs2_clusters_to_bytes(dir->i_sb, 1)); ocfs2_commit_trans(osb, handle); @@ -4165,11 +4161,10 @@ static int ocfs2_expand_inline_dx_root(struct inode *dir, goto out; } - if (vfs_dq_alloc_space_nodirty(dir, - ocfs2_clusters_to_bytes(osb->sb, 1))) { - ret = -EDQUOT; + ret = dquot_alloc_space_nodirty(dir, + ocfs2_clusters_to_bytes(osb->sb, 1)); + if (ret) goto out_commit; - } did_quota = 1; /* @@ -4229,7 +4224,7 @@ static int ocfs2_expand_inline_dx_root(struct inode *dir, out_commit: if (ret < 0 && did_quota) - vfs_dq_free_space_nodirty(dir, + dquot_free_space_nodirty(dir, ocfs2_clusters_to_bytes(dir->i_sb, 1)); ocfs2_commit_trans(osb, handle); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 558ce0312421..6cf3d8d18369 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -629,11 +629,10 @@ restart_all: } restarted_transaction: - if (vfs_dq_alloc_space_nodirty(inode, ocfs2_clusters_to_bytes(osb->sb, - clusters_to_add))) { - status = -EDQUOT; + status = dquot_alloc_space_nodirty(inode, + ocfs2_clusters_to_bytes(osb->sb, clusters_to_add)); + if (status) goto leave; - } did_quota = 1; /* reserve a write to the file entry early on - that we if we @@ -674,7 +673,7 @@ restarted_transaction: clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters); spin_unlock(&OCFS2_I(inode)->ip_lock); /* Release unused quota reservation */ - vfs_dq_free_space(inode, + dquot_free_space(inode, ocfs2_clusters_to_bytes(osb->sb, clusters_to_add)); did_quota = 0; @@ -710,7 +709,7 @@ restarted_transaction: leave: if (status < 0 && did_quota) - vfs_dq_free_space(inode, + dquot_free_space(inode, ocfs2_clusters_to_bytes(osb->sb, clusters_to_add)); if (handle) { ocfs2_commit_trans(osb, handle); diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 50fb26a6a5f5..13adaa1f40cd 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -1716,11 +1716,10 @@ static int ocfs2_symlink(struct inode *dir, u32 offset = 0; inode->i_op = &ocfs2_symlink_inode_operations; - if (vfs_dq_alloc_space_nodirty(inode, - ocfs2_clusters_to_bytes(osb->sb, 1))) { - status = -EDQUOT; + status = dquot_alloc_space_nodirty(inode, + ocfs2_clusters_to_bytes(osb->sb, 1)); + if (status) goto bail; - } did_quota = 1; status = ocfs2_add_inode_data(osb, inode, &offset, 1, 0, new_fe_bh, @@ -1788,7 +1787,7 @@ static int ocfs2_symlink(struct inode *dir, d_instantiate(dentry, inode); bail: if (status < 0 && did_quota) - vfs_dq_free_space_nodirty(inode, + dquot_free_space_nodirty(inode, ocfs2_clusters_to_bytes(osb->sb, 1)); if (status < 0 && did_quota_inode) vfs_dq_free_inode(inode); diff --git a/fs/ocfs2/quota_global.c 
b/fs/ocfs2/quota_global.c index b437dc0c4cad..aa66fb277225 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -853,9 +853,7 @@ static void ocfs2_destroy_dquot(struct dquot *dquot) const struct dquot_operations ocfs2_quota_operations = { .initialize = dquot_initialize, .drop = dquot_drop, - .alloc_space = dquot_alloc_space, .alloc_inode = dquot_alloc_inode, - .free_space = dquot_free_space, .free_inode = dquot_free_inode, .transfer = dquot_transfer, .write_dquot = ocfs2_write_dquot, diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 10d021dd37c1..baf202c012cc 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -1464,28 +1464,29 @@ static void inode_decr_space(struct inode *inode, qsize_t number, int reserve) } /* - * Following four functions update i_blocks+i_bytes fields and - * quota information (together with appropriate checks) - * NOTE: We absolutely rely on the fact that caller dirties - * the inode (usually macros in quotaops.h care about this) and - * holds a handle for the current transaction so that dquot write and - * inode write go into the same transaction. + * This functions updates i_blocks+i_bytes fields and quota information + * (together with appropriate checks). + * + * NOTE: We absolutely rely on the fact that caller dirties the inode + * (usually helpers in quotaops.h care about this) and holds a handle for + * the current transaction so that dquot write and inode write go into the + * same transaction. */ /* * This operation can block, but only after everything is updated */ int __dquot_alloc_space(struct inode *inode, qsize_t number, - int warn, int reserve) + int warn, int reserve) { - int cnt, ret = QUOTA_OK; + int cnt, ret = 0; char warntype[MAXQUOTAS]; /* * First test before acquiring mutex - solves deadlocks when we * re-enter the quota code and are already holding the mutex */ - if (IS_NOQUOTA(inode)) { + if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) { inode_incr_space(inode, number, reserve); goto out; } @@ -1498,9 +1499,9 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (!inode->i_dquot[cnt]) continue; - if (check_bdq(inode->i_dquot[cnt], number, warn, warntype+cnt) - == NO_QUOTA) { - ret = NO_QUOTA; + if (check_bdq(inode->i_dquot[cnt], number, !warn, warntype+cnt) + == NO_QUOTA) { + ret = -EDQUOT; spin_unlock(&dq_data_lock); goto out_flush_warn; } @@ -1525,18 +1526,7 @@ out_flush_warn: out: return ret; } - -int dquot_alloc_space(struct inode *inode, qsize_t number, int warn) -{ - return __dquot_alloc_space(inode, number, warn, 0); -} -EXPORT_SYMBOL(dquot_alloc_space); - -int dquot_reserve_space(struct inode *inode, qsize_t number, int warn) -{ - return __dquot_alloc_space(inode, number, warn, 1); -} -EXPORT_SYMBOL(dquot_reserve_space); +EXPORT_SYMBOL(__dquot_alloc_space); /* * This operation can block, but only after everything is updated @@ -1578,14 +1568,16 @@ warn_put_all: } EXPORT_SYMBOL(dquot_alloc_inode); -int dquot_claim_space(struct inode *inode, qsize_t number) +/* + * Convert in-memory reserved quotas to real consumed quotas + */ +int dquot_claim_space_nodirty(struct inode *inode, qsize_t number) { int cnt; - int ret = QUOTA_OK; - if (IS_NOQUOTA(inode)) { + if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) { inode_claim_rsv_space(inode, number); - goto out; + return 0; } down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); @@ -1601,24 +1593,23 @@ int dquot_claim_space(struct inode *inode, qsize_t number) spin_unlock(&dq_data_lock); 
mark_all_dquot_dirty(inode->i_dquot); up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); -out: - return ret; + return 0; } -EXPORT_SYMBOL(dquot_claim_space); +EXPORT_SYMBOL(dquot_claim_space_nodirty); /* * This operation can block, but only after everything is updated */ -int __dquot_free_space(struct inode *inode, qsize_t number, int reserve) +void __dquot_free_space(struct inode *inode, qsize_t number, int reserve) { unsigned int cnt; char warntype[MAXQUOTAS]; /* First test before acquiring mutex - solves deadlocks when we * re-enter the quota code and are already holding the mutex */ - if (IS_NOQUOTA(inode)) { + if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) { inode_decr_space(inode, number, reserve); - return QUOTA_OK; + return; } down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); @@ -1641,24 +1632,8 @@ int __dquot_free_space(struct inode *inode, qsize_t number, int reserve) out_unlock: flush_warnings(inode->i_dquot, warntype); up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); - return QUOTA_OK; -} - -int dquot_free_space(struct inode *inode, qsize_t number) -{ - return __dquot_free_space(inode, number, 0); -} -EXPORT_SYMBOL(dquot_free_space); - -/* - * Release reserved quota space - */ -void dquot_release_reserved_space(struct inode *inode, qsize_t number) -{ - __dquot_free_space(inode, number, 1); - } -EXPORT_SYMBOL(dquot_release_reserved_space); +EXPORT_SYMBOL(__dquot_free_space); /* * This operation can block, but only after everything is updated @@ -1840,9 +1815,7 @@ EXPORT_SYMBOL(dquot_commit_info); const struct dquot_operations dquot_operations = { .initialize = dquot_initialize, .drop = dquot_drop, - .alloc_space = dquot_alloc_space, .alloc_inode = dquot_alloc_inode, - .free_space = dquot_free_space, .free_inode = dquot_free_inode, .transfer = dquot_transfer, .write_dquot = dquot_commit, diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c index 65c872761177..dc014f7def05 100644 --- a/fs/reiserfs/bitmap.c +++ b/fs/reiserfs/bitmap.c @@ -425,7 +425,7 @@ static void _reiserfs_free_block(struct reiserfs_transaction_handle *th, journal_mark_dirty(th, s, sbh); if (for_unformatted) - vfs_dq_free_block_nodirty(inode, 1); + dquot_free_block_nodirty(inode, 1); } void reiserfs_free_block(struct reiserfs_transaction_handle *th, @@ -1049,7 +1049,7 @@ static inline int blocknrs_and_prealloc_arrays_from_search_start amount_needed, hint->inode->i_uid); #endif quota_ret = - vfs_dq_alloc_block_nodirty(hint->inode, amount_needed); + dquot_alloc_block_nodirty(hint->inode, amount_needed); if (quota_ret) /* Quota exceeded? 
*/ return QUOTA_EXCEEDED; if (hint->preallocate && hint->prealloc_size) { @@ -1058,7 +1058,7 @@ static inline int blocknrs_and_prealloc_arrays_from_search_start "reiserquota: allocating (prealloc) %d blocks id=%u", hint->prealloc_size, hint->inode->i_uid); #endif - quota_ret = vfs_dq_prealloc_block_nodirty(hint->inode, + quota_ret = dquot_prealloc_block_nodirty(hint->inode, hint->prealloc_size); if (quota_ret) hint->preallocate = hint->prealloc_size = 0; @@ -1092,7 +1092,7 @@ static inline int blocknrs_and_prealloc_arrays_from_search_start hint->inode->i_uid); #endif /* Free not allocated blocks */ - vfs_dq_free_block_nodirty(hint->inode, + dquot_free_block_nodirty(hint->inode, amount_needed + hint->prealloc_size - nr_allocated); } @@ -1125,7 +1125,7 @@ static inline int blocknrs_and_prealloc_arrays_from_search_start REISERFS_I(hint->inode)->i_prealloc_count, hint->inode->i_uid); #endif - vfs_dq_free_block_nodirty(hint->inode, amount_needed + + dquot_free_block_nodirty(hint->inode, amount_needed + hint->prealloc_size - nr_allocated - REISERFS_I(hint->inode)-> i_prealloc_count); diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index 5fa7118f04e1..313d39d639eb 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c @@ -1299,7 +1299,7 @@ int reiserfs_delete_item(struct reiserfs_transaction_handle *th, "reiserquota delete_item(): freeing %u, id=%u type=%c", quota_cut_bytes, inode->i_uid, head2type(&s_ih)); #endif - vfs_dq_free_space_nodirty(inode, quota_cut_bytes); + dquot_free_space_nodirty(inode, quota_cut_bytes); /* Return deleted body length */ return ret_value; @@ -1383,7 +1383,7 @@ void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th, quota_cut_bytes, inode->i_uid, key2type(key)); #endif - vfs_dq_free_space_nodirty(inode, + dquot_free_space_nodirty(inode, quota_cut_bytes); } break; @@ -1733,7 +1733,7 @@ int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th, "reiserquota cut_from_item(): freeing %u id=%u type=%c", quota_cut_bytes, inode->i_uid, '?'); #endif - vfs_dq_free_space_nodirty(inode, quota_cut_bytes); + dquot_free_space_nodirty(inode, quota_cut_bytes); return ret_value; } @@ -1968,9 +1968,10 @@ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct tree key2type(&(key->on_disk_key))); #endif - if (vfs_dq_alloc_space_nodirty(inode, pasted_size)) { + retval = dquot_alloc_space_nodirty(inode, pasted_size); + if (retval) { pathrelse(search_path); - return -EDQUOT; + return retval; } init_tb_struct(th, &s_paste_balance, th->t_super, search_path, pasted_size); @@ -2024,7 +2025,7 @@ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct tree pasted_size, inode->i_uid, key2type(&(key->on_disk_key))); #endif - vfs_dq_free_space_nodirty(inode, pasted_size); + dquot_free_space_nodirty(inode, pasted_size); return retval; } @@ -2062,9 +2063,10 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th, #endif /* We can't dirty inode here. It would be immediately written but * appropriate stat item isn't inserted yet... 
*/ - if (vfs_dq_alloc_space_nodirty(inode, quota_bytes)) { + retval = dquot_alloc_space_nodirty(inode, quota_bytes); + if (retval) { pathrelse(path); - return -EDQUOT; + return retval; } } init_tb_struct(th, &s_ins_balance, th->t_super, path, @@ -2113,6 +2115,6 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th, quota_bytes, inode->i_uid, head2type(ih)); #endif if (inode) - vfs_dq_free_space_nodirty(inode, quota_bytes); + dquot_free_space_nodirty(inode, quota_bytes); return retval; } diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index b4a7dd03bdb9..ea4a77e9d7f5 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -618,9 +618,7 @@ static int reiserfs_quota_on(struct super_block *, int, int, char *, int); static const struct dquot_operations reiserfs_quota_operations = { .initialize = dquot_initialize, .drop = dquot_drop, - .alloc_space = dquot_alloc_space, .alloc_inode = dquot_alloc_inode, - .free_space = dquot_free_space, .free_inode = dquot_free_inode, .transfer = dquot_transfer, .write_dquot = reiserfs_write_dquot, diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c index 82372e332f08..e2ff180173a2 100644 --- a/fs/udf/balloc.c +++ b/fs/udf/balloc.c @@ -208,7 +208,7 @@ static void udf_bitmap_free_blocks(struct super_block *sb, ((char *)bh->b_data)[(bit + i) >> 3]); } else { if (inode) - vfs_dq_free_block(inode, 1); + dquot_free_block(inode, 1); udf_add_free_space(sb, sbi->s_partition, 1); } } @@ -260,11 +260,11 @@ static int udf_bitmap_prealloc_blocks(struct super_block *sb, while (bit < (sb->s_blocksize << 3) && block_count > 0) { if (!udf_test_bit(bit, bh->b_data)) goto out; - else if (vfs_dq_prealloc_block(inode, 1)) + else if (dquot_prealloc_block(inode, 1)) goto out; else if (!udf_clear_bit(bit, bh->b_data)) { udf_debug("bit already cleared for block %d\n", bit); - vfs_dq_free_block(inode, 1); + dquot_free_block(inode, 1); goto out; } block_count--; @@ -390,10 +390,14 @@ got_block: /* * Check quota for allocation of this block. */ - if (inode && vfs_dq_alloc_block(inode, 1)) { - mutex_unlock(&sbi->s_alloc_mutex); - *err = -EDQUOT; - return 0; + if (inode) { + int ret = dquot_alloc_block(inode, 1); + + if (ret) { + mutex_unlock(&sbi->s_alloc_mutex); + *err = ret; + return 0; + } } newblock = bit + (block_group << (sb->s_blocksize_bits + 3)) - @@ -449,7 +453,7 @@ static void udf_table_free_blocks(struct super_block *sb, /* We do this up front - There are some error conditions that could occure, but.. oh well */ if (inode) - vfs_dq_free_block(inode, count); + dquot_free_block(inode, count); udf_add_free_space(sb, sbi->s_partition, count); start = bloc->logicalBlockNum + offset; @@ -694,7 +698,7 @@ static int udf_table_prealloc_blocks(struct super_block *sb, epos.offset -= adsize; alloc_count = (elen >> sb->s_blocksize_bits); - if (inode && vfs_dq_prealloc_block(inode, + if (inode && dquot_prealloc_block(inode, alloc_count > block_count ? 
block_count : alloc_count)) alloc_count = 0; else if (alloc_count > block_count) { @@ -797,12 +801,13 @@ static int udf_table_new_block(struct super_block *sb, newblock = goal_eloc.logicalBlockNum; goal_eloc.logicalBlockNum++; goal_elen -= sb->s_blocksize; - - if (inode && vfs_dq_alloc_block(inode, 1)) { - brelse(goal_epos.bh); - mutex_unlock(&sbi->s_alloc_mutex); - *err = -EDQUOT; - return 0; + if (inode) { + *err = dquot_alloc_block(inode, 1); + if (*err) { + brelse(goal_epos.bh); + mutex_unlock(&sbi->s_alloc_mutex); + return 0; + } } if (goal_elen) diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index 54c16ec95dff..5cfa4d85ccf2 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -85,7 +85,7 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count) "bit already cleared for fragment %u", i); } - vfs_dq_free_block(inode, count); + dquot_free_block(inode, count); fs32_add(sb, &ucg->cg_cs.cs_nffree, count); @@ -195,7 +195,7 @@ do_more: ubh_setblock(UCPI_UBH(ucpi), ucpi->c_freeoff, blkno); if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD) ufs_clusteracct (sb, ucpi, blkno, 1); - vfs_dq_free_block(inode, uspi->s_fpb); + dquot_free_block(inode, uspi->s_fpb); fs32_add(sb, &ucg->cg_cs.cs_nbfree, 1); uspi->cs_total.cs_nbfree++; @@ -511,6 +511,7 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment, struct ufs_cg_private_info * ucpi; struct ufs_cylinder_group * ucg; unsigned cgno, fragno, fragoff, count, fragsize, i; + int ret; UFSD("ENTER, fragment %llu, oldcount %u, newcount %u\n", (unsigned long long)fragment, oldcount, newcount); @@ -556,8 +557,9 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment, fs32_add(sb, &ucg->cg_frsum[fragsize - count], 1); for (i = oldcount; i < newcount; i++) ubh_clrbit (UCPI_UBH(ucpi), ucpi->c_freeoff, fragno + i); - if (vfs_dq_alloc_block(inode, count)) { - *err = -EDQUOT; + ret = dquot_alloc_block(inode, count); + if (ret) { + *err = ret; return 0; } @@ -596,6 +598,7 @@ static u64 ufs_alloc_fragments(struct inode *inode, unsigned cgno, struct ufs_cylinder_group * ucg; unsigned oldcg, i, j, k, allocsize; u64 result; + int ret; UFSD("ENTER, ino %lu, cgno %u, goal %llu, count %u\n", inode->i_ino, cgno, (unsigned long long)goal, count); @@ -664,7 +667,7 @@ cg_found: for (i = count; i < uspi->s_fpb; i++) ubh_setbit (UCPI_UBH(ucpi), ucpi->c_freeoff, goal + i); i = uspi->s_fpb - count; - vfs_dq_free_block(inode, i); + dquot_free_block(inode, i); fs32_add(sb, &ucg->cg_cs.cs_nffree, i); uspi->cs_total.cs_nffree += i; @@ -676,8 +679,9 @@ cg_found: result = ufs_bitmap_search (sb, ucpi, goal, allocsize); if (result == INVBLOCK) return 0; - if (vfs_dq_alloc_block(inode, count)) { - *err = -EDQUOT; + ret = dquot_alloc_block(inode, count); + if (ret) { + *err = ret; return 0; } for (i = 0; i < count; i++) @@ -714,6 +718,7 @@ static u64 ufs_alloccg_block(struct inode *inode, struct ufs_super_block_first * usb1; struct ufs_cylinder_group * ucg; u64 result, blkno; + int ret; UFSD("ENTER, goal %llu\n", (unsigned long long)goal); @@ -747,8 +752,9 @@ gotit: ubh_clrblock (UCPI_UBH(ucpi), ucpi->c_freeoff, blkno); if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD) ufs_clusteracct (sb, ucpi, blkno, -1); - if (vfs_dq_alloc_block(inode, uspi->s_fpb)) { - *err = -EDQUOT; + ret = dquot_alloc_block(inode, uspi->s_fpb); + if (ret) { + *err = ret; return INVBLOCK; } diff --git a/include/linux/quota.h b/include/linux/quota.h index edf34f2fe87d..1b14ad287fe3 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -297,9 +297,7 @@ 
struct quota_format_ops { struct dquot_operations { int (*initialize) (struct inode *, int); int (*drop) (struct inode *); - int (*alloc_space) (struct inode *, qsize_t, int); int (*alloc_inode) (const struct inode *, qsize_t); - int (*free_space) (struct inode *, qsize_t); int (*free_inode) (const struct inode *, qsize_t); int (*transfer) (struct inode *, qid_t *, unsigned long); int (*write_dquot) (struct dquot *); /* Ordinary dquot write */ @@ -309,12 +307,6 @@ struct dquot_operations { int (*release_dquot) (struct dquot *); /* Quota is going to be deleted from disk */ int (*mark_dirty) (struct dquot *); /* Dquot is marked dirty */ int (*write_info) (struct super_block *, int); /* Write of quota "superblock" */ - /* reserve quota for delayed block allocation */ - int (*reserve_space) (struct inode *, qsize_t, int); - /* claim reserved quota for delayed alloc */ - int (*claim_space) (struct inode *, qsize_t); - /* release rsved quota for delayed alloc */ - void (*release_rsv) (struct inode *, qsize_t); /* get reserved quota for delayed alloc, value returned is managed by * quota code only */ qsize_t *(*get_reserved_space) (struct inode *); diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index e1cae204b5d9..47e85682e118 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -33,14 +33,13 @@ int dquot_scan_active(struct super_block *sb, struct dquot *dquot_alloc(struct super_block *sb, int type); void dquot_destroy(struct dquot *dquot); -int dquot_alloc_space(struct inode *inode, qsize_t number, int prealloc); -int dquot_alloc_inode(const struct inode *inode, qsize_t number); +int __dquot_alloc_space(struct inode *inode, qsize_t number, + int warn, int reserve); +void __dquot_free_space(struct inode *inode, qsize_t number, int reserve); -int dquot_reserve_space(struct inode *inode, qsize_t number, int prealloc); -int dquot_claim_space(struct inode *inode, qsize_t number); -void dquot_release_reserved_space(struct inode *inode, qsize_t number); +int dquot_alloc_inode(const struct inode *inode, qsize_t number); -int dquot_free_space(struct inode *inode, qsize_t number); +int dquot_claim_space_nodirty(struct inode *inode, qsize_t number); int dquot_free_inode(const struct inode *inode, qsize_t number); int dquot_transfer(struct inode *inode, qid_t *chid, unsigned long mask); @@ -149,60 +148,6 @@ static inline void vfs_dq_init(struct inode *inode) inode->i_sb->dq_op->initialize(inode, -1); } -/* The following allocation/freeing/transfer functions *must* be called inside - * a transaction (deadlocks possible otherwise) */ -static inline int vfs_dq_prealloc_space_nodirty(struct inode *inode, qsize_t nr) -{ - if (sb_any_quota_active(inode->i_sb)) { - /* Used space is updated in alloc_space() */ - if (inode->i_sb->dq_op->alloc_space(inode, nr, 1) == NO_QUOTA) - return 1; - } - else - inode_add_bytes(inode, nr); - return 0; -} - -static inline int vfs_dq_prealloc_space(struct inode *inode, qsize_t nr) -{ - int ret; - if (!(ret = vfs_dq_prealloc_space_nodirty(inode, nr))) - mark_inode_dirty(inode); - return ret; -} - -static inline int vfs_dq_alloc_space_nodirty(struct inode *inode, qsize_t nr) -{ - if (sb_any_quota_active(inode->i_sb)) { - /* Used space is updated in alloc_space() */ - if (inode->i_sb->dq_op->alloc_space(inode, nr, 0) == NO_QUOTA) - return 1; - } - else - inode_add_bytes(inode, nr); - return 0; -} - -static inline int vfs_dq_alloc_space(struct inode *inode, qsize_t nr) -{ - int ret; - if (!(ret = vfs_dq_alloc_space_nodirty(inode, nr))) - 
mark_inode_dirty(inode); - return ret; -} - -static inline int vfs_dq_reserve_space(struct inode *inode, qsize_t nr) -{ - if (sb_any_quota_active(inode->i_sb)) { - /* Used space is updated in alloc_space() */ - if (inode->i_sb->dq_op->reserve_space(inode, nr, 0) == NO_QUOTA) - return 1; - } - else - inode_add_rsv_space(inode, nr); - return 0; -} - static inline int vfs_dq_alloc_inode(struct inode *inode) { if (sb_any_quota_active(inode->i_sb)) { @@ -213,47 +158,6 @@ static inline int vfs_dq_alloc_inode(struct inode *inode) return 0; } -/* - * Convert in-memory reserved quotas to real consumed quotas - */ -static inline int vfs_dq_claim_space(struct inode *inode, qsize_t nr) -{ - if (sb_any_quota_active(inode->i_sb)) { - if (inode->i_sb->dq_op->claim_space(inode, nr) == NO_QUOTA) - return 1; - } else - inode_claim_rsv_space(inode, nr); - - mark_inode_dirty(inode); - return 0; -} - -/* - * Release reserved (in-memory) quotas - */ -static inline -void vfs_dq_release_reservation_space(struct inode *inode, qsize_t nr) -{ - if (sb_any_quota_active(inode->i_sb)) - inode->i_sb->dq_op->release_rsv(inode, nr); - else - inode_sub_rsv_space(inode, nr); -} - -static inline void vfs_dq_free_space_nodirty(struct inode *inode, qsize_t nr) -{ - if (sb_any_quota_active(inode->i_sb)) - inode->i_sb->dq_op->free_space(inode, nr); - else - inode_sub_bytes(inode, nr); -} - -static inline void vfs_dq_free_space(struct inode *inode, qsize_t nr) -{ - vfs_dq_free_space_nodirty(inode, nr); - mark_inode_dirty(inode); -} - static inline void vfs_dq_free_inode(struct inode *inode) { if (sb_any_quota_active(inode->i_sb)) @@ -351,105 +255,109 @@ static inline int vfs_dq_transfer(struct inode *inode, struct iattr *iattr) return 0; } -static inline int vfs_dq_prealloc_space_nodirty(struct inode *inode, qsize_t nr) +static inline int __dquot_alloc_space(struct inode *inode, qsize_t number, + int warn, int reserve) { - inode_add_bytes(inode, nr); + if (!reserve) + inode_add_bytes(inode, number); return 0; } -static inline int vfs_dq_prealloc_space(struct inode *inode, qsize_t nr) +static inline void __dquot_free_space(struct inode *inode, qsize_t number, + int reserve) { - vfs_dq_prealloc_space_nodirty(inode, nr); - mark_inode_dirty(inode); - return 0; + if (!reserve) + inode_sub_bytes(inode, number); } -static inline int vfs_dq_alloc_space_nodirty(struct inode *inode, qsize_t nr) +static inline int dquot_claim_space_nodirty(struct inode *inode, qsize_t number) { - inode_add_bytes(inode, nr); + inode_add_bytes(inode, number); return 0; } -static inline int vfs_dq_alloc_space(struct inode *inode, qsize_t nr) +#endif /* CONFIG_QUOTA */ + +static inline int dquot_alloc_space_nodirty(struct inode *inode, qsize_t nr) { - vfs_dq_alloc_space_nodirty(inode, nr); - mark_inode_dirty(inode); - return 0; + return __dquot_alloc_space(inode, nr, 1, 0); } -static inline int vfs_dq_reserve_space(struct inode *inode, qsize_t nr) +static inline int dquot_alloc_space(struct inode *inode, qsize_t nr) { - return 0; + int ret; + + ret = dquot_alloc_space_nodirty(inode, nr); + if (!ret) + mark_inode_dirty(inode); + return ret; } -static inline int vfs_dq_claim_space(struct inode *inode, qsize_t nr) +static inline int dquot_alloc_block_nodirty(struct inode *inode, qsize_t nr) { - return vfs_dq_alloc_space(inode, nr); + return dquot_alloc_space_nodirty(inode, nr << inode->i_blkbits); } -static inline -int vfs_dq_release_reservation_space(struct inode *inode, qsize_t nr) +static inline int dquot_alloc_block(struct inode *inode, qsize_t nr) { - return 
0; + return dquot_alloc_space(inode, nr << inode->i_blkbits); } -static inline void vfs_dq_free_space_nodirty(struct inode *inode, qsize_t nr) +static inline int dquot_prealloc_block_nodirty(struct inode *inode, qsize_t nr) { - inode_sub_bytes(inode, nr); + return __dquot_alloc_space(inode, nr << inode->i_blkbits, 0, 0); } -static inline void vfs_dq_free_space(struct inode *inode, qsize_t nr) +static inline int dquot_prealloc_block(struct inode *inode, qsize_t nr) { - vfs_dq_free_space_nodirty(inode, nr); - mark_inode_dirty(inode); -} - -#endif /* CONFIG_QUOTA */ + int ret; -static inline int vfs_dq_prealloc_block_nodirty(struct inode *inode, qsize_t nr) -{ - return vfs_dq_prealloc_space_nodirty(inode, nr << inode->i_blkbits); + ret = dquot_prealloc_block_nodirty(inode, nr); + if (!ret) + mark_inode_dirty(inode); + return ret; } -static inline int vfs_dq_prealloc_block(struct inode *inode, qsize_t nr) +static inline int dquot_reserve_block(struct inode *inode, qsize_t nr) { - return vfs_dq_prealloc_space(inode, nr << inode->i_blkbits); + return __dquot_alloc_space(inode, nr << inode->i_blkbits, 1, 1); } -static inline int vfs_dq_alloc_block_nodirty(struct inode *inode, qsize_t nr) +static inline int dquot_claim_block(struct inode *inode, qsize_t nr) { - return vfs_dq_alloc_space_nodirty(inode, nr << inode->i_blkbits); -} + int ret; -static inline int vfs_dq_alloc_block(struct inode *inode, qsize_t nr) -{ - return vfs_dq_alloc_space(inode, nr << inode->i_blkbits); + ret = dquot_claim_space_nodirty(inode, nr << inode->i_blkbits); + if (!ret) + mark_inode_dirty(inode); + return ret; } -static inline int vfs_dq_reserve_block(struct inode *inode, qsize_t nr) +static inline void dquot_free_space_nodirty(struct inode *inode, qsize_t nr) { - return vfs_dq_reserve_space(inode, nr << inode->i_blkbits); + __dquot_free_space(inode, nr, 0); } -static inline int vfs_dq_claim_block(struct inode *inode, qsize_t nr) +static inline void dquot_free_space(struct inode *inode, qsize_t nr) { - return vfs_dq_claim_space(inode, nr << inode->i_blkbits); + dquot_free_space_nodirty(inode, nr); + mark_inode_dirty(inode); } -static inline -void vfs_dq_release_reservation_block(struct inode *inode, qsize_t nr) +static inline void dquot_free_block_nodirty(struct inode *inode, qsize_t nr) { - vfs_dq_release_reservation_space(inode, nr << inode->i_blkbits); + dquot_free_space_nodirty(inode, nr << inode->i_blkbits); } -static inline void vfs_dq_free_block_nodirty(struct inode *inode, qsize_t nr) +static inline void dquot_free_block(struct inode *inode, qsize_t nr) { - vfs_dq_free_space_nodirty(inode, nr << inode->i_blkbits); + dquot_free_space(inode, nr << inode->i_blkbits); } -static inline void vfs_dq_free_block(struct inode *inode, qsize_t nr) +static inline void dquot_release_reservation_block(struct inode *inode, + qsize_t nr) { - vfs_dq_free_space(inode, nr << inode->i_blkbits); + __dquot_free_space(inode, nr << inode->i_blkbits, 1); } #endif /* _LINUX_QUOTAOPS_ */ -- cgit v1.2.3 From 63936ddaa16b9486e2d426ed7b09f559a5c60f87 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Mar 2010 09:05:01 -0500 Subject: dquot: cleanup inode allocation / freeing routines Get rid of the alloc_inode and free_inode dquot operations - they are always called from the filesystem and if a filesystem really needs its own (which none currently does) it can just call into its own routine directly.
Also get rid of the vfs_dq_alloc/vfs_dq_free wrappers and always call the low-level dquot_alloc_inode / dquot_free_inode routines directly, which now lose the number argument since it is always 1. Signed-off-by: Christoph Hellwig Signed-off-by: Jan Kara --- Documentation/filesystems/Locking | 8 -------- fs/ext2/ialloc.c | 10 +++++----- fs/ext3/ialloc.c | 10 +++++----- fs/ext3/super.c | 2 -- fs/ext4/ialloc.c | 10 +++++----- fs/ext4/super.c | 2 -- fs/jfs/inode.c | 2 +- fs/jfs/jfs_inode.c | 6 +++--- fs/ocfs2/inode.c | 2 +- fs/ocfs2/namei.c | 30 +++++++++--------------------- fs/ocfs2/quota_global.c | 2 -- fs/quota/dquot.c | 29 +++++++++++++---------------- fs/reiserfs/inode.c | 10 +++++----- fs/reiserfs/super.c | 2 -- fs/udf/ialloc.c | 10 ++++++---- fs/ufs/ialloc.c | 7 ++++--- include/linux/quota.h | 2 -- include/linux/quotaops.h | 24 ++++-------------------- 18 files changed, 61 insertions(+), 107 deletions(-) (limited to 'include') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 1192fde11638..4428f55f2131 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -462,8 +462,6 @@ in sys_read() and friends. prototypes: int (*initialize) (struct inode *, int); int (*drop) (struct inode *); - int (*alloc_inode) (const struct inode *, unsigned long); - int (*free_inode) (const struct inode *, unsigned long); int (*transfer) (struct inode *, struct iattr *); int (*write_dquot) (struct dquot *); int (*acquire_dquot) (struct dquot *); @@ -479,8 +477,6 @@ What filesystem should expect from the generic quota functions: FS recursion Held locks when called initialize: yes maybe dqonoff_sem drop: yes - -alloc_inode: ->mark_dirty() - -free_inode: ->mark_dirty() - transfer: yes - write_dquot: yes dqonoff_sem or dqptr_sem acquire_dquot: yes dqonoff_sem or dqptr_sem @@ -491,10 +487,6 @@ write_info: yes dqonoff_sem FS recursion means calling ->quota_read() and ->quota_write() from superblock operations. -->alloc_inode(), ->free_inode() are called -only directly by the filesystem and do not call any fs functions only -the ->mark_dirty() operation. - More details about quota locking can be found in fs/dquot.c.
--------------------------- vm_operations_struct ----------------------------- diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index 15387c9c17d8..d12f9809559c 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -121,7 +121,7 @@ void ext2_free_inode (struct inode * inode) if (!is_bad_inode(inode)) { /* Quota is already initialized in iput() */ ext2_xattr_delete_inode(inode); - vfs_dq_free_inode(inode); + dquot_free_inode(inode); vfs_dq_drop(inode); } @@ -586,10 +586,10 @@ got: goto fail_drop; } - if (vfs_dq_alloc_inode(inode)) { - err = -EDQUOT; + vfs_dq_init(inode); + err = dquot_alloc_inode(inode); + if (err) goto fail_drop; - } err = ext2_init_acl(inode, dir); if (err) @@ -605,7 +605,7 @@ got: return inode; fail_free_drop: - vfs_dq_free_inode(inode); + dquot_free_inode(inode); fail_drop: vfs_dq_drop(inode); diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index b39991285136..8bf00e997c38 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c @@ -125,7 +125,7 @@ void ext3_free_inode (handle_t *handle, struct inode * inode) */ vfs_dq_init(inode); ext3_xattr_delete_inode(handle, inode); - vfs_dq_free_inode(inode); + dquot_free_inode(inode); vfs_dq_drop(inode); is_directory = S_ISDIR(inode->i_mode); @@ -588,10 +588,10 @@ got: sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0; ret = inode; - if (vfs_dq_alloc_inode(inode)) { - err = -EDQUOT; + vfs_dq_init(inode); + err = dquot_alloc_inode(inode); + if (err) goto fail_drop; - } err = ext3_init_acl(handle, inode, dir); if (err) @@ -619,7 +619,7 @@ really_out: return ret; fail_free_drop: - vfs_dq_free_inode(inode); + dquot_free_inode(inode); fail_drop: vfs_dq_drop(inode); diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 8c13910a3782..8b8bc4f9cb14 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -752,8 +752,6 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type, static const struct dquot_operations ext3_quota_operations = { .initialize = dquot_initialize, .drop = dquot_drop, - .alloc_inode = dquot_alloc_inode, - .free_inode = dquot_free_inode, .transfer = dquot_transfer, .write_dquot = ext3_write_dquot, .acquire_dquot = ext3_acquire_dquot, diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index f3624ead4f6c..b0d744cf8b95 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -219,7 +219,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) */ vfs_dq_init(inode); ext4_xattr_delete_inode(handle, inode); - vfs_dq_free_inode(inode); + dquot_free_inode(inode); vfs_dq_drop(inode); is_directory = S_ISDIR(inode->i_mode); @@ -1034,10 +1034,10 @@ got: ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; ret = inode; - if (vfs_dq_alloc_inode(inode)) { - err = -EDQUOT; + vfs_dq_init(inode); + err = dquot_alloc_inode(inode); + if (err) goto fail_drop; - } err = ext4_init_acl(handle, inode, dir); if (err) @@ -1074,7 +1074,7 @@ really_out: return ret; fail_free_drop: - vfs_dq_free_inode(inode); + dquot_free_inode(inode); fail_drop: vfs_dq_drop(inode); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index fa8f4deda652..d231da8798e3 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1017,8 +1017,6 @@ static const struct dquot_operations ext4_quota_operations = { #ifdef CONFIG_QUOTA .get_reserved_space = ext4_get_reserved_space, #endif - .alloc_inode = dquot_alloc_inode, - .free_inode = dquot_free_inode, .transfer = dquot_transfer, .write_dquot = ext4_write_dquot, .acquire_dquot = ext4_acquire_dquot, diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index b2ae190a77ba..2562d18988f7 100644 --- a/fs/jfs/inode.c +++ 
b/fs/jfs/inode.c @@ -159,7 +159,7 @@ void jfs_delete_inode(struct inode *inode) * Free the inode from the quota allocation. */ vfs_dq_init(inode); - vfs_dq_free_inode(inode); + dquot_free_inode(inode); vfs_dq_drop(inode); } diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c index dc0e02159ac9..7762f33e062b 100644 --- a/fs/jfs/jfs_inode.c +++ b/fs/jfs/jfs_inode.c @@ -116,10 +116,10 @@ struct inode *ialloc(struct inode *parent, umode_t mode) /* * Allocate inode to quota. */ - if (vfs_dq_alloc_inode(inode)) { - rc = -EDQUOT; + vfs_dq_init(inode); + rc = dquot_alloc_inode(inode); + if (rc) goto fail_drop; - } inode->i_mode = mode; /* inherit flags from parent */ diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 88459bdd1ff3..cb7f67d8441a 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -665,7 +665,7 @@ static int ocfs2_remove_inode(struct inode *inode, } ocfs2_remove_from_cache(INODE_CACHE(inode), di_bh); - vfs_dq_free_inode(inode); + dquot_free_inode(inode); status = ocfs2_free_dinode(handle, inode_alloc_inode, inode_alloc_bh, di); diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 13adaa1f40cd..99766b6418eb 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -348,13 +348,9 @@ static int ocfs2_mknod(struct inode *dir, goto leave; } - /* We don't use standard VFS wrapper because we don't want vfs_dq_init - * to be called. */ - if (sb_any_quota_active(osb->sb) && - osb->sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) { - status = -EDQUOT; + status = dquot_alloc_inode(inode); + if (status) goto leave; - } did_quota_inode = 1; mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, @@ -431,7 +427,7 @@ static int ocfs2_mknod(struct inode *dir, status = 0; leave: if (status < 0 && did_quota_inode) - vfs_dq_free_inode(inode); + dquot_free_inode(inode); if (handle) ocfs2_commit_trans(osb, handle); @@ -1688,13 +1684,9 @@ static int ocfs2_symlink(struct inode *dir, goto bail; } - /* We don't use standard VFS wrapper because we don't want vfs_dq_init - * to be called. */ - if (sb_any_quota_active(osb->sb) && - osb->sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) { - status = -EDQUOT; + status = dquot_alloc_inode(inode); + if (status) goto bail; - } did_quota_inode = 1; mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", dir, dentry, @@ -1790,7 +1782,7 @@ bail: dquot_free_space_nodirty(inode, ocfs2_clusters_to_bytes(osb->sb, 1)); if (status < 0 && did_quota_inode) - vfs_dq_free_inode(inode); + dquot_free_inode(inode); if (handle) ocfs2_commit_trans(osb, handle); @@ -2098,13 +2090,9 @@ int ocfs2_create_inode_in_orphan(struct inode *dir, goto leave; } - /* We don't use standard VFS wrapper because we don't want vfs_dq_init - * to be called. 
*/ - if (sb_any_quota_active(osb->sb) && - osb->sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) { - status = -EDQUOT; + status = dquot_alloc_inode(inode); + if (status) goto leave; - } did_quota_inode = 1; inode->i_nlink = 0; @@ -2139,7 +2127,7 @@ int ocfs2_create_inode_in_orphan(struct inode *dir, insert_inode_hash(inode); leave: if (status < 0 && did_quota_inode) - vfs_dq_free_inode(inode); + dquot_free_inode(inode); if (handle) ocfs2_commit_trans(osb, handle); diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index aa66fb277225..ed96b3eeb13c 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -853,8 +853,6 @@ static void ocfs2_destroy_dquot(struct dquot *dquot) const struct dquot_operations ocfs2_quota_operations = { .initialize = dquot_initialize, .drop = dquot_drop, - .alloc_inode = dquot_alloc_inode, - .free_inode = dquot_free_inode, .transfer = dquot_transfer, .write_dquot = ocfs2_write_dquot, .acquire_dquot = ocfs2_acquire_dquot, diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index baf202c012cc..ed131318b849 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -1531,15 +1531,15 @@ EXPORT_SYMBOL(__dquot_alloc_space); /* * This operation can block, but only after everything is updated */ -int dquot_alloc_inode(const struct inode *inode, qsize_t number) +int dquot_alloc_inode(const struct inode *inode) { - int cnt, ret = NO_QUOTA; + int cnt, ret = -EDQUOT; char warntype[MAXQUOTAS]; /* First test before acquiring mutex - solves deadlocks when we * re-enter the quota code and are already holding the mutex */ - if (IS_NOQUOTA(inode)) - return QUOTA_OK; + if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) + return 0; for (cnt = 0; cnt < MAXQUOTAS; cnt++) warntype[cnt] = QUOTA_NL_NOWARN; down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); @@ -1547,7 +1547,7 @@ int dquot_alloc_inode(const struct inode *inode, qsize_t number) for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (!inode->i_dquot[cnt]) continue; - if (check_idq(inode->i_dquot[cnt], number, warntype+cnt) + if (check_idq(inode->i_dquot[cnt], 1, warntype+cnt) == NO_QUOTA) goto warn_put_all; } @@ -1555,12 +1555,12 @@ int dquot_alloc_inode(const struct inode *inode, qsize_t number) for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (!inode->i_dquot[cnt]) continue; - dquot_incr_inodes(inode->i_dquot[cnt], number); + dquot_incr_inodes(inode->i_dquot[cnt], 1); } - ret = QUOTA_OK; + ret = 0; warn_put_all: spin_unlock(&dq_data_lock); - if (ret == QUOTA_OK) + if (ret == 0) mark_all_dquot_dirty(inode->i_dquot); flush_warnings(inode->i_dquot, warntype); up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); @@ -1638,29 +1638,28 @@ EXPORT_SYMBOL(__dquot_free_space); /* * This operation can block, but only after everything is updated */ -int dquot_free_inode(const struct inode *inode, qsize_t number) +void dquot_free_inode(const struct inode *inode) { unsigned int cnt; char warntype[MAXQUOTAS]; /* First test before acquiring mutex - solves deadlocks when we * re-enter the quota code and are already holding the mutex */ - if (IS_NOQUOTA(inode)) - return QUOTA_OK; + if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) + return; down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (!inode->i_dquot[cnt]) continue; - warntype[cnt] = info_idq_free(inode->i_dquot[cnt], number); - dquot_decr_inodes(inode->i_dquot[cnt], number); + warntype[cnt] = info_idq_free(inode->i_dquot[cnt], 1); + dquot_decr_inodes(inode->i_dquot[cnt], 1); } spin_unlock(&dq_data_lock); 
mark_all_dquot_dirty(inode->i_dquot); flush_warnings(inode->i_dquot, warntype); up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); - return QUOTA_OK; } EXPORT_SYMBOL(dquot_free_inode); @@ -1815,8 +1814,6 @@ EXPORT_SYMBOL(dquot_commit_info); const struct dquot_operations dquot_operations = { .initialize = dquot_initialize, .drop = dquot_drop, - .alloc_inode = dquot_alloc_inode, - .free_inode = dquot_free_inode, .transfer = dquot_transfer, .write_dquot = dquot_commit, .acquire_dquot = dquot_acquire, diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 2df0f5c7c60b..f56a3d2e6497 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -54,7 +54,7 @@ void reiserfs_delete_inode(struct inode *inode) * after delete_object so that quota updates go into the same transaction as * stat data deletion */ if (!err) - vfs_dq_free_inode(inode); + dquot_free_inode(inode); if (journal_end(&th, inode->i_sb, jbegin_count)) goto out; @@ -1765,10 +1765,10 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, BUG_ON(!th->t_trans_id); - if (vfs_dq_alloc_inode(inode)) { - err = -EDQUOT; + vfs_dq_init(inode); + err = dquot_alloc_inode(inode); + if (err) goto out_end_trans; - } if (!dir->i_nlink) { err = -EPERM; goto out_bad_inode; @@ -1959,7 +1959,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, INODE_PKEY(inode)->k_objectid = 0; /* Quota change must be inside a transaction for journaling */ - vfs_dq_free_inode(inode); + dquot_free_inode(inode); out_end_trans: journal_end(th, th->t_super, th->t_blocks_allocated); diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index ea4a77e9d7f5..e942ceecf2b8 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -618,8 +618,6 @@ static int reiserfs_quota_on(struct super_block *, int, int, char *, int); static const struct dquot_operations reiserfs_quota_operations = { .initialize = dquot_initialize, .drop = dquot_drop, - .alloc_inode = dquot_alloc_inode, - .free_inode = dquot_free_inode, .transfer = dquot_transfer, .write_dquot = reiserfs_write_dquot, .acquire_dquot = reiserfs_acquire_dquot, diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index c10fa39f97e2..e1856b89c9c8 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c @@ -36,7 +36,7 @@ void udf_free_inode(struct inode *inode) * Note: we must free any quota before locking the superblock, * as writing the quota to disk may need the lock as well. 
*/ - vfs_dq_free_inode(inode); + dquot_free_inode(inode); vfs_dq_drop(inode); clear_inode(inode); @@ -61,7 +61,7 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err) struct super_block *sb = dir->i_sb; struct udf_sb_info *sbi = UDF_SB(sb); struct inode *inode; - int block; + int block, ret; uint32_t start = UDF_I(dir)->i_location.logicalBlockNum; struct udf_inode_info *iinfo; struct udf_inode_info *dinfo = UDF_I(dir); @@ -153,12 +153,14 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err) insert_inode_hash(inode); mark_inode_dirty(inode); - if (vfs_dq_alloc_inode(inode)) { + vfs_dq_init(inode); + ret = dquot_alloc_inode(inode); + if (ret) { vfs_dq_drop(inode); inode->i_flags |= S_NOQUOTA; inode->i_nlink = 0; iput(inode); - *err = -EDQUOT; + *err = ret; return NULL; } diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 3527c00fef0d..02f77882c573 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -95,7 +95,7 @@ void ufs_free_inode (struct inode * inode) is_directory = S_ISDIR(inode->i_mode); - vfs_dq_free_inode(inode); + dquot_free_inode(inode); vfs_dq_drop(inode); clear_inode (inode); @@ -355,9 +355,10 @@ cg_found: unlock_super (sb); - if (vfs_dq_alloc_inode(inode)) { + vfs_dq_init(inode); + err = dquot_alloc_inode(inode); + if (err) { vfs_dq_drop(inode); - err = -EDQUOT; goto fail_without_unlock; } diff --git a/include/linux/quota.h b/include/linux/quota.h index 1b14ad287fe3..e3b07895d327 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -297,8 +297,6 @@ struct quota_format_ops { struct dquot_operations { int (*initialize) (struct inode *, int); int (*drop) (struct inode *); - int (*alloc_inode) (const struct inode *, qsize_t); - int (*free_inode) (const struct inode *, qsize_t); int (*transfer) (struct inode *, qid_t *, unsigned long); int (*write_dquot) (struct dquot *); /* Ordinary dquot write */ struct dquot *(*alloc_dquot)(struct super_block *, int); /* Allocate memory for new dquot */ diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 47e85682e118..9ce7f051a4ba 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -37,10 +37,10 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int warn, int reserve); void __dquot_free_space(struct inode *inode, qsize_t number, int reserve); -int dquot_alloc_inode(const struct inode *inode, qsize_t number); +int dquot_alloc_inode(const struct inode *inode); int dquot_claim_space_nodirty(struct inode *inode, qsize_t number); -int dquot_free_inode(const struct inode *inode, qsize_t number); +void dquot_free_inode(const struct inode *inode); int dquot_transfer(struct inode *inode, qid_t *chid, unsigned long mask); int dquot_commit(struct dquot *dquot); @@ -148,22 +148,6 @@ static inline void vfs_dq_init(struct inode *inode) inode->i_sb->dq_op->initialize(inode, -1); } -static inline int vfs_dq_alloc_inode(struct inode *inode) -{ - if (sb_any_quota_active(inode->i_sb)) { - vfs_dq_init(inode); - if (inode->i_sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) - return 1; - } - return 0; -} - -static inline void vfs_dq_free_inode(struct inode *inode) -{ - if (sb_any_quota_active(inode->i_sb)) - inode->i_sb->dq_op->free_inode(inode, 1); -} - /* Cannot be called inside a transaction */ static inline int vfs_dq_off(struct super_block *sb, int remount) { @@ -231,12 +215,12 @@ static inline void vfs_dq_drop(struct inode *inode) { } -static inline int vfs_dq_alloc_inode(struct inode *inode) +static inline int dquot_alloc_inode(const struct inode *inode) { return 0; 
} -static inline void vfs_dq_free_inode(struct inode *inode) +static inline void dquot_free_inode(const struct inode *inode) { } -- cgit v1.2.3 From b43fa8284d7790d9cca32c9c55e24f29be2fa33b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Mar 2010 09:05:03 -0500 Subject: dquot: cleanup dquot transfer routine Get rid of the transfer dquot operation - it is now always called from the filesystem and if a filesystem really needs its own (which none currently does) it can just call into its own routine directly. Rename the now static low-level dquot_transfer helper to __dquot_transfer and vfs_dq_transfer to dquot_transfer to have a consistent namespace, and make the new dquot_transfer return a normal negative errno value which all callers expect. Signed-off-by: Christoph Hellwig Signed-off-by: Jan Kara --- Documentation/filesystems/Locking | 2 -- drivers/staging/pohmelfs/inode.c | 2 +- fs/ext2/inode.c | 2 +- fs/ext3/inode.c | 2 +- fs/ext3/super.c | 1 - fs/ext4/inode.c | 2 +- fs/ext4/super.c | 1 - fs/jfs/file.c | 5 +++-- fs/ocfs2/file.c | 4 ++-- fs/ocfs2/quota_global.c | 1 - fs/quota/dquot.c | 12 +++++------- fs/reiserfs/inode.c | 3 +-- fs/reiserfs/super.c | 1 - fs/udf/file.c | 2 +- fs/ufs/truncate.c | 2 +- include/linux/quota.h | 1 - include/linux/quotaops.h | 5 ++--- 17 files changed, 19 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 4428f55f2131..4574e0272bdd 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -462,7 +462,6 @@ in sys_read() and friends. prototypes: int (*initialize) (struct inode *, int); int (*drop) (struct inode *); - int (*transfer) (struct inode *, struct iattr *); int (*write_dquot) (struct dquot *); int (*acquire_dquot) (struct dquot *); int (*release_dquot) (struct dquot *); @@ -477,7 +476,6 @@ What filesystem should expect from the generic quota functions: FS recursion Held locks when called initialize: yes maybe dqonoff_sem drop: yes - -transfer: yes - write_dquot: yes dqonoff_sem or dqptr_sem acquire_dquot: yes dqonoff_sem or dqptr_sem release_dquot: yes dqonoff_sem or dqptr_sem diff --git a/drivers/staging/pohmelfs/inode.c b/drivers/staging/pohmelfs/inode.c index f69b7783027f..11fc4d5c43e1 100644 --- a/drivers/staging/pohmelfs/inode.c +++ b/drivers/staging/pohmelfs/inode.c @@ -969,7 +969,7 @@ int pohmelfs_setattr_raw(struct inode *inode, struct iattr *attr) if ((attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { - err = vfs_dq_transfer(inode, attr) ? -EDQUOT : 0; + err = dquot_transfer(inode, attr); if (err) goto err_out_exit; } diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 71b032c65a02..3cfcfd9a131a 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -1459,7 +1459,7 @@ int ext2_setattr(struct dentry *dentry, struct iattr *iattr) return error; if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { - error = vfs_dq_transfer(inode, iattr) ? -EDQUOT : 0; + error = dquot_transfer(inode, iattr); if (error) return error; } diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 20f02d69365c..14d40a4dd6f0 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -3160,7 +3160,7 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr) error = PTR_ERR(handle); goto err_out; } - error = vfs_dq_transfer(inode, attr) ? 
-EDQUOT : 0; + error = dquot_transfer(inode, attr); if (error) { ext3_journal_stop(handle); return error; diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 8b8bc4f9cb14..f7d4a2c19dee 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -752,7 +752,6 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type, static const struct dquot_operations ext3_quota_operations = { .initialize = dquot_initialize, .drop = dquot_drop, - .transfer = dquot_transfer, .write_dquot = ext3_write_dquot, .acquire_dquot = ext3_acquire_dquot, .release_dquot = ext3_release_dquot, diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 9f607ea411c8..6a002a6d0624 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5263,7 +5263,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) error = PTR_ERR(handle); goto err_out; } - error = vfs_dq_transfer(inode, attr) ? -EDQUOT : 0; + error = dquot_transfer(inode, attr); if (error) { ext4_journal_stop(handle); return error; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index d231da8798e3..b4253fb7bab6 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1017,7 +1017,6 @@ static const struct dquot_operations ext4_quota_operations = { #ifdef CONFIG_QUOTA .get_reserved_space = ext4_get_reserved_space, #endif - .transfer = dquot_transfer, .write_dquot = ext4_write_dquot, .acquire_dquot = ext4_acquire_dquot, .release_dquot = ext4_release_dquot, diff --git a/fs/jfs/file.c b/fs/jfs/file.c index a4229e49330e..2c201783836f 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c @@ -100,8 +100,9 @@ int jfs_setattr(struct dentry *dentry, struct iattr *iattr) if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { - if (vfs_dq_transfer(inode, iattr)) - return -EDQUOT; + rc = dquot_transfer(inode, iattr); + if (rc) + return rc; } rc = inode_setattr(inode, iattr); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 6cf3d8d18369..472e8f8bc892 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1020,7 +1020,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) /* * Gather pointers to quota structures so that allocation / * freeing of quota structures happens here and not inside - * vfs_dq_transfer() where we have problems with lock ordering + * dquot_transfer() where we have problems with lock ordering */ if (attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid && OCFS2_HAS_RO_COMPAT_FEATURE(sb, @@ -1053,7 +1053,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) mlog_errno(status); goto bail_unlock; } - status = vfs_dq_transfer(inode, attr) ? -EDQUOT : 0; + status = dquot_transfer(inode, attr); if (status < 0) goto bail_commit; } else { diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index ed96b3eeb13c..b654bd103b6f 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -853,7 +853,6 @@ static void ocfs2_destroy_dquot(struct dquot *dquot) const struct dquot_operations ocfs2_quota_operations = { .initialize = dquot_initialize, .drop = dquot_drop, - .transfer = dquot_transfer, .write_dquot = ocfs2_write_dquot, .acquire_dquot = ocfs2_acquire_dquot, .release_dquot = ocfs2_release_dquot, diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index ed131318b849..78ce4c48ad77 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -1669,7 +1669,7 @@ EXPORT_SYMBOL(dquot_free_inode); * This operation can block, but only after everything is updated * A transaction must be started when entering this function. 
*/ -int dquot_transfer(struct inode *inode, qid_t *chid, unsigned long mask) +static int __dquot_transfer(struct inode *inode, qid_t *chid, unsigned long mask) { qsize_t space, cur_space; qsize_t rsv_space = 0; @@ -1766,12 +1766,11 @@ over_quota: ret = NO_QUOTA; goto warn_put_all; } -EXPORT_SYMBOL(dquot_transfer); /* Wrapper for transferring ownership of an inode for uid/gid only * Called from FSXXX_setattr() */ -int vfs_dq_transfer(struct inode *inode, struct iattr *iattr) +int dquot_transfer(struct inode *inode, struct iattr *iattr) { qid_t chid[MAXQUOTAS]; unsigned long mask = 0; @@ -1786,12 +1785,12 @@ int vfs_dq_transfer(struct inode *inode, struct iattr *iattr) } if (sb_any_quota_active(inode->i_sb) && !IS_NOQUOTA(inode)) { vfs_dq_init(inode); - if (inode->i_sb->dq_op->transfer(inode, chid, mask) == NO_QUOTA) - return 1; + if (__dquot_transfer(inode, chid, mask) == NO_QUOTA) + return -EDQUOT; } return 0; } -EXPORT_SYMBOL(vfs_dq_transfer); +EXPORT_SYMBOL(dquot_transfer); /* * Write info of quota file to disk @@ -1814,7 +1813,6 @@ EXPORT_SYMBOL(dquot_commit_info); const struct dquot_operations dquot_operations = { .initialize = dquot_initialize, .drop = dquot_drop, - .transfer = dquot_transfer, .write_dquot = dquot_commit, .acquire_dquot = dquot_acquire, .release_dquot = dquot_release, diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index f56a3d2e6497..99a5e5a8ab5a 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -3134,8 +3134,7 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) jbegin_count); if (error) goto out; - error = - vfs_dq_transfer(inode, attr) ? -EDQUOT : 0; + error = dquot_transfer(inode, attr); if (error) { journal_end(&th, inode->i_sb, jbegin_count); diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index e942ceecf2b8..97c3e8ed7db6 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -618,7 +618,6 @@ static int reiserfs_quota_on(struct super_block *, int, int, char *, int); static const struct dquot_operations reiserfs_quota_operations = { .initialize = dquot_initialize, .drop = dquot_drop, - .transfer = dquot_transfer, .write_dquot = reiserfs_write_dquot, .acquire_dquot = reiserfs_acquire_dquot, .release_dquot = reiserfs_release_dquot, diff --git a/fs/udf/file.c b/fs/udf/file.c index 35ca47281faa..2df7fcb677b3 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -229,7 +229,7 @@ static int udf_setattr(struct dentry *dentry, struct iattr *iattr) if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { - error = vfs_dq_transfer(inode, iattr) ? -EDQUOT : 0; + error = dquot_transfer(inode, iattr); if (error) return error; } diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index 56ab31f00bd0..87bbab685901 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c @@ -520,7 +520,7 @@ static int ufs_setattr(struct dentry *dentry, struct iattr *attr) if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { - error = vfs_dq_transfer(inode, attr) ? 
-EDQUOT : 0; + error = dquot_transfer(inode, attr); if (error) return error; } diff --git a/include/linux/quota.h b/include/linux/quota.h index e3b07895d327..422e6aa78edc 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -297,7 +297,6 @@ struct quota_format_ops { struct dquot_operations { int (*initialize) (struct inode *, int); int (*drop) (struct inode *); - int (*transfer) (struct inode *, qid_t *, unsigned long); int (*write_dquot) (struct dquot *); /* Ordinary dquot write */ struct dquot *(*alloc_dquot)(struct super_block *, int); /* Allocate memory for new dquot */ void (*destroy_dquot)(struct dquot *); /* Free memory for dquot */ diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 9ce7f051a4ba..fa27b7218c82 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -42,7 +42,6 @@ int dquot_alloc_inode(const struct inode *inode); int dquot_claim_space_nodirty(struct inode *inode, qsize_t number); void dquot_free_inode(const struct inode *inode); -int dquot_transfer(struct inode *inode, qid_t *chid, unsigned long mask); int dquot_commit(struct dquot *dquot); int dquot_acquire(struct dquot *dquot); int dquot_release(struct dquot *dquot); @@ -66,7 +65,7 @@ int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *d int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di); void vfs_dq_drop(struct inode *inode); -int vfs_dq_transfer(struct inode *inode, struct iattr *iattr); +int dquot_transfer(struct inode *inode, struct iattr *iattr); int vfs_dq_quota_on_remount(struct super_block *sb); static inline struct mem_dqinfo *sb_dqinfo(struct super_block *sb, int type) @@ -234,7 +233,7 @@ static inline int vfs_dq_quota_on_remount(struct super_block *sb) return 0; } -static inline int vfs_dq_transfer(struct inode *inode, struct iattr *iattr) +static inline int dquot_transfer(struct inode *inode, struct iattr *iattr) { return 0; } -- cgit v1.2.3 From 9f7547580263d4a55efe06ce5cfd567f568be6e8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Mar 2010 09:05:05 -0500 Subject: dquot: cleanup dquot drop routine Get rid of the drop dquot operation - it is now always called from the filesystem and if a filesystem really needs its own (which none currently does) it can just call into its own routine directly. Rename the now static low-level dquot_drop helper to __dquot_drop and vfs_dq_drop to dquot_drop to have a consistent namespace. Signed-off-by: Christoph Hellwig Signed-off-by: Jan Kara --- Documentation/filesystems/Locking | 2 -- fs/ext2/ialloc.c | 4 +-- fs/ext2/super.c | 2 +- fs/ext3/ialloc.c | 4 +-- fs/ext3/super.c | 3 +-- fs/ext4/ialloc.c | 4 +-- fs/ext4/super.c | 3 +-- fs/jfs/inode.c | 2 +- fs/jfs/jfs_inode.c | 2 +- fs/jfs/super.c | 2 +- fs/ocfs2/inode.c | 2 +- fs/ocfs2/quota_global.c | 1 - fs/quota/dquot.c | 52 +++++++++++++++++++-------------------- fs/reiserfs/inode.c | 2 +- fs/reiserfs/namei.c | 2 +- fs/reiserfs/super.c | 3 +-- fs/udf/ialloc.c | 4 +-- fs/udf/inode.c | 2 +- fs/ufs/ialloc.c | 4 +-- fs/ufs/super.c | 2 +- include/linux/quota.h | 1 - include/linux/quotaops.h | 5 ++-- 22 files changed, 49 insertions(+), 59 deletions(-) (limited to 'include') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 4574e0272bdd..fa10e4bf8e5e 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -461,7 +461,6 @@ in sys_read() and friends. 
--------------------------- dquot_operations ------------------------------- prototypes: int (*initialize) (struct inode *, int); - int (*drop) (struct inode *); int (*write_dquot) (struct dquot *); int (*acquire_dquot) (struct dquot *); int (*release_dquot) (struct dquot *); @@ -475,7 +474,6 @@ What filesystem should expect from the generic quota functions: FS recursion Held locks when called initialize: yes maybe dqonoff_sem -drop: yes - write_dquot: yes dqonoff_sem or dqptr_sem acquire_dquot: yes dqonoff_sem or dqptr_sem release_dquot: yes dqonoff_sem or dqptr_sem diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index d12f9809559c..88b71972c626 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -122,7 +122,7 @@ void ext2_free_inode (struct inode * inode) /* Quota is already initialized in iput() */ ext2_xattr_delete_inode(inode); dquot_free_inode(inode); - vfs_dq_drop(inode); + dquot_drop(inode); } es = EXT2_SB(sb)->s_es; @@ -608,7 +608,7 @@ fail_free_drop: dquot_free_inode(inode); fail_drop: - vfs_dq_drop(inode); + dquot_drop(inode); inode->i_flags |= S_NOQUOTA; inode->i_nlink = 0; unlock_new_inode(inode); diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 98815d2a5664..42e4a303b675 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -195,7 +195,7 @@ static void ext2_clear_inode(struct inode *inode) { struct ext2_block_alloc_info *rsv = EXT2_I(inode)->i_block_alloc_info; - vfs_dq_drop(inode); + dquot_drop(inode); ext2_discard_reservation(inode); EXT2_I(inode)->i_block_alloc_info = NULL; if (unlikely(rsv)) diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index 8bf00e997c38..7d7238f9f6f3 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c @@ -126,7 +126,7 @@ void ext3_free_inode (handle_t *handle, struct inode * inode) vfs_dq_init(inode); ext3_xattr_delete_inode(handle, inode); dquot_free_inode(inode); - vfs_dq_drop(inode); + dquot_drop(inode); is_directory = S_ISDIR(inode->i_mode); @@ -622,7 +622,7 @@ fail_free_drop: dquot_free_inode(inode); fail_drop: - vfs_dq_drop(inode); + dquot_drop(inode); inode->i_flags |= S_NOQUOTA; inode->i_nlink = 0; unlock_new_inode(inode); diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 2277b1a98e62..0163d0dae124 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -529,7 +529,7 @@ static void ext3_clear_inode(struct inode *inode) { struct ext3_block_alloc_info *rsv = EXT3_I(inode)->i_block_alloc_info; - vfs_dq_drop(inode); + dquot_drop(inode); ext3_discard_reservation(inode); EXT3_I(inode)->i_block_alloc_info = NULL; if (unlikely(rsv)) @@ -753,7 +753,6 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type, static const struct dquot_operations ext3_quota_operations = { .initialize = dquot_initialize, - .drop = dquot_drop, .write_dquot = ext3_write_dquot, .acquire_dquot = ext3_acquire_dquot, .release_dquot = ext3_release_dquot, diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index b0d744cf8b95..ca8986e4b528 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -220,7 +220,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) vfs_dq_init(inode); ext4_xattr_delete_inode(handle, inode); dquot_free_inode(inode); - vfs_dq_drop(inode); + dquot_drop(inode); is_directory = S_ISDIR(inode->i_mode); @@ -1077,7 +1077,7 @@ fail_free_drop: dquot_free_inode(inode); fail_drop: - vfs_dq_drop(inode); + dquot_drop(inode); inode->i_flags |= S_NOQUOTA; inode->i_nlink = 0; unlock_new_inode(inode); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 56554c8850ec..035516c80df2 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -761,7 
+761,7 @@ static void destroy_inodecache(void) static void ext4_clear_inode(struct inode *inode) { - vfs_dq_drop(inode); + dquot_drop(inode); ext4_discard_preallocations(inode); if (EXT4_JOURNAL(inode)) jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, @@ -1014,7 +1014,6 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, static const struct dquot_operations ext4_quota_operations = { .initialize = dquot_initialize, - .drop = dquot_drop, #ifdef CONFIG_QUOTA .get_reserved_space = ext4_get_reserved_space, #endif diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 2562d18988f7..22fa412c5289 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -160,7 +160,7 @@ void jfs_delete_inode(struct inode *inode) */ vfs_dq_init(inode); dquot_free_inode(inode); - vfs_dq_drop(inode); + dquot_drop(inode); } clear_inode(inode); diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c index 7762f33e062b..72b30895422c 100644 --- a/fs/jfs/jfs_inode.c +++ b/fs/jfs/jfs_inode.c @@ -162,7 +162,7 @@ struct inode *ialloc(struct inode *parent, umode_t mode) return inode; fail_drop: - vfs_dq_drop(inode); + dquot_drop(inode); inode->i_flags |= S_NOQUOTA; fail_unlock: inode->i_nlink = 0; diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 4086fa593419..266699deb1c6 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -133,7 +133,7 @@ static void jfs_destroy_inode(struct inode *inode) static void jfs_clear_inode(struct inode *inode) { - vfs_dq_drop(inode); + dquot_drop(inode); } static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf) diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 13eb5d467c40..00eb6a095e68 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -1087,7 +1087,7 @@ void ocfs2_clear_inode(struct inode *inode) mlog_bug_on_msg(OCFS2_SB(inode->i_sb) == NULL, "Inode=%lu\n", inode->i_ino); - vfs_dq_drop(inode); + dquot_drop(inode); /* To preven remote deletes we hold open lock before, now it * is time to unlock PR and EX open locks. */ diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index b654bd103b6f..4dca38f487cf 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -852,7 +852,6 @@ static void ocfs2_destroy_dquot(struct dquot *dquot) const struct dquot_operations ocfs2_quota_operations = { .initialize = dquot_initialize, - .drop = dquot_drop, .write_dquot = ocfs2_write_dquot, .acquire_dquot = ocfs2_acquire_dquot, .release_dquot = ocfs2_release_dquot, diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 78ce4c48ad77..cd83c5b871ba 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -1358,7 +1358,7 @@ EXPORT_SYMBOL(dquot_initialize); /* * Release all quotas referenced by inode */ -int dquot_drop(struct inode *inode) +static void __dquot_drop(struct inode *inode) { int cnt; struct dquot *put[MAXQUOTAS]; @@ -1370,32 +1370,31 @@ int dquot_drop(struct inode *inode) } up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); dqput_all(put); - return 0; } -EXPORT_SYMBOL(dquot_drop); -/* Wrapper to remove references to quota structures from inode */ -void vfs_dq_drop(struct inode *inode) -{ - /* Here we can get arbitrary inode from clear_inode() so we have - * to be careful. OTOH we don't need locking as quota operations - * are allowed to change only at mount time */ - if (!IS_NOQUOTA(inode) && inode->i_sb && inode->i_sb->dq_op - && inode->i_sb->dq_op->drop) { - int cnt; - /* Test before calling to rule out calls from proc and such - * where we are not allowed to block. 
Note that this is - * actually reliable test even without the lock - the caller - * must assure that nobody can come after the DQUOT_DROP and - * add quota pointers back anyway */ - for (cnt = 0; cnt < MAXQUOTAS; cnt++) - if (inode->i_dquot[cnt]) - break; - if (cnt < MAXQUOTAS) - inode->i_sb->dq_op->drop(inode); - } -} -EXPORT_SYMBOL(vfs_dq_drop); +void dquot_drop(struct inode *inode) +{ + int cnt; + + if (IS_NOQUOTA(inode)) + return; + + /* + * Test before calling to rule out calls from proc and such + * where we are not allowed to block. Note that this is + * actually reliable test even without the lock - the caller + * must assure that nobody can come after the DQUOT_DROP and + * add quota pointers back anyway. + */ + for (cnt = 0; cnt < MAXQUOTAS; cnt++) { + if (inode->i_dquot[cnt]) + break; + } + + if (cnt < MAXQUOTAS) + __dquot_drop(inode); +} +EXPORT_SYMBOL(dquot_drop); /* * inode_reserved_space is managed internally by quota, and protected by @@ -1812,7 +1811,6 @@ EXPORT_SYMBOL(dquot_commit_info); */ const struct dquot_operations dquot_operations = { .initialize = dquot_initialize, - .drop = dquot_drop, .write_dquot = dquot_commit, .acquire_dquot = dquot_acquire, .release_dquot = dquot_release, @@ -2029,7 +2027,7 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id, * When S_NOQUOTA is set, remove dquot references as no more * references can be added */ - sb->dq_op->drop(inode); + __dquot_drop(inode); } error = -EIO; diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 99a5e5a8ab5a..f07c3b69247d 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -1964,7 +1964,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, out_end_trans: journal_end(th, th->t_super, th->t_blocks_allocated); /* Drop can be outside and it needs more credits so it's better to have it outside */ - vfs_dq_drop(inode); + dquot_drop(inode); inode->i_flags |= S_NOQUOTA; make_bad_inode(inode); diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 9d4dcf0b07cb..9dea84e8a79a 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -546,7 +546,7 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th, */ static int drop_new_inode(struct inode *inode) { - vfs_dq_drop(inode); + dquot_drop(inode); make_bad_inode(inode); inode->i_flags |= S_NOQUOTA; iput(inode); diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 6b24e70e329b..34f7cd0cb02d 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -580,7 +580,7 @@ out: static void reiserfs_clear_inode(struct inode *inode) { - vfs_dq_drop(inode); + dquot_drop(inode); } #ifdef CONFIG_QUOTA @@ -623,7 +623,6 @@ static int reiserfs_quota_on(struct super_block *, int, int, char *, int); static const struct dquot_operations reiserfs_quota_operations = { .initialize = dquot_initialize, - .drop = dquot_drop, .write_dquot = reiserfs_write_dquot, .acquire_dquot = reiserfs_acquire_dquot, .release_dquot = reiserfs_release_dquot, diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index e1856b89c9c8..15c6e992e587 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c @@ -37,7 +37,7 @@ void udf_free_inode(struct inode *inode) * as writing the quota to disk may need the lock as well. 
*/ dquot_free_inode(inode); - vfs_dq_drop(inode); + dquot_drop(inode); clear_inode(inode); @@ -156,7 +156,7 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err) vfs_dq_init(inode); ret = dquot_alloc_inode(inode); if (ret) { - vfs_dq_drop(inode); + dquot_drop(inode); inode->i_flags |= S_NOQUOTA; inode->i_nlink = 0; iput(inode); diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 859389a3832b..1199e8e21ee2 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -109,7 +109,7 @@ void udf_clear_inode(struct inode *inode) (unsigned long long)iinfo->i_lenExtents); } - vfs_dq_drop(inode); + dquot_drop(inode); kfree(iinfo->i_ext.i_data); iinfo->i_ext.i_data = NULL; } diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 02f77882c573..67b4bdb056fb 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -96,7 +96,7 @@ void ufs_free_inode (struct inode * inode) is_directory = S_ISDIR(inode->i_mode); dquot_free_inode(inode); - vfs_dq_drop(inode); + dquot_drop(inode); clear_inode (inode); @@ -358,7 +358,7 @@ cg_found: vfs_dq_init(inode); err = dquot_alloc_inode(inode); if (err) { - vfs_dq_drop(inode); + dquot_drop(inode); goto fail_without_unlock; } diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 95d61cb3a5b8..66b63a751615 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1434,7 +1434,7 @@ static void destroy_inodecache(void) static void ufs_clear_inode(struct inode *inode) { - vfs_dq_drop(inode); + dquot_drop(inode); } #ifdef CONFIG_QUOTA diff --git a/include/linux/quota.h b/include/linux/quota.h index 422e6aa78edc..aec2e9dac2d7 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -296,7 +296,6 @@ struct quota_format_ops { /* Operations working with dquots */ struct dquot_operations { int (*initialize) (struct inode *, int); - int (*drop) (struct inode *); int (*write_dquot) (struct dquot *); /* Ordinary dquot write */ struct dquot *(*alloc_dquot)(struct super_block *, int); /* Allocate memory for new dquot */ void (*destroy_dquot)(struct dquot *); /* Free memory for dquot */ diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index fa27b7218c82..a5ebd1abccd8 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -24,7 +24,7 @@ void inode_claim_rsv_space(struct inode *inode, qsize_t number); void inode_sub_rsv_space(struct inode *inode, qsize_t number); int dquot_initialize(struct inode *inode, int type); -int dquot_drop(struct inode *inode); +void dquot_drop(struct inode *inode); struct dquot *dqget(struct super_block *sb, unsigned int id, int type); void dqput(struct dquot *dquot); int dquot_scan_active(struct super_block *sb, @@ -64,7 +64,6 @@ int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di); int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di); -void vfs_dq_drop(struct inode *inode); int dquot_transfer(struct inode *inode, struct iattr *iattr); int vfs_dq_quota_on_remount(struct super_block *sb); @@ -210,7 +209,7 @@ static inline void vfs_dq_init(struct inode *inode) { } -static inline void vfs_dq_drop(struct inode *inode) +static inline void dquot_drop(struct inode *inode) { } -- cgit v1.2.3 From 907f4554e2521cb28b0009d17167760650a9561c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Mar 2010 09:05:06 -0500 Subject: dquot: move dquot initialization responsibility into the filesystem Currently various places in the VFS call vfs_dq_init directly. 
This means we tie the quota code into the VFS. Get rid of that and make the filesystem responsible for the initialization. For most metadata operations this is a straight forward move into the methods, but for truncate and open it's a bit more complicated. For truncate we currently only call vfs_dq_init for the sys_truncate case because open already takes care of it for ftruncate and open(O_TRUNC) - the new code causes an additional vfs_dq_init for those which is harmless. For open the initialization is moved from do_filp_open into the open method, which means it happens slightly earlier now, and only for regular files. The latter is fine because we don't need to initialize it for operations on special files, and we already do it as part of the namespace operations for directories. Add a dquot_file_open helper that filesystems that support generic quotas can use to fill in ->open. Signed-off-by: Christoph Hellwig Signed-off-by: Jan Kara --- fs/ext2/file.c | 4 ++-- fs/ext2/inode.c | 5 +++++ fs/ext2/namei.c | 51 ++++++++++++++++++++++++++++++++---------------- fs/ext3/file.c | 2 +- fs/ext3/inode.c | 5 +++++ fs/ext3/namei.c | 18 +++++++++++++++++ fs/ext4/file.c | 2 +- fs/ext4/inode.c | 5 +++++ fs/ext4/namei.c | 17 ++++++++++++++++ fs/inode.c | 3 --- fs/jfs/file.c | 4 +++- fs/jfs/inode.c | 3 +++ fs/jfs/namei.c | 15 ++++++++++++++ fs/namei.c | 16 --------------- fs/nfsd/vfs.c | 4 ---- fs/ocfs2/file.c | 5 +++++ fs/ocfs2/inode.c | 2 ++ fs/ocfs2/namei.c | 11 +++++++++++ fs/open.c | 5 +---- fs/quota/dquot.c | 14 +++++++++++++ fs/reiserfs/file.c | 2 +- fs/reiserfs/inode.c | 5 +++++ fs/reiserfs/namei.c | 17 ++++++++++++++++ fs/reiserfs/xattr.c | 4 ---- fs/udf/file.c | 5 ++++- fs/udf/inode.c | 4 ++++ fs/udf/namei.c | 17 ++++++++++++++++ fs/ufs/file.c | 2 +- fs/ufs/inode.c | 4 ++++ fs/ufs/namei.c | 18 +++++++++++++++++ fs/ufs/truncate.c | 3 +++ include/linux/quotaops.h | 4 ++++ 32 files changed, 220 insertions(+), 56 deletions(-) (limited to 'include') diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 586e3589d4c2..d11f6e484519 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -70,7 +70,7 @@ const struct file_operations ext2_file_operations = { .compat_ioctl = ext2_compat_ioctl, #endif .mmap = generic_file_mmap, - .open = generic_file_open, + .open = dquot_file_open, .release = ext2_release_file, .fsync = ext2_fsync, .splice_read = generic_file_splice_read, @@ -87,7 +87,7 @@ const struct file_operations ext2_xip_file_operations = { .compat_ioctl = ext2_compat_ioctl, #endif .mmap = xip_file_mmap, - .open = generic_file_open, + .open = dquot_file_open, .release = ext2_release_file, .fsync = ext2_fsync, }; diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 3cfcfd9a131a..c87840c33e17 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -58,6 +58,8 @@ static inline int ext2_inode_is_fast_symlink(struct inode *inode) */ void ext2_delete_inode (struct inode * inode) { + if (!is_bad_inode(inode)) + vfs_dq_init(inode); truncate_inode_pages(&inode->i_data, 0); if (is_bad_inode(inode)) @@ -1457,6 +1459,9 @@ int ext2_setattr(struct dentry *dentry, struct iattr *iattr) error = inode_change_ok(inode, iattr); if (error) return error; + + if (iattr->ia_valid & ATTR_SIZE) + vfs_dq_init(inode); if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { error = dquot_transfer(inode, iattr); diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index dd7175ce5606..5923df7b22af 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -31,6 +31,7 @@ */ 
#include +#include #include "ext2.h" #include "xattr.h" #include "acl.h" @@ -99,24 +100,27 @@ struct dentry *ext2_get_parent(struct dentry *child) */ static int ext2_create (struct inode * dir, struct dentry * dentry, int mode, struct nameidata *nd) { - struct inode * inode = ext2_new_inode (dir, mode); - int err = PTR_ERR(inode); - if (!IS_ERR(inode)) { - inode->i_op = &ext2_file_inode_operations; - if (ext2_use_xip(inode->i_sb)) { - inode->i_mapping->a_ops = &ext2_aops_xip; - inode->i_fop = &ext2_xip_file_operations; - } else if (test_opt(inode->i_sb, NOBH)) { - inode->i_mapping->a_ops = &ext2_nobh_aops; - inode->i_fop = &ext2_file_operations; - } else { - inode->i_mapping->a_ops = &ext2_aops; - inode->i_fop = &ext2_file_operations; - } - mark_inode_dirty(inode); - err = ext2_add_nondir(dentry, inode); + struct inode *inode; + + vfs_dq_init(dir); + + inode = ext2_new_inode(dir, mode); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + inode->i_op = &ext2_file_inode_operations; + if (ext2_use_xip(inode->i_sb)) { + inode->i_mapping->a_ops = &ext2_aops_xip; + inode->i_fop = &ext2_xip_file_operations; + } else if (test_opt(inode->i_sb, NOBH)) { + inode->i_mapping->a_ops = &ext2_nobh_aops; + inode->i_fop = &ext2_file_operations; + } else { + inode->i_mapping->a_ops = &ext2_aops; + inode->i_fop = &ext2_file_operations; } - return err; + mark_inode_dirty(inode); + return ext2_add_nondir(dentry, inode); } static int ext2_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_t rdev) @@ -127,6 +131,8 @@ static int ext2_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_ if (!new_valid_dev(rdev)) return -EINVAL; + vfs_dq_init(dir); + inode = ext2_new_inode (dir, mode); err = PTR_ERR(inode); if (!IS_ERR(inode)) { @@ -151,6 +157,8 @@ static int ext2_symlink (struct inode * dir, struct dentry * dentry, if (l > sb->s_blocksize) goto out; + vfs_dq_init(dir); + inode = ext2_new_inode (dir, S_IFLNK | S_IRWXUGO); err = PTR_ERR(inode); if (IS_ERR(inode)) @@ -194,6 +202,8 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir, if (inode->i_nlink >= EXT2_LINK_MAX) return -EMLINK; + vfs_dq_init(dir); + inode->i_ctime = CURRENT_TIME_SEC; inode_inc_link_count(inode); atomic_inc(&inode->i_count); @@ -216,6 +226,8 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode) if (dir->i_nlink >= EXT2_LINK_MAX) goto out; + vfs_dq_init(dir); + inode_inc_link_count(dir); inode = ext2_new_inode (dir, S_IFDIR | mode); @@ -262,6 +274,8 @@ static int ext2_unlink(struct inode * dir, struct dentry *dentry) struct page * page; int err = -ENOENT; + vfs_dq_init(dir); + de = ext2_find_entry (dir, &dentry->d_name, &page); if (!de) goto out; @@ -304,6 +318,9 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry, struct ext2_dir_entry_2 * old_de; int err = -ENOENT; + vfs_dq_init(old_dir); + vfs_dq_init(new_dir); + old_de = ext2_find_entry (old_dir, &old_dentry->d_name, &old_page); if (!old_de) goto out; diff --git a/fs/ext3/file.c b/fs/ext3/file.c index a86d3302cdc2..3c7fb11a3b29 100644 --- a/fs/ext3/file.c +++ b/fs/ext3/file.c @@ -62,7 +62,7 @@ const struct file_operations ext3_file_operations = { .compat_ioctl = ext3_compat_ioctl, #endif .mmap = generic_file_mmap, - .open = generic_file_open, + .open = dquot_file_open, .release = ext3_release_file, .fsync = ext3_sync_file, .splice_read = generic_file_splice_read, diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 14d40a4dd6f0..d7962b0c57b3 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ 
-196,6 +196,9 @@ void ext3_delete_inode (struct inode * inode) { handle_t *handle; + if (!is_bad_inode(inode)) + vfs_dq_init(inode); + truncate_inode_pages(&inode->i_data, 0); if (is_bad_inode(inode)) @@ -3148,6 +3151,8 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr) if (error) return error; + if (ia_valid & ATTR_SIZE) + vfs_dq_init(inode); if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { handle_t *handle; diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 7b0e44f7d66f..a492b371b134 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -1696,6 +1696,8 @@ static int ext3_create (struct inode * dir, struct dentry * dentry, int mode, struct inode * inode; int err, retries = 0; + vfs_dq_init(dir); + retry: handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + @@ -1730,6 +1732,8 @@ static int ext3_mknod (struct inode * dir, struct dentry *dentry, if (!new_valid_dev(rdev)) return -EINVAL; + vfs_dq_init(dir); + retry: handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + @@ -1766,6 +1770,8 @@ static int ext3_mkdir(struct inode * dir, struct dentry * dentry, int mode) if (dir->i_nlink >= EXT3_LINK_MAX) return -EMLINK; + vfs_dq_init(dir); + retry: handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + @@ -2060,7 +2066,9 @@ static int ext3_rmdir (struct inode * dir, struct dentry *dentry) /* Initialize quotas before so that eventual writes go in * separate transaction */ + vfs_dq_init(dir); vfs_dq_init(dentry->d_inode); + handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -2119,7 +2127,9 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry) /* Initialize quotas before so that eventual writes go * in separate transaction */ + vfs_dq_init(dir); vfs_dq_init(dentry->d_inode); + handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -2174,6 +2184,8 @@ static int ext3_symlink (struct inode * dir, if (l > dir->i_sb->s_blocksize) return -ENAMETOOLONG; + vfs_dq_init(dir); + retry: handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 + @@ -2228,6 +2240,9 @@ static int ext3_link (struct dentry * old_dentry, if (inode->i_nlink >= EXT3_LINK_MAX) return -EMLINK; + + vfs_dq_init(dir); + /* * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing * otherwise has the potential to corrupt the orphan inode list. 
@@ -2278,6 +2293,9 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry, struct ext3_dir_entry_2 * old_de, * new_de; int retval, flush_file = 0; + vfs_dq_init(old_dir); + vfs_dq_init(new_dir); + old_bh = new_bh = dir_bh = NULL; /* Initialize quotas before so that eventual writes go diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 9630583cef28..85fa464a24ad 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -127,7 +127,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp) sb->s_dirt = 1; } } - return generic_file_open(inode, filp); + return dquot_file_open(inode, filp); } const struct file_operations ext4_file_operations = { diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 6a002a6d0624..eaa22ae9f1f6 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -170,6 +170,9 @@ void ext4_delete_inode(struct inode *inode) handle_t *handle; int err; + if (!is_bad_inode(inode)) + vfs_dq_init(inode); + if (ext4_should_order_data(inode)) ext4_begin_ordered_truncate(inode, 0); truncate_inode_pages(&inode->i_data, 0); @@ -5251,6 +5254,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) if (error) return error; + if (ia_valid & ATTR_SIZE) + vfs_dq_init(inode); if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { handle_t *handle; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 17a17e10dd60..20f55c2e7571 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1766,6 +1766,8 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, int mode, struct inode *inode; int err, retries = 0; + vfs_dq_init(dir); + retry: handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + @@ -1800,6 +1802,8 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry, if (!new_valid_dev(rdev)) return -EINVAL; + vfs_dq_init(dir); + retry: handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + @@ -1837,6 +1841,8 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode) if (EXT4_DIR_LINK_MAX(dir)) return -EMLINK; + vfs_dq_init(dir); + retry: handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + @@ -2136,7 +2142,9 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry) /* Initialize quotas before so that eventual writes go in * separate transaction */ + vfs_dq_init(dir); vfs_dq_init(dentry->d_inode); + handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -2195,7 +2203,9 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) /* Initialize quotas before so that eventual writes go * in separate transaction */ + vfs_dq_init(dir); vfs_dq_init(dentry->d_inode); + handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -2250,6 +2260,8 @@ static int ext4_symlink(struct inode *dir, if (l > dir->i_sb->s_blocksize) return -ENAMETOOLONG; + vfs_dq_init(dir); + retry: handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 5 + @@ -2308,6 +2320,8 @@ static int ext4_link(struct dentry *old_dentry, if (inode->i_nlink >= EXT4_LINK_MAX) return -EMLINK; + vfs_dq_init(dir); + /* * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing * otherwise has the potential to corrupt the orphan inode list. 
@@ -2358,6 +2372,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, struct ext4_dir_entry_2 *old_de, *new_de; int retval, force_da_alloc = 0; + vfs_dq_init(old_dir); + vfs_dq_init(new_dir); + old_bh = new_bh = dir_bh = NULL; /* Initialize quotas before so that eventual writes go diff --git a/fs/inode.c b/fs/inode.c index f1aef3482b0e..407bf392e20a 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include @@ -1210,8 +1209,6 @@ void generic_delete_inode(struct inode *inode) if (op->delete_inode) { void (*delete)(struct inode *) = op->delete_inode; - if (!is_bad_inode(inode)) - vfs_dq_init(inode); /* Filesystems implementing their own * s_op->delete_inode are required to call * truncate_inode_pages and clear_inode() diff --git a/fs/jfs/file.c b/fs/jfs/file.c index 2c201783836f..f19bb33eb1eb 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c @@ -48,7 +48,7 @@ static int jfs_open(struct inode *inode, struct file *file) { int rc; - if ((rc = generic_file_open(inode, file))) + if ((rc = dquot_file_open(inode, file))) return rc; /* @@ -98,6 +98,8 @@ int jfs_setattr(struct dentry *dentry, struct iattr *iattr) if (rc) return rc; + if (iattr->ia_valid & ATTR_SIZE) + vfs_dq_init(inode); if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { rc = dquot_transfer(inode, iattr); diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 22fa412c5289..1aa2dda16590 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -146,6 +146,9 @@ void jfs_delete_inode(struct inode *inode) { jfs_info("In jfs_delete_inode, inode = 0x%p", inode); + if (!is_bad_inode(inode)) + vfs_dq_init(inode); + if (!is_bad_inode(inode) && (JFS_IP(inode)->fileset == FILESYSTEM_I)) { truncate_inode_pages(&inode->i_data, 0); diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 1d1390afe55e..b7cc29da50b4 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -85,6 +85,8 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode, jfs_info("jfs_create: dip:0x%p name:%s", dip, dentry->d_name.name); + vfs_dq_init(dip); + /* * search parent directory for entry/freespace * (dtSearch() returns parent directory page pinned) @@ -215,6 +217,8 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode) jfs_info("jfs_mkdir: dip:0x%p name:%s", dip, dentry->d_name.name); + vfs_dq_init(dip); + /* link count overflow on parent directory ? */ if (dip->i_nlink == JFS_LINK_MAX) { rc = -EMLINK; @@ -356,6 +360,7 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry) jfs_info("jfs_rmdir: dip:0x%p name:%s", dip, dentry->d_name.name); /* Init inode for quota operations. */ + vfs_dq_init(dip); vfs_dq_init(ip); /* directory must be empty to be removed */ @@ -483,6 +488,7 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry) jfs_info("jfs_unlink: dip:0x%p name:%s", dip, dentry->d_name.name); /* Init inode for quota operations. 
*/ + vfs_dq_init(dip); vfs_dq_init(ip); if ((rc = get_UCSname(&dname, dentry))) @@ -805,6 +811,8 @@ static int jfs_link(struct dentry *old_dentry, if (ip->i_nlink == 0) return -ENOENT; + vfs_dq_init(dir); + tid = txBegin(ip->i_sb, 0); mutex_lock_nested(&JFS_IP(dir)->commit_mutex, COMMIT_MUTEX_PARENT); @@ -896,6 +904,8 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, jfs_info("jfs_symlink: dip:0x%p name:%s", dip, name); + vfs_dq_init(dip); + ssize = strlen(name) + 1; /* @@ -1087,6 +1097,9 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, jfs_info("jfs_rename: %s %s", old_dentry->d_name.name, new_dentry->d_name.name); + vfs_dq_init(old_dir); + vfs_dq_init(new_dir); + old_ip = old_dentry->d_inode; new_ip = new_dentry->d_inode; @@ -1360,6 +1373,8 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry, jfs_info("jfs_mknod: %s", dentry->d_name.name); + vfs_dq_init(dir); + if ((rc = get_UCSname(&dname, dentry))) goto out; diff --git a/fs/namei.c b/fs/namei.c index a4855af776a8..06abd2bf473c 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include @@ -1461,7 +1460,6 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode, error = security_inode_create(dir, dentry, mode); if (error) return error; - vfs_dq_init(dir); error = dir->i_op->create(dir, dentry, mode, nd); if (!error) fsnotify_create(dir, dentry); @@ -1813,9 +1811,6 @@ ok: } } if (!IS_ERR(filp)) { - if (acc_mode & MAY_WRITE) - vfs_dq_init(nd.path.dentry->d_inode); - if (will_truncate) { error = handle_truncate(&nd.path); if (error) { @@ -1996,7 +1991,6 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) if (error) return error; - vfs_dq_init(dir); error = dir->i_op->mknod(dir, dentry, mode, dev); if (!error) fsnotify_create(dir, dentry); @@ -2095,7 +2089,6 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) if (error) return error; - vfs_dq_init(dir); error = dir->i_op->mkdir(dir, dentry, mode); if (!error) fsnotify_mkdir(dir, dentry); @@ -2181,8 +2174,6 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) if (!dir->i_op->rmdir) return -EPERM; - vfs_dq_init(dir); - mutex_lock(&dentry->d_inode->i_mutex); dentry_unhash(dentry); if (d_mountpoint(dentry)) @@ -2268,8 +2259,6 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry) if (!dir->i_op->unlink) return -EPERM; - vfs_dq_init(dir); - mutex_lock(&dentry->d_inode->i_mutex); if (d_mountpoint(dentry)) error = -EBUSY; @@ -2379,7 +2368,6 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname) if (error) return error; - vfs_dq_init(dir); error = dir->i_op->symlink(dir, dentry, oldname); if (!error) fsnotify_create(dir, dentry); @@ -2463,7 +2451,6 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de return error; mutex_lock(&inode->i_mutex); - vfs_dq_init(dir); error = dir->i_op->link(old_dentry, dir, new_dentry); mutex_unlock(&inode->i_mutex); if (!error) @@ -2662,9 +2649,6 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (!old_dir->i_op->rename) return -EPERM; - vfs_dq_init(old_dir); - vfs_dq_init(new_dir); - old_name = fsnotify_oldname_init(old_dentry->d_name.name); if (is_dir) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 8715d194561a..09e9fc043600 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -377,7 +376,6 @@ nfsd_setattr(struct svc_rqst 
*rqstp, struct svc_fh *fhp, struct iattr *iap, put_write_access(inode); goto out_nfserr; } - vfs_dq_init(inode); } /* sanitize the mode change */ @@ -745,8 +743,6 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, flags = O_RDWR|O_LARGEFILE; else flags = O_WRONLY|O_LARGEFILE; - - vfs_dq_init(inode); } *filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt), flags, current_cred()); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 472e8f8bc892..126198f5a67c 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -107,6 +107,9 @@ static int ocfs2_file_open(struct inode *inode, struct file *file) mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file, file->f_path.dentry->d_name.len, file->f_path.dentry->d_name.name); + if (file->f_mode & FMODE_WRITE) + vfs_dq_init(inode); + spin_lock(&oi->ip_lock); /* Check that the inode hasn't been wiped from disk by another @@ -977,6 +980,8 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) size_change = S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE; if (size_change) { + vfs_dq_init(inode); + status = ocfs2_rw_lock(inode, 1); if (status < 0) { mlog_errno(status); diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 00eb6a095e68..77681a690d16 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -971,6 +971,8 @@ void ocfs2_delete_inode(struct inode *inode) goto bail; } + vfs_dq_init(inode); + if (!ocfs2_inode_is_valid_to_delete(inode)) { /* It's probably not necessary to truncate_inode_pages * here but we do it for safety anyway (it will most diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 99766b6418eb..8b5b142eb638 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -244,6 +244,8 @@ static int ocfs2_mknod(struct inode *dir, (unsigned long)dev, dentry->d_name.len, dentry->d_name.name); + vfs_dq_init(dir); + /* get our super block */ osb = OCFS2_SB(dir->i_sb); @@ -632,6 +634,8 @@ static int ocfs2_link(struct dentry *old_dentry, if (S_ISDIR(inode->i_mode)) return -EPERM; + vfs_dq_init(dir); + err = ocfs2_inode_lock_nested(dir, &parent_fe_bh, 1, OI_LS_PARENT); if (err < 0) { if (err != -ENOENT) @@ -787,6 +791,8 @@ static int ocfs2_unlink(struct inode *dir, mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry, dentry->d_name.len, dentry->d_name.name); + vfs_dq_init(dir); + BUG_ON(dentry->d_parent->d_inode != dir); mlog(0, "ino = %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); @@ -1047,6 +1053,9 @@ static int ocfs2_rename(struct inode *old_dir, old_dentry->d_name.len, old_dentry->d_name.name, new_dentry->d_name.len, new_dentry->d_name.name); + vfs_dq_init(old_dir); + vfs_dq_init(new_dir); + osb = OCFS2_SB(old_dir->i_sb); if (new_inode) { @@ -1595,6 +1604,8 @@ static int ocfs2_symlink(struct inode *dir, mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir, dentry, symname, dentry->d_name.len, dentry->d_name.name); + vfs_dq_init(dir); + sb = dir->i_sb; osb = OCFS2_SB(sb); diff --git a/fs/open.c b/fs/open.c index 040cef72bc00..b740c4244833 100644 --- a/fs/open.c +++ b/fs/open.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include @@ -278,10 +277,8 @@ static long do_sys_truncate(const char __user *pathname, loff_t length) error = locks_verify_truncate(inode, NULL, length); if (!error) error = security_path_truncate(&path, length, 0); - if (!error) { - vfs_dq_init(inode); + if (!error) error = do_truncate(path.dentry, length, 0, NULL); - } put_write_and_out: put_write_access(inode); diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index cd83c5b871ba..6244bca45c9d 
100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -1820,6 +1820,20 @@ const struct dquot_operations dquot_operations = { .destroy_dquot = dquot_destroy, }; +/* + * Generic helper for ->open on filesystems supporting disk quotas. + */ +int dquot_file_open(struct inode *inode, struct file *file) +{ + int error; + + error = generic_file_open(inode, file); + if (!error && (file->f_mode & FMODE_WRITE)) + vfs_dq_init(inode); + return error; +} +EXPORT_SYMBOL(dquot_file_open); + /* * Turn quota off on a device. type == -1 ==> quotaoff for all types (umount) */ diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index da2dba082e2d..1d9c12714c5c 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -289,7 +289,7 @@ const struct file_operations reiserfs_file_operations = { .compat_ioctl = reiserfs_compat_ioctl, #endif .mmap = reiserfs_file_mmap, - .open = generic_file_open, + .open = dquot_file_open, .release = reiserfs_file_release, .fsync = reiserfs_sync_file, .aio_read = generic_file_aio_read, diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index f07c3b69247d..06995cb48e39 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -34,6 +34,9 @@ void reiserfs_delete_inode(struct inode *inode) int depth; int err; + if (!is_bad_inode(inode)) + vfs_dq_init(inode); + truncate_inode_pages(&inode->i_data, 0); depth = reiserfs_write_lock_once(inode->i_sb); @@ -3073,6 +3076,8 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) depth = reiserfs_write_lock_once(inode->i_sb); if (attr->ia_valid & ATTR_SIZE) { + vfs_dq_init(inode); + /* version 2 items will be caught by the s_maxbytes check ** done for us in vmtruncate */ diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 9dea84e8a79a..c55e1b9fee5f 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -594,6 +594,8 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, int mode, struct reiserfs_transaction_handle th; struct reiserfs_security_handle security; + vfs_dq_init(dir); + if (!(inode = new_inode(dir->i_sb))) { return -ENOMEM; } @@ -666,6 +668,8 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, int mode, if (!new_valid_dev(rdev)) return -EINVAL; + vfs_dq_init(dir); + if (!(inode = new_inode(dir->i_sb))) { return -ENOMEM; } @@ -739,6 +743,8 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) + REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb)); + vfs_dq_init(dir); + #ifdef DISPLACE_NEW_PACKING_LOCALITIES /* set flag that new packing locality created and new blocks for the content * of that directory are not displaced yet */ REISERFS_I(dir)->new_packing_locality = 1; @@ -842,6 +848,8 @@ static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry) JOURNAL_PER_BALANCE_CNT * 2 + 2 + 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); + vfs_dq_init(dir); + reiserfs_write_lock(dir->i_sb); retval = journal_begin(&th, dir->i_sb, jbegin_count); if (retval) @@ -923,6 +931,8 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry) unsigned long savelink; int depth; + vfs_dq_init(dir); + inode = dentry->d_inode; /* in this transaction we can be doing at max two balancings and update @@ -1024,6 +1034,8 @@ static int reiserfs_symlink(struct inode *parent_dir, 2 * (REISERFS_QUOTA_INIT_BLOCKS(parent_dir->i_sb) + REISERFS_QUOTA_TRANS_BLOCKS(parent_dir->i_sb)); + vfs_dq_init(parent_dir); + if (!(inode = new_inode(parent_dir->i_sb))) { return -ENOMEM; } @@ -1111,6 +1123,8 @@ static int 
reiserfs_link(struct dentry *old_dentry, struct inode *dir, JOURNAL_PER_BALANCE_CNT * 3 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); + vfs_dq_init(dir); + reiserfs_write_lock(dir->i_sb); if (inode->i_nlink >= REISERFS_LINK_MAX) { //FIXME: sd_nlink is 32 bit for new files @@ -1235,6 +1249,9 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry, JOURNAL_PER_BALANCE_CNT * 3 + 5 + 4 * REISERFS_QUOTA_TRANS_BLOCKS(old_dir->i_sb); + vfs_dq_init(old_dir); + vfs_dq_init(new_dir); + old_inode = old_dentry->d_inode; new_dentry_inode = new_dentry->d_inode; diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 81f09fab8ae4..37d034ca7d99 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -61,7 +61,6 @@ static int xattr_create(struct inode *dir, struct dentry *dentry, int mode) { BUG_ON(!mutex_is_locked(&dir->i_mutex)); - vfs_dq_init(dir); return dir->i_op->create(dir, dentry, mode, NULL); } #endif @@ -69,7 +68,6 @@ static int xattr_create(struct inode *dir, struct dentry *dentry, int mode) static int xattr_mkdir(struct inode *dir, struct dentry *dentry, int mode) { BUG_ON(!mutex_is_locked(&dir->i_mutex)); - vfs_dq_init(dir); return dir->i_op->mkdir(dir, dentry, mode); } @@ -81,7 +79,6 @@ static int xattr_unlink(struct inode *dir, struct dentry *dentry) { int error; BUG_ON(!mutex_is_locked(&dir->i_mutex)); - vfs_dq_init(dir); reiserfs_mutex_lock_nested_safe(&dentry->d_inode->i_mutex, I_MUTEX_CHILD, dir->i_sb); @@ -97,7 +94,6 @@ static int xattr_rmdir(struct inode *dir, struct dentry *dentry) { int error; BUG_ON(!mutex_is_locked(&dir->i_mutex)); - vfs_dq_init(dir); reiserfs_mutex_lock_nested_safe(&dentry->d_inode->i_mutex, I_MUTEX_CHILD, dir->i_sb); diff --git a/fs/udf/file.c b/fs/udf/file.c index 2df7fcb677b3..013fa44d9a5e 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -208,7 +208,7 @@ const struct file_operations udf_file_operations = { .read = do_sync_read, .aio_read = generic_file_aio_read, .ioctl = udf_ioctl, - .open = generic_file_open, + .open = dquot_file_open, .mmap = generic_file_mmap, .write = do_sync_write, .aio_write = udf_file_aio_write, @@ -227,6 +227,9 @@ static int udf_setattr(struct dentry *dentry, struct iattr *iattr) if (error) return error; + if (iattr->ia_valid & ATTR_SIZE) + vfs_dq_init(inode); + if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { error = dquot_transfer(inode, iattr); diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 1199e8e21ee2..f19520268404 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -70,6 +71,9 @@ static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int); void udf_delete_inode(struct inode *inode) { + if (!is_bad_inode(inode)) + vfs_dq_init(inode); + truncate_inode_pages(&inode->i_data, 0); if (is_bad_inode(inode)) diff --git a/fs/udf/namei.c b/fs/udf/namei.c index cd2115060fdc..e360c3fc4ae4 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -563,6 +563,8 @@ static int udf_create(struct inode *dir, struct dentry *dentry, int mode, int err; struct udf_inode_info *iinfo; + vfs_dq_init(dir); + lock_kernel(); inode = udf_new_inode(dir, mode, &err); if (!inode) { @@ -616,6 +618,8 @@ static int udf_mknod(struct inode *dir, struct dentry *dentry, int mode, if (!old_valid_dev(rdev)) return -EINVAL; + vfs_dq_init(dir); + lock_kernel(); err = -EIO; inode = udf_new_inode(dir, mode, &err); @@ -662,6 +666,8 @@ static int udf_mkdir(struct inode 
*dir, struct dentry *dentry, int mode) struct udf_inode_info *dinfo = UDF_I(dir); struct udf_inode_info *iinfo; + vfs_dq_init(dir); + lock_kernel(); err = -EMLINK; if (dir->i_nlink >= (256 << sizeof(dir->i_nlink)) - 1) @@ -799,6 +805,8 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry) struct fileIdentDesc *fi, cfi; struct kernel_lb_addr tloc; + vfs_dq_init(dir); + retval = -ENOENT; lock_kernel(); fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi); @@ -845,6 +853,8 @@ static int udf_unlink(struct inode *dir, struct dentry *dentry) struct fileIdentDesc cfi; struct kernel_lb_addr tloc; + vfs_dq_init(dir); + retval = -ENOENT; lock_kernel(); fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi); @@ -899,6 +909,8 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry, struct buffer_head *bh; struct udf_inode_info *iinfo; + vfs_dq_init(dir); + lock_kernel(); inode = udf_new_inode(dir, S_IFLNK, &err); if (!inode) @@ -1069,6 +1081,8 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir, int err; struct buffer_head *bh; + vfs_dq_init(dir); + lock_kernel(); if (inode->i_nlink >= (256 << sizeof(inode->i_nlink)) - 1) { unlock_kernel(); @@ -1131,6 +1145,9 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry, struct kernel_lb_addr tloc; struct udf_inode_info *old_iinfo = UDF_I(old_inode); + vfs_dq_init(old_dir); + vfs_dq_init(new_dir); + lock_kernel(); ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi); if (ofi) { diff --git a/fs/ufs/file.c b/fs/ufs/file.c index 73655c61240a..d84762f3028e 100644 --- a/fs/ufs/file.c +++ b/fs/ufs/file.c @@ -40,7 +40,7 @@ const struct file_operations ufs_file_operations = { .write = do_sync_write, .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, - .open = generic_file_open, + .open = dquot_file_open, .fsync = simple_fsync, .splice_read = generic_file_splice_read, }; diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 7cf33379fd46..fff8edab382f 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "ufs_fs.h" #include "ufs.h" @@ -908,6 +909,9 @@ void ufs_delete_inode (struct inode * inode) { loff_t old_i_size; + if (!is_bad_inode(inode)) + vfs_dq_init(inode); + truncate_inode_pages(&inode->i_data, 0); if (is_bad_inode(inode)) goto no_delete; diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index 4c26d9e8bc94..c33cb90c516d 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "ufs_fs.h" #include "ufs.h" @@ -84,6 +85,9 @@ static int ufs_create (struct inode * dir, struct dentry * dentry, int mode, int err; UFSD("BEGIN\n"); + + vfs_dq_init(dir); + inode = ufs_new_inode(dir, mode); err = PTR_ERR(inode); @@ -107,6 +111,9 @@ static int ufs_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_t if (!old_valid_dev(rdev)) return -EINVAL; + + vfs_dq_init(dir); + inode = ufs_new_inode(dir, mode); err = PTR_ERR(inode); if (!IS_ERR(inode)) { @@ -131,6 +138,8 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry, if (l > sb->s_blocksize) goto out_notlocked; + vfs_dq_init(dir); + lock_kernel(); inode = ufs_new_inode(dir, S_IFLNK | S_IRWXUGO); err = PTR_ERR(inode); @@ -176,6 +185,8 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir, return -EMLINK; } + vfs_dq_init(dir); + inode->i_ctime = CURRENT_TIME_SEC; inode_inc_link_count(inode); atomic_inc(&inode->i_count); @@ -193,6 +204,8 @@ static int ufs_mkdir(struct inode * dir, struct dentry * 
dentry, int mode) if (dir->i_nlink >= UFS_LINK_MAX) goto out; + vfs_dq_init(dir); + lock_kernel(); inode_inc_link_count(dir); @@ -237,6 +250,8 @@ static int ufs_unlink(struct inode *dir, struct dentry *dentry) struct page *page; int err = -ENOENT; + vfs_dq_init(dir); + de = ufs_find_entry(dir, &dentry->d_name, &page); if (!de) goto out; @@ -281,6 +296,9 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry, struct ufs_dir_entry *old_de; int err = -ENOENT; + vfs_dq_init(old_dir); + vfs_dq_init(new_dir); + old_de = ufs_find_entry(old_dir, &old_dentry->d_name, &old_page); if (!old_de) goto out; diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index 87bbab685901..e5ef8a3ec230 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c @@ -527,6 +527,9 @@ static int ufs_setattr(struct dentry *dentry, struct iattr *attr) if (ia_valid & ATTR_SIZE && attr->ia_size != i_size_read(inode)) { loff_t old_i_size = inode->i_size; + + vfs_dq_init(inode); + error = vmtruncate(inode, attr->ia_size); if (error) return error; diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index a5ebd1abccd8..93ac788345e2 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -48,6 +48,8 @@ int dquot_release(struct dquot *dquot); int dquot_commit_info(struct super_block *sb, int type); int dquot_mark_dquot_dirty(struct dquot *dquot); +int dquot_file_open(struct inode *inode, struct file *file); + int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path, int remount); int vfs_quota_enable(struct inode *inode, int type, int format_id, @@ -342,4 +344,6 @@ static inline void dquot_release_reservation_block(struct inode *inode, __dquot_free_space(inode, nr << inode->i_blkbits, 1); } +#define dquot_file_open generic_file_open + #endif /* _LINUX_QUOTAOPS_ */ -- cgit v1.2.3 From 871a293155a24554e153538d36e3a80fa169aefb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Mar 2010 09:05:07 -0500 Subject: dquot: cleanup dquot initialize routine Get rid of the initialize dquot operation - it is now always called from the filesystem and if a filesystem really needs it's own (which none currently does) it can just call into it's own routine directly. Rename the now static low-level dquot_initialize helper to __dquot_initialize and vfs_dq_init to dquot_initialize to have a consistent namespace. 
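
A minimal sketch of the calling convention these two patches establish; "examplefs" and its helper below are hypothetical and only stand in for a real filesystem, but the dquot_initialize() and dquot_file_open() calls mirror the hunks applied to ext2, ext3, ext4, jfs, udf and ufs in this series:

#include <linux/fs.h>
#include <linux/quotaops.h>

/* Hypothetical helper standing in for the filesystem's real unlink work. */
static int examplefs_do_unlink(struct inode *dir, struct dentry *dentry);

static int examplefs_unlink(struct inode *dir, struct dentry *dentry)
{
	/* Quota structures are now pinned by the filesystem, not by the VFS. */
	dquot_initialize(dir);
	dquot_initialize(dentry->d_inode);

	return examplefs_do_unlink(dir, dentry);
}

static int examplefs_setattr(struct dentry *dentry, struct iattr *iattr)
{
	struct inode *inode = dentry->d_inode;
	int error = inode_change_ok(inode, iattr);

	if (error)
		return error;
	/* Truncate may write quota, so initialize before the size change. */
	if (iattr->ia_valid & ATTR_SIZE)
		dquot_initialize(inode);
	if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
	    (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) {
		error = dquot_transfer(inode, iattr);
		if (error)
			return error;
	}
	return inode_setattr(inode, iattr);
}

/* Regular files take the generic open helper so writable opens get quota init. */
const struct file_operations examplefs_file_operations = {
	.open	= dquot_file_open,
	.mmap	= generic_file_mmap,
};

The ->open wiring relies on the dquot_file_open() helper added to fs/quota/dquot.c above; quotaops.h also carries a generic_file_open fallback define for builds without quota support, so the same file_operations initializer works either way.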
Signed-off-by: Christoph Hellwig Signed-off-by: Jan Kara --- Documentation/filesystems/Locking | 2 -- fs/ext2/file.c | 1 + fs/ext2/ialloc.c | 2 +- fs/ext2/inode.c | 4 ++-- fs/ext2/namei.c | 16 ++++++++-------- fs/ext3/file.c | 1 + fs/ext3/ialloc.c | 4 ++-- fs/ext3/inode.c | 6 +++--- fs/ext3/namei.c | 24 ++++++++++++------------ fs/ext3/super.c | 5 ++--- fs/ext4/file.c | 1 + fs/ext4/ialloc.c | 4 ++-- fs/ext4/inode.c | 4 ++-- fs/ext4/namei.c | 24 ++++++++++++------------ fs/ext4/super.c | 5 ++--- fs/jfs/file.c | 2 +- fs/jfs/inode.c | 4 ++-- fs/jfs/jfs_inode.c | 2 +- fs/jfs/namei.c | 24 ++++++++++++------------ fs/ocfs2/file.c | 4 ++-- fs/ocfs2/inode.c | 2 +- fs/ocfs2/namei.c | 14 +++++++------- fs/ocfs2/quota_global.c | 1 - fs/ocfs2/refcounttree.c | 2 +- fs/quota/dquot.c | 32 ++++++++++++++++++++------------ fs/reiserfs/inode.c | 6 +++--- fs/reiserfs/namei.c | 22 +++++++++++----------- fs/reiserfs/super.c | 3 +-- fs/udf/file.c | 2 +- fs/udf/ialloc.c | 2 +- fs/udf/inode.c | 2 +- fs/udf/namei.c | 18 +++++++++--------- fs/ufs/file.c | 1 + fs/ufs/ialloc.c | 2 +- fs/ufs/inode.c | 2 +- fs/ufs/namei.c | 16 ++++++++-------- fs/ufs/truncate.c | 2 +- include/linux/quota.h | 1 - include/linux/quotaops.h | 17 ++++------------- 39 files changed, 141 insertions(+), 145 deletions(-) (limited to 'include') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index fa10e4bf8e5e..06bbbed71206 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -460,7 +460,6 @@ in sys_read() and friends. --------------------------- dquot_operations ------------------------------- prototypes: - int (*initialize) (struct inode *, int); int (*write_dquot) (struct dquot *); int (*acquire_dquot) (struct dquot *); int (*release_dquot) (struct dquot *); @@ -473,7 +472,6 @@ a proper locking wrt the filesystem and call the generic quota operations. 
What filesystem should expect from the generic quota functions: FS recursion Held locks when called -initialize: yes maybe dqonoff_sem write_dquot: yes dqonoff_sem or dqptr_sem acquire_dquot: yes dqonoff_sem or dqptr_sem release_dquot: yes dqonoff_sem or dqptr_sem diff --git a/fs/ext2/file.c b/fs/ext2/file.c index d11f6e484519..5d198d0697fb 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -20,6 +20,7 @@ #include #include +#include #include "ext2.h" #include "xattr.h" #include "acl.h" diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index 88b71972c626..ad7d572ee8dc 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -586,7 +586,7 @@ got: goto fail_drop; } - vfs_dq_init(inode); + dquot_initialize(inode); err = dquot_alloc_inode(inode); if (err) goto fail_drop; diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index c87840c33e17..45ff49f0a4b5 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -59,7 +59,7 @@ static inline int ext2_inode_is_fast_symlink(struct inode *inode) void ext2_delete_inode (struct inode * inode) { if (!is_bad_inode(inode)) - vfs_dq_init(inode); + dquot_initialize(inode); truncate_inode_pages(&inode->i_data, 0); if (is_bad_inode(inode)) @@ -1461,7 +1461,7 @@ int ext2_setattr(struct dentry *dentry, struct iattr *iattr) return error; if (iattr->ia_valid & ATTR_SIZE) - vfs_dq_init(inode); + dquot_initialize(inode); if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { error = dquot_transfer(inode, iattr); diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 5923df7b22af..71efb0e9a3f2 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -102,7 +102,7 @@ static int ext2_create (struct inode * dir, struct dentry * dentry, int mode, st { struct inode *inode; - vfs_dq_init(dir); + dquot_initialize(dir); inode = ext2_new_inode(dir, mode); if (IS_ERR(inode)) @@ -131,7 +131,7 @@ static int ext2_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_ if (!new_valid_dev(rdev)) return -EINVAL; - vfs_dq_init(dir); + dquot_initialize(dir); inode = ext2_new_inode (dir, mode); err = PTR_ERR(inode); @@ -157,7 +157,7 @@ static int ext2_symlink (struct inode * dir, struct dentry * dentry, if (l > sb->s_blocksize) goto out; - vfs_dq_init(dir); + dquot_initialize(dir); inode = ext2_new_inode (dir, S_IFLNK | S_IRWXUGO); err = PTR_ERR(inode); @@ -202,7 +202,7 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir, if (inode->i_nlink >= EXT2_LINK_MAX) return -EMLINK; - vfs_dq_init(dir); + dquot_initialize(dir); inode->i_ctime = CURRENT_TIME_SEC; inode_inc_link_count(inode); @@ -226,7 +226,7 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode) if (dir->i_nlink >= EXT2_LINK_MAX) goto out; - vfs_dq_init(dir); + dquot_initialize(dir); inode_inc_link_count(dir); @@ -274,7 +274,7 @@ static int ext2_unlink(struct inode * dir, struct dentry *dentry) struct page * page; int err = -ENOENT; - vfs_dq_init(dir); + dquot_initialize(dir); de = ext2_find_entry (dir, &dentry->d_name, &page); if (!de) @@ -318,8 +318,8 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry, struct ext2_dir_entry_2 * old_de; int err = -ENOENT; - vfs_dq_init(old_dir); - vfs_dq_init(new_dir); + dquot_initialize(old_dir); + dquot_initialize(new_dir); old_de = ext2_find_entry (old_dir, &old_dentry->d_name, &old_page); if (!old_de) diff --git a/fs/ext3/file.c b/fs/ext3/file.c index 3c7fb11a3b29..f55df0e61cbd 100644 --- a/fs/ext3/file.c +++ b/fs/ext3/file.c @@ -21,6 +21,7 @@ 
#include #include #include +#include #include #include #include "xattr.h" diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index 7d7238f9f6f3..ef9008b885b5 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c @@ -123,7 +123,7 @@ void ext3_free_inode (handle_t *handle, struct inode * inode) * Note: we must free any quota before locking the superblock, * as writing the quota to disk may need the lock as well. */ - vfs_dq_init(inode); + dquot_initialize(inode); ext3_xattr_delete_inode(handle, inode); dquot_free_inode(inode); dquot_drop(inode); @@ -588,7 +588,7 @@ got: sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0; ret = inode; - vfs_dq_init(inode); + dquot_initialize(inode); err = dquot_alloc_inode(inode); if (err) goto fail_drop; diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index d7962b0c57b3..ffbbc65e3f68 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -197,7 +197,7 @@ void ext3_delete_inode (struct inode * inode) handle_t *handle; if (!is_bad_inode(inode)) - vfs_dq_init(inode); + dquot_initialize(inode); truncate_inode_pages(&inode->i_data, 0); @@ -3152,7 +3152,7 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr) return error; if (ia_valid & ATTR_SIZE) - vfs_dq_init(inode); + dquot_initialize(inode); if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { handle_t *handle; @@ -3250,7 +3250,7 @@ static int ext3_writepage_trans_blocks(struct inode *inode) ret = 2 * (bpp + indirects) + 2; #ifdef CONFIG_QUOTA - /* We know that structure was already allocated during vfs_dq_init so + /* We know that structure was already allocated during dquot_initialize so * we will be updating only the data blocks + inodes */ ret += EXT3_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb); #endif diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index a492b371b134..ee184084ca42 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -1696,7 +1696,7 @@ static int ext3_create (struct inode * dir, struct dentry * dentry, int mode, struct inode * inode; int err, retries = 0; - vfs_dq_init(dir); + dquot_initialize(dir); retry: handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + @@ -1732,7 +1732,7 @@ static int ext3_mknod (struct inode * dir, struct dentry *dentry, if (!new_valid_dev(rdev)) return -EINVAL; - vfs_dq_init(dir); + dquot_initialize(dir); retry: handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + @@ -1770,7 +1770,7 @@ static int ext3_mkdir(struct inode * dir, struct dentry * dentry, int mode) if (dir->i_nlink >= EXT3_LINK_MAX) return -EMLINK; - vfs_dq_init(dir); + dquot_initialize(dir); retry: handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + @@ -2066,8 +2066,8 @@ static int ext3_rmdir (struct inode * dir, struct dentry *dentry) /* Initialize quotas before so that eventual writes go in * separate transaction */ - vfs_dq_init(dir); - vfs_dq_init(dentry->d_inode); + dquot_initialize(dir); + dquot_initialize(dentry->d_inode); handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) @@ -2127,8 +2127,8 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry) /* Initialize quotas before so that eventual writes go * in separate transaction */ - vfs_dq_init(dir); - vfs_dq_init(dentry->d_inode); + dquot_initialize(dir); + dquot_initialize(dentry->d_inode); handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) @@ -2184,7 +2184,7 @@ static int ext3_symlink (struct inode * dir, if (l > dir->i_sb->s_blocksize) 
return -ENAMETOOLONG; - vfs_dq_init(dir); + dquot_initialize(dir); retry: handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + @@ -2241,7 +2241,7 @@ static int ext3_link (struct dentry * old_dentry, if (inode->i_nlink >= EXT3_LINK_MAX) return -EMLINK; - vfs_dq_init(dir); + dquot_initialize(dir); /* * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing @@ -2293,15 +2293,15 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry, struct ext3_dir_entry_2 * old_de, * new_de; int retval, flush_file = 0; - vfs_dq_init(old_dir); - vfs_dq_init(new_dir); + dquot_initialize(old_dir); + dquot_initialize(new_dir); old_bh = new_bh = dir_bh = NULL; /* Initialize quotas before so that eventual writes go * in separate transaction */ if (new_dentry->d_inode) - vfs_dq_init(new_dentry->d_inode); + dquot_initialize(new_dentry->d_inode); handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS(old_dir->i_sb) + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2); diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 0163d0dae124..e844accbf55d 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -752,7 +752,6 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off); static const struct dquot_operations ext3_quota_operations = { - .initialize = dquot_initialize, .write_dquot = ext3_write_dquot, .acquire_dquot = ext3_acquire_dquot, .release_dquot = ext3_release_dquot, @@ -1480,7 +1479,7 @@ static void ext3_orphan_cleanup (struct super_block * sb, } list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan); - vfs_dq_init(inode); + dquot_initialize(inode); if (inode->i_nlink) { printk(KERN_DEBUG "%s: truncating inode %lu to %Ld bytes\n", @@ -2736,7 +2735,7 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf) * Process 1 Process 2 * ext3_create() quota_sync() * journal_start() write_dquot() - * vfs_dq_init() down(dqio_mutex) + * dquot_initialize() down(dqio_mutex) * down(dqio_mutex) journal_start() * */ diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 85fa464a24ad..a08a12998c49 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "ext4.h" #include "ext4_jbd2.h" #include "xattr.h" diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index ca8986e4b528..9bb2bb9f67ad 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -217,7 +217,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) * Note: we must free any quota before locking the superblock, * as writing the quota to disk may need the lock as well. 
*/ - vfs_dq_init(inode); + dquot_initialize(inode); ext4_xattr_delete_inode(handle, inode); dquot_free_inode(inode); dquot_drop(inode); @@ -1034,7 +1034,7 @@ got: ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; ret = inode; - vfs_dq_init(inode); + dquot_initialize(inode); err = dquot_alloc_inode(inode); if (err) goto fail_drop; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index eaa22ae9f1f6..bec222ca9ba4 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -171,7 +171,7 @@ void ext4_delete_inode(struct inode *inode) int err; if (!is_bad_inode(inode)) - vfs_dq_init(inode); + dquot_initialize(inode); if (ext4_should_order_data(inode)) ext4_begin_ordered_truncate(inode, 0); @@ -5255,7 +5255,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) return error; if (ia_valid & ATTR_SIZE) - vfs_dq_init(inode); + dquot_initialize(inode); if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { handle_t *handle; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 20f55c2e7571..7f3d2d75a0dc 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1766,7 +1766,7 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, int mode, struct inode *inode; int err, retries = 0; - vfs_dq_init(dir); + dquot_initialize(dir); retry: handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + @@ -1802,7 +1802,7 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry, if (!new_valid_dev(rdev)) return -EINVAL; - vfs_dq_init(dir); + dquot_initialize(dir); retry: handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + @@ -1841,7 +1841,7 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode) if (EXT4_DIR_LINK_MAX(dir)) return -EMLINK; - vfs_dq_init(dir); + dquot_initialize(dir); retry: handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + @@ -2142,8 +2142,8 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry) /* Initialize quotas before so that eventual writes go in * separate transaction */ - vfs_dq_init(dir); - vfs_dq_init(dentry->d_inode); + dquot_initialize(dir); + dquot_initialize(dentry->d_inode); handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) @@ -2203,8 +2203,8 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) /* Initialize quotas before so that eventual writes go * in separate transaction */ - vfs_dq_init(dir); - vfs_dq_init(dentry->d_inode); + dquot_initialize(dir); + dquot_initialize(dentry->d_inode); handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) @@ -2260,7 +2260,7 @@ static int ext4_symlink(struct inode *dir, if (l > dir->i_sb->s_blocksize) return -ENAMETOOLONG; - vfs_dq_init(dir); + dquot_initialize(dir); retry: handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + @@ -2320,7 +2320,7 @@ static int ext4_link(struct dentry *old_dentry, if (inode->i_nlink >= EXT4_LINK_MAX) return -EMLINK; - vfs_dq_init(dir); + dquot_initialize(dir); /* * Return -ENOENT if we've raced with unlink and i_nlink is 0. 
Doing @@ -2372,15 +2372,15 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, struct ext4_dir_entry_2 *old_de, *new_de; int retval, force_da_alloc = 0; - vfs_dq_init(old_dir); - vfs_dq_init(new_dir); + dquot_initialize(old_dir); + dquot_initialize(new_dir); old_bh = new_bh = dir_bh = NULL; /* Initialize quotas before so that eventual writes go * in separate transaction */ if (new_dentry->d_inode) - vfs_dq_init(new_dentry->d_inode); + dquot_initialize(new_dentry->d_inode); handle = ext4_journal_start(old_dir, 2 * EXT4_DATA_TRANS_BLOCKS(old_dir->i_sb) + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 035516c80df2..edcf3b0239d1 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1013,7 +1013,6 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off); static const struct dquot_operations ext4_quota_operations = { - .initialize = dquot_initialize, #ifdef CONFIG_QUOTA .get_reserved_space = ext4_get_reserved_space, #endif @@ -1931,7 +1930,7 @@ static void ext4_orphan_cleanup(struct super_block *sb, } list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); - vfs_dq_init(inode); + dquot_initialize(inode); if (inode->i_nlink) { ext4_msg(sb, KERN_DEBUG, "%s: truncating inode %lu to %lld bytes", @@ -3700,7 +3699,7 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) * Process 1 Process 2 * ext4_create() quota_sync() * jbd2_journal_start() write_dquot() - * vfs_dq_init() down(dqio_mutex) + * dquot_initialize() down(dqio_mutex) * down(dqio_mutex) jbd2_journal_start() * */ diff --git a/fs/jfs/file.c b/fs/jfs/file.c index f19bb33eb1eb..14ba982b3f24 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c @@ -99,7 +99,7 @@ int jfs_setattr(struct dentry *dentry, struct iattr *iattr) return rc; if (iattr->ia_valid & ATTR_SIZE) - vfs_dq_init(inode); + dquot_initialize(inode); if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { rc = dquot_transfer(inode, iattr); diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 1aa2dda16590..c694a5f15380 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -147,7 +147,7 @@ void jfs_delete_inode(struct inode *inode) jfs_info("In jfs_delete_inode, inode = 0x%p", inode); if (!is_bad_inode(inode)) - vfs_dq_init(inode); + dquot_initialize(inode); if (!is_bad_inode(inode) && (JFS_IP(inode)->fileset == FILESYSTEM_I)) { @@ -161,7 +161,7 @@ void jfs_delete_inode(struct inode *inode) /* * Free the inode from the quota allocation. */ - vfs_dq_init(inode); + dquot_initialize(inode); dquot_free_inode(inode); dquot_drop(inode); } diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c index 72b30895422c..829921b67765 100644 --- a/fs/jfs/jfs_inode.c +++ b/fs/jfs/jfs_inode.c @@ -116,7 +116,7 @@ struct inode *ialloc(struct inode *parent, umode_t mode) /* * Allocate inode to quota. 
*/ - vfs_dq_init(inode); + dquot_initialize(inode); rc = dquot_alloc_inode(inode); if (rc) goto fail_drop; diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index b7cc29da50b4..4a3e9f39c21d 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -85,7 +85,7 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode, jfs_info("jfs_create: dip:0x%p name:%s", dip, dentry->d_name.name); - vfs_dq_init(dip); + dquot_initialize(dip); /* * search parent directory for entry/freespace @@ -217,7 +217,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode) jfs_info("jfs_mkdir: dip:0x%p name:%s", dip, dentry->d_name.name); - vfs_dq_init(dip); + dquot_initialize(dip); /* link count overflow on parent directory ? */ if (dip->i_nlink == JFS_LINK_MAX) { @@ -360,8 +360,8 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry) jfs_info("jfs_rmdir: dip:0x%p name:%s", dip, dentry->d_name.name); /* Init inode for quota operations. */ - vfs_dq_init(dip); - vfs_dq_init(ip); + dquot_initialize(dip); + dquot_initialize(ip); /* directory must be empty to be removed */ if (!dtEmpty(ip)) { @@ -488,8 +488,8 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry) jfs_info("jfs_unlink: dip:0x%p name:%s", dip, dentry->d_name.name); /* Init inode for quota operations. */ - vfs_dq_init(dip); - vfs_dq_init(ip); + dquot_initialize(dip); + dquot_initialize(ip); if ((rc = get_UCSname(&dname, dentry))) goto out; @@ -811,7 +811,7 @@ static int jfs_link(struct dentry *old_dentry, if (ip->i_nlink == 0) return -ENOENT; - vfs_dq_init(dir); + dquot_initialize(dir); tid = txBegin(ip->i_sb, 0); @@ -904,7 +904,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, jfs_info("jfs_symlink: dip:0x%p name:%s", dip, name); - vfs_dq_init(dip); + dquot_initialize(dip); ssize = strlen(name) + 1; @@ -1097,8 +1097,8 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, jfs_info("jfs_rename: %s %s", old_dentry->d_name.name, new_dentry->d_name.name); - vfs_dq_init(old_dir); - vfs_dq_init(new_dir); + dquot_initialize(old_dir); + dquot_initialize(new_dir); old_ip = old_dentry->d_inode; new_ip = new_dentry->d_inode; @@ -1149,7 +1149,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, } else if (new_ip) { IWRITE_LOCK(new_ip, RDWRLOCK_NORMAL); /* Init inode for quota operations. 
*/ - vfs_dq_init(new_ip); + dquot_initialize(new_ip); } /* @@ -1373,7 +1373,7 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry, jfs_info("jfs_mknod: %s", dentry->d_name.name); - vfs_dq_init(dir); + dquot_initialize(dir); if ((rc = get_UCSname(&dname, dentry))) goto out; diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 126198f5a67c..364105291282 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -108,7 +108,7 @@ static int ocfs2_file_open(struct inode *inode, struct file *file) file->f_path.dentry->d_name.len, file->f_path.dentry->d_name.name); if (file->f_mode & FMODE_WRITE) - vfs_dq_init(inode); + dquot_initialize(inode); spin_lock(&oi->ip_lock); @@ -980,7 +980,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) size_change = S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE; if (size_change) { - vfs_dq_init(inode); + dquot_initialize(inode); status = ocfs2_rw_lock(inode, 1); if (status < 0) { diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 77681a690d16..278a223aae14 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -971,7 +971,7 @@ void ocfs2_delete_inode(struct inode *inode) goto bail; } - vfs_dq_init(inode); + dquot_initialize(inode); if (!ocfs2_inode_is_valid_to_delete(inode)) { /* It's probably not necessary to truncate_inode_pages diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 8b5b142eb638..d9cd4e373a53 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -212,7 +212,7 @@ static struct inode *ocfs2_get_init_inode(struct inode *dir, int mode) } else inode->i_gid = current_fsgid(); inode->i_mode = mode; - vfs_dq_init(inode); + dquot_initialize(inode); return inode; } @@ -244,7 +244,7 @@ static int ocfs2_mknod(struct inode *dir, (unsigned long)dev, dentry->d_name.len, dentry->d_name.name); - vfs_dq_init(dir); + dquot_initialize(dir); /* get our super block */ osb = OCFS2_SB(dir->i_sb); @@ -634,7 +634,7 @@ static int ocfs2_link(struct dentry *old_dentry, if (S_ISDIR(inode->i_mode)) return -EPERM; - vfs_dq_init(dir); + dquot_initialize(dir); err = ocfs2_inode_lock_nested(dir, &parent_fe_bh, 1, OI_LS_PARENT); if (err < 0) { @@ -791,7 +791,7 @@ static int ocfs2_unlink(struct inode *dir, mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry, dentry->d_name.len, dentry->d_name.name); - vfs_dq_init(dir); + dquot_initialize(dir); BUG_ON(dentry->d_parent->d_inode != dir); @@ -1053,8 +1053,8 @@ static int ocfs2_rename(struct inode *old_dir, old_dentry->d_name.len, old_dentry->d_name.name, new_dentry->d_name.len, new_dentry->d_name.name); - vfs_dq_init(old_dir); - vfs_dq_init(new_dir); + dquot_initialize(old_dir); + dquot_initialize(new_dir); osb = OCFS2_SB(old_dir->i_sb); @@ -1604,7 +1604,7 @@ static int ocfs2_symlink(struct inode *dir, mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir, dentry, symname, dentry->d_name.len, dentry->d_name.name); - vfs_dq_init(dir); + dquot_initialize(dir); sb = dir->i_sb; osb = OCFS2_SB(sb); diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index 4dca38f487cf..355f41d1d520 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -851,7 +851,6 @@ static void ocfs2_destroy_dquot(struct dquot *dquot) } const struct dquot_operations ocfs2_quota_operations = { - .initialize = dquot_initialize, .write_dquot = ocfs2_write_dquot, .acquire_dquot = ocfs2_acquire_dquot, .release_dquot = ocfs2_release_dquot, diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 8ae65c9c020c..f3ae10cde841 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -4390,7 
+4390,7 @@ static int ocfs2_vfs_reflink(struct dentry *old_dentry, struct inode *dir, } mutex_lock(&inode->i_mutex); - vfs_dq_init(dir); + dquot_initialize(dir); error = ocfs2_reflink(old_dentry, dir, new_dentry, preserve); mutex_unlock(&inode->i_mutex); if (!error) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 6244bca45c9d..3c0a7e0dff78 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -230,6 +230,7 @@ struct dqstats dqstats; EXPORT_SYMBOL(dqstats); static qsize_t inode_get_rsv_space(struct inode *inode); +static void __dquot_initialize(struct inode *inode, int type); static inline unsigned int hashfn(const struct super_block *sb, unsigned int id, int type) @@ -890,7 +891,7 @@ static void add_dquot_ref(struct super_block *sb, int type) spin_unlock(&inode_lock); iput(old_inode); - sb->dq_op->initialize(inode, type); + __dquot_initialize(inode, type); /* We hold a reference to 'inode' so it couldn't have been * removed from s_inodes list while we dropped the inode_lock. * We cannot iput the inode now as we can be holding the last @@ -1293,22 +1294,26 @@ static int info_bdq_free(struct dquot *dquot, qsize_t space) } /* - * Initialize quota pointers in inode - * We do things in a bit complicated way but by that we avoid calling - * dqget() and thus filesystem callbacks under dqptr_sem. + * Initialize quota pointers in inode + * + * We do things in a bit complicated way but by that we avoid calling + * dqget() and thus filesystem callbacks under dqptr_sem. + * + * It is better to call this function outside of any transaction as it + * might need a lot of space in journal for dquot structure allocation. */ -int dquot_initialize(struct inode *inode, int type) +static void __dquot_initialize(struct inode *inode, int type) { unsigned int id = 0; - int cnt, ret = 0; + int cnt; struct dquot *got[MAXQUOTAS]; struct super_block *sb = inode->i_sb; qsize_t rsv; /* First test before acquiring mutex - solves deadlocks when we * re-enter the quota code and are already holding the mutex */ - if (IS_NOQUOTA(inode)) - return 0; + if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) + return; /* First get references to structures we might need. */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { @@ -1351,7 +1356,11 @@ out_err: up_write(&sb_dqopt(sb)->dqptr_sem); /* Drop unused references */ dqput_all(got); - return ret; +} + +void dquot_initialize(struct inode *inode) +{ + __dquot_initialize(inode, -1); } EXPORT_SYMBOL(dquot_initialize); @@ -1783,7 +1792,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr) chid[GRPQUOTA] = iattr->ia_gid; } if (sb_any_quota_active(inode->i_sb) && !IS_NOQUOTA(inode)) { - vfs_dq_init(inode); + dquot_initialize(inode); if (__dquot_transfer(inode, chid, mask) == NO_QUOTA) return -EDQUOT; } @@ -1810,7 +1819,6 @@ EXPORT_SYMBOL(dquot_commit_info); * Definitions of diskquota operations. 
*/ const struct dquot_operations dquot_operations = { - .initialize = dquot_initialize, .write_dquot = dquot_commit, .acquire_dquot = dquot_acquire, .release_dquot = dquot_release, @@ -1829,7 +1837,7 @@ int dquot_file_open(struct inode *inode, struct file *file) error = generic_file_open(inode, file); if (!error && (file->f_mode & FMODE_WRITE)) - vfs_dq_init(inode); + dquot_initialize(inode); return error; } EXPORT_SYMBOL(dquot_file_open); diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 06995cb48e39..b8671a54e8ed 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -35,7 +35,7 @@ void reiserfs_delete_inode(struct inode *inode) int err; if (!is_bad_inode(inode)) - vfs_dq_init(inode); + dquot_initialize(inode); truncate_inode_pages(&inode->i_data, 0); @@ -1768,7 +1768,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, BUG_ON(!th->t_trans_id); - vfs_dq_init(inode); + dquot_initialize(inode); err = dquot_alloc_inode(inode); if (err) goto out_end_trans; @@ -3076,7 +3076,7 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) depth = reiserfs_write_lock_once(inode->i_sb); if (attr->ia_valid & ATTR_SIZE) { - vfs_dq_init(inode); + dquot_initialize(inode); /* version 2 items will be caught by the s_maxbytes check ** done for us in vmtruncate diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index c55e1b9fee5f..96e4cbbfaa18 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -554,7 +554,7 @@ static int drop_new_inode(struct inode *inode) } /* utility function that does setup for reiserfs_new_inode. -** vfs_dq_init needs lots of credits so it's better to have it +** dquot_initialize needs lots of credits so it's better to have it ** outside of a transaction, so we had to pull some bits of ** reiserfs_new_inode out into this func. 
*/ @@ -577,7 +577,7 @@ static int new_inode_init(struct inode *inode, struct inode *dir, int mode) } else { inode->i_gid = current_fsgid(); } - vfs_dq_init(inode); + dquot_initialize(inode); return 0; } @@ -594,7 +594,7 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, int mode, struct reiserfs_transaction_handle th; struct reiserfs_security_handle security; - vfs_dq_init(dir); + dquot_initialize(dir); if (!(inode = new_inode(dir->i_sb))) { return -ENOMEM; @@ -668,7 +668,7 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, int mode, if (!new_valid_dev(rdev)) return -EINVAL; - vfs_dq_init(dir); + dquot_initialize(dir); if (!(inode = new_inode(dir->i_sb))) { return -ENOMEM; @@ -743,7 +743,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) + REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb)); - vfs_dq_init(dir); + dquot_initialize(dir); #ifdef DISPLACE_NEW_PACKING_LOCALITIES /* set flag that new packing locality created and new blocks for the content * of that directory are not displaced yet */ @@ -848,7 +848,7 @@ static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry) JOURNAL_PER_BALANCE_CNT * 2 + 2 + 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); - vfs_dq_init(dir); + dquot_initialize(dir); reiserfs_write_lock(dir->i_sb); retval = journal_begin(&th, dir->i_sb, jbegin_count); @@ -931,7 +931,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry) unsigned long savelink; int depth; - vfs_dq_init(dir); + dquot_initialize(dir); inode = dentry->d_inode; @@ -1034,7 +1034,7 @@ static int reiserfs_symlink(struct inode *parent_dir, 2 * (REISERFS_QUOTA_INIT_BLOCKS(parent_dir->i_sb) + REISERFS_QUOTA_TRANS_BLOCKS(parent_dir->i_sb)); - vfs_dq_init(parent_dir); + dquot_initialize(parent_dir); if (!(inode = new_inode(parent_dir->i_sb))) { return -ENOMEM; @@ -1123,7 +1123,7 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir, JOURNAL_PER_BALANCE_CNT * 3 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); - vfs_dq_init(dir); + dquot_initialize(dir); reiserfs_write_lock(dir->i_sb); if (inode->i_nlink >= REISERFS_LINK_MAX) { @@ -1249,8 +1249,8 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry, JOURNAL_PER_BALANCE_CNT * 3 + 5 + 4 * REISERFS_QUOTA_TRANS_BLOCKS(old_dir->i_sb); - vfs_dq_init(old_dir); - vfs_dq_init(new_dir); + dquot_initialize(old_dir); + dquot_initialize(new_dir); old_inode = old_dentry->d_inode; new_dentry_inode = new_dentry->d_inode; diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 34f7cd0cb02d..04bf5d791bda 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -246,7 +246,7 @@ static int finish_unfinished(struct super_block *s) retval = remove_save_link_only(s, &save_link_key, 0); continue; } - vfs_dq_init(inode); + dquot_initialize(inode); if (truncate && S_ISDIR(inode->i_mode)) { /* We got a truncate request for a dir which is impossible. 
@@ -622,7 +622,6 @@ static int reiserfs_write_info(struct super_block *, int); static int reiserfs_quota_on(struct super_block *, int, int, char *, int); static const struct dquot_operations reiserfs_quota_operations = { - .initialize = dquot_initialize, .write_dquot = reiserfs_write_dquot, .acquire_dquot = reiserfs_acquire_dquot, .release_dquot = reiserfs_release_dquot, diff --git a/fs/udf/file.c b/fs/udf/file.c index 013fa44d9a5e..1eb06774ed90 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -228,7 +228,7 @@ static int udf_setattr(struct dentry *dentry, struct iattr *iattr) return error; if (iattr->ia_valid & ATTR_SIZE) - vfs_dq_init(inode); + dquot_initialize(inode); if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index 15c6e992e587..fb68c9cd0c3e 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c @@ -153,7 +153,7 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err) insert_inode_hash(inode); mark_inode_dirty(inode); - vfs_dq_init(inode); + dquot_initialize(inode); ret = dquot_alloc_inode(inode); if (ret) { dquot_drop(inode); diff --git a/fs/udf/inode.c b/fs/udf/inode.c index f19520268404..c7da1a32b364 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -72,7 +72,7 @@ static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int); void udf_delete_inode(struct inode *inode) { if (!is_bad_inode(inode)) - vfs_dq_init(inode); + dquot_initialize(inode); truncate_inode_pages(&inode->i_data, 0); diff --git a/fs/udf/namei.c b/fs/udf/namei.c index e360c3fc4ae4..96757e3e3e04 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -563,7 +563,7 @@ static int udf_create(struct inode *dir, struct dentry *dentry, int mode, int err; struct udf_inode_info *iinfo; - vfs_dq_init(dir); + dquot_initialize(dir); lock_kernel(); inode = udf_new_inode(dir, mode, &err); @@ -618,7 +618,7 @@ static int udf_mknod(struct inode *dir, struct dentry *dentry, int mode, if (!old_valid_dev(rdev)) return -EINVAL; - vfs_dq_init(dir); + dquot_initialize(dir); lock_kernel(); err = -EIO; @@ -666,7 +666,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode) struct udf_inode_info *dinfo = UDF_I(dir); struct udf_inode_info *iinfo; - vfs_dq_init(dir); + dquot_initialize(dir); lock_kernel(); err = -EMLINK; @@ -805,7 +805,7 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry) struct fileIdentDesc *fi, cfi; struct kernel_lb_addr tloc; - vfs_dq_init(dir); + dquot_initialize(dir); retval = -ENOENT; lock_kernel(); @@ -853,7 +853,7 @@ static int udf_unlink(struct inode *dir, struct dentry *dentry) struct fileIdentDesc cfi; struct kernel_lb_addr tloc; - vfs_dq_init(dir); + dquot_initialize(dir); retval = -ENOENT; lock_kernel(); @@ -909,7 +909,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry, struct buffer_head *bh; struct udf_inode_info *iinfo; - vfs_dq_init(dir); + dquot_initialize(dir); lock_kernel(); inode = udf_new_inode(dir, S_IFLNK, &err); @@ -1081,7 +1081,7 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir, int err; struct buffer_head *bh; - vfs_dq_init(dir); + dquot_initialize(dir); lock_kernel(); if (inode->i_nlink >= (256 << sizeof(inode->i_nlink)) - 1) { @@ -1145,8 +1145,8 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry, struct kernel_lb_addr tloc; struct udf_inode_info *old_iinfo = UDF_I(old_inode); - vfs_dq_init(old_dir); - vfs_dq_init(new_dir); + 
dquot_initialize(old_dir); + dquot_initialize(new_dir); lock_kernel(); ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi); diff --git a/fs/ufs/file.c b/fs/ufs/file.c index d84762f3028e..a8962cecde5b 100644 --- a/fs/ufs/file.c +++ b/fs/ufs/file.c @@ -24,6 +24,7 @@ */ #include +#include #include "ufs_fs.h" #include "ufs.h" diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 67b4bdb056fb..230ecf608026 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -355,7 +355,7 @@ cg_found: unlock_super (sb); - vfs_dq_init(inode); + dquot_initialize(inode); err = dquot_alloc_inode(inode); if (err) { dquot_drop(inode); diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index fff8edab382f..09aef49beedb 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -910,7 +910,7 @@ void ufs_delete_inode (struct inode * inode) loff_t old_i_size; if (!is_bad_inode(inode)) - vfs_dq_init(inode); + dquot_initialize(inode); truncate_inode_pages(&inode->i_data, 0); if (is_bad_inode(inode)) diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index c33cb90c516d..118556243e7a 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -86,7 +86,7 @@ static int ufs_create (struct inode * dir, struct dentry * dentry, int mode, UFSD("BEGIN\n"); - vfs_dq_init(dir); + dquot_initialize(dir); inode = ufs_new_inode(dir, mode); err = PTR_ERR(inode); @@ -112,7 +112,7 @@ static int ufs_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_t if (!old_valid_dev(rdev)) return -EINVAL; - vfs_dq_init(dir); + dquot_initialize(dir); inode = ufs_new_inode(dir, mode); err = PTR_ERR(inode); @@ -138,7 +138,7 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry, if (l > sb->s_blocksize) goto out_notlocked; - vfs_dq_init(dir); + dquot_initialize(dir); lock_kernel(); inode = ufs_new_inode(dir, S_IFLNK | S_IRWXUGO); @@ -185,7 +185,7 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir, return -EMLINK; } - vfs_dq_init(dir); + dquot_initialize(dir); inode->i_ctime = CURRENT_TIME_SEC; inode_inc_link_count(inode); @@ -204,7 +204,7 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, int mode) if (dir->i_nlink >= UFS_LINK_MAX) goto out; - vfs_dq_init(dir); + dquot_initialize(dir); lock_kernel(); inode_inc_link_count(dir); @@ -250,7 +250,7 @@ static int ufs_unlink(struct inode *dir, struct dentry *dentry) struct page *page; int err = -ENOENT; - vfs_dq_init(dir); + dquot_initialize(dir); de = ufs_find_entry(dir, &dentry->d_name, &page); if (!de) @@ -296,8 +296,8 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry, struct ufs_dir_entry *old_de; int err = -ENOENT; - vfs_dq_init(old_dir); - vfs_dq_init(new_dir); + dquot_initialize(old_dir); + dquot_initialize(new_dir); old_de = ufs_find_entry(old_dir, &old_dentry->d_name, &old_page); if (!old_de) diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index e5ef8a3ec230..d3b6270cb377 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c @@ -528,7 +528,7 @@ static int ufs_setattr(struct dentry *dentry, struct iattr *attr) attr->ia_size != i_size_read(inode)) { loff_t old_i_size = inode->i_size; - vfs_dq_init(inode); + dquot_initialize(inode); error = vmtruncate(inode, attr->ia_size); if (error) diff --git a/include/linux/quota.h b/include/linux/quota.h index aec2e9dac2d7..4aa93554f0eb 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -295,7 +295,6 @@ struct quota_format_ops { /* Operations working with dquots */ struct dquot_operations { - int (*initialize) (struct inode *, int); int (*write_dquot) (struct dquot *); /* 
Ordinary dquot write */ struct dquot *(*alloc_dquot)(struct super_block *, int); /* Allocate memory for new dquot */ void (*destroy_dquot)(struct dquot *); /* Free memory for dquot */ diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 93ac788345e2..e6fa7acce290 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -23,7 +23,7 @@ void inode_add_rsv_space(struct inode *inode, qsize_t number); void inode_claim_rsv_space(struct inode *inode, qsize_t number); void inode_sub_rsv_space(struct inode *inode, qsize_t number); -int dquot_initialize(struct inode *inode, int type); +void dquot_initialize(struct inode *inode); void dquot_drop(struct inode *inode); struct dquot *dqget(struct super_block *sb, unsigned int id, int type); void dqput(struct dquot *dquot); @@ -139,15 +139,6 @@ extern const struct quotactl_ops vfs_quotactl_ops; #define sb_dquot_ops (&dquot_operations) #define sb_quotactl_ops (&vfs_quotactl_ops) -/* It is better to call this function outside of any transaction as it might - * need a lot of space in journal for dquot structure allocation. */ -static inline void vfs_dq_init(struct inode *inode) -{ - BUG_ON(!inode->i_sb); - if (sb_any_quota_active(inode->i_sb) && !IS_NOQUOTA(inode)) - inode->i_sb->dq_op->initialize(inode, -1); -} - /* Cannot be called inside a transaction */ static inline int vfs_dq_off(struct super_block *sb, int remount) { @@ -207,7 +198,7 @@ static inline int sb_any_quota_active(struct super_block *sb) #define sb_dquot_ops (NULL) #define sb_quotactl_ops (NULL) -static inline void vfs_dq_init(struct inode *inode) +static inline void dquot_initialize(struct inode *inode) { } @@ -260,6 +251,8 @@ static inline int dquot_claim_space_nodirty(struct inode *inode, qsize_t number) return 0; } +#define dquot_file_open generic_file_open + #endif /* CONFIG_QUOTA */ static inline int dquot_alloc_space_nodirty(struct inode *inode, qsize_t nr) @@ -344,6 +337,4 @@ static inline void dquot_release_reservation_block(struct inode *inode, __dquot_free_space(inode, nr << inode->i_blkbits, 1); } -#define dquot_file_open generic_file_open - #endif /* _LINUX_QUOTAOPS_ */ -- cgit v1.2.3 From efd8f0e6f6c1faa041f228d7113bd3a9db802d49 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Mar 2010 09:05:08 -0500 Subject: quota: stop using QUOTA_OK / NO_QUOTA Just use 0 / -EDQUOT directly - that's what it translates to anyway. 
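As a quick illustration of this convention (a sketch with made-up helper names, not code from the patch below), a check routine now returns 0 on success and -EDQUOT on failure, and its caller passes that value straight through instead of translating QUOTA_OK/NO_QUOTA, which is exactly what the check_idq()/check_bdq() hunks below switch their callers to:

/*
 * Illustrative sketch only; the helper names are made up and this is not
 * code from the patch. It shows the calling convention the patch moves
 * to: a check returns 0 or -EDQUOT and callers propagate that value
 * unchanged. EDQUOT comes from <errno.h> on Linux.
 */
#include <errno.h>
#include <stdio.h>

static int check_limit(long new_usage, long limit)
{
	if (limit && new_usage > limit)
		return -EDQUOT;		/* old code: return NO_QUOTA; */
	return 0;			/* old code: return QUOTA_OK; */
}

static int charge_block(long *usage, long limit)
{
	int ret = check_limit(*usage + 1, limit);

	if (ret)
		return ret;		/* propagate -EDQUOT, no translation */
	(*usage)++;
	return 0;
}

int main(void)
{
	long used = 9;

	printf("%d\n", charge_block(&used, 10));	/* prints 0 */
	printf("%d\n", charge_block(&used, 10));	/* prints -EDQUOT (-122 on Linux) */
	return 0;
}

The same convention lets __dquot_transfer() below return the error directly, so dquot_transfer() can simply return its result instead of comparing against NO_QUOTA and mapping that to -EDQUOT.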
Signed-off-by: Christoph Hellwig Signed-off-by: Jan Kara --- fs/quota/dquot.c | 48 ++++++++++++++++++++++++------------------------ include/linux/quota.h | 3 --- 2 files changed, 24 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 3c0a7e0dff78..e0b870f4749f 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -1181,13 +1181,13 @@ static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype) *warntype = QUOTA_NL_NOWARN; if (!sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_type) || test_bit(DQ_FAKE_B, &dquot->dq_flags)) - return QUOTA_OK; + return 0; if (dquot->dq_dqb.dqb_ihardlimit && newinodes > dquot->dq_dqb.dqb_ihardlimit && !ignore_hardlimit(dquot)) { *warntype = QUOTA_NL_IHARDWARN; - return NO_QUOTA; + return -EDQUOT; } if (dquot->dq_dqb.dqb_isoftlimit && @@ -1196,7 +1196,7 @@ static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype) get_seconds() >= dquot->dq_dqb.dqb_itime && !ignore_hardlimit(dquot)) { *warntype = QUOTA_NL_ISOFTLONGWARN; - return NO_QUOTA; + return -EDQUOT; } if (dquot->dq_dqb.dqb_isoftlimit && @@ -1207,7 +1207,7 @@ static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype) sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_igrace; } - return QUOTA_OK; + return 0; } /* needs dq_data_lock */ @@ -1219,7 +1219,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war *warntype = QUOTA_NL_NOWARN; if (!sb_has_quota_limits_enabled(sb, dquot->dq_type) || test_bit(DQ_FAKE_B, &dquot->dq_flags)) - return QUOTA_OK; + return 0; tspace = dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace + space; @@ -1229,7 +1229,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war !ignore_hardlimit(dquot)) { if (!prealloc) *warntype = QUOTA_NL_BHARDWARN; - return NO_QUOTA; + return -EDQUOT; } if (dquot->dq_dqb.dqb_bsoftlimit && @@ -1239,7 +1239,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war !ignore_hardlimit(dquot)) { if (!prealloc) *warntype = QUOTA_NL_BSOFTLONGWARN; - return NO_QUOTA; + return -EDQUOT; } if (dquot->dq_dqb.dqb_bsoftlimit && @@ -1255,10 +1255,10 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war * We don't allow preallocation to exceed softlimit so exceeding will * be always printed */ - return NO_QUOTA; + return -EDQUOT; } - return QUOTA_OK; + return 0; } static int info_idq_free(struct dquot *dquot, qsize_t inodes) @@ -1507,9 +1507,9 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (!inode->i_dquot[cnt]) continue; - if (check_bdq(inode->i_dquot[cnt], number, !warn, warntype+cnt) - == NO_QUOTA) { - ret = -EDQUOT; + ret = check_bdq(inode->i_dquot[cnt], number, !warn, + warntype+cnt); + if (ret) { spin_unlock(&dq_data_lock); goto out_flush_warn; } @@ -1541,7 +1541,7 @@ EXPORT_SYMBOL(__dquot_alloc_space); */ int dquot_alloc_inode(const struct inode *inode) { - int cnt, ret = -EDQUOT; + int cnt, ret = 0; char warntype[MAXQUOTAS]; /* First test before acquiring mutex - solves deadlocks when we @@ -1555,8 +1555,8 @@ int dquot_alloc_inode(const struct inode *inode) for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (!inode->i_dquot[cnt]) continue; - if (check_idq(inode->i_dquot[cnt], 1, warntype+cnt) - == NO_QUOTA) + ret = check_idq(inode->i_dquot[cnt], 1, warntype + cnt); + if (ret) goto warn_put_all; } @@ -1565,7 +1565,7 @@ int dquot_alloc_inode(const struct inode *inode) continue; 
dquot_incr_inodes(inode->i_dquot[cnt], 1); } - ret = 0; + warn_put_all: spin_unlock(&dq_data_lock); if (ret == 0) @@ -1683,14 +1683,14 @@ static int __dquot_transfer(struct inode *inode, qid_t *chid, unsigned long mask qsize_t rsv_space = 0; struct dquot *transfer_from[MAXQUOTAS]; struct dquot *transfer_to[MAXQUOTAS]; - int cnt, ret = QUOTA_OK; + int cnt, ret = 0; char warntype_to[MAXQUOTAS]; char warntype_from_inodes[MAXQUOTAS], warntype_from_space[MAXQUOTAS]; /* First test before acquiring mutex - solves deadlocks when we * re-enter the quota code and are already holding the mutex */ if (IS_NOQUOTA(inode)) - return QUOTA_OK; + return 0; /* Initialize the arrays */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { transfer_from[cnt] = NULL; @@ -1715,9 +1715,11 @@ static int __dquot_transfer(struct inode *inode, qid_t *chid, unsigned long mask if (!transfer_to[cnt]) continue; transfer_from[cnt] = inode->i_dquot[cnt]; - if (check_idq(transfer_to[cnt], 1, warntype_to + cnt) == - NO_QUOTA || check_bdq(transfer_to[cnt], space, 0, - warntype_to + cnt) == NO_QUOTA) + ret = check_idq(transfer_to[cnt], 1, warntype_to + cnt); + if (ret) + goto over_quota; + ret = check_bdq(transfer_to[cnt], space, 0, warntype_to + cnt); + if (ret) goto over_quota; } @@ -1771,7 +1773,6 @@ over_quota: /* Clear dquot pointers we don't want to dqput() */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) transfer_from[cnt] = NULL; - ret = NO_QUOTA; goto warn_put_all; } @@ -1793,8 +1794,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr) } if (sb_any_quota_active(inode->i_sb) && !IS_NOQUOTA(inode)) { dquot_initialize(inode); - if (__dquot_transfer(inode, chid, mask) == NO_QUOTA) - return -EDQUOT; + return __dquot_transfer(inode, chid, mask); } return 0; } diff --git a/include/linux/quota.h b/include/linux/quota.h index 4aa93554f0eb..b462916b2a0a 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -279,9 +279,6 @@ struct dquot { struct mem_dqblk dq_dqb; /* Diskquota usage */ }; -#define QUOTA_OK 0 -#define NO_QUOTA 1 - /* Operations which must be implemented by each quota format */ struct quota_format_ops { int (*check_quota_file)(struct super_block *sb, int type); /* Detect whether file is in our format */ -- cgit v1.2.3 From 2b9ddcb8b2ce6a44f0f969000f16b016caa64294 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Thu, 4 Mar 2010 19:46:18 +0100 Subject: ALSA: usb/audio.h: Fix field order Signed-off-by: Daniel Mack Cc: Clemens Ladisch Signed-off-by: Takashi Iwai --- include/linux/usb/audio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/usb/audio.h b/include/linux/usb/audio.h index 6bb293684eb8..4d3e450e2b03 100644 --- a/include/linux/usb/audio.h +++ b/include/linux/usb/audio.h @@ -269,8 +269,8 @@ struct uac_format_type_i_ext_descriptor { __u8 bLength; __u8 bDescriptorType; __u8 bDescriptorSubtype; - __u8 bSubslotSize; __u8 bFormatType; + __u8 bSubslotSize; __u8 bBitResolution; __u8 bHeaderLength; __u8 bControlSize; -- cgit v1.2.3 From a9185b41a4f84971b930c519f0c63bd450c4810d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 5 Mar 2010 09:21:37 +0100 Subject: pass writeback_control to ->write_inode This gives the filesystem more information about the writeback that is happening. Trond requested this for the NFS unstable write handling, and other filesystems might benefit from this too by beeing able to distinguish between the different callers in more detail. 
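For orientation before the per-filesystem hunks: the most common shape of the conversion (a sketch using a made-up filesystem name "foo", not code from the patch; fat, omfs, sysv and ntfs below follow it) is to keep the old int-flag routine as an internal helper and derive the flag from wbc->sync_mode in the new ->write_inode:

/*
 * Sketch of the conversion pattern. "foo" is a made-up filesystem name and
 * the struct definitions are minimal stand-ins so the sketch compiles on
 * its own; in the kernel, writeback_control and WB_SYNC_ALL come from
 * <linux/writeback.h> and struct inode from <linux/fs.h>.
 */
#include <stdio.h>

enum writeback_sync_modes { WB_SYNC_NONE, WB_SYNC_ALL };

struct writeback_control {
	enum writeback_sync_modes sync_mode;
};

struct inode {
	unsigned long i_ino;
};

/* Old-style body: does the actual work, honouring an explicit wait flag. */
static int __foo_write_inode(struct inode *inode, int wait)
{
	printf("writing inode %lu, wait=%d\n", inode->i_ino, wait);
	return 0;
}

/* New ->write_inode signature: the core now hands down the writeback_control. */
static int foo_write_inode(struct inode *inode, struct writeback_control *wbc)
{
	return __foo_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
}

/* Callers that always want a synchronous write keep using the helper. */
static int foo_sync_inode(struct inode *inode)
{
	return __foo_write_inode(inode, 1);
}

int main(void)
{
	struct inode ino = { .i_ino = 42 };
	struct writeback_control wbc = { .sync_mode = WB_SYNC_NONE };

	foo_write_inode(&ino, &wbc);	/* wait=0 */
	foo_sync_inode(&ino);		/* wait=1 */
	return 0;
}

Filesystems such as ext3, ext4, gfs2, nfs and xfs instead test wbc->sync_mode == WB_SYNC_ALL directly in their existing routine, as their hunks below show.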
Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/adfs/adfs.h | 2 +- fs/adfs/inode.c | 5 +++-- fs/affs/affs.h | 3 ++- fs/affs/inode.c | 2 +- fs/bfs/inode.c | 5 +++-- fs/btrfs/ctree.h | 2 +- fs/btrfs/inode.c | 4 ++-- fs/exofs/exofs.h | 2 +- fs/exofs/inode.c | 4 ++-- fs/ext2/ext2.h | 2 +- fs/ext2/inode.c | 11 +++++++++-- fs/ext3/inode.c | 4 ++-- fs/ext4/ext4.h | 2 +- fs/ext4/inode.c | 6 +++--- fs/fat/inode.c | 9 +++++++-- fs/fs-writeback.c | 11 +++++------ fs/gfs2/super.c | 5 +++-- fs/hfs/hfs_fs.h | 2 +- fs/hfs/inode.c | 2 +- fs/hfsplus/super.c | 3 ++- fs/jfs/inode.c | 5 ++++- fs/jfs/jfs_inode.h | 2 +- fs/minix/inode.c | 8 +++++--- fs/nfs/inode.c | 5 +++-- fs/nfs/internal.h | 2 +- fs/ntfs/dir.c | 2 +- fs/ntfs/file.c | 2 +- fs/ntfs/inode.c | 2 +- fs/ntfs/inode.h | 4 ++-- fs/ntfs/super.c | 8 ++++++++ fs/omfs/inode.c | 10 ++++++++-- fs/reiserfs/inode.c | 4 ++-- fs/sysv/inode.c | 10 ++++++++-- fs/sysv/sysv.h | 2 +- fs/ubifs/dir.c | 2 +- fs/ubifs/file.c | 8 ++++---- fs/ubifs/super.c | 2 +- fs/udf/inode.c | 4 ++-- fs/udf/udfdecl.h | 2 +- fs/ufs/inode.c | 5 +++-- fs/ufs/ufs.h | 2 +- fs/xfs/linux-2.6/xfs_super.c | 4 ++-- include/linux/ext3_fs.h | 2 +- include/linux/fs.h | 2 +- include/linux/reiserfs_fs.h | 2 +- 45 files changed, 115 insertions(+), 72 deletions(-) (limited to 'include') diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h index 9cc18775b832..2ff622f6f547 100644 --- a/fs/adfs/adfs.h +++ b/fs/adfs/adfs.h @@ -121,7 +121,7 @@ struct adfs_discmap { /* Inode stuff */ struct inode *adfs_iget(struct super_block *sb, struct object_info *obj); -int adfs_write_inode(struct inode *inode,int unused); +int adfs_write_inode(struct inode *inode, struct writeback_control *wbc); int adfs_notify_change(struct dentry *dentry, struct iattr *attr); /* map.c */ diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c index 3f57ce4bee5d..0f5e30978135 100644 --- a/fs/adfs/inode.c +++ b/fs/adfs/inode.c @@ -9,6 +9,7 @@ */ #include #include +#include #include "adfs.h" /* @@ -360,7 +361,7 @@ out: * The adfs-specific inode data has already been updated by * adfs_notify_change() */ -int adfs_write_inode(struct inode *inode, int wait) +int adfs_write_inode(struct inode *inode, struct writeback_control *wbc) { struct super_block *sb = inode->i_sb; struct object_info obj; @@ -375,7 +376,7 @@ int adfs_write_inode(struct inode *inode, int wait) obj.attr = ADFS_I(inode)->attr; obj.size = inode->i_size; - ret = adfs_dir_update(sb, &obj, wait); + ret = adfs_dir_update(sb, &obj, wbc->sync_mode == WB_SYNC_ALL); unlock_kernel(); return ret; } diff --git a/fs/affs/affs.h b/fs/affs/affs.h index 0e40caaba456..861dae68ac12 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -175,7 +175,8 @@ extern void affs_delete_inode(struct inode *inode); extern void affs_clear_inode(struct inode *inode); extern struct inode *affs_iget(struct super_block *sb, unsigned long ino); -extern int affs_write_inode(struct inode *inode, int); +extern int affs_write_inode(struct inode *inode, + struct writeback_control *wbc); extern int affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s32 type); /* file.c */ diff --git a/fs/affs/inode.c b/fs/affs/inode.c index 3c4ec7d864c4..c9744d771d98 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c @@ -166,7 +166,7 @@ bad_inode: } int -affs_write_inode(struct inode *inode, int unused) +affs_write_inode(struct inode *inode, struct writeback_control *wbc) { struct super_block *sb = inode->i_sb; struct buffer_head *bh; diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 
8f3d9fd89604..f22a7d3dc362 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "bfs.h" @@ -98,7 +99,7 @@ error: return ERR_PTR(-EIO); } -static int bfs_write_inode(struct inode *inode, int wait) +static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc) { struct bfs_sb_info *info = BFS_SB(inode->i_sb); unsigned int ino = (u16)inode->i_ino; @@ -147,7 +148,7 @@ static int bfs_write_inode(struct inode *inode, int wait) di->i_eoffset = cpu_to_le32(i_sblock * BFS_BSIZE + inode->i_size - 1); mark_buffer_dirty(bh); - if (wait) { + if (wbc->sync_mode == WB_SYNC_ALL) { sync_dirty_buffer(bh); if (buffer_req(bh) && !buffer_uptodate(bh)) err = -EIO; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2aa8ec6a0981..8b5cfdd4bfc1 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2326,7 +2326,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); int btrfs_readpage(struct file *file, struct page *page); void btrfs_delete_inode(struct inode *inode); void btrfs_put_inode(struct inode *inode); -int btrfs_write_inode(struct inode *inode, int wait); +int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc); void btrfs_dirty_inode(struct inode *inode); struct inode *btrfs_alloc_inode(struct super_block *sb); void btrfs_destroy_inode(struct inode *inode); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4deb280f8969..c41db6d45ab6 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3968,7 +3968,7 @@ err: return ret; } -int btrfs_write_inode(struct inode *inode, int wait) +int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; @@ -3977,7 +3977,7 @@ int btrfs_write_inode(struct inode *inode, int wait) if (root->fs_info->btree_inode == inode) return 0; - if (wait) { + if (wbc->sync_mode == WB_SYNC_ALL) { trans = btrfs_join_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); ret = btrfs_commit_transaction(trans, root); diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h index 59b8bf2825c7..8442e353309f 100644 --- a/fs/exofs/exofs.h +++ b/fs/exofs/exofs.h @@ -261,7 +261,7 @@ int exofs_write_begin(struct file *file, struct address_space *mapping, struct page **pagep, void **fsdata); extern struct inode *exofs_iget(struct super_block *, unsigned long); struct inode *exofs_new_inode(struct inode *, int); -extern int exofs_write_inode(struct inode *, int); +extern int exofs_write_inode(struct inode *, struct writeback_control *wbc); extern void exofs_delete_inode(struct inode *); /* dir.c: */ diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index 5514f3c2c2f4..a17e4b733e35 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -1280,9 +1280,9 @@ out: return ret; } -int exofs_write_inode(struct inode *inode, int wait) +int exofs_write_inode(struct inode *inode, struct writeback_control *wbc) { - return exofs_update_inode(inode, wait); + return exofs_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL); } /* diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 061914add3cf..0b038e47ad2f 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -118,7 +118,7 @@ extern unsigned long ext2_count_free (struct buffer_head *, unsigned); /* inode.c */ extern struct inode *ext2_iget (struct super_block *, unsigned long); -extern int ext2_write_inode (struct inode *, int); +extern int ext2_write_inode (struct inode *, struct writeback_control *); extern void ext2_delete_inode 
(struct inode *); extern int ext2_sync_inode (struct inode *); extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int); diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 71b032c65a02..36ae1cac767c 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -41,6 +41,8 @@ MODULE_AUTHOR("Remy Card and others"); MODULE_DESCRIPTION("Second Extended Filesystem"); MODULE_LICENSE("GPL"); +static int __ext2_write_inode(struct inode *inode, int do_sync); + /* * Test whether an inode is a fast symlink. */ @@ -64,7 +66,7 @@ void ext2_delete_inode (struct inode * inode) goto no_delete; EXT2_I(inode)->i_dtime = get_seconds(); mark_inode_dirty(inode); - ext2_write_inode(inode, inode_needs_sync(inode)); + __ext2_write_inode(inode, inode_needs_sync(inode)); inode->i_size = 0; if (inode->i_blocks) @@ -1335,7 +1337,7 @@ bad_inode: return ERR_PTR(ret); } -int ext2_write_inode(struct inode *inode, int do_sync) +static int __ext2_write_inode(struct inode *inode, int do_sync) { struct ext2_inode_info *ei = EXT2_I(inode); struct super_block *sb = inode->i_sb; @@ -1440,6 +1442,11 @@ int ext2_write_inode(struct inode *inode, int do_sync) return err; } +int ext2_write_inode(struct inode *inode, struct writeback_control *wbc) +{ + return __ext2_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL); +} + int ext2_sync_inode(struct inode *inode) { struct writeback_control wbc = { diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 455e6e6e5cb9..7aca55fcc976 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -3096,7 +3096,7 @@ out_brelse: * `stuff()' is running, and the new i_size will be lost. Plus the inode * will no longer be on the superblock's dirty inode list. */ -int ext3_write_inode(struct inode *inode, int wait) +int ext3_write_inode(struct inode *inode, struct writeback_control *wbc) { if (current->flags & PF_MEMALLOC) return 0; @@ -3107,7 +3107,7 @@ int ext3_write_inode(struct inode *inode, int wait) return -EIO; } - if (!wait) + if (wbc->sync_mode != WB_SYNC_ALL) return 0; return ext3_force_commit(inode->i_sb); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 4cedc91ec59d..50af1a2c65e7 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1416,7 +1416,7 @@ int ext4_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create); extern struct inode *ext4_iget(struct super_block *, unsigned long); -extern int ext4_write_inode(struct inode *, int); +extern int ext4_write_inode(struct inode *, struct writeback_control *); extern int ext4_setattr(struct dentry *, struct iattr *); extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index e11952404e02..d01a6cdbf854 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5177,7 +5177,7 @@ out_brelse: * `stuff()' is running, and the new i_size will be lost. Plus the inode * will no longer be on the superblock's dirty inode list. 
*/ -int ext4_write_inode(struct inode *inode, int wait) +int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) { int err; @@ -5191,7 +5191,7 @@ int ext4_write_inode(struct inode *inode, int wait) return -EIO; } - if (!wait) + if (wbc->sync_mode != WB_SYNC_ALL) return 0; err = ext4_force_commit(inode->i_sb); @@ -5201,7 +5201,7 @@ int ext4_write_inode(struct inode *inode, int wait) err = ext4_get_inode_loc(inode, &iloc); if (err) return err; - if (wait) + if (wbc->sync_mode == WB_SYNC_ALL) sync_dirty_buffer(iloc.bh); if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { ext4_error(inode->i_sb, __func__, diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 14da530b05ca..fbeecdc194dc 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -577,7 +577,7 @@ static inline loff_t fat_i_pos_read(struct msdos_sb_info *sbi, return i_pos; } -static int fat_write_inode(struct inode *inode, int wait) +static int __fat_write_inode(struct inode *inode, int wait) { struct super_block *sb = inode->i_sb; struct msdos_sb_info *sbi = MSDOS_SB(sb); @@ -634,9 +634,14 @@ retry: return err; } +static int fat_write_inode(struct inode *inode, struct writeback_control *wbc) +{ + return __fat_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL); +} + int fat_sync_inode(struct inode *inode) { - return fat_write_inode(inode, 1); + return __fat_write_inode(inode, 1); } EXPORT_SYMBOL_GPL(fat_sync_inode); diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 5f2721b1e4be..76fc4d594acb 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -381,10 +381,10 @@ static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this) move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this); } -static int write_inode(struct inode *inode, int sync) +static int write_inode(struct inode *inode, struct writeback_control *wbc) { if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) - return inode->i_sb->s_op->write_inode(inode, sync); + return inode->i_sb->s_op->write_inode(inode, wbc); return 0; } @@ -421,7 +421,6 @@ static int writeback_single_inode(struct inode *inode, struct writeback_control *wbc) { struct address_space *mapping = inode->i_mapping; - int wait = wbc->sync_mode == WB_SYNC_ALL; unsigned dirty; int ret; @@ -439,7 +438,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) * We'll have another go at writing back this inode when we * completed a full scan of b_io. */ - if (!wait) { + if (wbc->sync_mode != WB_SYNC_ALL) { requeue_io(inode); return 0; } @@ -466,7 +465,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) * This is important for filesystems that modify metadata on data * I/O completion. 
*/ - if (wait) { + if (wbc->sync_mode == WB_SYNC_ALL) { int err = filemap_fdatawait(mapping); if (ret == 0) ret = err; @@ -474,7 +473,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) /* Don't write the inode if only I_DIRTY_PAGES was set */ if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { - int err = write_inode(inode, wait); + int err = write_inode(inode, wbc); if (ret == 0) ret = err; } diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index e5e22629da67..ca87598ead7f 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "gfs2.h" #include "incore.h" @@ -711,7 +712,7 @@ void gfs2_unfreeze_fs(struct gfs2_sbd *sdp) * Returns: errno */ -static int gfs2_write_inode(struct inode *inode, int sync) +static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); @@ -745,7 +746,7 @@ static int gfs2_write_inode(struct inode *inode, int sync) do_unlock: gfs2_glock_dq_uninit(&gh); do_flush: - if (sync != 0) + if (wbc->sync_mode == WB_SYNC_ALL) gfs2_log_flush(GFS2_SB(inode), ip->i_gl); return ret; } diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h index 052387e11671..fe35e3b626c4 100644 --- a/fs/hfs/hfs_fs.h +++ b/fs/hfs/hfs_fs.h @@ -188,7 +188,7 @@ extern const struct address_space_operations hfs_btree_aops; extern struct inode *hfs_new_inode(struct inode *, struct qstr *, int); extern void hfs_inode_write_fork(struct inode *, struct hfs_extent *, __be32 *, __be32 *); -extern int hfs_write_inode(struct inode *, int); +extern int hfs_write_inode(struct inode *, struct writeback_control *); extern int hfs_inode_setattr(struct dentry *, struct iattr *); extern void hfs_inode_read_fork(struct inode *inode, struct hfs_extent *ext, __be32 log_size, __be32 phys_size, u32 clump_size); diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index a1cbff2b4d99..14f5cb1b9fdc 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -381,7 +381,7 @@ void hfs_inode_write_fork(struct inode *inode, struct hfs_extent *ext, HFS_SB(inode->i_sb)->alloc_blksz); } -int hfs_write_inode(struct inode *inode, int unused) +int hfs_write_inode(struct inode *inode, struct writeback_control *wbc) { struct inode *main_inode = inode; struct hfs_find_data fd; diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 43022f3d5148..74b473a8ef92 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -87,7 +87,8 @@ bad_inode: return ERR_PTR(err); } -static int hfsplus_write_inode(struct inode *inode, int unused) +static int hfsplus_write_inode(struct inode *inode, + struct writeback_control *wbc) { struct hfsplus_vh *vhdr; int ret = 0; diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index b2ae190a77ba..182b78cc3e62 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "jfs_incore.h" #include "jfs_inode.h" #include "jfs_filsys.h" @@ -120,8 +121,10 @@ int jfs_commit_inode(struct inode *inode, int wait) return rc; } -int jfs_write_inode(struct inode *inode, int wait) +int jfs_write_inode(struct inode *inode, struct writeback_control *wbc) { + int wait = wbc->sync_mode == WB_SYNC_ALL; + if (test_cflag(COMMIT_Nolink, inode)) return 0; /* diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h index 1eff7db34d63..15902b03c2a7 100644 --- a/fs/jfs/jfs_inode.h +++ b/fs/jfs/jfs_inode.h @@ -26,7 +26,7 @@ extern long jfs_ioctl(struct file *, unsigned int, unsigned long); extern long jfs_compat_ioctl(struct file *, unsigned 
int, unsigned long); extern struct inode *jfs_iget(struct super_block *, unsigned long); extern int jfs_commit_inode(struct inode *, int); -extern int jfs_write_inode(struct inode*, int); +extern int jfs_write_inode(struct inode *, struct writeback_control *); extern void jfs_delete_inode(struct inode *); extern void jfs_dirty_inode(struct inode *); extern void jfs_truncate(struct inode *); diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 74ea82d72164..756f8c93780c 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -17,8 +17,10 @@ #include #include #include +#include -static int minix_write_inode(struct inode * inode, int wait); +static int minix_write_inode(struct inode *inode, + struct writeback_control *wbc); static int minix_statfs(struct dentry *dentry, struct kstatfs *buf); static int minix_remount (struct super_block * sb, int * flags, char * data); @@ -552,7 +554,7 @@ static struct buffer_head * V2_minix_update_inode(struct inode * inode) return bh; } -static int minix_write_inode(struct inode *inode, int wait) +static int minix_write_inode(struct inode *inode, struct writeback_control *wbc) { int err = 0; struct buffer_head *bh; @@ -563,7 +565,7 @@ static int minix_write_inode(struct inode *inode, int wait) bh = V2_minix_update_inode(inode); if (!bh) return -EIO; - if (wait && buffer_dirty(bh)) { + if (wbc->sync_mode == WB_SYNC_ALL && buffer_dirty(bh)) { sync_dirty_buffer(bh); if (buffer_req(bh) && !buffer_uptodate(bh)) { printk("IO error syncing minix inode [%s:%08lx]\n", diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 5ecd952cae1d..7f9ecc46f3fb 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -97,11 +97,12 @@ u64 nfs_compat_user_ino64(u64 fileid) return ino; } -int nfs_write_inode(struct inode *inode, int sync) +int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) { int ret; - ret = nfs_commit_inode(inode, sync ? FLUSH_SYNC : 0); + ret = nfs_commit_inode(inode, + wbc->sync_mode == WB_SYNC_ALL ? 
FLUSH_SYNC : 0); if (ret >= 0) return 0; __mark_inode_dirty(inode, I_DIRTY_DATASYNC); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 29e464d23b32..11f82f03c5de 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -211,7 +211,7 @@ extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask); extern struct workqueue_struct *nfsiod_workqueue; extern struct inode *nfs_alloc_inode(struct super_block *sb); extern void nfs_destroy_inode(struct inode *); -extern int nfs_write_inode(struct inode *,int); +extern int nfs_write_inode(struct inode *, struct writeback_control *); extern void nfs_clear_inode(struct inode *); #ifdef CONFIG_NFS_V4 extern void nfs4_clear_inode(struct inode *); diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c index 5a9e34475e37..9173e82a45d1 100644 --- a/fs/ntfs/dir.c +++ b/fs/ntfs/dir.c @@ -1545,7 +1545,7 @@ static int ntfs_dir_fsync(struct file *filp, struct dentry *dentry, write_inode_now(bmp_vi, !datasync); iput(bmp_vi); } - ret = ntfs_write_inode(vi, 1); + ret = __ntfs_write_inode(vi, 1); write_inode_now(vi, !datasync); err = sync_blockdev(vi->i_sb->s_bdev); if (unlikely(err && !ret)) diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 43179ddd336f..b681c71d7069 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -2182,7 +2182,7 @@ static int ntfs_file_fsync(struct file *filp, struct dentry *dentry, ntfs_debug("Entering for inode 0x%lx.", vi->i_ino); BUG_ON(S_ISDIR(vi->i_mode)); if (!datasync || !NInoNonResident(NTFS_I(vi))) - ret = ntfs_write_inode(vi, 1); + ret = __ntfs_write_inode(vi, 1); write_inode_now(vi, !datasync); /* * NOTE: If we were to use mapping->private_list (see ext2 and diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index dc2505abb6d7..4b57fb1eac2a 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -2957,7 +2957,7 @@ out: * * Return 0 on success and -errno on error. */ -int ntfs_write_inode(struct inode *vi, int sync) +int __ntfs_write_inode(struct inode *vi, int sync) { sle64 nt; ntfs_inode *ni = NTFS_I(vi); diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h index 117eaf8032a3..9a113544605d 100644 --- a/fs/ntfs/inode.h +++ b/fs/ntfs/inode.h @@ -307,12 +307,12 @@ extern void ntfs_truncate_vfs(struct inode *vi); extern int ntfs_setattr(struct dentry *dentry, struct iattr *attr); -extern int ntfs_write_inode(struct inode *vi, int sync); +extern int __ntfs_write_inode(struct inode *vi, int sync); static inline void ntfs_commit_inode(struct inode *vi) { if (!is_bad_inode(vi)) - ntfs_write_inode(vi, 1); + __ntfs_write_inode(vi, 1); return; } diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 80b04770e8e9..1cf39dfaee7a 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -39,6 +39,7 @@ #include "dir.h" #include "debug.h" #include "index.h" +#include "inode.h" #include "aops.h" #include "layout.h" #include "malloc.h" @@ -2662,6 +2663,13 @@ static int ntfs_statfs(struct dentry *dentry, struct kstatfs *sfs) return 0; } +#ifdef NTFS_RW +static int ntfs_write_inode(struct inode *vi, struct writeback_control *wbc) +{ + return __ntfs_write_inode(vi, wbc->sync_mode == WB_SYNC_ALL); +} +#endif + /** * The complete super operations. 
*/ diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c index f3b7c1541f3a..75d9b5ba1d45 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include "omfs.h" @@ -89,7 +90,7 @@ static void omfs_update_checksums(struct omfs_inode *oi) oi->i_head.h_check_xor = xor; } -static int omfs_write_inode(struct inode *inode, int wait) +static int __omfs_write_inode(struct inode *inode, int wait) { struct omfs_inode *oi; struct omfs_sb_info *sbi = OMFS_SB(inode->i_sb); @@ -162,9 +163,14 @@ out: return ret; } +static int omfs_write_inode(struct inode *inode, struct writeback_control *wbc) +{ + return __omfs_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL); +} + int omfs_sync_inode(struct inode *inode) { - return omfs_write_inode(inode, 1); + return __omfs_write_inode(inode, 1); } /* diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 2df0f5c7c60b..0d651f980a8d 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -1615,7 +1615,7 @@ int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp, ** to properly mark inodes for datasync and such, but only actually ** does something when called for a synchronous update. */ -int reiserfs_write_inode(struct inode *inode, int do_sync) +int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc) { struct reiserfs_transaction_handle th; int jbegin_count = 1; @@ -1627,7 +1627,7 @@ int reiserfs_write_inode(struct inode *inode, int do_sync) ** inode needs to reach disk for safety, and they can safely be ** ignored because the altered inode has already been logged. */ - if (do_sync && !(current->flags & PF_MEMALLOC)) { + if (wbc->sync_mode == WB_SYNC_ALL && !(current->flags & PF_MEMALLOC)) { reiserfs_write_lock(inode->i_sb); if (!journal_begin(&th, inode->i_sb, jbegin_count)) { reiserfs_update_sd(&th, inode); diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 9824743832a7..4573734d723d 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include "sysv.h" @@ -246,7 +247,7 @@ bad_inode: return ERR_PTR(-EIO); } -int sysv_write_inode(struct inode *inode, int wait) +static int __sysv_write_inode(struct inode *inode, int wait) { struct super_block * sb = inode->i_sb; struct sysv_sb_info * sbi = SYSV_SB(sb); @@ -296,9 +297,14 @@ int sysv_write_inode(struct inode *inode, int wait) return 0; } +int sysv_write_inode(struct inode *inode, struct writeback_control *wbc) +{ + return __sysv_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL); +} + int sysv_sync_inode(struct inode *inode) { - return sysv_write_inode(inode, 1); + return __sysv_write_inode(inode, 1); } static void sysv_delete_inode(struct inode *inode) diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h index 53786eb5cf60..94cb9b4d76c2 100644 --- a/fs/sysv/sysv.h +++ b/fs/sysv/sysv.h @@ -142,7 +142,7 @@ extern int __sysv_write_begin(struct file *file, struct address_space *mapping, /* inode.c */ extern struct inode *sysv_iget(struct super_block *, unsigned int); -extern int sysv_write_inode(struct inode *, int); +extern int sysv_write_inode(struct inode *, struct writeback_control *wbc); extern int sysv_sync_inode(struct inode *); extern void sysv_set_inode(struct inode *, dev_t); extern int sysv_getattr(struct vfsmount *, struct dentry *, struct kstat *); diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 552fb0111fff..401e503d44a1 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -1120,7 +1120,7 @@ static int ubifs_rename(struct inode *old_dir, struct 
dentry *old_dentry, if (release) ubifs_release_budget(c, &ino_req); if (IS_SYNC(old_inode)) - err = old_inode->i_sb->s_op->write_inode(old_inode, 1); + err = old_inode->i_sb->s_op->write_inode(old_inode, NULL); return err; out_cancel: diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 16a6444330ec..e26c02ab6cd5 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1011,7 +1011,7 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc) /* Is the page fully inside @i_size? */ if (page->index < end_index) { if (page->index >= synced_i_size >> PAGE_CACHE_SHIFT) { - err = inode->i_sb->s_op->write_inode(inode, 1); + err = inode->i_sb->s_op->write_inode(inode, NULL); if (err) goto out_unlock; /* @@ -1039,7 +1039,7 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc) kunmap_atomic(kaddr, KM_USER0); if (i_size > synced_i_size) { - err = inode->i_sb->s_op->write_inode(inode, 1); + err = inode->i_sb->s_op->write_inode(inode, NULL); if (err) goto out_unlock; } @@ -1242,7 +1242,7 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode, if (release) ubifs_release_budget(c, &req); if (IS_SYNC(inode)) - err = inode->i_sb->s_op->write_inode(inode, 1); + err = inode->i_sb->s_op->write_inode(inode, NULL); return err; out: @@ -1316,7 +1316,7 @@ int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync) * the inode unless this is a 'datasync()' call. */ if (!datasync || (inode->i_state & I_DIRTY_DATASYNC)) { - err = inode->i_sb->s_op->write_inode(inode, 1); + err = inode->i_sb->s_op->write_inode(inode, NULL); if (err) return err; } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 43f9d19a6f33..4d2f2157dd3f 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -283,7 +283,7 @@ static void ubifs_destroy_inode(struct inode *inode) /* * Note, Linux write-back code calls this without 'i_mutex'. 
*/ -static int ubifs_write_inode(struct inode *inode, int wait) +static int ubifs_write_inode(struct inode *inode, struct writeback_control *wbc) { int err = 0; struct ubifs_info *c = inode->i_sb->s_fs_info; diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 378a7592257c..b02089247296 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1373,12 +1373,12 @@ static mode_t udf_convert_permissions(struct fileEntry *fe) return mode; } -int udf_write_inode(struct inode *inode, int sync) +int udf_write_inode(struct inode *inode, struct writeback_control *wbc) { int ret; lock_kernel(); - ret = udf_update_inode(inode, sync); + ret = udf_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL); unlock_kernel(); return ret; diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index 8d46f4294ee7..4223ac855da9 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -142,7 +142,7 @@ extern void udf_truncate(struct inode *); extern void udf_read_inode(struct inode *); extern void udf_delete_inode(struct inode *); extern void udf_clear_inode(struct inode *); -extern int udf_write_inode(struct inode *, int); +extern int udf_write_inode(struct inode *, struct writeback_control *wbc); extern long udf_block_map(struct inode *, sector_t); extern int udf_extend_file(struct inode *, struct extent_position *, struct kernel_long_ad *, sector_t); diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 7cf33379fd46..0a627e08610b 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "ufs_fs.h" #include "ufs.h" @@ -890,11 +891,11 @@ static int ufs_update_inode(struct inode * inode, int do_sync) return 0; } -int ufs_write_inode (struct inode * inode, int wait) +int ufs_write_inode(struct inode *inode, struct writeback_control *wbc) { int ret; lock_kernel(); - ret = ufs_update_inode (inode, wait); + ret = ufs_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL); unlock_kernel(); return ret; } diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h index 01d0e2a3b230..43f9f5d5670e 100644 --- a/fs/ufs/ufs.h +++ b/fs/ufs/ufs.h @@ -106,7 +106,7 @@ extern struct inode * ufs_new_inode (struct inode *, int); /* inode.c */ extern struct inode *ufs_iget(struct super_block *, unsigned long); -extern int ufs_write_inode (struct inode *, int); +extern int ufs_write_inode (struct inode *, struct writeback_control *); extern int ufs_sync_inode (struct inode *); extern void ufs_delete_inode (struct inode *); extern struct buffer_head * ufs_bread (struct inode *, unsigned, int, int *); diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 8f117db6070e..71345a370d9f 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1063,7 +1063,7 @@ xfs_log_inode( STATIC int xfs_fs_write_inode( struct inode *inode, - int sync) + struct writeback_control *wbc) { struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; @@ -1074,7 +1074,7 @@ xfs_fs_write_inode( if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); - if (sync) { + if (wbc->sync_mode == WB_SYNC_ALL) { /* * Make sure the inode has hit stable storage. 
By using the * log and the fsync transactions we reduce the IOs we have diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index 6b049030fbe6..deac2566450e 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -877,7 +877,7 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, int create); extern struct inode *ext3_iget(struct super_block *, unsigned long); -extern int ext3_write_inode (struct inode *, int); +extern int ext3_write_inode (struct inode *, struct writeback_control *); extern int ext3_setattr (struct dentry *, struct iattr *); extern void ext3_delete_inode (struct inode *); extern int ext3_sync_inode (handle_t *, struct inode *); diff --git a/include/linux/fs.h b/include/linux/fs.h index 5b3182c7eb5f..45689621a851 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1557,7 +1557,7 @@ struct super_operations { void (*destroy_inode)(struct inode *); void (*dirty_inode) (struct inode *); - int (*write_inode) (struct inode *, int); + int (*write_inode) (struct inode *, struct writeback_control *wbc); void (*drop_inode) (struct inode *); void (*delete_inode) (struct inode *); void (*put_super) (struct super_block *); diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index 1ba3cf6edfbb..3b603f474186 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -2034,7 +2034,7 @@ void reiserfs_read_locked_inode(struct inode *inode, int reiserfs_find_actor(struct inode *inode, void *p); int reiserfs_init_locked_inode(struct inode *inode, void *p); void reiserfs_delete_inode(struct inode *inode); -int reiserfs_write_inode(struct inode *inode, int); +int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc); int reiserfs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh_result, int create); struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid, -- cgit v1.2.3 From 8fc795f703c5138e1a8bfb88c69f52632031aa6a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Feb 2010 16:46:56 -0800 Subject: NFS: Cleanup - move nfs_write_inode() into fs/nfs/write.c The sole purpose of nfs_write_inode is to commit unstable writes, so move it into fs/nfs/write.c, and make nfs_commit_inode static. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 12 ------------ fs/nfs/write.c | 24 +++++++++++++++++++++++- include/linux/nfs_fs.h | 7 ------- 3 files changed, 23 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 7f9ecc46f3fb..89e98312599d 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -97,18 +97,6 @@ u64 nfs_compat_user_ino64(u64 fileid) return ino; } -int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) -{ - int ret; - - ret = nfs_commit_inode(inode, - wbc->sync_mode == WB_SYNC_ALL ? 
FLUSH_SYNC : 0); - if (ret >= 0) - return 0; - __mark_inode_dirty(inode, I_DIRTY_DATASYNC); - return ret; -} - void nfs_clear_inode(struct inode *inode) { /* diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d63d964a0392..09e97097baaa 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1391,7 +1391,7 @@ static const struct rpc_call_ops nfs_commit_ops = { .rpc_release = nfs_commit_release, }; -int nfs_commit_inode(struct inode *inode, int how) +static int nfs_commit_inode(struct inode *inode, int how) { LIST_HEAD(head); int res; @@ -1406,13 +1406,35 @@ int nfs_commit_inode(struct inode *inode, int how) } return res; } + +static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc) +{ + int ret; + + ret = nfs_commit_inode(inode, + wbc->sync_mode == WB_SYNC_ALL ? FLUSH_SYNC : 0); + if (ret >= 0) + return 0; + __mark_inode_dirty(inode, I_DIRTY_DATASYNC); + return ret; +} #else static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how) { return 0; } + +static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc) +{ + return 0; +} #endif +int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) +{ + return nfs_commit_unstable_pages(inode, wbc); +} + long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how) { struct inode *inode = mapping->host; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index d09db1bc9083..384ea3ef2863 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -483,15 +483,8 @@ extern int nfs_wb_nocommit(struct inode *inode); extern int nfs_wb_page(struct inode *inode, struct page* page); extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) -extern int nfs_commit_inode(struct inode *, int); extern struct nfs_write_data *nfs_commitdata_alloc(void); extern void nfs_commit_free(struct nfs_write_data *wdata); -#else -static inline int -nfs_commit_inode(struct inode *inode, int how) -{ - return 0; -} #endif static inline int -- cgit v1.2.3 From ff778d02bf867e1733a09b34ad6dbb723b024814 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Feb 2010 16:53:39 -0800 Subject: NFS: Add a count of the number of unstable writes carried by an inode In order to know when we should do opportunistic commits of the unstable writes, when the VM is doing a background flush, we add a field to count the number of unstable writes. 
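(Aside, not part of the patch text: a minimal user-space sketch of the bookkeeping this change introduces. All demo_* names below are illustrative only; in the kernel the counter is nfsi->ncommit in struct nfs_inode and is updated under inode->i_lock, as the diff that follows shows.)

    #include <pthread.h>

    /* Stand-in for the per-inode NFS state; lock assumed initialised. */
    struct demo_nfs_inode {
            pthread_mutex_t lock;      /* models inode->i_lock */
            unsigned long   ncommit;   /* unstable writes awaiting COMMIT */
    };

    /* A request was tagged for COMMIT (cf. nfs_mark_request_commit). */
    void demo_mark_request_commit(struct demo_nfs_inode *nfsi)
    {
            pthread_mutex_lock(&nfsi->lock);
            nfsi->ncommit++;
            pthread_mutex_unlock(&nfsi->lock);
    }

    /* n requests were collected for committing (cf. nfs_scan_commit). */
    void demo_scan_commit(struct demo_nfs_inode *nfsi, unsigned long n)
    {
            pthread_mutex_lock(&nfsi->lock);
            nfsi->ncommit -= n;
            pthread_mutex_unlock(&nfsi->lock);
    }

    /* The point of the counter: a cheap "is an opportunistic COMMIT
     * worth sending?" test for the background flusher. */
    int demo_need_commit(struct demo_nfs_inode *nfsi)
    {
            return nfsi->ncommit != 0;
    }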
Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 1 + fs/nfs/write.c | 14 ++++++++++---- include/linux/nfs_fs.h | 1 + 3 files changed, 12 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 89e98312599d..aa5a831001ab 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1404,6 +1404,7 @@ static void init_once(void *foo) INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); nfsi->npages = 0; + nfsi->ncommit = 0; atomic_set(&nfsi->silly_count, 1); INIT_HLIST_HEAD(&nfsi->silly_list); init_waitqueue_head(&nfsi->waitqueue); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 09e97097baaa..dc08a6fbde67 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -438,6 +438,7 @@ nfs_mark_request_commit(struct nfs_page *req) radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_COMMIT); + nfsi->ncommit++; spin_unlock(&inode->i_lock); inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); @@ -573,11 +574,15 @@ static int nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages) { struct nfs_inode *nfsi = NFS_I(inode); + int ret; if (!nfs_need_commit(nfsi)) return 0; - return nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT); + ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT); + if (ret > 0) + nfsi->ncommit -= ret; + return ret; } #else static inline int nfs_need_commit(struct nfs_inode *nfsi) @@ -642,9 +647,10 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, spin_lock(&inode->i_lock); } - if (nfs_clear_request_commit(req)) - radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree, - req->wb_index, NFS_PAGE_TAG_COMMIT); + if (nfs_clear_request_commit(req) && + radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree, + req->wb_index, NFS_PAGE_TAG_COMMIT) != NULL) + NFS_I(inode)->ncommit--; /* Okay, the request matches. Update the region */ if (offset < req->wb_offset) { diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 384ea3ef2863..309217f46e28 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -166,6 +166,7 @@ struct nfs_inode { struct radix_tree_root nfs_page_tree; unsigned long npages; + unsigned long ncommit; /* Open contexts for shared mmap writes */ struct list_head open_files; -- cgit v1.2.3 From c988950eb6dd6f8e6d98503ca094622729e9aa13 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Feb 2010 17:03:21 -0800 Subject: NFS: Simplify nfs_wb_page_cancel() In all cases we should be able to just remove the request and call cancel_dirty_page(). 
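(Aside, not part of the patch text: the control flow of the simplified cancel path, modelled as self-contained user-space C. The demo_* names are hypothetical; the kernel code in the diff below performs the same steps with nfs_page_find_request(), nfs_lock_request_dontget(), nfs_inode_remove_request() and nfs_wait_on_request().)

    #include <stddef.h>

    /* Toy request object: "busy" means some other path still holds it. */
    struct demo_req { int busy; int removed; };

    static struct demo_req *demo_find_request(struct demo_req *r)
    {
            return r->removed ? NULL : r;
    }
    static int  demo_trylock(struct demo_req *r)      { return !r->busy; }
    static void demo_remove(struct demo_req *r)       { r->removed = 1; }
    static void demo_cancel_dirty(struct demo_req *r) { (void)r; }
    static int  demo_wait(struct demo_req *r)         { r->busy = 0; return 0; }

    int demo_wb_page_cancel(struct demo_req *r)
    {
            int ret = 0;

            for (;;) {
                    struct demo_req *req = demo_find_request(r);

                    if (req == NULL)
                            break;                    /* nothing left to cancel */
                    if (demo_trylock(req)) {
                            demo_remove(req);         /* drop the request */
                            demo_cancel_dirty(req);   /* cf. cancel_dirty_page() */
                            break;
                    }
                    ret = demo_wait(req);             /* busy: wait, then retry */
                    if (ret < 0)
                            break;                    /* fatal signal */
            }
            return ret;
    }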
Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 39 +-------------------------------------- include/linux/nfs_fs.h | 2 -- 2 files changed, 1 insertion(+), 40 deletions(-) (limited to 'include') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e40e949598fd..dc7f5e9a23b4 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -540,19 +540,6 @@ static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, u return res; } -static void nfs_cancel_commit_list(struct list_head *head) -{ - struct nfs_page *req; - - while(!list_empty(head)) { - req = nfs_list_entry(head->next); - nfs_list_remove_request(req); - nfs_clear_request_commit(req); - nfs_inode_remove_request(req); - nfs_unlock_request(req); - } -} - #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) static int nfs_need_commit(struct nfs_inode *nfsi) @@ -1495,13 +1482,6 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr pages = nfs_scan_commit(inode, &head, idx_start, npages); if (pages == 0) break; - if (how & FLUSH_INVALIDATE) { - spin_unlock(&inode->i_lock); - nfs_cancel_commit_list(&head); - ret = pages; - spin_lock(&inode->i_lock); - continue; - } pages += nfs_scan_commit(inode, &head, 0, 0); spin_unlock(&inode->i_lock); ret = nfs_commit_list(inode, &head, how); @@ -1558,26 +1538,13 @@ int nfs_wb_nocommit(struct inode *inode) int nfs_wb_page_cancel(struct inode *inode, struct page *page) { struct nfs_page *req; - loff_t range_start = page_offset(page); - loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1); - struct writeback_control wbc = { - .bdi = page->mapping->backing_dev_info, - .sync_mode = WB_SYNC_ALL, - .nr_to_write = LONG_MAX, - .range_start = range_start, - .range_end = range_end, - }; int ret = 0; BUG_ON(!PageLocked(page)); for (;;) { req = nfs_page_find_request(page); if (req == NULL) - goto out; - if (test_bit(PG_CLEAN, &req->wb_flags)) { - nfs_release_request(req); break; - } if (nfs_lock_request_dontget(req)) { nfs_inode_remove_request(req); /* @@ -1591,12 +1558,8 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) ret = nfs_wait_on_request(req); nfs_release_request(req); if (ret < 0) - goto out; + break; } - if (!PagePrivate(page)) - return 0; - ret = nfs_sync_mapping_wait(page->mapping, &wbc, FLUSH_INVALIDATE); -out: return ret; } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 309217f46e28..1083134c02ff 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -34,8 +34,6 @@ #define FLUSH_LOWPRI 8 /* low priority background flush */ #define FLUSH_HIGHPRI 16 /* high priority memory reclaim flush */ #define FLUSH_NOCOMMIT 32 /* Don't send the NFSv3/v4 COMMIT */ -#define FLUSH_INVALIDATE 64 /* Invalidate the page cache */ -#define FLUSH_NOWRITEPAGE 128 /* Don't call writepage() */ #ifdef __KERNEL__ -- cgit v1.2.3 From acdc53b2146c7ee67feb1f02f7bc3020126514b8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Feb 2010 17:03:26 -0800 Subject: NFS: Replace __nfs_write_mapping with sync_inode() Now that we have correct COMMIT semantics in writeback_single_inode, we can reduce and simplify nfs_wb_all(). Also replace nfs_wb_nocommit() with a call to filemap_write_and_wait(), which doesn't need to hold the inode->i_mutex. With that done, we can eliminate nfs_write_mapping() altogether. 
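(Readability aid: the diff below is hard to scan in this flattened form, so here is what nfs_wb_all() reduces to once the hunks are applied: a plain WB_SYNC_ALL writeback_control handed to sync_inode(), with the two-pass nfs_write_mapping() machinery gone.)

    /*
     * flush the inode to disk.
     */
    int nfs_wb_all(struct inode *inode)
    {
            struct writeback_control wbc = {
                    .sync_mode = WB_SYNC_ALL,
                    .nr_to_write = LONG_MAX,
                    .range_start = 0,
                    .range_end = LLONG_MAX,
            };

            return sync_inode(inode, &wbc);
    }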
Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 15 +++++---------- fs/nfs/write.c | 42 +++++------------------------------------- include/linux/nfs_fs.h | 2 -- 3 files changed, 10 insertions(+), 49 deletions(-) (limited to 'include') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index aa5a831001ab..443772df9b17 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -495,17 +495,11 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME; int err; - /* - * Flush out writes to the server in order to update c/mtime. - * - * Hold the i_mutex to suspend application writes temporarily; - * this prevents long-running writing applications from blocking - * nfs_wb_nocommit. - */ + /* Flush out writes to the server in order to update c/mtime. */ if (S_ISREG(inode->i_mode)) { - mutex_lock(&inode->i_mutex); - nfs_wb_nocommit(inode); - mutex_unlock(&inode->i_mutex); + err = filemap_write_and_wait(inode->i_mapping); + if (err) + goto out; } /* @@ -529,6 +523,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) generic_fillattr(inode, stat); stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode)); } +out: return err; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index dc7f5e9a23b4..0b323091b481 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1454,7 +1454,6 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr pgoff_t idx_start, idx_end; unsigned int npages = 0; LIST_HEAD(head); - int nocommit = how & FLUSH_NOCOMMIT; long pages, ret; /* FIXME */ @@ -1471,14 +1470,11 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr npages = 0; } } - how &= ~FLUSH_NOCOMMIT; spin_lock(&inode->i_lock); do { ret = nfs_wait_on_requests_locked(inode, idx_start, npages); if (ret != 0) continue; - if (nocommit) - break; pages = nfs_scan_commit(inode, &head, idx_start, npages); if (pages == 0) break; @@ -1492,47 +1488,19 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr return ret; } -static int __nfs_write_mapping(struct address_space *mapping, struct writeback_control *wbc, int how) -{ - int ret; - - ret = nfs_writepages(mapping, wbc); - if (ret < 0) - goto out; - ret = nfs_sync_mapping_wait(mapping, wbc, how); - if (ret < 0) - goto out; - return 0; -out: - __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); - return ret; -} - -/* Two pass sync: first using WB_SYNC_NONE, then WB_SYNC_ALL */ -static int nfs_write_mapping(struct address_space *mapping, int how) +/* + * flush the inode to disk. + */ +int nfs_wb_all(struct inode *inode) { struct writeback_control wbc = { - .bdi = mapping->backing_dev_info, .sync_mode = WB_SYNC_ALL, .nr_to_write = LONG_MAX, .range_start = 0, .range_end = LLONG_MAX, }; - return __nfs_write_mapping(mapping, &wbc, how); -} - -/* - * flush the inode to disk. 
- */ -int nfs_wb_all(struct inode *inode) -{ - return nfs_write_mapping(inode->i_mapping, 0); -} - -int nfs_wb_nocommit(struct inode *inode) -{ - return nfs_write_mapping(inode->i_mapping, FLUSH_NOCOMMIT); + return sync_inode(inode, &wbc); } int nfs_wb_page_cancel(struct inode *inode, struct page *page) diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 1083134c02ff..93f439e7c5bf 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -33,7 +33,6 @@ #define FLUSH_STABLE 4 /* commit to stable storage */ #define FLUSH_LOWPRI 8 /* low priority background flush */ #define FLUSH_HIGHPRI 16 /* high priority memory reclaim flush */ -#define FLUSH_NOCOMMIT 32 /* Don't send the NFSv3/v4 COMMIT */ #ifdef __KERNEL__ @@ -478,7 +477,6 @@ extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *); */ extern long nfs_sync_mapping_wait(struct address_space *, struct writeback_control *, int); extern int nfs_wb_all(struct inode *inode); -extern int nfs_wb_nocommit(struct inode *inode); extern int nfs_wb_page(struct inode *inode, struct page* page); extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) -- cgit v1.2.3 From 7f2f12d963e7c33a93bfb0b22f0178eb1e6a4196 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Feb 2010 17:03:28 -0800 Subject: NFS: Simplify nfs_wb_page() Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 120 ++++++++++--------------------------------------- include/linux/nfs_fs.h | 1 - 2 files changed, 23 insertions(+), 98 deletions(-) (limited to 'include') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 0b323091b481..53ff70e23993 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -502,44 +502,6 @@ int nfs_reschedule_unstable_write(struct nfs_page *req) } #endif -/* - * Wait for a request to complete. - * - * Interruptible by fatal signals only. 
- */ -static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, unsigned int npages) -{ - struct nfs_inode *nfsi = NFS_I(inode); - struct nfs_page *req; - pgoff_t idx_end, next; - unsigned int res = 0; - int error; - - if (npages == 0) - idx_end = ~0; - else - idx_end = idx_start + npages - 1; - - next = idx_start; - while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_LOCKED)) { - if (req->wb_index > idx_end) - break; - - next = req->wb_index + 1; - BUG_ON(!NFS_WBACK_BUSY(req)); - - kref_get(&req->wb_kref); - spin_unlock(&inode->i_lock); - error = nfs_wait_on_request(req); - nfs_release_request(req); - spin_lock(&inode->i_lock); - if (error < 0) - return error; - res++; - } - return res; -} - #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) static int nfs_need_commit(struct nfs_inode *nfsi) @@ -1432,7 +1394,7 @@ out_mark_dirty: return ret; } #else -static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how) +static int nfs_commit_inode(struct inode *inode, int how) { return 0; } @@ -1448,46 +1410,6 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) return nfs_commit_unstable_pages(inode, wbc); } -long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how) -{ - struct inode *inode = mapping->host; - pgoff_t idx_start, idx_end; - unsigned int npages = 0; - LIST_HEAD(head); - long pages, ret; - - /* FIXME */ - if (wbc->range_cyclic) - idx_start = 0; - else { - idx_start = wbc->range_start >> PAGE_CACHE_SHIFT; - idx_end = wbc->range_end >> PAGE_CACHE_SHIFT; - if (idx_end > idx_start) { - pgoff_t l_npages = 1 + idx_end - idx_start; - npages = l_npages; - if (sizeof(npages) != sizeof(l_npages) && - (pgoff_t)npages != l_npages) - npages = 0; - } - } - spin_lock(&inode->i_lock); - do { - ret = nfs_wait_on_requests_locked(inode, idx_start, npages); - if (ret != 0) - continue; - pages = nfs_scan_commit(inode, &head, idx_start, npages); - if (pages == 0) - break; - pages += nfs_scan_commit(inode, &head, 0, 0); - spin_unlock(&inode->i_lock); - ret = nfs_commit_list(inode, &head, how); - spin_lock(&inode->i_lock); - - } while (ret >= 0); - spin_unlock(&inode->i_lock); - return ret; -} - /* * flush the inode to disk. */ @@ -1531,45 +1453,49 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) return ret; } -static int nfs_wb_page_priority(struct inode *inode, struct page *page, - int how) +/* + * Write back all requests on one page - we do this before reading it. 
+ */ +int nfs_wb_page(struct inode *inode, struct page *page) { loff_t range_start = page_offset(page); loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1); struct writeback_control wbc = { - .bdi = page->mapping->backing_dev_info, .sync_mode = WB_SYNC_ALL, - .nr_to_write = LONG_MAX, + .nr_to_write = 0, .range_start = range_start, .range_end = range_end, }; + struct nfs_page *req; + int need_commit; int ret; - do { + while(PagePrivate(page)) { if (clear_page_dirty_for_io(page)) { ret = nfs_writepage_locked(page, &wbc); if (ret < 0) goto out_error; - } else if (!PagePrivate(page)) + } + req = nfs_find_and_lock_request(page); + if (!req) break; - ret = nfs_sync_mapping_wait(page->mapping, &wbc, how); - if (ret < 0) + if (IS_ERR(req)) { + ret = PTR_ERR(req); goto out_error; - } while (PagePrivate(page)); + } + need_commit = test_bit(PG_CLEAN, &req->wb_flags); + nfs_clear_page_tag_locked(req); + if (need_commit) { + ret = nfs_commit_inode(inode, FLUSH_SYNC); + if (ret < 0) + goto out_error; + } + } return 0; out_error: - __mark_inode_dirty(inode, I_DIRTY_PAGES); return ret; } -/* - * Write back all requests on one page - we do this before reading it. - */ -int nfs_wb_page(struct inode *inode, struct page* page) -{ - return nfs_wb_page_priority(inode, page, FLUSH_STABLE); -} - #ifdef CONFIG_MIGRATION int nfs_migrate_page(struct address_space *mapping, struct page *newpage, struct page *page) diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 93f439e7c5bf..b789d85bff82 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -475,7 +475,6 @@ extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *); * Try to write back everything synchronously (but check the * return value!) */ -extern long nfs_sync_mapping_wait(struct address_space *, struct writeback_control *, int); extern int nfs_wb_all(struct inode *inode); extern int nfs_wb_page(struct inode *inode, struct page* page); extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); -- cgit v1.2.3 From 1cda707d52e51a6cafac0aef12d2bd7052d572e6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Feb 2010 17:03:30 -0800 Subject: NFS: Remove requirement for inode->i_mutex from nfs_invalidate_mapping Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 2 +- fs/nfs/inode.c | 41 +---------------------------------------- fs/nfs/symlink.c | 2 +- include/linux/nfs_fs.h | 1 - 4 files changed, 3 insertions(+), 43 deletions(-) (limited to 'include') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 3c7f03b669fb..a1f6b4438fb1 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -560,7 +560,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) desc->entry = &my_entry; nfs_block_sillyrename(dentry); - res = nfs_revalidate_mapping_nolock(inode, filp->f_mapping); + res = nfs_revalidate_mapping(inode, filp->f_mapping); if (res < 0) goto out; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index e8b41170d295..dbaaf7d2a188 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -754,7 +754,7 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) return __nfs_revalidate_inode(server, inode); } -static int nfs_invalidate_mapping_nolock(struct inode *inode, struct address_space *mapping) +static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping) { struct nfs_inode *nfsi = NFS_I(inode); @@ -775,49 +775,10 @@ static int nfs_invalidate_mapping_nolock(struct inode *inode, struct address_spa return 0; } -static int nfs_invalidate_mapping(struct 
inode *inode, struct address_space *mapping) -{ - int ret = 0; - - mutex_lock(&inode->i_mutex); - if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_DATA) { - ret = nfs_sync_mapping(mapping); - if (ret == 0) - ret = nfs_invalidate_mapping_nolock(inode, mapping); - } - mutex_unlock(&inode->i_mutex); - return ret; -} - -/** - * nfs_revalidate_mapping_nolock - Revalidate the pagecache - * @inode - pointer to host inode - * @mapping - pointer to mapping - */ -int nfs_revalidate_mapping_nolock(struct inode *inode, struct address_space *mapping) -{ - struct nfs_inode *nfsi = NFS_I(inode); - int ret = 0; - - if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) - || nfs_attribute_timeout(inode) || NFS_STALE(inode)) { - ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode); - if (ret < 0) - goto out; - } - if (nfsi->cache_validity & NFS_INO_INVALID_DATA) - ret = nfs_invalidate_mapping_nolock(inode, mapping); -out: - return ret; -} - /** * nfs_revalidate_mapping - Revalidate the pagecache * @inode - pointer to host inode * @mapping - pointer to mapping - * - * This version of the function will take the inode->i_mutex and attempt to - * flush out all dirty data if it needs to invalidate the page cache. */ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) { diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c index 412738dbfbc7..2ea9e5c27e55 100644 --- a/fs/nfs/symlink.c +++ b/fs/nfs/symlink.c @@ -50,7 +50,7 @@ static void *nfs_follow_link(struct dentry *dentry, struct nameidata *nd) struct page *page; void *err; - err = ERR_PTR(nfs_revalidate_mapping_nolock(inode, inode->i_mapping)); + err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping)); if (err) goto read_failed; page = read_cache_page(&inode->i_data, 0, diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index b789d85bff82..1a0b85aa151e 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -347,7 +347,6 @@ extern int nfs_attribute_timeout(struct inode *inode); extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode); extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *); extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping); -extern int nfs_revalidate_mapping_nolock(struct inode *inode, struct address_space *mapping); extern int nfs_setattr(struct dentry *, struct iattr *); extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr); extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx); -- cgit v1.2.3 From f75580c4afb72c156746b3fc1ec977b1a85d3dee Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 15 Feb 2010 17:27:00 +0000 Subject: net/9p: Add multi channel support. This is needed for supporting multiple mount points. We can find out the device names to be used with mount by checking /sys/devices/virtio-pci/virtio*/device file if the device file have value 9 then the specific virtio device can be used for mounting. 
ex: #cat /sys/devices/virtio-pci/virtio1/device 9 now we can mount using # mount -t 9p -o trans=virtio virtio1 /mnt/ Signed-off-by: Aneesh Kumar K.V Signed-off-by: Eric Van Hensbergen --- include/linux/virtio_9p.h | 2 +- net/9p/trans_virtio.c | 14 ++++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/virtio_9p.h b/include/linux/virtio_9p.h index 095e10d148b4..7a615c3f5e3d 100644 --- a/include/linux/virtio_9p.h +++ b/include/linux/virtio_9p.h @@ -6,6 +6,6 @@ #include /* Maximum number of virtio channels per partition (1 for now) */ -#define MAX_9P_CHAN 1 +#define MAX_9P_CHAN 10 #endif /* _LINUX_VIRTIO_9P_H */ diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index cb50f4ae5eef..df924e5657d3 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -296,13 +296,15 @@ p9_virtio_create(struct p9_client *client, const char *devname, char *args) mutex_lock(&virtio_9p_lock); while (index < MAX_9P_CHAN) { - if (chan->initialized && !chan->inuse) { - chan->inuse = true; - break; - } else { - index++; - chan = &channels[index]; + if (chan->initialized && + !strcmp(devname, dev_name(&chan->vdev->dev))) { + if (!chan->inuse) { + chan->inuse = true; + break; + } } + index++; + chan = &channels[index]; } mutex_unlock(&virtio_9p_lock); -- cgit v1.2.3 From 37c1209d413242d9560e343c040777049a8dd869 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 15 Feb 2010 17:27:01 +0000 Subject: net/9p: Remove MAX_9P_CHAN limit Use a list to track the channel instead of statically allocated array Signed-off-by: Aneesh Kumar K.V Signed-off-by: Eric Van Hensbergen --- include/linux/virtio_9p.h | 3 -- net/9p/trans_virtio.c | 70 ++++++++++++++++++----------------------------- 2 files changed, 27 insertions(+), 46 deletions(-) (limited to 'include') diff --git a/include/linux/virtio_9p.h b/include/linux/virtio_9p.h index 7a615c3f5e3d..332275080083 100644 --- a/include/linux/virtio_9p.h +++ b/include/linux/virtio_9p.h @@ -5,7 +5,4 @@ #include #include -/* Maximum number of virtio channels per partition (1 for now) */ -#define MAX_9P_CHAN 10 - #endif /* _LINUX_VIRTIO_9P_H */ diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index df924e5657d3..05918d3cb40d 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -49,8 +49,6 @@ /* a single mutex to manage channel initialization and attachment */ static DEFINE_MUTEX(virtio_9p_lock); -/* global which tracks highest initialized channel */ -static int chan_index; /** * struct virtio_chan - per-instance transport information @@ -68,8 +66,7 @@ static int chan_index; * */ -static struct virtio_chan { - bool initialized; +struct virtio_chan { bool inuse; spinlock_t lock; @@ -80,7 +77,11 @@ static struct virtio_chan { /* Scatterlist: can be too big for stack. */ struct scatterlist sg[VIRTQUEUE_NUM]; -} channels[MAX_9P_CHAN]; + + struct list_head chan_list; +}; + +static struct list_head virtio_chan_list; /* How many bytes left in this page. */ static unsigned int rest_of_page(void *data) @@ -217,9 +218,7 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req) * p9_virtio_probe - probe for existence of 9P virtio channels * @vdev: virtio device to probe * - * This probes for existing virtio channels. At present only - * a single channel is in use, so in the future more work may need - * to be done here. + * This probes for existing virtio channels. 
* */ @@ -227,16 +226,10 @@ static int p9_virtio_probe(struct virtio_device *vdev) { int err; struct virtio_chan *chan; - int index; - mutex_lock(&virtio_9p_lock); - index = chan_index++; - chan = &channels[index]; - mutex_unlock(&virtio_9p_lock); - - if (chan_index > MAX_9P_CHAN) { - printk(KERN_ERR "9p: virtio: Maximum channels exceeded\n"); - BUG(); + chan = kmalloc(sizeof(struct virtio_chan), GFP_KERNEL); + if (!chan) { + printk(KERN_ERR "9p: Failed to allocate virtio 9P channel\n"); err = -ENOMEM; goto fail; } @@ -255,15 +248,15 @@ static int p9_virtio_probe(struct virtio_device *vdev) sg_init_table(chan->sg, VIRTQUEUE_NUM); chan->inuse = false; - chan->initialized = true; + mutex_lock(&virtio_9p_lock); + list_add_tail(&chan->chan_list, &virtio_chan_list); + mutex_unlock(&virtio_9p_lock); return 0; out_free_vq: vdev->config->del_vqs(vdev); + kfree(chan); fail: - mutex_lock(&virtio_9p_lock); - chan_index--; - mutex_unlock(&virtio_9p_lock); return err; } @@ -280,35 +273,27 @@ fail: * We use a simple reference count mechanism to ensure that only a single * mount has a channel open at a time. * - * Bugs: doesn't allow identification of a specific channel - * to allocate, channels are allocated sequentially. This was - * a pragmatic decision to get things rolling, but ideally some - * way of identifying the channel to attach to would be nice - * if we are going to support multiple channels. - * */ static int p9_virtio_create(struct p9_client *client, const char *devname, char *args) { - struct virtio_chan *chan = channels; - int index = 0; + struct virtio_chan *chan; + int found = 0; mutex_lock(&virtio_9p_lock); - while (index < MAX_9P_CHAN) { - if (chan->initialized && - !strcmp(devname, dev_name(&chan->vdev->dev))) { + list_for_each_entry(chan, &virtio_chan_list, chan_list) { + if (!strcmp(devname, dev_name(&chan->vdev->dev))) { if (!chan->inuse) { chan->inuse = true; + found = 1; break; } } - index++; - chan = &channels[index]; } mutex_unlock(&virtio_9p_lock); - if (index >= MAX_9P_CHAN) { + if (!found) { printk(KERN_ERR "9p: no channels available\n"); return -ENODEV; } @@ -331,11 +316,13 @@ static void p9_virtio_remove(struct virtio_device *vdev) struct virtio_chan *chan = vdev->priv; BUG_ON(chan->inuse); + vdev->config->del_vqs(vdev); + + mutex_lock(&virtio_9p_lock); + list_del(&chan->chan_list); + mutex_unlock(&virtio_9p_lock); + kfree(chan); - if (chan->initialized) { - vdev->config->del_vqs(vdev); - chan->initialized = false; - } } static struct virtio_device_id id_table[] = { @@ -366,10 +353,7 @@ static struct p9_trans_module p9_virtio_trans = { /* The standard init function */ static int __init p9_virtio_init(void) { - int count; - - for (count = 0; count < MAX_9P_CHAN; count++) - channels[count].initialized = false; + INIT_LIST_HEAD(&virtio_chan_list); v9fs_register_trans(&p9_virtio_trans); return register_virtio_driver(&p9_virtio_drv); -- cgit v1.2.3 From 0fb80abd911a7cb1e6548b5279568dc1e8949702 Mon Sep 17 00:00:00 2001 From: Sripathi Kodi Date: Fri, 5 Mar 2010 18:49:11 +0000 Subject: 9P2010.L handshake: Add mount option Add new mount V9FS mount option to specify protocol version This patch adds a new mount option to specify protocol version. With this option it is possible to use "-o version=" switch to specify 9P protocol version to use. 
Valid options for version are: 9p2000 9p2000.u 9p2010.L Signed-off-by: Sripathi Kodi Signed-off-by: Eric Van Hensbergen --- include/net/9p/client.h | 15 +++++++++++++++ net/9p/client.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) (limited to 'include') diff --git a/include/net/9p/client.h b/include/net/9p/client.h index fb00b329f0d3..d40f8c55dfae 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -29,6 +29,19 @@ /* Number of requests per row */ #define P9_ROW_MAXTAG 255 +/** enum p9_proto_versions - 9P protocol versions + * @p9_proto_legacy: 9P Legacy mode, pre-9P2000.u + * @p9_proto_2000u: 9P2000.u extension + * @p9_proto_2010L: 9P2010.L extension + */ + +enum p9_proto_versions{ + p9_proto_legacy = 0, + p9_proto_2000u = 1, + p9_proto_2010L = 2, +}; + + /** * enum p9_trans_status - different states of underlying transports * @Connected: transport is connected and healthy @@ -111,6 +124,7 @@ struct p9_req_t { * @lock: protect @fidlist * @msize: maximum data size negotiated by protocol * @dotu: extension flags negotiated by protocol + * @proto_version: 9P protocol version to use * @trans_mod: module API instantiated with this client * @trans: tranport instance state and API * @conn: connection state information used by trans_fd @@ -138,6 +152,7 @@ struct p9_client { spinlock_t lock; /* protect client structure */ int msize; unsigned char dotu; + unsigned char proto_version; struct p9_trans_module *trans_mod; enum p9_trans_status status; void *trans; diff --git a/net/9p/client.c b/net/9p/client.c index 09d4f1e2e4a8..3b5f3c94a6eb 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -46,6 +46,7 @@ enum { Opt_msize, Opt_trans, Opt_legacy, + Opt_version, Opt_err, }; @@ -53,9 +54,30 @@ static const match_table_t tokens = { {Opt_msize, "msize=%u"}, {Opt_legacy, "noextend"}, {Opt_trans, "trans=%s"}, + {Opt_version, "version=%s"}, {Opt_err, NULL}, }; +/* Interpret mount option for protocol version */ +static unsigned char get_protocol_version(const substring_t *name) +{ + unsigned char version = -EINVAL; + if (!strncmp("9p2000", name->from, name->to-name->from)) { + version = p9_proto_legacy; + P9_DPRINTK(P9_DEBUG_9P, "Protocol version: Legacy\n"); + } else if (!strncmp("9p2000.u", name->from, name->to-name->from)) { + version = p9_proto_2000u; + P9_DPRINTK(P9_DEBUG_9P, "Protocol version: 9P2000.u\n"); + } else if (!strncmp("9p2010.L", name->from, name->to-name->from)) { + version = p9_proto_2010L; + P9_DPRINTK(P9_DEBUG_9P, "Protocol version: 9P2010.L\n"); + } else { + P9_DPRINTK(P9_DEBUG_ERROR, "Unknown protocol version %s. ", + name->from); + } + return version; +} + static struct p9_req_t * p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...); @@ -120,6 +142,12 @@ static int parse_opts(char *opts, struct p9_client *clnt) case Opt_legacy: clnt->dotu = 0; break; + case Opt_version: + ret = get_protocol_version(&args[0]); + if (ret == -EINVAL) + goto free_and_return; + clnt->proto_version = ret; + break; default: continue; } -- cgit v1.2.3 From 342fee1d5c7dfa05f4e14ec1e583df4553b09776 Mon Sep 17 00:00:00 2001 From: Sripathi Kodi Date: Fri, 5 Mar 2010 18:50:14 +0000 Subject: 9P2010.L handshake: Remove "dotu" variable Removes 'dotu' variable and make everything dependent on 'proto_version' field. 
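(Readability aid: the heart of this patch, easy to miss in the flattened diff below, is the pair of helpers added to net/9p/client.c; every place that used to test clnt->dotu now asks one of these instead.)

    inline int p9_is_proto_dotl(struct p9_client *clnt)
    {
            return (clnt->proto_version == p9_proto_2010L);
    }
    EXPORT_SYMBOL(p9_is_proto_dotl);

    inline int p9_is_proto_dotu(struct p9_client *clnt)
    {
            return (clnt->proto_version == p9_proto_2000u);
    }
    EXPORT_SYMBOL(p9_is_proto_dotu);

So, for example, p9_client_version() now sends "9P2000.u" when p9_is_proto_dotu(c) is true rather than checking c->dotu, and the marshalling helpers p9pdu_readf()/p9pdu_writef() are passed c->proto_version instead of the old dotu flag.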
Signed-off-by: Sripathi Kodi Signed-off-by: Eric Van Hensbergen --- fs/9p/v9fs.c | 2 +- fs/9p/vfs_dir.c | 2 +- include/net/9p/client.h | 3 +- net/9p/client.c | 65 ++++++++++++++++++++++++++----------------- net/9p/protocol.c | 74 +++++++++++++++++++++++++++---------------------- net/9p/protocol.h | 6 ++-- 6 files changed, 87 insertions(+), 65 deletions(-) (limited to 'include') diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 247f10a934ed..6c7f6a251115 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -262,7 +262,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, goto error; } - if (!v9ses->clnt->dotu) + if (!p9_is_proto_dotu(v9ses->clnt)) v9ses->flags &= ~V9FS_PROTO_2000U; v9ses->maxdata = v9ses->clnt->msize - P9_IOHDRSZ; diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index 15cce53bf61e..6580aa449541 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -135,7 +135,7 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) while (rdir->head < rdir->tail) { err = p9stat_read(rdir->buf + rdir->head, buflen - rdir->head, &st, - fid->clnt->dotu); + fid->clnt->proto_version); if (err) { P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err); err = -EIO; diff --git a/include/net/9p/client.h b/include/net/9p/client.h index d40f8c55dfae..52e1fff709e4 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -151,7 +151,6 @@ struct p9_req_t { struct p9_client { spinlock_t lock; /* protect client structure */ int msize; - unsigned char dotu; unsigned char proto_version; struct p9_trans_module *trans_mod; enum p9_trans_status status; @@ -224,5 +223,7 @@ int p9_parse_header(struct p9_fcall *, int32_t *, int8_t *, int16_t *, int); int p9stat_read(char *, int, struct p9_wstat *, int); void p9stat_free(struct p9_wstat *); +int p9_is_proto_dotu(struct p9_client *clnt); +int p9_is_proto_dotl(struct p9_client *clnt); #endif /* NET_9P_CLIENT_H */ diff --git a/net/9p/client.c b/net/9p/client.c index 3b5f3c94a6eb..9994676e57da 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -58,6 +58,18 @@ static const match_table_t tokens = { {Opt_err, NULL}, }; +inline int p9_is_proto_dotl(struct p9_client *clnt) +{ + return (clnt->proto_version == p9_proto_2010L); +} +EXPORT_SYMBOL(p9_is_proto_dotl); + +inline int p9_is_proto_dotu(struct p9_client *clnt) +{ + return (clnt->proto_version == p9_proto_2000u); +} +EXPORT_SYMBOL(p9_is_proto_dotu); + /* Interpret mount option for protocol version */ static unsigned char get_protocol_version(const substring_t *name) { @@ -97,7 +109,7 @@ static int parse_opts(char *opts, struct p9_client *clnt) int option; int ret = 0; - clnt->dotu = 1; + clnt->proto_version = p9_proto_2000u; clnt->msize = 8192; if (!opts) @@ -140,7 +152,7 @@ static int parse_opts(char *opts, struct p9_client *clnt) } break; case Opt_legacy: - clnt->dotu = 0; + clnt->proto_version = p9_proto_legacy; break; case Opt_version: ret = get_protocol_version(&args[0]); @@ -438,14 +450,15 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) int ecode; char *ename; - err = p9pdu_readf(req->rc, c->dotu, "s?d", &ename, &ecode); + err = p9pdu_readf(req->rc, c->proto_version, "s?d", + &ename, &ecode); if (err) { P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse error%d\n", err); return err; } - if (c->dotu) + if (p9_is_proto_dotu(c)) err = -ecode; if (!err || !IS_ERR_VALUE(err)) @@ -543,7 +556,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) 
/* marshall the data */ p9pdu_prepare(req->tc, tag, type); va_start(ap, fmt); - err = p9pdu_vwritef(req->tc, c->dotu, fmt, ap); + err = p9pdu_vwritef(req->tc, c->proto_version, fmt, ap); va_end(ap); p9pdu_finalize(req->tc); @@ -655,14 +668,14 @@ int p9_client_version(struct p9_client *c) char *version; int msize; - P9_DPRINTK(P9_DEBUG_9P, ">>> TVERSION msize %d extended %d\n", - c->msize, c->dotu); + P9_DPRINTK(P9_DEBUG_9P, ">>> TVERSION msize %d protocol %d\n", + c->msize, c->proto_version); req = p9_client_rpc(c, P9_TVERSION, "ds", c->msize, - c->dotu ? "9P2000.u" : "9P2000"); + p9_is_proto_dotu(c) ? "9P2000.u" : "9P2000"); if (IS_ERR(req)) return PTR_ERR(req); - err = p9pdu_readf(req->rc, c->dotu, "ds", &msize, &version); + err = p9pdu_readf(req->rc, c->proto_version, "ds", &msize, &version); if (err) { P9_DPRINTK(P9_DEBUG_9P, "version error %d\n", err); p9pdu_dump(1, req->rc); @@ -670,10 +683,10 @@ int p9_client_version(struct p9_client *c) } P9_DPRINTK(P9_DEBUG_9P, "<<< RVERSION msize %d %s\n", msize, version); - if (!memcmp(version, "9P2000.u", 8)) - c->dotu = 1; - else if (!memcmp(version, "9P2000", 6)) - c->dotu = 0; + if (!strncmp(version, "9P2000.u", 8)) + c->proto_version = p9_proto_2000u; + else if (!strncmp(version, "9P2000", 6)) + c->proto_version = p9_proto_legacy; else { err = -EREMOTEIO; goto error; @@ -728,8 +741,8 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) goto put_trans; } - P9_DPRINTK(P9_DEBUG_MUX, "clnt %p trans %p msize %d dotu %d\n", - clnt, clnt->trans_mod, clnt->msize, clnt->dotu); + P9_DPRINTK(P9_DEBUG_MUX, "clnt %p trans %p msize %d protocol %d\n", + clnt, clnt->trans_mod, clnt->msize, clnt->proto_version); err = clnt->trans_mod->create(clnt, dev_name, options); if (err) @@ -812,7 +825,7 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, goto error; } - err = p9pdu_readf(req->rc, clnt->dotu, "Q", &qid); + err = p9pdu_readf(req->rc, clnt->proto_version, "Q", &qid); if (err) { p9pdu_dump(1, req->rc); p9_free_req(clnt, req); @@ -861,7 +874,7 @@ p9_client_auth(struct p9_client *clnt, char *uname, u32 n_uname, char *aname) goto error; } - err = p9pdu_readf(req->rc, clnt->dotu, "Q", &qid); + err = p9pdu_readf(req->rc, clnt->proto_version, "Q", &qid); if (err) { p9pdu_dump(1, req->rc); p9_free_req(clnt, req); @@ -919,7 +932,7 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, goto error; } - err = p9pdu_readf(req->rc, clnt->dotu, "R", &nwqids, &wqids); + err = p9pdu_readf(req->rc, clnt->proto_version, "R", &nwqids, &wqids); if (err) { p9pdu_dump(1, req->rc); p9_free_req(clnt, req); @@ -980,7 +993,7 @@ int p9_client_open(struct p9_fid *fid, int mode) goto error; } - err = p9pdu_readf(req->rc, clnt->dotu, "Qd", &qid, &iounit); + err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit); if (err) { p9pdu_dump(1, req->rc); goto free_and_error; @@ -1025,7 +1038,7 @@ int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, goto error; } - err = p9pdu_readf(req->rc, clnt->dotu, "Qd", &qid, &iounit); + err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit); if (err) { p9pdu_dump(1, req->rc); goto free_and_error; @@ -1126,7 +1139,7 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, goto error; } - err = p9pdu_readf(req->rc, clnt->dotu, "D", &count, &dataptr); + err = p9pdu_readf(req->rc, clnt->proto_version, "D", &count, &dataptr); if (err) { p9pdu_dump(1, req->rc); goto free_and_error; @@ -1187,7 +1200,7 @@ 
p9_client_write(struct p9_fid *fid, char *data, const char __user *udata, goto error; } - err = p9pdu_readf(req->rc, clnt->dotu, "d", &count); + err = p9pdu_readf(req->rc, clnt->proto_version, "d", &count); if (err) { p9pdu_dump(1, req->rc); goto free_and_error; @@ -1227,7 +1240,7 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid) goto error; } - err = p9pdu_readf(req->rc, clnt->dotu, "wS", &ignored, ret); + err = p9pdu_readf(req->rc, clnt->proto_version, "wS", &ignored, ret); if (err) { p9pdu_dump(1, req->rc); p9_free_req(clnt, req); @@ -1254,7 +1267,7 @@ error: } EXPORT_SYMBOL(p9_client_stat); -static int p9_client_statsize(struct p9_wstat *wst, int optional) +static int p9_client_statsize(struct p9_wstat *wst, int proto_version) { int ret; @@ -1273,7 +1286,7 @@ static int p9_client_statsize(struct p9_wstat *wst, int optional) if (wst->muid) ret += strlen(wst->muid); - if (optional) { + if (proto_version == p9_proto_2000u) { ret += 2+4+4+4; /* extension[s] n_uid[4] n_gid[4] n_muid[4] */ if (wst->extension) ret += strlen(wst->extension); @@ -1290,7 +1303,7 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst) err = 0; clnt = fid->clnt; - wst->size = p9_client_statsize(wst, clnt->dotu); + wst->size = p9_client_statsize(wst, clnt->proto_version); P9_DPRINTK(P9_DEBUG_9P, ">>> TWSTAT fid %d\n", fid->fid); P9_DPRINTK(P9_DEBUG_9P, " sz=%x type=%x dev=%x qid=%x.%llx.%x\n" diff --git a/net/9p/protocol.c b/net/9p/protocol.c index fc70147c771e..94f5a8f65e9c 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -52,7 +52,7 @@ #endif static int -p9pdu_writef(struct p9_fcall *pdu, int optional, const char *fmt, ...); +p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...); #ifdef CONFIG_NET_9P_DEBUG void @@ -144,7 +144,8 @@ pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size) */ static int -p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) +p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, + va_list ap) { const char *ptr; int errcode = 0; @@ -194,7 +195,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) int16_t len; int size; - errcode = p9pdu_readf(pdu, optional, "w", &len); + errcode = p9pdu_readf(pdu, proto_version, + "w", &len); if (errcode) break; @@ -217,7 +219,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) struct p9_qid *qid = va_arg(ap, struct p9_qid *); - errcode = p9pdu_readf(pdu, optional, "bdq", + errcode = p9pdu_readf(pdu, proto_version, "bdq", &qid->type, &qid->version, &qid->path); } @@ -230,7 +232,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) stbuf->n_uid = stbuf->n_gid = stbuf->n_muid = -1; errcode = - p9pdu_readf(pdu, optional, + p9pdu_readf(pdu, proto_version, "wwdQdddqssss?sddd", &stbuf->size, &stbuf->type, &stbuf->dev, &stbuf->qid, @@ -250,7 +252,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) void **data = va_arg(ap, void **); errcode = - p9pdu_readf(pdu, optional, "d", count); + p9pdu_readf(pdu, proto_version, "d", count); if (!errcode) { *count = MIN(*count, @@ -263,8 +265,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) int16_t *nwname = va_arg(ap, int16_t *); char ***wnames = va_arg(ap, char ***); - errcode = - p9pdu_readf(pdu, optional, "w", nwname); + errcode = p9pdu_readf(pdu, proto_version, + "w", nwname); if (!errcode) { *wnames = kmalloc(sizeof(char *) * *nwname, @@ -278,7 +280,8 @@ 
p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) for (i = 0; i < *nwname; i++) { errcode = - p9pdu_readf(pdu, optional, + p9pdu_readf(pdu, + proto_version, "s", &(*wnames)[i]); if (errcode) @@ -306,7 +309,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) *wqids = NULL; errcode = - p9pdu_readf(pdu, optional, "w", nwqid); + p9pdu_readf(pdu, proto_version, "w", nwqid); if (!errcode) { *wqids = kmalloc(*nwqid * @@ -321,7 +324,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) for (i = 0; i < *nwqid; i++) { errcode = - p9pdu_readf(pdu, optional, + p9pdu_readf(pdu, + proto_version, "Q", &(*wqids)[i]); if (errcode) @@ -336,7 +340,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) } break; case '?': - if (!optional) + if (proto_version != p9_proto_2000u) return 0; break; default: @@ -352,7 +356,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) } int -p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) +p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, + va_list ap) { const char *ptr; int errcode = 0; @@ -389,7 +394,8 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) if (sptr) len = MIN(strlen(sptr), USHORT_MAX); - errcode = p9pdu_writef(pdu, optional, "w", len); + errcode = p9pdu_writef(pdu, proto_version, + "w", len); if (!errcode && pdu_write(pdu, sptr, len)) errcode = -EFAULT; } @@ -398,7 +404,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) const struct p9_qid *qid = va_arg(ap, const struct p9_qid *); errcode = - p9pdu_writef(pdu, optional, "bdq", + p9pdu_writef(pdu, proto_version, "bdq", qid->type, qid->version, qid->path); } break; @@ -406,7 +412,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) const struct p9_wstat *stbuf = va_arg(ap, const struct p9_wstat *); errcode = - p9pdu_writef(pdu, optional, + p9pdu_writef(pdu, proto_version, "wwdQdddqssss?sddd", stbuf->size, stbuf->type, stbuf->dev, &stbuf->qid, @@ -421,8 +427,8 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) int32_t count = va_arg(ap, int32_t); const void *data = va_arg(ap, const void *); - errcode = - p9pdu_writef(pdu, optional, "d", count); + errcode = p9pdu_writef(pdu, proto_version, "d", + count); if (!errcode && pdu_write(pdu, data, count)) errcode = -EFAULT; } @@ -431,8 +437,8 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) int32_t count = va_arg(ap, int32_t); const char __user *udata = va_arg(ap, const void __user *); - errcode = - p9pdu_writef(pdu, optional, "d", count); + errcode = p9pdu_writef(pdu, proto_version, "d", + count); if (!errcode && pdu_write_u(pdu, udata, count)) errcode = -EFAULT; } @@ -441,14 +447,15 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) int16_t nwname = va_arg(ap, int); const char **wnames = va_arg(ap, const char **); - errcode = - p9pdu_writef(pdu, optional, "w", nwname); + errcode = p9pdu_writef(pdu, proto_version, "w", + nwname); if (!errcode) { int i; for (i = 0; i < nwname; i++) { errcode = - p9pdu_writef(pdu, optional, + p9pdu_writef(pdu, + proto_version, "s", wnames[i]); if (errcode) @@ -462,14 +469,15 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) struct p9_qid *wqids = va_arg(ap, struct p9_qid *); - errcode = - p9pdu_writef(pdu, optional, "w", 
nwqid); + errcode = p9pdu_writef(pdu, proto_version, "w", + nwqid); if (!errcode) { int i; for (i = 0; i < nwqid; i++) { errcode = - p9pdu_writef(pdu, optional, + p9pdu_writef(pdu, + proto_version, "Q", &wqids[i]); if (errcode) @@ -479,7 +487,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) } break; case '?': - if (!optional) + if (proto_version != p9_proto_2000u) return 0; break; default: @@ -494,32 +502,32 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) return errcode; } -int p9pdu_readf(struct p9_fcall *pdu, int optional, const char *fmt, ...) +int p9pdu_readf(struct p9_fcall *pdu, int proto_version, const char *fmt, ...) { va_list ap; int ret; va_start(ap, fmt); - ret = p9pdu_vreadf(pdu, optional, fmt, ap); + ret = p9pdu_vreadf(pdu, proto_version, fmt, ap); va_end(ap); return ret; } static int -p9pdu_writef(struct p9_fcall *pdu, int optional, const char *fmt, ...) +p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...) { va_list ap; int ret; va_start(ap, fmt); - ret = p9pdu_vwritef(pdu, optional, fmt, ap); + ret = p9pdu_vwritef(pdu, proto_version, fmt, ap); va_end(ap); return ret; } -int p9stat_read(char *buf, int len, struct p9_wstat *st, int dotu) +int p9stat_read(char *buf, int len, struct p9_wstat *st, int proto_version) { struct p9_fcall fake_pdu; int ret; @@ -529,7 +537,7 @@ int p9stat_read(char *buf, int len, struct p9_wstat *st, int dotu) fake_pdu.sdata = buf; fake_pdu.offset = 0; - ret = p9pdu_readf(&fake_pdu, dotu, "S", st); + ret = p9pdu_readf(&fake_pdu, proto_version, "S", st); if (ret) { P9_DPRINTK(P9_DEBUG_9P, "<<< p9stat_read failed: %d\n", ret); p9pdu_dump(1, &fake_pdu); diff --git a/net/9p/protocol.h b/net/9p/protocol.h index ccde462e7ac5..2431c0f38d56 100644 --- a/net/9p/protocol.h +++ b/net/9p/protocol.h @@ -25,9 +25,9 @@ * */ -int -p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap); -int p9pdu_readf(struct p9_fcall *pdu, int optional, const char *fmt, ...); +int p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, + va_list ap); +int p9pdu_readf(struct p9_fcall *pdu, int proto_version, const char *fmt, ...); int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type); int p9pdu_finalize(struct p9_fcall *pdu); void p9pdu_dump(int, struct p9_fcall *); -- cgit v1.2.3 From 8eae939f1400326b06d0c9afe53d2a484a326871 Mon Sep 17 00:00:00 2001 From: Zhu Yi Date: Thu, 4 Mar 2010 18:01:40 +0000 Subject: net: add limit for socket backlog We got system OOM while running some UDP netperf testing on the loopback device. The case is multiple senders sent stream UDP packets to a single receiver via loopback on local host. Of course, the receiver is not able to handle all the packets in time. But we surprisingly found that these packets were not discarded due to the receiver's sk->sk_rcvbuf limit. Instead, they are kept queuing to sk->sk_backlog and finally ate up all the memory. We believe this is a secure hole that a none privileged user can crash the system. The root cause for this problem is, when the receiver is doing __release_sock() (i.e. after userspace recv, kernel udp_recvmsg -> skb_free_datagram_locked -> release_sock), it moves skbs from backlog to sk_receive_queue with the softirq enabled. In the above case, multiple busy senders will almost make it an endless loop. The skbs in the backlog end up eat all the system memory. The issue is not only for UDP. Any protocols using socket backlog is potentially affected. 
The patch adds limit for socket backlog so that the backlog size cannot be expanded endlessly. Reported-by: Alex Shi Cc: David Miller Cc: Arnaldo Carvalho de Melo Cc: Alexey Kuznetsov Cc: Patrick McHardy Cc: Vlad Yasevich Cc: Sridhar Samudrala Cc: Jon Maloy Cc: Allan Stephens Cc: Andrew Hendry Signed-off-by: Zhu Yi Signed-off-by: Eric Dumazet Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/sock.h | 15 ++++++++++++++- net/core/sock.c | 16 ++++++++++++++-- 2 files changed, 28 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 6cb1676e409a..2516d76f043c 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -253,6 +253,8 @@ struct sock { struct { struct sk_buff *head; struct sk_buff *tail; + int len; + int limit; } sk_backlog; wait_queue_head_t *sk_sleep; struct dst_entry *sk_dst_cache; @@ -589,7 +591,7 @@ static inline int sk_stream_memory_free(struct sock *sk) return sk->sk_wmem_queued < sk->sk_sndbuf; } -/* The per-socket spinlock must be held here. */ +/* OOB backlog add */ static inline void sk_add_backlog(struct sock *sk, struct sk_buff *skb) { if (!sk->sk_backlog.tail) { @@ -601,6 +603,17 @@ static inline void sk_add_backlog(struct sock *sk, struct sk_buff *skb) skb->next = NULL; } +/* The per-socket spinlock must be held here. */ +static inline int sk_add_backlog_limited(struct sock *sk, struct sk_buff *skb) +{ + if (sk->sk_backlog.len >= max(sk->sk_backlog.limit, sk->sk_rcvbuf << 1)) + return -ENOBUFS; + + sk_add_backlog(sk, skb); + sk->sk_backlog.len += skb->truesize; + return 0; +} + static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) { return sk->sk_backlog_rcv(sk, skb); diff --git a/net/core/sock.c b/net/core/sock.c index fcd397a762ff..6e22dc973d23 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -340,8 +340,12 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) rc = sk_backlog_rcv(sk, skb); mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_); - } else - sk_add_backlog(sk, skb); + } else if (sk_add_backlog_limited(sk, skb)) { + bh_unlock_sock(sk); + atomic_inc(&sk->sk_drops); + goto discard_and_relse; + } + bh_unlock_sock(sk); out: sock_put(sk); @@ -1139,6 +1143,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) sock_lock_init(newsk); bh_lock_sock(newsk); newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL; + newsk->sk_backlog.len = 0; atomic_set(&newsk->sk_rmem_alloc, 0); /* @@ -1542,6 +1547,12 @@ static void __release_sock(struct sock *sk) bh_lock_sock(sk); } while ((skb = sk->sk_backlog.head) != NULL); + + /* + * Doing the zeroing here guarantee we can not loop forever + * while a wild producer attempts to flood us. + */ + sk->sk_backlog.len = 0; } /** @@ -1874,6 +1885,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_allocation = GFP_KERNEL; sk->sk_rcvbuf = sysctl_rmem_default; sk->sk_sndbuf = sysctl_wmem_default; + sk->sk_backlog.limit = sk->sk_rcvbuf << 1; sk->sk_state = TCP_CLOSE; sk_set_socket(sk, sock); -- cgit v1.2.3 From a3a858ff18a72a8d388e31ab0d98f7e944841a62 Mon Sep 17 00:00:00 2001 From: Zhu Yi Date: Thu, 4 Mar 2010 18:01:47 +0000 Subject: net: backlog functions rename sk_add_backlog -> __sk_add_backlog sk_add_backlog_limited -> sk_add_backlog Signed-off-by: Zhu Yi Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/sock.h | 6 +++--- net/core/sock.c | 2 +- net/dccp/minisocks.c | 2 +- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/tcp_minisocks.c | 2 +- net/ipv4/udp.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- net/ipv6/udp.c | 4 ++-- net/llc/llc_c_ac.c | 2 +- net/llc/llc_conn.c | 2 +- net/sctp/input.c | 4 ++-- net/tipc/socket.c | 2 +- net/x25/x25_dev.c | 2 +- 13 files changed, 17 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 2516d76f043c..170353dd9570 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -592,7 +592,7 @@ static inline int sk_stream_memory_free(struct sock *sk) } /* OOB backlog add */ -static inline void sk_add_backlog(struct sock *sk, struct sk_buff *skb) +static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb) { if (!sk->sk_backlog.tail) { sk->sk_backlog.head = sk->sk_backlog.tail = skb; @@ -604,12 +604,12 @@ static inline void sk_add_backlog(struct sock *sk, struct sk_buff *skb) } /* The per-socket spinlock must be held here. */ -static inline int sk_add_backlog_limited(struct sock *sk, struct sk_buff *skb) +static inline int sk_add_backlog(struct sock *sk, struct sk_buff *skb) { if (sk->sk_backlog.len >= max(sk->sk_backlog.limit, sk->sk_rcvbuf << 1)) return -ENOBUFS; - sk_add_backlog(sk, skb); + __sk_add_backlog(sk, skb); sk->sk_backlog.len += skb->truesize; return 0; } diff --git a/net/core/sock.c b/net/core/sock.c index 6e22dc973d23..61a65a2e0455 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -340,7 +340,7 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) rc = sk_backlog_rcv(sk, skb); mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_); - } else if (sk_add_backlog_limited(sk, skb)) { + } else if (sk_add_backlog(sk, skb)) { bh_unlock_sock(sk); atomic_inc(&sk->sk_drops); goto discard_and_relse; diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index af226a063141..0d508c359fa9 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -254,7 +254,7 @@ int dccp_child_process(struct sock *parent, struct sock *child, * in main socket hash table and lock on listening * socket does not protect us more. */ - sk_add_backlog(child, skb); + __sk_add_backlog(child, skb); } bh_unlock_sock(child); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4baf1943b1bd..1915f7dc30e6 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1682,7 +1682,7 @@ process: if (!tcp_prequeue(sk, skb)) ret = tcp_v4_do_rcv(sk, skb); } - } else if (sk_add_backlog_limited(sk, skb)) { + } else if (sk_add_backlog(sk, skb)) { bh_unlock_sock(sk); goto discard_and_relse; } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index f206ee5dda80..4199bc6915c5 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -728,7 +728,7 @@ int tcp_child_process(struct sock *parent, struct sock *child, * in main socket hash table and lock on listening * socket does not protect us more. 
*/ - sk_add_backlog(child, skb); + __sk_add_backlog(child, skb); } bh_unlock_sock(child); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index e7eb47f338d4..7af756d0f931 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1371,7 +1371,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) bh_lock_sock(sk); if (!sock_owned_by_user(sk)) rc = __udp_queue_rcv_skb(sk, skb); - else if (sk_add_backlog_limited(sk, skb)) { + else if (sk_add_backlog(sk, skb)) { bh_unlock_sock(sk); goto drop; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c4ea9d5cbfaa..2c378b1bd5cf 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1740,7 +1740,7 @@ process: if (!tcp_prequeue(sk, skb)) ret = tcp_v6_do_rcv(sk, skb); } - } else if (sk_add_backlog_limited(sk, skb)) { + } else if (sk_add_backlog(sk, skb)) { bh_unlock_sock(sk); goto discard_and_relse; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 64804912b093..3c0c9c755c92 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -583,7 +583,7 @@ static void flush_stack(struct sock **stack, unsigned int count, bh_lock_sock(sk); if (!sock_owned_by_user(sk)) udpv6_queue_rcv_skb(sk, skb1); - else if (sk_add_backlog_limited(sk, skb1)) { + else if (sk_add_backlog(sk, skb1)) { kfree_skb(skb1); bh_unlock_sock(sk); goto drop; @@ -758,7 +758,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, bh_lock_sock(sk); if (!sock_owned_by_user(sk)) udpv6_queue_rcv_skb(sk, skb); - else if (sk_add_backlog_limited(sk, skb)) { + else if (sk_add_backlog(sk, skb)) { atomic_inc(&sk->sk_drops); bh_unlock_sock(sk); sock_put(sk); diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c index 019c780512e8..86d6985b9d49 100644 --- a/net/llc/llc_c_ac.c +++ b/net/llc/llc_c_ac.c @@ -1437,7 +1437,7 @@ static void llc_process_tmr_ev(struct sock *sk, struct sk_buff *skb) llc_conn_state_process(sk, skb); else { llc_set_backlog_type(skb, LLC_EVENT); - sk_add_backlog(sk, skb); + __sk_add_backlog(sk, skb); } } } diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index c0539ffdb272..a12144da7974 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -827,7 +827,7 @@ void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb) else { dprintk("%s: adding to backlog...\n", __func__); llc_set_backlog_type(skb, LLC_PACKET); - if (sk_add_backlog_limited(sk, skb)) + if (sk_add_backlog(sk, skb)) goto drop_unlock; } out: diff --git a/net/sctp/input.c b/net/sctp/input.c index cbc063665e6b..3d74b264ea22 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -341,7 +341,7 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb) sctp_bh_lock_sock(sk); if (sock_owned_by_user(sk)) { - if (sk_add_backlog_limited(sk, skb)) + if (sk_add_backlog(sk, skb)) sctp_chunk_free(chunk); else backloged = 1; @@ -375,7 +375,7 @@ static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb) struct sctp_ep_common *rcvr = chunk->rcvr; int ret; - ret = sk_add_backlog_limited(sk, skb); + ret = sk_add_backlog(sk, skb); if (!ret) { /* Hold the assoc/ep while hanging on the backlog queue. 
* This way, we know structures we need will not disappear diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 22bfbc33a8ac..4b235fc1c70f 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1322,7 +1322,7 @@ static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf) if (!sock_owned_by_user(sk)) { res = filter_rcv(sk, buf); } else { - if (sk_add_backlog_limited(sk, buf)) + if (sk_add_backlog(sk, buf)) res = TIPC_ERR_OVERLOAD; else res = TIPC_OK; diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c index a9da0dc26f4f..52e304212241 100644 --- a/net/x25/x25_dev.c +++ b/net/x25/x25_dev.c @@ -53,7 +53,7 @@ static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *nb) if (!sock_owned_by_user(sk)) { queued = x25_process_rx_frame(sk, skb); } else { - queued = !sk_add_backlog_limited(sk, skb); + queued = !sk_add_backlog(sk, skb); } bh_unlock_sock(sk); sock_put(sk); -- cgit v1.2.3 From 723b2f57ad83ee7087acf9a95e8e289414b1f521 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Wed, 3 Mar 2010 22:51:50 +0000 Subject: ethtool: Add direct access to ops->get_sset_count This patch is an alternative approach for accessing string counts, vs. the drvinfo indirect approach. This way the drvinfo space doesn't run out, and we don't break ABI later. Signed-off-by: Jeff Garzik Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- include/linux/ethtool.h | 17 +++++++++--- net/core/ethtool.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index cca1c3de140d..f6f961fefbe5 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -253,6 +253,17 @@ struct ethtool_gstrings { __u8 data[0]; }; +struct ethtool_sset_info { + __u32 cmd; /* ETHTOOL_GSSET_INFO */ + __u32 reserved; + __u64 sset_mask; /* input: each bit selects an sset to query */ + /* output: each bit a returned sset */ + __u32 data[0]; /* ETH_SS_xxx count, in order, based on bits + in sset_mask. One bit implies one + __u32, two bits implies two + __u32's, etc. 
*/ +}; + enum ethtool_test_flags { ETH_TEST_FL_OFFLINE = (1 << 0), /* online / offline */ ETH_TEST_FL_FAILED = (1 << 1), /* test passed / failed */ @@ -606,9 +617,9 @@ struct ethtool_ops { #define ETHTOOL_SRXCLSRLINS 0x00000032 /* Insert RX classification rule */ #define ETHTOOL_FLASHDEV 0x00000033 /* Flash firmware to device */ #define ETHTOOL_RESET 0x00000034 /* Reset hardware */ - -#define ETHTOOL_SRXNTUPLE 0x00000035 /* Add an n-tuple filter to device */ -#define ETHTOOL_GRXNTUPLE 0x00000036 /* Get n-tuple filters from device */ +#define ETHTOOL_SRXNTUPLE 0x00000035 /* Add an n-tuple filter to device */ +#define ETHTOOL_GRXNTUPLE 0x00000036 /* Get n-tuple filters from device */ +#define ETHTOOL_GSSET_INFO 0x00000037 /* Get string set info */ /* compatibility with older code */ #define SPARC_ETH_GSET ETHTOOL_GSET diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 0f2f82185ec4..70075c47ada8 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -214,6 +214,10 @@ static noinline int ethtool_get_drvinfo(struct net_device *dev, void __user *use info.cmd = ETHTOOL_GDRVINFO; ops->get_drvinfo(dev, &info); + /* + * this method of obtaining string set info is deprecated; + * consider using ETHTOOL_GSSET_INFO instead + */ if (ops->get_sset_count) { int rc; @@ -237,6 +241,71 @@ static noinline int ethtool_get_drvinfo(struct net_device *dev, void __user *use return 0; } +/* + * noinline attribute so that gcc doesnt use too much stack in dev_ethtool() + */ +static noinline int ethtool_get_sset_info(struct net_device *dev, + void __user *useraddr) +{ + struct ethtool_sset_info info; + const struct ethtool_ops *ops = dev->ethtool_ops; + u64 sset_mask; + int i, idx = 0, n_bits = 0, ret, rc; + u32 *info_buf = NULL; + + if (!ops->get_sset_count) + return -EOPNOTSUPP; + + if (copy_from_user(&info, useraddr, sizeof(info))) + return -EFAULT; + + /* store copy of mask, because we zero struct later on */ + sset_mask = info.sset_mask; + if (!sset_mask) + return 0; + + /* calculate size of return buffer */ + for (i = 0; i < 64; i++) + if (sset_mask & (1ULL << i)) + n_bits++; + + memset(&info, 0, sizeof(info)); + info.cmd = ETHTOOL_GSSET_INFO; + + info_buf = kzalloc(n_bits * sizeof(u32), GFP_USER); + if (!info_buf) + return -ENOMEM; + + /* + * fill return buffer based on input bitmask and successful + * get_sset_count return + */ + for (i = 0; i < 64; i++) { + if (!(sset_mask & (1ULL << i))) + continue; + + rc = ops->get_sset_count(dev, i); + if (rc >= 0) { + info.sset_mask |= (1ULL << i); + info_buf[idx++] = rc; + } + } + + ret = -EFAULT; + if (copy_to_user(useraddr, &info, sizeof(info))) + goto out; + + useraddr += offsetof(struct ethtool_sset_info, data); + if (copy_to_user(useraddr, info_buf, idx * sizeof(u32))) + goto out; + + ret = 0; + +out: + kfree(info_buf); + return ret; +} + /* * noinline attribute so that gcc doesnt use too much stack in dev_ethtool() */ @@ -1471,6 +1540,9 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GRXNTUPLE: rc = ethtool_get_rx_ntuple(dev, useraddr); break; + case ETHTOOL_GSSET_INFO: + rc = ethtool_get_sset_info(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } -- cgit v1.2.3 From d17792ebdf90289c9fd1bce888076d3d60ecd53b Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Thu, 4 Mar 2010 08:21:53 +0000 Subject: ethtool: Add direct access to ops->get_sset_count On 03/04/2010 09:26 AM, Ben Hutchings wrote: > On Thu, 2010-03-04 at 00:51 -0800, Jeff Kirsher wrote: >> From: Jeff Garzik >> >> This patch is an alternative approach for accessing string 
>> counts, vs. the drvinfo indirect approach. This way the drvinfo >> space doesn't run out, and we don't break ABI later. > [...] >> --- a/net/core/ethtool.c >> +++ b/net/core/ethtool.c >> @@ -214,6 +214,10 @@ static noinline int ethtool_get_drvinfo(struct net_device *dev, void __user *use >> info.cmd = ETHTOOL_GDRVINFO; >> ops->get_drvinfo(dev,&info); >> >> + /* >> + * this method of obtaining string set info is deprecated; >> + * consider using ETHTOOL_GSSET_INFO instead >> + */ > > This comment belongs on the interface (ethtool.h) not the > implementation. Debatable -- the current comment is located at the callsite of ops->get_sset_count(), which is where an implementor might think to add a new call. Not all the numeric fields in ethtool_drvinfo are obtained from ->get_sset_count(). Hence the "some" in the attached patch to include/linux/ethtool.h, addressing your comment. > [...] >> +static noinline int ethtool_get_sset_info(struct net_device *dev, >> + void __user *useraddr) >> +{ > [...] >> + /* calculate size of return buffer */ >> + for (i = 0; i< 64; i++) >> + if (sset_mask& (1ULL<< i)) >> + n_bits++; > [...] > > We have a function for this: > > n_bits = hweight64(sset_mask); Agreed. I've attached a follow-up patch, which should enable my/Jeff's kernel patch to be applied, followed by this one. Signed-off-by: Jeff Garzik Signed-off-by: David S. Miller --- include/linux/ethtool.h | 7 +++++++ net/core/ethtool.c | 7 +++---- 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index f6f961fefbe5..b33f316bb92e 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -61,6 +61,13 @@ struct ethtool_drvinfo { /* For PCI devices, use pci_name(pci_dev). */ char reserved1[32]; char reserved2[12]; + /* + * Some struct members below are filled in + * using ops->get_sset_count(). Obtaining + * this info from ethtool_drvinfo is now + * deprecated; Use ETHTOOL_GSSET_INFO + * instead. + */ __u32 n_priv_flags; /* number of flags valid in ETHTOOL_GPFLAGS */ __u32 n_stats; /* number of u64's from ETHTOOL_GSTATS */ __u32 testinfo_len; diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 70075c47ada8..33d2ded50f84 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -17,6 +17,7 @@ #include #include #include +#include #include /* @@ -216,7 +217,7 @@ static noinline int ethtool_get_drvinfo(struct net_device *dev, void __user *use /* * this method of obtaining string set info is deprecated; - * consider using ETHTOOL_GSSET_INFO instead + * Use ETHTOOL_GSSET_INFO instead. */ if (ops->get_sset_count) { int rc; @@ -265,9 +266,7 @@ static noinline int ethtool_get_sset_info(struct net_device *dev, return 0; /* calculate size of return buffer */ - for (i = 0; i < 64; i++) - if (sset_mask & (1ULL << i)) - n_bits++; + n_bits = hweight64(sset_mask); memset(&info, 0, sizeof(info)); info.cmd = ETHTOOL_GSSET_INFO; -- cgit v1.2.3 From 8215d6ec5fee1e76545decea2cd73717efb5cb42 Mon Sep 17 00:00:00 2001 From: Nikanth Karthikesan Date: Sat, 6 Mar 2010 02:32:27 +0000 Subject: dm table: remove unused dm_get_device range parameters Remove unused parameters(start and len) of dm_get_device() and fix the callers. 
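For target authors the visible change is confined to the constructor: the device is now looked up by path and mode alone, and the unused start/len pair simply disappears from the call. A hedged before/after, modeled on the dm-linear hunk below (lc is that target's private context; error handling simplified):

/* Before: the range arguments were passed but never used by the lookup. */
if (dm_get_device(ti, argv[0], lc->start, ti->len,
		  dm_table_get_mode(ti->table), &lc->dev)) {
	ti->error = "dm-linear: Device lookup failed";
	return -ENXIO;
}

/* After: path and mode are all the lookup needs. */
if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &lc->dev)) {
	ti->error = "dm-linear: Device lookup failed";
	return -ENXIO;
}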
Signed-off-by: Nikanth Karthikesan Signed-off-by: Alasdair G Kergon --- drivers/md/dm-crypt.c | 3 +-- drivers/md/dm-delay.c | 8 ++++---- drivers/md/dm-linear.c | 3 +-- drivers/md/dm-log.c | 3 +-- drivers/md/dm-mpath.c | 7 +++---- drivers/md/dm-raid1.c | 3 +-- drivers/md/dm-snap.c | 8 +++----- drivers/md/dm-stripe.c | 3 +-- drivers/md/dm-table.c | 10 ++++------ include/linux/device-mapper.h | 5 ++--- 10 files changed, 21 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index a93637223c8d..3bdbb6115702 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1160,8 +1160,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) } cc->start = tmpll; - if (dm_get_device(ti, argv[3], cc->start, ti->len, - dm_table_get_mode(ti->table), &cc->dev)) { + if (dm_get_device(ti, argv[3], dm_table_get_mode(ti->table), &cc->dev)) { ti->error = "Device lookup failed"; goto bad_device; } diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index ebe7381f47c8..852052880d7a 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -156,8 +156,8 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } - if (dm_get_device(ti, argv[0], dc->start_read, ti->len, - dm_table_get_mode(ti->table), &dc->dev_read)) { + if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), + &dc->dev_read)) { ti->error = "Device lookup failed"; goto bad; } @@ -177,8 +177,8 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad_dev_read; } - if (dm_get_device(ti, argv[3], dc->start_write, ti->len, - dm_table_get_mode(ti->table), &dc->dev_write)) { + if (dm_get_device(ti, argv[3], dm_table_get_mode(ti->table), + &dc->dev_write)) { ti->error = "Write device lookup failed"; goto bad_dev_read; } diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 82f7d6e6b1ea..9200dbf2391a 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -47,8 +47,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) } lc->start = tmp; - if (dm_get_device(ti, argv[0], lc->start, ti->len, - dm_table_get_mode(ti->table), &lc->dev)) { + if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &lc->dev)) { ti->error = "dm-linear: Device lookup failed"; goto bad; } diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index 7035582786fb..5a08be0222db 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -543,8 +543,7 @@ static int disk_ctr(struct dm_dirty_log *log, struct dm_target *ti, return -EINVAL; } - r = dm_get_device(ti, argv[0], 0, 0 /* FIXME */, - FMODE_READ | FMODE_WRITE, &dev); + r = dm_get_device(ti, argv[0], FMODE_READ | FMODE_WRITE, &dev); if (r) return r; diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index c1335487cc72..826bce7343b3 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -607,8 +607,8 @@ static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps, if (!p) return ERR_PTR(-ENOMEM); - r = dm_get_device(ti, shift(as), ti->begin, ti->len, - dm_table_get_mode(ti->table), &p->path.dev); + r = dm_get_device(ti, shift(as), dm_table_get_mode(ti->table), + &p->path.dev); if (r) { ti->error = "error getting device"; goto bad; @@ -1505,8 +1505,7 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv) goto out; } - r = dm_get_device(ti, argv[1], ti->begin, ti->len, - dm_table_get_mode(ti->table), &dev); + r = dm_get_device(ti, argv[1], 
dm_table_get_mode(ti->table), &dev); if (r) { DMWARN("message: error getting device %s", argv[1]); diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index de26fde4098f..6d66ddf39071 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -920,8 +920,7 @@ static int get_mirror(struct mirror_set *ms, struct dm_target *ti, return -EINVAL; } - if (dm_get_device(ti, argv[0], offset, ti->len, - dm_table_get_mode(ti->table), + if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &ms->mirror[mirror].dev)) { ti->error = "Device lookup failure"; return -ENXIO; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index ee8eb283650d..0789c22ff0d4 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1081,8 +1081,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) argv++; argc--; - r = dm_get_device(ti, cow_path, 0, 0, - FMODE_READ | FMODE_WRITE, &s->cow); + r = dm_get_device(ti, cow_path, FMODE_READ | FMODE_WRITE, &s->cow); if (r) { ti->error = "Cannot get COW device"; goto bad_cow; @@ -1098,7 +1097,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) argv += args_used; argc -= args_used; - r = dm_get_device(ti, origin_path, 0, ti->len, origin_mode, &s->origin); + r = dm_get_device(ti, origin_path, origin_mode, &s->origin); if (r) { ti->error = "Cannot get origin device"; goto bad_origin; @@ -2100,8 +2099,7 @@ static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv) return -EINVAL; } - r = dm_get_device(ti, argv[0], 0, ti->len, - dm_table_get_mode(ti->table), &dev); + r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &dev); if (r) { ti->error = "Cannot get target device"; return r; diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index bd58703ee8f6..e610725db766 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -80,8 +80,7 @@ static int get_stripe(struct dm_target *ti, struct stripe_c *sc, if (sscanf(argv[1], "%llu", &start) != 1) return -EINVAL; - if (dm_get_device(ti, argv[0], start, sc->stripe_width, - dm_table_get_mode(ti->table), + if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &sc->stripe[stripe].dev)) return -ENXIO; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 7d70cca585ac..9924ea23032d 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -429,8 +429,7 @@ static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode, * it's already present. 
*/ static int __table_get_device(struct dm_table *t, struct dm_target *ti, - const char *path, sector_t start, sector_t len, - fmode_t mode, struct dm_dev **result) + const char *path, fmode_t mode, struct dm_dev **result) { int r; dev_t uninitialized_var(dev); @@ -527,11 +526,10 @@ int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, } EXPORT_SYMBOL_GPL(dm_set_device_limits); -int dm_get_device(struct dm_target *ti, const char *path, sector_t start, - sector_t len, fmode_t mode, struct dm_dev **result) +int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode, + struct dm_dev **result) { - return __table_get_device(ti->table, ti, path, - start, len, mode, result); + return __table_get_device(ti->table, ti, path, mode, result); } diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index d4c9c0b88adc..1381cd97b4ed 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -118,10 +118,9 @@ struct dm_dev { /* * Constructors should call these functions to ensure destination devices * are opened/closed correctly. - * FIXME: too many arguments. */ -int dm_get_device(struct dm_target *ti, const char *path, sector_t start, - sector_t len, fmode_t mode, struct dm_dev **result); +int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode, + struct dm_dev **result); void dm_put_device(struct dm_target *ti, struct dm_dev *d); /* -- cgit v1.2.3 From 3abf85b5b5851b5f28d3d8a920ebb844edd08352 Mon Sep 17 00:00:00 2001 From: Peter Rajnoha Date: Sat, 6 Mar 2010 02:32:31 +0000 Subject: dm ioctl: introduce flag indicating uevent was generated Set a new DM_UEVENT_GENERATED_FLAG when returning from ioctls to indicate that a uevent was actually generated. This tells the userspace caller that it may need to wait for the event to be processed. 
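On the userspace side the flag is advisory: after issuing a device-mapper ioctl that carries a uevent cookie, the caller can test it to decide whether a matching udev event will ever arrive. A hedged sketch of that check (struct dm_ioctl, its flags/event_nr fields and DM_DEV_REMOVE are the existing interface; the setup and wait helpers are hypothetical):

struct dm_ioctl *dmi = prepare_dm_ioctl(name, cookie);	/* hypothetical setup */

if (ioctl(control_fd, DM_DEV_REMOVE, dmi) == 0) {
	if (dmi->flags & DM_UEVENT_GENERATED_FLAG)
		wait_for_udev_cookie(cookie);	/* a uevent is really coming */
	/* else: no uevent was generated, so there is nothing to wait for
	 * and waiting would only stall the caller. */
}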
Signed-off-by: Peter Rajnoha Signed-off-by: Alasdair G Kergon --- drivers/md/dm-ioctl.c | 19 ++++++++++++------- drivers/md/dm.c | 7 ++++--- drivers/md/dm.h | 4 ++-- include/linux/dm-ioctl.h | 9 +++++++-- 4 files changed, 25 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index e3cf5686d0aa..d7500e1c26f2 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -285,7 +285,8 @@ retry: up_write(&_hash_lock); } -static int dm_hash_rename(uint32_t cookie, const char *old, const char *new) +static int dm_hash_rename(uint32_t cookie, uint32_t *flags, const char *old, + const char *new) { char *new_name, *old_name; struct hash_cell *hc; @@ -344,7 +345,8 @@ static int dm_hash_rename(uint32_t cookie, const char *old, const char *new) dm_table_put(table); } - dm_kobject_uevent(hc->md, KOBJ_CHANGE, cookie); + if (!dm_kobject_uevent(hc->md, KOBJ_CHANGE, cookie)) + *flags |= DM_UEVENT_GENERATED_FLAG; dm_put(hc->md); up_write(&_hash_lock); @@ -736,10 +738,10 @@ static int dev_remove(struct dm_ioctl *param, size_t param_size) __hash_remove(hc); up_write(&_hash_lock); - dm_kobject_uevent(md, KOBJ_REMOVE, param->event_nr); + if (!dm_kobject_uevent(md, KOBJ_REMOVE, param->event_nr)) + param->flags |= DM_UEVENT_GENERATED_FLAG; dm_put(md); - param->data_size = 0; return 0; } @@ -773,7 +775,9 @@ static int dev_rename(struct dm_ioctl *param, size_t param_size) return r; param->data_size = 0; - return dm_hash_rename(param->event_nr, param->name, new_name); + + return dm_hash_rename(param->event_nr, ¶m->flags, param->name, + new_name); } static int dev_set_geometry(struct dm_ioctl *param, size_t param_size) @@ -899,8 +903,8 @@ static int do_resume(struct dm_ioctl *param) if (dm_suspended_md(md)) { r = dm_resume(md); - if (!r) - dm_kobject_uevent(md, KOBJ_CHANGE, param->event_nr); + if (!r && !dm_kobject_uevent(md, KOBJ_CHANGE, param->event_nr)) + param->flags |= DM_UEVENT_GENERATED_FLAG; } if (old_map) @@ -1477,6 +1481,7 @@ static int validate_params(uint cmd, struct dm_ioctl *param) { /* Always clear this flag */ param->flags &= ~DM_BUFFER_FULL_FLAG; + param->flags &= ~DM_UEVENT_GENERATED_FLAG; /* Ignores parameters */ if (cmd == DM_REMOVE_ALL_CMD || diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 7199846364e9..d21e1284604f 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2618,18 +2618,19 @@ out: /*----------------------------------------------------------------- * Event notification. 
*---------------------------------------------------------------*/ -void dm_kobject_uevent(struct mapped_device *md, enum kobject_action action, +int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action, unsigned cookie) { char udev_cookie[DM_COOKIE_LENGTH]; char *envp[] = { udev_cookie, NULL }; if (!cookie) - kobject_uevent(&disk_to_dev(md->disk)->kobj, action); + return kobject_uevent(&disk_to_dev(md->disk)->kobj, action); else { snprintf(udev_cookie, DM_COOKIE_LENGTH, "%s=%u", DM_COOKIE_ENV_VAR_NAME, cookie); - kobject_uevent_env(&disk_to_dev(md->disk)->kobj, action, envp); + return kobject_uevent_env(&disk_to_dev(md->disk)->kobj, + action, envp); } } diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 8dadaa5bc396..bad1724d4869 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -125,8 +125,8 @@ void dm_stripe_exit(void); int dm_open_count(struct mapped_device *md); int dm_lock_for_deletion(struct mapped_device *md); -void dm_kobject_uevent(struct mapped_device *md, enum kobject_action action, - unsigned cookie); +int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action, + unsigned cookie); int dm_io_init(void); void dm_io_exit(void); diff --git a/include/linux/dm-ioctl.h b/include/linux/dm-ioctl.h index aa95508d2f95..2c445e113790 100644 --- a/include/linux/dm-ioctl.h +++ b/include/linux/dm-ioctl.h @@ -266,9 +266,9 @@ enum { #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 16 +#define DM_VERSION_MINOR 17 #define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2009-11-05)" +#define DM_VERSION_EXTRA "-ioctl (2010-03-05)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ @@ -316,4 +316,9 @@ enum { */ #define DM_QUERY_INACTIVE_TABLE_FLAG (1 << 12) /* In */ +/* + * If set, a uevent was generated for which the caller may need to wait. + */ +#define DM_UEVENT_GENERATED_FLAG (1 << 13) /* Out */ + #endif /* _LINUX_DM_IOCTL_H */ -- cgit v1.2.3 From 924e600d417ead9ef67043988055ba236f114718 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Sat, 6 Mar 2010 02:32:33 +0000 Subject: dm: eliminate some holes data structures Eliminate a 4-byte hole in 'struct dm_io_memory' by moving 'offset' above the 'ptr' to which it applies (size reduced from 24 to 16 bytes). And by association, 1-4 byte hole is eliminated in 'struct dm_io_request' (size reduced from 56 to 48 bytes). Eliminate all 6 4-byte holes and 1 cache-line in 'struct dm_snapshot' (size reduced from 392 to 368 bytes). 
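The mechanics are ordinary C layout rules: a 4-byte member placed immediately before an 8-byte-aligned member leaves a 4-byte hole on 64-bit, and moving it next to another small member reclaims the space. A reduced illustration of the dm_io_memory case (types simplified; pahole is the usual tool for spotting such holes):

/* Old order: 24 bytes on 64-bit. */
struct io_memory_before {
	int type;			/* bytes 0-3                            */
					/* bytes 4-7: hole (union is 8-aligned) */
	union { void *addr; } ptr;	/* bytes 8-15                           */
	unsigned offset;		/* bytes 16-19, plus 4 bytes tail pad   */
};

/* New order: 16 bytes, no holes. */
struct io_memory_after {
	int type;			/* bytes 0-3  */
	unsigned offset;		/* bytes 4-7  */
	union { void *addr; } ptr;	/* bytes 8-15 */
};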
Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-snap.c | 26 +++++++++++++------------- include/linux/dm-io.h | 4 ++-- 2 files changed, 15 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 0789c22ff0d4..54853773510c 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -83,10 +83,10 @@ struct dm_snapshot { /* Whether or not owning mapped_device is suspended */ int suspended; - mempool_t *pending_pool; - atomic_t pending_exceptions_count; + mempool_t *pending_pool; + struct dm_exception_table pending; struct dm_exception_table complete; @@ -96,6 +96,11 @@ struct dm_snapshot { */ spinlock_t pe_lock; + /* Chunks with outstanding reads */ + spinlock_t tracked_chunk_lock; + mempool_t *tracked_chunk_pool; + struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE]; + /* The on disk metadata handler */ struct dm_exception_store *store; @@ -105,10 +110,12 @@ struct dm_snapshot { struct bio_list queued_bios; struct work_struct queued_bios_work; - /* Chunks with outstanding reads */ - mempool_t *tracked_chunk_pool; - spinlock_t tracked_chunk_lock; - struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE]; + /* Wait for events based on state_bits */ + unsigned long state_bits; + + /* Range of chunks currently being merged. */ + chunk_t first_merging_chunk; + int num_merging_chunks; /* * The merge operation failed if this flag is set. @@ -125,13 +132,6 @@ struct dm_snapshot { */ int merge_failed; - /* Wait for events based on state_bits */ - unsigned long state_bits; - - /* Range of chunks currently being merged. */ - chunk_t first_merging_chunk; - int num_merging_chunks; - /* * Incoming bios that overlap with chunks being merged must wait * for them to be committed. diff --git a/include/linux/dm-io.h b/include/linux/dm-io.h index b6bf17ee2f61..5c9186b93fff 100644 --- a/include/linux/dm-io.h +++ b/include/linux/dm-io.h @@ -37,14 +37,14 @@ enum dm_io_mem_type { struct dm_io_memory { enum dm_io_mem_type type; + unsigned offset; + union { struct page_list *pl; struct bio_vec *bvec; void *vma; void *addr; } ptr; - - unsigned offset; }; struct dm_io_notify { -- cgit v1.2.3 From 984b3f5746ed2cde3d184651dabf26980f2b66e5 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 5 Mar 2010 13:41:37 -0800 Subject: bitops: rename for_each_bit() to for_each_set_bit() Rename for_each_bit to for_each_set_bit in the kernel source tree. To permit for_each_clear_bit(), should that ever be added. The patch includes a macro to map the old for_each_bit() onto the new for_each_set_bit(). This is a (very) temporary thing to ease the migration. [akpm@linux-foundation.org: add temporary for_each_bit()] Suggested-by: Alexey Dobriyan Suggested-by: Andrew Morton Signed-off-by: Akinobu Mita Cc: "David S. 
Miller" Cc: Russell King Cc: David Woodhouse Cc: Artem Bityutskiy Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/cpu/perf_event.c | 2 +- arch/x86/kernel/cpu/perf_event_intel.c | 2 +- drivers/dma/ioat/dma.c | 2 +- drivers/gpio/pl061.c | 2 +- drivers/gpio/timbgpio.c | 2 +- drivers/i2c/busses/i2c-designware.c | 4 ++-- drivers/mfd/htc-egpio.c | 2 +- drivers/misc/sgi-xp/xpnet.c | 2 +- drivers/net/gianfar.c | 12 ++++++------ drivers/net/ixgbe/ixgbe_main.c | 2 +- drivers/net/ixgbevf/ixgbevf_main.c | 2 +- drivers/net/wireless/ath/ar9170/main.c | 2 +- drivers/net/wireless/iwmc3200wifi/debugfs.c | 2 +- drivers/net/wireless/iwmc3200wifi/rx.c | 2 +- fs/ocfs2/quota_local.c | 2 +- include/linux/bitops.h | 4 +++- kernel/sched_cpupri.c | 2 +- sound/soc/codecs/uda1380.c | 2 +- 18 files changed, 26 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 641ccb9dddbc..b1fbdeecf6c9 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -676,7 +676,7 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) if (c->weight != w) continue; - for_each_bit(j, c->idxmsk, X86_PMC_IDX_MAX) { + for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) { if (!test_bit(j, used_mask)) break; } diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index cf6590cf4a5f..977e7544738c 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -757,7 +757,7 @@ again: inc_irq_stat(apic_perf_irqs); ack = status; - for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { + for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { struct perf_event *event = cpuc->events[bit]; clear_bit(bit, (unsigned long *) &status); diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index 5d0e42b263df..af14c9a5b8d4 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -71,7 +71,7 @@ static irqreturn_t ioat_dma_do_interrupt(int irq, void *data) } attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET); - for_each_bit(bit, &attnstatus, BITS_PER_LONG) { + for_each_set_bit(bit, &attnstatus, BITS_PER_LONG) { chan = ioat_chan_by_index(instance, bit); tasklet_schedule(&chan->cleanup_task); } diff --git a/drivers/gpio/pl061.c b/drivers/gpio/pl061.c index 4ee4c8367a3f..3ad1eeb49609 100644 --- a/drivers/gpio/pl061.c +++ b/drivers/gpio/pl061.c @@ -219,7 +219,7 @@ static void pl061_irq_handler(unsigned irq, struct irq_desc *desc) if (pending == 0) continue; - for_each_bit(offset, &pending, PL061_GPIO_NR) + for_each_set_bit(offset, &pending, PL061_GPIO_NR) generic_handle_irq(pl061_to_irq(&chip->gc, offset)); } desc->chip->unmask(irq); diff --git a/drivers/gpio/timbgpio.c b/drivers/gpio/timbgpio.c index d941f45fe557..4ecba6e5a32d 100644 --- a/drivers/gpio/timbgpio.c +++ b/drivers/gpio/timbgpio.c @@ -175,7 +175,7 @@ static void timbgpio_irq(unsigned int irq, struct irq_desc *desc) ipr = ioread32(tgpio->membase + TGPIO_IPR); iowrite32(ipr, tgpio->membase + TGPIO_ICR); - for_each_bit(offset, &ipr, tgpio->gpio.ngpio) + for_each_set_bit(offset, &ipr, tgpio->gpio.ngpio) generic_handle_irq(timbgpio_to_irq(&tgpio->gpio, offset)); } diff --git a/drivers/i2c/busses/i2c-designware.c b/drivers/i2c/busses/i2c-designware.c index 9e18ef97f156..3e72b69aa7f8 100644 --- a/drivers/i2c/busses/i2c-designware.c +++ b/drivers/i2c/busses/i2c-designware.c @@ -497,13 +497,13 @@ 
static int i2c_dw_handle_tx_abort(struct dw_i2c_dev *dev) int i; if (abort_source & DW_IC_TX_ABRT_NOACK) { - for_each_bit(i, &abort_source, ARRAY_SIZE(abort_sources)) + for_each_set_bit(i, &abort_source, ARRAY_SIZE(abort_sources)) dev_dbg(dev->dev, "%s: %s\n", __func__, abort_sources[i]); return -EREMOTEIO; } - for_each_bit(i, &abort_source, ARRAY_SIZE(abort_sources)) + for_each_set_bit(i, &abort_source, ARRAY_SIZE(abort_sources)) dev_err(dev->dev, "%s: %s\n", __func__, abort_sources[i]); if (abort_source & DW_IC_TX_ARB_LOST) diff --git a/drivers/mfd/htc-egpio.c b/drivers/mfd/htc-egpio.c index aa266e1f69b2..addb846c1e34 100644 --- a/drivers/mfd/htc-egpio.c +++ b/drivers/mfd/htc-egpio.c @@ -108,7 +108,7 @@ static void egpio_handler(unsigned int irq, struct irq_desc *desc) ack_irqs(ei); /* Process all set pins. */ readval &= ei->irqs_enabled; - for_each_bit(irqpin, &readval, ei->nirqs) { + for_each_set_bit(irqpin, &readval, ei->nirqs) { /* Run irq handler */ pr_debug("got IRQ %d\n", irqpin); irq = ei->irq_start + irqpin; diff --git a/drivers/misc/sgi-xp/xpnet.c b/drivers/misc/sgi-xp/xpnet.c index 16f0abda1423..57b152f8d1b9 100644 --- a/drivers/misc/sgi-xp/xpnet.c +++ b/drivers/misc/sgi-xp/xpnet.c @@ -475,7 +475,7 @@ xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) if (skb->data[0] == 0xff) { /* we are being asked to broadcast to all partitions */ - for_each_bit(dest_partid, xpnet_broadcast_partitions, + for_each_set_bit(dest_partid, xpnet_broadcast_partitions, xp_max_npartitions) { xpnet_send(skb, queued_msg, start_addr, end_addr, diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c index 6aa526ee9096..61a7b4351e78 100644 --- a/drivers/net/gianfar.c +++ b/drivers/net/gianfar.c @@ -998,7 +998,7 @@ static int gfar_probe(struct of_device *ofdev, } /* Need to reverse the bit maps as bit_map's MSB is q0 - * but, for_each_bit parses from right to left, which + * but, for_each_set_bit parses from right to left, which * basically reverses the queue numbers */ for (i = 0; i< priv->num_grps; i++) { priv->gfargrp[i].tx_bit_map = reverse_bitmap( @@ -1011,7 +1011,7 @@ static int gfar_probe(struct of_device *ofdev, * also assign queues to groups */ for (grp_idx = 0; grp_idx < priv->num_grps; grp_idx++) { priv->gfargrp[grp_idx].num_rx_queues = 0x0; - for_each_bit(i, &priv->gfargrp[grp_idx].rx_bit_map, + for_each_set_bit(i, &priv->gfargrp[grp_idx].rx_bit_map, priv->num_rx_queues) { priv->gfargrp[grp_idx].num_rx_queues++; priv->rx_queue[i]->grp = &priv->gfargrp[grp_idx]; @@ -1019,7 +1019,7 @@ static int gfar_probe(struct of_device *ofdev, rqueue = rqueue | ((RQUEUE_EN0 | RQUEUE_EX0) >> i); } priv->gfargrp[grp_idx].num_tx_queues = 0x0; - for_each_bit (i, &priv->gfargrp[grp_idx].tx_bit_map, + for_each_set_bit(i, &priv->gfargrp[grp_idx].tx_bit_map, priv->num_tx_queues) { priv->gfargrp[grp_idx].num_tx_queues++; priv->tx_queue[i]->grp = &priv->gfargrp[grp_idx]; @@ -1709,7 +1709,7 @@ void gfar_configure_coalescing(struct gfar_private *priv, if (priv->mode == MQ_MG_MODE) { baddr = ®s->txic0; - for_each_bit (i, &tx_mask, priv->num_tx_queues) { + for_each_set_bit(i, &tx_mask, priv->num_tx_queues) { if (likely(priv->tx_queue[i]->txcoalescing)) { gfar_write(baddr + i, 0); gfar_write(baddr + i, priv->tx_queue[i]->txic); @@ -1717,7 +1717,7 @@ void gfar_configure_coalescing(struct gfar_private *priv, } baddr = ®s->rxic0; - for_each_bit (i, &rx_mask, priv->num_rx_queues) { + for_each_set_bit(i, &rx_mask, priv->num_rx_queues) { if (likely(priv->rx_queue[i]->rxcoalescing)) { gfar_write(baddr + i, 
0); gfar_write(baddr + i, priv->rx_queue[i]->rxic); @@ -2607,7 +2607,7 @@ static int gfar_poll(struct napi_struct *napi, int budget) budget_per_queue = left_over_budget/num_queues; left_over_budget = 0; - for_each_bit(i, &gfargrp->rx_bit_map, priv->num_rx_queues) { + for_each_set_bit(i, &gfargrp->rx_bit_map, priv->num_rx_queues) { if (test_bit(i, &serviced_queues)) continue; rx_queue = priv->rx_queue[i]; diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index 45e3532b166f..684af371462d 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -1050,7 +1050,7 @@ static void ixgbe_configure_msix(struct ixgbe_adapter *adapter) */ for (v_idx = 0; v_idx < q_vectors; v_idx++) { q_vector = adapter->q_vector[v_idx]; - /* XXX for_each_bit(...) */ + /* XXX for_each_set_bit(...) */ r_idx = find_first_bit(q_vector->rxr_idx, adapter->num_rx_queues); diff --git a/drivers/net/ixgbevf/ixgbevf_main.c b/drivers/net/ixgbevf/ixgbevf_main.c index 235b5fd4b8d4..ca653c49b765 100644 --- a/drivers/net/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ixgbevf/ixgbevf_main.c @@ -751,7 +751,7 @@ static void ixgbevf_configure_msix(struct ixgbevf_adapter *adapter) */ for (v_idx = 0; v_idx < q_vectors; v_idx++) { q_vector = adapter->q_vector[v_idx]; - /* XXX for_each_bit(...) */ + /* XXX for_each_set_bit(...) */ r_idx = find_first_bit(q_vector->rxr_idx, adapter->num_rx_queues); diff --git a/drivers/net/wireless/ath/ar9170/main.c b/drivers/net/wireless/ath/ar9170/main.c index 8a964f130367..a6452af9c6c5 100644 --- a/drivers/net/wireless/ath/ar9170/main.c +++ b/drivers/net/wireless/ath/ar9170/main.c @@ -394,7 +394,7 @@ static void ar9170_tx_fake_ampdu_status(struct ar9170 *ar) ieee80211_tx_status_irqsafe(ar->hw, skb); } - for_each_bit(i, &queue_bitmap, BITS_PER_BYTE) { + for_each_set_bit(i, &queue_bitmap, BITS_PER_BYTE) { #ifdef AR9170_QUEUE_STOP_DEBUG printk(KERN_DEBUG "%s: wake queue %d\n", wiphy_name(ar->hw->wiphy), i); diff --git a/drivers/net/wireless/iwmc3200wifi/debugfs.c b/drivers/net/wireless/iwmc3200wifi/debugfs.c index be992ca41cf1..c29c994de0e2 100644 --- a/drivers/net/wireless/iwmc3200wifi/debugfs.c +++ b/drivers/net/wireless/iwmc3200wifi/debugfs.c @@ -89,7 +89,7 @@ static int iwm_debugfs_dbg_modules_write(void *data, u64 val) for (i = 0; i < __IWM_DM_NR; i++) iwm->dbg.dbg_module[i] = 0; - for_each_bit(bit, &iwm->dbg.dbg_modules, __IWM_DM_NR) + for_each_set_bit(bit, &iwm->dbg.dbg_modules, __IWM_DM_NR) iwm->dbg.dbg_module[bit] = iwm->dbg.dbg_level; return 0; diff --git a/drivers/net/wireless/iwmc3200wifi/rx.c b/drivers/net/wireless/iwmc3200wifi/rx.c index ad8f7eabb5aa..8456b4dbd146 100644 --- a/drivers/net/wireless/iwmc3200wifi/rx.c +++ b/drivers/net/wireless/iwmc3200wifi/rx.c @@ -1116,7 +1116,7 @@ static int iwm_ntf_stop_resume_tx(struct iwm_priv *iwm, u8 *buf, return -EINVAL; } - for_each_bit(bit, (unsigned long *)&tid_msk, IWM_UMAC_TID_NR) { + for_each_set_bit(bit, (unsigned long *)&tid_msk, IWM_UMAC_TID_NR) { tid_info = &sta_info->tid_info[bit]; mutex_lock(&tid_info->mutex); diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 21f9e71223ca..a6467f3d262e 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -457,7 +457,7 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode, break; } dchunk = (struct ocfs2_local_disk_chunk *)hbh->b_data; - for_each_bit(bit, rchunk->rc_bitmap, ol_chunk_entries(sb)) { + for_each_set_bit(bit, rchunk->rc_bitmap, ol_chunk_entries(sb)) { qbh = NULL; status = ocfs2_read_quota_block(lqinode, 
ol_dqblk_block(sb, chunk, bit), diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 25b8b2f33ae9..b79389879238 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -16,11 +16,13 @@ */ #include -#define for_each_bit(bit, addr, size) \ +#define for_each_set_bit(bit, addr, size) \ for ((bit) = find_first_bit((addr), (size)); \ (bit) < (size); \ (bit) = find_next_bit((addr), (size), (bit) + 1)) +/* Temporary */ +#define for_each_bit(bit, addr, size) for_each_set_bit(bit, addr, size) static __inline__ int get_bitmask_order(unsigned int count) { diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c index eeb3506c4834..82095bf2099f 100644 --- a/kernel/sched_cpupri.c +++ b/kernel/sched_cpupri.c @@ -47,7 +47,7 @@ static int convert_prio(int prio) } #define for_each_cpupri_active(array, idx) \ - for_each_bit(idx, array, CPUPRI_NR_PRIORITIES) + for_each_set_bit(idx, array, CPUPRI_NR_PRIORITIES) /** * cpupri_find - find the best (lowest-pri) CPU in the system diff --git a/sound/soc/codecs/uda1380.c b/sound/soc/codecs/uda1380.c index a2763c2e7348..9cd0a66b7663 100644 --- a/sound/soc/codecs/uda1380.c +++ b/sound/soc/codecs/uda1380.c @@ -137,7 +137,7 @@ static void uda1380_flush_work(struct work_struct *work) { int bit, reg; - for_each_bit(bit, &uda1380_cache_dirty, UDA1380_CACHEREGNUM - 0x10) { + for_each_set_bit(bit, &uda1380_cache_dirty, UDA1380_CACHEREGNUM - 0x10) { reg = 0x10 + bit; pr_debug("uda1380: flush reg %x val %x:\n", reg, uda1380_read_reg_cache(uda1380_codec, reg)); -- cgit v1.2.3 From d559db086ff5be9bcc259e5aa50bf3d881eaf1d1 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Fri, 5 Mar 2010 13:41:39 -0800 Subject: mm: clean up mm_counter Presently, per-mm statistics counter is defined by macro in sched.h This patch modifies it to - defined in mm.h as inlinf functions - use array instead of macro's name creation. This patch is for reducing patch size in future patch to modify implementation of per-mm counter. 
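For call sites the conversion is mechanical: counters are no longer named by pasting tokens into per-counter fields but selected by an enum index into one array, which lets the accessors become ordinary inline functions. A brief before/after, mirroring the task_statm() hunk below:

/* Before: token pasting picked a dedicated field.
 *   #define get_mm_counter(mm, member)  ((mm)->_##member)
 *   shared = get_mm_counter(mm, file_rss);
 */

/* After: one array indexed by an enum; adding a counter only extends the enum. */
unsigned long shared = get_mm_counter(mm, MM_FILEPAGES);
unsigned long rss    = shared + get_mm_counter(mm, MM_ANONPAGES);	/* == get_mm_rss(mm) */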
Signed-off-by: KAMEZAWA Hiroyuki Reviewed-by: Minchan Kim Cc: Christoph Lameter Cc: Lee Schermerhorn Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 4 +- include/linux/mm.h | 104 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/mm_types.h | 33 ++++++++++----- include/linux/sched.h | 54 ------------------------ kernel/fork.c | 3 +- kernel/tsacct.c | 1 + mm/filemap_xip.c | 2 +- mm/fremap.c | 2 +- mm/memory.c | 56 +++++++++++++++---------- mm/oom_kill.c | 4 +- mm/rmap.c | 10 ++--- mm/swapfile.c | 2 +- 12 files changed, 174 insertions(+), 101 deletions(-) (limited to 'include') diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index f277c4a111cb..375581276011 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -65,11 +65,11 @@ unsigned long task_vsize(struct mm_struct *mm) int task_statm(struct mm_struct *mm, int *shared, int *text, int *data, int *resident) { - *shared = get_mm_counter(mm, file_rss); + *shared = get_mm_counter(mm, MM_FILEPAGES); *text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> PAGE_SHIFT; *data = mm->total_vm - mm->shared_vm; - *resident = *shared + get_mm_counter(mm, anon_rss); + *resident = *shared + get_mm_counter(mm, MM_ANONPAGES); return mm->total_vm; } diff --git a/include/linux/mm.h b/include/linux/mm.h index 90957f14195c..2124cdb2d1d0 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -870,6 +870,110 @@ extern int mprotect_fixup(struct vm_area_struct *vma, */ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages); +/* + * per-process(per-mm_struct) statistics. + */ +#if USE_SPLIT_PTLOCKS +/* + * The mm counters are not protected by its page_table_lock, + * so must be incremented atomically. + */ +static inline void set_mm_counter(struct mm_struct *mm, int member, long value) +{ + atomic_long_set(&mm->rss_stat.count[member], value); +} + +static inline unsigned long get_mm_counter(struct mm_struct *mm, int member) +{ + return (unsigned long)atomic_long_read(&mm->rss_stat.count[member]); +} + +static inline void add_mm_counter(struct mm_struct *mm, int member, long value) +{ + atomic_long_add(value, &mm->rss_stat.count[member]); +} + +static inline void inc_mm_counter(struct mm_struct *mm, int member) +{ + atomic_long_inc(&mm->rss_stat.count[member]); +} + +static inline void dec_mm_counter(struct mm_struct *mm, int member) +{ + atomic_long_dec(&mm->rss_stat.count[member]); +} + +#else /* !USE_SPLIT_PTLOCKS */ +/* + * The mm counters are protected by its page_table_lock, + * so can be incremented directly. 
+ */ +static inline void set_mm_counter(struct mm_struct *mm, int member, long value) +{ + mm->rss_stat.count[member] = value; +} + +static inline unsigned long get_mm_counter(struct mm_struct *mm, int member) +{ + return mm->rss_stat.count[member]; +} + +static inline void add_mm_counter(struct mm_struct *mm, int member, long value) +{ + mm->rss_stat.count[member] += value; +} + +static inline void inc_mm_counter(struct mm_struct *mm, int member) +{ + mm->rss_stat.count[member]++; +} + +static inline void dec_mm_counter(struct mm_struct *mm, int member) +{ + mm->rss_stat.count[member]--; +} + +#endif /* !USE_SPLIT_PTLOCKS */ + +static inline unsigned long get_mm_rss(struct mm_struct *mm) +{ + return get_mm_counter(mm, MM_FILEPAGES) + + get_mm_counter(mm, MM_ANONPAGES); +} + +static inline unsigned long get_mm_hiwater_rss(struct mm_struct *mm) +{ + return max(mm->hiwater_rss, get_mm_rss(mm)); +} + +static inline unsigned long get_mm_hiwater_vm(struct mm_struct *mm) +{ + return max(mm->hiwater_vm, mm->total_vm); +} + +static inline void update_hiwater_rss(struct mm_struct *mm) +{ + unsigned long _rss = get_mm_rss(mm); + + if ((mm)->hiwater_rss < _rss) + (mm)->hiwater_rss = _rss; +} + +static inline void update_hiwater_vm(struct mm_struct *mm) +{ + if (mm->hiwater_vm < mm->total_vm) + mm->hiwater_vm = mm->total_vm; +} + +static inline void setmax_mm_hiwater_rss(unsigned long *maxrss, + struct mm_struct *mm) +{ + unsigned long hiwater_rss = get_mm_hiwater_rss(mm); + + if (*maxrss < hiwater_rss) + *maxrss = hiwater_rss; +} + /* * A callback you can register to apply pressure to ageable caches. diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 36f96271306c..e1ca64be6678 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -24,12 +24,6 @@ struct address_space; #define USE_SPLIT_PTLOCKS (NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS) -#if USE_SPLIT_PTLOCKS -typedef atomic_long_t mm_counter_t; -#else /* !USE_SPLIT_PTLOCKS */ -typedef unsigned long mm_counter_t; -#endif /* !USE_SPLIT_PTLOCKS */ - /* * Each physical page in the system has a struct page associated with * it to keep track of whatever it is we are using the page for at the @@ -201,6 +195,22 @@ struct core_state { struct completion startup; }; +enum { + MM_FILEPAGES, + MM_ANONPAGES, + NR_MM_COUNTERS +}; + +#if USE_SPLIT_PTLOCKS +struct mm_rss_stat { + atomic_long_t count[NR_MM_COUNTERS]; +}; +#else /* !USE_SPLIT_PTLOCKS */ +struct mm_rss_stat { + unsigned long count[NR_MM_COUNTERS]; +}; +#endif /* !USE_SPLIT_PTLOCKS */ + struct mm_struct { struct vm_area_struct * mmap; /* list of VMAs */ struct rb_root mm_rb; @@ -227,11 +237,6 @@ struct mm_struct { * by mmlist_lock */ - /* Special counters, in some configurations protected by the - * page_table_lock, in other configurations by being atomic. - */ - mm_counter_t _file_rss; - mm_counter_t _anon_rss; unsigned long hiwater_rss; /* High-watermark of RSS usage */ unsigned long hiwater_vm; /* High-water virtual memory usage */ @@ -244,6 +249,12 @@ struct mm_struct { unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */ + /* + * Special counters, in some configurations protected by the + * page_table_lock, in other configurations by being atomic. 
+ */ + struct mm_rss_stat rss_stat; + struct linux_binfmt *binfmt; cpumask_t cpu_vm_mask; diff --git a/include/linux/sched.h b/include/linux/sched.h index 4b1753f7e48e..cbeafa49a53b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -396,60 +396,6 @@ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long); static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} #endif -#if USE_SPLIT_PTLOCKS -/* - * The mm counters are not protected by its page_table_lock, - * so must be incremented atomically. - */ -#define set_mm_counter(mm, member, value) atomic_long_set(&(mm)->_##member, value) -#define get_mm_counter(mm, member) ((unsigned long)atomic_long_read(&(mm)->_##member)) -#define add_mm_counter(mm, member, value) atomic_long_add(value, &(mm)->_##member) -#define inc_mm_counter(mm, member) atomic_long_inc(&(mm)->_##member) -#define dec_mm_counter(mm, member) atomic_long_dec(&(mm)->_##member) - -#else /* !USE_SPLIT_PTLOCKS */ -/* - * The mm counters are protected by its page_table_lock, - * so can be incremented directly. - */ -#define set_mm_counter(mm, member, value) (mm)->_##member = (value) -#define get_mm_counter(mm, member) ((mm)->_##member) -#define add_mm_counter(mm, member, value) (mm)->_##member += (value) -#define inc_mm_counter(mm, member) (mm)->_##member++ -#define dec_mm_counter(mm, member) (mm)->_##member-- - -#endif /* !USE_SPLIT_PTLOCKS */ - -#define get_mm_rss(mm) \ - (get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss)) -#define update_hiwater_rss(mm) do { \ - unsigned long _rss = get_mm_rss(mm); \ - if ((mm)->hiwater_rss < _rss) \ - (mm)->hiwater_rss = _rss; \ -} while (0) -#define update_hiwater_vm(mm) do { \ - if ((mm)->hiwater_vm < (mm)->total_vm) \ - (mm)->hiwater_vm = (mm)->total_vm; \ -} while (0) - -static inline unsigned long get_mm_hiwater_rss(struct mm_struct *mm) -{ - return max(mm->hiwater_rss, get_mm_rss(mm)); -} - -static inline void setmax_mm_hiwater_rss(unsigned long *maxrss, - struct mm_struct *mm) -{ - unsigned long hiwater_rss = get_mm_hiwater_rss(mm); - - if (*maxrss < hiwater_rss) - *maxrss = hiwater_rss; -} - -static inline unsigned long get_mm_hiwater_vm(struct mm_struct *mm) -{ - return max(mm->hiwater_vm, mm->total_vm); -} extern void set_dumpable(struct mm_struct *mm, int value); extern int get_dumpable(struct mm_struct *mm); diff --git a/kernel/fork.c b/kernel/fork.c index 17bbf093356d..7616bcf107b9 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -455,8 +455,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) (current->mm->flags & MMF_INIT_MASK) : default_dump_filter; mm->core_state = NULL; mm->nr_ptes = 0; - set_mm_counter(mm, file_rss, 0); - set_mm_counter(mm, anon_rss, 0); + memset(&mm->rss_stat, 0, sizeof(mm->rss_stat)); spin_lock_init(&mm->page_table_lock); mm->free_area_cache = TASK_UNMAPPED_BASE; mm->cached_hole_size = ~0UL; diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 00d59d048edf..0a67e041edf8 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -21,6 +21,7 @@ #include #include #include +#include /* * fill in basic accounting fields diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c index 1888b2d71bb8..78b94f0b6d5d 100644 --- a/mm/filemap_xip.c +++ b/mm/filemap_xip.c @@ -194,7 +194,7 @@ retry: flush_cache_page(vma, address, pte_pfn(*pte)); pteval = ptep_clear_flush_notify(vma, address, pte); page_remove_rmap(page); - dec_mm_counter(mm, file_rss); + dec_mm_counter(mm, MM_FILEPAGES); BUG_ON(pte_dirty(pteval)); pte_unmap_unlock(pte, ptl); 
page_cache_release(page); diff --git a/mm/fremap.c b/mm/fremap.c index b6ec85abbb39..46f5dacf90a2 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -40,7 +40,7 @@ static void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma, page_remove_rmap(page); page_cache_release(page); update_hiwater_rss(mm); - dec_mm_counter(mm, file_rss); + dec_mm_counter(mm, MM_FILEPAGES); } } else { if (!pte_file(pte)) diff --git a/mm/memory.c b/mm/memory.c index 72fb5f39bccc..c57678478801 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -121,6 +121,7 @@ static int __init init_zero_pfn(void) } core_initcall(init_zero_pfn); + /* * If a p?d_bad entry is found while walking page tables, report * the error, before resetting entry to p?d_none. Usually (but @@ -376,12 +377,18 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address) return 0; } -static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss) +static inline void init_rss_vec(int *rss) { - if (file_rss) - add_mm_counter(mm, file_rss, file_rss); - if (anon_rss) - add_mm_counter(mm, anon_rss, anon_rss); + memset(rss, 0, sizeof(int) * NR_MM_COUNTERS); +} + +static inline void add_mm_rss_vec(struct mm_struct *mm, int *rss) +{ + int i; + + for (i = 0; i < NR_MM_COUNTERS; i++) + if (rss[i]) + add_mm_counter(mm, i, rss[i]); } /* @@ -632,7 +639,10 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, if (page) { get_page(page); page_dup_rmap(page); - rss[PageAnon(page)]++; + if (PageAnon(page)) + rss[MM_ANONPAGES]++; + else + rss[MM_FILEPAGES]++; } out_set_pte: @@ -648,11 +658,12 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, pte_t *src_pte, *dst_pte; spinlock_t *src_ptl, *dst_ptl; int progress = 0; - int rss[2]; + int rss[NR_MM_COUNTERS]; swp_entry_t entry = (swp_entry_t){0}; again: - rss[1] = rss[0] = 0; + init_rss_vec(rss); + dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl); if (!dst_pte) return -ENOMEM; @@ -688,7 +699,7 @@ again: arch_leave_lazy_mmu_mode(); spin_unlock(src_ptl); pte_unmap_nested(orig_src_pte); - add_mm_rss(dst_mm, rss[0], rss[1]); + add_mm_rss_vec(dst_mm, rss); pte_unmap_unlock(orig_dst_pte, dst_ptl); cond_resched(); @@ -816,8 +827,9 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, struct mm_struct *mm = tlb->mm; pte_t *pte; spinlock_t *ptl; - int file_rss = 0; - int anon_rss = 0; + int rss[NR_MM_COUNTERS]; + + init_rss_vec(rss); pte = pte_offset_map_lock(mm, pmd, addr, &ptl); arch_enter_lazy_mmu_mode(); @@ -863,14 +875,14 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, set_pte_at(mm, addr, pte, pgoff_to_pte(page->index)); if (PageAnon(page)) - anon_rss--; + rss[MM_ANONPAGES]--; else { if (pte_dirty(ptent)) set_page_dirty(page); if (pte_young(ptent) && likely(!VM_SequentialReadHint(vma))) mark_page_accessed(page); - file_rss--; + rss[MM_FILEPAGES]--; } page_remove_rmap(page); if (unlikely(page_mapcount(page) < 0)) @@ -893,7 +905,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, pte_clear_not_present_full(mm, addr, pte, tlb->fullmm); } while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0)); - add_mm_rss(mm, file_rss, anon_rss); + add_mm_rss_vec(mm, rss); arch_leave_lazy_mmu_mode(); pte_unmap_unlock(pte - 1, ptl); @@ -1527,7 +1539,7 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr, /* Ok, finally just insert the thing.. 
*/ get_page(page); - inc_mm_counter(mm, file_rss); + inc_mm_counter(mm, MM_FILEPAGES); page_add_file_rmap(page); set_pte_at(mm, addr, pte, mk_pte(page, prot)); @@ -2163,11 +2175,11 @@ gotten: if (likely(pte_same(*page_table, orig_pte))) { if (old_page) { if (!PageAnon(old_page)) { - dec_mm_counter(mm, file_rss); - inc_mm_counter(mm, anon_rss); + dec_mm_counter(mm, MM_FILEPAGES); + inc_mm_counter(mm, MM_ANONPAGES); } } else - inc_mm_counter(mm, anon_rss); + inc_mm_counter(mm, MM_ANONPAGES); flush_cache_page(vma, address, pte_pfn(orig_pte)); entry = mk_pte(new_page, vma->vm_page_prot); entry = maybe_mkwrite(pte_mkdirty(entry), vma); @@ -2604,7 +2616,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, * discarded at swap_free(). */ - inc_mm_counter(mm, anon_rss); + inc_mm_counter(mm, MM_ANONPAGES); pte = mk_pte(page, vma->vm_page_prot); if ((flags & FAULT_FLAG_WRITE) && reuse_swap_page(page)) { pte = maybe_mkwrite(pte_mkdirty(pte), vma); @@ -2688,7 +2700,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, if (!pte_none(*page_table)) goto release; - inc_mm_counter(mm, anon_rss); + inc_mm_counter(mm, MM_ANONPAGES); page_add_new_anon_rmap(page, vma, address); setpte: set_pte_at(mm, address, page_table, entry); @@ -2842,10 +2854,10 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, if (flags & FAULT_FLAG_WRITE) entry = maybe_mkwrite(pte_mkdirty(entry), vma); if (anon) { - inc_mm_counter(mm, anon_rss); + inc_mm_counter(mm, MM_ANONPAGES); page_add_new_anon_rmap(page, vma, address); } else { - inc_mm_counter(mm, file_rss); + inc_mm_counter(mm, MM_FILEPAGES); page_add_file_rmap(page); if (flags & FAULT_FLAG_WRITE) { dirty_page = page; diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 237050478f28..35755a4156d6 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -401,8 +401,8 @@ static void __oom_kill_task(struct task_struct *p, int verbose) "vsz:%lukB, anon-rss:%lukB, file-rss:%lukB\n", task_pid_nr(p), p->comm, K(p->mm->total_vm), - K(get_mm_counter(p->mm, anon_rss)), - K(get_mm_counter(p->mm, file_rss))); + K(get_mm_counter(p->mm, MM_ANONPAGES)), + K(get_mm_counter(p->mm, MM_FILEPAGES))); task_unlock(p); /* diff --git a/mm/rmap.c b/mm/rmap.c index 278cd277bdec..73d0472884c2 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -815,9 +815,9 @@ int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) { if (PageAnon(page)) - dec_mm_counter(mm, anon_rss); + dec_mm_counter(mm, MM_ANONPAGES); else - dec_mm_counter(mm, file_rss); + dec_mm_counter(mm, MM_FILEPAGES); set_pte_at(mm, address, pte, swp_entry_to_pte(make_hwpoison_entry(page))); } else if (PageAnon(page)) { @@ -839,7 +839,7 @@ int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, list_add(&mm->mmlist, &init_mm.mmlist); spin_unlock(&mmlist_lock); } - dec_mm_counter(mm, anon_rss); + dec_mm_counter(mm, MM_ANONPAGES); } else if (PAGE_MIGRATION) { /* * Store the pfn of the page in a special migration @@ -857,7 +857,7 @@ int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, entry = make_migration_entry(page, pte_write(pteval)); set_pte_at(mm, address, pte, swp_entry_to_pte(entry)); } else - dec_mm_counter(mm, file_rss); + dec_mm_counter(mm, MM_FILEPAGES); page_remove_rmap(page); page_cache_release(page); @@ -996,7 +996,7 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount, page_remove_rmap(page); page_cache_release(page); - dec_mm_counter(mm, file_rss); + 
dec_mm_counter(mm, MM_FILEPAGES); (*mapcount)--; } pte_unmap_unlock(pte - 1, ptl); diff --git a/mm/swapfile.c b/mm/swapfile.c index 6c0585b16418..893984946a2c 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -840,7 +840,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, goto out; } - inc_mm_counter(vma->vm_mm, anon_rss); + inc_mm_counter(vma->vm_mm, MM_ANONPAGES); get_page(page); set_pte_at(vma->vm_mm, addr, pte, pte_mkold(mk_pte(page, vma->vm_page_prot))); -- cgit v1.2.3 From 34e55232e59f7b19050267a05ff1226e5cd122a5 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Fri, 5 Mar 2010 13:41:40 -0800 Subject: mm: avoid false sharing of mm_counter Considering the nature of per mm stats, it's the shared object among threads and can be a cache-miss point in the page fault path. This patch adds per-thread cache for mm_counter. RSS value will be counted into a struct in task_struct and synchronized with mm's one at events. Now, in this patch, the event is the number of calls to handle_mm_fault. Per-thread value is added to mm at each 64 calls. rough estimation with small benchmark on parallel thread (2threads) shows [before] 4.5 cache-miss/faults [after] 4.0 cache-miss/faults Anyway, the most contended object is mmap_sem if the number of threads grows. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: KAMEZAWA Hiroyuki Cc: Minchan Kim Cc: Christoph Lameter Cc: Lee Schermerhorn Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/proc.txt | 6 +++ fs/exec.c | 1 + include/linux/mm.h | 8 ++-- include/linux/mm_types.h | 6 +++ include/linux/sched.h | 4 +- kernel/exit.c | 3 +- mm/memory.c | 94 ++++++++++++++++++++++++++++++++++---- 7 files changed, 107 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 0d07513a67a6..e418f3d8f427 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -188,6 +188,12 @@ memory usage. Its seven fields are explained in Table 1-3. The stat file contains details information about the process itself. Its fields are explained in Table 1-4. +(for SMP CONFIG users) +For making accounting scalable, RSS related information are handled in +asynchronous manner and the vaule may not be very precise. To see a precise +snapshot of a moment, you can see /proc//smaps file and scan page table. +It's slow but very precise. + Table 1-2: Contents of the statm files (as of 2.6.30-rc7) .............................................................................. Field Content diff --git a/fs/exec.c b/fs/exec.c index cce6bbdbdbb1..ea7861727efd 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -718,6 +718,7 @@ static int exec_mmap(struct mm_struct *mm) /* Notify parent that we're no longer interested in the old VM */ tsk = current; old_mm = current->mm; + sync_mm_rss(tsk, old_mm); mm_release(tsk, old_mm); if (old_mm) { diff --git a/include/linux/mm.h b/include/linux/mm.h index 2124cdb2d1d0..8e580c07d171 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -873,7 +873,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, /* * per-process(per-mm_struct) statistics. */ -#if USE_SPLIT_PTLOCKS +#if defined(SPLIT_RSS_COUNTING) /* * The mm counters are not protected by its page_table_lock, * so must be incremented atomically. 
@@ -883,10 +883,7 @@ static inline void set_mm_counter(struct mm_struct *mm, int member, long value) atomic_long_set(&mm->rss_stat.count[member], value); } -static inline unsigned long get_mm_counter(struct mm_struct *mm, int member) -{ - return (unsigned long)atomic_long_read(&mm->rss_stat.count[member]); -} +unsigned long get_mm_counter(struct mm_struct *mm, int member); static inline void add_mm_counter(struct mm_struct *mm, int member, long value) { @@ -974,6 +971,7 @@ static inline void setmax_mm_hiwater_rss(unsigned long *maxrss, *maxrss = hiwater_rss; } +void sync_mm_rss(struct task_struct *task, struct mm_struct *mm); /* * A callback you can register to apply pressure to ageable caches. diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index e1ca64be6678..21861239ab0c 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -202,9 +202,15 @@ enum { }; #if USE_SPLIT_PTLOCKS +#define SPLIT_RSS_COUNTING struct mm_rss_stat { atomic_long_t count[NR_MM_COUNTERS]; }; +/* per-thread cached information, */ +struct task_rss_stat { + int events; /* for synchronization threshold */ + int count[NR_MM_COUNTERS]; +}; #else /* !USE_SPLIT_PTLOCKS */ struct mm_rss_stat { unsigned long count[NR_MM_COUNTERS]; diff --git a/include/linux/sched.h b/include/linux/sched.h index cbeafa49a53b..46c6f8d5dc06 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1220,7 +1220,9 @@ struct task_struct { struct plist_node pushable_tasks; struct mm_struct *mm, *active_mm; - +#if defined(SPLIT_RSS_COUNTING) + struct task_rss_stat rss_stat; +#endif /* task state */ int exit_state; int exit_code, exit_signal; diff --git a/kernel/exit.c b/kernel/exit.c index 45ed043b8bf5..10d3c5d5ae44 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -952,7 +952,8 @@ NORET_TYPE void do_exit(long code) preempt_count()); acct_update_integrals(tsk); - + /* sync mm's RSS info before statistics gathering */ + sync_mm_rss(tsk, tsk->mm); group_dead = atomic_dec_and_test(&tsk->signal->live); if (group_dead) { hrtimer_cancel(&tsk->signal->real_timer); diff --git a/mm/memory.c b/mm/memory.c index c57678478801..a4597614f18d 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -122,6 +122,79 @@ static int __init init_zero_pfn(void) core_initcall(init_zero_pfn); +#if defined(SPLIT_RSS_COUNTING) + +void __sync_task_rss_stat(struct task_struct *task, struct mm_struct *mm) +{ + int i; + + for (i = 0; i < NR_MM_COUNTERS; i++) { + if (task->rss_stat.count[i]) { + add_mm_counter(mm, i, task->rss_stat.count[i]); + task->rss_stat.count[i] = 0; + } + } + task->rss_stat.events = 0; +} + +static void add_mm_counter_fast(struct mm_struct *mm, int member, int val) +{ + struct task_struct *task = current; + + if (likely(task->mm == mm)) + task->rss_stat.count[member] += val; + else + add_mm_counter(mm, member, val); +} +#define inc_mm_counter_fast(mm, member) add_mm_counter_fast(mm, member, 1) +#define dec_mm_counter_fast(mm, member) add_mm_counter_fast(mm, member, -1) + +/* sync counter once per 64 page faults */ +#define TASK_RSS_EVENTS_THRESH (64) +static void check_sync_rss_stat(struct task_struct *task) +{ + if (unlikely(task != current)) + return; + if (unlikely(task->rss_stat.events++ > TASK_RSS_EVENTS_THRESH)) + __sync_task_rss_stat(task, task->mm); +} + +unsigned long get_mm_counter(struct mm_struct *mm, int member) +{ + long val = 0; + + /* + * Don't use task->mm here...for avoiding to use task_get_mm().. + * The caller must guarantee task->mm is not invalid. 
+ */ + val = atomic_long_read(&mm->rss_stat.count[member]); + /* + * counter is updated in asynchronous manner and may go to minus. + * But it's never be expected number for users. + */ + if (val < 0) + return 0; + return (unsigned long)val; +} + +void sync_mm_rss(struct task_struct *task, struct mm_struct *mm) +{ + __sync_task_rss_stat(task, mm); +} +#else + +#define inc_mm_counter_fast(mm, member) inc_mm_counter(mm, member) +#define dec_mm_counter_fast(mm, member) dec_mm_counter(mm, member) + +static void check_sync_rss_stat(struct task_struct *task) +{ +} + +void sync_mm_rss(struct task_struct *task, struct mm_struct *mm) +{ +} +#endif + /* * If a p?d_bad entry is found while walking page tables, report * the error, before resetting entry to p?d_none. Usually (but @@ -386,6 +459,8 @@ static inline void add_mm_rss_vec(struct mm_struct *mm, int *rss) { int i; + if (current->mm == mm) + sync_mm_rss(current, mm); for (i = 0; i < NR_MM_COUNTERS; i++) if (rss[i]) add_mm_counter(mm, i, rss[i]); @@ -1539,7 +1614,7 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr, /* Ok, finally just insert the thing.. */ get_page(page); - inc_mm_counter(mm, MM_FILEPAGES); + inc_mm_counter_fast(mm, MM_FILEPAGES); page_add_file_rmap(page); set_pte_at(mm, addr, pte, mk_pte(page, prot)); @@ -2175,11 +2250,11 @@ gotten: if (likely(pte_same(*page_table, orig_pte))) { if (old_page) { if (!PageAnon(old_page)) { - dec_mm_counter(mm, MM_FILEPAGES); - inc_mm_counter(mm, MM_ANONPAGES); + dec_mm_counter_fast(mm, MM_FILEPAGES); + inc_mm_counter_fast(mm, MM_ANONPAGES); } } else - inc_mm_counter(mm, MM_ANONPAGES); + inc_mm_counter_fast(mm, MM_ANONPAGES); flush_cache_page(vma, address, pte_pfn(orig_pte)); entry = mk_pte(new_page, vma->vm_page_prot); entry = maybe_mkwrite(pte_mkdirty(entry), vma); @@ -2616,7 +2691,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, * discarded at swap_free(). */ - inc_mm_counter(mm, MM_ANONPAGES); + inc_mm_counter_fast(mm, MM_ANONPAGES); pte = mk_pte(page, vma->vm_page_prot); if ((flags & FAULT_FLAG_WRITE) && reuse_swap_page(page)) { pte = maybe_mkwrite(pte_mkdirty(pte), vma); @@ -2700,7 +2775,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, if (!pte_none(*page_table)) goto release; - inc_mm_counter(mm, MM_ANONPAGES); + inc_mm_counter_fast(mm, MM_ANONPAGES); page_add_new_anon_rmap(page, vma, address); setpte: set_pte_at(mm, address, page_table, entry); @@ -2854,10 +2929,10 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, if (flags & FAULT_FLAG_WRITE) entry = maybe_mkwrite(pte_mkdirty(entry), vma); if (anon) { - inc_mm_counter(mm, MM_ANONPAGES); + inc_mm_counter_fast(mm, MM_ANONPAGES); page_add_new_anon_rmap(page, vma, address); } else { - inc_mm_counter(mm, MM_FILEPAGES); + inc_mm_counter_fast(mm, MM_FILEPAGES); page_add_file_rmap(page); if (flags & FAULT_FLAG_WRITE) { dirty_page = page; @@ -3035,6 +3110,9 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, count_vm_event(PGFAULT); + /* do counter updates before entering really critical section. */ + check_sync_rss_stat(current); + if (unlikely(is_vm_hugetlb_page(vma))) return hugetlb_fault(mm, vma, address, flags); -- cgit v1.2.3 From b084d4353ff99d824d3bc5a5c2c22c70b1fba722 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Fri, 5 Mar 2010 13:41:42 -0800 Subject: mm: count swap usage A frequent questions from users about memory management is what numbers of swap ents are user for processes. 
And this information will give some hints to the oom-killer. Although we can count the number of swapents per process by scanning /proc//smaps, this is very slow and not suitable for the usual process-information tools such as 'ps' or 'top' (which are already slow enough). This patch adds a swapents counter to mm_counter and updates it at each swap event. The information is exported via the /proc//status file as [kamezawa@bluextal memory]$ cat /proc/self/status Name: cat State: R (running) Tgid: 2910 Pid: 2910 PPid: 2823 TracerPid: 0 Uid: 500 500 500 500 Gid: 500 500 500 500 FDSize: 256 Groups: 500 VmPeak: 82696 kB VmSize: 82696 kB VmLck: 0 kB VmHWM: 432 kB VmRSS: 432 kB VmData: 172 kB VmStk: 84 kB VmExe: 48 kB VmLib: 1568 kB VmPTE: 40 kB VmSwap: 0 kB <=============== this. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: KAMEZAWA Hiroyuki Reviewed-by: Minchan Kim Reviewed-by: Christoph Lameter Cc: Lee Schermerhorn Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/proc.txt | 2 ++ fs/proc/task_mmu.c | 9 ++++++--- include/linux/mm_types.h | 1 + mm/memory.c | 16 ++++++++++++---- mm/rmap.c | 1 + mm/swapfile.c | 1 + 6 files changed, 23 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index e418f3d8f427..b5c5fc657a88 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -164,6 +164,7 @@ read the file /proc/PID/status: VmExe: 68 kB VmLib: 1412 kB VmPTE: 20 kb + VmSwap: 0 kB Threads: 1 SigQ: 0/28578 SigPnd: 0000000000000000 @@ -219,6 +220,7 @@ Table 1-2: Contents of the statm files (as of 2.6.30-rc7) VmExe size of text segment VmLib size of shared library code VmPTE size of page table entries + VmSwap size of swap usage (the number of referred swapents) Threads number of threads SigQ number of signals queued/max.
number for queue SigPnd bitmap of pending signals for the thread diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 375581276011..183f8ff5f400 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -16,7 +16,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) { - unsigned long data, text, lib; + unsigned long data, text, lib, swap; unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss; /* @@ -36,6 +36,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) data = mm->total_vm - mm->shared_vm - mm->stack_vm; text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10; lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text; + swap = get_mm_counter(mm, MM_SWAPENTS); seq_printf(m, "VmPeak:\t%8lu kB\n" "VmSize:\t%8lu kB\n" @@ -46,7 +47,8 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) "VmStk:\t%8lu kB\n" "VmExe:\t%8lu kB\n" "VmLib:\t%8lu kB\n" - "VmPTE:\t%8lu kB\n", + "VmPTE:\t%8lu kB\n" + "VmSwap:\t%8lu kB\n", hiwater_vm << (PAGE_SHIFT-10), (total_vm - mm->reserved_vm) << (PAGE_SHIFT-10), mm->locked_vm << (PAGE_SHIFT-10), @@ -54,7 +56,8 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) total_rss << (PAGE_SHIFT-10), data << (PAGE_SHIFT-10), mm->stack_vm << (PAGE_SHIFT-10), text, lib, - (PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10); + (PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10, + swap << (PAGE_SHIFT-10)); } unsigned long task_vsize(struct mm_struct *mm) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 21861239ab0c..19549d7275ab 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -198,6 +198,7 @@ struct core_state { enum { MM_FILEPAGES, MM_ANONPAGES, + MM_SWAPENTS, NR_MM_COUNTERS }; diff --git a/mm/memory.c b/mm/memory.c index a4597614f18d..77d9f840936b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -679,7 +679,9 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, &src_mm->mmlist); spin_unlock(&mmlist_lock); } - if (is_write_migration_entry(entry) && + if (likely(!non_swap_entry(entry))) + rss[MM_SWAPENTS]++; + else if (is_write_migration_entry(entry) && is_cow_mapping(vm_flags)) { /* * COW mappings require pages in both parent @@ -974,9 +976,14 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, if (pte_file(ptent)) { if (unlikely(!(vma->vm_flags & VM_NONLINEAR))) print_bad_pte(vma, addr, ptent, NULL); - } else if - (unlikely(!free_swap_and_cache(pte_to_swp_entry(ptent)))) - print_bad_pte(vma, addr, ptent, NULL); + } else { + swp_entry_t entry = pte_to_swp_entry(ptent); + + if (!non_swap_entry(entry)) + rss[MM_SWAPENTS]--; + if (unlikely(!free_swap_and_cache(entry))) + print_bad_pte(vma, addr, ptent, NULL); + } pte_clear_not_present_full(mm, addr, pte, tlb->fullmm); } while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0)); @@ -2692,6 +2699,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, */ inc_mm_counter_fast(mm, MM_ANONPAGES); + dec_mm_counter_fast(mm, MM_SWAPENTS); pte = mk_pte(page, vma->vm_page_prot); if ((flags & FAULT_FLAG_WRITE) && reuse_swap_page(page)) { pte = maybe_mkwrite(pte_mkdirty(pte), vma); diff --git a/mm/rmap.c b/mm/rmap.c index 73d0472884c2..5cb47111f79e 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -840,6 +840,7 @@ int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, spin_unlock(&mmlist_lock); } dec_mm_counter(mm, MM_ANONPAGES); + inc_mm_counter(mm, MM_SWAPENTS); } else if (PAGE_MIGRATION) { /* * Store the pfn of the page in a special migration diff --git a/mm/swapfile.c b/mm/swapfile.c index 
893984946a2c..187a21f8b7bd 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -840,6 +840,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, goto out; } + dec_mm_counter(vma->vm_mm, MM_SWAPENTS); inc_mm_counter(vma->vm_mm, MM_ANONPAGES); get_page(page); set_pte_at(vma->vm_mm, addr, pte, -- cgit v1.2.3 From fc91668eaf9e7ba61e867fc2218b7e9fb67faa4f Mon Sep 17 00:00:00 2001 From: Li Hong Date: Fri, 5 Mar 2010 13:41:54 -0800 Subject: mm: remove free_hot_page() free_hot_page() is just a wrapper around free_hot_cold_page() with parameter 'cold = 0'. After adding a clear comment for free_hot_cold_page(), it is reasonable to remove a level of call. [akpm@linux-foundation.org: fix build] Signed-off-by: Li Hong Cc: Mel Gorman Cc: Rik van Riel Cc: Ingo Molnar Cc: Larry Woodman Cc: Peter Zijlstra Cc: Li Ming Chun Cc: KOSAKI Motohiro Cc: Americo Wang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 2 +- mm/page_alloc.c | 10 +++------- mm/swap.c | 2 +- 3 files changed, 5 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 557bdad320b6..e5567e6762f3 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -325,7 +325,7 @@ void free_pages_exact(void *virt, size_t size); extern void __free_pages(struct page *page, unsigned int order); extern void free_pages(unsigned long addr, unsigned int order); -extern void free_hot_page(struct page *page); +extern void free_hot_cold_page(struct page *page, int cold); #define __free_page(page) __free_pages((page), 0) #define free_page(addr) free_pages((addr),0) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index caa7df60a4a1..80bcee0c5034 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1074,8 +1074,9 @@ void mark_free_pages(struct zone *zone) /* * Free a 0-order page + * cold == 1 ? free a cold page : free a hot page */ -static void free_hot_cold_page(struct page *page, int cold) +void free_hot_cold_page(struct page *page, int cold) { struct zone *zone = page_zone(page); struct per_cpu_pages *pcp; @@ -1135,11 +1136,6 @@ out: local_irq_restore(flags); } -void free_hot_page(struct page *page) -{ - free_hot_cold_page(page, 0); -} - /* * split_page takes a non-compound higher-order page, and splits it into * n (1<lru_lock, flags); } - free_hot_page(page); + free_hot_cold_page(page, 0); } static void put_compound_page(struct page *page) -- cgit v1.2.3 From 93e4a89a8c987189b168a530a331ef6d0fcf07a7 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Fri, 5 Mar 2010 13:41:55 -0800 Subject: mm: restore zone->all_unreclaimable to independence word commit e815af95 ("change all_unreclaimable zone member to flags") changed all_unreclaimable member to bit flag. But it had an undesireble side effect. free_one_page() is one of most hot path in linux kernel and increasing atomic ops in it can reduce kernel performance a bit. Thus, this patch revert such commit partially. at least all_unreclaimable shouldn't share memory word with other zone flags. 
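A rough userspace illustration of the trade-off described above (this is not the kernel code; the struct and function names below are invented for the sketch): the cost being avoided is an atomic read-modify-write on a shared flags word, compared with a plain store to a dedicated int.

#include <stdatomic.h>
#include <stdio.h>

#define ZONE_ALL_UNRECLAIMABLE 0        /* bit index in the shared flags word */

struct fake_zone {
        atomic_ulong flags;             /* shared with other zone-flag bits */
        int all_unreclaimable;          /* dedicated word, plain load/store */
};

static void mark_unreclaimable_bitflag(struct fake_zone *z)
{
        /* atomic RMW: a locked instruction on x86, extra cost on a hot path */
        atomic_fetch_or(&z->flags, 1UL << ZONE_ALL_UNRECLAIMABLE);
}

static void mark_unreclaimable_plain(struct fake_zone *z)
{
        /* plain store: no atomic op; the kernel does this under zone->lock */
        z->all_unreclaimable = 1;
}

int main(void)
{
        struct fake_zone z = { .all_unreclaimable = 0 };

        atomic_init(&z.flags, 0);
        mark_unreclaimable_bitflag(&z);
        mark_unreclaimable_plain(&z);
        printf("flags=%#lx all_unreclaimable=%d\n",
               (unsigned long)atomic_load(&z.flags), z.all_unreclaimable);
        return 0;
}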
[akpm@linux-foundation.org: fix patch interaction] Signed-off-by: KOSAKI Motohiro Cc: David Rientjes Cc: Wu Fengguang Cc: KAMEZAWA Hiroyuki Cc: Minchan Kim Cc: Huang Shijie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 7 +------ mm/page_alloc.c | 6 +++--- mm/vmscan.c | 22 +++++++++------------- mm/vmstat.c | 2 +- 4 files changed, 14 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index a01a103341bd..bc209d8b7b5c 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -306,6 +306,7 @@ struct zone { * free areas of different sizes */ spinlock_t lock; + int all_unreclaimable; /* All pages pinned */ #ifdef CONFIG_MEMORY_HOTPLUG /* see spanned/present_pages for more description */ seqlock_t span_seqlock; @@ -417,7 +418,6 @@ struct zone { } ____cacheline_internodealigned_in_smp; typedef enum { - ZONE_ALL_UNRECLAIMABLE, /* all pages pinned */ ZONE_RECLAIM_LOCKED, /* prevents concurrent reclaim */ ZONE_OOM_LOCKED, /* zone is in OOM killer zonelist */ } zone_flags_t; @@ -437,11 +437,6 @@ static inline void zone_clear_flag(struct zone *zone, zone_flags_t flag) clear_bit(flag, &zone->flags); } -static inline int zone_is_all_unreclaimable(const struct zone *zone) -{ - return test_bit(ZONE_ALL_UNRECLAIMABLE, &zone->flags); -} - static inline int zone_is_reclaim_locked(const struct zone *zone) { return test_bit(ZONE_RECLAIM_LOCKED, &zone->flags); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 80bcee0c5034..0734bedabd9c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -530,7 +530,7 @@ static void free_pcppages_bulk(struct zone *zone, int count, int batch_free = 0; spin_lock(&zone->lock); - zone_clear_flag(zone, ZONE_ALL_UNRECLAIMABLE); + zone->all_unreclaimable = 0; zone->pages_scanned = 0; __mod_zone_page_state(zone, NR_FREE_PAGES, count); @@ -568,7 +568,7 @@ static void free_one_page(struct zone *zone, struct page *page, int order, int migratetype) { spin_lock(&zone->lock); - zone_clear_flag(zone, ZONE_ALL_UNRECLAIMABLE); + zone->all_unreclaimable = 0; zone->pages_scanned = 0; __mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order); @@ -2262,7 +2262,7 @@ void show_free_areas(void) K(zone_page_state(zone, NR_BOUNCE)), K(zone_page_state(zone, NR_WRITEBACK_TEMP)), zone->pages_scanned, - (zone_is_all_unreclaimable(zone) ? "yes" : "no") + (zone->all_unreclaimable ? 
"yes" : "no") ); printk("lowmem_reserve[]:"); for (i = 0; i < MAX_NR_ZONES; i++) diff --git a/mm/vmscan.c b/mm/vmscan.c index bc0f8db8340f..5cbf64dd79c1 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1699,8 +1699,7 @@ static void shrink_zones(int priority, struct zonelist *zonelist, continue; note_zone_scanning_priority(zone, priority); - if (zone_is_all_unreclaimable(zone) && - priority != DEF_PRIORITY) + if (zone->all_unreclaimable && priority != DEF_PRIORITY) continue; /* Let kswapd poll it */ sc->all_unreclaimable = 0; } else { @@ -1927,7 +1926,7 @@ static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining) if (!populated_zone(zone)) continue; - if (zone_is_all_unreclaimable(zone)) + if (zone->all_unreclaimable) continue; if (!zone_watermark_ok(zone, order, high_wmark_pages(zone), @@ -2017,8 +2016,7 @@ loop_again: if (!populated_zone(zone)) continue; - if (zone_is_all_unreclaimable(zone) && - priority != DEF_PRIORITY) + if (zone->all_unreclaimable && priority != DEF_PRIORITY) continue; /* @@ -2061,8 +2059,7 @@ loop_again: if (!populated_zone(zone)) continue; - if (zone_is_all_unreclaimable(zone) && - priority != DEF_PRIORITY) + if (zone->all_unreclaimable && priority != DEF_PRIORITY) continue; temp_priority[i] = priority; @@ -2089,12 +2086,11 @@ loop_again: lru_pages); sc.nr_reclaimed += reclaim_state->reclaimed_slab; total_scanned += sc.nr_scanned; - if (zone_is_all_unreclaimable(zone)) + if (zone->all_unreclaimable) continue; - if (nr_slab == 0 && zone->pages_scanned >= - (zone_reclaimable_pages(zone) * 6)) - zone_set_flag(zone, - ZONE_ALL_UNRECLAIMABLE); + if (nr_slab == 0 && + zone->pages_scanned >= (zone_reclaimable_pages(zone) * 6)) + zone->all_unreclaimable = 1; /* * If we've done a decent amount of scanning and * the reclaim ratio is low, start doing writepage @@ -2624,7 +2620,7 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) zone_page_state(zone, NR_SLAB_RECLAIMABLE) <= zone->min_slab_pages) return ZONE_RECLAIM_FULL; - if (zone_is_all_unreclaimable(zone)) + if (zone->all_unreclaimable) return ZONE_RECLAIM_FULL; /* diff --git a/mm/vmstat.c b/mm/vmstat.c index fc5aa183bc45..7f760cbc73f3 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -763,7 +763,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, "\n prev_priority: %i" "\n start_pfn: %lu" "\n inactive_ratio: %u", - zone_is_all_unreclaimable(zone), + zone->all_unreclaimable, zone->prev_priority, zone->zone_start_pfn, zone->inactive_ratio); -- cgit v1.2.3 From d96ae5309165d9ed7c008a178238977b73595cd9 Mon Sep 17 00:00:00 2001 From: "akpm@linux-foundation.org" Date: Fri, 5 Mar 2010 13:41:58 -0800 Subject: memory-hotplug: create /sys/firmware/memmap entry for new memory A memmap is a directory in sysfs which includes 3 text files: start, end and type. For example: start: 0x100000 end: 0x7e7b1cff type: System RAM Interface firmware_map_add was not called explicitly. Remove it and add function firmware_map_add_hotplug as hotplug interface of memmap. Each memory entry has a memmap in sysfs, When we hot-add new memory, sysfs does not export memmap entry for it. We add a call in function add_memory to function firmware_map_add_hotplug. Add a new function add_sysfs_fw_map_entry() to create memmap entry, it will be called when initialize memmap and hot-add memory. 
[akpm@linux-foundation.org: un-kernedoc a no longer kerneldoc comment] Signed-off-by: Shaohui Zheng Acked-by: Andi Kleen Acked-by: Yasunori Goto Reviewed-by: Wu Fengguang Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/firmware/memmap.c | 57 ++++++++++++++++++++++++++++---------------- include/linux/firmware-map.h | 6 ++--- mm/memory_hotplug.c | 4 ++++ 3 files changed, 43 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c index 56f9234781fa..20f645743ead 100644 --- a/drivers/firmware/memmap.c +++ b/drivers/firmware/memmap.c @@ -122,29 +122,53 @@ static int firmware_map_add_entry(u64 start, u64 end, return 0; } +/* + * Add memmap entry on sysfs + */ +static int add_sysfs_fw_map_entry(struct firmware_map_entry *entry) +{ + static int map_entries_nr; + static struct kset *mmap_kset; + + if (!mmap_kset) { + mmap_kset = kset_create_and_add("memmap", NULL, firmware_kobj); + if (!mmap_kset) + return -ENOMEM; + } + + entry->kobj.kset = mmap_kset; + if (kobject_add(&entry->kobj, NULL, "%d", map_entries_nr++)) + kobject_put(&entry->kobj); + + return 0; +} + /** - * firmware_map_add() - Adds a firmware mapping entry. + * firmware_map_add_hotplug() - Adds a firmware mapping entry when we do + * memory hotplug. * @start: Start of the memory range. * @end: End of the memory range (inclusive). * @type: Type of the memory range. * - * This function uses kmalloc() for memory - * allocation. Use firmware_map_add_early() if you want to use the bootmem - * allocator. - * - * That function must be called before late_initcall. + * Adds a firmware mapping entry. This function is for memory hotplug, it is + * similar to function firmware_map_add_early(). The only difference is that + * it will create the syfs entry dynamically. * * Returns 0 on success, or -ENOMEM if no memory could be allocated. **/ -int firmware_map_add(u64 start, u64 end, const char *type) +int __meminit firmware_map_add_hotplug(u64 start, u64 end, const char *type) { struct firmware_map_entry *entry; - entry = kmalloc(sizeof(struct firmware_map_entry), GFP_ATOMIC); + entry = kzalloc(sizeof(struct firmware_map_entry), GFP_ATOMIC); if (!entry) return -ENOMEM; - return firmware_map_add_entry(start, end, type, entry); + firmware_map_add_entry(start, end, type, entry); + /* create the memmap entry */ + add_sysfs_fw_map_entry(entry); + + return 0; } /** @@ -154,7 +178,7 @@ int firmware_map_add(u64 start, u64 end, const char *type) * @type: Type of the memory range. * * Adds a firmware mapping entry. This function uses the bootmem allocator - * for memory allocation. Use firmware_map_add() if you want to use kmalloc(). + * for memory allocation. * * That function must be called before late_initcall. 
* @@ -214,19 +238,10 @@ static ssize_t memmap_attr_show(struct kobject *kobj, */ static int __init memmap_init(void) { - int i = 0; struct firmware_map_entry *entry; - struct kset *memmap_kset; - - memmap_kset = kset_create_and_add("memmap", NULL, firmware_kobj); - if (WARN_ON(!memmap_kset)) - return -ENOMEM; - list_for_each_entry(entry, &map_entries, list) { - entry->kobj.kset = memmap_kset; - if (kobject_add(&entry->kobj, NULL, "%d", i++)) - kobject_put(&entry->kobj); - } + list_for_each_entry(entry, &map_entries, list) + add_sysfs_fw_map_entry(entry); return 0; } diff --git a/include/linux/firmware-map.h b/include/linux/firmware-map.h index 875451f1373a..c6dcc1dfe781 100644 --- a/include/linux/firmware-map.h +++ b/include/linux/firmware-map.h @@ -24,17 +24,17 @@ */ #ifdef CONFIG_FIRMWARE_MEMMAP -int firmware_map_add(u64 start, u64 end, const char *type); int firmware_map_add_early(u64 start, u64 end, const char *type); +int firmware_map_add_hotplug(u64 start, u64 end, const char *type); #else /* CONFIG_FIRMWARE_MEMMAP */ -static inline int firmware_map_add(u64 start, u64 end, const char *type) +static inline int firmware_map_add_early(u64 start, u64 end, const char *type) { return 0; } -static inline int firmware_map_add_early(u64 start, u64 end, const char *type) +static inline int firmware_map_add_hotplug(u64 start, u64 end, const char *type) { return 0; } diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 030ce8a5bb0e..78e34e63c7b8 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -28,6 +28,7 @@ #include #include #include +#include #include @@ -523,6 +524,9 @@ int __ref add_memory(int nid, u64 start, u64 size) BUG_ON(ret); } + /* create new memmap entry */ + firmware_map_add_hotplug(start, start + size, "System RAM"); + goto out; error: -- cgit v1.2.3 From 0141450f66c3c12a3aaa869748caa64241885cdf Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Fri, 5 Mar 2010 13:42:03 -0800 Subject: readahead: introduce FMODE_RANDOM for POSIX_FADV_RANDOM This fixes inefficient page-by-page reads on POSIX_FADV_RANDOM. POSIX_FADV_RANDOM used to set ra_pages=0, which leads to poor performance: a 16K read will be carried out in 4 _sync_ 1-page reads. In other places, ra_pages==0 means - it's ramfs/tmpfs/hugetlbfs/sysfs/configfs - some IO error happened where multi-page read IO won't help or should be avoided. POSIX_FADV_RANDOM actually want a different semantics: to disable the *heuristic* readahead algorithm, and to use a dumb one which faithfully submit read IO for whatever application requests. So introduce a flag FMODE_RANDOM for POSIX_FADV_RANDOM. Note that the random hint is not likely to help random reads performance noticeably. And it may be too permissive on huge request size (its IO size is not limited by read_ahead_kb). In Quentin's report (http://lkml.org/lkml/2009/12/24/145), the overall (NFS read) performance of the application increased by 313%! 
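From userspace the interface is unchanged; an application opts in exactly as before, and only the kernel side of the hint behaves better. A minimal sketch (the file name and offset are placeholders):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(int argc, char **argv)
{
        const char *name = argc > 1 ? argv[1] : "testfile";
        char buf[16 * 1024];
        int fd, err;

        fd = open(name, O_RDONLY);
        if (fd < 0) {
                perror("open");
                return EXIT_FAILURE;
        }

        /* random access pattern: skip readahead heuristics (sets FMODE_RANDOM) */
        err = posix_fadvise(fd, 0, 0, POSIX_FADV_RANDOM);
        if (err)
                fprintf(stderr, "posix_fadvise: %s\n", strerror(err));

        /* per the commit message, this 16K request is now submitted as asked,
         * rather than as 4 synchronous 1-page reads */
        if (pread(fd, buf, sizeof(buf), 1 << 20) < 0)
                perror("pread");

        close(fd);
        return EXIT_SUCCESS;
}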
Tested-by: Quentin Barnes Signed-off-by: Wu Fengguang Cc: Nick Piggin Cc: Andi Kleen Cc: Steven Whitehouse Cc: David Howells Cc: Jonathan Corbet Cc: Al Viro Cc: Christoph Hellwig Cc: Trond Myklebust Cc: Chuck Lever Cc: [2.6.33.x] Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 3 +++ mm/fadvise.c | 10 +++++++++- mm/readahead.c | 6 ++++++ 3 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 45689621a851..be87edcaba06 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -87,6 +87,9 @@ struct inodes_stat_t { */ #define FMODE_NOCMTIME ((__force fmode_t)2048) +/* Expect random access pattern */ +#define FMODE_RANDOM ((__force fmode_t)4096) + /* * The below are the various read and write types that we support. Some of * them include behavioral modifiers that send information down to the diff --git a/mm/fadvise.c b/mm/fadvise.c index e43359214f6f..8d723c9e8b75 100644 --- a/mm/fadvise.c +++ b/mm/fadvise.c @@ -77,12 +77,20 @@ SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice) switch (advice) { case POSIX_FADV_NORMAL: file->f_ra.ra_pages = bdi->ra_pages; + spin_lock(&file->f_lock); + file->f_mode &= ~FMODE_RANDOM; + spin_unlock(&file->f_lock); break; case POSIX_FADV_RANDOM: - file->f_ra.ra_pages = 0; + spin_lock(&file->f_lock); + file->f_mode |= FMODE_RANDOM; + spin_unlock(&file->f_lock); break; case POSIX_FADV_SEQUENTIAL: file->f_ra.ra_pages = bdi->ra_pages * 2; + spin_lock(&file->f_lock); + file->f_mode &= ~FMODE_RANDOM; + spin_unlock(&file->f_lock); break; case POSIX_FADV_WILLNEED: if (!mapping->a_ops->readpage) { diff --git a/mm/readahead.c b/mm/readahead.c index 033bc135a41f..337b20e946f6 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -501,6 +501,12 @@ void page_cache_sync_readahead(struct address_space *mapping, if (!ra->ra_pages) return; + /* be dumb */ + if (filp->f_mode & FMODE_RANDOM) { + force_page_cache_readahead(mapping, filp, offset, req_size); + return; + } + /* do read-ahead */ ondemand_readahead(mapping, ra, filp, false, offset, req_size); } -- cgit v1.2.3 From 19adf9c5d5793657118f2002237c0ee49c3b6185 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 5 Mar 2010 13:42:03 -0800 Subject: include/linux/fs.h: convert FMODE_* constants to hex It was tolerable until Eric went and added 8388608. 
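For context, the decimal constant complained about above is a single high bit, which is exactly what the hex spelling makes obvious at a glance:

#include <stdio.h>

int main(void)
{
        /* 8388608 in its more readable spellings */
        printf("%d = %#x = 1 << %d\n", 8388608, 8388608, 23);
        return 0;
}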
Cc: Eric Paris Cc: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index be87edcaba06..10b8dedcd18b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -60,24 +60,24 @@ struct inodes_stat_t { */ /* file is open for reading */ -#define FMODE_READ ((__force fmode_t)1) +#define FMODE_READ ((__force fmode_t)0x1) /* file is open for writing */ -#define FMODE_WRITE ((__force fmode_t)2) +#define FMODE_WRITE ((__force fmode_t)0x2) /* file is seekable */ -#define FMODE_LSEEK ((__force fmode_t)4) +#define FMODE_LSEEK ((__force fmode_t)0x4) /* file can be accessed using pread */ -#define FMODE_PREAD ((__force fmode_t)8) +#define FMODE_PREAD ((__force fmode_t)0x8) /* file can be accessed using pwrite */ -#define FMODE_PWRITE ((__force fmode_t)16) +#define FMODE_PWRITE ((__force fmode_t)0x10) /* File is opened for execution with sys_execve / sys_uselib */ -#define FMODE_EXEC ((__force fmode_t)32) +#define FMODE_EXEC ((__force fmode_t)0x20) /* File is opened with O_NDELAY (only set for block devices) */ -#define FMODE_NDELAY ((__force fmode_t)64) +#define FMODE_NDELAY ((__force fmode_t)0x40) /* File is opened with O_EXCL (only set for block devices) */ -#define FMODE_EXCL ((__force fmode_t)128) +#define FMODE_EXCL ((__force fmode_t)0x80) /* File is opened using open(.., 3, ..) and is writeable only for ioctls (specialy hack for floppy.c) */ -#define FMODE_WRITE_IOCTL ((__force fmode_t)256) +#define FMODE_WRITE_IOCTL ((__force fmode_t)0x100) /* * Don't update ctime and mtime. @@ -85,10 +85,10 @@ struct inodes_stat_t { * Currently a special hack for the XFS open_by_handle ioctl, but we'll * hopefully graduate it to a proper O_CMTIME flag supported by open(2) soon. */ -#define FMODE_NOCMTIME ((__force fmode_t)2048) +#define FMODE_NOCMTIME ((__force fmode_t)0x800) /* Expect random access pattern */ -#define FMODE_RANDOM ((__force fmode_t)4096) +#define FMODE_RANDOM ((__force fmode_t)0x1000) /* * The below are the various read and write types that we support. Some of -- cgit v1.2.3 From 5beb49305251e5669852ed541e8e2f2f7696c53e Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Fri, 5 Mar 2010 13:42:07 -0800 Subject: mm: change anon_vma linking to fix multi-process server scalability issue The old anon_vma code can lead to scalability issues with heavily forking workloads. Specifically, each anon_vma will be shared between the parent process and all its child processes. In a workload with 1000 child processes and a VMA with 1000 anonymous pages per process that get COWed, this leads to a system with a million anonymous pages in the same anon_vma, each of which is mapped in just one of the 1000 processes. However, the current rmap code needs to walk them all, leading to O(N) scanning complexity for each page. This can result in systems where one CPU is walking the page tables of 1000 processes in page_referenced_one, while all other CPUs are stuck on the anon_vma lock. This leads to catastrophic failure for a benchmark like AIM7, where the total number of processes can reach in the tens of thousands. Real workloads are still a factor 10 less process intensive than AIM7, but they are catching up. This patch changes the way anon_vmas and VMAs are linked, which allows us to associate multiple anon_vmas with a VMA. 
At fork time, each child process gets its own anon_vmas, in which its COWed pages will be instantiated. The parents' anon_vma is also linked to the VMA, because non-COWed pages could be present in any of the children. This reduces rmap scanning complexity to O(1) for the pages of the 1000 child processes, with O(N) complexity for at most 1/N pages in the system. This reduces the average scanning cost in heavily forking workloads from O(N) to 2. The only real complexity in this patch stems from the fact that linking a VMA to anon_vmas now involves memory allocations. This means vma_adjust can fail, if it needs to attach a VMA to anon_vma structures. This in turn means error handling needs to be added to the calling functions. A second source of complexity is that, because there can be multiple anon_vmas, the anon_vma linking in vma_adjust can no longer be done under "the" anon_vma lock. To prevent the rmap code from walking up an incomplete VMA, this patch introduces the VM_LOCK_RMAP VMA flag. This bit flag uses the same slot as the NOMMU VM_MAPPED_COPY, with an ifdef in mm.h to make sure it is impossible to compile a kernel that needs both symbolic values for the same bitflag. Some test results: Without the anon_vma changes, when AIM7 hits around 9.7k users (on a test box with 16GB RAM and not quite enough IO), the system ends up running >99% in system time, with every CPU on the same anon_vma lock in the pageout code. With these changes, AIM7 hits the cross-over point around 29.7k users. This happens with ~99% IO wait time, there never seems to be any spike in system time. The anon_vma lock contention appears to be resolved. [akpm@linux-foundation.org: cleanups] Signed-off-by: Rik van Riel Cc: KOSAKI Motohiro Cc: Larry Woodman Cc: Lee Schermerhorn Cc: Minchan Kim Cc: Andrea Arcangeli Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/kernel/perfmon.c | 1 + arch/ia64/mm/init.c | 2 + fs/exec.c | 6 +- include/linux/mm.h | 6 +- include/linux/mm_types.h | 3 +- include/linux/rmap.h | 35 ++++++++-- kernel/fork.c | 6 +- mm/ksm.c | 12 +++- mm/memory-failure.c | 5 +- mm/memory.c | 4 +- mm/mmap.c | 138 +++++++++++++++++++++++++++------------ mm/mremap.c | 7 +- mm/nommu.c | 2 +- mm/rmap.c | 156 +++++++++++++++++++++++++++++++++++++-------- 14 files changed, 298 insertions(+), 85 deletions(-) (limited to 'include') diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index b81e46b1629b..703062c44fb9 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -2315,6 +2315,7 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t DPRINT(("Cannot allocate vma\n")); goto error_kmem; } + INIT_LIST_HEAD(&vma->anon_vma_chain); /* * partially initialize the vma for the sampling buffer diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index ca3335ea56cc..ed41759efcac 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -117,6 +117,7 @@ ia64_init_addr_space (void) */ vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); if (vma) { + INIT_LIST_HEAD(&vma->anon_vma_chain); vma->vm_mm = current->mm; vma->vm_start = current->thread.rbs_bot & PAGE_MASK; vma->vm_end = vma->vm_start + PAGE_SIZE; @@ -135,6 +136,7 @@ ia64_init_addr_space (void) if (!(current->personality & MMAP_PAGE_ZERO)) { vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); if (vma) { + INIT_LIST_HEAD(&vma->anon_vma_chain); vma->vm_mm = current->mm; vma->vm_end = PAGE_SIZE; vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | 
_PAGE_MA_NAT); diff --git a/fs/exec.c b/fs/exec.c index ea7861727efd..591030735591 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -246,6 +246,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm) vma->vm_start = vma->vm_end - PAGE_SIZE; vma->vm_flags = VM_STACK_FLAGS; vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); + INIT_LIST_HEAD(&vma->anon_vma_chain); err = insert_vm_struct(mm, vma); if (err) goto err; @@ -516,7 +517,8 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) /* * cover the whole range: [new_start, old_end) */ - vma_adjust(vma, new_start, old_end, vma->vm_pgoff, NULL); + if (vma_adjust(vma, new_start, old_end, vma->vm_pgoff, NULL)) + return -ENOMEM; /* * move the page tables downwards, on failure we rely on @@ -547,7 +549,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) tlb_finish_mmu(tlb, new_end, old_end); /* - * shrink the vma to just the new range. + * Shrink the vma to just the new range. Always succeeds. */ vma_adjust(vma, new_start, new_end, vma->vm_pgoff, NULL); diff --git a/include/linux/mm.h b/include/linux/mm.h index 8e580c07d171..8e2841a2f441 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -97,7 +97,11 @@ extern unsigned int kobjsize(const void *objp); #define VM_NORESERVE 0x00200000 /* should the VM suppress accounting */ #define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ #define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */ +#ifdef CONFIG_MMU +#define VM_LOCK_RMAP 0x01000000 /* Do not follow this rmap (mmu mmap) */ +#else #define VM_MAPPED_COPY 0x01000000 /* T if mapped copy of data (nommu mmap) */ +#endif #define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */ #define VM_ALWAYSDUMP 0x04000000 /* Always include in core dumps */ @@ -1216,7 +1220,7 @@ static inline void vma_nonlinear_insert(struct vm_area_struct *vma, /* mmap.c */ extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin); -extern void vma_adjust(struct vm_area_struct *vma, unsigned long start, +extern int vma_adjust(struct vm_area_struct *vma, unsigned long start, unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert); extern struct vm_area_struct *vma_merge(struct mm_struct *, struct vm_area_struct *prev, unsigned long addr, unsigned long end, diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 19549d7275ab..048b46270aa5 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -163,7 +163,8 @@ struct vm_area_struct { * can only be in the i_mmap tree. An anonymous MAP_PRIVATE, stack * or brk vma (with NULL file) can only be in an anon_vma list. */ - struct list_head anon_vma_node; /* Serialized by anon_vma->lock */ + struct list_head anon_vma_chain; /* Serialized by mmap_sem & + * page_table_lock */ struct anon_vma *anon_vma; /* Serialized by page_table_lock */ /* Function pointers to deal with this struct. */ diff --git a/include/linux/rmap.h b/include/linux/rmap.h index b019ae64e2ab..62da2001d55c 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -37,7 +37,27 @@ struct anon_vma { * is serialized by a system wide lock only visible to * mm_take_all_locks() (mm_all_locks_mutex). */ - struct list_head head; /* List of private "related" vmas */ + struct list_head head; /* Chain of private "related" vmas */ +}; + +/* + * The copy-on-write semantics of fork mean that an anon_vma + * can become associated with multiple processes. 
Furthermore, + * each child process will have its own anon_vma, where new + * pages for that process are instantiated. + * + * This structure allows us to find the anon_vmas associated + * with a VMA, or the VMAs associated with an anon_vma. + * The "same_vma" list contains the anon_vma_chains linking + * all the anon_vmas associated with this VMA. + * The "same_anon_vma" list contains the anon_vma_chains + * which link all the VMAs associated with this anon_vma. + */ +struct anon_vma_chain { + struct vm_area_struct *vma; + struct anon_vma *anon_vma; + struct list_head same_vma; /* locked by mmap_sem & page_table_lock */ + struct list_head same_anon_vma; /* locked by anon_vma->lock */ }; #ifdef CONFIG_MMU @@ -89,12 +109,19 @@ static inline void anon_vma_unlock(struct vm_area_struct *vma) */ void anon_vma_init(void); /* create anon_vma_cachep */ int anon_vma_prepare(struct vm_area_struct *); -void __anon_vma_merge(struct vm_area_struct *, struct vm_area_struct *); -void anon_vma_unlink(struct vm_area_struct *); -void anon_vma_link(struct vm_area_struct *); +void unlink_anon_vmas(struct vm_area_struct *); +int anon_vma_clone(struct vm_area_struct *, struct vm_area_struct *); +int anon_vma_fork(struct vm_area_struct *, struct vm_area_struct *); void __anon_vma_link(struct vm_area_struct *); void anon_vma_free(struct anon_vma *); +static inline void anon_vma_merge(struct vm_area_struct *vma, + struct vm_area_struct *next) +{ + VM_BUG_ON(vma->anon_vma != next->anon_vma); + unlink_anon_vmas(next); +} + /* * rmap interfaces called when adding or removing pte of page */ diff --git a/kernel/fork.c b/kernel/fork.c index 7616bcf107b9..bab7b254ad39 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -329,15 +329,17 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) if (!tmp) goto fail_nomem; *tmp = *mpnt; + INIT_LIST_HEAD(&tmp->anon_vma_chain); pol = mpol_dup(vma_policy(mpnt)); retval = PTR_ERR(pol); if (IS_ERR(pol)) goto fail_nomem_policy; vma_set_policy(tmp, pol); + if (anon_vma_fork(tmp, mpnt)) + goto fail_nomem_anon_vma_fork; tmp->vm_flags &= ~VM_LOCKED; tmp->vm_mm = mm; tmp->vm_next = NULL; - anon_vma_link(tmp); file = tmp->vm_file; if (file) { struct inode *inode = file->f_path.dentry->d_inode; @@ -392,6 +394,8 @@ out: flush_tlb_mm(oldmm); up_write(&oldmm->mmap_sem); return retval; +fail_nomem_anon_vma_fork: + mpol_put(pol); fail_nomem_policy: kmem_cache_free(vm_area_cachep, tmp); fail_nomem: diff --git a/mm/ksm.c b/mm/ksm.c index 56a0da1f9979..a93f1b7f508c 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -1563,10 +1563,12 @@ int page_referenced_ksm(struct page *page, struct mem_cgroup *memcg, again: hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) { struct anon_vma *anon_vma = rmap_item->anon_vma; + struct anon_vma_chain *vmac; struct vm_area_struct *vma; spin_lock(&anon_vma->lock); - list_for_each_entry(vma, &anon_vma->head, anon_vma_node) { + list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) { + vma = vmac->vma; if (rmap_item->address < vma->vm_start || rmap_item->address >= vma->vm_end) continue; @@ -1614,10 +1616,12 @@ int try_to_unmap_ksm(struct page *page, enum ttu_flags flags) again: hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) { struct anon_vma *anon_vma = rmap_item->anon_vma; + struct anon_vma_chain *vmac; struct vm_area_struct *vma; spin_lock(&anon_vma->lock); - list_for_each_entry(vma, &anon_vma->head, anon_vma_node) { + list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) { + vma = vmac->vma; if (rmap_item->address < 
vma->vm_start || rmap_item->address >= vma->vm_end) continue; @@ -1664,10 +1668,12 @@ int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page *, again: hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) { struct anon_vma *anon_vma = rmap_item->anon_vma; + struct anon_vma_chain *vmac; struct vm_area_struct *vma; spin_lock(&anon_vma->lock); - list_for_each_entry(vma, &anon_vma->head, anon_vma_node) { + list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) { + vma = vmac->vma; if (rmap_item->address < vma->vm_start || rmap_item->address >= vma->vm_end) continue; diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 17299fd4577c..d1f335162976 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -383,9 +383,12 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill, if (av == NULL) /* Not actually mapped anymore */ goto out; for_each_process (tsk) { + struct anon_vma_chain *vmac; + if (!task_early_kill(tsk)) continue; - list_for_each_entry (vma, &av->head, anon_vma_node) { + list_for_each_entry(vmac, &av->head, same_anon_vma) { + vma = vmac->vma; if (!page_mapped_in_vma(page, vma)) continue; if (vma->vm_mm == tsk->mm) diff --git a/mm/memory.c b/mm/memory.c index 77d9f840936b..dc785b438d70 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -374,7 +374,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma, * Hide vma from rmap and truncate_pagecache before freeing * pgtables */ - anon_vma_unlink(vma); + unlink_anon_vmas(vma); unlink_file_vma(vma); if (is_vm_hugetlb_page(vma)) { @@ -388,7 +388,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma, && !is_vm_hugetlb_page(next)) { vma = next; next = vma->vm_next; - anon_vma_unlink(vma); + unlink_anon_vmas(vma); unlink_file_vma(vma); } free_pgd_range(tlb, addr, vma->vm_end, diff --git a/mm/mmap.c b/mm/mmap.c index 31656147128e..6a0c15db7f60 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -437,7 +437,6 @@ __vma_link(struct mm_struct *mm, struct vm_area_struct *vma, { __vma_link_list(mm, vma, prev, rb_parent); __vma_link_rb(mm, vma, rb_link, rb_parent); - __anon_vma_link(vma); } static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma, @@ -499,7 +498,7 @@ __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma, * are necessary. The "insert" vma (if any) is to be inserted * before we drop the necessary locks. */ -void vma_adjust(struct vm_area_struct *vma, unsigned long start, +int vma_adjust(struct vm_area_struct *vma, unsigned long start, unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert) { struct mm_struct *mm = vma->vm_mm; @@ -542,6 +541,28 @@ again: remove_next = 1 + (end > next->vm_end); } } + /* + * When changing only vma->vm_end, we don't really need anon_vma lock. + */ + if (vma->anon_vma && (insert || importer || start != vma->vm_start)) + anon_vma = vma->anon_vma; + if (anon_vma) { + /* + * Easily overlooked: when mprotect shifts the boundary, + * make sure the expanding vma has anon_vma set if the + * shrinking vma had, to cover any anon pages imported. + */ + if (importer && !importer->anon_vma) { + /* Block reverse map lookups until things are set up. 
*/ + importer->vm_flags |= VM_LOCK_RMAP; + if (anon_vma_clone(importer, vma)) { + importer->vm_flags &= ~VM_LOCK_RMAP; + return -ENOMEM; + } + importer->anon_vma = anon_vma; + } + } + if (file) { mapping = file->f_mapping; if (!(vma->vm_flags & VM_NONLINEAR)) @@ -567,25 +588,6 @@ again: remove_next = 1 + (end > next->vm_end); } } - /* - * When changing only vma->vm_end, we don't really need - * anon_vma lock. - */ - if (vma->anon_vma && (insert || importer || start != vma->vm_start)) - anon_vma = vma->anon_vma; - if (anon_vma) { - spin_lock(&anon_vma->lock); - /* - * Easily overlooked: when mprotect shifts the boundary, - * make sure the expanding vma has anon_vma set if the - * shrinking vma had, to cover any anon pages imported. - */ - if (importer && !importer->anon_vma) { - importer->anon_vma = anon_vma; - __anon_vma_link(importer); - } - } - if (root) { flush_dcache_mmap_lock(mapping); vma_prio_tree_remove(vma, root); @@ -616,8 +618,11 @@ again: remove_next = 1 + (end > next->vm_end); __vma_unlink(mm, next, vma); if (file) __remove_shared_vm_struct(next, file, mapping); - if (next->anon_vma) - __anon_vma_merge(vma, next); + /* + * This VMA is now dead, no need for rmap to follow it. + * Call anon_vma_merge below, outside of i_mmap_lock. + */ + next->vm_flags |= VM_LOCK_RMAP; } else if (insert) { /* * split_vma has split insert from vma, and needs @@ -627,17 +632,25 @@ again: remove_next = 1 + (end > next->vm_end); __insert_vm_struct(mm, insert); } - if (anon_vma) - spin_unlock(&anon_vma->lock); if (mapping) spin_unlock(&mapping->i_mmap_lock); + /* + * The current VMA has been set up. It is now safe for the + * rmap code to get from the pages to the ptes. + */ + if (anon_vma && importer) + importer->vm_flags &= ~VM_LOCK_RMAP; + if (remove_next) { if (file) { fput(file); if (next->vm_flags & VM_EXECUTABLE) removed_exe_file_vma(mm); } + /* Protected by mmap_sem and VM_LOCK_RMAP. 
*/ + if (next->anon_vma) + anon_vma_merge(vma, next); mm->map_count--; mpol_put(vma_policy(next)); kmem_cache_free(vm_area_cachep, next); @@ -653,6 +666,8 @@ again: remove_next = 1 + (end > next->vm_end); } validate_mm(mm); + + return 0; } /* @@ -759,6 +774,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm, { pgoff_t pglen = (end - addr) >> PAGE_SHIFT; struct vm_area_struct *area, *next; + int err; /* * We later require that vma->vm_flags == vm_flags, @@ -792,11 +808,13 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm, is_mergeable_anon_vma(prev->anon_vma, next->anon_vma)) { /* cases 1, 6 */ - vma_adjust(prev, prev->vm_start, + err = vma_adjust(prev, prev->vm_start, next->vm_end, prev->vm_pgoff, NULL); } else /* cases 2, 5, 7 */ - vma_adjust(prev, prev->vm_start, + err = vma_adjust(prev, prev->vm_start, end, prev->vm_pgoff, NULL); + if (err) + return NULL; return prev; } @@ -808,11 +826,13 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm, can_vma_merge_before(next, vm_flags, anon_vma, file, pgoff+pglen)) { if (prev && addr < prev->vm_end) /* case 4 */ - vma_adjust(prev, prev->vm_start, + err = vma_adjust(prev, prev->vm_start, addr, prev->vm_pgoff, NULL); else /* cases 3, 8 */ - vma_adjust(area, addr, next->vm_end, + err = vma_adjust(area, addr, next->vm_end, next->vm_pgoff - pglen, NULL); + if (err) + return NULL; return area; } @@ -1205,6 +1225,7 @@ munmap_back: vma->vm_flags = vm_flags; vma->vm_page_prot = vm_get_page_prot(vm_flags); vma->vm_pgoff = pgoff; + INIT_LIST_HEAD(&vma->anon_vma_chain); if (file) { error = -EINVAL; @@ -1865,6 +1886,7 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma, { struct mempolicy *pol; struct vm_area_struct *new; + int err = -ENOMEM; if (is_vm_hugetlb_page(vma) && (addr & ~(huge_page_mask(hstate_vma(vma))))) @@ -1872,11 +1894,13 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma, new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (!new) - return -ENOMEM; + goto out_err; /* most fields are the same, copy all, and then fixup */ *new = *vma; + INIT_LIST_HEAD(&new->anon_vma_chain); + if (new_below) new->vm_end = addr; else { @@ -1886,11 +1910,14 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma, pol = mpol_dup(vma_policy(vma)); if (IS_ERR(pol)) { - kmem_cache_free(vm_area_cachep, new); - return PTR_ERR(pol); + err = PTR_ERR(pol); + goto out_free_vma; } vma_set_policy(new, pol); + if (anon_vma_clone(new, vma)) + goto out_free_mpol; + if (new->vm_file) { get_file(new->vm_file); if (vma->vm_flags & VM_EXECUTABLE) @@ -1901,12 +1928,28 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma, new->vm_ops->open(new); if (new_below) - vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff + + err = vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff + ((addr - new->vm_start) >> PAGE_SHIFT), new); else - vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new); + err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new); - return 0; + /* Success. */ + if (!err) + return 0; + + /* Clean everything up if vma_adjust failed. 
*/ + new->vm_ops->close(new); + if (new->vm_file) { + if (vma->vm_flags & VM_EXECUTABLE) + removed_exe_file_vma(mm); + fput(new->vm_file); + } + out_free_mpol: + mpol_put(pol); + out_free_vma: + kmem_cache_free(vm_area_cachep, new); + out_err: + return err; } /* @@ -2116,6 +2159,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len) return -ENOMEM; } + INIT_LIST_HEAD(&vma->anon_vma_chain); vma->vm_mm = mm; vma->vm_start = addr; vma->vm_end = addr + len; @@ -2252,10 +2296,11 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, if (new_vma) { *new_vma = *vma; pol = mpol_dup(vma_policy(vma)); - if (IS_ERR(pol)) { - kmem_cache_free(vm_area_cachep, new_vma); - return NULL; - } + if (IS_ERR(pol)) + goto out_free_vma; + INIT_LIST_HEAD(&new_vma->anon_vma_chain); + if (anon_vma_clone(new_vma, vma)) + goto out_free_mempol; vma_set_policy(new_vma, pol); new_vma->vm_start = addr; new_vma->vm_end = addr + len; @@ -2271,6 +2316,12 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, } } return new_vma; + + out_free_mempol: + mpol_put(pol); + out_free_vma: + kmem_cache_free(vm_area_cachep, new_vma); + return NULL; } /* @@ -2348,6 +2399,7 @@ int install_special_mapping(struct mm_struct *mm, if (unlikely(vma == NULL)) return -ENOMEM; + INIT_LIST_HEAD(&vma->anon_vma_chain); vma->vm_mm = mm; vma->vm_start = addr; vma->vm_end = addr + len; @@ -2448,6 +2500,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping) int mm_take_all_locks(struct mm_struct *mm) { struct vm_area_struct *vma; + struct anon_vma_chain *avc; int ret = -EINTR; BUG_ON(down_read_trylock(&mm->mmap_sem)); @@ -2465,7 +2518,8 @@ int mm_take_all_locks(struct mm_struct *mm) if (signal_pending(current)) goto out_unlock; if (vma->anon_vma) - vm_lock_anon_vma(mm, vma->anon_vma); + list_for_each_entry(avc, &vma->anon_vma_chain, same_vma) + vm_lock_anon_vma(mm, avc->anon_vma); } ret = 0; @@ -2520,13 +2574,15 @@ static void vm_unlock_mapping(struct address_space *mapping) void mm_drop_all_locks(struct mm_struct *mm) { struct vm_area_struct *vma; + struct anon_vma_chain *avc; BUG_ON(down_read_trylock(&mm->mmap_sem)); BUG_ON(!mutex_is_locked(&mm_all_locks_mutex)); for (vma = mm->mmap; vma; vma = vma->vm_next) { if (vma->anon_vma) - vm_unlock_anon_vma(vma->anon_vma); + list_for_each_entry(avc, &vma->anon_vma_chain, same_vma) + vm_unlock_anon_vma(avc->anon_vma); if (vma->vm_file && vma->vm_file->f_mapping) vm_unlock_mapping(vma->vm_file->f_mapping); } diff --git a/mm/mremap.c b/mm/mremap.c index 4c4c803453f3..e9c75efce609 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -460,8 +460,11 @@ unsigned long do_mremap(unsigned long addr, if (vma_expandable(vma, new_len - old_len)) { int pages = (new_len - old_len) >> PAGE_SHIFT; - vma_adjust(vma, vma->vm_start, - addr + new_len, vma->vm_pgoff, NULL); + if (vma_adjust(vma, vma->vm_start, addr + new_len, + vma->vm_pgoff, NULL)) { + ret = -ENOMEM; + goto out; + } mm->total_vm += pages; vm_stat_account(mm, vma->vm_flags, vma->vm_file, pages); diff --git a/mm/nommu.c b/mm/nommu.c index 48a2ecfaf059..55727a74af98 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1209,7 +1209,7 @@ unsigned long do_mmap_pgoff(struct file *file, region->vm_flags = vm_flags; region->vm_pgoff = pgoff; - INIT_LIST_HEAD(&vma->anon_vma_node); + INIT_LIST_HEAD(&vma->anon_vma_chain); vma->vm_flags = vm_flags; vma->vm_pgoff = pgoff; diff --git a/mm/rmap.c b/mm/rmap.c index 5cb47111f79e..be34094e4595 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -62,6 +62,7 @@ #include "internal.h" static struct 
kmem_cache *anon_vma_cachep; +static struct kmem_cache *anon_vma_chain_cachep; static inline struct anon_vma *anon_vma_alloc(void) { @@ -73,6 +74,16 @@ void anon_vma_free(struct anon_vma *anon_vma) kmem_cache_free(anon_vma_cachep, anon_vma); } +static inline struct anon_vma_chain *anon_vma_chain_alloc(void) +{ + return kmem_cache_alloc(anon_vma_chain_cachep, GFP_KERNEL); +} + +void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain) +{ + kmem_cache_free(anon_vma_chain_cachep, anon_vma_chain); +} + /** * anon_vma_prepare - attach an anon_vma to a memory region * @vma: the memory region in question @@ -103,18 +114,23 @@ void anon_vma_free(struct anon_vma *anon_vma) int anon_vma_prepare(struct vm_area_struct *vma) { struct anon_vma *anon_vma = vma->anon_vma; + struct anon_vma_chain *avc; might_sleep(); if (unlikely(!anon_vma)) { struct mm_struct *mm = vma->vm_mm; struct anon_vma *allocated; + avc = anon_vma_chain_alloc(); + if (!avc) + goto out_enomem; + anon_vma = find_mergeable_anon_vma(vma); allocated = NULL; if (!anon_vma) { anon_vma = anon_vma_alloc(); if (unlikely(!anon_vma)) - return -ENOMEM; + goto out_enomem_free_avc; allocated = anon_vma; } spin_lock(&anon_vma->lock); @@ -123,53 +139,113 @@ int anon_vma_prepare(struct vm_area_struct *vma) spin_lock(&mm->page_table_lock); if (likely(!vma->anon_vma)) { vma->anon_vma = anon_vma; - list_add_tail(&vma->anon_vma_node, &anon_vma->head); + avc->anon_vma = anon_vma; + avc->vma = vma; + list_add(&avc->same_vma, &vma->anon_vma_chain); + list_add(&avc->same_anon_vma, &anon_vma->head); allocated = NULL; } spin_unlock(&mm->page_table_lock); spin_unlock(&anon_vma->lock); - if (unlikely(allocated)) + if (unlikely(allocated)) { anon_vma_free(allocated); + anon_vma_chain_free(avc); + } } return 0; + + out_enomem_free_avc: + anon_vma_chain_free(avc); + out_enomem: + return -ENOMEM; } -void __anon_vma_merge(struct vm_area_struct *vma, struct vm_area_struct *next) +static void anon_vma_chain_link(struct vm_area_struct *vma, + struct anon_vma_chain *avc, + struct anon_vma *anon_vma) { - BUG_ON(vma->anon_vma != next->anon_vma); - list_del(&next->anon_vma_node); + avc->vma = vma; + avc->anon_vma = anon_vma; + list_add(&avc->same_vma, &vma->anon_vma_chain); + + spin_lock(&anon_vma->lock); + list_add_tail(&avc->same_anon_vma, &anon_vma->head); + spin_unlock(&anon_vma->lock); } -void __anon_vma_link(struct vm_area_struct *vma) +/* + * Attach the anon_vmas from src to dst. + * Returns 0 on success, -ENOMEM on failure. + */ +int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src) { - struct anon_vma *anon_vma = vma->anon_vma; + struct anon_vma_chain *avc, *pavc; + + list_for_each_entry(pavc, &src->anon_vma_chain, same_vma) { + avc = anon_vma_chain_alloc(); + if (!avc) + goto enomem_failure; + anon_vma_chain_link(dst, avc, pavc->anon_vma); + } + return 0; - if (anon_vma) - list_add_tail(&vma->anon_vma_node, &anon_vma->head); + enomem_failure: + unlink_anon_vmas(dst); + return -ENOMEM; } -void anon_vma_link(struct vm_area_struct *vma) +/* + * Attach vma to its own anon_vma, as well as to the anon_vmas that + * the corresponding VMA in the parent process is attached to. + * Returns 0 on success, non-zero on failure. 
+ */ +int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma) { - struct anon_vma *anon_vma = vma->anon_vma; + struct anon_vma_chain *avc; + struct anon_vma *anon_vma; - if (anon_vma) { - spin_lock(&anon_vma->lock); - list_add_tail(&vma->anon_vma_node, &anon_vma->head); - spin_unlock(&anon_vma->lock); - } + /* Don't bother if the parent process has no anon_vma here. */ + if (!pvma->anon_vma) + return 0; + + /* + * First, attach the new VMA to the parent VMA's anon_vmas, + * so rmap can find non-COWed pages in child processes. + */ + if (anon_vma_clone(vma, pvma)) + return -ENOMEM; + + /* Then add our own anon_vma. */ + anon_vma = anon_vma_alloc(); + if (!anon_vma) + goto out_error; + avc = anon_vma_chain_alloc(); + if (!avc) + goto out_error_free_anon_vma; + anon_vma_chain_link(vma, avc, anon_vma); + /* Mark this anon_vma as the one where our new (COWed) pages go. */ + vma->anon_vma = anon_vma; + + return 0; + + out_error_free_anon_vma: + anon_vma_free(anon_vma); + out_error: + return -ENOMEM; } -void anon_vma_unlink(struct vm_area_struct *vma) +static void anon_vma_unlink(struct anon_vma_chain *anon_vma_chain) { - struct anon_vma *anon_vma = vma->anon_vma; + struct anon_vma *anon_vma = anon_vma_chain->anon_vma; int empty; + /* If anon_vma_fork fails, we can get an empty anon_vma_chain. */ if (!anon_vma) return; spin_lock(&anon_vma->lock); - list_del(&vma->anon_vma_node); + list_del(&anon_vma_chain->same_anon_vma); /* We must garbage collect the anon_vma if it's empty */ empty = list_empty(&anon_vma->head) && !ksm_refcount(anon_vma); @@ -179,6 +255,18 @@ void anon_vma_unlink(struct vm_area_struct *vma) anon_vma_free(anon_vma); } +void unlink_anon_vmas(struct vm_area_struct *vma) +{ + struct anon_vma_chain *avc, *next; + + /* Unlink each anon_vma chained to the VMA. */ + list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) { + anon_vma_unlink(avc); + list_del(&avc->same_vma); + anon_vma_chain_free(avc); + } +} + static void anon_vma_ctor(void *data) { struct anon_vma *anon_vma = data; @@ -192,6 +280,7 @@ void __init anon_vma_init(void) { anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma), 0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor); + anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain, SLAB_PANIC); } /* @@ -240,6 +329,18 @@ vma_address(struct page *page, struct vm_area_struct *vma) /* page should be within @vma mapping range */ return -EFAULT; } + if (unlikely(vma->vm_flags & VM_LOCK_RMAP)) { + /* + * This VMA is being unlinked or is not yet linked into the + * VMA tree. Do not try to follow this rmap. This race + * condition can result in page_referenced() ignoring a + * reference or in try_to_unmap() failing to unmap a page. + * The VMA cannot be freed under us because we hold the + * anon_vma->lock, which the munmap code takes while + * unlinking the anon_vmas from the VMA. 
+ */ + return -EFAULT; + } return address; } @@ -396,7 +497,7 @@ static int page_referenced_anon(struct page *page, { unsigned int mapcount; struct anon_vma *anon_vma; - struct vm_area_struct *vma; + struct anon_vma_chain *avc; int referenced = 0; anon_vma = page_lock_anon_vma(page); @@ -404,7 +505,8 @@ static int page_referenced_anon(struct page *page, return referenced; mapcount = page_mapcount(page); - list_for_each_entry(vma, &anon_vma->head, anon_vma_node) { + list_for_each_entry(avc, &anon_vma->head, same_anon_vma) { + struct vm_area_struct *vma = avc->vma; unsigned long address = vma_address(page, vma); if (address == -EFAULT) continue; @@ -1025,14 +1127,15 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount, static int try_to_unmap_anon(struct page *page, enum ttu_flags flags) { struct anon_vma *anon_vma; - struct vm_area_struct *vma; + struct anon_vma_chain *avc; int ret = SWAP_AGAIN; anon_vma = page_lock_anon_vma(page); if (!anon_vma) return ret; - list_for_each_entry(vma, &anon_vma->head, anon_vma_node) { + list_for_each_entry(avc, &anon_vma->head, same_anon_vma) { + struct vm_area_struct *vma = avc->vma; unsigned long address = vma_address(page, vma); if (address == -EFAULT) continue; @@ -1223,7 +1326,7 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *, struct vm_area_struct *, unsigned long, void *), void *arg) { struct anon_vma *anon_vma; - struct vm_area_struct *vma; + struct anon_vma_chain *avc; int ret = SWAP_AGAIN; /* @@ -1238,7 +1341,8 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *, if (!anon_vma) return ret; spin_lock(&anon_vma->lock); - list_for_each_entry(vma, &anon_vma->head, anon_vma_node) { + list_for_each_entry(avc, &anon_vma->head, same_anon_vma) { + struct vm_area_struct *vma = avc->vma; unsigned long address = vma_address(page, vma); if (address == -EFAULT) continue; -- cgit v1.2.3 From c44b674323f4a2480dbeb65d4b487fa5f06f49e0 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Fri, 5 Mar 2010 13:42:09 -0800 Subject: rmap: move exclusively owned pages to own anon_vma in do_wp_page() When the parent process breaks the COW on a page, both the original which is mapped at child and the new page which is mapped parent end up in that same anon_vma. Generally this won't be a problem, but for some workloads it could preserve the O(N) rmap scanning complexity. A simple fix is to ensure that, when a page which is mapped child gets reused in do_wp_page, because we already are the exclusive owner, the page gets moved to our own exclusive child's anon_vma. 
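For context, a minimal sketch (not part of the patch; the helper name is hypothetical and the KSM flag bit is ignored) of why this move is cheap: anonymous pages record their anon_vma in page->mapping with the PAGE_MAPPING_ANON bit set, so re-parenting a page to a different anon_vma is a single pointer update, which is what the page_move_anon_rmap() added below does.

static struct anon_vma *sketch_page_anon_vma(struct page *page)
{
        unsigned long mapping = (unsigned long)page->mapping;

        if (!(mapping & PAGE_MAPPING_ANON))
                return NULL;    /* file page: mapping is an address_space */
        return (struct anon_vma *)(mapping & ~PAGE_MAPPING_ANON);
}
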
Signed-off-by: Rik van Riel Cc: KOSAKI Motohiro Cc: Larry Woodman Cc: Lee Schermerhorn Reviewed-by: Minchan Kim Cc: Andrea Arcangeli Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rmap.h | 1 + mm/memory.c | 7 +++++++ mm/rmap.c | 24 ++++++++++++++++++++++++ 3 files changed, 32 insertions(+) (limited to 'include') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 62da2001d55c..72be23b1480a 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -125,6 +125,7 @@ static inline void anon_vma_merge(struct vm_area_struct *vma, /* * rmap interfaces called when adding or removing pte of page */ +void page_move_anon_rmap(struct page *, struct vm_area_struct *, unsigned long); void page_add_anon_rmap(struct page *, struct vm_area_struct *, unsigned long); void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, unsigned long); void page_add_file_rmap(struct page *); diff --git a/mm/memory.c b/mm/memory.c index dc785b438d70..d1153e37e9ba 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2138,6 +2138,13 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, page_cache_release(old_page); } reuse = reuse_swap_page(old_page); + if (reuse) + /* + * The page is all ours. Move it to our anon_vma so + * the rmap code will not search our parent or siblings. + * Protected against the rmap code by the page lock. + */ + page_move_anon_rmap(old_page, vma, address); unlock_page(old_page); } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) == (VM_WRITE|VM_SHARED))) { diff --git a/mm/rmap.c b/mm/rmap.c index 23ecd0a892df..28bcdc433d88 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -715,6 +715,30 @@ int page_mkclean(struct page *page) } EXPORT_SYMBOL_GPL(page_mkclean); +/** + * page_move_anon_rmap - move a page to our anon_vma + * @page: the page to move to our anon_vma + * @vma: the vma the page belongs to + * @address: the user virtual address mapped + * + * When a page belongs exclusively to one process after a COW event, + * that page can be moved into the anon_vma that belongs to just that + * process, so the rmap code will not search the parent or sibling + * processes. + */ +void page_move_anon_rmap(struct page *page, + struct vm_area_struct *vma, unsigned long address) +{ + struct anon_vma *anon_vma = vma->anon_vma; + + VM_BUG_ON(!PageLocked(page)); + VM_BUG_ON(!anon_vma); + VM_BUG_ON(page->index != linear_page_index(vma, address)); + + anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON; + page->mapping = (struct address_space *) anon_vma; +} + /** * __page_set_anon_rmap - setup new anonymous rmap * @page: the page to add the mapping to -- cgit v1.2.3 From fc148a5f7e0532750c312385c7ee9fa3e9311f34 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Fri, 5 Mar 2010 13:42:10 -0800 Subject: mm: remove VM_LOCK_RMAP code When a VMA is in an inconsistent state during setup or teardown, the worst that can happen is that the rmap code will not be able to find the page. The mapping is in the process of being torn down (PTEs just got invalidated by munmap), or set up (no PTEs have been instantiated yet). It is also impossible for the rmap code to follow a pointer to an already freed VMA, because the rmap code holds the anon_vma->lock, which the VMA teardown code needs to take before the VMA is removed from the anon_vma chain. Hence, we should not need the VM_LOCK_RMAP locking at all. 
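To make the ordering argument concrete, a hedged sketch follows, simplified from the rmap walkers earlier in this series (vma_address() and the list names are as in mm/rmap.c; the function itself is illustrative only). The entire walk runs under anon_vma->lock, and the VMA teardown path takes that same lock before removing an entry, so the walker can never follow avc->vma into a freed VMA.

static unsigned long sketch_walk_anon(struct page *page,
                                      struct anon_vma *anon_vma)
{
        struct anon_vma_chain *avc;
        unsigned long mapped = 0;

        spin_lock(&anon_vma->lock);
        list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
                struct vm_area_struct *vma = avc->vma;  /* safe: lock held */
                unsigned long address = vma_address(page, vma);

                if (address != -EFAULT)
                        mapped++;
        }
        spin_unlock(&anon_vma->lock);
        return mapped;
}
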
Signed-off-by: Rik van Riel Cc: Nick Piggin Cc: KOSAKI Motohiro Cc: Larry Woodman Cc: Lee Schermerhorn Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 4 ---- mm/mmap.c | 15 --------------- mm/rmap.c | 12 ------------ 3 files changed, 31 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 8e2841a2f441..3899395a03de 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -97,11 +97,7 @@ extern unsigned int kobjsize(const void *objp); #define VM_NORESERVE 0x00200000 /* should the VM suppress accounting */ #define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ #define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */ -#ifdef CONFIG_MMU -#define VM_LOCK_RMAP 0x01000000 /* Do not follow this rmap (mmu mmap) */ -#else #define VM_MAPPED_COPY 0x01000000 /* T if mapped copy of data (nommu mmap) */ -#endif #define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */ #define VM_ALWAYSDUMP 0x04000000 /* Always include in core dumps */ diff --git a/mm/mmap.c b/mm/mmap.c index 6a0c15db7f60..f1b4448626bf 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -554,9 +554,7 @@ again: remove_next = 1 + (end > next->vm_end); */ if (importer && !importer->anon_vma) { /* Block reverse map lookups until things are set up. */ - importer->vm_flags |= VM_LOCK_RMAP; if (anon_vma_clone(importer, vma)) { - importer->vm_flags &= ~VM_LOCK_RMAP; return -ENOMEM; } importer->anon_vma = anon_vma; @@ -618,11 +616,6 @@ again: remove_next = 1 + (end > next->vm_end); __vma_unlink(mm, next, vma); if (file) __remove_shared_vm_struct(next, file, mapping); - /* - * This VMA is now dead, no need for rmap to follow it. - * Call anon_vma_merge below, outside of i_mmap_lock. - */ - next->vm_flags |= VM_LOCK_RMAP; } else if (insert) { /* * split_vma has split insert from vma, and needs @@ -635,20 +628,12 @@ again: remove_next = 1 + (end > next->vm_end); if (mapping) spin_unlock(&mapping->i_mmap_lock); - /* - * The current VMA has been set up. It is now safe for the - * rmap code to get from the pages to the ptes. - */ - if (anon_vma && importer) - importer->vm_flags &= ~VM_LOCK_RMAP; - if (remove_next) { if (file) { fput(file); if (next->vm_flags & VM_EXECUTABLE) removed_exe_file_vma(mm); } - /* Protected by mmap_sem and VM_LOCK_RMAP. */ if (next->anon_vma) anon_vma_merge(vma, next); mm->map_count--; diff --git a/mm/rmap.c b/mm/rmap.c index 28bcdc433d88..4d2fb93851ca 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -329,18 +329,6 @@ vma_address(struct page *page, struct vm_area_struct *vma) /* page should be within @vma mapping range */ return -EFAULT; } - if (unlikely(vma->vm_flags & VM_LOCK_RMAP)) { - /* - * This VMA is being unlinked or is not yet linked into the - * VMA tree. Do not try to follow this rmap. This race - * condition can result in page_referenced() ignoring a - * reference or in try_to_unmap() failing to unmap a page. - * The VMA cannot be freed under us because we hold the - * anon_vma->lock, which the munmap code takes while - * unlinking the anon_vmas from the VMA. - */ - return -EFAULT; - } return address; } -- cgit v1.2.3 From 452aa6999e6703ffbddd7f6ea124d3968915f3e3 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Fri, 5 Mar 2010 13:42:13 -0800 Subject: mm/pm: force GFP_NOIO during suspend/hibernation and resume There are quite a few GFP_KERNEL memory allocations made during suspend/hibernation and resume that may cause the system to hang, because the I/O operations they depend on cannot be completed due to the underlying devices being suspended. Avoid this problem by clearing the __GFP_IO and __GFP_FS bits in gfp_allowed_mask before suspend/hibernation and restoring the original values of these bits in gfp_allowed_mask durig the subsequent resume. [akpm@linux-foundation.org: fix CONFIG_PM=n linkage] Signed-off-by: Rafael J. Wysocki Reported-by: Maxim Levitsky Cc: Sebastian Ott Cc: Benjamin Herrenschmidt Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 7 +++---- init/main.c | 2 +- kernel/power/hibernate.c | 9 +++++++++ kernel/power/suspend.c | 3 +++ mm/page_alloc.c | 25 +++++++++++++++++++++++++ 5 files changed, 41 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index e5567e6762f3..2e1b32c0484d 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -83,6 +83,7 @@ struct vm_area_struct; #define GFP_HIGHUSER_MOVABLE (__GFP_WAIT | __GFP_IO | __GFP_FS | \ __GFP_HARDWALL | __GFP_HIGHMEM | \ __GFP_MOVABLE) +#define GFP_IOFS (__GFP_IO | __GFP_FS) #ifdef CONFIG_NUMA #define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY) @@ -337,9 +338,7 @@ void drain_local_pages(void *dummy); extern gfp_t gfp_allowed_mask; -static inline void set_gfp_allowed_mask(gfp_t mask) -{ - gfp_allowed_mask = mask; -} +extern void set_gfp_allowed_mask(gfp_t mask); +extern gfp_t clear_gfp_allowed_mask(gfp_t mask); #endif /* __LINUX_GFP_H */ diff --git a/init/main.c b/init/main.c index 40aaa020cd68..41d0f10dbbc7 100644 --- a/init/main.c +++ b/init/main.c @@ -618,7 +618,7 @@ asmlinkage void __init start_kernel(void) local_irq_enable(); /* Interrupts are enabled now so all GFP allocations are safe. */ - set_gfp_allowed_mask(__GFP_BITS_MASK); + gfp_allowed_mask = __GFP_BITS_MASK; kmem_cache_init_late(); diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index bbfe472d7524..da5288ec2392 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -323,6 +323,7 @@ static int create_image(int platform_mode) int hibernation_snapshot(int platform_mode) { int error; + gfp_t saved_mask; error = platform_begin(platform_mode); if (error) @@ -334,6 +335,7 @@ int hibernation_snapshot(int platform_mode) goto Close; suspend_console(); + saved_mask = clear_gfp_allowed_mask(GFP_IOFS); error = dpm_suspend_start(PMSG_FREEZE); if (error) goto Recover_platform; @@ -351,6 +353,7 @@ int hibernation_snapshot(int platform_mode) dpm_resume_end(in_suspend ? (error ? 
PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); + set_gfp_allowed_mask(saved_mask); resume_console(); Close: platform_end(platform_mode); @@ -445,14 +448,17 @@ static int resume_target_kernel(bool platform_mode) int hibernation_restore(int platform_mode) { int error; + gfp_t saved_mask; pm_prepare_console(); suspend_console(); + saved_mask = clear_gfp_allowed_mask(GFP_IOFS); error = dpm_suspend_start(PMSG_QUIESCE); if (!error) { error = resume_target_kernel(platform_mode); dpm_resume_end(PMSG_RECOVER); } + set_gfp_allowed_mask(saved_mask); resume_console(); pm_restore_console(); return error; @@ -466,6 +472,7 @@ int hibernation_restore(int platform_mode) int hibernation_platform_enter(void) { int error; + gfp_t saved_mask; if (!hibernation_ops) return -ENOSYS; @@ -481,6 +488,7 @@ int hibernation_platform_enter(void) entering_platform_hibernation = true; suspend_console(); + saved_mask = clear_gfp_allowed_mask(GFP_IOFS); error = dpm_suspend_start(PMSG_HIBERNATE); if (error) { if (hibernation_ops->recover) @@ -518,6 +526,7 @@ int hibernation_platform_enter(void) Resume_devices: entering_platform_hibernation = false; dpm_resume_end(PMSG_RESTORE); + set_gfp_allowed_mask(saved_mask); resume_console(); Close: diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 6f10dfc2d3e9..44cce10b582d 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -189,6 +189,7 @@ static int suspend_enter(suspend_state_t state) int suspend_devices_and_enter(suspend_state_t state) { int error; + gfp_t saved_mask; if (!suspend_ops) return -ENOSYS; @@ -199,6 +200,7 @@ int suspend_devices_and_enter(suspend_state_t state) goto Close; } suspend_console(); + saved_mask = clear_gfp_allowed_mask(GFP_IOFS); suspend_test_start(); error = dpm_suspend_start(PMSG_SUSPEND); if (error) { @@ -215,6 +217,7 @@ int suspend_devices_and_enter(suspend_state_t state) suspend_test_start(); dpm_resume_end(PMSG_RESUME); suspend_test_finish("resume devices"); + set_gfp_allowed_mask(saved_mask); resume_console(); Close: if (suspend_ops->end) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 0734bedabd9c..298f307c63a1 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -76,6 +76,31 @@ unsigned long totalreserve_pages __read_mostly; int percpu_pagelist_fraction; gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK; +#ifdef CONFIG_PM_SLEEP +/* + * The following functions are used by the suspend/hibernate code to temporarily + * change gfp_allowed_mask in order to avoid using I/O during memory allocations + * while devices are suspended. To avoid races with the suspend/hibernate code, + * they should always be called with pm_mutex held (gfp_allowed_mask also should + * only be modified with pm_mutex held, unless the suspend/hibernate code is + * guaranteed not to run in parallel with that modification). + */ +void set_gfp_allowed_mask(gfp_t mask) +{ + WARN_ON(!mutex_is_locked(&pm_mutex)); + gfp_allowed_mask = mask; +} + +gfp_t clear_gfp_allowed_mask(gfp_t mask) +{ + gfp_t ret = gfp_allowed_mask; + + WARN_ON(!mutex_is_locked(&pm_mutex)); + gfp_allowed_mask &= ~mask; + return ret; +} +#endif /* CONFIG_PM_SLEEP */ + #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE int pageblock_order __read_mostly; #endif -- cgit v1.2.3 From 645747462435d84c6c6a64269ed49cc3015f753d Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Fri, 5 Mar 2010 13:42:22 -0800 Subject: vmscan: detect mapped file pages used only once The VM currently assumes that an inactive, mapped and referenced file page is in use and promotes it to the active list. 
However, every mapped file page starts out like this and thus a problem arises when workloads create a stream of such pages that are used only for a short time. By flooding the active list with those pages, the VM quickly gets into trouble finding eligible reclaim canditates. The result is long allocation latencies and eviction of the wrong pages. This patch reuses the PG_referenced page flag (used for unmapped file pages) to implement a usage detection that scales with the speed of LRU list cycling (i.e. memory pressure). If the scanner encounters those pages, the flag is set and the page cycled again on the inactive list. Only if it returns with another page table reference it is activated. Otherwise it is reclaimed as 'not recently used cache'. This effectively changes the minimum lifetime of a used-once mapped file page from a full memory cycle to an inactive list cycle, which allows it to occur in linear streams without affecting the stable working set of the system. Signed-off-by: Johannes Weiner Reviewed-by: Rik van Riel Cc: Minchan Kim Cc: OSAKI Motohiro Cc: Lee Schermerhorn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rmap.h | 2 +- mm/rmap.c | 3 --- mm/vmscan.c | 45 +++++++++++++++++++++++++++++++++++---------- 3 files changed, 36 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 72be23b1480a..d25bd224d370 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -209,7 +209,7 @@ static inline int page_referenced(struct page *page, int is_locked, unsigned long *vm_flags) { *vm_flags = 0; - return TestClearPageReferenced(page); + return 0; } #define try_to_unmap(page, refs) SWAP_FAIL diff --git a/mm/rmap.c b/mm/rmap.c index 4d2fb93851ca..fcd593c9c997 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -601,9 +601,6 @@ int page_referenced(struct page *page, int referenced = 0; int we_locked = 0; - if (TestClearPageReferenced(page)) - referenced++; - *vm_flags = 0; if (page_mapped(page) && page_rmapping(page)) { if (!is_locked && (!PageAnon(page) || PageKsm(page))) { diff --git a/mm/vmscan.c b/mm/vmscan.c index d9a0e0d3aac7..79c809895fba 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -561,18 +561,18 @@ redo: enum page_references { PAGEREF_RECLAIM, PAGEREF_RECLAIM_CLEAN, + PAGEREF_KEEP, PAGEREF_ACTIVATE, }; static enum page_references page_check_references(struct page *page, struct scan_control *sc) { + int referenced_ptes, referenced_page; unsigned long vm_flags; - int referenced; - referenced = page_referenced(page, 1, sc->mem_cgroup, &vm_flags); - if (!referenced) - return PAGEREF_RECLAIM; + referenced_ptes = page_referenced(page, 1, sc->mem_cgroup, &vm_flags); + referenced_page = TestClearPageReferenced(page); /* Lumpy reclaim - ignore references */ if (sc->order > PAGE_ALLOC_COSTLY_ORDER) @@ -585,11 +585,36 @@ static enum page_references page_check_references(struct page *page, if (vm_flags & VM_LOCKED) return PAGEREF_RECLAIM; - if (page_mapped(page)) - return PAGEREF_ACTIVATE; + if (referenced_ptes) { + if (PageAnon(page)) + return PAGEREF_ACTIVATE; + /* + * All mapped pages start out with page table + * references from the instantiating fault, so we need + * to look twice if a mapped file page is used more + * than once. + * + * Mark it and spare it for another trip around the + * inactive list. Another page table reference will + * lead to its activation. 
+ * + * Note: the mark is set for activated pages as well + * so that recently deactivated but used pages are + * quickly recovered. + */ + SetPageReferenced(page); + + if (referenced_page) + return PAGEREF_ACTIVATE; + + return PAGEREF_KEEP; + } /* Reclaim if clean, defer dirty pages to writeback */ - return PAGEREF_RECLAIM_CLEAN; + if (referenced_page) + return PAGEREF_RECLAIM_CLEAN; + + return PAGEREF_RECLAIM; } /* @@ -657,6 +682,8 @@ static unsigned long shrink_page_list(struct list_head *page_list, switch (references) { case PAGEREF_ACTIVATE: goto activate_locked; + case PAGEREF_KEEP: + goto keep_locked; case PAGEREF_RECLAIM: case PAGEREF_RECLAIM_CLEAN: ; /* try to reclaim the page below */ @@ -1359,9 +1386,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, continue; } - /* page_referenced clears PageReferenced */ - if (page_mapped(page) && - page_referenced(page, 0, sc->mem_cgroup, &vm_flags)) { + if (page_referenced(page, 0, sc->mem_cgroup, &vm_flags)) { nr_rotated++; /* * Identify referenced, file-backed active pages and -- cgit v1.2.3 From 478352e789f507105193d3d0177c3b4f26da0399 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Fri, 5 Mar 2010 13:42:23 -0800 Subject: mm: add comment about deprecation of __GFP_NOFAIL __GFP_NOFAIL was deprecated in dab48dab, so add a comment that no new users should be added. Reviewed-by: KAMEZAWA Hiroyuki Signed-off-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 2e1b32c0484d..4c6d41333f98 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -30,7 +30,8 @@ struct vm_area_struct; * _might_ fail. This depends upon the particular VM implementation. * * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller - * cannot handle allocation failures. + * cannot handle allocation failures. This modifier is deprecated and no new + * users should be added. * * __GFP_NORETRY: The VM implementation must not retry indefinitely. * -- cgit v1.2.3 From 221e3ebf6d5f2625373573155924e39f196c5d3d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 5 Mar 2010 13:42:41 -0800 Subject: cpumask: let num_*_cpus() function always return unsigned values Dependent on CONFIG_SMP the num_*_cpus() functions return unsigned or signed values. Let them always return unsigned values to avoid strange casts. 
Fixes at least one warning: kernel/kprobes.c: In function 'register_kretprobe': kernel/kprobes.c:1038: warning: comparison of distinct pointer types lacks a cast Signed-off-by: Heiko Carstens Cc: Heiko Carstens Cc: Ananth N Mavinakayanahalli Cc: Masami Hiramatsu Cc: Ingo Molnar Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpumask.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index dbcee7647d9a..bae6fe24d1f9 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -90,10 +90,10 @@ extern const struct cpumask *const cpu_active_mask; #define cpu_present(cpu) cpumask_test_cpu((cpu), cpu_present_mask) #define cpu_active(cpu) cpumask_test_cpu((cpu), cpu_active_mask) #else -#define num_online_cpus() 1 -#define num_possible_cpus() 1 -#define num_present_cpus() 1 -#define num_active_cpus() 1 +#define num_online_cpus() 1U +#define num_possible_cpus() 1U +#define num_present_cpus() 1U +#define num_active_cpus() 1U #define cpu_online(cpu) ((cpu) == 0) #define cpu_possible(cpu) ((cpu) == 0) #define cpu_present(cpu) ((cpu) == 0) -- cgit v1.2.3 From 72c3368856c543ace033f6a5b9a3edf1f4043236 Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Fri, 5 Mar 2010 13:42:43 -0800 Subject: nodemask.h: remove macro any_online_node The macro any_online_node() is prone to producing sparse warnings due to the local symbol 'node'. Since all the in-tree users are really requesting the first online node (the mask argument is either NODE_MASK_ALL or node_online_map) just use the first_online_node macro and remove the any_online_node macro since there are no users. Signed-off-by: H Hartley Sweeten Acked-by: David Rientjes Reviewed-by: KAMEZAWA Hiroyuki Cc: Mel Gorman Cc: Lee Schermerhorn Acked-by: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Dave Hansen Cc: Milton Miller Cc: Nathan Fontenot Cc: Geoff Levand Cc: Grant Likely Cc: J. Bruce Fields Cc: Neil Brown Cc: Trond Myklebust Cc: David S. Miller Cc: Benny Halevy Cc: Chuck Lever Cc: Ricardo Labiaga Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/mm/numa.c | 6 +++--- include/linux/nodemask.h | 11 ----------- net/sunrpc/svc.c | 2 +- 3 files changed, 4 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index b037d95eeadc..64c00227b997 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -451,7 +451,7 @@ static int __cpuinit numa_setup_cpu(unsigned long lcpu) nid = of_node_to_nid_single(cpu); if (nid < 0 || !node_online(nid)) - nid = any_online_node(NODE_MASK_ALL); + nid = first_online_node; out: map_cpu_to_node(lcpu, nid); @@ -1114,7 +1114,7 @@ int hot_add_scn_to_nid(unsigned long scn_addr) int nid, found = 0; if (!numa_enabled || (min_common_depth < 0)) - return any_online_node(NODE_MASK_ALL); + return first_online_node; memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); if (memory) { @@ -1125,7 +1125,7 @@ int hot_add_scn_to_nid(unsigned long scn_addr) } if (nid < 0 || !node_online(nid)) - nid = any_online_node(NODE_MASK_ALL); + nid = first_online_node; if (NODE_DATA(nid)->node_spanned_pages) return nid; diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index 454997cccbd8..c4fa64b585ff 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -69,8 +69,6 @@ * int node_online(node) Is some node online? * int node_possible(node) Is some node possible? 
* - * int any_online_node(mask) First online node in mask - * * node_set_online(node) set bit 'node' in node_online_map * node_set_offline(node) clear bit 'node' in node_online_map * @@ -467,15 +465,6 @@ static inline int num_node_state(enum node_states state) #define node_online_map node_states[N_ONLINE] #define node_possible_map node_states[N_POSSIBLE] -#define any_online_node(mask) \ -({ \ - int node; \ - for_each_node_mask(node, (mask)) \ - if (node_online(node)) \ - break; \ - node; \ -}) - #define num_online_nodes() num_node_state(N_ONLINE) #define num_possible_nodes() num_node_state(N_POSSIBLE) #define node_online(node) node_state((node), N_ONLINE) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 6dcf8c9c784c..8420a4205b76 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -133,7 +133,7 @@ svc_pool_map_choose_mode(void) return SVC_POOL_PERNODE; } - node = any_online_node(node_online_map); + node = first_online_node; if (nr_cpus_node(node) > 2) { /* * Non-trivial SMP, or CONFIG_NUMA on -- cgit v1.2.3 From cfd8d6c0ed89ba387609419e3d8d4c6b92a5d446 Mon Sep 17 00:00:00 2001 From: Rakib Mullick Date: Fri, 5 Mar 2010 13:42:45 -0800 Subject: smp: fix documentation in include/linux/smp.h smp: Fix documentation. Fix documentation in include/linux/smp.h: smp_processor_id() Signed-off-by: Rakib Mullick Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/smp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/smp.h b/include/linux/smp.h index 7a0570e6a596..cfa2d20e35f1 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -154,7 +154,7 @@ smp_call_function_any(const struct cpumask *mask, void (*func)(void *info), /* * smp_processor_id(): get the current CPU ID. * - * if DEBUG_PREEMPT is enabled the we check whether it is + * if DEBUG_PREEMPT is enabled then we check whether it is * used in a preemption-safe way. (smp_processor_id() is safe * if it's used in a preemption-off critical section, or in * a thread that is bound to the current CPU.) -- cgit v1.2.3 From 9a86e2bad0b9fbf3290ae496da6dab9536dd6bf7 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 5 Mar 2010 13:43:17 -0800 Subject: lib: fix first line of kernel-doc for a few functions The function name must be followed by a space, hypen, space, and a short description. Signed-off-by: Ben Hutchings Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/list.h | 6 +++--- lib/bitmap.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/list.h b/include/linux/list.h index 5d9c6558e8ab..8392884a2977 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -498,7 +498,7 @@ static inline void list_splice_tail_init(struct list_head *list, pos = n, n = list_entry(n->member.next, typeof(*n), member)) /** - * list_for_each_entry_safe_continue + * list_for_each_entry_safe_continue - continue list iteration safe against removal * @pos: the type * to use as a loop cursor. * @n: another type * to use as temporary storage * @head: the head for your list. @@ -514,7 +514,7 @@ static inline void list_splice_tail_init(struct list_head *list, pos = n, n = list_entry(n->member.next, typeof(*n), member)) /** - * list_for_each_entry_safe_from + * list_for_each_entry_safe_from - iterate over list from current point safe against removal * @pos: the type * to use as a loop cursor. * @n: another type * to use as temporary storage * @head: the head for your list. 
@@ -529,7 +529,7 @@ static inline void list_splice_tail_init(struct list_head *list, pos = n, n = list_entry(n->member.next, typeof(*n), member)) /** - * list_for_each_entry_safe_reverse + * list_for_each_entry_safe_reverse - iterate backwards over list safe against removal * @pos: the type * to use as a loop cursor. * @n: another type * to use as temporary storage * @head: the head for your list. diff --git a/lib/bitmap.c b/lib/bitmap.c index 11bf49750583..61998c5924fe 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -487,7 +487,7 @@ int __bitmap_parse(const char *buf, unsigned int buflen, EXPORT_SYMBOL(__bitmap_parse); /** - * bitmap_parse_user() + * bitmap_parse_user - convert an ASCII hex string in a user buffer into a bitmap * * @ubuf: pointer to user buffer containing string. * @ulen: buffer size in bytes. If string is smaller than this @@ -619,7 +619,7 @@ int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits) EXPORT_SYMBOL(bitmap_parselist); /** - * bitmap_pos_to_ord(buf, pos, bits) + * bitmap_pos_to_ord - find ordinal of set bit at given position in bitmap * @buf: pointer to a bitmap * @pos: a bit position in @buf (0 <= @pos < @bits) * @bits: number of valid bit positions in @buf @@ -655,7 +655,7 @@ static int bitmap_pos_to_ord(const unsigned long *buf, int pos, int bits) } /** - * bitmap_ord_to_pos(buf, ord, bits) + * bitmap_ord_to_pos - find position of n-th set bit in bitmap * @buf: pointer to bitmap * @ord: ordinal bit position (n-th set bit, n >= 0) * @bits: number of valid bit positions in @buf -- cgit v1.2.3 From 3fb7fb4a01d09f81d1daaf65e52d929734bd691f Mon Sep 17 00:00:00 2001 From: Bing Zhao Date: Fri, 5 Mar 2010 13:43:25 -0800 Subject: sdio: add quirk to clamp byte mode transfer Some SDIO cards expect byte transfers not to exceed the configured block transfer size. Add a quirk to that effect. Patches to make use of this quirk will be sent separately. 
Signed-off-by: Bing Zhao Signed-off-by: Nicolas Pitre Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/mmc/core/sdio_io.c | 7 ++++++- include/linux/mmc/card.h | 7 +++++++ 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/mmc/core/sdio_io.c b/drivers/mmc/core/sdio_io.c index f9aa8a7deffa..87c618904ee2 100644 --- a/drivers/mmc/core/sdio_io.c +++ b/drivers/mmc/core/sdio_io.c @@ -189,7 +189,12 @@ static inline unsigned int sdio_max_byte_size(struct sdio_func *func) { unsigned mval = min(func->card->host->max_seg_size, func->card->host->max_blk_size); - mval = min(mval, func->max_blksize); + + if (mmc_blksz_for_byte_mode(func->card)) + mval = min(mval, func->cur_blksize); + else + mval = min(mval, func->max_blksize); + return min(mval, 512u); /* maximum size for byte mode */ } diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 2ee22e8af110..d02d2c6e0cfe 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -99,6 +99,8 @@ struct mmc_card { #define MMC_STATE_BLOCKADDR (1<<3) /* card uses block-addressing */ unsigned int quirks; /* card quirks */ #define MMC_QUIRK_LENIENT_FN0 (1<<0) /* allow SDIO FN0 writes outside of the VS CCCR range */ +#define MMC_QUIRK_BLKSZ_FOR_BYTE_MODE (1<<1) /* use func->cur_blksize */ + /* for byte mode */ u32 raw_cid[4]; /* raw card CID */ u32 raw_csd[4]; /* raw card CSD */ @@ -139,6 +141,11 @@ static inline int mmc_card_lenient_fn0(const struct mmc_card *c) return c->quirks & MMC_QUIRK_LENIENT_FN0; } +static inline int mmc_blksz_for_byte_mode(const struct mmc_card *c) +{ + return c->quirks & MMC_QUIRK_BLKSZ_FOR_BYTE_MODE; +} + #define mmc_card_name(c) ((c)->cid.prod_name) #define mmc_card_id(c) (dev_name(&(c)->dev)) -- cgit v1.2.3 From da68c4eb258cd9f3f0b8aeb7e46b8118bb6358b6 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 5 Mar 2010 13:43:31 -0800 Subject: sdio: introduce API for special power management features This patch series provides the core changes needed to allow SDIO cards to remain powered and active while the host system is suspended, and let them wake up the host system when needed. This is used to implement wake-on-lan with SDIO wireless cards at the moment. Patches to add that support to the libertas driver will be posted separately. This patch: Some SDIO cards have the ability to keep on running autonomously when the host system is suspended, and wake it up when needed. This however requires that the host controller preserve power to the card, and configure itself appropriately for wake-up. There is however 4 layers of abstractions involved: the host controller driver, the MMC core code, the SDIO card management code, and the actual SDIO function driver. To make things simple and manageable, host drivers must advertise their PM capabilities with a feature bitmask, then function drivers can query and set those features from their suspend method. Then each layer in the suspend call chain is expected to act upon those bits accordingly. 
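A hedged sketch of the driver-facing side of this contract (the function name and the decision to fail with -ENOSYS are assumptions for illustration): the suspend handler first queries what the host can do, then requests only bits the host advertised.

static int example_wifi_suspend(struct device *dev)
{
        struct sdio_func *func = dev_to_sdio_func(dev);
        mmc_pm_flag_t caps = sdio_get_host_pm_caps(func);

        if (!(caps & MMC_PM_KEEP_POWER))
                return -ENOSYS; /* host cannot keep the card powered */

        /* Ask the host to preserve card power across the system suspend. */
        return sdio_set_host_pm_flags(func, MMC_PM_KEEP_POWER);
}

Returning an error here aborts the suspend, which is one reasonable policy for a driver that relies on the card staying powered for wake-on-LAN.
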
[akpm@linux-foundation.org: fix typo in comment] Signed-off-by: Nicolas Pitre Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/mmc/core/core.c | 12 ++++++++--- drivers/mmc/core/sdio_io.c | 49 +++++++++++++++++++++++++++++++++++++++++++ include/linux/mmc/host.h | 5 +++++ include/linux/mmc/pm.h | 30 ++++++++++++++++++++++++++ include/linux/mmc/sdio_func.h | 5 +++++ 5 files changed, 98 insertions(+), 3 deletions(-) create mode 100644 include/linux/mmc/pm.h (limited to 'include') diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 30acd5265821..f4b97d3c3d0f 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -1151,6 +1151,9 @@ void mmc_stop_host(struct mmc_host *host) cancel_delayed_work(&host->detect); mmc_flush_scheduled_work(); + /* clear pm flags now and let card drivers set them as needed */ + host->pm_flags = 0; + mmc_bus_get(host); if (host->bus_ops && !host->bus_dead) { if (host->bus_ops->remove) @@ -1273,12 +1276,13 @@ int mmc_suspend_host(struct mmc_host *host, pm_message_t state) mmc_claim_host(host); mmc_detach_bus(host); mmc_release_host(host); + host->pm_flags = 0; err = 0; } } mmc_bus_put(host); - if (!err) + if (!err && !(host->pm_flags & MMC_PM_KEEP_POWER)) mmc_power_off(host); return err; @@ -1296,8 +1300,10 @@ int mmc_resume_host(struct mmc_host *host) mmc_bus_get(host); if (host->bus_ops && !host->bus_dead) { - mmc_power_up(host); - mmc_select_voltage(host, host->ocr); + if (!(host->pm_flags & MMC_PM_KEEP_POWER)) { + mmc_power_up(host); + mmc_select_voltage(host, host->ocr); + } BUG_ON(!host->bus_ops->resume); err = host->bus_ops->resume(host); if (err) { diff --git a/drivers/mmc/core/sdio_io.c b/drivers/mmc/core/sdio_io.c index 87c618904ee2..ff27c8c71355 100644 --- a/drivers/mmc/core/sdio_io.c +++ b/drivers/mmc/core/sdio_io.c @@ -640,3 +640,52 @@ void sdio_f0_writeb(struct sdio_func *func, unsigned char b, unsigned int addr, *err_ret = ret; } EXPORT_SYMBOL_GPL(sdio_f0_writeb); + +/** + * sdio_get_host_pm_caps - get host power management capabilities + * @func: SDIO function attached to host + * + * Returns a capability bitmask corresponding to power management + * features supported by the host controller that the card function + * might rely upon during a system suspend. The host doesn't need + * to be claimed, nor the function active, for this information to be + * obtained. + */ +mmc_pm_flag_t sdio_get_host_pm_caps(struct sdio_func *func) +{ + BUG_ON(!func); + BUG_ON(!func->card); + + return func->card->host->pm_caps; +} +EXPORT_SYMBOL_GPL(sdio_get_host_pm_caps); + +/** + * sdio_set_host_pm_flags - set wanted host power management capabilities + * @func: SDIO function attached to host + * + * Set a capability bitmask corresponding to wanted host controller + * power management features for the upcoming suspend state. + * This must be called, if needed, each time the suspend method of + * the function driver is called, and must contain only bits that + * were returned by sdio_get_host_pm_caps(). + * The host doesn't need to be claimed, nor the function active, + * for this information to be set. 
+ */ +int sdio_set_host_pm_flags(struct sdio_func *func, mmc_pm_flag_t flags) +{ + struct mmc_host *host; + + BUG_ON(!func); + BUG_ON(!func->card); + + host = func->card->host; + + if (flags & ~host->pm_caps) + return -EINVAL; + + /* function suspend methods are serialized, hence no lock needed */ + host->pm_flags |= flags; + return 0; +} +EXPORT_SYMBOL_GPL(sdio_set_host_pm_flags); diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index eaf36364b7d4..43eaf5ca5848 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -14,6 +14,7 @@ #include #include +#include struct mmc_ios { unsigned int clock; /* clock rate */ @@ -152,6 +153,8 @@ struct mmc_host { #define MMC_CAP_NONREMOVABLE (1 << 8) /* Nonremovable e.g. eMMC */ #define MMC_CAP_WAIT_WHILE_BUSY (1 << 9) /* Waits while card is busy */ + mmc_pm_flag_t pm_caps; /* supported pm features */ + /* host specific block data */ unsigned int max_seg_size; /* see blk_queue_max_segment_size */ unsigned short max_hw_segs; /* see blk_queue_max_hw_segments */ @@ -197,6 +200,8 @@ struct mmc_host { struct task_struct *sdio_irq_thread; atomic_t sdio_irq_thread_abort; + mmc_pm_flag_t pm_flags; /* requested pm features */ + #ifdef CONFIG_LEDS_TRIGGERS struct led_trigger *led; /* activity led */ #endif diff --git a/include/linux/mmc/pm.h b/include/linux/mmc/pm.h new file mode 100644 index 000000000000..d37aac49cf9a --- /dev/null +++ b/include/linux/mmc/pm.h @@ -0,0 +1,30 @@ +/* + * linux/include/linux/mmc/pm.h + * + * Author: Nicolas Pitre + * Copyright: (C) 2009 Marvell Technology Group Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef LINUX_MMC_PM_H +#define LINUX_MMC_PM_H + +/* + * These flags are used to describe power management features that + * some cards (typically SDIO cards) might wish to benefit from when + * the host system is being suspended. There are several layers of + * abstractions involved, from the host controller driver, to the MMC core + * code, to the SDIO core code, to finally get to the actual SDIO function + * driver. This file is therefore used for common definitions shared across + * all those layers. + */ + +typedef unsigned int mmc_pm_flag_t; + +#define MMC_PM_KEEP_POWER (1 << 0) /* preserve card power during suspend */ +#define MMC_PM_WAKE_SDIO_IRQ (1 << 1) /* wake up host system on SDIO IRQ assertion */ + +#endif diff --git a/include/linux/mmc/sdio_func.h b/include/linux/mmc/sdio_func.h index ac3ab683fec6..c6c0cceba5fe 100644 --- a/include/linux/mmc/sdio_func.h +++ b/include/linux/mmc/sdio_func.h @@ -15,6 +15,8 @@ #include #include +#include + struct mmc_card; struct sdio_func; @@ -153,5 +155,8 @@ extern unsigned char sdio_f0_readb(struct sdio_func *func, extern void sdio_f0_writeb(struct sdio_func *func, unsigned char b, unsigned int addr, int *err_ret); +extern mmc_pm_flag_t sdio_get_host_pm_caps(struct sdio_func *func); +extern int sdio_set_host_pm_flags(struct sdio_func *func, mmc_pm_flag_t flags); + #endif -- cgit v1.2.3 From 6b5eda369ac3772dad416ef96d86064204d74770 Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Fri, 5 Mar 2010 13:43:34 -0800 Subject: sdio: put active devices into 1-bit mode during suspend And bring them back to 4-bit mode during resume. 
Signed-off-by: Daniel Drake Signed-off-by: Nicolas Pitre Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/mmc/core/sdio.c | 43 +++++++++++++++++++++++++++++++++++++++++++ include/linux/mmc/sdio.h | 2 ++ 2 files changed, 45 insertions(+) (limited to 'include') diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index 5840de106b69..2dd4cfe7ca17 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -187,6 +187,40 @@ static int sdio_disable_cd(struct mmc_card *card) return mmc_io_rw_direct(card, 1, 0, SDIO_CCCR_IF, ctrl, NULL); } +/* + * Devices that remain active during a system suspend are + * put back into 1-bit mode. + */ +static int sdio_disable_wide(struct mmc_card *card) +{ + int ret; + u8 ctrl; + + if (!(card->host->caps & MMC_CAP_4_BIT_DATA)) + return 0; + + if (card->cccr.low_speed && !card->cccr.wide_bus) + return 0; + + ret = mmc_io_rw_direct(card, 0, 0, SDIO_CCCR_IF, 0, &ctrl); + if (ret) + return ret; + + if (!(ctrl & SDIO_BUS_WIDTH_4BIT)) + return 0; + + ctrl &= ~SDIO_BUS_WIDTH_4BIT; + ctrl |= SDIO_BUS_ASYNC_INT; + + ret = mmc_io_rw_direct(card, 1, 0, SDIO_CCCR_IF, ctrl, NULL); + if (ret) + return ret; + + mmc_set_bus_width(card->host, MMC_BUS_WIDTH_1); + + return 0; +} + /* * Test if the card supports high-speed mode and, if so, switch to it. */ @@ -427,6 +461,12 @@ static int mmc_sdio_suspend(struct mmc_host *host) } } + if (!err && host->pm_flags & MMC_PM_KEEP_POWER) { + mmc_claim_host(host); + sdio_disable_wide(host->card); + mmc_release_host(host); + } + return err; } @@ -441,6 +481,9 @@ static int mmc_sdio_resume(struct mmc_host *host) mmc_claim_host(host); err = mmc_sdio_init_card(host, host->ocr, host->card, (host->pm_flags & MMC_PM_KEEP_POWER)); + if (!err) + /* We may have switched to 1-bit mode during suspend. */ + err = sdio_enable_wide(host->card); if (!err && host->sdio_irqs) mmc_signal_sdio_irq(host); mmc_release_host(host); diff --git a/include/linux/mmc/sdio.h b/include/linux/mmc/sdio.h index 47ba464f5170..0ebaef577ff5 100644 --- a/include/linux/mmc/sdio.h +++ b/include/linux/mmc/sdio.h @@ -95,6 +95,8 @@ #define SDIO_BUS_WIDTH_1BIT 0x00 #define SDIO_BUS_WIDTH_4BIT 0x02 +#define SDIO_BUS_ASYNC_INT 0x20 + #define SDIO_BUS_CD_DISABLE 0x80 /* disable pull-up on DAT3 (pin 1) */ #define SDIO_CCCR_CAPS 0x08 -- cgit v1.2.3 From 088e7af73a962fcc8883b7a6392544d8342553d6 Mon Sep 17 00:00:00 2001 From: Daisuke HATAYAMA Date: Fri, 5 Mar 2010 13:44:06 -0800 Subject: coredump: move dump_write() and dump_seek() into a header file My next patch will replace ELF_CORE_EXTRA_* macros by functions, putting them into other newly created *.c files. Then, each files will contain dump_write(), where each pair of binfmt_*.c and elfcore.c should be the same. So, this patch moves them into a header file with dump_seek(). Also, the patch deletes confusing DUMP_WRITE macros in each files. 
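A minimal sketch of the calling convention after this change, using only the helpers from the new <linux/coredump.h> added further down in this patch (the wrapper function itself is hypothetical): instead of a DUMP_WRITE macro hiding a goto, callers test the return values directly.

#include <linux/coredump.h>

static int sketch_emit_header(struct file *file, const void *hdr, size_t len)
{
        if (!dump_write(file, hdr, len))
                return 0;
        /* pad to a page boundary, as the a.out dumper in the diff below does */
        return dump_seek(file, PAGE_SIZE - len);
}
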
Signed-off-by: Daisuke HATAYAMA Cc: "Luck, Tony" Cc: Jeff Dike Cc: David Howells Cc: Greg Ungerer Cc: Roland McGrath Cc: Oleg Nesterov Cc: Ingo Molnar Cc: Alexander Viro Cc: Andi Kleen Cc: Alan Cox Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_aout.c | 49 ++++++++----------------------------------- fs/binfmt_elf.c | 52 +++++++++++++--------------------------------- fs/binfmt_elf_fdpic.c | 54 ++++++++++++++---------------------------------- include/linux/coredump.h | 41 ++++++++++++++++++++++++++++++++++++ 4 files changed, 79 insertions(+), 117 deletions(-) create mode 100644 include/linux/coredump.h (limited to 'include') diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index d2f8872dd767..15d80bb35d6f 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -59,42 +60,6 @@ static int set_brk(unsigned long start, unsigned long end) return 0; } -/* - * These are the only things you should do on a core-file: use only these - * macros to write out all the necessary info. - */ - -static int dump_write(struct file *file, const void *addr, int nr) -{ - return file->f_op->write(file, addr, nr, &file->f_pos) == nr; -} - -static int dump_seek(struct file *file, loff_t off) -{ - if (file->f_op->llseek && file->f_op->llseek != no_llseek) { - if (file->f_op->llseek(file, off, SEEK_CUR) < 0) - return 0; - } else { - char *buf = (char *)get_zeroed_page(GFP_KERNEL); - if (!buf) - return 0; - while (off > 0) { - unsigned long n = off; - if (n > PAGE_SIZE) - n = PAGE_SIZE; - if (!dump_write(file, buf, n)) - return 0; - off -= n; - } - free_page((unsigned long)buf); - } - return 1; -} - -#define DUMP_WRITE(addr, nr) \ - if (!dump_write(file, (void *)(addr), (nr))) \ - goto end_coredump; - /* * Routine writes a core dump image in the current directory. * Currently only a stub-function. @@ -146,7 +111,8 @@ static int aout_core_dump(struct coredump_params *cprm) set_fs(KERNEL_DS); /* struct user */ - DUMP_WRITE(&dump,sizeof(dump)); + if (!dump_write(file, &dump, sizeof(dump))) + goto end_coredump; /* Now dump all of the user data. Include malloced stuff as well */ if (!dump_seek(cprm->file, PAGE_SIZE - sizeof(dump))) goto end_coredump; @@ -156,17 +122,20 @@ static int aout_core_dump(struct coredump_params *cprm) if (dump.u_dsize != 0) { dump_start = START_DATA(dump); dump_size = dump.u_dsize << PAGE_SHIFT; - DUMP_WRITE(dump_start,dump_size); + if (!dump_write(file, dump_start, dump_size)) + goto end_coredump; } /* Now prepare to dump the stack area */ if (dump.u_ssize != 0) { dump_start = START_STACK(dump); dump_size = dump.u_ssize << PAGE_SHIFT; - DUMP_WRITE(dump_start,dump_size); + if (!dump_write(file, dump_start, dump_size)) + goto end_coredump; } /* Finally dump the task struct. Not be used by gdb, but could be useful */ set_fs(KERNEL_DS); - DUMP_WRITE(current,sizeof(*current)); + if (!dump_write(file, current, sizeof(*current))) + goto end_coredump; end_coredump: set_fs(fs); return has_dumped; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index fd5b2ea5d299..0bcfbb05c32d 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -1085,36 +1086,6 @@ out: * Modelled on fs/exec.c:aout_core_dump() * Jeremy Fitzhardinge */ -/* - * These are the only things you should do on a core-file: use only these - * functions to write out all the necessary info. 
- */ -static int dump_write(struct file *file, const void *addr, int nr) -{ - return file->f_op->write(file, addr, nr, &file->f_pos) == nr; -} - -static int dump_seek(struct file *file, loff_t off) -{ - if (file->f_op->llseek && file->f_op->llseek != no_llseek) { - if (file->f_op->llseek(file, off, SEEK_CUR) < 0) - return 0; - } else { - char *buf = (char *)get_zeroed_page(GFP_KERNEL); - if (!buf) - return 0; - while (off > 0) { - unsigned long n = off; - if (n > PAGE_SIZE) - n = PAGE_SIZE; - if (!dump_write(file, buf, n)) - return 0; - off -= n; - } - free_page((unsigned long)buf); - } - return 1; -} /* * Decide what to dump of a segment, part, all or none. @@ -1249,11 +1220,6 @@ static int writenote(struct memelfnote *men, struct file *file, } #undef DUMP_WRITE -#define DUMP_WRITE(addr, nr) \ - if ((size += (nr)) > cprm->limit || \ - !dump_write(cprm->file, (addr), (nr))) \ - goto end_coredump; - static void fill_elf_header(struct elfhdr *elf, int segs, u16 machine, u32 flags, u8 osabi) { @@ -1934,7 +1900,10 @@ static int elf_core_dump(struct coredump_params *cprm) fs = get_fs(); set_fs(KERNEL_DS); - DUMP_WRITE(elf, sizeof(*elf)); + size += sizeof(*elf); + if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf))) + goto end_coredump; + offset += sizeof(*elf); /* Elf header */ offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */ foffset = offset; @@ -1948,7 +1917,11 @@ static int elf_core_dump(struct coredump_params *cprm) fill_elf_note_phdr(&phdr, sz, offset); offset += sz; - DUMP_WRITE(&phdr, sizeof(phdr)); + + size += sizeof(phdr); + if (size > cprm->limit + || !dump_write(cprm->file, &phdr, sizeof(phdr))) + goto end_coredump; } dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE); @@ -1979,7 +1952,10 @@ static int elf_core_dump(struct coredump_params *cprm) phdr.p_flags |= PF_X; phdr.p_align = ELF_EXEC_PAGESIZE; - DUMP_WRITE(&phdr, sizeof(phdr)); + size += sizeof(phdr); + if (size > cprm->limit + || !dump_write(cprm->file, &phdr, sizeof(phdr))) + goto end_coredump; } #ifdef ELF_CORE_WRITE_EXTRA_PHDRS diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 32d9b44c3cb9..63edf40b569b 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -1215,37 +1216,6 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params, */ #ifdef CONFIG_ELF_CORE -/* - * These are the only things you should do on a core-file: use only these - * functions to write out all the necessary info. - */ -static int dump_write(struct file *file, const void *addr, int nr) -{ - return file->f_op->write(file, addr, nr, &file->f_pos) == nr; -} - -static int dump_seek(struct file *file, loff_t off) -{ - if (file->f_op->llseek && file->f_op->llseek != no_llseek) { - if (file->f_op->llseek(file, off, SEEK_CUR) < 0) - return 0; - } else { - char *buf = (char *)get_zeroed_page(GFP_KERNEL); - if (!buf) - return 0; - while (off > 0) { - unsigned long n = off; - if (n > PAGE_SIZE) - n = PAGE_SIZE; - if (!dump_write(file, buf, n)) - return 0; - off -= n; - } - free_page((unsigned long)buf); - } - return 1; -} - /* * Decide whether a segment is worth dumping; default is yes to be * sure (missing info is worse than too much; etc). 
@@ -1354,11 +1324,6 @@ static int writenote(struct memelfnote *men, struct file *file, } #undef DUMP_WRITE -#define DUMP_WRITE(addr, nr) \ - if ((size += (nr)) > cprm->limit || \ - !dump_write(cprm->file, (addr), (nr))) \ - goto end_coredump; - static inline void fill_elf_fdpic_header(struct elfhdr *elf, int segs) { memcpy(elf->e_ident, ELFMAG, SELFMAG); @@ -1743,7 +1708,11 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) fs = get_fs(); set_fs(KERNEL_DS); - DUMP_WRITE(elf, sizeof(*elf)); + size += sizeof(*elf); + if (size > cprm->limit + || !dump_write(cprm->file, elf, sizeof(*elf))) + goto end_coredump; + offset += sizeof(*elf); /* Elf header */ offset += (segs+1) * sizeof(struct elf_phdr); /* Program headers */ foffset = offset; @@ -1760,7 +1729,11 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) fill_elf_note_phdr(&phdr, sz, offset); offset += sz; - DUMP_WRITE(&phdr, sizeof(phdr)); + + size += sizeof(phdr); + if (size > cprm->limit + || !dump_write(cprm->file, &phdr, sizeof(phdr))) + goto end_coredump; } /* Page-align dumped data */ @@ -1794,7 +1767,10 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) phdr.p_flags |= PF_X; phdr.p_align = ELF_EXEC_PAGESIZE; - DUMP_WRITE(&phdr, sizeof(phdr)); + size += sizeof(phdr); + if (size > cprm->limit + || !dump_write(cprm->file, &phdr, sizeof(phdr))) + goto end_coredump; } #ifdef ELF_CORE_WRITE_EXTRA_PHDRS diff --git a/include/linux/coredump.h b/include/linux/coredump.h new file mode 100644 index 000000000000..b3c91d7cede4 --- /dev/null +++ b/include/linux/coredump.h @@ -0,0 +1,41 @@ +#ifndef _LINUX_COREDUMP_H +#define _LINUX_COREDUMP_H + +#include +#include +#include + +/* + * These are the only things you should do on a core-file: use only these + * functions to write out all the necessary info. + */ +static inline int dump_write(struct file *file, const void *addr, int nr) +{ + return file->f_op->write(file, addr, nr, &file->f_pos) == nr; +} + +static inline int dump_seek(struct file *file, loff_t off) +{ + if (file->f_op->llseek && file->f_op->llseek != no_llseek) { + if (file->f_op->llseek(file, off, SEEK_CUR) < 0) + return 0; + } else { + char *buf = (char *)get_zeroed_page(GFP_KERNEL); + + if (!buf) + return 0; + while (off > 0) { + unsigned long n = off; + + if (n > PAGE_SIZE) + n = PAGE_SIZE; + if (!dump_write(file, buf, n)) + return 0; + off -= n; + } + free_page((unsigned long)buf); + } + return 1; +} + +#endif /* _LINUX_COREDUMP_H */ -- cgit v1.2.3 From 1fcccbac89f5bbc5e41aa72086960059fce372da Mon Sep 17 00:00:00 2001 From: Daisuke HATAYAMA Date: Fri, 5 Mar 2010 13:44:07 -0800 Subject: elf coredump: replace ELF_CORE_EXTRA_* macros by functions elf_core_dump() and elf_fdpic_core_dump() use #ifdef and the corresponding macro for hiding _multiline_ logics in functions. This patch removes #ifdef and replaces ELF_CORE_EXTRA_* by corresponding functions. For architectures not implemeonting ELF_CORE_EXTRA_*, we use weak functions in order to reduce a range of modification. This cleanup is for my next patches, but I think this cleanup itself is worth doing regardless of my firnal purpose. 
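For illustration, a minimal user-space sketch of the __weak override pattern this conversion relies on; the three file names and the value returned by the strong definition are made up, only the link-time behaviour is the point:

/* weak_default.c, playing the role of kernel/elfcore.c */
int __attribute__((weak)) elf_core_extra_phdrs(void)
{
	return 0;		/* generic default: no extra program headers */
}

/* arch_override.c, playing the role of arch/ia64/kernel/elfcore.c */
int elf_core_extra_phdrs(void)
{
	return 3;		/* strong definition, wins at link time */
}

/* main.c, linked together with both objects above */
#include <stdio.h>

int elf_core_extra_phdrs(void);

int main(void)
{
	/* prints 3: the arch-specific strong symbol replaces the weak default */
	printf("%d extra phdrs\n", elf_core_extra_phdrs());
	return 0;
}

Architectures with nothing to add simply never define the strong symbols, and the dumpers call the trivial weak defaults with no #ifdef in sight.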
Signed-off-by: Daisuke HATAYAMA Cc: "Luck, Tony" Cc: Jeff Dike Cc: David Howells Cc: Greg Ungerer Cc: Roland McGrath Cc: Oleg Nesterov Cc: Ingo Molnar Cc: Alexander Viro Cc: Andi Kleen Cc: Alan Cox Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/include/asm/elf.h | 48 -------------------------------- arch/ia64/kernel/Makefile | 2 ++ arch/ia64/kernel/elfcore.c | 64 +++++++++++++++++++++++++++++++++++++++++++ arch/um/sys-i386/Makefile | 2 ++ arch/um/sys-i386/asm/elf.h | 43 ----------------------------- arch/um/sys-i386/elfcore.c | 67 +++++++++++++++++++++++++++++++++++++++++++++ fs/binfmt_elf.c | 14 ++++------ fs/binfmt_elf_fdpic.c | 14 ++++------ fs/compat_binfmt_elf.c | 2 ++ include/linux/elf.h | 2 ++ include/linux/elfcore.h | 16 +++++++++++ kernel/Makefile | 3 ++ kernel/elfcore.c | 23 ++++++++++++++++ 13 files changed, 191 insertions(+), 109 deletions(-) create mode 100644 arch/ia64/kernel/elfcore.c create mode 100644 arch/um/sys-i386/elfcore.c create mode 100644 kernel/elfcore.c (limited to 'include') diff --git a/arch/ia64/include/asm/elf.h b/arch/ia64/include/asm/elf.h index 4c41656ede87..b5298eb09adb 100644 --- a/arch/ia64/include/asm/elf.h +++ b/arch/ia64/include/asm/elf.h @@ -219,54 +219,6 @@ do { \ NEW_AUX_ENT(AT_SYSINFO_EHDR, (unsigned long) GATE_EHDR); \ } while (0) - -/* - * These macros parameterize elf_core_dump in fs/binfmt_elf.c to write out - * extra segments containing the gate DSO contents. Dumping its - * contents makes post-mortem fully interpretable later without matching up - * the same kernel and hardware config to see what PC values meant. - * Dumping its extra ELF program headers includes all the other information - * a debugger needs to easily find how the gate DSO was being used. - */ -#define ELF_CORE_EXTRA_PHDRS (GATE_EHDR->e_phnum) -#define ELF_CORE_WRITE_EXTRA_PHDRS \ -do { \ - const struct elf_phdr *const gate_phdrs = \ - (const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff); \ - int i; \ - Elf64_Off ofs = 0; \ - for (i = 0; i < GATE_EHDR->e_phnum; ++i) { \ - struct elf_phdr phdr = gate_phdrs[i]; \ - if (phdr.p_type == PT_LOAD) { \ - phdr.p_memsz = PAGE_ALIGN(phdr.p_memsz); \ - phdr.p_filesz = phdr.p_memsz; \ - if (ofs == 0) { \ - ofs = phdr.p_offset = offset; \ - offset += phdr.p_filesz; \ - } \ - else \ - phdr.p_offset = ofs; \ - } \ - else \ - phdr.p_offset += ofs; \ - phdr.p_paddr = 0; /* match other core phdrs */ \ - DUMP_WRITE(&phdr, sizeof(phdr)); \ - } \ -} while (0) -#define ELF_CORE_WRITE_EXTRA_DATA \ -do { \ - const struct elf_phdr *const gate_phdrs = \ - (const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff); \ - int i; \ - for (i = 0; i < GATE_EHDR->e_phnum; ++i) { \ - if (gate_phdrs[i].p_type == PT_LOAD) { \ - DUMP_WRITE((void *) gate_phdrs[i].p_vaddr, \ - PAGE_ALIGN(gate_phdrs[i].p_memsz)); \ - break; \ - } \ - } \ -} while (0) - /* * format for entries in the Global Offset Table */ diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index 4138282aefa8..db10b1e378b0 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -45,6 +45,8 @@ endif obj-$(CONFIG_DMAR) += pci-dma.o obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o +obj-$(CONFIG_BINFMT_ELF) += elfcore.o + # fp_emulate() expects f2-f5,f16-f31 to contain the user-level state. 
CFLAGS_traps.o += -mfixed-range=f2-f5,f16-f31 diff --git a/arch/ia64/kernel/elfcore.c b/arch/ia64/kernel/elfcore.c new file mode 100644 index 000000000000..57a2298a8581 --- /dev/null +++ b/arch/ia64/kernel/elfcore.c @@ -0,0 +1,64 @@ +#include +#include +#include +#include + +#include + + +Elf64_Half elf_core_extra_phdrs(void) +{ + return GATE_EHDR->e_phnum; +} + +int elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size, + unsigned long limit) +{ + const struct elf_phdr *const gate_phdrs = + (const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff); + int i; + Elf64_Off ofs = 0; + + for (i = 0; i < GATE_EHDR->e_phnum; ++i) { + struct elf_phdr phdr = gate_phdrs[i]; + + if (phdr.p_type == PT_LOAD) { + phdr.p_memsz = PAGE_ALIGN(phdr.p_memsz); + phdr.p_filesz = phdr.p_memsz; + if (ofs == 0) { + ofs = phdr.p_offset = offset; + offset += phdr.p_filesz; + } else { + phdr.p_offset = ofs; + } + } else { + phdr.p_offset += ofs; + } + phdr.p_paddr = 0; /* match other core phdrs */ + *size += sizeof(phdr); + if (*size > limit || !dump_write(file, &phdr, sizeof(phdr))) + return 0; + } + return 1; +} + +int elf_core_write_extra_data(struct file *file, size_t *size, + unsigned long limit) +{ + const struct elf_phdr *const gate_phdrs = + (const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff); + int i; + + for (i = 0; i < GATE_EHDR->e_phnum; ++i) { + if (gate_phdrs[i].p_type == PT_LOAD) { + void *addr = (void *)gate_phdrs[i].p_vaddr; + size_t memsz = PAGE_ALIGN(gate_phdrs[i].p_memsz); + + *size += memsz; + if (*size > limit || !dump_write(file, addr, memsz)) + return 0; + break; + } + } + return 1; +} diff --git a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile index 1b549bca4645..804b28dd0328 100644 --- a/arch/um/sys-i386/Makefile +++ b/arch/um/sys-i386/Makefile @@ -6,6 +6,8 @@ obj-y = bug.o bugs.o checksum.o delay.o fault.o ksyms.o ldt.o ptrace.o \ ptrace_user.o setjmp.o signal.o stub.o stub_segv.o syscalls.o sysrq.o \ sys_call_table.o tls.o +obj-$(CONFIG_BINFMT_ELF) += elfcore.o + subarch-obj-y = lib/semaphore_32.o lib/string_32.o subarch-obj-$(CONFIG_HIGHMEM) += mm/highmem_32.o subarch-obj-$(CONFIG_MODULES) += kernel/module.o diff --git a/arch/um/sys-i386/asm/elf.h b/arch/um/sys-i386/asm/elf.h index 770885472ed4..e64cd41d7bab 100644 --- a/arch/um/sys-i386/asm/elf.h +++ b/arch/um/sys-i386/asm/elf.h @@ -116,47 +116,4 @@ do { \ } \ } while (0) -/* - * These macros parameterize elf_core_dump in fs/binfmt_elf.c to write out - * extra segments containing the vsyscall DSO contents. Dumping its - * contents makes post-mortem fully interpretable later without matching up - * the same kernel and hardware config to see what PC values meant. - * Dumping its extra ELF program headers includes all the other information - * a debugger needs to easily find how the vsyscall DSO was being used. - */ -#define ELF_CORE_EXTRA_PHDRS \ - (vsyscall_ehdr ? 
(((struct elfhdr *)vsyscall_ehdr)->e_phnum) : 0 ) - -#define ELF_CORE_WRITE_EXTRA_PHDRS \ -if ( vsyscall_ehdr ) { \ - const struct elfhdr *const ehdrp = (struct elfhdr *)vsyscall_ehdr; \ - const struct elf_phdr *const phdrp = \ - (const struct elf_phdr *) (vsyscall_ehdr + ehdrp->e_phoff); \ - int i; \ - Elf32_Off ofs = 0; \ - for (i = 0; i < ehdrp->e_phnum; ++i) { \ - struct elf_phdr phdr = phdrp[i]; \ - if (phdr.p_type == PT_LOAD) { \ - ofs = phdr.p_offset = offset; \ - offset += phdr.p_filesz; \ - } \ - else \ - phdr.p_offset += ofs; \ - phdr.p_paddr = 0; /* match other core phdrs */ \ - DUMP_WRITE(&phdr, sizeof(phdr)); \ - } \ -} -#define ELF_CORE_WRITE_EXTRA_DATA \ -if ( vsyscall_ehdr ) { \ - const struct elfhdr *const ehdrp = (struct elfhdr *)vsyscall_ehdr; \ - const struct elf_phdr *const phdrp = \ - (const struct elf_phdr *) (vsyscall_ehdr + ehdrp->e_phoff); \ - int i; \ - for (i = 0; i < ehdrp->e_phnum; ++i) { \ - if (phdrp[i].p_type == PT_LOAD) \ - DUMP_WRITE((void *) phdrp[i].p_vaddr, \ - phdrp[i].p_filesz); \ - } \ -} - #endif diff --git a/arch/um/sys-i386/elfcore.c b/arch/um/sys-i386/elfcore.c new file mode 100644 index 000000000000..30cac52a04b4 --- /dev/null +++ b/arch/um/sys-i386/elfcore.c @@ -0,0 +1,67 @@ +#include +#include +#include +#include + +#include + + +Elf32_Half elf_core_extra_phdrs(void) +{ + return vsyscall_ehdr ? (((struct elfhdr *)vsyscall_ehdr)->e_phnum) : 0; +} + +int elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size, + unsigned long limit) +{ + if ( vsyscall_ehdr ) { + const struct elfhdr *const ehdrp = + (struct elfhdr *) vsyscall_ehdr; + const struct elf_phdr *const phdrp = + (const struct elf_phdr *) (vsyscall_ehdr + ehdrp->e_phoff); + int i; + Elf32_Off ofs = 0; + + for (i = 0; i < ehdrp->e_phnum; ++i) { + struct elf_phdr phdr = phdrp[i]; + + if (phdr.p_type == PT_LOAD) { + ofs = phdr.p_offset = offset; + offset += phdr.p_filesz; + } else { + phdr.p_offset += ofs; + } + phdr.p_paddr = 0; /* match other core phdrs */ + *size += sizeof(phdr); + if (*size > limit + || !dump_write(file, &phdr, sizeof(phdr))) + return 0; + } + } + return 1; +} + +int elf_core_write_extra_data(struct file *file, size_t *size, + unsigned long limit) +{ + if ( vsyscall_ehdr ) { + const struct elfhdr *const ehdrp = + (struct elfhdr *) vsyscall_ehdr; + const struct elf_phdr *const phdrp = + (const struct elf_phdr *) (vsyscall_ehdr + ehdrp->e_phoff); + int i; + + for (i = 0; i < ehdrp->e_phnum; ++i) { + if (phdrp[i].p_type == PT_LOAD) { + void *addr = (void *) phdrp[i].p_vaddr; + size_t filesz = phdrp[i].p_filesz; + + *size += filesz; + if (*size > limit + || !dump_write(file, addr, filesz)) + return 0; + } + } + } + return 1; +} diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 0bcfbb05c32d..c1a499599b7d 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1878,9 +1878,7 @@ static int elf_core_dump(struct coredump_params *cprm) * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here. 
*/ segs = current->mm->map_count; -#ifdef ELF_CORE_EXTRA_PHDRS - segs += ELF_CORE_EXTRA_PHDRS; -#endif + segs += elf_core_extra_phdrs(); gate_vma = get_gate_vma(current); if (gate_vma != NULL) @@ -1958,9 +1956,8 @@ static int elf_core_dump(struct coredump_params *cprm) goto end_coredump; } -#ifdef ELF_CORE_WRITE_EXTRA_PHDRS - ELF_CORE_WRITE_EXTRA_PHDRS; -#endif + if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit)) + goto end_coredump; /* write out the notes section */ if (!write_note_info(&info, cprm->file, &foffset)) @@ -1999,9 +1996,8 @@ static int elf_core_dump(struct coredump_params *cprm) } } -#ifdef ELF_CORE_WRITE_EXTRA_DATA - ELF_CORE_WRITE_EXTRA_DATA; -#endif + if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit)) + goto end_coredump; end_coredump: set_fs(fs); diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 63edf40b569b..952699a86ec3 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1664,9 +1664,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) elf_core_copy_regs(&prstatus->pr_reg, cprm->regs); segs = current->mm->map_count; -#ifdef ELF_CORE_EXTRA_PHDRS - segs += ELF_CORE_EXTRA_PHDRS; -#endif + segs += elf_core_extra_phdrs(); /* Set up header */ fill_elf_fdpic_header(elf, segs + 1); /* including notes section */ @@ -1773,9 +1771,8 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) goto end_coredump; } -#ifdef ELF_CORE_WRITE_EXTRA_PHDRS - ELF_CORE_WRITE_EXTRA_PHDRS; -#endif + if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit)) + goto end_coredump; /* write out the notes section */ for (i = 0; i < numnote; i++) @@ -1799,9 +1796,8 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) mm_flags) < 0) goto end_coredump; -#ifdef ELF_CORE_WRITE_EXTRA_DATA - ELF_CORE_WRITE_EXTRA_DATA; -#endif + if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit)) + goto end_coredump; if (cprm->file->f_pos != offset) { /* Sanity check */ diff --git a/fs/compat_binfmt_elf.c b/fs/compat_binfmt_elf.c index 0adced2f296f..112e45a17e99 100644 --- a/fs/compat_binfmt_elf.c +++ b/fs/compat_binfmt_elf.c @@ -28,10 +28,12 @@ #undef elfhdr #undef elf_phdr +#undef elf_shdr #undef elf_note #undef elf_addr_t #define elfhdr elf32_hdr #define elf_phdr elf32_phdr +#define elf_shdr elf32_shdr #define elf_note elf32_note #define elf_addr_t Elf32_Addr diff --git a/include/linux/elf.h b/include/linux/elf.h index ad990c5f63f6..ccde3fd45f36 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -396,6 +396,7 @@ extern Elf32_Dyn _DYNAMIC []; #define elf_phdr elf32_phdr #define elf_note elf32_note #define elf_addr_t Elf32_Off +#define Elf_Half Elf32_Half #else @@ -404,6 +405,7 @@ extern Elf64_Dyn _DYNAMIC []; #define elf_phdr elf64_phdr #define elf_note elf64_note #define elf_addr_t Elf64_Off +#define Elf_Half Elf64_Half #endif diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index 00d6a68d0421..cfda74f521b5 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -8,6 +8,8 @@ #include #endif #include +#include +#include struct elf_siginfo { @@ -150,5 +152,19 @@ static inline int elf_core_copy_task_xfpregs(struct task_struct *t, elf_fpxregse #endif /* __KERNEL__ */ +/* + * These functions parameterize elf_core_dump in fs/binfmt_elf.c to write out + * extra segments containing the gate DSO contents. Dumping its + * contents makes post-mortem fully interpretable later without matching up + * the same kernel and hardware config to see what PC values meant. 
+ * Dumping its extra ELF program headers includes all the other information + * a debugger needs to easily find how the gate DSO was being used. + */ +extern Elf_Half elf_core_extra_phdrs(void); +extern int +elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size, + unsigned long limit); +extern int +elf_core_write_extra_data(struct file *file, size_t *size, unsigned long limit); #endif /* _LINUX_ELFCORE_H */ diff --git a/kernel/Makefile b/kernel/Makefile index 7b974699f8c2..a987aa1676b5 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -91,6 +91,9 @@ obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o obj-$(CONFIG_TRACEPOINTS) += tracepoint.o obj-$(CONFIG_LATENCYTOP) += latencytop.o +obj-$(CONFIG_BINFMT_ELF) += elfcore.o +obj-$(CONFIG_COMPAT_BINFMT_ELF) += elfcore.o +obj-$(CONFIG_BINFMT_ELF_FDPIC) += elfcore.o obj-$(CONFIG_FUNCTION_TRACER) += trace/ obj-$(CONFIG_TRACING) += trace/ obj-$(CONFIG_X86_DS) += trace/ diff --git a/kernel/elfcore.c b/kernel/elfcore.c new file mode 100644 index 000000000000..5445741f4b4c --- /dev/null +++ b/kernel/elfcore.c @@ -0,0 +1,23 @@ +#include +#include +#include + +#include + + +Elf_Half __weak elf_core_extra_phdrs(void) +{ + return 0; +} + +int __weak elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size, + unsigned long limit) +{ + return 1; +} + +int __weak elf_core_write_extra_data(struct file *file, size_t *size, + unsigned long limit) +{ + return 1; +} -- cgit v1.2.3 From 8d9032bbe4671dc481261ccd4e161cd96e54b118 Mon Sep 17 00:00:00 2001 From: Daisuke HATAYAMA Date: Fri, 5 Mar 2010 13:44:10 -0800 Subject: elf coredump: add extended numbering support The current ELF dumper implementation can produce broken corefiles if program headers exceed 65535. This number is determined by the number of vmas which the process have. In particular, some extreme programs may use more than 65535 vmas. (If you google max_map_count, you can find some users facing this problem.) This kind of program never be able to generate correct coredumps. This patch implements ``extended numbering'' that uses sh_info field of the first section header instead of e_phnum field in order to represent upto 4294967295 vmas. This is supported by AMD64-ABI(http://www.x86-64.org/documentation.html) and Solaris(http://docs.sun.com/app/docs/doc/817-1984/). Of course, we are preparing patches for gdb and binutils. 
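To make the consumer side concrete, here is a hedged user-space sketch of how a core-file reader recovers the real program header count once the dumper emits PN_XNUM; it leans on the glibc <elf.h> definitions and drops most error handling for brevity:

#include <elf.h>
#include <stdio.h>
#include <stdlib.h>

static size_t real_phnum(FILE *f)
{
	Elf64_Ehdr eh;
	Elf64_Shdr sh0;

	fread(&eh, sizeof(eh), 1, f);
	if (eh.e_phnum != PN_XNUM)
		return eh.e_phnum;	/* ordinary case, count fits in e_phnum */

	/* overflow case: the true count lives in sh_info of section header 0 */
	fseek(f, (long)eh.e_shoff, SEEK_SET);
	fread(&sh0, sizeof(sh0), 1, f);
	return sh0.sh_info;
}

int main(int argc, char **argv)
{
	FILE *f;

	if (argc < 2 || !(f = fopen(argv[1], "rb")))
		return EXIT_FAILURE;
	printf("%zu program headers\n", real_phnum(f));
	fclose(f);
	return 0;
}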
Signed-off-by: Daisuke HATAYAMA Cc: "Luck, Tony" Cc: Jeff Dike Cc: David Howells Cc: Greg Ungerer Cc: Roland McGrath Cc: Oleg Nesterov Cc: Ingo Molnar Cc: Alexander Viro Cc: Andi Kleen Cc: Alan Cox Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/kernel/elfcore.c | 16 +++++++++++ arch/um/sys-i386/elfcore.c | 16 +++++++++++ fs/binfmt_elf.c | 66 +++++++++++++++++++++++++++++++++++++++++++--- fs/binfmt_elf_fdpic.c | 63 +++++++++++++++++++++++++++++++++++++++++-- include/linux/elf.h | 26 +++++++++++++++++- include/linux/elfcore.h | 1 + kernel/elfcore.c | 5 ++++ 7 files changed, 187 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/arch/ia64/kernel/elfcore.c b/arch/ia64/kernel/elfcore.c index 57a2298a8581..bac1639bc320 100644 --- a/arch/ia64/kernel/elfcore.c +++ b/arch/ia64/kernel/elfcore.c @@ -62,3 +62,19 @@ int elf_core_write_extra_data(struct file *file, size_t *size, } return 1; } + +size_t elf_core_extra_data_size(void) +{ + const struct elf_phdr *const gate_phdrs = + (const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff); + int i; + size_t size = 0; + + for (i = 0; i < GATE_EHDR->e_phnum; ++i) { + if (gate_phdrs[i].p_type == PT_LOAD) { + size += PAGE_ALIGN(gate_phdrs[i].p_memsz); + break; + } + } + return size; +} diff --git a/arch/um/sys-i386/elfcore.c b/arch/um/sys-i386/elfcore.c index 30cac52a04b4..6bb49b687c97 100644 --- a/arch/um/sys-i386/elfcore.c +++ b/arch/um/sys-i386/elfcore.c @@ -65,3 +65,19 @@ int elf_core_write_extra_data(struct file *file, size_t *size, } return 1; } + +size_t elf_core_extra_data_size(void) +{ + if ( vsyscall_ehdr ) { + const struct elfhdr *const ehdrp = + (struct elfhdr *)vsyscall_ehdr; + const struct elf_phdr *const phdrp = + (const struct elf_phdr *) (vsyscall_ehdr + ehdrp->e_phoff); + int i; + + for (i = 0; i < ehdrp->e_phnum; ++i) + if (phdrp[i].p_type == PT_LOAD) + return (size_t) phdrp[i].p_filesz; + } + return 0; +} diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 6fc49b6ed936..78de530cfb02 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1838,6 +1838,34 @@ static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma, return gate_vma; } +static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum, + elf_addr_t e_shoff, int segs) +{ + elf->e_shoff = e_shoff; + elf->e_shentsize = sizeof(*shdr4extnum); + elf->e_shnum = 1; + elf->e_shstrndx = SHN_UNDEF; + + memset(shdr4extnum, 0, sizeof(*shdr4extnum)); + + shdr4extnum->sh_type = SHT_NULL; + shdr4extnum->sh_size = elf->e_shnum; + shdr4extnum->sh_link = elf->e_shstrndx; + shdr4extnum->sh_info = segs; +} + +static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma, + unsigned long mm_flags) +{ + struct vm_area_struct *vma; + size_t size = 0; + + for (vma = first_vma(current, gate_vma); vma != NULL; + vma = next_vma(vma, gate_vma)) + size += vma_dump_size(vma, mm_flags); + return size; +} + /* * Actual dumper * @@ -1857,6 +1885,9 @@ static int elf_core_dump(struct coredump_params *cprm) unsigned long mm_flags; struct elf_note_info info; struct elf_phdr *phdr4note = NULL; + struct elf_shdr *shdr4extnum = NULL; + Elf_Half e_phnum; + elf_addr_t e_shoff; /* * We no longer stop all VM operations. @@ -1885,12 +1916,19 @@ static int elf_core_dump(struct coredump_params *cprm) if (gate_vma != NULL) segs++; + /* for notes section */ + segs++; + + /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid + * this, kernel supports extended numbering. Have a look at + * include/linux/elf.h for further information. 
*/ + e_phnum = segs > PN_XNUM ? PN_XNUM : segs; + /* * Collect all the non-memory information about the process for the * notes. This also sets up the file header. */ - if (!fill_note_info(elf, segs + 1, /* including notes section */ - &info, cprm->signr, cprm->regs)) + if (!fill_note_info(elf, e_phnum, &info, cprm->signr, cprm->regs)) goto cleanup; has_dumped = 1; @@ -1900,7 +1938,7 @@ static int elf_core_dump(struct coredump_params *cprm) set_fs(KERNEL_DS); offset += sizeof(*elf); /* Elf header */ - offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */ + offset += segs * sizeof(struct elf_phdr); /* Program headers */ foffset = offset; /* Write notes phdr entry */ @@ -1926,6 +1964,19 @@ static int elf_core_dump(struct coredump_params *cprm) */ mm_flags = current->mm->flags; + offset += elf_core_vma_data_size(gate_vma, mm_flags); + offset += elf_core_extra_data_size(); + e_shoff = offset; + + if (e_phnum == PN_XNUM) { + shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL); + if (!shdr4extnum) + goto end_coredump; + fill_extnum_info(elf, shdr4extnum, e_shoff, segs); + } + + offset = dataoff; + size += sizeof(*elf); if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf))) goto end_coredump; @@ -2003,11 +2054,20 @@ static int elf_core_dump(struct coredump_params *cprm) if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit)) goto end_coredump; + if (e_phnum == PN_XNUM) { + size += sizeof(*shdr4extnum); + if (size > cprm->limit + || !dump_write(cprm->file, shdr4extnum, + sizeof(*shdr4extnum))) + goto end_coredump; + } + end_coredump: set_fs(fs); cleanup: free_note_info(&info); + kfree(shdr4extnum); kfree(phdr4note); kfree(elf); out: diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 112da491d75d..e49d9c06a4b6 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1505,6 +1505,22 @@ static int elf_dump_thread_status(long signr, struct elf_thread_status *t) return sz; } +static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum, + elf_addr_t e_shoff, int segs) +{ + elf->e_shoff = e_shoff; + elf->e_shentsize = sizeof(*shdr4extnum); + elf->e_shnum = 1; + elf->e_shstrndx = SHN_UNDEF; + + memset(shdr4extnum, 0, sizeof(*shdr4extnum)); + + shdr4extnum->sh_type = SHT_NULL; + shdr4extnum->sh_size = elf->e_shnum; + shdr4extnum->sh_link = elf->e_shstrndx; + shdr4extnum->sh_info = segs; +} + /* * dump the segments for an MMU process */ @@ -1569,6 +1585,17 @@ static int elf_fdpic_dump_segments(struct file *file, size_t *size, } #endif +static size_t elf_core_vma_data_size(unsigned long mm_flags) +{ + struct vm_area_struct *vma; + size_t size = 0; + + for (vma = current->mm->mmap; vma; vma->vm_next) + if (maydump(vma, mm_flags)) + size += vma->vm_end - vma->vm_start; + return size; +} + /* * Actual dumper * @@ -1601,6 +1628,9 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) elf_addr_t *auxv; unsigned long mm_flags; struct elf_phdr *phdr4note = NULL; + struct elf_shdr *shdr4extnum = NULL; + Elf_Half e_phnum; + elf_addr_t e_shoff; /* * We no longer stop all VM operations. @@ -1667,8 +1697,16 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) segs = current->mm->map_count; segs += elf_core_extra_phdrs(); + /* for notes section */ + segs++; + + /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid + * this, kernel supports extended numbering. Have a look at + * include/linux/elf.h for further information. */ + e_phnum = segs > PN_XNUM ? 
PN_XNUM : segs; + /* Set up header */ - fill_elf_fdpic_header(elf, segs + 1); /* including notes section */ + fill_elf_fdpic_header(elf, e_phnum); has_dumped = 1; current->flags |= PF_DUMPCORE; @@ -1708,7 +1746,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) set_fs(KERNEL_DS); offset += sizeof(*elf); /* Elf header */ - offset += (segs+1) * sizeof(struct elf_phdr); /* Program headers */ + offset += segs * sizeof(struct elf_phdr); /* Program headers */ foffset = offset; /* Write notes phdr entry */ @@ -1738,6 +1776,19 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) */ mm_flags = current->mm->flags; + offset += elf_core_vma_data_size(mm_flags); + offset += elf_core_extra_data_size(); + e_shoff = offset; + + if (e_phnum == PN_XNUM) { + shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL); + if (!shdr4extnum) + goto end_coredump; + fill_extnum_info(elf, shdr4extnum, e_shoff, segs); + } + + offset = dataoff; + size += sizeof(*elf); if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf))) goto end_coredump; @@ -1802,6 +1853,14 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit)) goto end_coredump; + if (e_phnum == PN_XNUM) { + size += sizeof(*shdr4extnum); + if (size > cprm->limit + || !dump_write(cprm->file, shdr4extnum, + sizeof(*shdr4extnum))) + goto end_coredump; + } + if (cprm->file->f_pos != offset) { /* Sanity check */ printk(KERN_WARNING diff --git a/include/linux/elf.h b/include/linux/elf.h index ccde3fd45f36..597858418051 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -50,6 +50,28 @@ typedef __s64 Elf64_Sxword; #define PT_GNU_STACK (PT_LOOS + 0x474e551) +/* + * Extended Numbering + * + * If the real number of program header table entries is larger than + * or equal to PN_XNUM(0xffff), it is set to sh_info field of the + * section header at index 0, and PN_XNUM is set to e_phnum + * field. Otherwise, the section header at index 0 is zero + * initialized, if it exists. + * + * Specifications are available in: + * + * - Sun microsystems: Linker and Libraries. + * Part No: 817-1984-17, September 2008. + * URL: http://docs.sun.com/app/docs/doc/817-1984 + * + * - System V ABI AMD64 Architecture Processor Supplement + * Draft Version 0.99., + * May 11, 2009. 
+ * URL: http://www.x86-64.org/ + */ +#define PN_XNUM 0xffff + /* These constants define the different elf file types */ #define ET_NONE 0 #define ET_REL 1 @@ -286,7 +308,7 @@ typedef struct elf64_phdr { #define SHN_COMMON 0xfff2 #define SHN_HIRESERVE 0xffff -typedef struct { +typedef struct elf32_shdr { Elf32_Word sh_name; Elf32_Word sh_type; Elf32_Word sh_flags; @@ -394,6 +416,7 @@ typedef struct elf64_note { extern Elf32_Dyn _DYNAMIC []; #define elfhdr elf32_hdr #define elf_phdr elf32_phdr +#define elf_shdr elf32_shdr #define elf_note elf32_note #define elf_addr_t Elf32_Off #define Elf_Half Elf32_Half @@ -403,6 +426,7 @@ extern Elf32_Dyn _DYNAMIC []; extern Elf64_Dyn _DYNAMIC []; #define elfhdr elf64_hdr #define elf_phdr elf64_phdr +#define elf_shdr elf64_shdr #define elf_note elf64_note #define elf_addr_t Elf64_Off #define Elf_Half Elf64_Half diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index cfda74f521b5..e687bc3ba4da 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -166,5 +166,6 @@ elf_core_write_extra_phdrs(struct file *file, loff_t offset, size_t *size, unsigned long limit); extern int elf_core_write_extra_data(struct file *file, size_t *size, unsigned long limit); +extern size_t elf_core_extra_data_size(void); #endif /* _LINUX_ELFCORE_H */ diff --git a/kernel/elfcore.c b/kernel/elfcore.c index 5445741f4b4c..ff915efef66d 100644 --- a/kernel/elfcore.c +++ b/kernel/elfcore.c @@ -21,3 +21,8 @@ int __weak elf_core_write_extra_data(struct file *file, size_t *size, { return 1; } + +size_t __weak elf_core_extra_data_size(void) +{ + return 0; +} -- cgit v1.2.3 From 30736a4d43f4af7f1a7836d6a266be17082195c4 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 5 Mar 2010 13:44:12 -0800 Subject: coredump: pass mm->flags as a coredump parameter for consistency Pass mm->flags as a coredump parameter for consistency. --- 1787 if (mm->core_state || !get_dumpable(mm)) { <- (1) 1788 up_write(&mm->mmap_sem); 1789 put_cred(cred); 1790 goto fail; 1791 } 1792 [...] 1798 if (get_dumpable(mm) == 2) { /* Setuid core dump mode */ <-(2) 1799 flag = O_EXCL; /* Stop rewrite attacks */ 1800 cred->fsuid = 0; /* Dump root private */ 1801 } --- Since dumpable bits are not protected by lock, there is a chance to change these bits between (1) and (2). To solve this issue, this patch copies mm->flags to coredump_params.mm_flags at the beginning of do_coredump() and uses it instead of get_dumpable() while dumping core. This copy is also passed to binfmt->core_dump, since elf*_core_dump() uses dump_filter bits in mm->flags. 
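The same idea in miniature, as a hedged user-space sketch with illustrative names: the racy flag word is copied once into the parameter block and every later decision reads that copy, so the two checks the old code made against the live mm->flags can no longer disagree:

#include <stdio.h>

#define DUMPABLE_MASK 0x3UL

struct dump_params {
	unsigned long flags_snapshot;	/* copied once, used everywhere after */
};

static int dumpable_from(unsigned long flags)
{
	unsigned long v = flags & DUMPABLE_MASK;

	return v >= 2 ? 2 : (int)v;	/* mirrors __get_dumpable() above */
}

int main(void)
{
	unsigned long shared_flags = 2;	/* imagine another thread may change this */
	struct dump_params p = { .flags_snapshot = shared_flags };

	/* both decisions read the same snapshot, never the live word */
	if (!dumpable_from(p.flags_snapshot))
		return 0;
	if (dumpable_from(p.flags_snapshot) == 2)
		printf("setuid core dump mode\n");
	return 0;
}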
[akpm@linux-foundation.org: fix merge] Signed-off-by: Masami Hiramatsu Acked-by: Roland McGrath Cc: Hidehiro Kawai Cc: Oleg Nesterov Cc: Ingo Molnar Reviewed-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 14 +++----------- fs/binfmt_elf_fdpic.c | 14 +++----------- fs/exec.c | 20 ++++++++++++++++---- include/linux/binfmts.h | 1 + 4 files changed, 23 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 78de530cfb02..535e763ab1a6 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1882,7 +1882,6 @@ static int elf_core_dump(struct coredump_params *cprm) struct vm_area_struct *vma, *gate_vma; struct elfhdr *elf = NULL; loff_t offset = 0, dataoff, foffset; - unsigned long mm_flags; struct elf_note_info info; struct elf_phdr *phdr4note = NULL; struct elf_shdr *shdr4extnum = NULL; @@ -1957,14 +1956,7 @@ static int elf_core_dump(struct coredump_params *cprm) dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE); - /* - * We must use the same mm->flags while dumping core to avoid - * inconsistency between the program headers and bodies, otherwise an - * unusable core file can be generated. - */ - mm_flags = current->mm->flags; - - offset += elf_core_vma_data_size(gate_vma, mm_flags); + offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags); offset += elf_core_extra_data_size(); e_shoff = offset; @@ -1995,7 +1987,7 @@ static int elf_core_dump(struct coredump_params *cprm) phdr.p_offset = offset; phdr.p_vaddr = vma->vm_start; phdr.p_paddr = 0; - phdr.p_filesz = vma_dump_size(vma, mm_flags); + phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags); phdr.p_memsz = vma->vm_end - vma->vm_start; offset += phdr.p_filesz; phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0; @@ -2030,7 +2022,7 @@ static int elf_core_dump(struct coredump_params *cprm) unsigned long addr; unsigned long end; - end = vma->vm_start + vma_dump_size(vma, mm_flags); + end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags); for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) { struct page *page; diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index e49d9c06a4b6..6d6a16c5e9bb 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1626,7 +1626,6 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) #endif int thread_status_size = 0; elf_addr_t *auxv; - unsigned long mm_flags; struct elf_phdr *phdr4note = NULL; struct elf_shdr *shdr4extnum = NULL; Elf_Half e_phnum; @@ -1769,14 +1768,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) /* Page-align dumped data */ dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE); - /* - * We must use the same mm->flags while dumping core to avoid - * inconsistency between the program headers and bodies, otherwise an - * unusable core file can be generated. - */ - mm_flags = current->mm->flags; - - offset += elf_core_vma_data_size(mm_flags); + offset += elf_core_vma_data_size(cprm->mm_flags); offset += elf_core_extra_data_size(); e_shoff = offset; @@ -1809,7 +1801,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) phdr.p_offset = offset; phdr.p_vaddr = vma->vm_start; phdr.p_paddr = 0; - phdr.p_filesz = maydump(vma, mm_flags) ? sz : 0; + phdr.p_filesz = maydump(vma, cprm->mm_flags) ? sz : 0; phdr.p_memsz = sz; offset += phdr.p_filesz; phdr.p_flags = vma->vm_flags & VM_READ ? 
PF_R : 0; @@ -1847,7 +1839,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) goto end_coredump; if (elf_fdpic_dump_segments(cprm->file, &size, &cprm->limit, - mm_flags) < 0) + cprm->mm_flags) < 0) goto end_coredump; if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit)) diff --git a/fs/exec.c b/fs/exec.c index da2b31dc4e1c..89d4080c1435 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1748,14 +1748,19 @@ void set_dumpable(struct mm_struct *mm, int value) } } -int get_dumpable(struct mm_struct *mm) +static int __get_dumpable(unsigned long mm_flags) { int ret; - ret = mm->flags & 0x3; + ret = mm_flags & MMF_DUMPABLE_MASK; return (ret >= 2) ? 2 : ret; } +int get_dumpable(struct mm_struct *mm) +{ + return __get_dumpable(mm->flags); +} + static void wait_for_dump_helpers(struct file *file) { struct pipe_inode_info *pipe; @@ -1799,6 +1804,12 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) .signr = signr, .regs = regs, .limit = rlimit(RLIMIT_CORE), + /* + * We must use the same mm->flags while dumping core to avoid + * inconsistency of bit flags, since this flag is not protected + * by any locks. + */ + .mm_flags = mm->flags, }; audit_core_dumps(signr); @@ -1817,7 +1828,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) /* * If another thread got here first, or we are not dumpable, bail out. */ - if (mm->core_state || !get_dumpable(mm)) { + if (mm->core_state || !__get_dumpable(cprm.mm_flags)) { up_write(&mm->mmap_sem); put_cred(cred); goto fail; @@ -1828,7 +1839,8 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) * process nor do we know its entire history. We only know it * was tainted so we dump it as root in mode 2. */ - if (get_dumpable(mm) == 2) { /* Setuid core dump mode */ + if (__get_dumpable(cprm.mm_flags) == 2) { + /* Setuid core dump mode */ flag = O_EXCL; /* Stop rewrite attacks */ cred->fsuid = 0; /* Dump root private */ } diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 89c6249fc561..c809e286d213 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -74,6 +74,7 @@ struct coredump_params { struct pt_regs *regs; struct file *file; unsigned long limit; + unsigned long mm_flags; }; /* -- cgit v1.2.3 From 57205026da070b59e9546df352fe465f1aeacf99 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Fri, 5 Mar 2010 13:44:25 -0800 Subject: mc13783: rename mc13783_{{un,}mask,ack_irq} to have a mc13783_irq prefix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the source file group these functions together. The mc13783 header file provides fallback implementations for the old names to prevent build failures. When all users of the old names are fixed to use the new names these can go away. 
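A compact user-space sketch of the compatibility shim used here, with stand-in identifiers: the new name holds the code, the old name becomes a deprecated static inline that forwards to it, so not-yet-converted callers still build but get a warning:

#include <stdio.h>

int chip_irq_mask(int irq)			/* new canonical name */
{
	printf("masking irq %d\n", irq);
	return 0;
}

static inline int chip_mask(int irq) __attribute__((deprecated));
static inline int chip_mask(int irq)		/* old name, kept temporarily */
{
	return chip_irq_mask(irq);
}

int main(void)
{
	return chip_mask(3);	/* compiles, but gcc warns that chip_mask is deprecated */
}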
Signed-off-by: Uwe Kleine-König Cc: Alessandro Zummo Cc: Paul Gortmaker Cc: Valentin Longchamp Cc: Sascha Hauer Cc: Samuel Ortiz Cc: Dmitry Torokhov Cc: Luotao Fu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/mfd/mc13783-core.c | 38 +++++++++++++++++++------------------- include/linux/mfd/mc13783.h | 24 +++++++++++++++++++++--- 2 files changed, 40 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/drivers/mfd/mc13783-core.c b/drivers/mfd/mc13783-core.c index 735c8a4d164f..a2bd44552042 100644 --- a/drivers/mfd/mc13783-core.c +++ b/drivers/mfd/mc13783-core.c @@ -225,7 +225,7 @@ int mc13783_reg_rmw(struct mc13783 *mc13783, unsigned int offset, } EXPORT_SYMBOL(mc13783_reg_rmw); -int mc13783_mask(struct mc13783 *mc13783, int irq) +int mc13783_irq_mask(struct mc13783 *mc13783, int irq) { int ret; unsigned int offmask = irq < 24 ? MC13783_IRQMASK0 : MC13783_IRQMASK1; @@ -245,9 +245,9 @@ int mc13783_mask(struct mc13783 *mc13783, int irq) return mc13783_reg_write(mc13783, offmask, mask | irqbit); } -EXPORT_SYMBOL(mc13783_mask); +EXPORT_SYMBOL(mc13783_irq_mask); -int mc13783_unmask(struct mc13783 *mc13783, int irq) +int mc13783_irq_unmask(struct mc13783 *mc13783, int irq) { int ret; unsigned int offmask = irq < 24 ? MC13783_IRQMASK0 : MC13783_IRQMASK1; @@ -267,7 +267,18 @@ int mc13783_unmask(struct mc13783 *mc13783, int irq) return mc13783_reg_write(mc13783, offmask, mask & ~irqbit); } -EXPORT_SYMBOL(mc13783_unmask); +EXPORT_SYMBOL(mc13783_irq_unmask); + +int mc13783_irq_ack(struct mc13783 *mc13783, int irq) +{ + unsigned int offstat = irq < 24 ? MC13783_IRQSTAT0 : MC13783_IRQSTAT1; + unsigned int val = 1 << (irq < 24 ? irq : irq - 24); + + BUG_ON(irq < 0 || irq >= MC13783_NUM_IRQ); + + return mc13783_reg_write(mc13783, offstat, val); +} +EXPORT_SYMBOL(mc13783_irq_ack); int mc13783_irq_request_nounmask(struct mc13783 *mc13783, int irq, irq_handler_t handler, const char *name, void *dev) @@ -297,7 +308,7 @@ int mc13783_irq_request(struct mc13783 *mc13783, int irq, if (ret) return ret; - ret = mc13783_unmask(mc13783, irq); + ret = mc13783_irq_unmask(mc13783, irq); if (ret) { mc13783->irqhandler[irq] = NULL; mc13783->irqdata[irq] = NULL; @@ -317,7 +328,7 @@ int mc13783_irq_free(struct mc13783 *mc13783, int irq, void *dev) mc13783->irqdata[irq] != dev) return -EINVAL; - ret = mc13783_mask(mc13783, irq); + ret = mc13783_irq_mask(mc13783, irq); if (ret) return ret; @@ -333,17 +344,6 @@ static inline irqreturn_t mc13783_irqhandler(struct mc13783 *mc13783, int irq) return mc13783->irqhandler[irq](irq, mc13783->irqdata[irq]); } -int mc13783_ackirq(struct mc13783 *mc13783, int irq) -{ - unsigned int offstat = irq < 24 ? MC13783_IRQSTAT0 : MC13783_IRQSTAT1; - unsigned int val = 1 << (irq < 24 ? 
irq : irq - 24); - - BUG_ON(irq < 0 || irq >= MC13783_NUM_IRQ); - - return mc13783_reg_write(mc13783, offstat, val); -} -EXPORT_SYMBOL(mc13783_ackirq); - /* * returns: number of handled irqs or negative error * locking: holds mc13783->lock @@ -422,7 +422,7 @@ static irqreturn_t mc13783_handler_adcdone(int irq, void *data) { struct mc13783_adcdone_data *adcdone_data = data; - mc13783_ackirq(adcdone_data->mc13783, irq); + mc13783_irq_ack(adcdone_data->mc13783, irq); complete_all(&adcdone_data->done); @@ -486,7 +486,7 @@ int mc13783_adc_do_conversion(struct mc13783 *mc13783, unsigned int mode, dev_dbg(&mc13783->spidev->dev, "%s: request irq\n", __func__); mc13783_irq_request(mc13783, MC13783_IRQ_ADCDONE, mc13783_handler_adcdone, __func__, &adcdone_data); - mc13783_ackirq(mc13783, MC13783_IRQ_ADCDONE); + mc13783_irq_ack(mc13783, MC13783_IRQ_ADCDONE); mc13783_reg_write(mc13783, MC13783_REG_ADC_0, adc0); mc13783_reg_write(mc13783, MC13783_REG_ADC_1, adc1); diff --git a/include/linux/mfd/mc13783.h b/include/linux/mfd/mc13783.h index 94cb51a64037..b8b9f3b4f3e2 100644 --- a/include/linux/mfd/mc13783.h +++ b/include/linux/mfd/mc13783.h @@ -26,10 +26,28 @@ int mc13783_irq_request(struct mc13783 *mc13783, int irq, int mc13783_irq_request_nounmask(struct mc13783 *mc13783, int irq, irq_handler_t handler, const char *name, void *dev); int mc13783_irq_free(struct mc13783 *mc13783, int irq, void *dev); -int mc13783_ackirq(struct mc13783 *mc13783, int irq); -int mc13783_mask(struct mc13783 *mc13783, int irq); -int mc13783_unmask(struct mc13783 *mc13783, int irq); +int mc13783_irq_mask(struct mc13783 *mc13783, int irq); +int mc13783_irq_unmask(struct mc13783 *mc13783, int irq); +int mc13783_irq_ack(struct mc13783 *mc13783, int irq); + +static inline int mc13783_mask(struct mc13783 *mc13783, int irq) __deprecated; +static inline int mc13783_mask(struct mc13783 *mc13783, int irq) +{ + return mc13783_irq_mask(mc13783, irq); +} + +static inline int mc13783_unmask(struct mc13783 *mc13783, int irq) __deprecated; +static inline int mc13783_unmask(struct mc13783 *mc13783, int irq) +{ + return mc13783_irq_unmask(mc13783, irq); +} + +static inline int mc13783_ackirq(struct mc13783 *mc13783, int irq) __deprecated; +static inline int mc13783_ackirq(struct mc13783 *mc13783, int irq) +{ + return mc13783_irq_ack(mc13783, irq); +} #define MC13783_ADC0 43 #define MC13783_ADC0_ADREFEN (1 << 10) -- cgit v1.2.3 From 86c3400810a7a33e176bf33b6b074d881e829374 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Fri, 5 Mar 2010 13:44:29 -0800 Subject: mfd/mc13783: new function reading irq mask and status register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver for the mc13783 rtc needs to know if the TODA irq is pending. Instead of tracking in the rtc driver if the irq is enabled provide that information, too. 
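A possible caller, sketched rather than taken from the rtc driver, assuming the usual MC13783_IRQ_TODA constant from the same header; it only shows the intended calling convention, and either output pointer may be NULL if the caller does not care about that half:

#include <linux/mfd/mc13783.h>

/* builds only in kernel context; a fragment, not a standalone program */
static int toda_alarm_is_ringing(struct mc13783 *mc13783)
{
	int enabled = 0, pending = 0;
	int ret;

	ret = mc13783_irq_status(mc13783, MC13783_IRQ_TODA, &enabled, &pending);
	if (ret)
		return ret;

	return enabled && pending;	/* non-zero only if armed and fired */
}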
Signed-off-by: Uwe Kleine-König Cc: Alessandro Zummo Cc: Paul Gortmaker Cc: Valentin Longchamp Cc: Sascha Hauer Cc: Samuel Ortiz Cc: Dmitry Torokhov Cc: Luotao Fu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/mfd/mc13783-core.c | 35 +++++++++++++++++++++++++++++++++++ include/linux/mfd/mc13783.h | 2 ++ 2 files changed, 37 insertions(+) (limited to 'include') diff --git a/drivers/mfd/mc13783-core.c b/drivers/mfd/mc13783-core.c index a2bd44552042..62a847e4c2d8 100644 --- a/drivers/mfd/mc13783-core.c +++ b/drivers/mfd/mc13783-core.c @@ -269,6 +269,41 @@ int mc13783_irq_unmask(struct mc13783 *mc13783, int irq) } EXPORT_SYMBOL(mc13783_irq_unmask); +int mc13783_irq_status(struct mc13783 *mc13783, int irq, + int *enabled, int *pending) +{ + int ret; + unsigned int offmask = irq < 24 ? MC13783_IRQMASK0 : MC13783_IRQMASK1; + unsigned int offstat = irq < 24 ? MC13783_IRQSTAT0 : MC13783_IRQSTAT1; + u32 irqbit = 1 << (irq < 24 ? irq : irq - 24); + + if (irq < 0 || irq >= MC13783_NUM_IRQ) + return -EINVAL; + + if (enabled) { + u32 mask; + + ret = mc13783_reg_read(mc13783, offmask, &mask); + if (ret) + return ret; + + *enabled = mask & irqbit; + } + + if (pending) { + u32 stat; + + ret = mc13783_reg_read(mc13783, offstat, &stat); + if (ret) + return ret; + + *pending = stat & irqbit; + } + + return 0; +} +EXPORT_SYMBOL(mc13783_irq_status); + int mc13783_irq_ack(struct mc13783 *mc13783, int irq) { unsigned int offstat = irq < 24 ? MC13783_IRQSTAT0 : MC13783_IRQSTAT1; diff --git a/include/linux/mfd/mc13783.h b/include/linux/mfd/mc13783.h index b8b9f3b4f3e2..8895d9d8879c 100644 --- a/include/linux/mfd/mc13783.h +++ b/include/linux/mfd/mc13783.h @@ -29,6 +29,8 @@ int mc13783_irq_free(struct mc13783 *mc13783, int irq, void *dev); int mc13783_irq_mask(struct mc13783 *mc13783, int irq); int mc13783_irq_unmask(struct mc13783 *mc13783, int irq); +int mc13783_irq_status(struct mc13783 *mc13783, int irq, + int *enabled, int *pending); int mc13783_irq_ack(struct mc13783 *mc13783, int irq); static inline int mc13783_mask(struct mc13783 *mc13783, int irq) __deprecated; -- cgit v1.2.3 From e952805d2d2e706aed182723e5ab3ec0b1f91de3 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 5 Mar 2010 13:44:33 -0800 Subject: gpio: add driver for MAX7300 I2C GPIO extender Add the MAX7300-I2C variant of the MAX7301-SPI version. Both chips share the same core logic, so the generic part of the in-kernel SPI-driver is refactored into a generic part. The I2C and SPI specific funtions are then wrapped into seperate drivers picking up the generic part. 
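The shape of that split, as a hedged stand-alone sketch in which the types and names are invented and only the structure mirrors the patch: the core never touches a bus, it just calls read/write hooks that the I2C or SPI front-end filled in at probe time:

#include <stdio.h>

struct expander {
	int (*write)(void *bus, unsigned int reg, unsigned int val);
	int (*read)(void *bus, unsigned int reg);
	void *bus;
};

/* generic core logic, shared by every transport (cf. max730x.c) */
static int expander_power_up(struct expander *e)
{
	return e->write(e->bus, 0x04, 0x01);	/* same register, any bus */
}

/* one possible transport, standing in for the I2C or SPI wrapper */
static int fake_bus_write(void *bus, unsigned int reg, unsigned int val)
{
	printf("%s: reg 0x%02x <- 0x%02x\n", (const char *)bus, reg, val);
	return 0;
}

int main(void)
{
	struct expander e = { .write = fake_bus_write, .bus = (void *)"i2c" };

	return expander_power_up(&e);
}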
Signed-off-by: Wolfram Sang Cc: Juergen Beisert Cc: David Brownell Cc: Jean Delvare Cc: Anton Vorontsov Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpio/Kconfig | 13 +- drivers/gpio/Makefile | 2 + drivers/gpio/max7300.c | 94 ++++++++++++++ drivers/gpio/max7301.c | 293 +++++--------------------------------------- drivers/gpio/max730x.c | 244 ++++++++++++++++++++++++++++++++++++ include/linux/spi/max7301.h | 18 +++ 6 files changed, 404 insertions(+), 260 deletions(-) create mode 100644 drivers/gpio/max7300.c create mode 100644 drivers/gpio/max730x.c (limited to 'include') diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 1f1d88ae68d6..f3549b8779d8 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -65,6 +65,9 @@ config GPIO_SYSFS # put expanders in the right section, in alphabetical order +config GPIO_MAX730X + tristate + comment "Memory mapped GPIO expanders:" config GPIO_PL061 @@ -87,6 +90,13 @@ config GPIO_VR41XX comment "I2C GPIO expanders:" +config GPIO_MAX7300 + tristate "Maxim MAX7300 GPIO expander" + depends on I2C + select GPIO_MAX730X + help + GPIO driver for Maxim MAX7301 I2C-based GPIO expander. + config GPIO_MAX732X tristate "MAX7319, MAX7320-7327 I2C Port Expanders" depends on I2C @@ -226,8 +236,9 @@ comment "SPI GPIO expanders:" config GPIO_MAX7301 tristate "Maxim MAX7301 GPIO expander" depends on SPI_MASTER + select GPIO_MAX730X help - gpio driver for Maxim MAX7301 SPI GPIO expander. + GPIO driver for Maxim MAX7301 SPI-based GPIO expander. config GPIO_MCP23S08 tristate "Microchip MCP23S08 I/O expander" diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile index 48687238edb1..508a1b202cdb 100644 --- a/drivers/gpio/Makefile +++ b/drivers/gpio/Makefile @@ -7,6 +7,8 @@ obj-$(CONFIG_GPIOLIB) += gpiolib.o obj-$(CONFIG_GPIO_ADP5520) += adp5520-gpio.o obj-$(CONFIG_GPIO_ADP5588) += adp5588-gpio.o obj-$(CONFIG_GPIO_LANGWELL) += langwell_gpio.o +obj-$(CONFIG_GPIO_MAX730X) += max730x.o +obj-$(CONFIG_GPIO_MAX7300) += max7300.o obj-$(CONFIG_GPIO_MAX7301) += max7301.o obj-$(CONFIG_GPIO_MAX732X) += max732x.o obj-$(CONFIG_GPIO_MC33880) += mc33880.o diff --git a/drivers/gpio/max7300.c b/drivers/gpio/max7300.c new file mode 100644 index 000000000000..9d74eef1157a --- /dev/null +++ b/drivers/gpio/max7300.c @@ -0,0 +1,94 @@ +/* + * drivers/gpio/max7300.c + * + * Copyright (C) 2009 Wolfram Sang, Pengutronix + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Check max730x.c for further details. 
+ */ + +#include +#include +#include +#include +#include +#include + +static int max7300_i2c_write(struct device *dev, unsigned int reg, + unsigned int val) +{ + struct i2c_client *client = to_i2c_client(dev); + + return i2c_smbus_write_byte_data(client, reg, val); +} + +static int max7300_i2c_read(struct device *dev, unsigned int reg) +{ + struct i2c_client *client = to_i2c_client(dev); + + return i2c_smbus_read_byte_data(client, reg); +} + +static int __devinit max7300_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct max7301 *ts; + int ret; + + if (!i2c_check_functionality(client->adapter, + I2C_FUNC_SMBUS_BYTE_DATA)) + return -EIO; + + ts = kzalloc(sizeof(struct max7301), GFP_KERNEL); + if (!ts) + return -ENOMEM; + + ts->read = max7300_i2c_read; + ts->write = max7300_i2c_write; + ts->dev = &client->dev; + + ret = __max730x_probe(ts); + if (ret) + kfree(ts); + return ret; +} + +static int __devexit max7300_remove(struct i2c_client *client) +{ + return __max730x_remove(&client->dev); +} + +static const struct i2c_device_id max7300_id[] = { + { "max7300", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, max7300_id); + +static struct i2c_driver max7300_driver = { + .driver = { + .name = "max7300", + .owner = THIS_MODULE, + }, + .probe = max7300_probe, + .remove = __devexit_p(max7300_remove), + .id_table = max7300_id, +}; + +static int __init max7300_init(void) +{ + return i2c_add_driver(&max7300_driver); +} +subsys_initcall(max7300_init); + +static void __exit max7300_exit(void) +{ + i2c_del_driver(&max7300_driver); +} +module_exit(max7300_exit); + +MODULE_AUTHOR("Wolfram Sang"); +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("MAX7300 GPIO-Expander"); diff --git a/drivers/gpio/max7301.c b/drivers/gpio/max7301.c index 480956f1ca50..965d9b1ea13e 100644 --- a/drivers/gpio/max7301.c +++ b/drivers/gpio/max7301.c @@ -1,98 +1,41 @@ -/** +/* * drivers/gpio/max7301.c * * Copyright (C) 2006 Juergen Beisert, Pengutronix * Copyright (C) 2008 Guennadi Liakhovetski, Pengutronix + * Copyright (C) 2009 Wolfram Sang, Pengutronix * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. * - * The Maxim's MAX7301 device is an SPI driven GPIO expander. There are - * 28 GPIOs. 8 of them can trigger an interrupt. See datasheet for more - * details - * Note: - * - DIN must be stable at the rising edge of clock. - * - when writing: - * - always clock in 16 clocks at once - * - at DIN: D15 first, D0 last - * - D0..D7 = databyte, D8..D14 = commandbyte - * - D15 = low -> write command - * - when reading - * - always clock in 16 clocks at once - * - at DIN: D15 first, D0 last - * - D0..D7 = dummy, D8..D14 = register address - * - D15 = high -> read command - * - raise CS and assert it again - * - always clock in 16 clocks at once - * - at DOUT: D15 first, D0 last - * - D0..D7 contains the data from the first cycle - * - * The driver exports a standard gpiochip interface + * Check max730x.c for further details. */ +#include #include #include #include #include #include -#include - -#define DRIVER_NAME "max7301" - -/* - * Pin configurations, see MAX7301 datasheet page 6 - */ -#define PIN_CONFIG_MASK 0x03 -#define PIN_CONFIG_IN_PULLUP 0x03 -#define PIN_CONFIG_IN_WO_PULLUP 0x02 -#define PIN_CONFIG_OUT 0x01 - -#define PIN_NUMBER 28 - - -/* - * Some registers must be read back to modify. 
- * To save time we cache them here in memory - */ -struct max7301 { - struct mutex lock; - u8 port_config[8]; /* field 0 is unused */ - u32 out_level; /* cached output levels */ - struct gpio_chip chip; - struct spi_device *spi; -}; -/** - * max7301_write - Write a new register content - * @spi: The SPI device - * @reg: Register offset - * @val: Value to write - * - * A write to the MAX7301 means one message with one transfer - * - * Returns 0 if successful or a negative value on error - */ -static int max7301_write(struct spi_device *spi, unsigned int reg, unsigned int val) +/* A write to the MAX7301 means one message with one transfer */ +static int max7301_spi_write(struct device *dev, unsigned int reg, + unsigned int val) { + struct spi_device *spi = to_spi_device(dev); u16 word = ((reg & 0x7F) << 8) | (val & 0xFF); + return spi_write(spi, (const u8 *)&word, sizeof(word)); } -/** - * max7301_read - Read back register content - * @spi: The SPI device - * @reg: Register offset - * - * A read from the MAX7301 means two transfers; here, one message each - * - * Returns positive 8 bit value from device if successful or a - * negative value on error - */ -static int max7301_read(struct spi_device *spi, unsigned int reg) +/* A read from the MAX7301 means two transfers; here, one message each */ + +static int max7301_spi_read(struct device *dev, unsigned int reg) { int ret; u16 word; + struct spi_device *spi = to_spi_device(dev); word = 0x8000 | (reg << 8); ret = spi_write(spi, (const u8 *)&word, sizeof(word)); @@ -108,125 +51,13 @@ static int max7301_read(struct spi_device *spi, unsigned int reg) return word & 0xff; } -static int max7301_direction_input(struct gpio_chip *chip, unsigned offset) -{ - struct max7301 *ts = container_of(chip, struct max7301, chip); - u8 *config; - int ret; - - /* First 4 pins are unused in the controller */ - offset += 4; - - config = &ts->port_config[offset >> 2]; - - mutex_lock(&ts->lock); - - /* Standard GPIO API doesn't support pull-ups, has to be extended. - * Hard-coding no pollup for now. 
*/ - *config = (*config & ~(3 << (offset & 3))) | (1 << (offset & 3)); - - ret = max7301_write(ts->spi, 0x08 + (offset >> 2), *config); - - mutex_unlock(&ts->lock); - - return ret; -} - -static int __max7301_set(struct max7301 *ts, unsigned offset, int value) -{ - if (value) { - ts->out_level |= 1 << offset; - return max7301_write(ts->spi, 0x20 + offset, 0x01); - } else { - ts->out_level &= ~(1 << offset); - return max7301_write(ts->spi, 0x20 + offset, 0x00); - } -} - -static int max7301_direction_output(struct gpio_chip *chip, unsigned offset, - int value) -{ - struct max7301 *ts = container_of(chip, struct max7301, chip); - u8 *config; - int ret; - - /* First 4 pins are unused in the controller */ - offset += 4; - - config = &ts->port_config[offset >> 2]; - - mutex_lock(&ts->lock); - - *config = (*config & ~(3 << (offset & 3))) | (1 << (offset & 3)); - - ret = __max7301_set(ts, offset, value); - - if (!ret) - ret = max7301_write(ts->spi, 0x08 + (offset >> 2), *config); - - mutex_unlock(&ts->lock); - - return ret; -} - -static int max7301_get(struct gpio_chip *chip, unsigned offset) -{ - struct max7301 *ts = container_of(chip, struct max7301, chip); - int config, level = -EINVAL; - - /* First 4 pins are unused in the controller */ - offset += 4; - - mutex_lock(&ts->lock); - - config = (ts->port_config[offset >> 2] >> ((offset & 3) * 2)) & 3; - - switch (config) { - case 1: - /* Output: return cached level */ - level = !!(ts->out_level & (1 << offset)); - break; - case 2: - case 3: - /* Input: read out */ - level = max7301_read(ts->spi, 0x20 + offset) & 0x01; - } - mutex_unlock(&ts->lock); - - return level; -} - -static void max7301_set(struct gpio_chip *chip, unsigned offset, int value) -{ - struct max7301 *ts = container_of(chip, struct max7301, chip); - - /* First 4 pins are unused in the controller */ - offset += 4; - - mutex_lock(&ts->lock); - - __max7301_set(ts, offset, value); - - mutex_unlock(&ts->lock); -} - static int __devinit max7301_probe(struct spi_device *spi) { struct max7301 *ts; - struct max7301_platform_data *pdata; - int i, ret; - - pdata = spi->dev.platform_data; - if (!pdata || !pdata->base) { - dev_dbg(&spi->dev, "incorrect or missing platform data\n"); - return -EINVAL; - } + int ret; - /* - * bits_per_word cannot be configured in platform data - */ + /* bits_per_word cannot be configured in platform data */ spi->bits_per_word = 16; - ret = spi_setup(spi); if (ret < 0) return ret; @@ -235,90 +66,35 @@ static int __devinit max7301_probe(struct spi_device *spi) if (!ts) return -ENOMEM; - mutex_init(&ts->lock); - - dev_set_drvdata(&spi->dev, ts); + ts->read = max7301_spi_read; + ts->write = max7301_spi_write; + ts->dev = &spi->dev; - /* Power up the chip and disable IRQ output */ - max7301_write(spi, 0x04, 0x01); - - ts->spi = spi; - - ts->chip.label = DRIVER_NAME, - - ts->chip.direction_input = max7301_direction_input; - ts->chip.get = max7301_get; - ts->chip.direction_output = max7301_direction_output; - ts->chip.set = max7301_set; - - ts->chip.base = pdata->base; - ts->chip.ngpio = PIN_NUMBER; - ts->chip.can_sleep = 1; - ts->chip.dev = &spi->dev; - ts->chip.owner = THIS_MODULE; - - /* - * tristate all pins in hardware and cache the - * register values for later use. 
- */ - for (i = 1; i < 8; i++) { - int j; - /* 0xAA means input with internal pullup disabled */ - max7301_write(spi, 0x08 + i, 0xAA); - ts->port_config[i] = 0xAA; - for (j = 0; j < 4; j++) { - int offset = (i - 1) * 4 + j; - ret = max7301_direction_input(&ts->chip, offset); - if (ret) - goto exit_destroy; - } - } - - ret = gpiochip_add(&ts->chip); + ret = __max730x_probe(ts); if (ret) - goto exit_destroy; - - return ret; - -exit_destroy: - dev_set_drvdata(&spi->dev, NULL); - mutex_destroy(&ts->lock); - kfree(ts); + kfree(ts); return ret; } static int __devexit max7301_remove(struct spi_device *spi) { - struct max7301 *ts; - int ret; - - ts = dev_get_drvdata(&spi->dev); - if (ts == NULL) - return -ENODEV; - - dev_set_drvdata(&spi->dev, NULL); - - /* Power down the chip and disable IRQ output */ - max7301_write(spi, 0x04, 0x00); - - ret = gpiochip_remove(&ts->chip); - if (!ret) { - mutex_destroy(&ts->lock); - kfree(ts); - } else - dev_err(&spi->dev, "Failed to remove the GPIO controller: %d\n", - ret); - - return ret; + return __max730x_remove(&spi->dev); } +static const struct spi_device_id max7301_id[] = { + { "max7301", 0 }, + { } +}; +MODULE_DEVICE_TABLE(spi, max7301_id); + static struct spi_driver max7301_driver = { .driver = { - .name = DRIVER_NAME, - .owner = THIS_MODULE, + .name = "max7301", + .owner = THIS_MODULE, }, - .probe = max7301_probe, - .remove = __devexit_p(max7301_remove), + .probe = max7301_probe, + .remove = __devexit_p(max7301_remove), + .id_table = max7301_id, }; static int __init max7301_init(void) @@ -336,7 +112,6 @@ static void __exit max7301_exit(void) } module_exit(max7301_exit); -MODULE_AUTHOR("Juergen Beisert"); +MODULE_AUTHOR("Juergen Beisert, Wolfram Sang"); MODULE_LICENSE("GPL v2"); -MODULE_DESCRIPTION("MAX7301 SPI based GPIO-Expander"); -MODULE_ALIAS("spi:" DRIVER_NAME); +MODULE_DESCRIPTION("MAX7301 GPIO-Expander"); diff --git a/drivers/gpio/max730x.c b/drivers/gpio/max730x.c new file mode 100644 index 000000000000..c9bced55f82b --- /dev/null +++ b/drivers/gpio/max730x.c @@ -0,0 +1,244 @@ +/** + * drivers/gpio/max7301.c + * + * Copyright (C) 2006 Juergen Beisert, Pengutronix + * Copyright (C) 2008 Guennadi Liakhovetski, Pengutronix + * Copyright (C) 2009 Wolfram Sang, Pengutronix + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * The Maxim MAX7300/1 device is an I2C/SPI driven GPIO expander. There are + * 28 GPIOs. 8 of them can trigger an interrupt. See datasheet for more + * details + * Note: + * - DIN must be stable at the rising edge of clock. 
+ * - when writing: + * - always clock in 16 clocks at once + * - at DIN: D15 first, D0 last + * - D0..D7 = databyte, D8..D14 = commandbyte + * - D15 = low -> write command + * - when reading + * - always clock in 16 clocks at once + * - at DIN: D15 first, D0 last + * - D0..D7 = dummy, D8..D14 = register address + * - D15 = high -> read command + * - raise CS and assert it again + * - always clock in 16 clocks at once + * - at DOUT: D15 first, D0 last + * - D0..D7 contains the data from the first cycle + * + * The driver exports a standard gpiochip interface + */ + +#include +#include +#include +#include +#include +#include + +/* + * Pin configurations, see MAX7301 datasheet page 6 + */ +#define PIN_CONFIG_MASK 0x03 +#define PIN_CONFIG_IN_PULLUP 0x03 +#define PIN_CONFIG_IN_WO_PULLUP 0x02 +#define PIN_CONFIG_OUT 0x01 + +#define PIN_NUMBER 28 + +static int max7301_direction_input(struct gpio_chip *chip, unsigned offset) +{ + struct max7301 *ts = container_of(chip, struct max7301, chip); + u8 *config; + u8 offset_bits; + int ret; + + /* First 4 pins are unused in the controller */ + offset += 4; + offset_bits = (offset & 3) << 1; + + config = &ts->port_config[offset >> 2]; + + mutex_lock(&ts->lock); + + /* Standard GPIO API doesn't support pull-ups, has to be extended. + * Hard-coding no pollup for now. */ + *config = (*config & ~(PIN_CONFIG_MASK << offset_bits)) + | (PIN_CONFIG_IN_WO_PULLUP << offset_bits); + + ret = ts->write(ts->dev, 0x08 + (offset >> 2), *config); + + mutex_unlock(&ts->lock); + + return ret; +} + +static int __max7301_set(struct max7301 *ts, unsigned offset, int value) +{ + if (value) { + ts->out_level |= 1 << offset; + return ts->write(ts->dev, 0x20 + offset, 0x01); + } else { + ts->out_level &= ~(1 << offset); + return ts->write(ts->dev, 0x20 + offset, 0x00); + } +} + +static int max7301_direction_output(struct gpio_chip *chip, unsigned offset, + int value) +{ + struct max7301 *ts = container_of(chip, struct max7301, chip); + u8 *config; + u8 offset_bits; + int ret; + + /* First 4 pins are unused in the controller */ + offset += 4; + offset_bits = (offset & 3) << 1; + + config = &ts->port_config[offset >> 2]; + + mutex_lock(&ts->lock); + + *config = (*config & ~(PIN_CONFIG_MASK << offset_bits)) + | (PIN_CONFIG_OUT << offset_bits); + + ret = __max7301_set(ts, offset, value); + + if (!ret) + ret = ts->write(ts->dev, 0x08 + (offset >> 2), *config); + + mutex_unlock(&ts->lock); + + return ret; +} + +static int max7301_get(struct gpio_chip *chip, unsigned offset) +{ + struct max7301 *ts = container_of(chip, struct max7301, chip); + int config, level = -EINVAL; + + /* First 4 pins are unused in the controller */ + offset += 4; + + mutex_lock(&ts->lock); + + config = (ts->port_config[offset >> 2] >> ((offset & 3) << 1)) + & PIN_CONFIG_MASK; + + switch (config) { + case PIN_CONFIG_OUT: + /* Output: return cached level */ + level = !!(ts->out_level & (1 << offset)); + break; + case PIN_CONFIG_IN_WO_PULLUP: + case PIN_CONFIG_IN_PULLUP: + /* Input: read out */ + level = ts->read(ts->dev, 0x20 + offset) & 0x01; + } + mutex_unlock(&ts->lock); + + return level; +} + +static void max7301_set(struct gpio_chip *chip, unsigned offset, int value) +{ + struct max7301 *ts = container_of(chip, struct max7301, chip); + + /* First 4 pins are unused in the controller */ + offset += 4; + + mutex_lock(&ts->lock); + + __max7301_set(ts, offset, value); + + mutex_unlock(&ts->lock); +} + +int __devinit __max730x_probe(struct max7301 *ts) +{ + struct device *dev = ts->dev; + struct 
max7301_platform_data *pdata; + int i, ret; + + pdata = dev->platform_data; + if (!pdata || !pdata->base) { + dev_err(dev, "incorrect or missing platform data\n"); + return -EINVAL; + } + + mutex_init(&ts->lock); + dev_set_drvdata(dev, ts); + + /* Power up the chip and disable IRQ output */ + ts->write(dev, 0x04, 0x01); + + ts->chip.label = dev->driver->name; + + ts->chip.direction_input = max7301_direction_input; + ts->chip.get = max7301_get; + ts->chip.direction_output = max7301_direction_output; + ts->chip.set = max7301_set; + + ts->chip.base = pdata->base; + ts->chip.ngpio = PIN_NUMBER; + ts->chip.can_sleep = 1; + ts->chip.dev = dev; + ts->chip.owner = THIS_MODULE; + + /* + * tristate all pins in hardware and cache the + * register values for later use. + */ + for (i = 1; i < 8; i++) { + int j; + /* 0xAA means input with internal pullup disabled */ + ts->write(dev, 0x08 + i, 0xAA); + ts->port_config[i] = 0xAA; + for (j = 0; j < 4; j++) { + int offset = (i - 1) * 4 + j; + ret = max7301_direction_input(&ts->chip, offset); + if (ret) + goto exit_destroy; + } + } + + ret = gpiochip_add(&ts->chip); + if (ret) + goto exit_destroy; + + return ret; + +exit_destroy: + dev_set_drvdata(dev, NULL); + mutex_destroy(&ts->lock); + return ret; +} +EXPORT_SYMBOL_GPL(__max730x_probe); + +int __devexit __max730x_remove(struct device *dev) +{ + struct max7301 *ts = dev_get_drvdata(dev); + int ret; + + if (ts == NULL) + return -ENODEV; + + dev_set_drvdata(dev, NULL); + + /* Power down the chip and disable IRQ output */ + ts->write(dev, 0x04, 0x00); + + ret = gpiochip_remove(&ts->chip); + if (!ret) { + mutex_destroy(&ts->lock); + kfree(ts); + } else + dev_err(dev, "Failed to remove GPIO controller: %d\n", ret); + + return ret; +} +EXPORT_SYMBOL_GPL(__max730x_remove); diff --git a/include/linux/spi/max7301.h b/include/linux/spi/max7301.h index 6dfd83f19b4b..34af0a3477bf 100644 --- a/include/linux/spi/max7301.h +++ b/include/linux/spi/max7301.h @@ -1,9 +1,27 @@ #ifndef LINUX_SPI_MAX7301_H #define LINUX_SPI_MAX7301_H +#include + +/* + * Some registers must be read back to modify. + * To save time we cache them here in memory + */ +struct max7301 { + struct mutex lock; + u8 port_config[8]; /* field 0 is unused */ + u32 out_level; /* cached output levels */ + struct gpio_chip chip; + struct device *dev; + int (*write)(struct device *dev, unsigned int reg, unsigned int val); + int (*read)(struct device *dev, unsigned int reg); +}; + struct max7301_platform_data { /* number assigned to the first GPIO */ unsigned base; }; +extern int __max730x_remove(struct device *dev); +extern int __max730x_probe(struct max7301 *ts); #endif -- cgit v1.2.3 From 62fecb70cfaa9b4c6aa1981acd53b18f4ad925f0 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Fri, 5 Mar 2010 13:44:34 -0800 Subject: pca953x: minor include cleanup linux/i2c/pca953x.h is a very bare include file. Fix check for multiple includes of linux/i2c/pca953x.h, and add dependent includes into the header file. 
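In other words, the header gains a conventional multiple-inclusion guard and pulls in the headers its own declarations depend on, so it can be included on its own and more than once. A minimal sketch of the resulting layout is below; the guard name matches the diff that follows, but the two dependent include targets are assumptions (they are not preserved in this listing), chosen as <linux/types.h> for the uint16_t member and <linux/i2c.h> for the struct i2c_client callback arguments:

    /* Sketch only: the dependent include targets are assumed, not
     * taken verbatim from the merged patch. */
    #ifndef _LINUX_PCA953X_H
    #define _LINUX_PCA953X_H

    #include <linux/types.h>
    #include <linux/i2c.h>

    /* struct pca953x_platform_data declaration as in the diff below */

    #endif /* _LINUX_PCA953X_H */
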
Signed-off-by: Olof Johansson Acked-by: Wolfram Sang Acked-by: Jean Delvare Cc: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/i2c/pca953x.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/i2c/pca953x.h b/include/linux/i2c/pca953x.h index 81736d6a8db7..29699f8dc5a4 100644 --- a/include/linux/i2c/pca953x.h +++ b/include/linux/i2c/pca953x.h @@ -1,3 +1,9 @@ +#ifndef _LINUX_PCA953X_H +#define _LINUX_PCA953X_H + +#include +#include + /* platform data for the PCA9539 16-bit I/O expander driver */ struct pca953x_platform_data { @@ -17,3 +23,5 @@ struct pca953x_platform_data { void *context); char **names; }; + +#endif /* _LINUX_PCA953X_H */ -- cgit v1.2.3 From 3e45f1d1155894e6f4291f5536b224874d52d8e2 Mon Sep 17 00:00:00 2001 From: Eric Miao Date: Fri, 5 Mar 2010 13:44:35 -0800 Subject: gpio: introduce gpio_request_one() and friends gpio_request() without initial configuration of the GPIO is normally useless, introduce gpio_request_one() together with GPIOF_ flags for input/output direction and initial output level. gpio_{request,free}_array() for multiple GPIOs. Signed-off-by: Eric Miao Cc: David Brownell Cc: Ben Nizette Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/gpio.txt | 64 ++++++++++++++++++++++++++++++++++++++++++++++ drivers/gpio/gpiolib.c | 58 +++++++++++++++++++++++++++++++++++++++++ include/asm-generic/gpio.h | 26 +++++++++++++++++++ 3 files changed, 148 insertions(+) (limited to 'include') diff --git a/Documentation/gpio.txt b/Documentation/gpio.txt index 1866c27eec69..c2c6e9b39bbe 100644 --- a/Documentation/gpio.txt +++ b/Documentation/gpio.txt @@ -253,6 +253,70 @@ pin setup (e.g. controlling which pin the GPIO uses, pullup/pulldown). Also note that it's your responsibility to have stopped using a GPIO before you free it. +Considering in most cases GPIOs are actually configured right after they +are claimed, three additional calls are defined: + + /* request a single GPIO, with initial configuration specified by + * 'flags', identical to gpio_request() wrt other arguments and + * return value + */ + int gpio_request_one(unsigned gpio, unsigned long flags, const char *label); + + /* request multiple GPIOs in a single call + */ + int gpio_request_array(struct gpio *array, size_t num); + + /* release multiple GPIOs in a single call + */ + void gpio_free_array(struct gpio *array, size_t num); + +where 'flags' is currently defined to specify the following properties: + + * GPIOF_DIR_IN - to configure direction as input + * GPIOF_DIR_OUT - to configure direction as output + + * GPIOF_INIT_LOW - as output, set initial level to LOW + * GPIOF_INIT_HIGH - as output, set initial level to HIGH + +since GPIOF_INIT_* are only valid when configured as output, so group valid +combinations as: + + * GPIOF_IN - configure as input + * GPIOF_OUT_INIT_LOW - configured as output, initial level LOW + * GPIOF_OUT_INIT_HIGH - configured as output, initial level HIGH + +In the future, these flags can be extended to support more properties such +as open-drain status. 
+ +Further more, to ease the claim/release of multiple GPIOs, 'struct gpio' is +introduced to encapsulate all three fields as: + + struct gpio { + unsigned gpio; + unsigned long flags; + const char *label; + }; + +A typical example of usage: + + static struct gpio leds_gpios[] = { + { 32, GPIOF_OUT_INIT_HIGH, "Power LED" }, /* default to ON */ + { 33, GPIOF_OUT_INIT_LOW, "Green LED" }, /* default to OFF */ + { 34, GPIOF_OUT_INIT_LOW, "Red LED" }, /* default to OFF */ + { 35, GPIOF_OUT_INIT_LOW, "Blue LED" }, /* default to OFF */ + { ... }, + }; + + err = gpio_request_one(31, GPIOF_IN, "Reset Button"); + if (err) + ... + + err = gpio_request_array(leds_gpios, ARRAY_SIZE(leds_gpios)); + if (err) + ... + + gpio_free_array(leds_gpios, ARRAY_SIZE(leds_gpios)); + GPIOs mapped to IRQs -------------------- diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 350842ad3632..9006fdb26fea 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1237,6 +1237,64 @@ void gpio_free(unsigned gpio) } EXPORT_SYMBOL_GPL(gpio_free); +/** + * gpio_request_one - request a single GPIO with initial configuration + * @gpio: the GPIO number + * @flags: GPIO configuration as specified by GPIOF_* + * @label: a literal description string of this GPIO + */ +int gpio_request_one(unsigned gpio, unsigned long flags, const char *label) +{ + int err; + + err = gpio_request(gpio, label); + if (err) + return err; + + if (flags & GPIOF_DIR_IN) + err = gpio_direction_input(gpio); + else + err = gpio_direction_output(gpio, + (flags & GPIOF_INIT_HIGH) ? 1 : 0); + + return err; +} +EXPORT_SYMBOL_GPL(gpio_request_one); + +/** + * gpio_request_array - request multiple GPIOs in a single call + * @array: array of the 'struct gpio' + * @num: how many GPIOs in the array + */ +int gpio_request_array(struct gpio *array, size_t num) +{ + int i, err; + + for (i = 0; i < num; i++, array++) { + err = gpio_request_one(array->gpio, array->flags, array->label); + if (err) + goto err_free; + } + return 0; + +err_free: + while (i--) + gpio_free((--array)->gpio); + return err; +} +EXPORT_SYMBOL_GPL(gpio_request_array); + +/** + * gpio_free_array - release multiple GPIOs in a single call + * @array: array of the 'struct gpio' + * @num: how many GPIOs in the array + */ +void gpio_free_array(struct gpio *array, size_t num) +{ + while (num--) + gpio_free((array++)->gpio); +} +EXPORT_SYMBOL_GPL(gpio_free_array); /** * gpiochip_is_requested - return string iff signal was requested diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h index 485eeb6c4ef3..979c6a57f2f1 100644 --- a/include/asm-generic/gpio.h +++ b/include/asm-generic/gpio.h @@ -136,6 +136,32 @@ extern int __gpio_cansleep(unsigned gpio); extern int __gpio_to_irq(unsigned gpio); +#define GPIOF_DIR_OUT (0 << 0) +#define GPIOF_DIR_IN (1 << 0) + +#define GPIOF_INIT_LOW (0 << 1) +#define GPIOF_INIT_HIGH (1 << 1) + +#define GPIOF_IN (GPIOF_DIR_IN) +#define GPIOF_OUT_INIT_LOW (GPIOF_DIR_OUT | GPIOF_INIT_LOW) +#define GPIOF_OUT_INIT_HIGH (GPIOF_DIR_OUT | GPIOF_INIT_HIGH) + +/** + * struct gpio - a structure describing a GPIO with configuration + * @gpio: the GPIO number + * @flags: GPIO configuration as specified by GPIOF_* + * @label: a literal description string of this GPIO + */ +struct gpio { + unsigned gpio; + unsigned long flags; + const char *label; +}; + +extern int gpio_request_one(unsigned gpio, unsigned long flags, const char *label); +extern int gpio_request_array(struct gpio *array, size_t num); +extern void gpio_free_array(struct gpio *array, 
size_t num); + #ifdef CONFIG_GPIO_SYSFS /* -- cgit v1.2.3 From 89ea8bbe9c3eb2ea0cb57a4ecf283cab7326f0b0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 5 Mar 2010 13:44:36 -0800 Subject: gpio: pca953x.c: add interrupt handling capability Most of the GPIO expanders controlled by the pca953x driver are able to report changes on the input pins through an *INT pin. This patch implements the irq_chip functionality (edge detection only). The driver has been tested on an Arcom Zeus. [akpm@linux-foundation.org: the compiler does inlining for us nowadays] Signed-off-by: Marc Zyngier Cc: Eric Miao Cc: Haojian Zhuang Cc: David Brownell Cc: Nate Case Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpio/Kconfig | 7 ++ drivers/gpio/pca953x.c | 249 +++++++++++++++++++++++++++++++++++++++++--- include/linux/i2c/pca953x.h | 3 + 3 files changed, 247 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index f3549b8779d8..c5cc7d9d88e3 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -134,6 +134,13 @@ config GPIO_PCA953X This driver can also be built as a module. If so, the module will be called pca953x. +config GPIO_PCA953X_IRQ + bool "Interrupt controller support for PCA953x" + depends on GPIO_PCA953X=y + help + Say yes here to enable the pca953x to be used as an interrupt + controller. It requires the driver to be built in the kernel. + config GPIO_PCF857X tristate "PCF857x, PCA{85,96}7x, and MAX732[89] I2C GPIO expanders" depends on I2C diff --git a/drivers/gpio/pca953x.c b/drivers/gpio/pca953x.c index 6a2fb3fbb3d9..ab5daab14bc2 100644 --- a/drivers/gpio/pca953x.c +++ b/drivers/gpio/pca953x.c @@ -14,6 +14,8 @@ #include #include #include +#include +#include #include #include #ifdef CONFIG_OF_GPIO @@ -26,23 +28,28 @@ #define PCA953X_INVERT 2 #define PCA953X_DIRECTION 3 +#define PCA953X_GPIOS 0x00FF +#define PCA953X_INT 0x0100 + static const struct i2c_device_id pca953x_id[] = { - { "pca9534", 8, }, - { "pca9535", 16, }, + { "pca9534", 8 | PCA953X_INT, }, + { "pca9535", 16 | PCA953X_INT, }, { "pca9536", 4, }, - { "pca9537", 4, }, - { "pca9538", 8, }, - { "pca9539", 16, }, - { "pca9554", 8, }, - { "pca9555", 16, }, + { "pca9537", 4 | PCA953X_INT, }, + { "pca9538", 8 | PCA953X_INT, }, + { "pca9539", 16 | PCA953X_INT, }, + { "pca9554", 8 | PCA953X_INT, }, + { "pca9555", 16 | PCA953X_INT, }, { "pca9556", 8, }, { "pca9557", 8, }, { "max7310", 8, }, - { "max7315", 8, }, - { "pca6107", 8, }, - { "tca6408", 8, }, - { "tca6416", 16, }, + { "max7312", 16 | PCA953X_INT, }, + { "max7313", 16 | PCA953X_INT, }, + { "max7315", 8 | PCA953X_INT, }, + { "pca6107", 8 | PCA953X_INT, }, + { "tca6408", 8 | PCA953X_INT, }, + { "tca6416", 16 | PCA953X_INT, }, /* NYET: { "tca6424", 24, }, */ { } }; @@ -53,6 +60,15 @@ struct pca953x_chip { uint16_t reg_output; uint16_t reg_direction; +#ifdef CONFIG_GPIO_PCA953X_IRQ + struct mutex irq_lock; + uint16_t irq_mask; + uint16_t irq_stat; + uint16_t irq_trig_raise; + uint16_t irq_trig_fall; + int irq_base; +#endif + struct i2c_client *client; struct pca953x_platform_data *dyn_pdata; struct gpio_chip gpio_chip; @@ -202,6 +218,210 @@ static void pca953x_setup_gpio(struct pca953x_chip *chip, int gpios) gc->names = chip->names; } +#ifdef CONFIG_GPIO_PCA953X_IRQ +static int pca953x_gpio_to_irq(struct gpio_chip *gc, unsigned off) +{ + struct pca953x_chip *chip; + + chip = container_of(gc, struct pca953x_chip, gpio_chip); + return chip->irq_base + off; +} + +static void pca953x_irq_mask(unsigned 
int irq) +{ + struct pca953x_chip *chip = get_irq_chip_data(irq); + + chip->irq_mask &= ~(1 << (irq - chip->irq_base)); +} + +static void pca953x_irq_unmask(unsigned int irq) +{ + struct pca953x_chip *chip = get_irq_chip_data(irq); + + chip->irq_mask |= 1 << (irq - chip->irq_base); +} + +static void pca953x_irq_bus_lock(unsigned int irq) +{ + struct pca953x_chip *chip = get_irq_chip_data(irq); + + mutex_lock(&chip->irq_lock); +} + +static void pca953x_irq_bus_sync_unlock(unsigned int irq) +{ + struct pca953x_chip *chip = get_irq_chip_data(irq); + + mutex_unlock(&chip->irq_lock); +} + +static int pca953x_irq_set_type(unsigned int irq, unsigned int type) +{ + struct pca953x_chip *chip = get_irq_chip_data(irq); + uint16_t level = irq - chip->irq_base; + uint16_t mask = 1 << level; + + if (!(type & IRQ_TYPE_EDGE_BOTH)) { + dev_err(&chip->client->dev, "irq %d: unsupported type %d\n", + irq, type); + return -EINVAL; + } + + if (type & IRQ_TYPE_EDGE_FALLING) + chip->irq_trig_fall |= mask; + else + chip->irq_trig_fall &= ~mask; + + if (type & IRQ_TYPE_EDGE_RISING) + chip->irq_trig_raise |= mask; + else + chip->irq_trig_raise &= ~mask; + + return pca953x_gpio_direction_input(&chip->gpio_chip, level); +} + +static struct irq_chip pca953x_irq_chip = { + .name = "pca953x", + .mask = pca953x_irq_mask, + .unmask = pca953x_irq_unmask, + .bus_lock = pca953x_irq_bus_lock, + .bus_sync_unlock = pca953x_irq_bus_sync_unlock, + .set_type = pca953x_irq_set_type, +}; + +static uint16_t pca953x_irq_pending(struct pca953x_chip *chip) +{ + uint16_t cur_stat; + uint16_t old_stat; + uint16_t pending; + uint16_t trigger; + int ret; + + ret = pca953x_read_reg(chip, PCA953X_INPUT, &cur_stat); + if (ret) + return 0; + + /* Remove output pins from the equation */ + cur_stat &= chip->reg_direction; + + old_stat = chip->irq_stat; + trigger = (cur_stat ^ old_stat) & chip->irq_mask; + + if (!trigger) + return 0; + + chip->irq_stat = cur_stat; + + pending = (old_stat & chip->irq_trig_fall) | + (cur_stat & chip->irq_trig_raise); + pending &= trigger; + + return pending; +} + +static irqreturn_t pca953x_irq_handler(int irq, void *devid) +{ + struct pca953x_chip *chip = devid; + uint16_t pending; + uint16_t level; + + pending = pca953x_irq_pending(chip); + + if (!pending) + return IRQ_HANDLED; + + do { + level = __ffs(pending); + handle_nested_irq(level + chip->irq_base); + + pending &= ~(1 << level); + } while (pending); + + return IRQ_HANDLED; +} + +static int pca953x_irq_setup(struct pca953x_chip *chip, + const struct i2c_device_id *id) +{ + struct i2c_client *client = chip->client; + struct pca953x_platform_data *pdata = client->dev.platform_data; + int ret; + + if (pdata->irq_base && (id->driver_data & PCA953X_INT)) { + int lvl; + + ret = pca953x_read_reg(chip, PCA953X_INPUT, + &chip->irq_stat); + if (ret) + goto out_failed; + + /* + * There is no way to know which GPIO line generated the + * interrupt. We have to rely on the previous read for + * this purpose. 
+ */ + chip->irq_stat &= chip->reg_direction; + chip->irq_base = pdata->irq_base; + mutex_init(&chip->irq_lock); + + for (lvl = 0; lvl < chip->gpio_chip.ngpio; lvl++) { + int irq = lvl + chip->irq_base; + + set_irq_chip_data(irq, chip); + set_irq_chip_and_handler(irq, &pca953x_irq_chip, + handle_edge_irq); + set_irq_nested_thread(irq, 1); +#ifdef CONFIG_ARM + set_irq_flags(irq, IRQF_VALID); +#else + set_irq_noprobe(irq); +#endif + } + + ret = request_threaded_irq(client->irq, + NULL, + pca953x_irq_handler, + IRQF_TRIGGER_FALLING | IRQF_ONESHOT, + dev_name(&client->dev), chip); + if (ret) { + dev_err(&client->dev, "failed to request irq %d\n", + client->irq); + goto out_failed; + } + + chip->gpio_chip.to_irq = pca953x_gpio_to_irq; + } + + return 0; + +out_failed: + chip->irq_base = 0; + return ret; +} + +static void pca953x_irq_teardown(struct pca953x_chip *chip) +{ + if (chip->irq_base) + free_irq(chip->client->irq, chip); +} +#else /* CONFIG_GPIO_PCA953X_IRQ */ +static int pca953x_irq_setup(struct pca953x_chip *chip, + const struct i2c_device_id *id) +{ + struct i2c_client *client = chip->client; + struct pca953x_platform_data *pdata = client->dev.platform_data; + + if (pdata->irq_base && (id->driver_data & PCA953X_INT)) + dev_warn(&client->dev, "interrupt support not compiled in\n"); + + return 0; +} + +static void pca953x_irq_teardown(struct pca953x_chip *chip) +{ +} +#endif + /* * Handlers for alternative sources of platform_data */ @@ -286,7 +506,7 @@ static int __devinit pca953x_probe(struct i2c_client *client, /* initialize cached registers from their original values. * we can't share this chip with another i2c master. */ - pca953x_setup_gpio(chip, id->driver_data); + pca953x_setup_gpio(chip, id->driver_data & PCA953X_GPIOS); ret = pca953x_read_reg(chip, PCA953X_OUTPUT, &chip->reg_output); if (ret) @@ -301,6 +521,9 @@ static int __devinit pca953x_probe(struct i2c_client *client, if (ret) goto out_failed; + ret = pca953x_irq_setup(chip, id); + if (ret) + goto out_failed; ret = gpiochip_add(&chip->gpio_chip); if (ret) @@ -317,6 +540,7 @@ static int __devinit pca953x_probe(struct i2c_client *client, return 0; out_failed: + pca953x_irq_teardown(chip); kfree(chip->dyn_pdata); kfree(chip); return ret; @@ -345,6 +569,7 @@ static int pca953x_remove(struct i2c_client *client) return ret; } + pca953x_irq_teardown(chip); kfree(chip->dyn_pdata); kfree(chip); return 0; diff --git a/include/linux/i2c/pca953x.h b/include/linux/i2c/pca953x.h index 29699f8dc5a4..d5c5a60c8a0b 100644 --- a/include/linux/i2c/pca953x.h +++ b/include/linux/i2c/pca953x.h @@ -13,6 +13,9 @@ struct pca953x_platform_data { /* initial polarity inversion setting */ uint16_t invert; + /* interrupt base */ + int irq_base; + void *context; /* param to setup/teardown */ int (*setup)(struct i2c_client *client, -- cgit v1.2.3 From d690b2cd222afc75320b9b8e9da7df02e9e630ca Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 6 Mar 2010 21:28:37 +0100 Subject: PM: Provide generic subsystem-level callbacks There are subsystems whose power management callbacks only need to invoke the callbacks provided by device drivers. Still, their system sleep PM callbacks should play well with the runtime PM callbacks, so that devices suspended at run time can be left in that state for a system sleep transition. Provide a set of generic PM callbacks for such subsystems and define convenience macros for populating dev_pm_ops structures. Signed-off-by: Rafael J. 
Wysocki --- Documentation/power/runtime_pm.txt | 65 +++++++++++ drivers/base/power/Makefile | 1 + drivers/base/power/generic_ops.c | 233 +++++++++++++++++++++++++++++++++++++ include/linux/pm.h | 51 +++++++- include/linux/pm_runtime.h | 6 + 5 files changed, 350 insertions(+), 6 deletions(-) create mode 100644 drivers/base/power/generic_ops.c (limited to 'include') diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt index f19370641684..ab00eeddecaf 100644 --- a/Documentation/power/runtime_pm.txt +++ b/Documentation/power/runtime_pm.txt @@ -335,6 +335,10 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h: 'power.runtime_error' is set or 'power.disable_depth' is greater than zero) + bool pm_runtime_suspended(struct device *dev); + - return true if the device's runtime PM status is 'suspended', or false + otherwise + void pm_runtime_allow(struct device *dev); - set the power.runtime_auto flag for the device and decrease its usage counter (used by the /sys/devices/.../power/control interface to @@ -459,3 +463,64 @@ The PM core always increments the run-time usage counter before calling the ->prepare() callback and decrements it after calling the ->complete() callback. Hence disabling run-time PM temporarily like this will not cause any run-time suspend callbacks to be lost. + +7. Generic subsystem callbacks + +Subsystems may wish to conserve code space by using the set of generic power +management callbacks provided by the PM core, defined in +driver/base/power/generic_ops.c: + + int pm_generic_runtime_idle(struct device *dev); + - invoke the ->runtime_idle() callback provided by the driver of this + device, if defined, and call pm_runtime_suspend() for this device if the + return value is 0 or the callback is not defined + + int pm_generic_runtime_suspend(struct device *dev); + - invoke the ->runtime_suspend() callback provided by the driver of this + device and return its result, or return -EINVAL if not defined + + int pm_generic_runtime_resume(struct device *dev); + - invoke the ->runtime_resume() callback provided by the driver of this + device and return its result, or return -EINVAL if not defined + + int pm_generic_suspend(struct device *dev); + - if the device has not been suspended at run time, invoke the ->suspend() + callback provided by its driver and return its result, or return 0 if not + defined + + int pm_generic_resume(struct device *dev); + - invoke the ->resume() callback provided by the driver of this device and, + if successful, change the device's runtime PM status to 'active' + + int pm_generic_freeze(struct device *dev); + - if the device has not been suspended at run time, invoke the ->freeze() + callback provided by its driver and return its result, or return 0 if not + defined + + int pm_generic_thaw(struct device *dev); + - if the device has not been suspended at run time, invoke the ->thaw() + callback provided by its driver and return its result, or return 0 if not + defined + + int pm_generic_poweroff(struct device *dev); + - if the device has not been suspended at run time, invoke the ->poweroff() + callback provided by its driver and return its result, or return 0 if not + defined + + int pm_generic_restore(struct device *dev); + - invoke the ->restore() callback provided by the driver of this device and, + if successful, change the device's runtime PM status to 'active' + +These functions can be assigned to the ->runtime_idle(), ->runtime_suspend(), +->runtime_resume(), ->suspend(), ->resume(), ->freeze(), ->thaw(), 
->poweroff(), +or ->restore() callback pointers in the subsystem-level dev_pm_ops structures. + +If a subsystem wishes to use all of them at the same time, it can simply assign +the GENERIC_SUBSYS_PM_OPS macro, defined in include/linux/pm.h, to its +dev_pm_ops structure pointer. + +Device drivers that wish to use the same function as a system suspend, freeze, +poweroff and run-time suspend callback, and similarly for system resume, thaw, +restore, and run-time resume, can achieve this with the help of the +UNIVERSAL_DEV_PM_OPS macro defined in include/linux/pm.h (possibly setting its +last argument to NULL). diff --git a/drivers/base/power/Makefile b/drivers/base/power/Makefile index 3ce3519e8f30..89de75325cea 100644 --- a/drivers/base/power/Makefile +++ b/drivers/base/power/Makefile @@ -1,6 +1,7 @@ obj-$(CONFIG_PM) += sysfs.o obj-$(CONFIG_PM_SLEEP) += main.o obj-$(CONFIG_PM_RUNTIME) += runtime.o +obj-$(CONFIG_PM_OPS) += generic_ops.o obj-$(CONFIG_PM_TRACE_RTC) += trace.o ccflags-$(CONFIG_DEBUG_DRIVER) := -DDEBUG diff --git a/drivers/base/power/generic_ops.c b/drivers/base/power/generic_ops.c new file mode 100644 index 000000000000..4b29d4981253 --- /dev/null +++ b/drivers/base/power/generic_ops.c @@ -0,0 +1,233 @@ +/* + * drivers/base/power/generic_ops.c - Generic PM callbacks for subsystems + * + * Copyright (c) 2010 Rafael J. Wysocki , Novell Inc. + * + * This file is released under the GPLv2. + */ + +#include +#include + +#ifdef CONFIG_PM_RUNTIME +/** + * pm_generic_runtime_idle - Generic runtime idle callback for subsystems. + * @dev: Device to handle. + * + * If PM operations are defined for the @dev's driver and they include + * ->runtime_idle(), execute it and return its error code, if nonzero. + * Otherwise, execute pm_runtime_suspend() for the device and return 0. + */ +int pm_generic_runtime_idle(struct device *dev) +{ + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + + if (pm && pm->runtime_idle) { + int ret = pm->runtime_idle(dev); + if (ret) + return ret; + } + + pm_runtime_suspend(dev); + return 0; +} +EXPORT_SYMBOL_GPL(pm_generic_runtime_idle); + +/** + * pm_generic_runtime_suspend - Generic runtime suspend callback for subsystems. + * @dev: Device to suspend. + * + * If PM operations are defined for the @dev's driver and they include + * ->runtime_suspend(), execute it and return its error code. Otherwise, + * return -EINVAL. + */ +int pm_generic_runtime_suspend(struct device *dev) +{ + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + int ret; + + ret = pm && pm->runtime_suspend ? pm->runtime_suspend(dev) : -EINVAL; + + return ret; +} +EXPORT_SYMBOL_GPL(pm_generic_runtime_suspend); + +/** + * pm_generic_runtime_resume - Generic runtime resume callback for subsystems. + * @dev: Device to resume. + * + * If PM operations are defined for the @dev's driver and they include + * ->runtime_resume(), execute it and return its error code. Otherwise, + * return -EINVAL. + */ +int pm_generic_runtime_resume(struct device *dev) +{ + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + int ret; + + ret = pm && pm->runtime_resume ? pm->runtime_resume(dev) : -EINVAL; + + return ret; +} +EXPORT_SYMBOL_GPL(pm_generic_runtime_resume); +#endif /* CONFIG_PM_RUNTIME */ + +#ifdef CONFIG_PM_SLEEP +/** + * __pm_generic_call - Generic suspend/freeze/poweroff/thaw subsystem callback. + * @dev: Device to handle. + * @event: PM transition of the system under way. 
+ * + * If the device has not been suspended at run time, execute the + * suspend/freeze/poweroff/thaw callback provided by its driver, if defined, and + * return its error code. Otherwise, return zero. + */ +static int __pm_generic_call(struct device *dev, int event) +{ + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + int (*callback)(struct device *); + + if (!pm || pm_runtime_suspended(dev)) + return 0; + + switch (event) { + case PM_EVENT_SUSPEND: + callback = pm->suspend; + break; + case PM_EVENT_FREEZE: + callback = pm->freeze; + break; + case PM_EVENT_HIBERNATE: + callback = pm->poweroff; + break; + case PM_EVENT_THAW: + callback = pm->thaw; + break; + default: + callback = NULL; + break; + } + + return callback ? callback(dev) : 0; +} + +/** + * pm_generic_suspend - Generic suspend callback for subsystems. + * @dev: Device to suspend. + */ +int pm_generic_suspend(struct device *dev) +{ + return __pm_generic_call(dev, PM_EVENT_SUSPEND); +} +EXPORT_SYMBOL_GPL(pm_generic_suspend); + +/** + * pm_generic_freeze - Generic freeze callback for subsystems. + * @dev: Device to freeze. + */ +int pm_generic_freeze(struct device *dev) +{ + return __pm_generic_call(dev, PM_EVENT_FREEZE); +} +EXPORT_SYMBOL_GPL(pm_generic_freeze); + +/** + * pm_generic_poweroff - Generic poweroff callback for subsystems. + * @dev: Device to handle. + */ +int pm_generic_poweroff(struct device *dev) +{ + return __pm_generic_call(dev, PM_EVENT_HIBERNATE); +} +EXPORT_SYMBOL_GPL(pm_generic_poweroff); + +/** + * pm_generic_thaw - Generic thaw callback for subsystems. + * @dev: Device to thaw. + */ +int pm_generic_thaw(struct device *dev) +{ + return __pm_generic_call(dev, PM_EVENT_THAW); +} +EXPORT_SYMBOL_GPL(pm_generic_thaw); + +/** + * __pm_generic_resume - Generic resume/restore callback for subsystems. + * @dev: Device to handle. + * @event: PM transition of the system under way. + * + * Execute the resume/resotre callback provided by the @dev's driver, if + * defined. If it returns 0, change the device's runtime PM status to 'active'. + * Return the callback's error code. + */ +static int __pm_generic_resume(struct device *dev, int event) +{ + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + int (*callback)(struct device *); + int ret; + + if (!pm) + return 0; + + switch (event) { + case PM_EVENT_RESUME: + callback = pm->resume; + break; + case PM_EVENT_RESTORE: + callback = pm->restore; + break; + default: + callback = NULL; + break; + } + + if (!callback) + return 0; + + ret = callback(dev); + if (!ret) { + pm_runtime_disable(dev); + pm_runtime_set_active(dev); + pm_runtime_enable(dev); + } + + return ret; +} + +/** + * pm_generic_resume - Generic resume callback for subsystems. + * @dev: Device to resume. + */ +int pm_generic_resume(struct device *dev) +{ + return __pm_generic_resume(dev, PM_EVENT_RESUME); +} +EXPORT_SYMBOL_GPL(pm_generic_resume); + +/** + * pm_generic_restore - Generic restore callback for subsystems. + * @dev: Device to restore. 
+ */ +int pm_generic_restore(struct device *dev) +{ + return __pm_generic_resume(dev, PM_EVENT_RESTORE); +} +EXPORT_SYMBOL_GPL(pm_generic_restore); +#endif /* CONFIG_PM_SLEEP */ + +struct dev_pm_ops generic_subsys_pm_ops = { +#ifdef CONFIG_PM_SLEEP + .suspend = pm_generic_suspend, + .resume = pm_generic_resume, + .freeze = pm_generic_freeze, + .thaw = pm_generic_thaw, + .poweroff = pm_generic_poweroff, + .restore = pm_generic_restore, +#endif +#ifdef CONFIG_PM_RUNTIME + .runtime_suspend = pm_generic_runtime_suspend, + .runtime_resume = pm_generic_runtime_resume, + .runtime_idle = pm_generic_runtime_idle, +#endif +}; +EXPORT_SYMBOL_GPL(generic_subsys_pm_ops); diff --git a/include/linux/pm.h b/include/linux/pm.h index e80df06ad22a..8e258c727971 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -215,20 +215,59 @@ struct dev_pm_ops { int (*runtime_idle)(struct device *dev); }; +#ifdef CONFIG_PM_SLEEP +#define SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ + .suspend = suspend_fn, \ + .resume = resume_fn, \ + .freeze = suspend_fn, \ + .thaw = resume_fn, \ + .poweroff = suspend_fn, \ + .restore = resume_fn, +#else +#define SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) +#endif + +#ifdef CONFIG_PM_RUNTIME +#define SET_RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \ + .runtime_suspend = suspend_fn, \ + .runtime_resume = resume_fn, \ + .runtime_idle = idle_fn, +#else +#define SET_RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) +#endif + /* * Use this if you want to use the same suspend and resume callbacks for suspend * to RAM and hibernation. */ #define SIMPLE_DEV_PM_OPS(name, suspend_fn, resume_fn) \ const struct dev_pm_ops name = { \ - .suspend = suspend_fn, \ - .resume = resume_fn, \ - .freeze = suspend_fn, \ - .thaw = resume_fn, \ - .poweroff = suspend_fn, \ - .restore = resume_fn, \ + SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ +} + +/* + * Use this for defining a set of PM operations to be used in all situations + * (sustem suspend, hibernation or runtime PM). + */ +#define UNIVERSAL_DEV_PM_OPS(name, suspend_fn, resume_fn, idle_fn) \ +const struct dev_pm_ops name = { \ + SET_SYSTEM_SLEEP_PM_OPS(suspend_fn, resume_fn) \ + SET_RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \ } +/* + * Use this for subsystems (bus types, device types, device classes) that don't + * need any special suspend/resume handling in addition to invoking the PM + * callbacks provided by device drivers supporting both the system sleep PM and + * runtime PM, make the pm member point to generic_subsys_pm_ops. 
+ */ +#ifdef CONFIG_PM_OPS +extern struct dev_pm_ops generic_subsys_pm_ops; +#define GENERIC_SUBSYS_PM_OPS (&generic_subsys_pm_ops) +#else +#define GENERIC_SUBSYS_PM_OPS NULL +#endif + /** * PM_EVENT_ messages * diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 7d773aac5314..b776db737244 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -62,6 +62,11 @@ static inline void device_set_run_wake(struct device *dev, bool enable) dev->power.run_wake = enable; } +static inline bool pm_runtime_suspended(struct device *dev) +{ + return dev->power.runtime_status == RPM_SUSPENDED; +} + #else /* !CONFIG_PM_RUNTIME */ static inline int pm_runtime_idle(struct device *dev) { return -ENOSYS; } @@ -89,6 +94,7 @@ static inline void pm_runtime_get_noresume(struct device *dev) {} static inline void pm_runtime_put_noidle(struct device *dev) {} static inline bool device_run_wake(struct device *dev) { return false; } static inline void device_set_run_wake(struct device *dev, bool enable) {} +static inline bool pm_runtime_suspended(struct device *dev) { return false; } #endif /* !CONFIG_PM_RUNTIME */ -- cgit v1.2.3 From f99344fc69c3df46786a39ea4283a4175ea40b3f Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 5 Jan 2010 13:59:07 +0000 Subject: mfd: Add a data argument to the WM8350 IRQ free function To better match genirq. Signed-off-by: Mark Brown Signed-off-by: Samuel Ortiz --- drivers/mfd/wm8350-irq.c | 2 +- drivers/power/wm8350_power.c | 24 ++++++++++++------------ drivers/regulator/wm8350-regulator.c | 2 +- drivers/rtc/rtc-wm8350.c | 4 ++-- include/linux/mfd/wm8350/core.h | 3 ++- sound/soc/codecs/wm8350.c | 4 ++-- 6 files changed, 20 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/drivers/mfd/wm8350-irq.c b/drivers/mfd/wm8350-irq.c index 9025f29e2707..655836bc69a0 100644 --- a/drivers/mfd/wm8350-irq.c +++ b/drivers/mfd/wm8350-irq.c @@ -451,7 +451,7 @@ int wm8350_register_irq(struct wm8350 *wm8350, int irq, } EXPORT_SYMBOL_GPL(wm8350_register_irq); -int wm8350_free_irq(struct wm8350 *wm8350, int irq) +int wm8350_free_irq(struct wm8350 *wm8350, int irq, void *data) { if (irq < 0 || irq >= WM8350_NUM_IRQ) return -EINVAL; diff --git a/drivers/power/wm8350_power.c b/drivers/power/wm8350_power.c index ad4f071e1287..3839a5e1c4a7 100644 --- a/drivers/power/wm8350_power.c +++ b/drivers/power/wm8350_power.c @@ -428,18 +428,18 @@ static void wm8350_init_charger(struct wm8350 *wm8350) static void free_charger_irq(struct wm8350 *wm8350) { - wm8350_free_irq(wm8350, WM8350_IRQ_CHG_BAT_HOT); - wm8350_free_irq(wm8350, WM8350_IRQ_CHG_BAT_COLD); - wm8350_free_irq(wm8350, WM8350_IRQ_CHG_BAT_FAIL); - wm8350_free_irq(wm8350, WM8350_IRQ_CHG_TO); - wm8350_free_irq(wm8350, WM8350_IRQ_CHG_END); - wm8350_free_irq(wm8350, WM8350_IRQ_CHG_START); - wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P9); - wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P1); - wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_2P85); - wm8350_free_irq(wm8350, WM8350_IRQ_EXT_USB_FB); - wm8350_free_irq(wm8350, WM8350_IRQ_EXT_WALL_FB); - wm8350_free_irq(wm8350, WM8350_IRQ_EXT_BAT_FB); + wm8350_free_irq(wm8350, WM8350_IRQ_CHG_BAT_HOT, wm8350); + wm8350_free_irq(wm8350, WM8350_IRQ_CHG_BAT_COLD, wm8350); + wm8350_free_irq(wm8350, WM8350_IRQ_CHG_BAT_FAIL, wm8350); + wm8350_free_irq(wm8350, WM8350_IRQ_CHG_TO, wm8350); + wm8350_free_irq(wm8350, WM8350_IRQ_CHG_END, wm8350); + wm8350_free_irq(wm8350, WM8350_IRQ_CHG_START, wm8350); + wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P9, wm8350); + 
wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P1, wm8350); + wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_2P85, wm8350); + wm8350_free_irq(wm8350, WM8350_IRQ_EXT_USB_FB, wm8350); + wm8350_free_irq(wm8350, WM8350_IRQ_EXT_WALL_FB, wm8350); + wm8350_free_irq(wm8350, WM8350_IRQ_EXT_BAT_FB, wm8350); } static __devinit int wm8350_power_probe(struct platform_device *pdev) diff --git a/drivers/regulator/wm8350-regulator.c b/drivers/regulator/wm8350-regulator.c index 94227dd6ba7b..723cd1fb4867 100644 --- a/drivers/regulator/wm8350-regulator.c +++ b/drivers/regulator/wm8350-regulator.c @@ -1453,7 +1453,7 @@ static int wm8350_regulator_remove(struct platform_device *pdev) struct regulator_dev *rdev = platform_get_drvdata(pdev); struct wm8350 *wm8350 = rdev_get_drvdata(rdev); - wm8350_free_irq(wm8350, wm8350_reg[pdev->id].irq); + wm8350_free_irq(wm8350, wm8350_reg[pdev->id].irq, rdev); regulator_unregister(rdev); diff --git a/drivers/rtc/rtc-wm8350.c b/drivers/rtc/rtc-wm8350.c index f1e440521c54..a5512f515998 100644 --- a/drivers/rtc/rtc-wm8350.c +++ b/drivers/rtc/rtc-wm8350.c @@ -478,8 +478,8 @@ static int __devexit wm8350_rtc_remove(struct platform_device *pdev) struct wm8350 *wm8350 = platform_get_drvdata(pdev); struct wm8350_rtc *wm_rtc = &wm8350->rtc; - wm8350_free_irq(wm8350, WM8350_IRQ_RTC_SEC); - wm8350_free_irq(wm8350, WM8350_IRQ_RTC_ALM); + wm8350_free_irq(wm8350, WM8350_IRQ_RTC_SEC, wm8350); + wm8350_free_irq(wm8350, WM8350_IRQ_RTC_ALM, wm8350); rtc_device_unregister(wm_rtc->rtc); diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h index 43868899bf49..8883125ddea1 100644 --- a/include/linux/mfd/wm8350/core.h +++ b/include/linux/mfd/wm8350/core.h @@ -680,7 +680,8 @@ int wm8350_block_write(struct wm8350 *wm8350, int reg, int size, u16 *src); int wm8350_register_irq(struct wm8350 *wm8350, int irq, irq_handler_t handler, unsigned long flags, const char *name, void *data); -int wm8350_free_irq(struct wm8350 *wm8350, int irq); +int wm8350_free_irq(struct wm8350 *wm8350, int irq, void *data); + int wm8350_mask_irq(struct wm8350 *wm8350, int irq); int wm8350_unmask_irq(struct wm8350 *wm8350, int irq); int wm8350_irq_init(struct wm8350 *wm8350, int irq, diff --git a/sound/soc/codecs/wm8350.c b/sound/soc/codecs/wm8350.c index 718ef912e758..079bf745bf05 100644 --- a/sound/soc/codecs/wm8350.c +++ b/sound/soc/codecs/wm8350.c @@ -1521,8 +1521,8 @@ static int wm8350_remove(struct platform_device *pdev) WM8350_JDL_ENA | WM8350_JDR_ENA); wm8350_clear_bits(wm8350, WM8350_POWER_MGMT_4, WM8350_TOCLK_ENA); - wm8350_free_irq(wm8350, WM8350_IRQ_CODEC_JCK_DET_L); - wm8350_free_irq(wm8350, WM8350_IRQ_CODEC_JCK_DET_R); + wm8350_free_irq(wm8350, WM8350_IRQ_CODEC_JCK_DET_L, priv); + wm8350_free_irq(wm8350, WM8350_IRQ_CODEC_JCK_DET_R, priv); priv->hpl.jack = NULL; priv->hpr.jack = NULL; -- cgit v1.2.3 From 29c71b138c83c8191f1f7e46fcc28b9d6bc8a5dd Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 5 Jan 2010 13:59:08 +0000 Subject: rtc: Suppress duplicate enable/disable of WM8350 update interrupt Unlike the wm8350-custom code genirq nests enable and disable calls so we can't just unconditionally mask or unmask the interrupt, we need to remember the state we set and only mask or unmask when there is a real change. 
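The underlying point is that, after the genirq conversion, wm8350_mask_irq()/wm8350_unmask_irq() map onto disable_irq()/enable_irq(), which are reference counted rather than idempotent, so issuing the same request twice no longer collapses into a no-op. Remembering the last programmed state and acting only on real transitions keeps the interrupt line balanced. A sketch of that idiom, with invented "foo" names rather than the driver's own (the actual wm8350 change follows in the diff below):

    /* Illustrative only; the foo_* names and helpers are placeholders. */
    struct foo_rtc {
            int update_enabled;             /* last state written to hw */
    };

    static int foo_rtc_update_irq_enable(struct foo_rtc *rtc, int enabled)
    {
            if (enabled == rtc->update_enabled)
                    return 0;               /* duplicate request, nothing to do */

            if (enabled)
                    foo_hw_unmask_update_irq(rtc);  /* assumed helper */
            else
                    foo_hw_mask_update_irq(rtc);    /* assumed helper */

            rtc->update_enabled = enabled;
            return 0;
    }
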
Signed-off-by: Mark Brown Acked-by: Alessandro Zummo Cc: rtc-linux@googlegroups.com Signed-off-by: Samuel Ortiz --- drivers/rtc/rtc-wm8350.c | 7 +++++++ include/linux/mfd/wm8350/rtc.h | 1 + 2 files changed, 8 insertions(+) (limited to 'include') diff --git a/drivers/rtc/rtc-wm8350.c b/drivers/rtc/rtc-wm8350.c index a5512f515998..3d0dc76b38af 100644 --- a/drivers/rtc/rtc-wm8350.c +++ b/drivers/rtc/rtc-wm8350.c @@ -307,11 +307,18 @@ static int wm8350_rtc_update_irq_enable(struct device *dev, { struct wm8350 *wm8350 = dev_get_drvdata(dev); + /* Suppress duplicate changes since genirq nests enable and + * disable calls. */ + if (enabled == wm8350->rtc.update_enabled) + return 0; + if (enabled) wm8350_unmask_irq(wm8350, WM8350_IRQ_RTC_SEC); else wm8350_mask_irq(wm8350, WM8350_IRQ_RTC_SEC); + wm8350->rtc.update_enabled = enabled; + return 0; } diff --git a/include/linux/mfd/wm8350/rtc.h b/include/linux/mfd/wm8350/rtc.h index 24add2bef6c9..ebd72ffc62d1 100644 --- a/include/linux/mfd/wm8350/rtc.h +++ b/include/linux/mfd/wm8350/rtc.h @@ -263,6 +263,7 @@ struct wm8350_rtc { struct platform_device *pdev; struct rtc_device *rtc; int alarm_enabled; /* used over suspend/resume */ + int update_enabled; }; #endif -- cgit v1.2.3 From 760e4518788df6762700e6bb9dd8692379f11168 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 5 Jan 2010 13:59:09 +0000 Subject: mfd: Convert WM8350 to genirq This gives us use of the diagnostic facilities genirq provides and will allow implementation of interrupt support for the WM8350 GPIOs. Stub functions are provided to ease the transition of the individual drivers, probably after additional work to pass the IRQ numbers via the struct devices. Signed-off-by: Mark Brown Signed-off-by: Samuel Ortiz --- drivers/mfd/wm8350-irq.c | 155 ++++++++++++++++++++++------------------ include/linux/mfd/wm8350/core.h | 44 ++++++++---- 2 files changed, 118 insertions(+), 81 deletions(-) (limited to 'include') diff --git a/drivers/mfd/wm8350-irq.c b/drivers/mfd/wm8350-irq.c index 655836bc69a0..f56c9adf9493 100644 --- a/drivers/mfd/wm8350-irq.c +++ b/drivers/mfd/wm8350-irq.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include @@ -29,8 +29,6 @@ #include #include -#define WM8350_NUM_IRQ_REGS 7 - #define WM8350_INT_OFFSET_1 0 #define WM8350_INT_OFFSET_2 1 #define WM8350_POWER_UP_INT_OFFSET 2 @@ -366,19 +364,10 @@ static struct wm8350_irq_data wm8350_irqs[] = { }, }; -static void wm8350_irq_call_handler(struct wm8350 *wm8350, int irq) +static inline struct wm8350_irq_data *irq_to_wm8350_irq(struct wm8350 *wm8350, + int irq) { - mutex_lock(&wm8350->irq_mutex); - - if (wm8350->irq[irq].handler) - wm8350->irq[irq].handler(irq, wm8350->irq[irq].data); - else { - dev_err(wm8350->dev, "irq %d nobody cared. now masked.\n", - irq); - wm8350_mask_irq(wm8350, irq); - } - - mutex_unlock(&wm8350->irq_mutex); + return &wm8350_irqs[irq - wm8350->irq_base]; } /* @@ -386,7 +375,9 @@ static void wm8350_irq_call_handler(struct wm8350 *wm8350, int irq) * interrupts are clear on read the IRQ line will be reasserted and * the physical IRQ will be handled again if another interrupt is * asserted while we run - in the normal course of events this is a - * rare occurrence so we save I2C/SPI reads. + * rare occurrence so we save I2C/SPI reads. We're also assuming that + * it's rare to get lots of interrupts firing simultaneously so try to + * minimise I/O. 
*/ static irqreturn_t wm8350_irq(int irq, void *irq_data) { @@ -397,7 +388,6 @@ static irqreturn_t wm8350_irq(int irq, void *irq_data) struct wm8350_irq_data *data; int i; - /* TODO: Use block reads to improve performance? */ level_one = wm8350_reg_read(wm8350, WM8350_SYSTEM_INTERRUPTS) & ~wm8350_reg_read(wm8350, WM8350_SYSTEM_INTERRUPTS_MASK); @@ -416,93 +406,101 @@ static irqreturn_t wm8350_irq(int irq, void *irq_data) sub_reg[data->reg] = wm8350_reg_read(wm8350, WM8350_INT_STATUS_1 + data->reg); - sub_reg[data->reg] &= - ~wm8350_reg_read(wm8350, - WM8350_INT_STATUS_1_MASK + - data->reg); + sub_reg[data->reg] &= ~wm8350->irq_masks[data->reg]; read_done[data->reg] = 1; } if (sub_reg[data->reg] & data->mask) - wm8350_irq_call_handler(wm8350, i); + handle_nested_irq(wm8350->irq_base + i); } return IRQ_HANDLED; } -int wm8350_register_irq(struct wm8350 *wm8350, int irq, - irq_handler_t handler, unsigned long flags, - const char *name, void *data) +static void wm8350_irq_lock(unsigned int irq) { - if (irq < 0 || irq >= WM8350_NUM_IRQ || !handler) - return -EINVAL; - - if (wm8350->irq[irq].handler) - return -EBUSY; - - mutex_lock(&wm8350->irq_mutex); - wm8350->irq[irq].handler = handler; - wm8350->irq[irq].data = data; - mutex_unlock(&wm8350->irq_mutex); - - wm8350_unmask_irq(wm8350, irq); + struct wm8350 *wm8350 = get_irq_chip_data(irq); - return 0; + mutex_lock(&wm8350->irq_lock); } -EXPORT_SYMBOL_GPL(wm8350_register_irq); -int wm8350_free_irq(struct wm8350 *wm8350, int irq, void *data) +static void wm8350_irq_sync_unlock(unsigned int irq) { - if (irq < 0 || irq >= WM8350_NUM_IRQ) - return -EINVAL; + struct wm8350 *wm8350 = get_irq_chip_data(irq); + int i; - wm8350_mask_irq(wm8350, irq); + for (i = 0; i < ARRAY_SIZE(wm8350->irq_masks); i++) { + /* If there's been a change in the mask write it back + * to the hardware. 
*/ + if (wm8350->irq_masks[i] != + wm8350->reg_cache[WM8350_INT_STATUS_1_MASK + i]) + WARN_ON(wm8350_reg_write(wm8350, + WM8350_INT_STATUS_1_MASK + i, + wm8350->irq_masks[i])); + } - mutex_lock(&wm8350->irq_mutex); - wm8350->irq[irq].handler = NULL; - mutex_unlock(&wm8350->irq_mutex); - return 0; + mutex_unlock(&wm8350->irq_lock); } -EXPORT_SYMBOL_GPL(wm8350_free_irq); -int wm8350_mask_irq(struct wm8350 *wm8350, int irq) +static void wm8350_irq_enable(unsigned int irq) { - return wm8350_set_bits(wm8350, WM8350_INT_STATUS_1_MASK + - wm8350_irqs[irq].reg, - wm8350_irqs[irq].mask); + struct wm8350 *wm8350 = get_irq_chip_data(irq); + struct wm8350_irq_data *irq_data = irq_to_wm8350_irq(wm8350, irq); + + wm8350->irq_masks[irq_data->reg] &= ~irq_data->mask; } -EXPORT_SYMBOL_GPL(wm8350_mask_irq); -int wm8350_unmask_irq(struct wm8350 *wm8350, int irq) +static void wm8350_irq_disable(unsigned int irq) { - return wm8350_clear_bits(wm8350, WM8350_INT_STATUS_1_MASK + - wm8350_irqs[irq].reg, - wm8350_irqs[irq].mask); + struct wm8350 *wm8350 = get_irq_chip_data(irq); + struct wm8350_irq_data *irq_data = irq_to_wm8350_irq(wm8350, irq); + + wm8350->irq_masks[irq_data->reg] |= irq_data->mask; } -EXPORT_SYMBOL_GPL(wm8350_unmask_irq); + +static struct irq_chip wm8350_irq_chip = { + .name = "wm8350", + .bus_lock = wm8350_irq_lock, + .bus_sync_unlock = wm8350_irq_sync_unlock, + .disable = wm8350_irq_disable, + .enable = wm8350_irq_enable, +}; int wm8350_irq_init(struct wm8350 *wm8350, int irq, struct wm8350_platform_data *pdata) { - int ret; + int ret, cur_irq, i; int flags = IRQF_ONESHOT; if (!irq) { - dev_err(wm8350->dev, "No IRQ configured\n"); - return -EINVAL; + dev_warn(wm8350->dev, "No interrupt support, no core IRQ\n"); + return 0; + } + + if (!pdata || !pdata->irq_base) { + dev_warn(wm8350->dev, "No interrupt support, no IRQ base\n"); + return 0; } + /* Mask top level interrupts */ wm8350_reg_write(wm8350, WM8350_SYSTEM_INTERRUPTS_MASK, 0xFFFF); - wm8350_reg_write(wm8350, WM8350_INT_STATUS_1_MASK, 0xFFFF); - wm8350_reg_write(wm8350, WM8350_INT_STATUS_2_MASK, 0xFFFF); - wm8350_reg_write(wm8350, WM8350_UNDER_VOLTAGE_INT_STATUS_MASK, 0xFFFF); - wm8350_reg_write(wm8350, WM8350_GPIO_INT_STATUS_MASK, 0xFFFF); - wm8350_reg_write(wm8350, WM8350_COMPARATOR_INT_STATUS_MASK, 0xFFFF); - mutex_init(&wm8350->irq_mutex); + /* Mask all individual interrupts by default and cache the + * masks. We read the masks back since there are unwritable + * bits in the mask registers. */ + for (i = 0; i < ARRAY_SIZE(wm8350->irq_masks); i++) { + wm8350_reg_write(wm8350, WM8350_INT_STATUS_1_MASK + i, + 0xFFFF); + wm8350->irq_masks[i] = + wm8350_reg_read(wm8350, + WM8350_INT_STATUS_1_MASK + i); + } + + mutex_init(&wm8350->irq_lock); wm8350->chip_irq = irq; + wm8350->irq_base = pdata->irq_base; - if (pdata && pdata->irq_high) { + if (pdata->irq_high) { flags |= IRQF_TRIGGER_HIGH; wm8350_set_bits(wm8350, WM8350_SYSTEM_CONTROL_1, @@ -514,11 +512,32 @@ int wm8350_irq_init(struct wm8350 *wm8350, int irq, WM8350_IRQ_POL); } + /* Register with genirq */ + for (cur_irq = wm8350->irq_base; + cur_irq < ARRAY_SIZE(wm8350_irqs) + wm8350->irq_base; + cur_irq++) { + set_irq_chip_data(cur_irq, wm8350); + set_irq_chip_and_handler(cur_irq, &wm8350_irq_chip, + handle_edge_irq); + set_irq_nested_thread(cur_irq, 1); + + /* ARM needs us to explicitly flag the IRQ as valid + * and will set them noprobe when we do so. 
*/ +#ifdef CONFIG_ARM + set_irq_flags(cur_irq, IRQF_VALID); +#else + set_irq_noprobe(cur_irq); +#endif + } + ret = request_threaded_irq(irq, NULL, wm8350_irq, flags, "wm8350", wm8350); if (ret != 0) dev_err(wm8350->dev, "Failed to request IRQ: %d\n", ret); + /* Allow interrupts to fire */ + wm8350_reg_write(wm8350, WM8350_SYSTEM_INTERRUPTS_MASK, 0); + return ret; } diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h index 8883125ddea1..04217a71f173 100644 --- a/include/linux/mfd/wm8350/core.h +++ b/include/linux/mfd/wm8350/core.h @@ -579,6 +579,8 @@ #define WM8350_NUM_IRQ 63 +#define WM8350_NUM_IRQ_REGS 7 + struct wm8350_reg_access { u16 readable; /* Mask of readable bits */ u16 writable; /* Mask of writable bits */ @@ -600,11 +602,6 @@ extern const u16 wm8352_mode3_defaults[]; struct wm8350; -struct wm8350_irq { - irq_handler_t handler; - void *data; -}; - struct wm8350_hwmon { struct platform_device *pdev; struct device *classdev; @@ -626,9 +623,10 @@ struct wm8350 { struct mutex auxadc_mutex; /* Interrupt handling */ - struct mutex irq_mutex; /* IRQ table mutex */ - struct wm8350_irq irq[WM8350_NUM_IRQ]; + struct mutex irq_lock; int chip_irq; + int irq_base; + u16 irq_masks[WM8350_NUM_IRQ_REGS]; /* Client devices */ struct wm8350_codec codec; @@ -677,13 +675,33 @@ int wm8350_block_write(struct wm8350 *wm8350, int reg, int size, u16 *src); /* * WM8350 internal interrupts */ -int wm8350_register_irq(struct wm8350 *wm8350, int irq, - irq_handler_t handler, unsigned long flags, - const char *name, void *data); -int wm8350_free_irq(struct wm8350 *wm8350, int irq, void *data); +static inline int wm8350_register_irq(struct wm8350 *wm8350, int irq, + irq_handler_t handler, + unsigned long flags, + const char *name, void *data) +{ + if (!wm8350->irq_base) + return -ENODEV; + + return request_threaded_irq(irq + wm8350->irq_base, NULL, + handler, flags, name, data); +} + +static inline void wm8350_free_irq(struct wm8350 *wm8350, int irq, void *data) +{ + free_irq(irq + wm8350->irq_base, data); +} + +static inline void wm8350_mask_irq(struct wm8350 *wm8350, int irq) +{ + disable_irq(irq + wm8350->irq_base); +} + +static inline void wm8350_unmask_irq(struct wm8350 *wm8350, int irq) +{ + enable_irq(irq + wm8350->irq_base); +} -int wm8350_mask_irq(struct wm8350 *wm8350, int irq); -int wm8350_unmask_irq(struct wm8350 *wm8350, int irq); int wm8350_irq_init(struct wm8350 *wm8350, int irq, struct wm8350_platform_data *pdata); int wm8350_irq_exit(struct wm8350 *wm8350); -- cgit v1.2.3 From 38f6ce45f0bca04ac653c57cacd375c469995321 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 7 Jan 2010 16:10:08 +0000 Subject: gpiolib: Add support for WM8350 GPIO controller Signed-off-by: Mark Brown Signed-off-by: Samuel Ortiz --- drivers/gpio/Kconfig | 7 ++ drivers/gpio/Makefile | 1 + drivers/gpio/wm8350-gpiolib.c | 181 ++++++++++++++++++++++++++++++++++++++++ include/linux/mfd/wm8350/core.h | 2 + include/linux/mfd/wm8350/gpio.h | 1 + 5 files changed, 192 insertions(+) create mode 100644 drivers/gpio/wm8350-gpiolib.c (limited to 'include') diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index a4cdbd51b1c6..acac9f60db1d 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -185,6 +185,13 @@ config GPIO_WM831X Say yes here to access the GPIO signals of WM831x power management chips from Wolfson Microelectronics. 
+config GPIO_WM8350 + tristate "WM8350 GPIOs" + depends on MFD_WM8350 + help + Say yes here to access the GPIO signals of WM8350 power management + chips from Wolfson Microelectronics. + config GPIO_ADP5520 tristate "GPIO Support for ADP5520 PMIC" depends on PMIC_ADP5520 diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile index 128abf8a98da..90b0880923de 100644 --- a/drivers/gpio/Makefile +++ b/drivers/gpio/Makefile @@ -25,3 +25,4 @@ obj-$(CONFIG_GPIO_BT8XX) += bt8xxgpio.o obj-$(CONFIG_GPIO_IT8761E) += it8761e_gpio.o obj-$(CONFIG_GPIO_VR41XX) += vr41xx_giu.o obj-$(CONFIG_GPIO_WM831X) += wm831x-gpio.o +obj-$(CONFIG_GPIO_WM8350) += wm8350-gpiolib.o diff --git a/drivers/gpio/wm8350-gpiolib.c b/drivers/gpio/wm8350-gpiolib.c new file mode 100644 index 000000000000..511840d1c7ba --- /dev/null +++ b/drivers/gpio/wm8350-gpiolib.c @@ -0,0 +1,181 @@ +/* + * wm835x-gpiolib.c -- gpiolib support for Wolfson WM835x PMICs + * + * Copyright 2009 Wolfson Microelectronics PLC. + * + * Author: Mark Brown + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +struct wm8350_gpio_data { + struct wm8350 *wm8350; + struct gpio_chip gpio_chip; +}; + +static inline struct wm8350_gpio_data *to_wm8350_gpio(struct gpio_chip *chip) +{ + return container_of(chip, struct wm8350_gpio_data, gpio_chip); +} + +static int wm8350_gpio_direction_in(struct gpio_chip *chip, unsigned offset) +{ + struct wm8350_gpio_data *wm8350_gpio = to_wm8350_gpio(chip); + struct wm8350 *wm8350 = wm8350_gpio->wm8350; + + return wm8350_set_bits(wm8350, WM8350_GPIO_CONFIGURATION_I_O, + 1 << offset); +} + +static int wm8350_gpio_get(struct gpio_chip *chip, unsigned offset) +{ + struct wm8350_gpio_data *wm8350_gpio = to_wm8350_gpio(chip); + struct wm8350 *wm8350 = wm8350_gpio->wm8350; + int ret; + + ret = wm8350_reg_read(wm8350, WM8350_GPIO_LEVEL); + if (ret < 0) + return ret; + + if (ret & (1 << offset)) + return 1; + else + return 0; +} + +static void wm8350_gpio_set(struct gpio_chip *chip, unsigned offset, int value) +{ + struct wm8350_gpio_data *wm8350_gpio = to_wm8350_gpio(chip); + struct wm8350 *wm8350 = wm8350_gpio->wm8350; + + if (value) + wm8350_set_bits(wm8350, WM8350_GPIO_LEVEL, 1 << offset); + else + wm8350_clear_bits(wm8350, WM8350_GPIO_LEVEL, 1 << offset); +} + +static int wm8350_gpio_direction_out(struct gpio_chip *chip, + unsigned offset, int value) +{ + struct wm8350_gpio_data *wm8350_gpio = to_wm8350_gpio(chip); + struct wm8350 *wm8350 = wm8350_gpio->wm8350; + int ret; + + ret = wm8350_clear_bits(wm8350, WM8350_GPIO_CONFIGURATION_I_O, + 1 << offset); + if (ret < 0) + return ret; + + /* Don't have an atomic direction/value setup */ + wm8350_gpio_set(chip, offset, value); + + return 0; +} + +static int wm8350_gpio_to_irq(struct gpio_chip *chip, unsigned offset) +{ + struct wm8350_gpio_data *wm8350_gpio = to_wm8350_gpio(chip); + struct wm8350 *wm8350 = wm8350_gpio->wm8350; + + if (!wm8350->irq_base) + return -EINVAL; + + return wm8350->irq_base + WM8350_IRQ_GPIO(offset); +} + +static struct gpio_chip template_chip = { + .label = "wm8350", + .owner = THIS_MODULE, + .direction_input = wm8350_gpio_direction_in, + .get = wm8350_gpio_get, + .direction_output = wm8350_gpio_direction_out, + .set = wm8350_gpio_set, + .to_irq = 
wm8350_gpio_to_irq, + .can_sleep = 1, +}; + +static int __devinit wm8350_gpio_probe(struct platform_device *pdev) +{ + struct wm8350 *wm8350 = dev_get_drvdata(pdev->dev.parent); + struct wm8350_platform_data *pdata = wm8350->dev->platform_data; + struct wm8350_gpio_data *wm8350_gpio; + int ret; + + wm8350_gpio = kzalloc(sizeof(*wm8350_gpio), GFP_KERNEL); + if (wm8350_gpio == NULL) + return -ENOMEM; + + wm8350_gpio->wm8350 = wm8350; + wm8350_gpio->gpio_chip = template_chip; + wm8350_gpio->gpio_chip.ngpio = 13; + wm8350_gpio->gpio_chip.dev = &pdev->dev; + if (pdata && pdata->gpio_base) + wm8350_gpio->gpio_chip.base = pdata->gpio_base; + else + wm8350_gpio->gpio_chip.base = -1; + + ret = gpiochip_add(&wm8350_gpio->gpio_chip); + if (ret < 0) { + dev_err(&pdev->dev, "Could not register gpiochip, %d\n", + ret); + goto err; + } + + platform_set_drvdata(pdev, wm8350_gpio); + + return ret; + +err: + kfree(wm8350_gpio); + return ret; +} + +static int __devexit wm8350_gpio_remove(struct platform_device *pdev) +{ + struct wm8350_gpio_data *wm8350_gpio = platform_get_drvdata(pdev); + int ret; + + ret = gpiochip_remove(&wm8350_gpio->gpio_chip); + if (ret == 0) + kfree(wm8350_gpio); + + return ret; +} + +static struct platform_driver wm8350_gpio_driver = { + .driver.name = "wm8350-gpio", + .driver.owner = THIS_MODULE, + .probe = wm8350_gpio_probe, + .remove = __devexit_p(wm8350_gpio_remove), +}; + +static int __init wm8350_gpio_init(void) +{ + return platform_driver_register(&wm8350_gpio_driver); +} +subsys_initcall(wm8350_gpio_init); + +static void __exit wm8350_gpio_exit(void) +{ + platform_driver_unregister(&wm8350_gpio_driver); +} +module_exit(wm8350_gpio_exit); + +MODULE_AUTHOR("Mark Brown "); +MODULE_DESCRIPTION("GPIO interface for WM8350 PMICs"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:wm8350-gpio"); diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h index 04217a71f173..fae08aa65413 100644 --- a/include/linux/mfd/wm8350/core.h +++ b/include/linux/mfd/wm8350/core.h @@ -645,11 +645,13 @@ struct wm8350 { * used by the platform to configure GPIO functions and similar. * @irq_high: Set if WM8350 IRQ is active high. * @irq_base: Base IRQ for genirq (not currently used). + * @gpio_base: Base for gpiolib. */ struct wm8350_platform_data { int (*init)(struct wm8350 *wm8350); int irq_high; int irq_base; + int gpio_base; }; diff --git a/include/linux/mfd/wm8350/gpio.h b/include/linux/mfd/wm8350/gpio.h index 71af3d6ebe9d..d657bcd6d955 100644 --- a/include/linux/mfd/wm8350/gpio.h +++ b/include/linux/mfd/wm8350/gpio.h @@ -29,6 +29,7 @@ #define WM8350_GPIO_FUNCTION_SELECT_2 0x8D #define WM8350_GPIO_FUNCTION_SELECT_3 0x8E #define WM8350_GPIO_FUNCTION_SELECT_4 0x8F +#define WM8350_GPIO_LEVEL 0xE6 /* * GPIO Functions -- cgit v1.2.3 From 0df883df8e8aea79b501f6262b595e66dec175dc Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 8 Jan 2010 10:44:16 +0100 Subject: mfd: Convert AB3100 driver to threaded IRQ This converts the AB3100 core MFD driver to use a threaded interrupt handler instead of the explicit top/bottom-half construction with a workqueue. This saves some code and make it more similar to other modern MFD drivers. 
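As a usage illustration of the pattern this commit moves to (a minimal sketch with placeholder "foo" names, not code from the patch): with a NULL hard handler and IRQF_ONESHOT the IRQ core keeps the line disabled until the thread function returns, so the thread runs in process context, may sleep and talk to the chip over I2C directly, and the old disable_irq()/schedule_work()/enable_irq() bookkeeping disappears.

#include <linux/interrupt.h>
#include <linux/i2c.h>

struct foo_chip {
        struct i2c_client *client;
};

/* Runs in process context; sleeping I2C transfers are allowed here. */
static irqreturn_t foo_irq_thread(int irq, void *data)
{
        struct foo_chip *chip = data;

        /* Read and acknowledge the chip's event registers over I2C here. */
        dev_dbg(&chip->client->dev, "handled chip event\n");
        return IRQ_HANDLED;
}

static int foo_setup_irq(struct foo_chip *chip)
{
        /* NULL hard handler: the core only wakes the handler thread. */
        return request_threaded_irq(chip->client->irq, NULL, foo_irq_thread,
                                    IRQF_ONESHOT, "foo", chip);
}
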
Signed-off-by: Linus Walleij Signed-off-by: Samuel Ortiz --- drivers/mfd/ab3100-core.c | 43 +++++++++++++------------------------------ include/linux/mfd/ab3100.h | 3 --- 2 files changed, 13 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/drivers/mfd/ab3100-core.c b/drivers/mfd/ab3100-core.c index fd42a80e7bf9..aa3824a1b4f2 100644 --- a/drivers/mfd/ab3100-core.c +++ b/drivers/mfd/ab3100-core.c @@ -365,10 +365,13 @@ int ab3100_event_registers_startup_state_get(struct ab3100 *ab3100, } EXPORT_SYMBOL(ab3100_event_registers_startup_state_get); -/* Interrupt handling worker */ -static void ab3100_work(struct work_struct *work) +/* + * This is a threaded interrupt handler so we can make some + * I2C calls etc. + */ +static irqreturn_t ab3100_irq_handler(int irq, void *data) { - struct ab3100 *ab3100 = container_of(work, struct ab3100, work); + struct ab3100 *ab3100 = data; u8 event_regs[3]; u32 fatevent; int err; @@ -376,7 +379,7 @@ static void ab3100_work(struct work_struct *work) err = ab3100_get_register_page_interruptible(ab3100, AB3100_EVENTA1, event_regs, 3); if (err) - goto err_event_wq; + goto err_event; fatevent = (event_regs[0] << 16) | (event_regs[1] << 8) | @@ -398,29 +401,11 @@ static void ab3100_work(struct work_struct *work) dev_dbg(ab3100->dev, "IRQ Event: 0x%08x\n", fatevent); - /* By now the IRQ should be acked and deasserted so enable it again */ - enable_irq(ab3100->i2c_client->irq); - return; + return IRQ_HANDLED; - err_event_wq: + err_event: dev_dbg(ab3100->dev, - "error in event workqueue\n"); - /* Enable the IRQ anyway, what choice do we have? */ - enable_irq(ab3100->i2c_client->irq); - return; -} - -static irqreturn_t ab3100_irq_handler(int irq, void *data) -{ - struct ab3100 *ab3100 = data; - /* - * Disable the IRQ and dispatch a worker to handle the - * event. Since the chip resides on I2C this is slow - * stuff and we will re-enable the interrupts once th - * worker has finished. 
- */ - disable_irq_nosync(irq); - schedule_work(&ab3100->work); + "error reading event status\n"); return IRQ_HANDLED; } @@ -904,12 +889,10 @@ static int __init ab3100_probe(struct i2c_client *client, if (err) goto exit_no_setup; - INIT_WORK(&ab3100->work, ab3100_work); - /* This real unpredictable IRQ is of course sampled for entropy */ - err = request_irq(client->irq, ab3100_irq_handler, - IRQF_DISABLED | IRQF_SAMPLE_RANDOM, - "AB3100 IRQ", ab3100); + err = request_threaded_irq(client->irq, NULL, ab3100_irq_handler, + IRQF_ONESHOT, + "ab3100-core", ab3100); if (err) goto exit_no_irq; diff --git a/include/linux/mfd/ab3100.h b/include/linux/mfd/ab3100.h index e9aa4c9d749d..9a881c305a50 100644 --- a/include/linux/mfd/ab3100.h +++ b/include/linux/mfd/ab3100.h @@ -6,7 +6,6 @@ */ #include -#include #include #ifndef MFD_AB3100_H @@ -74,7 +73,6 @@ * @testreg_client: secondary client for test registers * @chip_name: name of this chip variant * @chip_id: 8 bit chip ID for this chip variant - * @work: an event handling worker * @event_subscribers: event subscribers are listed here * @startup_events: a copy of the first reading of the event registers * @startup_events_read: whether the first events have been read @@ -90,7 +88,6 @@ struct ab3100 { struct i2c_client *testreg_client; char chip_name[32]; u8 chip_id; - struct work_struct work; struct blocking_notifier_head event_subscribers; u32 startup_events; bool startup_events_read; -- cgit v1.2.3 From bbd51b1ff1bf57b9ed7f062486a415509968d4d9 Mon Sep 17 00:00:00 2001 From: Haojian Zhuang Date: Wed, 6 Jan 2010 17:04:18 -0500 Subject: mfd: Split 88pm8607 driver Create 88pm8607-i2c driver to support all I2C operation of 88PM8607. Signed-off-by: Haojian Zhuang Signed-off-by: Samuel Ortiz --- drivers/mfd/88pm8607.c | 302 ------------------------------------------- drivers/mfd/88pm860x-core.c | 134 +++++++++++++++++++ drivers/mfd/88pm860x-i2c.c | 202 +++++++++++++++++++++++++++++ drivers/mfd/Makefile | 2 +- include/linux/mfd/88pm8607.h | 12 +- 5 files changed, 346 insertions(+), 306 deletions(-) delete mode 100644 drivers/mfd/88pm8607.c create mode 100644 drivers/mfd/88pm860x-core.c create mode 100644 drivers/mfd/88pm860x-i2c.c (limited to 'include') diff --git a/drivers/mfd/88pm8607.c b/drivers/mfd/88pm8607.c deleted file mode 100644 index 7e3f65907993..000000000000 --- a/drivers/mfd/88pm8607.c +++ /dev/null @@ -1,302 +0,0 @@ -/* - * Base driver for Marvell 88PM8607 - * - * Copyright (C) 2009 Marvell International Ltd. - * Haojian Zhuang - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ - -#include -#include -#include -#include -#include -#include -#include - - -#define PM8607_REG_RESOURCE(_start, _end) \ -{ \ - .start = PM8607_##_start, \ - .end = PM8607_##_end, \ - .flags = IORESOURCE_IO, \ -} - -static struct resource pm8607_regulator_resources[] = { - PM8607_REG_RESOURCE(BUCK1, BUCK1), - PM8607_REG_RESOURCE(BUCK2, BUCK2), - PM8607_REG_RESOURCE(BUCK3, BUCK3), - PM8607_REG_RESOURCE(LDO1, LDO1), - PM8607_REG_RESOURCE(LDO2, LDO2), - PM8607_REG_RESOURCE(LDO3, LDO3), - PM8607_REG_RESOURCE(LDO4, LDO4), - PM8607_REG_RESOURCE(LDO5, LDO5), - PM8607_REG_RESOURCE(LDO6, LDO6), - PM8607_REG_RESOURCE(LDO7, LDO7), - PM8607_REG_RESOURCE(LDO8, LDO8), - PM8607_REG_RESOURCE(LDO9, LDO9), - PM8607_REG_RESOURCE(LDO10, LDO10), - PM8607_REG_RESOURCE(LDO12, LDO12), - PM8607_REG_RESOURCE(LDO14, LDO14), -}; - -#define PM8607_REG_DEVS(_name, _id) \ -{ \ - .name = "88pm8607-" #_name, \ - .num_resources = 1, \ - .resources = &pm8607_regulator_resources[PM8607_ID_##_id], \ -} - -static struct mfd_cell pm8607_devs[] = { - PM8607_REG_DEVS(buck1, BUCK1), - PM8607_REG_DEVS(buck2, BUCK2), - PM8607_REG_DEVS(buck3, BUCK3), - PM8607_REG_DEVS(ldo1, LDO1), - PM8607_REG_DEVS(ldo2, LDO2), - PM8607_REG_DEVS(ldo3, LDO3), - PM8607_REG_DEVS(ldo4, LDO4), - PM8607_REG_DEVS(ldo5, LDO5), - PM8607_REG_DEVS(ldo6, LDO6), - PM8607_REG_DEVS(ldo7, LDO7), - PM8607_REG_DEVS(ldo8, LDO8), - PM8607_REG_DEVS(ldo9, LDO9), - PM8607_REG_DEVS(ldo10, LDO10), - PM8607_REG_DEVS(ldo12, LDO12), - PM8607_REG_DEVS(ldo14, LDO14), -}; - -static inline int pm8607_read_device(struct pm8607_chip *chip, - int reg, int bytes, void *dest) -{ - struct i2c_client *i2c = chip->client; - unsigned char data; - int ret; - - data = (unsigned char)reg; - ret = i2c_master_send(i2c, &data, 1); - if (ret < 0) - return ret; - - ret = i2c_master_recv(i2c, dest, bytes); - if (ret < 0) - return ret; - return 0; -} - -static inline int pm8607_write_device(struct pm8607_chip *chip, - int reg, int bytes, void *src) -{ - struct i2c_client *i2c = chip->client; - unsigned char buf[bytes + 1]; - int ret; - - buf[0] = (unsigned char)reg; - memcpy(&buf[1], src, bytes); - - ret = i2c_master_send(i2c, buf, bytes + 1); - if (ret < 0) - return ret; - return 0; -} - -int pm8607_reg_read(struct pm8607_chip *chip, int reg) -{ - unsigned char data; - int ret; - - mutex_lock(&chip->io_lock); - ret = chip->read(chip, reg, 1, &data); - mutex_unlock(&chip->io_lock); - - if (ret < 0) - return ret; - else - return (int)data; -} -EXPORT_SYMBOL(pm8607_reg_read); - -int pm8607_reg_write(struct pm8607_chip *chip, int reg, - unsigned char data) -{ - int ret; - - mutex_lock(&chip->io_lock); - ret = chip->write(chip, reg, 1, &data); - mutex_unlock(&chip->io_lock); - - return ret; -} -EXPORT_SYMBOL(pm8607_reg_write); - -int pm8607_bulk_read(struct pm8607_chip *chip, int reg, - int count, unsigned char *buf) -{ - int ret; - - mutex_lock(&chip->io_lock); - ret = chip->read(chip, reg, count, buf); - mutex_unlock(&chip->io_lock); - - return ret; -} -EXPORT_SYMBOL(pm8607_bulk_read); - -int pm8607_bulk_write(struct pm8607_chip *chip, int reg, - int count, unsigned char *buf) -{ - int ret; - - mutex_lock(&chip->io_lock); - ret = chip->write(chip, reg, count, buf); - mutex_unlock(&chip->io_lock); - - return ret; -} -EXPORT_SYMBOL(pm8607_bulk_write); - -int pm8607_set_bits(struct pm8607_chip *chip, int reg, - unsigned char mask, unsigned char data) -{ - unsigned char value; - int ret; - - mutex_lock(&chip->io_lock); - ret = chip->read(chip, reg, 1, &value); - if (ret < 0) - goto out; - value &= 
~mask; - value |= data; - ret = chip->write(chip, reg, 1, &value); -out: - mutex_unlock(&chip->io_lock); - return ret; -} -EXPORT_SYMBOL(pm8607_set_bits); - - -static const struct i2c_device_id pm8607_id_table[] = { - { "88PM8607", 0 }, - {} -}; -MODULE_DEVICE_TABLE(i2c, pm8607_id_table); - - -static int __devinit pm8607_probe(struct i2c_client *client, - const struct i2c_device_id *id) -{ - struct pm8607_platform_data *pdata = client->dev.platform_data; - struct pm8607_chip *chip; - int i, count; - int ret; - - chip = kzalloc(sizeof(struct pm8607_chip), GFP_KERNEL); - if (chip == NULL) - return -ENOMEM; - - chip->client = client; - chip->dev = &client->dev; - chip->read = pm8607_read_device; - chip->write = pm8607_write_device; - i2c_set_clientdata(client, chip); - - mutex_init(&chip->io_lock); - dev_set_drvdata(chip->dev, chip); - - ret = pm8607_reg_read(chip, PM8607_CHIP_ID); - if (ret < 0) { - dev_err(chip->dev, "Failed to read CHIP ID: %d\n", ret); - goto out; - } - if ((ret & CHIP_ID_MASK) == CHIP_ID) - dev_info(chip->dev, "Marvell 88PM8607 (ID: %02x) detected\n", - ret); - else { - dev_err(chip->dev, "Failed to detect Marvell 88PM8607. " - "Chip ID: %02x\n", ret); - goto out; - } - chip->chip_id = ret; - - ret = pm8607_reg_read(chip, PM8607_BUCK3); - if (ret < 0) { - dev_err(chip->dev, "Failed to read BUCK3 register: %d\n", ret); - goto out; - } - if (ret & PM8607_BUCK3_DOUBLE) - chip->buck3_double = 1; - - ret = pm8607_reg_read(chip, PM8607_MISC1); - if (ret < 0) { - dev_err(chip->dev, "Failed to read MISC1 register: %d\n", ret); - goto out; - } - if (pdata->i2c_port == PI2C_PORT) - ret |= PM8607_MISC1_PI2C; - else - ret &= ~PM8607_MISC1_PI2C; - ret = pm8607_reg_write(chip, PM8607_MISC1, ret); - if (ret < 0) { - dev_err(chip->dev, "Failed to write MISC1 register: %d\n", ret); - goto out; - } - - - count = ARRAY_SIZE(pm8607_devs); - for (i = 0; i < count; i++) { - ret = mfd_add_devices(chip->dev, i, &pm8607_devs[i], - 1, NULL, 0); - if (ret != 0) { - dev_err(chip->dev, "Failed to add subdevs\n"); - goto out; - } - } - - return 0; - -out: - i2c_set_clientdata(client, NULL); - kfree(chip); - return ret; -} - -static int __devexit pm8607_remove(struct i2c_client *client) -{ - struct pm8607_chip *chip = i2c_get_clientdata(client); - - mfd_remove_devices(chip->dev); - kfree(chip); - return 0; -} - -static struct i2c_driver pm8607_driver = { - .driver = { - .name = "88PM8607", - .owner = THIS_MODULE, - }, - .probe = pm8607_probe, - .remove = __devexit_p(pm8607_remove), - .id_table = pm8607_id_table, -}; - -static int __init pm8607_init(void) -{ - int ret; - ret = i2c_add_driver(&pm8607_driver); - if (ret != 0) - pr_err("Failed to register 88PM8607 I2C driver: %d\n", ret); - return ret; -} -subsys_initcall(pm8607_init); - -static void __exit pm8607_exit(void) -{ - i2c_del_driver(&pm8607_driver); -} -module_exit(pm8607_exit); - -MODULE_DESCRIPTION("PMIC Driver for Marvell 88PM8607"); -MODULE_AUTHOR("Haojian Zhuang "); -MODULE_LICENSE("GPL"); diff --git a/drivers/mfd/88pm860x-core.c b/drivers/mfd/88pm860x-core.c new file mode 100644 index 000000000000..d1464e54e656 --- /dev/null +++ b/drivers/mfd/88pm860x-core.c @@ -0,0 +1,134 @@ +/* + * Base driver for Marvell 88PM8607 + * + * Copyright (C) 2009 Marvell International Ltd. + * Haojian Zhuang + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include +#include + + +#define PM8607_REG_RESOURCE(_start, _end) \ +{ \ + .start = PM8607_##_start, \ + .end = PM8607_##_end, \ + .flags = IORESOURCE_IO, \ +} + +static struct resource pm8607_regulator_resources[] = { + PM8607_REG_RESOURCE(BUCK1, BUCK1), + PM8607_REG_RESOURCE(BUCK2, BUCK2), + PM8607_REG_RESOURCE(BUCK3, BUCK3), + PM8607_REG_RESOURCE(LDO1, LDO1), + PM8607_REG_RESOURCE(LDO2, LDO2), + PM8607_REG_RESOURCE(LDO3, LDO3), + PM8607_REG_RESOURCE(LDO4, LDO4), + PM8607_REG_RESOURCE(LDO5, LDO5), + PM8607_REG_RESOURCE(LDO6, LDO6), + PM8607_REG_RESOURCE(LDO7, LDO7), + PM8607_REG_RESOURCE(LDO8, LDO8), + PM8607_REG_RESOURCE(LDO9, LDO9), + PM8607_REG_RESOURCE(LDO10, LDO10), + PM8607_REG_RESOURCE(LDO12, LDO12), + PM8607_REG_RESOURCE(LDO14, LDO14), +}; + +#define PM8607_REG_DEVS(_name, _id) \ +{ \ + .name = "88pm8607-" #_name, \ + .num_resources = 1, \ + .resources = &pm8607_regulator_resources[PM8607_ID_##_id], \ +} + +static struct mfd_cell pm8607_devs[] = { + PM8607_REG_DEVS(buck1, BUCK1), + PM8607_REG_DEVS(buck2, BUCK2), + PM8607_REG_DEVS(buck3, BUCK3), + PM8607_REG_DEVS(ldo1, LDO1), + PM8607_REG_DEVS(ldo2, LDO2), + PM8607_REG_DEVS(ldo3, LDO3), + PM8607_REG_DEVS(ldo4, LDO4), + PM8607_REG_DEVS(ldo5, LDO5), + PM8607_REG_DEVS(ldo6, LDO6), + PM8607_REG_DEVS(ldo7, LDO7), + PM8607_REG_DEVS(ldo8, LDO8), + PM8607_REG_DEVS(ldo9, LDO9), + PM8607_REG_DEVS(ldo10, LDO10), + PM8607_REG_DEVS(ldo12, LDO12), + PM8607_REG_DEVS(ldo14, LDO14), +}; + +int pm860x_device_init(struct pm8607_chip *chip, + struct pm8607_platform_data *pdata) +{ + int i, count; + int ret; + + ret = pm8607_reg_read(chip, PM8607_CHIP_ID); + if (ret < 0) { + dev_err(chip->dev, "Failed to read CHIP ID: %d\n", ret); + goto out; + } + if ((ret & PM8607_ID_MASK) == PM8607_ID) + dev_info(chip->dev, "Marvell 88PM8607 (ID: %02x) detected\n", + ret); + else { + dev_err(chip->dev, "Failed to detect Marvell 88PM8607. " + "Chip ID: %02x\n", ret); + goto out; + } + chip->chip_id = ret; + + ret = pm8607_reg_read(chip, PM8607_BUCK3); + if (ret < 0) { + dev_err(chip->dev, "Failed to read BUCK3 register: %d\n", ret); + goto out; + } + if (ret & PM8607_BUCK3_DOUBLE) + chip->buck3_double = 1; + + ret = pm8607_reg_read(chip, PM8607_MISC1); + if (ret < 0) { + dev_err(chip->dev, "Failed to read MISC1 register: %d\n", ret); + goto out; + } + if (pdata->i2c_port == PI2C_PORT) + ret |= PM8607_MISC1_PI2C; + else + ret &= ~PM8607_MISC1_PI2C; + ret = pm8607_reg_write(chip, PM8607_MISC1, ret); + if (ret < 0) { + dev_err(chip->dev, "Failed to write MISC1 register: %d\n", ret); + goto out; + } + + count = ARRAY_SIZE(pm8607_devs); + for (i = 0; i < count; i++) { + ret = mfd_add_devices(chip->dev, i, &pm8607_devs[i], + 1, NULL, 0); + if (ret != 0) { + dev_err(chip->dev, "Failed to add subdevs\n"); + goto out; + } + } +out: + return ret; +} + +void pm8607_device_exit(struct pm8607_chip *chip) +{ + mfd_remove_devices(chip->dev); +} + +MODULE_DESCRIPTION("PMIC Driver for Marvell 88PM8607"); +MODULE_AUTHOR("Haojian Zhuang "); +MODULE_LICENSE("GPL"); diff --git a/drivers/mfd/88pm860x-i2c.c b/drivers/mfd/88pm860x-i2c.c new file mode 100644 index 000000000000..dda23cbfe415 --- /dev/null +++ b/drivers/mfd/88pm860x-i2c.c @@ -0,0 +1,202 @@ +/* + * I2C driver for Marvell 88PM8607 + * + * Copyright (C) 2009 Marvell International Ltd. 
+ * Haojian Zhuang + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include + +static inline int pm8607_read_device(struct pm8607_chip *chip, + int reg, int bytes, void *dest) +{ + struct i2c_client *i2c = chip->client; + unsigned char data; + int ret; + + data = (unsigned char)reg; + ret = i2c_master_send(i2c, &data, 1); + if (ret < 0) + return ret; + + ret = i2c_master_recv(i2c, dest, bytes); + if (ret < 0) + return ret; + return 0; +} + +static inline int pm8607_write_device(struct pm8607_chip *chip, + int reg, int bytes, void *src) +{ + struct i2c_client *i2c = chip->client; + unsigned char buf[bytes + 1]; + int ret; + + buf[0] = (unsigned char)reg; + memcpy(&buf[1], src, bytes); + + ret = i2c_master_send(i2c, buf, bytes + 1); + if (ret < 0) + return ret; + return 0; +} + +int pm8607_reg_read(struct pm8607_chip *chip, int reg) +{ + unsigned char data; + int ret; + + mutex_lock(&chip->io_lock); + ret = chip->read(chip, reg, 1, &data); + mutex_unlock(&chip->io_lock); + + if (ret < 0) + return ret; + else + return (int)data; +} +EXPORT_SYMBOL(pm8607_reg_read); + +int pm8607_reg_write(struct pm8607_chip *chip, int reg, + unsigned char data) +{ + int ret; + + mutex_lock(&chip->io_lock); + ret = chip->write(chip, reg, 1, &data); + mutex_unlock(&chip->io_lock); + + return ret; +} +EXPORT_SYMBOL(pm8607_reg_write); + +int pm8607_bulk_read(struct pm8607_chip *chip, int reg, + int count, unsigned char *buf) +{ + int ret; + + mutex_lock(&chip->io_lock); + ret = chip->read(chip, reg, count, buf); + mutex_unlock(&chip->io_lock); + + return ret; +} +EXPORT_SYMBOL(pm8607_bulk_read); + +int pm8607_bulk_write(struct pm8607_chip *chip, int reg, + int count, unsigned char *buf) +{ + int ret; + + mutex_lock(&chip->io_lock); + ret = chip->write(chip, reg, count, buf); + mutex_unlock(&chip->io_lock); + + return ret; +} +EXPORT_SYMBOL(pm8607_bulk_write); + +int pm8607_set_bits(struct pm8607_chip *chip, int reg, + unsigned char mask, unsigned char data) +{ + unsigned char value; + int ret; + + mutex_lock(&chip->io_lock); + ret = chip->read(chip, reg, 1, &value); + if (ret < 0) + goto out; + value &= ~mask; + value |= data; + ret = chip->write(chip, reg, 1, &value); +out: + mutex_unlock(&chip->io_lock); + return ret; +} +EXPORT_SYMBOL(pm8607_set_bits); + + +static const struct i2c_device_id pm860x_id_table[] = { + { "88PM8607", 0 }, + {} +}; +MODULE_DEVICE_TABLE(i2c, pm860x_id_table); + +static int __devinit pm860x_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct pm8607_platform_data *pdata = client->dev.platform_data; + struct pm8607_chip *chip; + int ret; + + chip = kzalloc(sizeof(struct pm8607_chip), GFP_KERNEL); + if (chip == NULL) + return -ENOMEM; + + chip->client = client; + chip->dev = &client->dev; + chip->read = pm8607_read_device; + chip->write = pm8607_write_device; + memcpy(&chip->id, id, sizeof(struct i2c_device_id)); + i2c_set_clientdata(client, chip); + + mutex_init(&chip->io_lock); + dev_set_drvdata(chip->dev, chip); + + ret = pm860x_device_init(chip, pdata); + if (ret < 0) + goto out; + + + return 0; + +out: + i2c_set_clientdata(client, NULL); + kfree(chip); + return ret; +} + +static int __devexit pm860x_remove(struct i2c_client *client) +{ + struct pm8607_chip *chip = i2c_get_clientdata(client); + + kfree(chip); + return 0; +} + +static struct i2c_driver pm860x_driver = { + 
.driver = { + .name = "88PM860x", + .owner = THIS_MODULE, + }, + .probe = pm860x_probe, + .remove = __devexit_p(pm860x_remove), + .id_table = pm860x_id_table, +}; + +static int __init pm860x_i2c_init(void) +{ + int ret; + ret = i2c_add_driver(&pm860x_driver); + if (ret != 0) + pr_err("Failed to register 88PM860x I2C driver: %d\n", ret); + return ret; +} +subsys_initcall(pm860x_i2c_init); + +static void __exit pm860x_i2c_exit(void) +{ + i2c_del_driver(&pm860x_driver); +} +module_exit(pm860x_i2c_exit); + +MODULE_DESCRIPTION("I2C Driver for Marvell 88PM860x"); +MODULE_AUTHOR("Haojian Zhuang "); +MODULE_LICENSE("GPL"); diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 78295d6a75f7..88fa200188cf 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -2,6 +2,7 @@ # Makefile for multifunction miscellaneous devices # +obj-$(CONFIG_MFD_88PM8607) += 88pm860x-core.o 88pm860x-i2c.o obj-$(CONFIG_MFD_SM501) += sm501.o obj-$(CONFIG_MFD_ASIC3) += asic3.o tmio_core.o obj-$(CONFIG_MFD_SH_MOBILE_SDHI) += sh_mobile_sdhi.o @@ -55,5 +56,4 @@ obj-$(CONFIG_AB3100_CORE) += ab3100-core.o obj-$(CONFIG_AB3100_OTP) += ab3100-otp.o obj-$(CONFIG_AB4500_CORE) += ab4500-core.o obj-$(CONFIG_MFD_TIMBERDALE) += timberdale.o -obj-$(CONFIG_MFD_88PM8607) += 88pm8607.o obj-$(CONFIG_PMIC_ADP5520) += adp5520.o diff --git a/include/linux/mfd/88pm8607.h b/include/linux/mfd/88pm8607.h index f41b428d2cec..6e4dcdca02a8 100644 --- a/include/linux/mfd/88pm8607.h +++ b/include/linux/mfd/88pm8607.h @@ -33,8 +33,8 @@ enum { PM8607_ID_RG_MAX, }; -#define CHIP_ID (0x40) -#define CHIP_ID_MASK (0xF8) +#define PM8607_ID (0x40) /* 8607 chip ID */ +#define PM8607_ID_MASK (0xF8) /* 8607 chip ID mask */ /* Interrupt Registers */ #define PM8607_STATUS_1 (0x01) @@ -185,6 +185,7 @@ struct pm8607_chip { struct device *dev; struct mutex io_lock; struct i2c_client *client; + struct i2c_device_id id; int (*read)(struct pm8607_chip *chip, int reg, int bytes, void *dest); int (*write)(struct pm8607_chip *chip, int reg, int bytes, void *src); @@ -214,4 +215,9 @@ extern int pm8607_bulk_write(struct pm8607_chip *, int, int, unsigned char *); extern int pm8607_set_bits(struct pm8607_chip *, int, unsigned char, unsigned char); -#endif /* __LINUX_MFD_88PM8607_H */ + +extern int pm860x_device_init(struct pm8607_chip *chip, + struct pm8607_platform_data *pdata); +extern void pm860x_device_exit(struct pm8607_chip *chip); + +#endif /* __LINUX_MFD_88PM860X_H */ -- cgit v1.2.3 From 53dbab7af9ca13fa95605e9a5c31bb803dcba363 Mon Sep 17 00:00:00 2001 From: Haojian Zhuang Date: Fri, 8 Jan 2010 06:01:24 -0500 Subject: mfd: Support 88pm8606 in 860x driver 88PM8606 and 88PM8607 are two discrete chips used for power management. Hardware designer can use them together or only one of them according to requirement. There's some logic tightly linked between these two chips. For example, USB charger driver needs to access both chips by I2C interface. Now share one driver to these two devices. Only one I2C client is identified in platform init data. If another chip is also used, user should mark it in companion_addr field of platform init data. Then driver could create another I2C client for the companion chip. All I2C operations are accessed by 860x-i2c driver. In order to support both I2C client address, the read/write API is changed in below. reg_read(client, offset) reg_write(client, offset, data) The benefit is that client drivers only need one kind of read/write API. I2C and MFD driver can be shared in both 8606 and 8607. 
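As a usage illustration of the new client-based accessors (a hypothetical helper, not code from this patch): a subdevice driver holds the parent pm860x_chip, picks whichever i2c_client owns the registers it needs, and passes it straight to pm860x_reg_read()/pm860x_reg_write(), exactly as the regulator driver is converted to do below.

#include <linux/i2c.h>
#include <linux/mfd/88pm860x.h>

/* Hypothetical helper: read the 8607 chip ID via the client that owns it. */
static int example_read_chip_id(struct pm860x_chip *chip)
{
        struct i2c_client *i2c = (chip->id == CHIP_PM8607) ?
                                 chip->client : chip->companion;

        return pm860x_reg_read(i2c, PM8607_CHIP_ID);
}
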
Since API is changed, update API in 8607 regulator driver. Signed-off-by: Haojian Zhuang Signed-off-by: Samuel Ortiz --- drivers/mfd/88pm860x-core.c | 61 ++++++++---- drivers/mfd/88pm860x-i2c.c | 172 ++++++++++++++++++++++---------- drivers/regulator/88pm8607.c | 32 +++--- include/linux/mfd/88pm8607.h | 223 ------------------------------------------ include/linux/mfd/88pm860x.h | 227 +++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 402 insertions(+), 313 deletions(-) delete mode 100644 include/linux/mfd/88pm8607.h create mode 100644 include/linux/mfd/88pm860x.h (limited to 'include') diff --git a/drivers/mfd/88pm860x-core.c b/drivers/mfd/88pm860x-core.c index d1464e54e656..72b00304dc3a 100644 --- a/drivers/mfd/88pm860x-core.c +++ b/drivers/mfd/88pm860x-core.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #define PM8607_REG_RESOURCE(_start, _end) \ @@ -67,18 +67,23 @@ static struct mfd_cell pm8607_devs[] = { PM8607_REG_DEVS(ldo14, LDO14), }; -int pm860x_device_init(struct pm8607_chip *chip, - struct pm8607_platform_data *pdata) +static void device_8606_init(struct pm860x_chip *chip, struct i2c_client *i2c, + struct pm860x_platform_data *pdata) +{ +} + +static void device_8607_init(struct pm860x_chip *chip, struct i2c_client *i2c, + struct pm860x_platform_data *pdata) { int i, count; int ret; - ret = pm8607_reg_read(chip, PM8607_CHIP_ID); + ret = pm860x_reg_read(i2c, PM8607_CHIP_ID); if (ret < 0) { dev_err(chip->dev, "Failed to read CHIP ID: %d\n", ret); goto out; } - if ((ret & PM8607_ID_MASK) == PM8607_ID) + if ((ret & PM8607_VERSION_MASK) == PM8607_VERSION) dev_info(chip->dev, "Marvell 88PM8607 (ID: %02x) detected\n", ret); else { @@ -86,9 +91,9 @@ int pm860x_device_init(struct pm8607_chip *chip, "Chip ID: %02x\n", ret); goto out; } - chip->chip_id = ret; + chip->chip_version = ret; - ret = pm8607_reg_read(chip, PM8607_BUCK3); + ret = pm860x_reg_read(i2c, PM8607_BUCK3); if (ret < 0) { dev_err(chip->dev, "Failed to read BUCK3 register: %d\n", ret); goto out; @@ -96,20 +101,11 @@ int pm860x_device_init(struct pm8607_chip *chip, if (ret & PM8607_BUCK3_DOUBLE) chip->buck3_double = 1; - ret = pm8607_reg_read(chip, PM8607_MISC1); + ret = pm860x_reg_read(i2c, PM8607_MISC1); if (ret < 0) { dev_err(chip->dev, "Failed to read MISC1 register: %d\n", ret); goto out; } - if (pdata->i2c_port == PI2C_PORT) - ret |= PM8607_MISC1_PI2C; - else - ret &= ~PM8607_MISC1_PI2C; - ret = pm8607_reg_write(chip, PM8607_MISC1, ret); - if (ret < 0) { - dev_err(chip->dev, "Failed to write MISC1 register: %d\n", ret); - goto out; - } count = ARRAY_SIZE(pm8607_devs); for (i = 0; i < count; i++) { @@ -121,14 +117,39 @@ int pm860x_device_init(struct pm8607_chip *chip, } } out: - return ret; + return; +} + +int pm860x_device_init(struct pm860x_chip *chip, + struct pm860x_platform_data *pdata) +{ + switch (chip->id) { + case CHIP_PM8606: + device_8606_init(chip, chip->client, pdata); + break; + case CHIP_PM8607: + device_8607_init(chip, chip->client, pdata); + break; + } + + if (chip->companion) { + switch (chip->id) { + case CHIP_PM8607: + device_8606_init(chip, chip->companion, pdata); + break; + case CHIP_PM8606: + device_8607_init(chip, chip->companion, pdata); + break; + } + } + return 0; } -void pm8607_device_exit(struct pm8607_chip *chip) +void pm860x_device_exit(struct pm860x_chip *chip) { mfd_remove_devices(chip->dev); } -MODULE_DESCRIPTION("PMIC Driver for Marvell 88PM8607"); +MODULE_DESCRIPTION("PMIC Driver for Marvell 88PM860x"); MODULE_AUTHOR("Haojian Zhuang "); 
MODULE_LICENSE("GPL"); diff --git a/drivers/mfd/88pm860x-i2c.c b/drivers/mfd/88pm860x-i2c.c index dda23cbfe415..6d7dba2bce8a 100644 --- a/drivers/mfd/88pm860x-i2c.c +++ b/drivers/mfd/88pm860x-i2c.c @@ -1,5 +1,5 @@ /* - * I2C driver for Marvell 88PM8607 + * I2C driver for Marvell 88PM860x * * Copyright (C) 2009 Marvell International Ltd. * Haojian Zhuang @@ -12,12 +12,11 @@ #include #include #include -#include +#include -static inline int pm8607_read_device(struct pm8607_chip *chip, +static inline int pm860x_read_device(struct i2c_client *i2c, int reg, int bytes, void *dest) { - struct i2c_client *i2c = chip->client; unsigned char data; int ret; @@ -32,10 +31,9 @@ static inline int pm8607_read_device(struct pm8607_chip *chip, return 0; } -static inline int pm8607_write_device(struct pm8607_chip *chip, +static inline int pm860x_write_device(struct i2c_client *i2c, int reg, int bytes, void *src) { - struct i2c_client *i2c = chip->client; unsigned char buf[bytes + 1]; int ret; @@ -48,13 +46,14 @@ static inline int pm8607_write_device(struct pm8607_chip *chip, return 0; } -int pm8607_reg_read(struct pm8607_chip *chip, int reg) +int pm860x_reg_read(struct i2c_client *i2c, int reg) { + struct pm860x_chip *chip = i2c_get_clientdata(i2c); unsigned char data; int ret; mutex_lock(&chip->io_lock); - ret = chip->read(chip, reg, 1, &data); + ret = pm860x_read_device(i2c, reg, 1, &data); mutex_unlock(&chip->io_lock); if (ret < 0) @@ -62,111 +61,178 @@ int pm8607_reg_read(struct pm8607_chip *chip, int reg) else return (int)data; } -EXPORT_SYMBOL(pm8607_reg_read); +EXPORT_SYMBOL(pm860x_reg_read); -int pm8607_reg_write(struct pm8607_chip *chip, int reg, +int pm860x_reg_write(struct i2c_client *i2c, int reg, unsigned char data) { + struct pm860x_chip *chip = i2c_get_clientdata(i2c); int ret; mutex_lock(&chip->io_lock); - ret = chip->write(chip, reg, 1, &data); + ret = pm860x_write_device(i2c, reg, 1, &data); mutex_unlock(&chip->io_lock); return ret; } -EXPORT_SYMBOL(pm8607_reg_write); +EXPORT_SYMBOL(pm860x_reg_write); -int pm8607_bulk_read(struct pm8607_chip *chip, int reg, +int pm860x_bulk_read(struct i2c_client *i2c, int reg, int count, unsigned char *buf) { + struct pm860x_chip *chip = i2c_get_clientdata(i2c); int ret; mutex_lock(&chip->io_lock); - ret = chip->read(chip, reg, count, buf); + ret = pm860x_read_device(i2c, reg, count, buf); mutex_unlock(&chip->io_lock); return ret; } -EXPORT_SYMBOL(pm8607_bulk_read); +EXPORT_SYMBOL(pm860x_bulk_read); -int pm8607_bulk_write(struct pm8607_chip *chip, int reg, +int pm860x_bulk_write(struct i2c_client *i2c, int reg, int count, unsigned char *buf) { + struct pm860x_chip *chip = i2c_get_clientdata(i2c); int ret; mutex_lock(&chip->io_lock); - ret = chip->write(chip, reg, count, buf); + ret = pm860x_write_device(i2c, reg, count, buf); mutex_unlock(&chip->io_lock); return ret; } -EXPORT_SYMBOL(pm8607_bulk_write); +EXPORT_SYMBOL(pm860x_bulk_write); -int pm8607_set_bits(struct pm8607_chip *chip, int reg, +int pm860x_set_bits(struct i2c_client *i2c, int reg, unsigned char mask, unsigned char data) { + struct pm860x_chip *chip = i2c_get_clientdata(i2c); unsigned char value; int ret; mutex_lock(&chip->io_lock); - ret = chip->read(chip, reg, 1, &value); + ret = pm860x_read_device(i2c, reg, 1, &value); if (ret < 0) goto out; value &= ~mask; value |= data; - ret = chip->write(chip, reg, 1, &value); + ret = pm860x_write_device(i2c, reg, 1, &value); out: mutex_unlock(&chip->io_lock); return ret; } -EXPORT_SYMBOL(pm8607_set_bits); +EXPORT_SYMBOL(pm860x_set_bits); static const 
struct i2c_device_id pm860x_id_table[] = { - { "88PM8607", 0 }, + { "88PM860x", 0 }, {} }; MODULE_DEVICE_TABLE(i2c, pm860x_id_table); +static int verify_addr(struct i2c_client *i2c) +{ + unsigned short addr_8607[] = {0x30, 0x34}; + unsigned short addr_8606[] = {0x10, 0x11}; + int size, i; + + if (i2c == NULL) + return 0; + size = ARRAY_SIZE(addr_8606); + for (i = 0; i < size; i++) { + if (i2c->addr == *(addr_8606 + i)) + return CHIP_PM8606; + } + size = ARRAY_SIZE(addr_8607); + for (i = 0; i < size; i++) { + if (i2c->addr == *(addr_8607 + i)) + return CHIP_PM8607; + } + return 0; +} + static int __devinit pm860x_probe(struct i2c_client *client, const struct i2c_device_id *id) { - struct pm8607_platform_data *pdata = client->dev.platform_data; - struct pm8607_chip *chip; - int ret; - - chip = kzalloc(sizeof(struct pm8607_chip), GFP_KERNEL); - if (chip == NULL) - return -ENOMEM; - - chip->client = client; - chip->dev = &client->dev; - chip->read = pm8607_read_device; - chip->write = pm8607_write_device; - memcpy(&chip->id, id, sizeof(struct i2c_device_id)); - i2c_set_clientdata(client, chip); - - mutex_init(&chip->io_lock); - dev_set_drvdata(chip->dev, chip); - - ret = pm860x_device_init(chip, pdata); - if (ret < 0) - goto out; - - + struct pm860x_platform_data *pdata = client->dev.platform_data; + static struct pm860x_chip *chip; + struct i2c_board_info i2c_info = { + .type = "88PM860x", + .platform_data = client->dev.platform_data, + }; + int addr_c, found_companion = 0; + + if (pdata == NULL) { + pr_info("No platform data in %s!\n", __func__); + return -EINVAL; + } + + /* + * Both client and companion client shares same platform driver. + * Driver distinguishes them by pdata->companion_addr. + * pdata->companion_addr is only assigned if companion chip exists. + * At the same time, the companion_addr shouldn't equal to client + * address. + */ + addr_c = pdata->companion_addr; + if (addr_c && (addr_c != client->addr)) { + i2c_info.addr = addr_c; + found_companion = 1; + } + + if (found_companion || (addr_c == 0)) { + chip = kzalloc(sizeof(struct pm860x_chip), GFP_KERNEL); + if (chip == NULL) + return -ENOMEM; + + chip->id = verify_addr(client); + chip->companion_addr = addr_c; + chip->client = client; + i2c_set_clientdata(client, chip); + chip->dev = &client->dev; + mutex_init(&chip->io_lock); + dev_set_drvdata(chip->dev, chip); + + if (found_companion) { + /* + * If this driver is built in, probe function is + * recursive. + * If this driver is built as module, the next probe + * function is called after the first one finished. + */ + chip->companion = i2c_new_device(client->adapter, + &i2c_info); + } + } + + /* + * If companion chip existes, it's called by companion probe. + * If there's no companion chip, it's called by client probe. + */ + if ((addr_c == 0) || (addr_c == client->addr)) { + chip->companion = client; + i2c_set_clientdata(chip->companion, chip); + pm860x_device_init(chip, pdata); + } return 0; - -out: - i2c_set_clientdata(client, NULL); - kfree(chip); - return ret; } static int __devexit pm860x_remove(struct i2c_client *client) { - struct pm8607_chip *chip = i2c_get_clientdata(client); - + struct pm860x_chip *chip = i2c_get_clientdata(client); + + /* + * If companion existes, companion client is removed first. + * Because companion client is registered last and removed first. 
+ */ + if (chip->companion_addr == client->addr) + return 0; + pm860x_device_exit(chip); + i2c_unregister_device(chip->companion); + i2c_set_clientdata(chip->companion, NULL); + i2c_set_clientdata(chip->client, NULL); kfree(chip); return 0; } diff --git a/drivers/regulator/88pm8607.c b/drivers/regulator/88pm8607.c index 04719551381b..97897a6bf4f3 100644 --- a/drivers/regulator/88pm8607.c +++ b/drivers/regulator/88pm8607.c @@ -11,15 +11,17 @@ #include #include #include +#include #include #include #include -#include +#include struct pm8607_regulator_info { struct regulator_desc desc; - struct pm8607_chip *chip; + struct pm860x_chip *chip; struct regulator_dev *regulator; + struct i2c_client *i2c; int min_uV; int max_uV; @@ -46,7 +48,7 @@ static inline int check_range(struct pm8607_regulator_info *info, static int pm8607_list_voltage(struct regulator_dev *rdev, unsigned index) { struct pm8607_regulator_info *info = rdev_get_drvdata(rdev); - uint8_t chip_id = info->chip->chip_id; + uint8_t chip_id = info->chip->chip_version; int ret = -EINVAL; switch (info->desc.id) { @@ -169,7 +171,7 @@ static int pm8607_list_voltage(struct regulator_dev *rdev, unsigned index) static int choose_voltage(struct regulator_dev *rdev, int min_uV, int max_uV) { struct pm8607_regulator_info *info = rdev_get_drvdata(rdev); - uint8_t chip_id = info->chip->chip_id; + uint8_t chip_id = info->chip->chip_version; int val = -ENOENT; int ret; @@ -428,7 +430,6 @@ static int pm8607_set_voltage(struct regulator_dev *rdev, int min_uV, int max_uV) { struct pm8607_regulator_info *info = rdev_get_drvdata(rdev); - struct pm8607_chip *chip = info->chip; uint8_t val, mask; int ret; @@ -443,13 +444,13 @@ static int pm8607_set_voltage(struct regulator_dev *rdev, val = (uint8_t)(ret << info->vol_shift); mask = ((1 << info->vol_nbits) - 1) << info->vol_shift; - ret = pm8607_set_bits(chip, info->vol_reg, mask, val); + ret = pm860x_set_bits(info->i2c, info->vol_reg, mask, val); if (ret) return ret; switch (info->desc.id) { case PM8607_ID_BUCK1: case PM8607_ID_BUCK3: - ret = pm8607_set_bits(chip, info->update_reg, + ret = pm860x_set_bits(info->i2c, info->update_reg, 1 << info->update_bit, 1 << info->update_bit); break; @@ -460,11 +461,10 @@ static int pm8607_set_voltage(struct regulator_dev *rdev, static int pm8607_get_voltage(struct regulator_dev *rdev) { struct pm8607_regulator_info *info = rdev_get_drvdata(rdev); - struct pm8607_chip *chip = info->chip; uint8_t val, mask; int ret; - ret = pm8607_reg_read(chip, info->vol_reg); + ret = pm860x_reg_read(info->i2c, info->vol_reg); if (ret < 0) return ret; @@ -477,9 +477,8 @@ static int pm8607_get_voltage(struct regulator_dev *rdev) static int pm8607_enable(struct regulator_dev *rdev) { struct pm8607_regulator_info *info = rdev_get_drvdata(rdev); - struct pm8607_chip *chip = info->chip; - return pm8607_set_bits(chip, info->enable_reg, + return pm860x_set_bits(info->i2c, info->enable_reg, 1 << info->enable_bit, 1 << info->enable_bit); } @@ -487,19 +486,17 @@ static int pm8607_enable(struct regulator_dev *rdev) static int pm8607_disable(struct regulator_dev *rdev) { struct pm8607_regulator_info *info = rdev_get_drvdata(rdev); - struct pm8607_chip *chip = info->chip; - return pm8607_set_bits(chip, info->enable_reg, + return pm860x_set_bits(info->i2c, info->enable_reg, 1 << info->enable_bit, 0); } static int pm8607_is_enabled(struct regulator_dev *rdev) { struct pm8607_regulator_info *info = rdev_get_drvdata(rdev); - struct pm8607_chip *chip = info->chip; int ret; - ret = pm8607_reg_read(chip, 
info->enable_reg); + ret = pm860x_reg_read(info->i2c, info->enable_reg); if (ret < 0) return ret; @@ -589,8 +586,8 @@ static inline struct pm8607_regulator_info *find_regulator_info(int id) static int __devinit pm8607_regulator_probe(struct platform_device *pdev) { - struct pm8607_chip *chip = dev_get_drvdata(pdev->dev.parent); - struct pm8607_platform_data *pdata = chip->dev->platform_data; + struct pm860x_chip *chip = dev_get_drvdata(pdev->dev.parent); + struct pm860x_platform_data *pdata = chip->dev->platform_data; struct pm8607_regulator_info *info = NULL; info = find_regulator_info(pdev->id); @@ -599,6 +596,7 @@ static int __devinit pm8607_regulator_probe(struct platform_device *pdev) return -EINVAL; } + info->i2c = (chip->id == CHIP_PM8607) ? chip->client : chip->companion; info->chip = chip; info->regulator = regulator_register(&info->desc, &pdev->dev, diff --git a/include/linux/mfd/88pm8607.h b/include/linux/mfd/88pm8607.h deleted file mode 100644 index 6e4dcdca02a8..000000000000 --- a/include/linux/mfd/88pm8607.h +++ /dev/null @@ -1,223 +0,0 @@ -/* - * Marvell 88PM8607 Interface - * - * Copyright (C) 2009 Marvell International Ltd. - * Haojian Zhuang - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef __LINUX_MFD_88PM8607_H -#define __LINUX_MFD_88PM8607_H - -enum { - PM8607_ID_BUCK1 = 0, - PM8607_ID_BUCK2, - PM8607_ID_BUCK3, - - PM8607_ID_LDO1, - PM8607_ID_LDO2, - PM8607_ID_LDO3, - PM8607_ID_LDO4, - PM8607_ID_LDO5, - PM8607_ID_LDO6, - PM8607_ID_LDO7, - PM8607_ID_LDO8, - PM8607_ID_LDO9, - PM8607_ID_LDO10, - PM8607_ID_LDO12, - PM8607_ID_LDO14, - - PM8607_ID_RG_MAX, -}; - -#define PM8607_ID (0x40) /* 8607 chip ID */ -#define PM8607_ID_MASK (0xF8) /* 8607 chip ID mask */ - -/* Interrupt Registers */ -#define PM8607_STATUS_1 (0x01) -#define PM8607_STATUS_2 (0x02) -#define PM8607_INT_STATUS1 (0x03) -#define PM8607_INT_STATUS2 (0x04) -#define PM8607_INT_STATUS3 (0x05) -#define PM8607_INT_MASK_1 (0x06) -#define PM8607_INT_MASK_2 (0x07) -#define PM8607_INT_MASK_3 (0x08) - -/* Regulator Control Registers */ -#define PM8607_LDO1 (0x10) -#define PM8607_LDO2 (0x11) -#define PM8607_LDO3 (0x12) -#define PM8607_LDO4 (0x13) -#define PM8607_LDO5 (0x14) -#define PM8607_LDO6 (0x15) -#define PM8607_LDO7 (0x16) -#define PM8607_LDO8 (0x17) -#define PM8607_LDO9 (0x18) -#define PM8607_LDO10 (0x19) -#define PM8607_LDO12 (0x1A) -#define PM8607_LDO14 (0x1B) -#define PM8607_SLEEP_MODE1 (0x1C) -#define PM8607_SLEEP_MODE2 (0x1D) -#define PM8607_SLEEP_MODE3 (0x1E) -#define PM8607_SLEEP_MODE4 (0x1F) -#define PM8607_GO (0x20) -#define PM8607_SLEEP_BUCK1 (0x21) -#define PM8607_SLEEP_BUCK2 (0x22) -#define PM8607_SLEEP_BUCK3 (0x23) -#define PM8607_BUCK1 (0x24) -#define PM8607_BUCK2 (0x25) -#define PM8607_BUCK3 (0x26) -#define PM8607_BUCK_CONTROLS (0x27) -#define PM8607_SUPPLIES_EN11 (0x2B) -#define PM8607_SUPPLIES_EN12 (0x2C) -#define PM8607_GROUP1 (0x2D) -#define PM8607_GROUP2 (0x2E) -#define PM8607_GROUP3 (0x2F) -#define PM8607_GROUP4 (0x30) -#define PM8607_GROUP5 (0x31) -#define PM8607_GROUP6 (0x32) -#define PM8607_SUPPLIES_EN21 (0x33) -#define PM8607_SUPPLIES_EN22 (0x34) - -/* RTC Control Registers */ -#define PM8607_RTC1 (0xA0) -#define PM8607_RTC_COUNTER1 (0xA1) -#define PM8607_RTC_COUNTER2 (0xA2) -#define PM8607_RTC_COUNTER3 (0xA3) -#define PM8607_RTC_COUNTER4 (0xA4) -#define PM8607_RTC_EXPIRE1 (0xA5) -#define PM8607_RTC_EXPIRE2 (0xA6) -#define 
PM8607_RTC_EXPIRE3 (0xA7) -#define PM8607_RTC_EXPIRE4 (0xA8) -#define PM8607_RTC_TRIM1 (0xA9) -#define PM8607_RTC_TRIM2 (0xAA) -#define PM8607_RTC_TRIM3 (0xAB) -#define PM8607_RTC_TRIM4 (0xAC) -#define PM8607_RTC_MISC1 (0xAD) -#define PM8607_RTC_MISC2 (0xAE) -#define PM8607_RTC_MISC3 (0xAF) - -/* Misc Registers */ -#define PM8607_CHIP_ID (0x00) -#define PM8607_LDO1 (0x10) -#define PM8607_DVC3 (0x26) -#define PM8607_MISC1 (0x40) - -/* bit definitions for PM8607 events */ -#define PM8607_EVENT_ONKEY (1 << 0) -#define PM8607_EVENT_EXTON (1 << 1) -#define PM8607_EVENT_CHG (1 << 2) -#define PM8607_EVENT_BAT (1 << 3) -#define PM8607_EVENT_RTC (1 << 4) -#define PM8607_EVENT_CC (1 << 5) -#define PM8607_EVENT_VBAT (1 << 8) -#define PM8607_EVENT_VCHG (1 << 9) -#define PM8607_EVENT_VSYS (1 << 10) -#define PM8607_EVENT_TINT (1 << 11) -#define PM8607_EVENT_GPADC0 (1 << 12) -#define PM8607_EVENT_GPADC1 (1 << 13) -#define PM8607_EVENT_GPADC2 (1 << 14) -#define PM8607_EVENT_GPADC3 (1 << 15) -#define PM8607_EVENT_AUDIO_SHORT (1 << 16) -#define PM8607_EVENT_PEN (1 << 17) -#define PM8607_EVENT_HEADSET (1 << 18) -#define PM8607_EVENT_HOOK (1 << 19) -#define PM8607_EVENT_MICIN (1 << 20) -#define PM8607_EVENT_CHG_TIMEOUT (1 << 21) -#define PM8607_EVENT_CHG_DONE (1 << 22) -#define PM8607_EVENT_CHG_FAULT (1 << 23) - -/* bit definitions of Status Query Interface */ -#define PM8607_STATUS_CC (1 << 3) -#define PM8607_STATUS_PEN (1 << 4) -#define PM8607_STATUS_HEADSET (1 << 5) -#define PM8607_STATUS_HOOK (1 << 6) -#define PM8607_STATUS_MICIN (1 << 7) -#define PM8607_STATUS_ONKEY (1 << 8) -#define PM8607_STATUS_EXTON (1 << 9) -#define PM8607_STATUS_CHG (1 << 10) -#define PM8607_STATUS_BAT (1 << 11) -#define PM8607_STATUS_VBUS (1 << 12) -#define PM8607_STATUS_OV (1 << 13) - -/* bit definitions of BUCK3 */ -#define PM8607_BUCK3_DOUBLE (1 << 6) - -/* bit definitions of Misc1 */ -#define PM8607_MISC1_PI2C (1 << 0) - -/* Interrupt Number in 88PM8607 */ -enum { - PM8607_IRQ_ONKEY = 0, - PM8607_IRQ_EXTON, - PM8607_IRQ_CHG, - PM8607_IRQ_BAT, - PM8607_IRQ_RTC, - PM8607_IRQ_VBAT = 8, - PM8607_IRQ_VCHG, - PM8607_IRQ_VSYS, - PM8607_IRQ_TINT, - PM8607_IRQ_GPADC0, - PM8607_IRQ_GPADC1, - PM8607_IRQ_GPADC2, - PM8607_IRQ_GPADC3, - PM8607_IRQ_AUDIO_SHORT = 16, - PM8607_IRQ_PEN, - PM8607_IRQ_HEADSET, - PM8607_IRQ_HOOK, - PM8607_IRQ_MICIN, - PM8607_IRQ_CHG_FAIL, - PM8607_IRQ_CHG_DONE, - PM8607_IRQ_CHG_FAULT, -}; - -enum { - PM8607_CHIP_A0 = 0x40, - PM8607_CHIP_A1 = 0x41, - PM8607_CHIP_B0 = 0x48, -}; - - -struct pm8607_chip { - struct device *dev; - struct mutex io_lock; - struct i2c_client *client; - struct i2c_device_id id; - - int (*read)(struct pm8607_chip *chip, int reg, int bytes, void *dest); - int (*write)(struct pm8607_chip *chip, int reg, int bytes, void *src); - - int buck3_double; /* DVC ramp slope double */ - unsigned char chip_id; - -}; - -#define PM8607_MAX_REGULATOR 15 /* 3 Bucks, 12 LDOs */ - -enum { - GI2C_PORT = 0, - PI2C_PORT, -}; - -struct pm8607_platform_data { - int i2c_port; /* Controlled by GI2C or PI2C */ - struct regulator_init_data *regulator[PM8607_MAX_REGULATOR]; -}; - -extern int pm8607_reg_read(struct pm8607_chip *, int); -extern int pm8607_reg_write(struct pm8607_chip *, int, unsigned char); -extern int pm8607_bulk_read(struct pm8607_chip *, int, int, - unsigned char *); -extern int pm8607_bulk_write(struct pm8607_chip *, int, int, - unsigned char *); -extern int pm8607_set_bits(struct pm8607_chip *, int, unsigned char, - unsigned char); - -extern int pm860x_device_init(struct pm8607_chip *chip, - struct 
pm8607_platform_data *pdata); -extern void pm860x_device_exit(struct pm8607_chip *chip); - -#endif /* __LINUX_MFD_88PM860X_H */ diff --git a/include/linux/mfd/88pm860x.h b/include/linux/mfd/88pm860x.h new file mode 100644 index 000000000000..5845ae47df30 --- /dev/null +++ b/include/linux/mfd/88pm860x.h @@ -0,0 +1,227 @@ +/* + * Marvell 88PM860x Interface + * + * Copyright (C) 2009 Marvell International Ltd. + * Haojian Zhuang + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __LINUX_MFD_88PM860X_H +#define __LINUX_MFD_88PM860X_H + +enum { + CHIP_INVALID = 0, + CHIP_PM8606, + CHIP_PM8607, + CHIP_MAX, +}; + +enum { + PM8607_ID_BUCK1 = 0, + PM8607_ID_BUCK2, + PM8607_ID_BUCK3, + + PM8607_ID_LDO1, + PM8607_ID_LDO2, + PM8607_ID_LDO3, + PM8607_ID_LDO4, + PM8607_ID_LDO5, + PM8607_ID_LDO6, + PM8607_ID_LDO7, + PM8607_ID_LDO8, + PM8607_ID_LDO9, + PM8607_ID_LDO10, + PM8607_ID_LDO12, + PM8607_ID_LDO14, + + PM8607_ID_RG_MAX, +}; + +#define PM8607_VERSION (0x40) /* 8607 chip ID */ +#define PM8607_VERSION_MASK (0xF0) /* 8607 chip ID mask */ + +/* Interrupt Registers */ +#define PM8607_STATUS_1 (0x01) +#define PM8607_STATUS_2 (0x02) +#define PM8607_INT_STATUS1 (0x03) +#define PM8607_INT_STATUS2 (0x04) +#define PM8607_INT_STATUS3 (0x05) +#define PM8607_INT_MASK_1 (0x06) +#define PM8607_INT_MASK_2 (0x07) +#define PM8607_INT_MASK_3 (0x08) + +/* Regulator Control Registers */ +#define PM8607_LDO1 (0x10) +#define PM8607_LDO2 (0x11) +#define PM8607_LDO3 (0x12) +#define PM8607_LDO4 (0x13) +#define PM8607_LDO5 (0x14) +#define PM8607_LDO6 (0x15) +#define PM8607_LDO7 (0x16) +#define PM8607_LDO8 (0x17) +#define PM8607_LDO9 (0x18) +#define PM8607_LDO10 (0x19) +#define PM8607_LDO12 (0x1A) +#define PM8607_LDO14 (0x1B) +#define PM8607_SLEEP_MODE1 (0x1C) +#define PM8607_SLEEP_MODE2 (0x1D) +#define PM8607_SLEEP_MODE3 (0x1E) +#define PM8607_SLEEP_MODE4 (0x1F) +#define PM8607_GO (0x20) +#define PM8607_SLEEP_BUCK1 (0x21) +#define PM8607_SLEEP_BUCK2 (0x22) +#define PM8607_SLEEP_BUCK3 (0x23) +#define PM8607_BUCK1 (0x24) +#define PM8607_BUCK2 (0x25) +#define PM8607_BUCK3 (0x26) +#define PM8607_BUCK_CONTROLS (0x27) +#define PM8607_SUPPLIES_EN11 (0x2B) +#define PM8607_SUPPLIES_EN12 (0x2C) +#define PM8607_GROUP1 (0x2D) +#define PM8607_GROUP2 (0x2E) +#define PM8607_GROUP3 (0x2F) +#define PM8607_GROUP4 (0x30) +#define PM8607_GROUP5 (0x31) +#define PM8607_GROUP6 (0x32) +#define PM8607_SUPPLIES_EN21 (0x33) +#define PM8607_SUPPLIES_EN22 (0x34) + +/* RTC Control Registers */ +#define PM8607_RTC1 (0xA0) +#define PM8607_RTC_COUNTER1 (0xA1) +#define PM8607_RTC_COUNTER2 (0xA2) +#define PM8607_RTC_COUNTER3 (0xA3) +#define PM8607_RTC_COUNTER4 (0xA4) +#define PM8607_RTC_EXPIRE1 (0xA5) +#define PM8607_RTC_EXPIRE2 (0xA6) +#define PM8607_RTC_EXPIRE3 (0xA7) +#define PM8607_RTC_EXPIRE4 (0xA8) +#define PM8607_RTC_TRIM1 (0xA9) +#define PM8607_RTC_TRIM2 (0xAA) +#define PM8607_RTC_TRIM3 (0xAB) +#define PM8607_RTC_TRIM4 (0xAC) +#define PM8607_RTC_MISC1 (0xAD) +#define PM8607_RTC_MISC2 (0xAE) +#define PM8607_RTC_MISC3 (0xAF) + +/* Misc Registers */ +#define PM8607_CHIP_ID (0x00) +#define PM8607_LDO1 (0x10) +#define PM8607_DVC3 (0x26) +#define PM8607_MISC1 (0x40) + +/* bit definitions for PM8607 events */ +#define PM8607_EVENT_ONKEY (1 << 0) +#define PM8607_EVENT_EXTON (1 << 1) +#define PM8607_EVENT_CHG (1 << 2) +#define PM8607_EVENT_BAT (1 << 3) +#define PM8607_EVENT_RTC (1 << 4) +#define 
PM8607_EVENT_CC (1 << 5) +#define PM8607_EVENT_VBAT (1 << 8) +#define PM8607_EVENT_VCHG (1 << 9) +#define PM8607_EVENT_VSYS (1 << 10) +#define PM8607_EVENT_TINT (1 << 11) +#define PM8607_EVENT_GPADC0 (1 << 12) +#define PM8607_EVENT_GPADC1 (1 << 13) +#define PM8607_EVENT_GPADC2 (1 << 14) +#define PM8607_EVENT_GPADC3 (1 << 15) +#define PM8607_EVENT_AUDIO_SHORT (1 << 16) +#define PM8607_EVENT_PEN (1 << 17) +#define PM8607_EVENT_HEADSET (1 << 18) +#define PM8607_EVENT_HOOK (1 << 19) +#define PM8607_EVENT_MICIN (1 << 20) +#define PM8607_EVENT_CHG_TIMEOUT (1 << 21) +#define PM8607_EVENT_CHG_DONE (1 << 22) +#define PM8607_EVENT_CHG_FAULT (1 << 23) + +/* bit definitions of Status Query Interface */ +#define PM8607_STATUS_CC (1 << 3) +#define PM8607_STATUS_PEN (1 << 4) +#define PM8607_STATUS_HEADSET (1 << 5) +#define PM8607_STATUS_HOOK (1 << 6) +#define PM8607_STATUS_MICIN (1 << 7) +#define PM8607_STATUS_ONKEY (1 << 8) +#define PM8607_STATUS_EXTON (1 << 9) +#define PM8607_STATUS_CHG (1 << 10) +#define PM8607_STATUS_BAT (1 << 11) +#define PM8607_STATUS_VBUS (1 << 12) +#define PM8607_STATUS_OV (1 << 13) + +/* bit definitions of BUCK3 */ +#define PM8607_BUCK3_DOUBLE (1 << 6) + +/* bit definitions of Misc1 */ +#define PM8607_MISC1_PI2C (1 << 0) + +/* Interrupt Number in 88PM8607 */ +enum { + PM8607_IRQ_ONKEY = 0, + PM8607_IRQ_EXTON, + PM8607_IRQ_CHG, + PM8607_IRQ_BAT, + PM8607_IRQ_RTC, + PM8607_IRQ_VBAT = 8, + PM8607_IRQ_VCHG, + PM8607_IRQ_VSYS, + PM8607_IRQ_TINT, + PM8607_IRQ_GPADC0, + PM8607_IRQ_GPADC1, + PM8607_IRQ_GPADC2, + PM8607_IRQ_GPADC3, + PM8607_IRQ_AUDIO_SHORT = 16, + PM8607_IRQ_PEN, + PM8607_IRQ_HEADSET, + PM8607_IRQ_HOOK, + PM8607_IRQ_MICIN, + PM8607_IRQ_CHG_FAIL, + PM8607_IRQ_CHG_DONE, + PM8607_IRQ_CHG_FAULT, +}; + +enum { + PM8607_CHIP_A0 = 0x40, + PM8607_CHIP_A1 = 0x41, + PM8607_CHIP_B0 = 0x48, +}; + +struct pm860x_chip { + struct device *dev; + struct mutex io_lock; + struct i2c_client *client; + struct i2c_client *companion; /* companion chip client */ + + int buck3_double; /* DVC ramp slope double */ + unsigned short companion_addr; + int id; + unsigned char chip_version; + +}; + +#define PM8607_MAX_REGULATOR 15 /* 3 Bucks, 12 LDOs */ + +enum { + GI2C_PORT = 0, + PI2C_PORT, +}; + +struct pm860x_platform_data { + unsigned short companion_addr; /* I2C address of companion chip */ + int i2c_port; /* Controlled by GI2C or PI2C */ + struct regulator_init_data *regulator[PM8607_MAX_REGULATOR]; +}; + +extern int pm860x_reg_read(struct i2c_client *, int); +extern int pm860x_reg_write(struct i2c_client *, int, unsigned char); +extern int pm860x_bulk_read(struct i2c_client *, int, int, unsigned char *); +extern int pm860x_bulk_write(struct i2c_client *, int, int, unsigned char *); +extern int pm860x_set_bits(struct i2c_client *, int, unsigned char, + unsigned char); + +extern int pm860x_device_init(struct pm860x_chip *chip, + struct pm860x_platform_data *pdata); +extern void pm860x_device_exit(struct pm860x_chip *chip); + +#endif /* __LINUX_MFD_88PM860X_H */ -- cgit v1.2.3 From 5c42e8c4a9c86ea26ed4ecb732a842dea0dfb6b6 Mon Sep 17 00:00:00 2001 From: Haojian Zhuang Date: Tue, 15 Dec 2009 16:01:47 -0500 Subject: mfd: Add irq support in 88pm860x 88PM860x is a complex PMIC device. It contains touch, charger, sound, rtc, backlight, led, and so on. Host communicates to 88PM860x by I2C bus. Use thread irq to support this usage case. 
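As a usage illustration of the IRQ API added here (hypothetical example_* names, not code from this patch): a subdevice registers a callback for one of the PM8607_IRQ_* numbers and then unmasks it; the callback is dispatched from the core's threaded handler, i.e. in process context.

#include <linux/device.h>
#include <linux/interrupt.h>
#include <linux/mfd/88pm860x.h>

static irqreturn_t example_pen_handler(int irq, void *data)
{
        struct pm860x_chip *chip = data;

        dev_dbg(chip->dev, "pen event\n");
        return IRQ_HANDLED;
}

static int example_enable_pen_irq(struct pm860x_chip *chip)
{
        int ret;

        ret = pm860x_request_irq(chip, PM8607_IRQ_PEN,
                                 example_pen_handler, chip);
        if (ret < 0)
                return ret;

        /* IRQs start masked in device_irq_init(), so unmask after registering. */
        return pm860x_unmask_irq(chip, PM8607_IRQ_PEN);
}
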
Signed-off-by: Haojian Zhuang Signed-off-by: Samuel Ortiz --- drivers/mfd/88pm860x-core.c | 226 +++++++++++++++++++++++++++++++++++++++++-- include/linux/mfd/88pm860x.h | 54 ++++++----- 2 files changed, 247 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/drivers/mfd/88pm860x-core.c b/drivers/mfd/88pm860x-core.c index 72b00304dc3a..9185f0d945f4 100644 --- a/drivers/mfd/88pm860x-core.c +++ b/drivers/mfd/88pm860x-core.c @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -67,15 +68,209 @@ static struct mfd_cell pm8607_devs[] = { PM8607_REG_DEVS(ldo14, LDO14), }; -static void device_8606_init(struct pm860x_chip *chip, struct i2c_client *i2c, - struct pm860x_platform_data *pdata) +#define CHECK_IRQ(irq) \ +do { \ + if ((irq < 0) || (irq >= PM860X_NUM_IRQ)) \ + return -EINVAL; \ +} while (0) + +/* IRQs only occur on 88PM8607 */ +int pm860x_mask_irq(struct pm860x_chip *chip, int irq) +{ + struct i2c_client *i2c = (chip->id == CHIP_PM8607) ? chip->client \ + : chip->companion; + int offset, data, ret; + + CHECK_IRQ(irq); + + offset = (irq >> 3) + PM8607_INT_MASK_1; + data = 1 << (irq % 8); + ret = pm860x_set_bits(i2c, offset, data, 0); + + return ret; +} +EXPORT_SYMBOL(pm860x_mask_irq); + +int pm860x_unmask_irq(struct pm860x_chip *chip, int irq) +{ + struct i2c_client *i2c = (chip->id == CHIP_PM8607) ? chip->client \ + : chip->companion; + int offset, data, ret; + + CHECK_IRQ(irq); + + offset = (irq >> 3) + PM8607_INT_MASK_1; + data = 1 << (irq % 8); + ret = pm860x_set_bits(i2c, offset, data, data); + + return ret; +} +EXPORT_SYMBOL(pm860x_unmask_irq); + +#define INT_STATUS_NUM (3) + +static irqreturn_t pm8607_irq_thread(int irq, void *data) +{ + DECLARE_BITMAP(irq_status, PM860X_NUM_IRQ); + struct pm860x_chip *chip = data; + struct i2c_client *i2c = (chip->id == CHIP_PM8607) ? chip->client \ + : chip->companion; + unsigned char status_buf[INT_STATUS_NUM << 1]; + unsigned long value; + int i, ret; + + irq_status[0] = 0; + + /* read out status register */ + ret = pm860x_bulk_read(i2c, PM8607_INT_STATUS1, + INT_STATUS_NUM << 1, status_buf); + if (ret < 0) + goto out; + if (chip->irq_mode) { + /* 0, clear by read. 1, clear by write */ + ret = pm860x_bulk_write(i2c, PM8607_INT_STATUS1, + INT_STATUS_NUM, status_buf); + if (ret < 0) + goto out; + } + + /* clear masked interrupt status */ + for (i = 0, value = 0; i < INT_STATUS_NUM; i++) { + status_buf[i] &= status_buf[i + INT_STATUS_NUM]; + irq_status[0] |= status_buf[i] << (i * 8); + } + + while (!bitmap_empty(irq_status, PM860X_NUM_IRQ)) { + irq = find_first_bit(irq_status, PM860X_NUM_IRQ); + clear_bit(irq, irq_status); + dev_dbg(chip->dev, "Servicing IRQ #%d\n", irq); + + mutex_lock(&chip->irq_lock); + if (chip->irq[irq].handler) + chip->irq[irq].handler(irq, chip->irq[irq].data); + else { + pm860x_mask_irq(chip, irq); + dev_err(chip->dev, "Nobody cares IRQ %d. 
" + "Now mask it.\n", irq); + for (i = 0; i < (INT_STATUS_NUM << 1); i++) { + dev_err(chip->dev, "status[%d]:%x\n", i, + status_buf[i]); + } + } + mutex_unlock(&chip->irq_lock); + } +out: + return IRQ_HANDLED; +} + +int pm860x_request_irq(struct pm860x_chip *chip, int irq, + irq_handler_t handler, void *data) { + CHECK_IRQ(irq); + if (!handler) + return -EINVAL; + + mutex_lock(&chip->irq_lock); + chip->irq[irq].handler = handler; + chip->irq[irq].data = data; + mutex_unlock(&chip->irq_lock); + + return 0; } +EXPORT_SYMBOL(pm860x_request_irq); -static void device_8607_init(struct pm860x_chip *chip, struct i2c_client *i2c, - struct pm860x_platform_data *pdata) +int pm860x_free_irq(struct pm860x_chip *chip, int irq) { - int i, count; + CHECK_IRQ(irq); + + mutex_lock(&chip->irq_lock); + chip->irq[irq].handler = NULL; + chip->irq[irq].data = NULL; + mutex_unlock(&chip->irq_lock); + + return 0; +} +EXPORT_SYMBOL(pm860x_free_irq); + +static int __devinit device_irq_init(struct pm860x_chip *chip, + struct pm860x_platform_data *pdata) +{ + struct i2c_client *i2c = (chip->id == CHIP_PM8607) ? chip->client \ + : chip->companion; + unsigned char status_buf[INT_STATUS_NUM]; + int data, mask, ret = -EINVAL; + + mutex_init(&chip->irq_lock); + + mask = PM8607_B0_MISC1_INV_INT | PM8607_B0_MISC1_INT_CLEAR + | PM8607_B0_MISC1_INT_MASK; + data = 0; + chip->irq_mode = 0; + if (pdata && pdata->irq_mode) { + /* + * irq_mode defines the way of clearing interrupt. If it's 1, + * clear IRQ by write. Otherwise, clear it by read. + * This control bit is valid from 88PM8607 B0 steping. + */ + data |= PM8607_B0_MISC1_INT_CLEAR; + chip->irq_mode = 1; + } + ret = pm860x_set_bits(i2c, PM8607_B0_MISC1, mask, data); + if (ret < 0) + goto out; + + /* mask all IRQs */ + memset(status_buf, 0, INT_STATUS_NUM); + ret = pm860x_bulk_write(i2c, PM8607_INT_MASK_1, + INT_STATUS_NUM, status_buf); + if (ret < 0) + goto out; + + if (chip->irq_mode) { + /* clear interrupt status by write */ + memset(status_buf, 0xFF, INT_STATUS_NUM); + ret = pm860x_bulk_write(i2c, PM8607_INT_STATUS1, + INT_STATUS_NUM, status_buf); + } else { + /* clear interrupt status by read */ + ret = pm860x_bulk_read(i2c, PM8607_INT_STATUS1, + INT_STATUS_NUM, status_buf); + } + if (ret < 0) + goto out; + + memset(chip->irq, 0, sizeof(struct pm860x_irq) * PM860X_NUM_IRQ); + + ret = request_threaded_irq(i2c->irq, NULL, pm8607_irq_thread, + IRQF_ONESHOT | IRQF_TRIGGER_LOW, + "88PM8607", chip); + if (ret < 0) { + dev_err(chip->dev, "Failed to request IRQ #%d.\n", i2c->irq); + goto out; + } + chip->chip_irq = i2c->irq; + return 0; +out: + return ret; +} + +static void __devexit device_irq_exit(struct pm860x_chip *chip) +{ + if (chip->chip_irq >= 0) + free_irq(chip->chip_irq, chip); +} + +static void __devinit device_8606_init(struct pm860x_chip *chip, + struct i2c_client *i2c, + struct pm860x_platform_data *pdata) +{ +} + +static void __devinit device_8607_init(struct pm860x_chip *chip, + struct i2c_client *i2c, + struct pm860x_platform_data *pdata) +{ + int i, count, data; int ret; ret = pm860x_reg_read(i2c, PM8607_CHIP_ID); @@ -91,7 +286,6 @@ static void device_8607_init(struct pm860x_chip *chip, struct i2c_client *i2c, "Chip ID: %02x\n", ret); goto out; } - chip->chip_version = ret; ret = pm860x_reg_read(i2c, PM8607_BUCK3); if (ret < 0) { @@ -101,12 +295,26 @@ static void device_8607_init(struct pm860x_chip *chip, struct i2c_client *i2c, if (ret & PM8607_BUCK3_DOUBLE) chip->buck3_double = 1; - ret = pm860x_reg_read(i2c, PM8607_MISC1); + ret = pm860x_reg_read(i2c, 
PM8607_B0_MISC1); if (ret < 0) { dev_err(chip->dev, "Failed to read MISC1 register: %d\n", ret); goto out; } + if (pdata && (pdata->i2c_port == PI2C_PORT)) + data = PM8607_B0_MISC1_PI2C; + else + data = 0; + ret = pm860x_set_bits(i2c, PM8607_B0_MISC1, PM8607_B0_MISC1_PI2C, data); + if (ret < 0) { + dev_err(chip->dev, "Failed to access MISC1:%d\n", ret); + goto out; + } + + ret = device_irq_init(chip, pdata); + if (ret < 0) + goto out; + count = ARRAY_SIZE(pm8607_devs); for (i = 0; i < count; i++) { ret = mfd_add_devices(chip->dev, i, &pm8607_devs[i], @@ -123,6 +331,8 @@ out: int pm860x_device_init(struct pm860x_chip *chip, struct pm860x_platform_data *pdata) { + chip->chip_irq = -EINVAL; + switch (chip->id) { case CHIP_PM8606: device_8606_init(chip, chip->client, pdata); @@ -142,11 +352,13 @@ int pm860x_device_init(struct pm860x_chip *chip, break; } } + return 0; } void pm860x_device_exit(struct pm860x_chip *chip) { + device_irq_exit(chip); mfd_remove_devices(chip->dev); } diff --git a/include/linux/mfd/88pm860x.h b/include/linux/mfd/88pm860x.h index 5845ae47df30..b4d6018ba0d6 100644 --- a/include/linux/mfd/88pm860x.h +++ b/include/linux/mfd/88pm860x.h @@ -12,6 +12,8 @@ #ifndef __LINUX_MFD_88PM860X_H #define __LINUX_MFD_88PM860X_H +#include + enum { CHIP_INVALID = 0, CHIP_PM8606, @@ -109,33 +111,10 @@ enum { /* Misc Registers */ #define PM8607_CHIP_ID (0x00) +#define PM8607_B0_MISC1 (0x0C) #define PM8607_LDO1 (0x10) #define PM8607_DVC3 (0x26) -#define PM8607_MISC1 (0x40) - -/* bit definitions for PM8607 events */ -#define PM8607_EVENT_ONKEY (1 << 0) -#define PM8607_EVENT_EXTON (1 << 1) -#define PM8607_EVENT_CHG (1 << 2) -#define PM8607_EVENT_BAT (1 << 3) -#define PM8607_EVENT_RTC (1 << 4) -#define PM8607_EVENT_CC (1 << 5) -#define PM8607_EVENT_VBAT (1 << 8) -#define PM8607_EVENT_VCHG (1 << 9) -#define PM8607_EVENT_VSYS (1 << 10) -#define PM8607_EVENT_TINT (1 << 11) -#define PM8607_EVENT_GPADC0 (1 << 12) -#define PM8607_EVENT_GPADC1 (1 << 13) -#define PM8607_EVENT_GPADC2 (1 << 14) -#define PM8607_EVENT_GPADC3 (1 << 15) -#define PM8607_EVENT_AUDIO_SHORT (1 << 16) -#define PM8607_EVENT_PEN (1 << 17) -#define PM8607_EVENT_HEADSET (1 << 18) -#define PM8607_EVENT_HOOK (1 << 19) -#define PM8607_EVENT_MICIN (1 << 20) -#define PM8607_EVENT_CHG_TIMEOUT (1 << 21) -#define PM8607_EVENT_CHG_DONE (1 << 22) -#define PM8607_EVENT_CHG_FAULT (1 << 23) +#define PM8607_A1_MISC1 (0x40) /* bit definitions of Status Query Interface */ #define PM8607_STATUS_CC (1 << 3) @@ -154,7 +133,12 @@ enum { #define PM8607_BUCK3_DOUBLE (1 << 6) /* bit definitions of Misc1 */ -#define PM8607_MISC1_PI2C (1 << 0) +#define PM8607_A1_MISC1_PI2C (1 << 0) +#define PM8607_B0_MISC1_INV_INT (1 << 0) +#define PM8607_B0_MISC1_INT_CLEAR (1 << 1) +#define PM8607_B0_MISC1_INT_MASK (1 << 2) +#define PM8607_B0_MISC1_PI2C (1 << 3) +#define PM8607_B0_MISC1_RESET (1 << 6) /* Interrupt Number in 88PM8607 */ enum { @@ -187,15 +171,26 @@ enum { PM8607_CHIP_B0 = 0x48, }; +#define PM860X_NUM_IRQ 24 + +struct pm860x_irq { + irq_handler_t handler; + void *data; +}; + struct pm860x_chip { struct device *dev; struct mutex io_lock; + struct mutex irq_lock; struct i2c_client *client; struct i2c_client *companion; /* companion chip client */ + struct pm860x_irq irq[PM860X_NUM_IRQ]; int buck3_double; /* DVC ramp slope double */ unsigned short companion_addr; int id; + int irq_mode; + int chip_irq; unsigned char chip_version; }; @@ -210,6 +205,7 @@ enum { struct pm860x_platform_data { unsigned short companion_addr; /* I2C address of companion chip */ int 
i2c_port; /* Controlled by GI2C or PI2C */ + int irq_mode; /* Clear interrupt by read/write(0/1) */ struct regulator_init_data *regulator[PM8607_MAX_REGULATOR]; }; @@ -220,6 +216,12 @@ extern int pm860x_bulk_write(struct i2c_client *, int, int, unsigned char *); extern int pm860x_set_bits(struct i2c_client *, int, unsigned char, unsigned char); +extern int pm860x_mask_irq(struct pm860x_chip *, int); +extern int pm860x_unmask_irq(struct pm860x_chip *, int); +extern int pm860x_request_irq(struct pm860x_chip *, int, + irq_handler_t handler, void *); +extern int pm860x_free_irq(struct pm860x_chip *, int); + extern int pm860x_device_init(struct pm860x_chip *chip, struct pm860x_platform_data *pdata); extern void pm860x_device_exit(struct pm860x_chip *chip); -- cgit v1.2.3 From a16122bcacf050e7f83015183053cf799713cc37 Mon Sep 17 00:00:00 2001 From: Haojian Zhuang Date: Tue, 15 Dec 2009 16:04:36 -0500 Subject: mfd: Append subdev into 88pm860x driver Append backlight, led & touch subdevs into 88pm860x driver. Signed-off-by: Haojian Zhuang Signed-off-by: Samuel Ortiz --- drivers/mfd/88pm860x-core.c | 217 ++++++++++++++++++++++++++++++++++++++++--- include/linux/mfd/88pm860x.h | 151 ++++++++++++++++++++++++++++++ 2 files changed, 356 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/drivers/mfd/88pm860x-core.c b/drivers/mfd/88pm860x-core.c index 9185f0d945f4..16f0dca707a7 100644 --- a/drivers/mfd/88pm860x-core.c +++ b/drivers/mfd/88pm860x-core.c @@ -17,6 +17,100 @@ #include #include +char pm860x_backlight_name[][MFD_NAME_SIZE] = { + "backlight-0", + "backlight-1", + "backlight-2", +}; +EXPORT_SYMBOL(pm860x_backlight_name); + +char pm860x_led_name[][MFD_NAME_SIZE] = { + "led0-red", + "led0-green", + "led0-blue", + "led1-red", + "led1-green", + "led1-blue", +}; +EXPORT_SYMBOL(pm860x_led_name); + +#define PM8606_BACKLIGHT_RESOURCE(_i, _x) \ +{ \ + .name = pm860x_backlight_name[_i], \ + .start = PM8606_##_x, \ + .end = PM8606_##_x, \ + .flags = IORESOURCE_IO, \ +} + +static struct resource backlight_resources[] = { + PM8606_BACKLIGHT_RESOURCE(PM8606_BACKLIGHT1, WLED1A), + PM8606_BACKLIGHT_RESOURCE(PM8606_BACKLIGHT2, WLED2A), + PM8606_BACKLIGHT_RESOURCE(PM8606_BACKLIGHT3, WLED3A), +}; + +#define PM8606_BACKLIGHT_DEVS(_i) \ +{ \ + .name = "88pm860x-backlight", \ + .num_resources = 1, \ + .resources = &backlight_resources[_i], \ + .id = _i, \ +} + +static struct mfd_cell backlight_devs[] = { + PM8606_BACKLIGHT_DEVS(PM8606_BACKLIGHT1), + PM8606_BACKLIGHT_DEVS(PM8606_BACKLIGHT2), + PM8606_BACKLIGHT_DEVS(PM8606_BACKLIGHT3), +}; + +#define PM8606_LED_RESOURCE(_i, _x) \ +{ \ + .name = pm860x_led_name[_i], \ + .start = PM8606_##_x, \ + .end = PM8606_##_x, \ + .flags = IORESOURCE_IO, \ +} + +static struct resource led_resources[] = { + PM8606_LED_RESOURCE(PM8606_LED1_RED, RGB2B), + PM8606_LED_RESOURCE(PM8606_LED1_GREEN, RGB2C), + PM8606_LED_RESOURCE(PM8606_LED1_BLUE, RGB2D), + PM8606_LED_RESOURCE(PM8606_LED2_RED, RGB1B), + PM8606_LED_RESOURCE(PM8606_LED2_GREEN, RGB1C), + PM8606_LED_RESOURCE(PM8606_LED2_BLUE, RGB1D), +}; + +#define PM8606_LED_DEVS(_i) \ +{ \ + .name = "88pm860x-led", \ + .num_resources = 1, \ + .resources = &led_resources[_i], \ + .id = _i, \ +} + +static struct mfd_cell led_devs[] = { + PM8606_LED_DEVS(PM8606_LED1_RED), + PM8606_LED_DEVS(PM8606_LED1_GREEN), + PM8606_LED_DEVS(PM8606_LED1_BLUE), + PM8606_LED_DEVS(PM8606_LED2_RED), + PM8606_LED_DEVS(PM8606_LED2_GREEN), + PM8606_LED_DEVS(PM8606_LED2_BLUE), +}; + +static struct resource touch_resources[] = { + { + .start = 
PM8607_IRQ_PEN, + .end = PM8607_IRQ_PEN, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct mfd_cell touch_devs[] = { + { + .name = "88pm860x-touch", + .num_resources = 1, + .resources = &touch_resources[0], + }, +}; #define PM8607_REG_RESOURCE(_start, _end) \ { \ @@ -25,7 +119,7 @@ .flags = IORESOURCE_IO, \ } -static struct resource pm8607_regulator_resources[] = { +static struct resource regulator_resources[] = { PM8607_REG_RESOURCE(BUCK1, BUCK1), PM8607_REG_RESOURCE(BUCK2, BUCK2), PM8607_REG_RESOURCE(BUCK3, BUCK3), @@ -47,10 +141,11 @@ static struct resource pm8607_regulator_resources[] = { { \ .name = "88pm8607-" #_name, \ .num_resources = 1, \ - .resources = &pm8607_regulator_resources[PM8607_ID_##_id], \ + .resources = ®ulator_resources[PM8607_ID_##_id], \ + .id = PM8607_ID_##_id, \ } -static struct mfd_cell pm8607_devs[] = { +static struct mfd_cell regulator_devs[] = { PM8607_REG_DEVS(buck1, BUCK1), PM8607_REG_DEVS(buck2, BUCK2), PM8607_REG_DEVS(buck3, BUCK3), @@ -192,6 +287,61 @@ int pm860x_free_irq(struct pm860x_chip *chip, int irq) } EXPORT_SYMBOL(pm860x_free_irq); +static int __devinit device_gpadc_init(struct pm860x_chip *chip, + struct pm860x_platform_data *pdata) +{ + struct i2c_client *i2c = (chip->id == CHIP_PM8607) ? chip->client \ + : chip->companion; + int use_gpadc = 0, data, ret; + + /* initialize GPADC without activating it */ + + if (pdata && pdata->touch) { + /* set GPADC MISC1 register */ + data = 0; + data |= (pdata->touch->gpadc_prebias << 1) + & PM8607_GPADC_PREBIAS_MASK; + data |= (pdata->touch->slot_cycle << 3) + & PM8607_GPADC_SLOT_CYCLE_MASK; + data |= (pdata->touch->off_scale << 5) + & PM8607_GPADC_OFF_SCALE_MASK; + data |= (pdata->touch->sw_cal << 7) + & PM8607_GPADC_SW_CAL_MASK; + if (data) { + ret = pm860x_reg_write(i2c, PM8607_GPADC_MISC1, data); + if (ret < 0) + goto out; + } + /* set tsi prebias time */ + if (pdata->touch->tsi_prebias) { + data = pdata->touch->tsi_prebias; + ret = pm860x_reg_write(i2c, PM8607_TSI_PREBIAS, data); + if (ret < 0) + goto out; + } + /* set prebias & prechg time of pen detect */ + data = 0; + data |= pdata->touch->pen_prebias & PM8607_PD_PREBIAS_MASK; + data |= (pdata->touch->pen_prechg << 5) + & PM8607_PD_PRECHG_MASK; + if (data) { + ret = pm860x_reg_write(i2c, PM8607_PD_PREBIAS, data); + if (ret < 0) + goto out; + } + + use_gpadc = 1; + } + + /* turn on GPADC */ + if (use_gpadc) { + ret = pm860x_set_bits(i2c, PM8607_GPADC_MISC1, + PM8607_GPADC_EN, PM8607_GPADC_EN); + } +out: + return ret; +} + static int __devinit device_irq_init(struct pm860x_chip *chip, struct pm860x_platform_data *pdata) { @@ -264,14 +414,40 @@ static void __devinit device_8606_init(struct pm860x_chip *chip, struct i2c_client *i2c, struct pm860x_platform_data *pdata) { + int ret; + + if (pdata && pdata->backlight) { + ret = mfd_add_devices(chip->dev, 0, &backlight_devs[0], + ARRAY_SIZE(backlight_devs), + &backlight_resources[0], 0); + if (ret < 0) { + dev_err(chip->dev, "Failed to add backlight " + "subdev\n"); + goto out_dev; + } + } + + if (pdata && pdata->led) { + ret = mfd_add_devices(chip->dev, 0, &led_devs[0], + ARRAY_SIZE(led_devs), + &led_resources[0], 0); + if (ret < 0) { + dev_err(chip->dev, "Failed to add led " + "subdev\n"); + goto out_dev; + } + } + return; +out_dev: + mfd_remove_devices(chip->dev); + device_irq_exit(chip); } static void __devinit device_8607_init(struct pm860x_chip *chip, struct i2c_client *i2c, struct pm860x_platform_data *pdata) { - int i, count, data; - int ret; + int data, ret; ret = pm860x_reg_read(i2c, 
PM8607_CHIP_ID); if (ret < 0) { @@ -311,19 +487,36 @@ static void __devinit device_8607_init(struct pm860x_chip *chip, goto out; } + ret = device_gpadc_init(chip, pdata); + if (ret < 0) + goto out; + ret = device_irq_init(chip, pdata); if (ret < 0) goto out; - count = ARRAY_SIZE(pm8607_devs); - for (i = 0; i < count; i++) { - ret = mfd_add_devices(chip->dev, i, &pm8607_devs[i], - 1, NULL, 0); - if (ret != 0) { - dev_err(chip->dev, "Failed to add subdevs\n"); - goto out; + ret = mfd_add_devices(chip->dev, 0, ®ulator_devs[0], + ARRAY_SIZE(regulator_devs), + ®ulator_resources[0], 0); + if (ret < 0) { + dev_err(chip->dev, "Failed to add regulator subdev\n"); + goto out_dev; + } + + if (pdata && pdata->touch) { + ret = mfd_add_devices(chip->dev, 0, &touch_devs[0], + ARRAY_SIZE(touch_devs), + &touch_resources[0], 0); + if (ret < 0) { + dev_err(chip->dev, "Failed to add touch " + "subdev\n"); + goto out_dev; } } + return; +out_dev: + mfd_remove_devices(chip->dev); + device_irq_exit(chip); out: return; } diff --git a/include/linux/mfd/88pm860x.h b/include/linux/mfd/88pm860x.h index b4d6018ba0d6..d7edf11784f5 100644 --- a/include/linux/mfd/88pm860x.h +++ b/include/linux/mfd/88pm860x.h @@ -14,6 +14,8 @@ #include +#define MFD_NAME_SIZE (40) + enum { CHIP_INVALID = 0, CHIP_PM8606, @@ -21,6 +23,99 @@ enum { CHIP_MAX, }; +enum { + PM8606_ID_INVALID, + PM8606_ID_BACKLIGHT, + PM8606_ID_LED, + PM8606_ID_VIBRATOR, + PM8606_ID_TOUCH, + PM8606_ID_SOUND, + PM8606_ID_CHARGER, + PM8606_ID_MAX, +}; + +enum { + PM8606_BACKLIGHT1 = 0, + PM8606_BACKLIGHT2, + PM8606_BACKLIGHT3, +}; + +enum { + PM8606_LED1_RED = 0, + PM8606_LED1_GREEN, + PM8606_LED1_BLUE, + PM8606_LED2_RED, + PM8606_LED2_GREEN, + PM8606_LED2_BLUE, + PM8607_LED_VIBRATOR, +}; + + +/* 8606 Registers */ +#define PM8606_DCM_BOOST (0x00) +#define PM8606_PWM (0x01) + +/* Backlight Registers */ +#define PM8606_WLED1A (0x02) +#define PM8606_WLED1B (0x03) +#define PM8606_WLED2A (0x04) +#define PM8606_WLED2B (0x05) +#define PM8606_WLED3A (0x06) +#define PM8606_WLED3B (0x07) + +/* LED Registers */ +#define PM8606_RGB2A (0x08) +#define PM8606_RGB2B (0x09) +#define PM8606_RGB2C (0x0A) +#define PM8606_RGB2D (0x0B) +#define PM8606_RGB1A (0x0C) +#define PM8606_RGB1B (0x0D) +#define PM8606_RGB1C (0x0E) +#define PM8606_RGB1D (0x0F) + +#define PM8606_PREREGULATORA (0x10) +#define PM8606_PREREGULATORB (0x11) +#define PM8606_VIBRATORA (0x12) +#define PM8606_VIBRATORB (0x13) +#define PM8606_VCHG (0x14) +#define PM8606_VSYS (0x15) +#define PM8606_MISC (0x16) +#define PM8606_CHIP_ID (0x17) +#define PM8606_STATUS (0x18) +#define PM8606_FLAGS (0x19) +#define PM8606_PROTECTA (0x1A) +#define PM8606_PROTECTB (0x1B) +#define PM8606_PROTECTC (0x1C) + +/* Bit definitions of PM8606 registers */ +#define PM8606_DCM_500MA (0x0) /* current limit */ +#define PM8606_DCM_750MA (0x1) +#define PM8606_DCM_1000MA (0x2) +#define PM8606_DCM_1250MA (0x3) +#define PM8606_DCM_250MV (0x0 << 2) +#define PM8606_DCM_300MV (0x1 << 2) +#define PM8606_DCM_350MV (0x2 << 2) +#define PM8606_DCM_400MV (0x3 << 2) + +#define PM8606_PWM_31200HZ (0x0) +#define PM8606_PWM_15600HZ (0x1) +#define PM8606_PWM_7800HZ (0x2) +#define PM8606_PWM_3900HZ (0x3) +#define PM8606_PWM_1950HZ (0x4) +#define PM8606_PWM_976HZ (0x5) +#define PM8606_PWM_488HZ (0x6) +#define PM8606_PWM_244HZ (0x7) +#define PM8606_PWM_FREQ_MASK (0x7) + +#define PM8606_WLED_ON (1 << 0) +#define PM8606_WLED_CURRENT(x) ((x & 0x1F) << 1) + +#define PM8606_LED_CURRENT(x) (((x >> 2) & 0x07) << 5) + +#define PM8606_VSYS_EN (1 << 1) + +#define 
PM8606_MISC_OSC_EN (1 << 4) + enum { PM8607_ID_BUCK1 = 0, PM8607_ID_BUCK2, @@ -91,6 +186,21 @@ enum { #define PM8607_SUPPLIES_EN21 (0x33) #define PM8607_SUPPLIES_EN22 (0x34) +/* Vibrator Control Registers */ +#define PM8607_VIBRATOR_SET (0x28) +#define PM8607_VIBRATOR_PWM (0x29) + +/* GPADC Registers */ +#define PM8607_GP_BIAS1 (0x4F) +#define PM8607_MEAS_EN1 (0x50) +#define PM8607_MEAS_EN2 (0x51) +#define PM8607_MEAS_EN3 (0x52) +#define PM8607_MEAS_OFF_TIME1 (0x53) +#define PM8607_MEAS_OFF_TIME2 (0x54) +#define PM8607_TSI_PREBIAS (0x55) /* prebias time */ +#define PM8607_PD_PREBIAS (0x56) /* prebias time */ +#define PM8607_GPADC_MISC1 (0x57) + /* RTC Control Registers */ #define PM8607_RTC1 (0xA0) #define PM8607_RTC_COUNTER1 (0xA1) @@ -140,6 +250,16 @@ enum { #define PM8607_B0_MISC1_PI2C (1 << 3) #define PM8607_B0_MISC1_RESET (1 << 6) +/* bits definitions of GPADC */ +#define PM8607_GPADC_EN (1 << 0) +#define PM8607_GPADC_PREBIAS_MASK (3 << 1) +#define PM8607_GPADC_SLOT_CYCLE_MASK (3 << 3) /* slow mode */ +#define PM8607_GPADC_OFF_SCALE_MASK (3 << 5) /* GP sleep mode */ +#define PM8607_GPADC_SW_CAL_MASK (1 << 7) + +#define PM8607_PD_PREBIAS_MASK (0x1F << 0) +#define PM8607_PD_PRECHG_MASK (7 << 5) + /* Interrupt Number in 88PM8607 */ enum { PM8607_IRQ_ONKEY = 0, @@ -202,13 +322,44 @@ enum { PI2C_PORT, }; +struct pm860x_backlight_pdata { + int id; + int pwm; + int iset; + unsigned long flags; +}; + +struct pm860x_led_pdata { + int id; + int iset; + unsigned long flags; +}; + +struct pm860x_touch_pdata { + int gpadc_prebias; + int slot_cycle; + int off_scale; + int sw_cal; + int tsi_prebias; /* time, slot */ + int pen_prebias; /* time, slot */ + int pen_prechg; /* time, slot */ + unsigned long flags; +}; + struct pm860x_platform_data { + struct pm860x_backlight_pdata *backlight; + struct pm860x_led_pdata *led; + struct pm860x_touch_pdata *touch; + unsigned short companion_addr; /* I2C address of companion chip */ int i2c_port; /* Controlled by GI2C or PI2C */ int irq_mode; /* Clear interrupt by read/write(0/1) */ struct regulator_init_data *regulator[PM8607_MAX_REGULATOR]; }; +extern char pm860x_backlight_name[][MFD_NAME_SIZE]; +extern char pm860x_led_name[][MFD_NAME_SIZE]; + extern int pm860x_reg_read(struct i2c_client *, int); extern int pm860x_reg_write(struct i2c_client *, int, unsigned char); extern int pm860x_bulk_read(struct i2c_client *, int, int, unsigned char *); -- cgit v1.2.3 From 866a98ae6e1a9768cd25fe1185481569c7e4b4a9 Mon Sep 17 00:00:00 2001 From: Haojian Zhuang Date: Tue, 15 Dec 2009 16:06:17 -0500 Subject: input: Enable touch on 88pm860x Enable touchscreen driver for the 88pm860x multi function core. Signed-off-by: Haojian Zhuang Acked-by: Dmitry Torokhov Signed-off-by: Samuel Ortiz --- drivers/input/touchscreen/88pm860x-ts.c | 241 ++++++++++++++++++++++++++++++++ drivers/input/touchscreen/Kconfig | 12 ++ drivers/input/touchscreen/Makefile | 1 + include/linux/mfd/88pm860x.h | 1 + 4 files changed, 255 insertions(+) create mode 100644 drivers/input/touchscreen/88pm860x-ts.c (limited to 'include') diff --git a/drivers/input/touchscreen/88pm860x-ts.c b/drivers/input/touchscreen/88pm860x-ts.c new file mode 100644 index 000000000000..56254d2a1f6e --- /dev/null +++ b/drivers/input/touchscreen/88pm860x-ts.c @@ -0,0 +1,241 @@ +/* + * Touchscreen driver for Marvell 88PM860x + * + * Copyright (C) 2009 Marvell International Ltd. 
+ * Haojian Zhuang + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include + +#define MEAS_LEN (8) +#define ACCURATE_BIT (12) + +/* touch register */ +#define MEAS_EN3 (0x52) + +#define MEAS_TSIX_1 (0x8D) +#define MEAS_TSIX_2 (0x8E) +#define MEAS_TSIY_1 (0x8F) +#define MEAS_TSIY_2 (0x90) +#define MEAS_TSIZ1_1 (0x91) +#define MEAS_TSIZ1_2 (0x92) +#define MEAS_TSIZ2_1 (0x93) +#define MEAS_TSIZ2_2 (0x94) + +/* bit definitions of touch */ +#define MEAS_PD_EN (1 << 3) +#define MEAS_TSIX_EN (1 << 4) +#define MEAS_TSIY_EN (1 << 5) +#define MEAS_TSIZ1_EN (1 << 6) +#define MEAS_TSIZ2_EN (1 << 7) + +struct pm860x_touch { + struct input_dev *idev; + struct i2c_client *i2c; + struct pm860x_chip *chip; + int irq; + int res_x; /* resistor of Xplate */ +}; + +static irqreturn_t pm860x_touch_handler(int irq, void *data) +{ + struct pm860x_touch *touch = data; + struct pm860x_chip *chip = touch->chip; + unsigned char buf[MEAS_LEN]; + int x, y, pen_down; + int z1, z2, rt = 0; + int ret; + + pm860x_mask_irq(chip, irq); + ret = pm860x_bulk_read(touch->i2c, MEAS_TSIX_1, MEAS_LEN, buf); + if (ret < 0) + goto out; + + pen_down = buf[1] & (1 << 6); + x = ((buf[0] & 0xFF) << 4) | (buf[1] & 0x0F); + y = ((buf[2] & 0xFF) << 4) | (buf[3] & 0x0F); + z1 = ((buf[4] & 0xFF) << 4) | (buf[5] & 0x0F); + z2 = ((buf[6] & 0xFF) << 4) | (buf[7] & 0x0F); + + if (pen_down) { + if ((x != 0) && (z1 != 0) && (touch->res_x != 0)) { + rt = z2 / z1 - 1; + rt = (rt * touch->res_x * x) >> ACCURATE_BIT; + dev_dbg(chip->dev, "z1:%d, z2:%d, rt:%d\n", + z1, z2, rt); + } + input_report_abs(touch->idev, ABS_X, x); + input_report_abs(touch->idev, ABS_Y, y); + input_report_abs(touch->idev, ABS_PRESSURE, rt); + input_report_key(touch->idev, BTN_TOUCH, 1); + dev_dbg(chip->dev, "pen down at [%d, %d].\n", x, y); + } else { + input_report_abs(touch->idev, ABS_PRESSURE, 0); + input_report_key(touch->idev, BTN_TOUCH, 0); + dev_dbg(chip->dev, "pen release\n"); + } + input_sync(touch->idev); + pm860x_unmask_irq(chip, irq); + +out: + return IRQ_HANDLED; +} + +static int pm860x_touch_open(struct input_dev *dev) +{ + struct pm860x_touch *touch = input_get_drvdata(dev); + struct pm860x_chip *chip = touch->chip; + int data, ret; + + data = MEAS_PD_EN | MEAS_TSIX_EN | MEAS_TSIY_EN + | MEAS_TSIZ1_EN | MEAS_TSIZ2_EN; + ret = pm860x_set_bits(touch->i2c, MEAS_EN3, data, data); + if (ret < 0) + goto out; + pm860x_unmask_irq(chip, touch->irq); + return 0; +out: + return ret; +} + +static void pm860x_touch_close(struct input_dev *dev) +{ + struct pm860x_touch *touch = input_get_drvdata(dev); + struct pm860x_chip *chip = touch->chip; + int data; + + data = MEAS_PD_EN | MEAS_TSIX_EN | MEAS_TSIY_EN + | MEAS_TSIZ1_EN | MEAS_TSIZ2_EN; + pm860x_set_bits(touch->i2c, MEAS_EN3, data, 0); + pm860x_mask_irq(chip, touch->irq); +} + +static int __devinit pm860x_touch_probe(struct platform_device *pdev) +{ + struct pm860x_chip *chip = dev_get_drvdata(pdev->dev.parent); + struct pm860x_platform_data *pm860x_pdata = \ + pdev->dev.parent->platform_data; + struct pm860x_touch_pdata *pdata = NULL; + struct pm860x_touch *touch; + int irq, ret; + + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + dev_err(&pdev->dev, "No IRQ resource!\n"); + return -EINVAL; + } + + if (!pm860x_pdata) { + dev_err(&pdev->dev, "platform data is missing\n"); + return -EINVAL; + } + + pdata = 
pm860x_pdata->touch; + if (!pdata) { + dev_err(&pdev->dev, "touchscreen data is missing\n"); + return -EINVAL; + } + + touch = kzalloc(sizeof(struct pm860x_touch), GFP_KERNEL); + if (touch == NULL) + return -ENOMEM; + dev_set_drvdata(&pdev->dev, touch); + + touch->idev = input_allocate_device(); + if (touch->idev == NULL) { + dev_err(&pdev->dev, "Failed to allocate input device!\n"); + ret = -ENOMEM; + goto out; + } + + touch->idev->name = "88pm860x-touch"; + touch->idev->phys = "88pm860x/input0"; + touch->idev->id.bustype = BUS_I2C; + touch->idev->dev.parent = &pdev->dev; + touch->idev->open = pm860x_touch_open; + touch->idev->close = pm860x_touch_close; + touch->chip = chip; + touch->i2c = (chip->id == CHIP_PM8607) ? chip->client : chip->companion; + touch->irq = irq; + touch->res_x = pdata->res_x; + input_set_drvdata(touch->idev, touch); + + ret = pm860x_request_irq(chip, irq, pm860x_touch_handler, touch); + if (ret < 0) + goto out_irq; + + __set_bit(EV_ABS, touch->idev->evbit); + __set_bit(ABS_X, touch->idev->absbit); + __set_bit(ABS_Y, touch->idev->absbit); + __set_bit(ABS_PRESSURE, touch->idev->absbit); + __set_bit(EV_SYN, touch->idev->evbit); + __set_bit(EV_KEY, touch->idev->evbit); + __set_bit(BTN_TOUCH, touch->idev->keybit); + + input_set_abs_params(touch->idev, ABS_X, 0, 1 << ACCURATE_BIT, 0, 0); + input_set_abs_params(touch->idev, ABS_Y, 0, 1 << ACCURATE_BIT, 0, 0); + input_set_abs_params(touch->idev, ABS_PRESSURE, 0, 1 << ACCURATE_BIT, + 0, 0); + + ret = input_register_device(touch->idev); + if (ret < 0) { + dev_err(chip->dev, "Failed to register touch!\n"); + goto out_rg; + } + + platform_set_drvdata(pdev, touch); + return 0; +out_rg: + pm860x_free_irq(chip, irq); +out_irq: + input_free_device(touch->idev); +out: + kfree(touch); + return ret; +} + +static int __devexit pm860x_touch_remove(struct platform_device *pdev) +{ + struct pm860x_touch *touch = platform_get_drvdata(pdev); + + input_unregister_device(touch->idev); + pm860x_free_irq(touch->chip, touch->irq); + platform_set_drvdata(pdev, NULL); + kfree(touch); + return 0; +} + +static struct platform_driver pm860x_touch_driver = { + .driver = { + .name = "88pm860x-touch", + .owner = THIS_MODULE, + }, + .probe = pm860x_touch_probe, + .remove = __devexit_p(pm860x_touch_remove), +}; + +static int __init pm860x_touch_init(void) +{ + return platform_driver_register(&pm860x_touch_driver); +} +module_init(pm860x_touch_init); + +static void __exit pm860x_touch_exit(void) +{ + platform_driver_unregister(&pm860x_touch_driver); +} +module_exit(pm860x_touch_exit); + +MODULE_DESCRIPTION("Touchscreen driver for Marvell Semiconductor 88PM860x"); +MODULE_AUTHOR("Haojian Zhuang "); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:88pm860x-touch"); + diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig index 6457e060ae49..7208654a94ae 100644 --- a/drivers/input/touchscreen/Kconfig +++ b/drivers/input/touchscreen/Kconfig @@ -11,6 +11,18 @@ menuconfig INPUT_TOUCHSCREEN if INPUT_TOUCHSCREEN +config TOUCHSCREEN_88PM860X + tristate "Marvell 88PM860x touchscreen" + depends on MFD_88PM860X + help + Say Y here if you have a 88PM860x PMIC and want to enable + support for the built-in touchscreen. + + If unsure, say N. + + To compile this driver as a module, choose M here: the + module will be called 88pm860x-ts. 
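
Editor's note: the probe above refuses to bind unless the parent 88PM860x device was registered with a struct pm860x_touch_pdata (and the MFD core only instantiates the touch cell when pdata->touch is set). A minimal board-file sketch of that wiring follows; the I2C address, IRQ line, device-name string and all tuning values are placeholders chosen for illustration, not values taken from these patches.

/*
 * Hypothetical board wiring for the 88PM860x touch sub-device.
 * Every numeric value and the chip address below are placeholders;
 * a real board takes them from its schematic and calibration data.
 */
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/i2c.h>
#include <linux/mfd/88pm860x.h>

#define BOARD_PMIC_I2C_ADDR	0x34	/* placeholder I2C address */
#define BOARD_PMIC_IRQ		17	/* placeholder SoC interrupt line */

static struct pm860x_touch_pdata board_touch_pdata = {
	.gpadc_prebias	= 1,	/* GPADC MISC1 prebias setting */
	.slot_cycle	= 1,	/* slow-mode slot cycle */
	.tsi_prebias	= 6,	/* TSI prebias time */
	.pen_prebias	= 16,	/* pen-detect prebias time */
	.pen_prechg	= 2,	/* pen-detect precharge time */
	.res_x		= 300,	/* X-plate resistance, used for pressure */
};

static struct pm860x_platform_data board_pm8607_pdata = {
	.touch		= &board_touch_pdata,
	.irq_mode	= 0,	/* clear interrupt status by read */
};

static struct i2c_board_info board_i2c_devs[] __initdata = {
	{
		/* name must match the 88pm860x I2C driver's id table */
		I2C_BOARD_INFO("88PM860x", BOARD_PMIC_I2C_ADDR),
		.irq		= BOARD_PMIC_IRQ,
		.platform_data	= &board_pm8607_pdata,
	},
};

static void __init board_add_pmic(void)
{
	i2c_register_board_info(0, board_i2c_devs,
				ARRAY_SIZE(board_i2c_devs));
}

With that in place, device_8607_init() sees pdata->touch, programs the GPADC through device_gpadc_init() and registers the "88pm860x-touch" cell that this platform driver binds against.
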
+ config TOUCHSCREEN_ADS7846 tristate "ADS7846/TSC2046 and ADS7843 based touchscreens" depends on SPI_MASTER diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile index d61a3b4def9a..7fef7d5cca23 100644 --- a/drivers/input/touchscreen/Makefile +++ b/drivers/input/touchscreen/Makefile @@ -6,6 +6,7 @@ wm97xx-ts-y := wm97xx-core.o +obj-$(CONFIG_TOUCHSCREEN_88PM860X) += 88pm860x-ts.o obj-$(CONFIG_TOUCHSCREEN_AD7877) += ad7877.o obj-$(CONFIG_TOUCHSCREEN_AD7879) += ad7879.o obj-$(CONFIG_TOUCHSCREEN_ADS7846) += ads7846.o diff --git a/include/linux/mfd/88pm860x.h b/include/linux/mfd/88pm860x.h index d7edf11784f5..80bc82a7ac96 100644 --- a/include/linux/mfd/88pm860x.h +++ b/include/linux/mfd/88pm860x.h @@ -343,6 +343,7 @@ struct pm860x_touch_pdata { int tsi_prebias; /* time, slot */ int pen_prebias; /* time, slot */ int pen_prechg; /* time, slot */ + int res_x; /* resistor of Xplate */ unsigned long flags; }; -- cgit v1.2.3 From d50f8f339f6901fccc9d4292b65ce8b69d7413d4 Mon Sep 17 00:00:00 2001 From: Haojian Zhuang Date: Fri, 8 Jan 2010 12:29:23 +0100 Subject: mfd: Initial max8925 support Basic Max8925 support, which is a power management IC from Maxim Semiconductor. Signed-off-by: Haojian Zhuang Signed-off-by: Samuel Ortiz --- drivers/mfd/Kconfig | 9 ++ drivers/mfd/Makefile | 2 + drivers/mfd/max8925-core.c | 262 ++++++++++++++++++++++++++++++++++++++++++++ drivers/mfd/max8925-i2c.c | 210 +++++++++++++++++++++++++++++++++++ include/linux/mfd/max8925.h | 119 ++++++++++++++++++++ 5 files changed, 602 insertions(+) create mode 100644 drivers/mfd/max8925-core.c create mode 100644 drivers/mfd/max8925-i2c.c create mode 100644 include/linux/mfd/max8925.h (limited to 'include') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 815907eb70a4..ee416eefb8e9 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -194,6 +194,15 @@ config PMIC_ADP5520 individual components like LCD backlight, LEDs, GPIOs and Kepad under the corresponding menus. +config MFD_MAX8925 + tristate "Maxim Semiconductor MAX8925 PMIC Support" + depends on I2C + help + Say yes here to support for Maxim Semiconductor MAX8925. This is + a Power Management IC. This driver provies common support for + accessing the device, additional drivers must be enabled in order + to use the functionality of the device. + config MFD_WM8400 tristate "Support Wolfson Microelectronics WM8400" select MFD_CORE diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 1e3ae062c1f6..261635700243 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -49,6 +49,8 @@ endif obj-$(CONFIG_UCB1400_CORE) += ucb1400_core.o obj-$(CONFIG_PMIC_DA903X) += da903x.o +max8925-objs := max8925-core.o max8925-i2c.o +obj-$(CONFIG_MFD_MAX8925) += max8925.o obj-$(CONFIG_MFD_PCF50633) += pcf50633-core.o obj-$(CONFIG_PCF50633_ADC) += pcf50633-adc.o diff --git a/drivers/mfd/max8925-core.c b/drivers/mfd/max8925-core.c new file mode 100644 index 000000000000..3e26267960b1 --- /dev/null +++ b/drivers/mfd/max8925-core.c @@ -0,0 +1,262 @@ +/* + * Base driver for Maxim MAX8925 + * + * Copyright (C) 2009 Marvell International Ltd. + * Haojian Zhuang + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#define IRQ_MODE_STATUS 0 +#define IRQ_MODE_MASK 1 + +static int __get_irq_offset(struct max8925_chip *chip, int irq, int mode, + int *offset, int *bit) +{ + if (!offset || !bit) + return -EINVAL; + + switch (chip->chip_id) { + case MAX8925_GPM: + *bit = irq % BITS_PER_BYTE; + if (irq < (BITS_PER_BYTE << 1)) { /* irq = [0,15] */ + *offset = (mode) ? MAX8925_CHG_IRQ1_MASK + : MAX8925_CHG_IRQ1; + if (irq >= BITS_PER_BYTE) + (*offset)++; + } else { /* irq = [16,31] */ + *offset = (mode) ? MAX8925_ON_OFF_IRQ1_MASK + : MAX8925_ON_OFF_IRQ1; + if (irq >= (BITS_PER_BYTE * 3)) + (*offset)++; + } + break; + case MAX8925_ADC: + *bit = irq % BITS_PER_BYTE; + *offset = (mode) ? MAX8925_TSC_IRQ_MASK : MAX8925_TSC_IRQ; + break; + default: + goto out; + } + return 0; +out: + dev_err(chip->dev, "Wrong irq #%d is assigned\n", irq); + return -EINVAL; +} + +static int __check_irq(int irq) +{ + if ((irq < 0) || (irq >= MAX8925_NUM_IRQ)) + return -EINVAL; + return 0; +} + +int max8925_mask_irq(struct max8925_chip *chip, int irq) +{ + int offset, bit, ret; + + ret = __get_irq_offset(chip, irq, IRQ_MODE_MASK, &offset, &bit); + if (ret < 0) + return ret; + ret = max8925_set_bits(chip->i2c, offset, 1 << bit, 1 << bit); + return ret; +} + +int max8925_unmask_irq(struct max8925_chip *chip, int irq) +{ + int offset, bit, ret; + + ret = __get_irq_offset(chip, irq, IRQ_MODE_MASK, &offset, &bit); + if (ret < 0) + return ret; + ret = max8925_set_bits(chip->i2c, offset, 1 << bit, 0); + return ret; +} + +#define INT_STATUS_NUM (MAX8925_NUM_IRQ / BITS_PER_BYTE) + +static irqreturn_t max8925_irq_thread(int irq, void *data) +{ + struct max8925_chip *chip = data; + unsigned long irq_status[INT_STATUS_NUM]; + unsigned char status_buf[INT_STATUS_NUM << 1]; + int i, ret; + + memset(irq_status, 0, sizeof(unsigned long) * INT_STATUS_NUM); + + /* all these interrupt status registers are read-only */ + switch (chip->chip_id) { + case MAX8925_GPM: + ret = max8925_bulk_read(chip->i2c, MAX8925_CHG_IRQ1, + 4, status_buf); + if (ret < 0) + goto out; + ret = max8925_bulk_read(chip->i2c, MAX8925_ON_OFF_IRQ1, + 2, &status_buf[4]); + if (ret < 0) + goto out; + ret = max8925_bulk_read(chip->i2c, MAX8925_ON_OFF_IRQ2, + 2, &status_buf[6]); + if (ret < 0) + goto out; + /* clear masked interrupt status */ + status_buf[0] &= (~status_buf[2] & CHG_IRQ1_MASK); + irq_status[0] |= status_buf[0]; + status_buf[1] &= (~status_buf[3] & CHG_IRQ2_MASK); + irq_status[0] |= (status_buf[1] << BITS_PER_BYTE); + status_buf[4] &= (~status_buf[5] & ON_OFF_IRQ1_MASK); + irq_status[0] |= (status_buf[4] << (BITS_PER_BYTE * 2)); + status_buf[6] &= (~status_buf[7] & ON_OFF_IRQ2_MASK); + irq_status[0] |= (status_buf[6] << (BITS_PER_BYTE * 3)); + break; + case MAX8925_ADC: + ret = max8925_bulk_read(chip->i2c, MAX8925_TSC_IRQ, + 2, status_buf); + if (ret < 0) + goto out; + /* clear masked interrupt status */ + status_buf[0] &= (~status_buf[1] & TSC_IRQ_MASK); + irq_status[0] |= status_buf[0]; + break; + default: + goto out; + } + + for_each_bit(i, &irq_status[0], MAX8925_NUM_IRQ) { + clear_bit(i, irq_status); + dev_dbg(chip->dev, "Servicing IRQ #%d in %s\n", i, chip->name); + + mutex_lock(&chip->irq_lock); + if (chip->irq[i].handler) + chip->irq[i].handler(i, chip->irq[i].data); + else { + max8925_mask_irq(chip, i); + dev_err(chip->dev, "Noboday cares IRQ #%d in %s. 
" + "Now mask it.\n", i, chip->name); + } + mutex_unlock(&chip->irq_lock); + } +out: + return IRQ_HANDLED; +} + +int max8925_request_irq(struct max8925_chip *chip, int irq, + irq_handler_t handler, void *data) +{ + if ((__check_irq(irq) < 0) || !handler) + return -EINVAL; + + mutex_lock(&chip->irq_lock); + chip->irq[irq].handler = handler; + chip->irq[irq].data = data; + mutex_unlock(&chip->irq_lock); + return 0; +} +EXPORT_SYMBOL(max8925_request_irq); + +int max8925_free_irq(struct max8925_chip *chip, int irq) +{ + if (__check_irq(irq) < 0) + return -EINVAL; + + mutex_lock(&chip->irq_lock); + chip->irq[irq].handler = NULL; + chip->irq[irq].data = NULL; + mutex_unlock(&chip->irq_lock); + return 0; +} +EXPORT_SYMBOL(max8925_free_irq); + +static int __devinit device_gpm_init(struct max8925_chip *chip, + struct i2c_client *i2c, + struct max8925_platform_data *pdata) +{ + int ret; + + /* mask all IRQs */ + ret = max8925_set_bits(i2c, MAX8925_CHG_IRQ1_MASK, 0x7, 0x7); + if (ret < 0) + goto out; + ret = max8925_set_bits(i2c, MAX8925_CHG_IRQ2_MASK, 0xff, 0xff); + if (ret < 0) + goto out; + ret = max8925_set_bits(i2c, MAX8925_ON_OFF_IRQ1_MASK, 0xff, 0xff); + if (ret < 0) + goto out; + ret = max8925_set_bits(i2c, MAX8925_ON_OFF_IRQ2_MASK, 0x3, 0x3); + if (ret < 0) + goto out; + + chip->name = "GPM"; + memset(chip->irq, 0, sizeof(struct max8925_irq) * MAX8925_NUM_IRQ); + ret = request_threaded_irq(i2c->irq, NULL, max8925_irq_thread, + IRQF_ONESHOT | IRQF_TRIGGER_LOW, + "max8925-gpm", chip); + if (ret < 0) { + dev_err(chip->dev, "Failed to request IRQ #%d.\n", i2c->irq); + goto out; + } + chip->chip_irq = i2c->irq; + + /* enable hard-reset for ONKEY power-off */ + max8925_set_bits(i2c, MAX8925_SYSENSEL, 0x80, 0x80); +out: + return ret; +} + +static int __devinit device_adc_init(struct max8925_chip *chip, + struct i2c_client *i2c, + struct max8925_platform_data *pdata) +{ + int ret; + + /* mask all IRQs */ + ret = max8925_set_bits(i2c, MAX8925_TSC_IRQ_MASK, 3, 3); + + chip->name = "ADC"; + memset(chip->irq, 0, sizeof(struct max8925_irq) * MAX8925_NUM_IRQ); + ret = request_threaded_irq(i2c->irq, NULL, max8925_irq_thread, + IRQF_ONESHOT | IRQF_TRIGGER_LOW, + "max8925-adc", chip); + if (ret < 0) { + dev_err(chip->dev, "Failed to request IRQ #%d.\n", i2c->irq); + goto out; + } + chip->chip_irq = i2c->irq; +out: + return ret; +} + +int __devinit max8925_device_init(struct max8925_chip *chip, + struct max8925_platform_data *pdata) +{ + switch (chip->chip_id) { + case MAX8925_GPM: + device_gpm_init(chip, chip->i2c, pdata); + break; + case MAX8925_ADC: + device_adc_init(chip, chip->i2c, pdata); + break; + } + return 0; +} + +void max8925_device_exit(struct max8925_chip *chip) +{ + if (chip->chip_irq >= 0) + free_irq(chip->chip_irq, chip); +} + +MODULE_DESCRIPTION("PMIC Driver for Maxim MAX8925"); +MODULE_AUTHOR("Haojian Zhuang + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include +#include +#include +#include +#include + +static inline int max8925_read_device(struct i2c_client *i2c, + int reg, int bytes, void *dest) +{ + unsigned char data; + unsigned char *buf; + int ret; + + buf = kzalloc(bytes + 1, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + data = (unsigned char)reg; + ret = i2c_master_send(i2c, &data, 1); + if (ret < 0) + return ret; + + ret = i2c_master_recv(i2c, buf, bytes + 1); + if (ret < 0) + return ret; + memcpy(dest, buf, bytes); + return 0; +} + +static inline int max8925_write_device(struct i2c_client *i2c, + int reg, int bytes, void *src) +{ + unsigned char buf[bytes + 1]; + int ret; + + buf[0] = (unsigned char)reg; + memcpy(&buf[1], src, bytes); + + ret = i2c_master_send(i2c, buf, bytes + 1); + if (ret < 0) + return ret; + return 0; +} + +int max8925_reg_read(struct i2c_client *i2c, int reg) +{ + struct max8925_chip *chip = i2c_get_clientdata(i2c); + unsigned char data; + int ret; + + mutex_lock(&chip->io_lock); + ret = max8925_read_device(i2c, reg, 1, &data); + mutex_unlock(&chip->io_lock); + + if (ret < 0) + return ret; + else + return (int)data; +} +EXPORT_SYMBOL(max8925_reg_read); + +int max8925_reg_write(struct i2c_client *i2c, int reg, + unsigned char data) +{ + struct max8925_chip *chip = i2c_get_clientdata(i2c); + int ret; + + mutex_lock(&chip->io_lock); + ret = max8925_write_device(i2c, reg, 1, &data); + mutex_unlock(&chip->io_lock); + + return ret; +} +EXPORT_SYMBOL(max8925_reg_write); + +int max8925_bulk_read(struct i2c_client *i2c, int reg, + int count, unsigned char *buf) +{ + struct max8925_chip *chip = i2c_get_clientdata(i2c); + int ret; + + mutex_lock(&chip->io_lock); + ret = max8925_read_device(i2c, reg, count, buf); + mutex_unlock(&chip->io_lock); + + return ret; +} +EXPORT_SYMBOL(max8925_bulk_read); + +int max8925_bulk_write(struct i2c_client *i2c, int reg, + int count, unsigned char *buf) +{ + struct max8925_chip *chip = i2c_get_clientdata(i2c); + int ret; + + mutex_lock(&chip->io_lock); + ret = max8925_write_device(i2c, reg, count, buf); + mutex_unlock(&chip->io_lock); + + return ret; +} +EXPORT_SYMBOL(max8925_bulk_write); + +int max8925_set_bits(struct i2c_client *i2c, int reg, + unsigned char mask, unsigned char data) +{ + struct max8925_chip *chip = i2c_get_clientdata(i2c); + unsigned char value; + int ret; + + mutex_lock(&chip->io_lock); + ret = max8925_read_device(i2c, reg, 1, &value); + if (ret < 0) + goto out; + value &= ~mask; + value |= data; + ret = max8925_write_device(i2c, reg, 1, &value); +out: + mutex_unlock(&chip->io_lock); + return ret; +} +EXPORT_SYMBOL(max8925_set_bits); + + +static const struct i2c_device_id max8925_id_table[] = { + { "max8925", 0 }, + {} +}; +MODULE_DEVICE_TABLE(i2c, max8925_id_table); + +static int __devinit max8925_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct max8925_platform_data *pdata = client->dev.platform_data; + struct max8925_chip *chip; + + if (!pdata) { + pr_info("%s: platform data is missing\n", __func__); + return -EINVAL; + } + if ((pdata->chip_id <= MAX8925_INVALID) + || (pdata->chip_id >= MAX8925_MAX)) { + pr_info("#%s: wrong chip identification\n", __func__); + return -EINVAL; + } + + chip = kzalloc(sizeof(struct max8925_chip), GFP_KERNEL); + if (chip == NULL) + return -ENOMEM; + chip->i2c = client; + chip->chip_id = pdata->chip_id; + i2c_set_clientdata(client, chip); + chip->dev = &client->dev; + mutex_init(&chip->io_lock); + dev_set_drvdata(chip->dev, chip); + max8925_device_init(chip, pdata); + + return 0; +} + +static 
int __devexit max8925_remove(struct i2c_client *client) +{ + struct max8925_chip *chip = i2c_get_clientdata(client); + + max8925_device_exit(chip); + i2c_set_clientdata(client, NULL); + kfree(chip); + return 0; +} + +static struct i2c_driver max8925_driver = { + .driver = { + .name = "max8925", + .owner = THIS_MODULE, + }, + .probe = max8925_probe, + .remove = __devexit_p(max8925_remove), + .id_table = max8925_id_table, +}; + +static int __init max8925_i2c_init(void) +{ + int ret; + + ret = i2c_add_driver(&max8925_driver); + if (ret != 0) + pr_err("Failed to register MAX8925 I2C driver: %d\n", ret); + return ret; +} +subsys_initcall(max8925_i2c_init); + +static void __exit max8925_i2c_exit(void) +{ + i2c_del_driver(&max8925_driver); +} +module_exit(max8925_i2c_exit); + +MODULE_DESCRIPTION("I2C Driver for Maxim 8925"); +MODULE_AUTHOR("Haojian Zhuang "); +MODULE_LICENSE("GPL"); diff --git a/include/linux/mfd/max8925.h b/include/linux/mfd/max8925.h new file mode 100644 index 000000000000..2326246ddef2 --- /dev/null +++ b/include/linux/mfd/max8925.h @@ -0,0 +1,119 @@ +/* + * Maxim8925 Interface + * + * Copyright (C) 2009 Marvell International Ltd. + * Haojian Zhuang + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __LINUX_MFD_MAX8925_H +#define __LINUX_MFD_MAX8925_H + +#include + +/* Charger registers */ +#define MAX8925_CHG_IRQ1 (0x7e) +#define MAX8925_CHG_IRQ2 (0x7f) +#define MAX8925_CHG_IRQ1_MASK (0x80) +#define MAX8925_CHG_IRQ2_MASK (0x81) + +/* GPM registers */ +#define MAX8925_SYSENSEL (0x00) +#define MAX8925_ON_OFF_IRQ1 (0x01) +#define MAX8925_ON_OFF_IRQ1_MASK (0x02) +#define MAX8925_ON_OFF_STAT (0x03) +#define MAX8925_ON_OFF_IRQ2 (0x0d) +#define MAX8925_ON_OFF_IRQ2_MASK (0x0e) +#define MAX8925_RESET_CNFG (0x0f) + +/* Touch registers */ +#define MAX8925_TSC_IRQ (0x00) +#define MAX8925_TSC_IRQ_MASK (0x01) + +/* RTC registers */ +#define MAX8925_RTC_STATUS (0x1a) +#define MAX8925_RTC_IRQ (0x1c) +#define MAX8925_RTC_IRQ_MASK (0x1d) + +/* bit definitions */ +#define CHG_IRQ1_MASK (0x07) +#define CHG_IRQ2_MASK (0xff) +#define ON_OFF_IRQ1_MASK (0xff) +#define ON_OFF_IRQ2_MASK (0x03) +#define TSC_IRQ_MASK (0x03) +#define RTC_IRQ_MASK (0x0c) + +#define MAX8925_NUM_IRQ (32) + +#define MAX8925_NAME_SIZE (32) + +enum { + MAX8925_INVALID = 0, + MAX8925_RTC, + MAX8925_ADC, + MAX8925_GPM, /* general power management */ + MAX8925_MAX, +}; + +#define MAX8925_IRQ_VCHG_OVP (0) +#define MAX8925_IRQ_VCHG_F (1) +#define MAX8925_IRQ_VCHG_R (2) +#define MAX8925_IRQ_VCHG_THM_OK_R (8) +#define MAX8925_IRQ_VCHG_THM_OK_F (9) +#define MAX8925_IRQ_VCHG_BATTLOW_F (10) +#define MAX8925_IRQ_VCHG_BATTLOW_R (11) +#define MAX8925_IRQ_VCHG_RST (12) +#define MAX8925_IRQ_VCHG_DONE (13) +#define MAX8925_IRQ_VCHG_TOPOFF (14) +#define MAX8925_IRQ_VCHG_TMR_FAULT (15) +#define MAX8925_IRQ_GPM_RSTIN (16) +#define MAX8925_IRQ_GPM_MPL (17) +#define MAX8925_IRQ_GPM_SW_3SEC (18) +#define MAX8925_IRQ_GPM_EXTON_F (19) +#define MAX8925_IRQ_GPM_EXTON_R (20) +#define MAX8925_IRQ_GPM_SW_1SEC (21) +#define MAX8925_IRQ_GPM_SW_F (22) +#define MAX8925_IRQ_GPM_SW_R (23) +#define MAX8925_IRQ_GPM_SYSCKEN_F (24) +#define MAX8925_IRQ_GPM_SYSCKEN_R (25) + +#define MAX8925_IRQ_TSC_STICK (0) +#define MAX8925_IRQ_TSC_NSTICK (1) + +struct max8925_irq { + irq_handler_t handler; + void *data; +}; + +struct max8925_chip { + struct device *dev; + struct mutex io_lock; + struct mutex irq_lock; + 
struct i2c_client *i2c; + struct max8925_irq irq[MAX8925_NUM_IRQ]; + + const char *name; + int chip_id; + int chip_irq; +}; + +struct max8925_platform_data { + int chip_id; + int chip_irq; +}; + +extern int max8925_reg_read(struct i2c_client *, int); +extern int max8925_reg_write(struct i2c_client *, int, unsigned char); +extern int max8925_bulk_read(struct i2c_client *, int, int, unsigned char *); +extern int max8925_bulk_write(struct i2c_client *, int, int, unsigned char *); +extern int max8925_set_bits(struct i2c_client *, int, unsigned char, + unsigned char); + +extern int max8925_device_init(struct max8925_chip *, + struct max8925_platform_data *); +extern void max8925_device_exit(struct max8925_chip *); +#endif /* __LINUX_MFD_MAX8925_H */ + -- cgit v1.2.3 From 1ad998934e9c6cbae91662a05e0cb8772b1f4f75 Mon Sep 17 00:00:00 2001 From: Haojian Zhuang Date: Fri, 8 Jan 2010 12:43:29 -0500 Subject: mfd: Add subdevs in max8925 Add subdevs in MAX8925. MAX8925 includes regulator, backlight and touch components. Signed-off-by: Haojian Zhuang Signed-off-by: Samuel Ortiz --- drivers/mfd/Kconfig | 1 + drivers/mfd/max8925-core.c | 142 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/mfd/max8925.h | 96 ++++++++++++++++++++++++++++++ 3 files changed, 239 insertions(+) (limited to 'include') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index ee416eefb8e9..d63ab2eec661 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -197,6 +197,7 @@ config PMIC_ADP5520 config MFD_MAX8925 tristate "Maxim Semiconductor MAX8925 PMIC Support" depends on I2C + select MFD_CORE help Say yes here to support for Maxim Semiconductor MAX8925. This is a Power Management IC. This driver provies common support for diff --git a/drivers/mfd/max8925-core.c b/drivers/mfd/max8925-core.c index 3e26267960b1..f36c494b80f1 100644 --- a/drivers/mfd/max8925-core.c +++ b/drivers/mfd/max8925-core.c @@ -20,6 +20,109 @@ #define IRQ_MODE_STATUS 0 #define IRQ_MODE_MASK 1 +static struct resource backlight_resources[] = { + { + .name = "max8925-backlight", + .start = MAX8925_WLED_MODE_CNTL, + .end = MAX8925_WLED_CNTL, + .flags = IORESOURCE_IO, + }, +}; + +static struct mfd_cell backlight_devs[] = { + { + .name = "max8925-backlight", + .num_resources = 1, + .resources = &backlight_resources[0], + .id = -1, + }, +}; + +static struct resource touch_resources[] = { + { + .name = "max8925-tsc", + .start = MAX8925_TSC_IRQ, + .end = MAX8925_ADC_RES_END, + .flags = IORESOURCE_IO, + }, +}; + +static struct mfd_cell touch_devs[] = { + { + .name = "max8925-touch", + .num_resources = 1, + .resources = &touch_resources[0], + .id = -1, + }, +}; + +#define MAX8925_REG_RESOURCE(_start, _end) \ +{ \ + .start = MAX8925_##_start, \ + .end = MAX8925_##_end, \ + .flags = IORESOURCE_IO, \ +} + +static struct resource regulator_resources[] = { + MAX8925_REG_RESOURCE(SDCTL1, SDCTL1), + MAX8925_REG_RESOURCE(SDCTL2, SDCTL2), + MAX8925_REG_RESOURCE(SDCTL3, SDCTL3), + MAX8925_REG_RESOURCE(LDOCTL1, LDOCTL1), + MAX8925_REG_RESOURCE(LDOCTL2, LDOCTL2), + MAX8925_REG_RESOURCE(LDOCTL3, LDOCTL3), + MAX8925_REG_RESOURCE(LDOCTL4, LDOCTL4), + MAX8925_REG_RESOURCE(LDOCTL5, LDOCTL5), + MAX8925_REG_RESOURCE(LDOCTL6, LDOCTL6), + MAX8925_REG_RESOURCE(LDOCTL7, LDOCTL7), + MAX8925_REG_RESOURCE(LDOCTL8, LDOCTL8), + MAX8925_REG_RESOURCE(LDOCTL9, LDOCTL9), + MAX8925_REG_RESOURCE(LDOCTL10, LDOCTL10), + MAX8925_REG_RESOURCE(LDOCTL11, LDOCTL11), + MAX8925_REG_RESOURCE(LDOCTL12, LDOCTL12), + MAX8925_REG_RESOURCE(LDOCTL13, LDOCTL13), + MAX8925_REG_RESOURCE(LDOCTL14, 
LDOCTL14), + MAX8925_REG_RESOURCE(LDOCTL15, LDOCTL15), + MAX8925_REG_RESOURCE(LDOCTL16, LDOCTL16), + MAX8925_REG_RESOURCE(LDOCTL17, LDOCTL17), + MAX8925_REG_RESOURCE(LDOCTL18, LDOCTL18), + MAX8925_REG_RESOURCE(LDOCTL19, LDOCTL19), + MAX8925_REG_RESOURCE(LDOCTL20, LDOCTL20), +}; + +#define MAX8925_REG_DEVS(_id) \ +{ \ + .name = "max8925-regulator", \ + .num_resources = 1, \ + .resources = ®ulator_resources[MAX8925_ID_##_id], \ + .id = MAX8925_ID_##_id, \ +} + +static struct mfd_cell regulator_devs[] = { + MAX8925_REG_DEVS(SD1), + MAX8925_REG_DEVS(SD2), + MAX8925_REG_DEVS(SD3), + MAX8925_REG_DEVS(LDO1), + MAX8925_REG_DEVS(LDO2), + MAX8925_REG_DEVS(LDO3), + MAX8925_REG_DEVS(LDO4), + MAX8925_REG_DEVS(LDO5), + MAX8925_REG_DEVS(LDO6), + MAX8925_REG_DEVS(LDO7), + MAX8925_REG_DEVS(LDO8), + MAX8925_REG_DEVS(LDO9), + MAX8925_REG_DEVS(LDO10), + MAX8925_REG_DEVS(LDO11), + MAX8925_REG_DEVS(LDO12), + MAX8925_REG_DEVS(LDO13), + MAX8925_REG_DEVS(LDO14), + MAX8925_REG_DEVS(LDO15), + MAX8925_REG_DEVS(LDO16), + MAX8925_REG_DEVS(LDO17), + MAX8925_REG_DEVS(LDO18), + MAX8925_REG_DEVS(LDO19), + MAX8925_REG_DEVS(LDO20), +}; + static int __get_irq_offset(struct max8925_chip *chip, int irq, int mode, int *offset, int *bit) { @@ -210,6 +313,30 @@ static int __devinit device_gpm_init(struct max8925_chip *chip, /* enable hard-reset for ONKEY power-off */ max8925_set_bits(i2c, MAX8925_SYSENSEL, 0x80, 0x80); + + ret = mfd_add_devices(chip->dev, 0, ®ulator_devs[0], + ARRAY_SIZE(regulator_devs), + ®ulator_resources[0], 0); + if (ret < 0) { + dev_err(chip->dev, "Failed to add regulator subdev\n"); + goto out_irq; + } + + if (pdata && pdata->backlight) { + ret = mfd_add_devices(chip->dev, 0, &backlight_devs[0], + ARRAY_SIZE(backlight_devs), + &backlight_resources[0], 0); + if (ret < 0) { + dev_err(chip->dev, "Failed to add backlight subdev\n"); + goto out_dev; + } + } + return 0; +out_dev: + mfd_remove_devices(chip->dev); +out_irq: + if (chip->chip_irq) + free_irq(chip->chip_irq, chip); out: return ret; } @@ -233,6 +360,20 @@ static int __devinit device_adc_init(struct max8925_chip *chip, goto out; } chip->chip_irq = i2c->irq; + + if (pdata && pdata->touch) { + ret = mfd_add_devices(chip->dev, 0, &touch_devs[0], + ARRAY_SIZE(touch_devs), + &touch_resources[0], 0); + if (ret < 0) { + dev_err(chip->dev, "Failed to add touch subdev\n"); + goto out_irq; + } + } + return 0; +out_irq: + if (chip->chip_irq) + free_irq(chip->chip_irq, chip); out: return ret; } @@ -255,6 +396,7 @@ void max8925_device_exit(struct max8925_chip *chip) { if (chip->chip_irq >= 0) free_irq(chip->chip_irq, chip); + mfd_remove_devices(chip->dev); } MODULE_DESCRIPTION("PMIC Driver for Maxim MAX8925"); diff --git a/include/linux/mfd/max8925.h b/include/linux/mfd/max8925.h index 2326246ddef2..b72dbe174d51 100644 --- a/include/linux/mfd/max8925.h +++ b/include/linux/mfd/max8925.h @@ -14,6 +14,33 @@ #include +/* Unified sub device IDs for MAX8925 */ +enum { + MAX8925_ID_SD1, + MAX8925_ID_SD2, + MAX8925_ID_SD3, + MAX8925_ID_LDO1, + MAX8925_ID_LDO2, + MAX8925_ID_LDO3, + MAX8925_ID_LDO4, + MAX8925_ID_LDO5, + MAX8925_ID_LDO6, + MAX8925_ID_LDO7, + MAX8925_ID_LDO8, + MAX8925_ID_LDO9, + MAX8925_ID_LDO10, + MAX8925_ID_LDO11, + MAX8925_ID_LDO12, + MAX8925_ID_LDO13, + MAX8925_ID_LDO14, + MAX8925_ID_LDO15, + MAX8925_ID_LDO16, + MAX8925_ID_LDO17, + MAX8925_ID_LDO18, + MAX8925_ID_LDO19, + MAX8925_ID_LDO20, +}; + /* Charger registers */ #define MAX8925_CHG_IRQ1 (0x7e) #define MAX8925_CHG_IRQ2 (0x7f) @@ -32,12 +59,65 @@ /* Touch registers */ #define MAX8925_TSC_IRQ (0x00) 
#define MAX8925_TSC_IRQ_MASK (0x01) +#define MAX8925_ADC_RES_END (0x6f) /* RTC registers */ #define MAX8925_RTC_STATUS (0x1a) #define MAX8925_RTC_IRQ (0x1c) #define MAX8925_RTC_IRQ_MASK (0x1d) +/* WLED registers */ +#define MAX8925_WLED_MODE_CNTL (0x84) +#define MAX8925_WLED_CNTL (0x85) + +/* MAX8925 Registers */ +#define MAX8925_SDCTL1 (0x04) +#define MAX8925_SDCTL2 (0x07) +#define MAX8925_SDCTL3 (0x0A) +#define MAX8925_SDV1 (0x06) +#define MAX8925_SDV2 (0x09) +#define MAX8925_SDV3 (0x0C) +#define MAX8925_LDOCTL1 (0x18) +#define MAX8925_LDOCTL2 (0x1C) +#define MAX8925_LDOCTL3 (0x20) +#define MAX8925_LDOCTL4 (0x24) +#define MAX8925_LDOCTL5 (0x28) +#define MAX8925_LDOCTL6 (0x2C) +#define MAX8925_LDOCTL7 (0x30) +#define MAX8925_LDOCTL8 (0x34) +#define MAX8925_LDOCTL9 (0x38) +#define MAX8925_LDOCTL10 (0x3C) +#define MAX8925_LDOCTL11 (0x40) +#define MAX8925_LDOCTL12 (0x44) +#define MAX8925_LDOCTL13 (0x48) +#define MAX8925_LDOCTL14 (0x4C) +#define MAX8925_LDOCTL15 (0x50) +#define MAX8925_LDOCTL16 (0x10) +#define MAX8925_LDOCTL17 (0x14) +#define MAX8925_LDOCTL18 (0x72) +#define MAX8925_LDOCTL19 (0x5C) +#define MAX8925_LDOCTL20 (0x9C) +#define MAX8925_LDOVOUT1 (0x1A) +#define MAX8925_LDOVOUT2 (0x1E) +#define MAX8925_LDOVOUT3 (0x22) +#define MAX8925_LDOVOUT4 (0x26) +#define MAX8925_LDOVOUT5 (0x2A) +#define MAX8925_LDOVOUT6 (0x2E) +#define MAX8925_LDOVOUT7 (0x32) +#define MAX8925_LDOVOUT8 (0x36) +#define MAX8925_LDOVOUT9 (0x3A) +#define MAX8925_LDOVOUT10 (0x3E) +#define MAX8925_LDOVOUT11 (0x42) +#define MAX8925_LDOVOUT12 (0x46) +#define MAX8925_LDOVOUT13 (0x4A) +#define MAX8925_LDOVOUT14 (0x4E) +#define MAX8925_LDOVOUT15 (0x52) +#define MAX8925_LDOVOUT16 (0x12) +#define MAX8925_LDOVOUT17 (0x16) +#define MAX8925_LDOVOUT18 (0x74) +#define MAX8925_LDOVOUT19 (0x5E) +#define MAX8925_LDOVOUT20 (0x9E) + /* bit definitions */ #define CHG_IRQ1_MASK (0x07) #define CHG_IRQ2_MASK (0xff) @@ -83,6 +163,8 @@ enum { #define MAX8925_IRQ_TSC_STICK (0) #define MAX8925_IRQ_TSC_NSTICK (1) +#define MAX8925_MAX_REGULATOR (23) + struct max8925_irq { irq_handler_t handler; void *data; @@ -100,7 +182,21 @@ struct max8925_chip { int chip_irq; }; +struct max8925_backlight_pdata { + int lxw_scl; /* 0/1 -- 0.8Ohm/0.4Ohm */ + int lxw_freq; /* 700KHz ~ 1400KHz */ + int dual_string; /* 0/1 -- single/dual string */ +}; + +struct max8925_touch_pdata { + unsigned int flags; +}; + struct max8925_platform_data { + struct max8925_backlight_pdata *backlight; + struct max8925_touch_pdata *touch; + struct regulator_init_data *regulator[MAX8925_MAX_REGULATOR]; + int chip_id; int chip_irq; }; -- cgit v1.2.3 From 6048a3dd2371c58611ea0ab8b306f8f1469399ae Mon Sep 17 00:00:00 2001 From: Cory Maccarrone Date: Tue, 19 Jan 2010 11:22:45 +0100 Subject: mfd: Add HTCPLD driver This change introduces a driver for the HTC PLD chip found on some smartphones, such as the HTC Wizard and HTC Herald. It works through the I2C bus and acts as a GPIO extender. Specifically: * it can have several sub-devices, each with its own I2C address * Each sub-device provides 8 output and 8 input pins * The chip attaches to one GPIO to signal when any of the input GPIOs change -- at which point all chips must be scanned for changes This driver implements the GPIOs throught the kernel's GPIO and IRQ framework. This allows any GPIO-servicing drivers to operate on htcpld pins, such as the gpio-keys and gpio-leds drivers. 
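
Because the htcpld pins end up as ordinary kernel GPIOs, a board file can hand them to existing GPIO consumers without any htcpld-specific glue. The sketch below wires one htcpld input pin to the gpio-keys driver; the GPIO base number and the key code are placeholder assumptions for whatever range the board assigns to the htcpld input bank, not values taken from this patch.

/*
 * Hypothetical gpio-keys consumer for an htcpld input pin.
 * HTCPLD_BASE_IN is a placeholder for the board-assigned GPIO base of
 * the htcpld input bank; KEY_CAMERA is just an example key code.
 */
#include <linux/kernel.h>
#include <linux/gpio_keys.h>
#include <linux/input.h>
#include <linux/platform_device.h>

#define HTCPLD_BASE_IN	160	/* placeholder: board-assigned GPIO base */

static struct gpio_keys_button board_htcpld_buttons[] = {
	{
		.code		= KEY_CAMERA,
		.gpio		= HTCPLD_BASE_IN + 0,	/* input pin 0 of one sub-chip */
		.desc		= "camera",
		.type		= EV_KEY,
		.active_low	= 1,
	},
};

static struct gpio_keys_platform_data board_htcpld_keys_data = {
	.buttons	= board_htcpld_buttons,
	.nbuttons	= ARRAY_SIZE(board_htcpld_buttons),
};

static struct platform_device board_htcpld_keys_device = {
	.name	= "gpio-keys",
	.id	= -1,
	.dev	= {
		.platform_data = &board_htcpld_keys_data,
	},
};

Since htcpld_set_type() in the driver below only accepts edge triggers, edge-interrupt consumers such as gpio-keys fit naturally, while level-triggered requests are rejected with -EINVAL.
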
Signed-off-by: Cory Maccarrone Signed-off-by: Samuel Ortiz --- drivers/mfd/Kconfig | 9 + drivers/mfd/Makefile | 1 + drivers/mfd/htc-i2cpld.c | 710 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/htcpld.h | 24 ++ 4 files changed, 744 insertions(+) create mode 100644 drivers/mfd/htc-i2cpld.c create mode 100644 include/linux/htcpld.h (limited to 'include') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index d63ab2eec661..9bcd447e71e2 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -78,6 +78,15 @@ config HTC_PASIC3 HTC Magician devices, respectively. Actual functionality is handled by the leds-pasic3 and ds1wm drivers. +config HTC_I2CPLD + bool "HTC I2C PLD chip support" + depends on I2C=y + help + If you say yes here you get support for the supposed CPLD + found on omap850 HTC devices like the HTC Wizard and HTC Herald. + This device provides input and output GPIOs through an I2C + interface to one or more sub-chips. + config UCB1400_CORE tristate "Philips UCB1400 Core driver" depends on AC97_BUS diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 261635700243..142d31202b14 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_MFD_SH_MOBILE_SDHI) += sh_mobile_sdhi.o obj-$(CONFIG_HTC_EGPIO) += htc-egpio.o obj-$(CONFIG_HTC_PASIC3) += htc-pasic3.o +obj-$(CONFIG_HTC_I2CPLD) += htc-i2cpld.o obj-$(CONFIG_MFD_DM355EVM_MSP) += dm355evm_msp.o diff --git a/drivers/mfd/htc-i2cpld.c b/drivers/mfd/htc-i2cpld.c new file mode 100644 index 000000000000..37b9fdab4f36 --- /dev/null +++ b/drivers/mfd/htc-i2cpld.c @@ -0,0 +1,710 @@ +/* + * htc-i2cpld.c + * Chip driver for an unknown CPLD chip found on omap850 HTC devices like + * the HTC Wizard and HTC Herald. + * The cpld is located on the i2c bus and acts as an input/output GPIO + * extender. + * + * Copyright (C) 2009 Cory Maccarrone + * + * Based on work done in the linwizard project + * Copyright (C) 2008-2009 Angelo Arrifano + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct htcpld_chip { + spinlock_t lock; + + /* chip info */ + u8 reset; + u8 addr; + struct device *dev; + struct i2c_client *client; + + /* Output details */ + u8 cache_out; + struct gpio_chip chip_out; + + /* Input details */ + u8 cache_in; + struct gpio_chip chip_in; + + u16 irqs_enabled; + uint irq_start; + int nirqs; + + /* + * Work structure to allow for setting values outside of any + * possible interrupt context + */ + struct work_struct set_val_work; +}; + +struct htcpld_data { + /* irq info */ + u16 irqs_enabled; + uint irq_start; + int nirqs; + uint chained_irq; + unsigned int int_reset_gpio_hi; + unsigned int int_reset_gpio_lo; + + /* htcpld info */ + struct htcpld_chip *chip; + unsigned int nchips; +}; + +/* There does not appear to be a way to proactively mask interrupts + * on the htcpld chip itself. So, we simply ignore interrupts that + * aren't desired. */ +static void htcpld_mask(unsigned int irq) +{ + struct htcpld_chip *chip = get_irq_chip_data(irq); + chip->irqs_enabled &= ~(1 << (irq - chip->irq_start)); + pr_debug("HTCPLD mask %d %04x\n", irq, chip->irqs_enabled); +} +static void htcpld_unmask(unsigned int irq) +{ + struct htcpld_chip *chip = get_irq_chip_data(irq); + chip->irqs_enabled |= 1 << (irq - chip->irq_start); + pr_debug("HTCPLD unmask %d %04x\n", irq, chip->irqs_enabled); +} + +static int htcpld_set_type(unsigned int irq, unsigned int flags) +{ + struct irq_desc *d = irq_to_desc(irq); + + if (!d) { + pr_err("HTCPLD invalid IRQ: %d\n", irq); + return -EINVAL; + } + + if (flags & ~IRQ_TYPE_SENSE_MASK) + return -EINVAL; + + /* We only allow edge triggering */ + if (flags & (IRQ_TYPE_LEVEL_LOW|IRQ_TYPE_LEVEL_HIGH)) + return -EINVAL; + + d->status &= ~IRQ_TYPE_SENSE_MASK; + d->status |= flags; + + return 0; +} + +static struct irq_chip htcpld_muxed_chip = { + .name = "htcpld", + .mask = htcpld_mask, + .unmask = htcpld_unmask, + .set_type = htcpld_set_type, +}; + +/* To properly dispatch IRQ events, we need to read from the + * chip. This is an I2C action that could possibly sleep + * (which is bad in interrupt context) -- so we use a threaded + * interrupt handler to get around that. + */ +static irqreturn_t htcpld_handler(int irq, void *dev) +{ + struct htcpld_data *htcpld = dev; + unsigned int i; + unsigned long flags; + int irqpin; + struct irq_desc *desc; + + if (!htcpld) { + pr_debug("htcpld is null in ISR\n"); + return IRQ_HANDLED; + } + + /* + * For each chip, do a read of the chip and trigger any interrupts + * desired. The interrupts will be triggered from LSB to MSB (i.e. + * bit 0 first, then bit 1, etc.) + * + * For chips that have no interrupt range specified, just skip 'em. 
+ */ + for (i = 0; i < htcpld->nchips; i++) { + struct htcpld_chip *chip = &htcpld->chip[i]; + struct i2c_client *client; + int val; + unsigned long uval, old_val; + + if (!chip) { + pr_debug("chip %d is null in ISR\n", i); + continue; + } + + if (chip->nirqs == 0) + continue; + + client = chip->client; + if (!client) { + pr_debug("client %d is null in ISR\n", i); + continue; + } + + /* Scan the chip */ + val = i2c_smbus_read_byte_data(client, chip->cache_out); + if (val < 0) { + /* Throw a warning and skip this chip */ + dev_warn(chip->dev, "Unable to read from chip: %d\n", + val); + continue; + } + + uval = (unsigned long)val; + + spin_lock_irqsave(&chip->lock, flags); + + /* Save away the old value so we can compare it */ + old_val = chip->cache_in; + + /* Write the new value */ + chip->cache_in = uval; + + spin_unlock_irqrestore(&chip->lock, flags); + + /* + * For each bit in the data (starting at bit 0), trigger + * associated interrupts. + */ + for (irqpin = 0; irqpin < chip->nirqs; irqpin++) { + unsigned oldb, newb; + int flags; + + irq = chip->irq_start + irqpin; + desc = irq_to_desc(irq); + flags = desc->status; + + /* Run the IRQ handler, but only if the bit value + * changed, and the proper flags are set */ + oldb = (old_val >> irqpin) & 1; + newb = (uval >> irqpin) & 1; + + if ((!oldb && newb && (flags & IRQ_TYPE_EDGE_RISING)) || + (oldb && !newb && + (flags & IRQ_TYPE_EDGE_FALLING))) { + pr_debug("fire IRQ %d\n", irqpin); + desc->handle_irq(irq, desc); + } + } + } + + /* + * In order to continue receiving interrupts, the int_reset_gpio must + * be asserted. + */ + if (htcpld->int_reset_gpio_hi) + gpio_set_value(htcpld->int_reset_gpio_hi, 1); + if (htcpld->int_reset_gpio_lo) + gpio_set_value(htcpld->int_reset_gpio_lo, 0); + + return IRQ_HANDLED; +} + +/* + * The GPIO set routines can be called from interrupt context, especially if, + * for example they're attached to the led-gpio framework and a trigger is + * enabled. As such, we declared work above in the htcpld_chip structure, + * and that work is scheduled in the set routine. The kernel can then run + * the I2C functions, which will sleep, in process context. 
+ */ +void htcpld_chip_set(struct gpio_chip *chip, unsigned offset, int val) +{ + struct i2c_client *client; + struct htcpld_chip *chip_data; + unsigned long flags; + + chip_data = container_of(chip, struct htcpld_chip, chip_out); + if (!chip_data) + return; + + client = chip_data->client; + if (client == NULL) + return; + + spin_lock_irqsave(&chip_data->lock, flags); + if (val) + chip_data->cache_out |= (1 << offset); + else + chip_data->cache_out &= ~(1 << offset); + spin_unlock_irqrestore(&chip_data->lock, flags); + + schedule_work(&(chip_data->set_val_work)); +} + +void htcpld_chip_set_ni(struct work_struct *work) +{ + struct htcpld_chip *chip_data; + struct i2c_client *client; + + chip_data = container_of(work, struct htcpld_chip, set_val_work); + client = chip_data->client; + i2c_smbus_read_byte_data(client, chip_data->cache_out); +} + +int htcpld_chip_get(struct gpio_chip *chip, unsigned offset) +{ + struct htcpld_chip *chip_data; + int val = 0; + int is_input = 0; + + /* Try out first */ + chip_data = container_of(chip, struct htcpld_chip, chip_out); + if (!chip_data) { + /* Try in */ + is_input = 1; + chip_data = container_of(chip, struct htcpld_chip, chip_in); + if (!chip_data) + return -EINVAL; + } + + /* Determine if this is an input or output GPIO */ + if (!is_input) + /* Use the output cache */ + val = (chip_data->cache_out >> offset) & 1; + else + /* Use the input cache */ + val = (chip_data->cache_in >> offset) & 1; + + if (val) + return 1; + else + return 0; +} + +static int htcpld_direction_output(struct gpio_chip *chip, + unsigned offset, int value) +{ + htcpld_chip_set(chip, offset, value); + return 0; +} + +static int htcpld_direction_input(struct gpio_chip *chip, + unsigned offset) +{ + /* + * No-op: this function can only be called on the input chip. + * We do however make sure the offset is within range. + */ + return (offset < chip->ngpio) ? 
0 : -EINVAL; +} + +int htcpld_chip_to_irq(struct gpio_chip *chip, unsigned offset) +{ + struct htcpld_chip *chip_data; + + chip_data = container_of(chip, struct htcpld_chip, chip_in); + + if (offset < chip_data->nirqs) + return chip_data->irq_start + offset; + else + return -EINVAL; +} + +void htcpld_chip_reset(struct i2c_client *client) +{ + struct htcpld_chip *chip_data = i2c_get_clientdata(client); + if (!chip_data) + return; + + i2c_smbus_read_byte_data( + client, (chip_data->cache_out = chip_data->reset)); +} + +static int __devinit htcpld_setup_chip_irq( + struct platform_device *pdev, + int chip_index) +{ + struct htcpld_data *htcpld; + struct device *dev = &pdev->dev; + struct htcpld_core_platform_data *pdata; + struct htcpld_chip *chip; + struct htcpld_chip_platform_data *plat_chip_data; + unsigned int irq, irq_end; + int ret = 0; + + /* Get the platform and driver data */ + pdata = dev->platform_data; + htcpld = platform_get_drvdata(pdev); + chip = &htcpld->chip[chip_index]; + plat_chip_data = &pdata->chip[chip_index]; + + /* Setup irq handlers */ + irq_end = chip->irq_start + chip->nirqs; + for (irq = chip->irq_start; irq < irq_end; irq++) { + set_irq_chip(irq, &htcpld_muxed_chip); + set_irq_chip_data(irq, chip); + set_irq_handler(irq, handle_simple_irq); +#ifdef CONFIG_ARM + set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); +#else + set_irq_probe(irq); +#endif + } + + return ret; +} + +static int __devinit htcpld_register_chip_i2c( + struct platform_device *pdev, + int chip_index) +{ + struct htcpld_data *htcpld; + struct device *dev = &pdev->dev; + struct htcpld_core_platform_data *pdata; + struct htcpld_chip *chip; + struct htcpld_chip_platform_data *plat_chip_data; + struct i2c_adapter *adapter; + struct i2c_client *client; + struct i2c_board_info info; + + /* Get the platform and driver data */ + pdata = dev->platform_data; + htcpld = platform_get_drvdata(pdev); + chip = &htcpld->chip[chip_index]; + plat_chip_data = &pdata->chip[chip_index]; + + adapter = i2c_get_adapter(pdata->i2c_adapter_id); + if (adapter == NULL) { + /* Eek, no such I2C adapter! Bail out. */ + dev_warn(dev, "Chip at i2c address 0x%x: Invalid i2c adapter %d\n", + plat_chip_data->addr, pdata->i2c_adapter_id); + return -ENODEV; + } + + if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_READ_BYTE_DATA)) { + dev_warn(dev, "i2c adapter %d non-functional\n", + pdata->i2c_adapter_id); + return -EINVAL; + } + + memset(&info, 0, sizeof(struct i2c_board_info)); + info.addr = plat_chip_data->addr; + strlcpy(info.type, "htcpld-chip", I2C_NAME_SIZE); + info.platform_data = chip; + + /* Add the I2C device. This calls the probe() function. 
*/ + client = i2c_new_device(adapter, &info); + if (!client) { + /* I2C device registration failed, continue with the next */ + dev_warn(dev, "Unable to add I2C device for 0x%x\n", + plat_chip_data->addr); + return -ENODEV; + } + + i2c_set_clientdata(client, chip); + snprintf(client->name, I2C_NAME_SIZE, "Chip_0x%d", client->addr); + chip->client = client; + + /* Reset the chip */ + htcpld_chip_reset(client); + chip->cache_in = i2c_smbus_read_byte_data(client, chip->cache_out); + + return 0; +} + +static void __devinit htcpld_unregister_chip_i2c( + struct platform_device *pdev, + int chip_index) +{ + struct htcpld_data *htcpld; + struct htcpld_chip *chip; + + /* Get the platform and driver data */ + htcpld = platform_get_drvdata(pdev); + chip = &htcpld->chip[chip_index]; + + if (chip->client) + i2c_unregister_device(chip->client); +} + +static int __devinit htcpld_register_chip_gpio( + struct platform_device *pdev, + int chip_index) +{ + struct htcpld_data *htcpld; + struct device *dev = &pdev->dev; + struct htcpld_core_platform_data *pdata; + struct htcpld_chip *chip; + struct htcpld_chip_platform_data *plat_chip_data; + struct gpio_chip *gpio_chip; + int ret = 0; + + /* Get the platform and driver data */ + pdata = dev->platform_data; + htcpld = platform_get_drvdata(pdev); + chip = &htcpld->chip[chip_index]; + plat_chip_data = &pdata->chip[chip_index]; + + /* Setup the GPIO chips */ + gpio_chip = &(chip->chip_out); + gpio_chip->label = "htcpld-out"; + gpio_chip->dev = dev; + gpio_chip->owner = THIS_MODULE; + gpio_chip->get = htcpld_chip_get; + gpio_chip->set = htcpld_chip_set; + gpio_chip->direction_input = NULL; + gpio_chip->direction_output = htcpld_direction_output; + gpio_chip->base = plat_chip_data->gpio_out_base; + gpio_chip->ngpio = plat_chip_data->num_gpios; + + gpio_chip = &(chip->chip_in); + gpio_chip->label = "htcpld-in"; + gpio_chip->dev = dev; + gpio_chip->owner = THIS_MODULE; + gpio_chip->get = htcpld_chip_get; + gpio_chip->set = NULL; + gpio_chip->direction_input = htcpld_direction_input; + gpio_chip->direction_output = NULL; + gpio_chip->to_irq = htcpld_chip_to_irq; + gpio_chip->base = plat_chip_data->gpio_in_base; + gpio_chip->ngpio = plat_chip_data->num_gpios; + + /* Add the GPIO chips */ + ret = gpiochip_add(&(chip->chip_out)); + if (ret) { + dev_warn(dev, "Unable to register output GPIOs for 0x%x: %d\n", + plat_chip_data->addr, ret); + return ret; + } + + ret = gpiochip_add(&(chip->chip_in)); + if (ret) { + int error; + + dev_warn(dev, "Unable to register input GPIOs for 0x%x: %d\n", + plat_chip_data->addr, ret); + + error = gpiochip_remove(&(chip->chip_out)); + if (error) + dev_warn(dev, "Error while trying to unregister gpio chip: %d\n", error); + + return ret; + } + + return 0; +} + +static int __devinit htcpld_setup_chips(struct platform_device *pdev) +{ + struct htcpld_data *htcpld; + struct device *dev = &pdev->dev; + struct htcpld_core_platform_data *pdata; + int i; + + /* Get the platform and driver data */ + pdata = dev->platform_data; + htcpld = platform_get_drvdata(pdev); + + /* Setup each chip's output GPIOs */ + htcpld->nchips = pdata->num_chip; + htcpld->chip = kzalloc(sizeof(struct htcpld_chip) * htcpld->nchips, + GFP_KERNEL); + if (!htcpld->chip) { + dev_warn(dev, "Unable to allocate memory for chips\n"); + return -ENOMEM; + } + + /* Add the chips as best we can */ + for (i = 0; i < htcpld->nchips; i++) { + int ret; + + /* Setup the HTCPLD chips */ + htcpld->chip[i].reset = pdata->chip[i].reset; + htcpld->chip[i].cache_out = pdata->chip[i].reset; + 
htcpld->chip[i].cache_in = 0; + htcpld->chip[i].dev = dev; + htcpld->chip[i].irq_start = pdata->chip[i].irq_base; + htcpld->chip[i].nirqs = pdata->chip[i].num_irqs; + + INIT_WORK(&(htcpld->chip[i].set_val_work), &htcpld_chip_set_ni); + spin_lock_init(&(htcpld->chip[i].lock)); + + /* Setup the interrupts for the chip */ + if (htcpld->chained_irq) { + ret = htcpld_setup_chip_irq(pdev, i); + if (ret) + continue; + } + + /* Register the chip with I2C */ + ret = htcpld_register_chip_i2c(pdev, i); + if (ret) + continue; + + + /* Register the chips with the GPIO subsystem */ + ret = htcpld_register_chip_gpio(pdev, i); + if (ret) { + /* Unregister the chip from i2c and continue */ + htcpld_unregister_chip_i2c(pdev, i); + continue; + } + + dev_info(dev, "Registered chip at 0x%x\n", pdata->chip[i].addr); + } + + return 0; +} + +static int __devinit htcpld_core_probe(struct platform_device *pdev) +{ + struct htcpld_data *htcpld; + struct device *dev = &pdev->dev; + struct htcpld_core_platform_data *pdata; + struct resource *res; + int ret = 0; + + if (!dev) + return -ENODEV; + + pdata = dev->platform_data; + if (!pdata) { + dev_warn(dev, "Platform data not found for htcpld core!\n"); + return -ENXIO; + } + + htcpld = kzalloc(sizeof(struct htcpld_data), GFP_KERNEL); + if (!htcpld) + return -ENOMEM; + + /* Find chained irq */ + ret = -EINVAL; + res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (res) { + int flags; + htcpld->chained_irq = res->start; + + /* Setup the chained interrupt handler */ + flags = IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING; + ret = request_threaded_irq(htcpld->chained_irq, + NULL, htcpld_handler, + flags, pdev->name, htcpld); + if (ret) { + dev_warn(dev, "Unable to setup chained irq handler: %d\n", ret); + goto fail; + } else + device_init_wakeup(dev, 0); + } + + /* Set the driver data */ + platform_set_drvdata(pdev, htcpld); + + /* Setup the htcpld chips */ + ret = htcpld_setup_chips(pdev); + if (ret) + goto fail; + + /* Request the GPIO(s) for the int reset and set them up */ + if (pdata->int_reset_gpio_hi) { + ret = gpio_request(pdata->int_reset_gpio_hi, "htcpld-core"); + if (ret) { + /* + * If it failed, that sucks, but we can probably + * continue on without it. + */ + dev_warn(dev, "Unable to request int_reset_gpio_hi -- interrupts may not work\n"); + htcpld->int_reset_gpio_hi = 0; + } else { + htcpld->int_reset_gpio_hi = pdata->int_reset_gpio_hi; + gpio_set_value(htcpld->int_reset_gpio_hi, 1); + } + } + + if (pdata->int_reset_gpio_lo) { + ret = gpio_request(pdata->int_reset_gpio_lo, "htcpld-core"); + if (ret) { + /* + * If it failed, that sucks, but we can probably + * continue on without it. 
+ */ + dev_warn(dev, "Unable to request int_reset_gpio_lo -- interrupts may not work\n"); + htcpld->int_reset_gpio_lo = 0; + } else { + htcpld->int_reset_gpio_lo = pdata->int_reset_gpio_lo; + gpio_set_value(htcpld->int_reset_gpio_lo, 0); + } + } + + dev_info(dev, "Initialized successfully\n"); + return 0; + +fail: + kfree(htcpld); + return ret; +} + +/* The I2C Driver -- used internally */ +static const struct i2c_device_id htcpld_chip_id[] = { + { "htcpld-chip", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, htcpld_chip_id); + + +static struct i2c_driver htcpld_chip_driver = { + .driver = { + .name = "htcpld-chip", + }, + .id_table = htcpld_chip_id, +}; + +/* The Core Driver */ +static struct platform_driver htcpld_core_driver = { + .driver = { + .name = "i2c-htcpld", + }, +}; + +static int __init htcpld_core_init(void) +{ + int ret; + + /* Register the I2C Chip driver */ + ret = i2c_add_driver(&htcpld_chip_driver); + if (ret) + return ret; + + /* Probe for our chips */ + return platform_driver_probe(&htcpld_core_driver, htcpld_core_probe); +} + +static void __exit htcpld_core_exit(void) +{ + i2c_del_driver(&htcpld_chip_driver); + platform_driver_unregister(&htcpld_core_driver); +} + +module_init(htcpld_core_init); +module_exit(htcpld_core_exit); + +MODULE_AUTHOR("Cory Maccarrone "); +MODULE_DESCRIPTION("I2C HTC PLD Driver"); +MODULE_LICENSE("GPL"); + diff --git a/include/linux/htcpld.h b/include/linux/htcpld.h new file mode 100644 index 000000000000..ab3f6cb4dddc --- /dev/null +++ b/include/linux/htcpld.h @@ -0,0 +1,24 @@ +#ifndef __LINUX_HTCPLD_H +#define __LINUX_HTCPLD_H + +struct htcpld_chip_platform_data { + unsigned int addr; + unsigned int reset; + unsigned int num_gpios; + unsigned int gpio_out_base; + unsigned int gpio_in_base; + unsigned int irq_base; + unsigned int num_irqs; +}; + +struct htcpld_core_platform_data { + unsigned int int_reset_gpio_hi; + unsigned int int_reset_gpio_lo; + unsigned int i2c_adapter_id; + + struct htcpld_chip_platform_data *chip; + unsigned int num_chip; +}; + +#endif /* __LINUX_HTCPLD_H */ + -- cgit v1.2.3 From f7ea2dc59ed46dcd0f1cfaccda02211f4507207b Mon Sep 17 00:00:00 2001 From: Christoph Egger Date: Fri, 15 Jan 2010 15:33:46 +0100 Subject: mfd: Remove leftover from discontinued TWL4030 battery patch The TWL4030_BCI_BATTERY config option originates from a patch to the omap git tree. However inclusion in linux was seemingly rejected and the functionality nears inclusion under a different name so this removes the bits of the old version that made it into the mainline kernel again. 
Signed-off-by: Christoph Egger Signed-off-by: Samuel Ortiz --- drivers/mfd/twl-core.c | 19 ------------------- include/linux/i2c/twl.h | 7 +------ 2 files changed, 1 insertion(+), 25 deletions(-) (limited to 'include') diff --git a/drivers/mfd/twl-core.c b/drivers/mfd/twl-core.c index 19a930d06241..d81003f4867e 100644 --- a/drivers/mfd/twl-core.c +++ b/drivers/mfd/twl-core.c @@ -58,13 +58,6 @@ #define DRIVER_NAME "twl" -#if defined(CONFIG_TWL4030_BCI_BATTERY) || \ - defined(CONFIG_TWL4030_BCI_BATTERY_MODULE) -#define twl_has_bci() true -#else -#define twl_has_bci() false -#endif - #if defined(CONFIG_KEYBOARD_TWL4030) || defined(CONFIG_KEYBOARD_TWL4030_MODULE) #define twl_has_keypad() true #else @@ -588,18 +581,6 @@ add_children(struct twl4030_platform_data *pdata, unsigned long features) struct device *child; unsigned sub_chip_id; - if (twl_has_bci() && pdata->bci && - !(features & (TPS_SUBSET | TWL5031))) { - child = add_child(3, "twl4030_bci", - pdata->bci, sizeof(*pdata->bci), - false, - /* irq0 = CHG_PRES, irq1 = BCI */ - pdata->irq_base + BCI_PRES_INTR_OFFSET, - pdata->irq_base + BCI_INTR_OFFSET); - if (IS_ERR(child)) - return PTR_ERR(child); - } - if (twl_has_gpio() && pdata->gpio) { child = add_child(SUB_CHIP_ID1, "twl4030_gpio", pdata->gpio, sizeof(*pdata->gpio), diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h index 7897f3096560..9733e9e53f2b 100644 --- a/include/linux/i2c/twl.h +++ b/include/linux/i2c/twl.h @@ -605,12 +605,7 @@ int twl4030_sih_setup(int module); #define TWL4030_VAUX3_DEV_GRP 0x1F #define TWL4030_VAUX3_DEDICATED 0x22 -#if defined(CONFIG_TWL4030_BCI_BATTERY) || \ - defined(CONFIG_TWL4030_BCI_BATTERY_MODULE) - extern int twl4030charger_usb_en(int enable); -#else - static inline int twl4030charger_usb_en(int enable) { return 0; } -#endif +static inline int twl4030charger_usb_en(int enable) { return 0; } /*----------------------------------------------------------------------*/ -- cgit v1.2.3 From 1c4d3b70a40c666331052adf77933e6994590b74 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 29 Jan 2010 18:20:28 +0000 Subject: mfd: Add WM8994 register definitions As a separate patch due to the large size. Signed-off-by: Mark Brown Signed-off-by: Samuel Ortiz --- include/linux/mfd/wm8994/registers.h | 4292 ++++++++++++++++++++++++++++++++++ 1 file changed, 4292 insertions(+) create mode 100644 include/linux/mfd/wm8994/registers.h (limited to 'include') diff --git a/include/linux/mfd/wm8994/registers.h b/include/linux/mfd/wm8994/registers.h new file mode 100644 index 000000000000..967f62f54159 --- /dev/null +++ b/include/linux/mfd/wm8994/registers.h @@ -0,0 +1,4292 @@ +/* + * include/linux/mfd/wm8994/registers.h -- Register definitions for WM8994 + * + * Copyright 2009 Wolfson Microelectronics PLC. + * + * Author: Mark Brown + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#ifndef __MFD_WM8994_REGISTERS_H__ +#define __MFD_WM8994_REGISTERS_H__ + +/* + * Register values. 
+ */ +#define WM8994_SOFTWARE_RESET 0x00 +#define WM8994_POWER_MANAGEMENT_1 0x01 +#define WM8994_POWER_MANAGEMENT_2 0x02 +#define WM8994_POWER_MANAGEMENT_3 0x03 +#define WM8994_POWER_MANAGEMENT_4 0x04 +#define WM8994_POWER_MANAGEMENT_5 0x05 +#define WM8994_POWER_MANAGEMENT_6 0x06 +#define WM8994_INPUT_MIXER_1 0x15 +#define WM8994_LEFT_LINE_INPUT_1_2_VOLUME 0x18 +#define WM8994_LEFT_LINE_INPUT_3_4_VOLUME 0x19 +#define WM8994_RIGHT_LINE_INPUT_1_2_VOLUME 0x1A +#define WM8994_RIGHT_LINE_INPUT_3_4_VOLUME 0x1B +#define WM8994_LEFT_OUTPUT_VOLUME 0x1C +#define WM8994_RIGHT_OUTPUT_VOLUME 0x1D +#define WM8994_LINE_OUTPUTS_VOLUME 0x1E +#define WM8994_HPOUT2_VOLUME 0x1F +#define WM8994_LEFT_OPGA_VOLUME 0x20 +#define WM8994_RIGHT_OPGA_VOLUME 0x21 +#define WM8994_SPKMIXL_ATTENUATION 0x22 +#define WM8994_SPKMIXR_ATTENUATION 0x23 +#define WM8994_SPKOUT_MIXERS 0x24 +#define WM8994_CLASSD 0x25 +#define WM8994_SPEAKER_VOLUME_LEFT 0x26 +#define WM8994_SPEAKER_VOLUME_RIGHT 0x27 +#define WM8994_INPUT_MIXER_2 0x28 +#define WM8994_INPUT_MIXER_3 0x29 +#define WM8994_INPUT_MIXER_4 0x2A +#define WM8994_INPUT_MIXER_5 0x2B +#define WM8994_INPUT_MIXER_6 0x2C +#define WM8994_OUTPUT_MIXER_1 0x2D +#define WM8994_OUTPUT_MIXER_2 0x2E +#define WM8994_OUTPUT_MIXER_3 0x2F +#define WM8994_OUTPUT_MIXER_4 0x30 +#define WM8994_OUTPUT_MIXER_5 0x31 +#define WM8994_OUTPUT_MIXER_6 0x32 +#define WM8994_HPOUT2_MIXER 0x33 +#define WM8994_LINE_MIXER_1 0x34 +#define WM8994_LINE_MIXER_2 0x35 +#define WM8994_SPEAKER_MIXER 0x36 +#define WM8994_ADDITIONAL_CONTROL 0x37 +#define WM8994_ANTIPOP_1 0x38 +#define WM8994_ANTIPOP_2 0x39 +#define WM8994_MICBIAS 0x3A +#define WM8994_LDO_1 0x3B +#define WM8994_LDO_2 0x3C +#define WM8994_CHARGE_PUMP_1 0x4C +#define WM8994_CLASS_W_1 0x51 +#define WM8994_DC_SERVO_1 0x54 +#define WM8994_DC_SERVO_2 0x55 +#define WM8994_DC_SERVO_4 0x57 +#define WM8994_DC_SERVO_READBACK 0x58 +#define WM8994_ANALOGUE_HP_1 0x60 +#define WM8994_CHIP_REVISION 0x100 +#define WM8994_CONTROL_INTERFACE 0x101 +#define WM8994_WRITE_SEQUENCER_CTRL_1 0x110 +#define WM8994_WRITE_SEQUENCER_CTRL_2 0x111 +#define WM8994_AIF1_CLOCKING_1 0x200 +#define WM8994_AIF1_CLOCKING_2 0x201 +#define WM8994_AIF2_CLOCKING_1 0x204 +#define WM8994_AIF2_CLOCKING_2 0x205 +#define WM8994_CLOCKING_1 0x208 +#define WM8994_CLOCKING_2 0x209 +#define WM8994_AIF1_RATE 0x210 +#define WM8994_AIF2_RATE 0x211 +#define WM8994_RATE_STATUS 0x212 +#define WM8994_FLL1_CONTROL_1 0x220 +#define WM8994_FLL1_CONTROL_2 0x221 +#define WM8994_FLL1_CONTROL_3 0x222 +#define WM8994_FLL1_CONTROL_4 0x223 +#define WM8994_FLL1_CONTROL_5 0x224 +#define WM8994_FLL2_CONTROL_1 0x240 +#define WM8994_FLL2_CONTROL_2 0x241 +#define WM8994_FLL2_CONTROL_3 0x242 +#define WM8994_FLL2_CONTROL_4 0x243 +#define WM8994_FLL2_CONTROL_5 0x244 +#define WM8994_AIF1_CONTROL_1 0x300 +#define WM8994_AIF1_CONTROL_2 0x301 +#define WM8994_AIF1_MASTER_SLAVE 0x302 +#define WM8994_AIF1_BCLK 0x303 +#define WM8994_AIF1ADC_LRCLK 0x304 +#define WM8994_AIF1DAC_LRCLK 0x305 +#define WM8994_AIF1DAC_DATA 0x306 +#define WM8994_AIF1ADC_DATA 0x307 +#define WM8994_AIF2_CONTROL_1 0x310 +#define WM8994_AIF2_CONTROL_2 0x311 +#define WM8994_AIF2_MASTER_SLAVE 0x312 +#define WM8994_AIF2_BCLK 0x313 +#define WM8994_AIF2ADC_LRCLK 0x314 +#define WM8994_AIF2DAC_LRCLK 0x315 +#define WM8994_AIF2DAC_DATA 0x316 +#define WM8994_AIF2ADC_DATA 0x317 +#define WM8994_AIF1_ADC1_LEFT_VOLUME 0x400 +#define WM8994_AIF1_ADC1_RIGHT_VOLUME 0x401 +#define WM8994_AIF1_DAC1_LEFT_VOLUME 0x402 +#define WM8994_AIF1_DAC1_RIGHT_VOLUME 0x403 +#define 
WM8994_AIF1_ADC2_LEFT_VOLUME 0x404 +#define WM8994_AIF1_ADC2_RIGHT_VOLUME 0x405 +#define WM8994_AIF1_DAC2_LEFT_VOLUME 0x406 +#define WM8994_AIF1_DAC2_RIGHT_VOLUME 0x407 +#define WM8994_AIF1_ADC1_FILTERS 0x410 +#define WM8994_AIF1_ADC2_FILTERS 0x411 +#define WM8994_AIF1_DAC1_FILTERS_1 0x420 +#define WM8994_AIF1_DAC1_FILTERS_2 0x421 +#define WM8994_AIF1_DAC2_FILTERS_1 0x422 +#define WM8994_AIF1_DAC2_FILTERS_2 0x423 +#define WM8994_AIF1_DRC1_1 0x440 +#define WM8994_AIF1_DRC1_2 0x441 +#define WM8994_AIF1_DRC1_3 0x442 +#define WM8994_AIF1_DRC1_4 0x443 +#define WM8994_AIF1_DRC1_5 0x444 +#define WM8994_AIF1_DRC2_1 0x450 +#define WM8994_AIF1_DRC2_2 0x451 +#define WM8994_AIF1_DRC2_3 0x452 +#define WM8994_AIF1_DRC2_4 0x453 +#define WM8994_AIF1_DRC2_5 0x454 +#define WM8994_AIF1_DAC1_EQ_GAINS_1 0x480 +#define WM8994_AIF1_DAC1_EQ_GAINS_2 0x481 +#define WM8994_AIF1_DAC1_EQ_BAND_1_A 0x482 +#define WM8994_AIF1_DAC1_EQ_BAND_1_B 0x483 +#define WM8994_AIF1_DAC1_EQ_BAND_1_PG 0x484 +#define WM8994_AIF1_DAC1_EQ_BAND_2_A 0x485 +#define WM8994_AIF1_DAC1_EQ_BAND_2_B 0x486 +#define WM8994_AIF1_DAC1_EQ_BAND_2_C 0x487 +#define WM8994_AIF1_DAC1_EQ_BAND_2_PG 0x488 +#define WM8994_AIF1_DAC1_EQ_BAND_3_A 0x489 +#define WM8994_AIF1_DAC1_EQ_BAND_3_B 0x48A +#define WM8994_AIF1_DAC1_EQ_BAND_3_C 0x48B +#define WM8994_AIF1_DAC1_EQ_BAND_3_PG 0x48C +#define WM8994_AIF1_DAC1_EQ_BAND_4_A 0x48D +#define WM8994_AIF1_DAC1_EQ_BAND_4_B 0x48E +#define WM8994_AIF1_DAC1_EQ_BAND_4_C 0x48F +#define WM8994_AIF1_DAC1_EQ_BAND_4_PG 0x490 +#define WM8994_AIF1_DAC1_EQ_BAND_5_A 0x491 +#define WM8994_AIF1_DAC1_EQ_BAND_5_B 0x492 +#define WM8994_AIF1_DAC1_EQ_BAND_5_PG 0x493 +#define WM8994_AIF1_DAC2_EQ_GAINS_1 0x4A0 +#define WM8994_AIF1_DAC2_EQ_GAINS_2 0x4A1 +#define WM8994_AIF1_DAC2_EQ_BAND_1_A 0x4A2 +#define WM8994_AIF1_DAC2_EQ_BAND_1_B 0x4A3 +#define WM8994_AIF1_DAC2_EQ_BAND_1_PG 0x4A4 +#define WM8994_AIF1_DAC2_EQ_BAND_2_A 0x4A5 +#define WM8994_AIF1_DAC2_EQ_BAND_2_B 0x4A6 +#define WM8994_AIF1_DAC2_EQ_BAND_2_C 0x4A7 +#define WM8994_AIF1_DAC2_EQ_BAND_2_PG 0x4A8 +#define WM8994_AIF1_DAC2_EQ_BAND_3_A 0x4A9 +#define WM8994_AIF1_DAC2_EQ_BAND_3_B 0x4AA +#define WM8994_AIF1_DAC2_EQ_BAND_3_C 0x4AB +#define WM8994_AIF1_DAC2_EQ_BAND_3_PG 0x4AC +#define WM8994_AIF1_DAC2_EQ_BAND_4_A 0x4AD +#define WM8994_AIF1_DAC2_EQ_BAND_4_B 0x4AE +#define WM8994_AIF1_DAC2_EQ_BAND_4_C 0x4AF +#define WM8994_AIF1_DAC2_EQ_BAND_4_PG 0x4B0 +#define WM8994_AIF1_DAC2_EQ_BAND_5_A 0x4B1 +#define WM8994_AIF1_DAC2_EQ_BAND_5_B 0x4B2 +#define WM8994_AIF1_DAC2_EQ_BAND_5_PG 0x4B3 +#define WM8994_AIF2_ADC_LEFT_VOLUME 0x500 +#define WM8994_AIF2_ADC_RIGHT_VOLUME 0x501 +#define WM8994_AIF2_DAC_LEFT_VOLUME 0x502 +#define WM8994_AIF2_DAC_RIGHT_VOLUME 0x503 +#define WM8994_AIF2_ADC_FILTERS 0x510 +#define WM8994_AIF2_DAC_FILTERS_1 0x520 +#define WM8994_AIF2_DAC_FILTERS_2 0x521 +#define WM8994_AIF2_DRC_1 0x540 +#define WM8994_AIF2_DRC_2 0x541 +#define WM8994_AIF2_DRC_3 0x542 +#define WM8994_AIF2_DRC_4 0x543 +#define WM8994_AIF2_DRC_5 0x544 +#define WM8994_AIF2_EQ_GAINS_1 0x580 +#define WM8994_AIF2_EQ_GAINS_2 0x581 +#define WM8994_AIF2_EQ_BAND_1_A 0x582 +#define WM8994_AIF2_EQ_BAND_1_B 0x583 +#define WM8994_AIF2_EQ_BAND_1_PG 0x584 +#define WM8994_AIF2_EQ_BAND_2_A 0x585 +#define WM8994_AIF2_EQ_BAND_2_B 0x586 +#define WM8994_AIF2_EQ_BAND_2_C 0x587 +#define WM8994_AIF2_EQ_BAND_2_PG 0x588 +#define WM8994_AIF2_EQ_BAND_3_A 0x589 +#define WM8994_AIF2_EQ_BAND_3_B 0x58A +#define WM8994_AIF2_EQ_BAND_3_C 0x58B +#define WM8994_AIF2_EQ_BAND_3_PG 0x58C +#define WM8994_AIF2_EQ_BAND_4_A 0x58D +#define 
WM8994_AIF2_EQ_BAND_4_B 0x58E +#define WM8994_AIF2_EQ_BAND_4_C 0x58F +#define WM8994_AIF2_EQ_BAND_4_PG 0x590 +#define WM8994_AIF2_EQ_BAND_5_A 0x591 +#define WM8994_AIF2_EQ_BAND_5_B 0x592 +#define WM8994_AIF2_EQ_BAND_5_PG 0x593 +#define WM8994_DAC1_MIXER_VOLUMES 0x600 +#define WM8994_DAC1_LEFT_MIXER_ROUTING 0x601 +#define WM8994_DAC1_RIGHT_MIXER_ROUTING 0x602 +#define WM8994_DAC2_MIXER_VOLUMES 0x603 +#define WM8994_DAC2_LEFT_MIXER_ROUTING 0x604 +#define WM8994_DAC2_RIGHT_MIXER_ROUTING 0x605 +#define WM8994_AIF1_ADC1_LEFT_MIXER_ROUTING 0x606 +#define WM8994_AIF1_ADC1_RIGHT_MIXER_ROUTING 0x607 +#define WM8994_AIF1_ADC2_LEFT_MIXER_ROUTING 0x608 +#define WM8994_AIF1_ADC2_RIGHT_MIXER_ROUTING 0x609 +#define WM8994_DAC1_LEFT_VOLUME 0x610 +#define WM8994_DAC1_RIGHT_VOLUME 0x611 +#define WM8994_DAC2_LEFT_VOLUME 0x612 +#define WM8994_DAC2_RIGHT_VOLUME 0x613 +#define WM8994_DAC_SOFTMUTE 0x614 +#define WM8994_OVERSAMPLING 0x620 +#define WM8994_SIDETONE 0x621 +#define WM8994_GPIO_1 0x700 +#define WM8994_GPIO_2 0x701 +#define WM8994_GPIO_3 0x702 +#define WM8994_GPIO_4 0x703 +#define WM8994_GPIO_5 0x704 +#define WM8994_GPIO_6 0x705 +#define WM8994_GPIO_7 0x706 +#define WM8994_GPIO_8 0x707 +#define WM8994_GPIO_9 0x708 +#define WM8994_GPIO_10 0x709 +#define WM8994_GPIO_11 0x70A +#define WM8994_PULL_CONTROL_1 0x720 +#define WM8994_PULL_CONTROL_2 0x721 +#define WM8994_INTERRUPT_STATUS_1 0x730 +#define WM8994_INTERRUPT_STATUS_2 0x731 +#define WM8994_INTERRUPT_RAW_STATUS_2 0x732 +#define WM8994_INTERRUPT_STATUS_1_MASK 0x738 +#define WM8994_INTERRUPT_STATUS_2_MASK 0x739 +#define WM8994_INTERRUPT_CONTROL 0x740 +#define WM8994_IRQ_DEBOUNCE 0x748 +#define WM8994_WRITE_SEQUENCER_0 0x3000 +#define WM8994_WRITE_SEQUENCER_1 0x3001 +#define WM8994_WRITE_SEQUENCER_2 0x3002 +#define WM8994_WRITE_SEQUENCER_3 0x3003 +#define WM8994_WRITE_SEQUENCER_4 0x3004 +#define WM8994_WRITE_SEQUENCER_5 0x3005 +#define WM8994_WRITE_SEQUENCER_6 0x3006 +#define WM8994_WRITE_SEQUENCER_7 0x3007 +#define WM8994_WRITE_SEQUENCER_8 0x3008 +#define WM8994_WRITE_SEQUENCER_9 0x3009 +#define WM8994_WRITE_SEQUENCER_10 0x300A +#define WM8994_WRITE_SEQUENCER_11 0x300B +#define WM8994_WRITE_SEQUENCER_12 0x300C +#define WM8994_WRITE_SEQUENCER_13 0x300D +#define WM8994_WRITE_SEQUENCER_14 0x300E +#define WM8994_WRITE_SEQUENCER_15 0x300F +#define WM8994_WRITE_SEQUENCER_16 0x3010 +#define WM8994_WRITE_SEQUENCER_17 0x3011 +#define WM8994_WRITE_SEQUENCER_18 0x3012 +#define WM8994_WRITE_SEQUENCER_19 0x3013 +#define WM8994_WRITE_SEQUENCER_20 0x3014 +#define WM8994_WRITE_SEQUENCER_21 0x3015 +#define WM8994_WRITE_SEQUENCER_22 0x3016 +#define WM8994_WRITE_SEQUENCER_23 0x3017 +#define WM8994_WRITE_SEQUENCER_24 0x3018 +#define WM8994_WRITE_SEQUENCER_25 0x3019 +#define WM8994_WRITE_SEQUENCER_26 0x301A +#define WM8994_WRITE_SEQUENCER_27 0x301B +#define WM8994_WRITE_SEQUENCER_28 0x301C +#define WM8994_WRITE_SEQUENCER_29 0x301D +#define WM8994_WRITE_SEQUENCER_30 0x301E +#define WM8994_WRITE_SEQUENCER_31 0x301F +#define WM8994_WRITE_SEQUENCER_32 0x3020 +#define WM8994_WRITE_SEQUENCER_33 0x3021 +#define WM8994_WRITE_SEQUENCER_34 0x3022 +#define WM8994_WRITE_SEQUENCER_35 0x3023 +#define WM8994_WRITE_SEQUENCER_36 0x3024 +#define WM8994_WRITE_SEQUENCER_37 0x3025 +#define WM8994_WRITE_SEQUENCER_38 0x3026 +#define WM8994_WRITE_SEQUENCER_39 0x3027 +#define WM8994_WRITE_SEQUENCER_40 0x3028 +#define WM8994_WRITE_SEQUENCER_41 0x3029 +#define WM8994_WRITE_SEQUENCER_42 0x302A +#define WM8994_WRITE_SEQUENCER_43 0x302B +#define WM8994_WRITE_SEQUENCER_44 0x302C +#define 
WM8994_WRITE_SEQUENCER_45 0x302D +#define WM8994_WRITE_SEQUENCER_46 0x302E +#define WM8994_WRITE_SEQUENCER_47 0x302F +#define WM8994_WRITE_SEQUENCER_48 0x3030 +#define WM8994_WRITE_SEQUENCER_49 0x3031 +#define WM8994_WRITE_SEQUENCER_50 0x3032 +#define WM8994_WRITE_SEQUENCER_51 0x3033 +#define WM8994_WRITE_SEQUENCER_52 0x3034 +#define WM8994_WRITE_SEQUENCER_53 0x3035 +#define WM8994_WRITE_SEQUENCER_54 0x3036 +#define WM8994_WRITE_SEQUENCER_55 0x3037 +#define WM8994_WRITE_SEQUENCER_56 0x3038 +#define WM8994_WRITE_SEQUENCER_57 0x3039 +#define WM8994_WRITE_SEQUENCER_58 0x303A +#define WM8994_WRITE_SEQUENCER_59 0x303B +#define WM8994_WRITE_SEQUENCER_60 0x303C +#define WM8994_WRITE_SEQUENCER_61 0x303D +#define WM8994_WRITE_SEQUENCER_62 0x303E +#define WM8994_WRITE_SEQUENCER_63 0x303F +#define WM8994_WRITE_SEQUENCER_64 0x3040 +#define WM8994_WRITE_SEQUENCER_65 0x3041 +#define WM8994_WRITE_SEQUENCER_66 0x3042 +#define WM8994_WRITE_SEQUENCER_67 0x3043 +#define WM8994_WRITE_SEQUENCER_68 0x3044 +#define WM8994_WRITE_SEQUENCER_69 0x3045 +#define WM8994_WRITE_SEQUENCER_70 0x3046 +#define WM8994_WRITE_SEQUENCER_71 0x3047 +#define WM8994_WRITE_SEQUENCER_72 0x3048 +#define WM8994_WRITE_SEQUENCER_73 0x3049 +#define WM8994_WRITE_SEQUENCER_74 0x304A +#define WM8994_WRITE_SEQUENCER_75 0x304B +#define WM8994_WRITE_SEQUENCER_76 0x304C +#define WM8994_WRITE_SEQUENCER_77 0x304D +#define WM8994_WRITE_SEQUENCER_78 0x304E +#define WM8994_WRITE_SEQUENCER_79 0x304F +#define WM8994_WRITE_SEQUENCER_80 0x3050 +#define WM8994_WRITE_SEQUENCER_81 0x3051 +#define WM8994_WRITE_SEQUENCER_82 0x3052 +#define WM8994_WRITE_SEQUENCER_83 0x3053 +#define WM8994_WRITE_SEQUENCER_84 0x3054 +#define WM8994_WRITE_SEQUENCER_85 0x3055 +#define WM8994_WRITE_SEQUENCER_86 0x3056 +#define WM8994_WRITE_SEQUENCER_87 0x3057 +#define WM8994_WRITE_SEQUENCER_88 0x3058 +#define WM8994_WRITE_SEQUENCER_89 0x3059 +#define WM8994_WRITE_SEQUENCER_90 0x305A +#define WM8994_WRITE_SEQUENCER_91 0x305B +#define WM8994_WRITE_SEQUENCER_92 0x305C +#define WM8994_WRITE_SEQUENCER_93 0x305D +#define WM8994_WRITE_SEQUENCER_94 0x305E +#define WM8994_WRITE_SEQUENCER_95 0x305F +#define WM8994_WRITE_SEQUENCER_96 0x3060 +#define WM8994_WRITE_SEQUENCER_97 0x3061 +#define WM8994_WRITE_SEQUENCER_98 0x3062 +#define WM8994_WRITE_SEQUENCER_99 0x3063 +#define WM8994_WRITE_SEQUENCER_100 0x3064 +#define WM8994_WRITE_SEQUENCER_101 0x3065 +#define WM8994_WRITE_SEQUENCER_102 0x3066 +#define WM8994_WRITE_SEQUENCER_103 0x3067 +#define WM8994_WRITE_SEQUENCER_104 0x3068 +#define WM8994_WRITE_SEQUENCER_105 0x3069 +#define WM8994_WRITE_SEQUENCER_106 0x306A +#define WM8994_WRITE_SEQUENCER_107 0x306B +#define WM8994_WRITE_SEQUENCER_108 0x306C +#define WM8994_WRITE_SEQUENCER_109 0x306D +#define WM8994_WRITE_SEQUENCER_110 0x306E +#define WM8994_WRITE_SEQUENCER_111 0x306F +#define WM8994_WRITE_SEQUENCER_112 0x3070 +#define WM8994_WRITE_SEQUENCER_113 0x3071 +#define WM8994_WRITE_SEQUENCER_114 0x3072 +#define WM8994_WRITE_SEQUENCER_115 0x3073 +#define WM8994_WRITE_SEQUENCER_116 0x3074 +#define WM8994_WRITE_SEQUENCER_117 0x3075 +#define WM8994_WRITE_SEQUENCER_118 0x3076 +#define WM8994_WRITE_SEQUENCER_119 0x3077 +#define WM8994_WRITE_SEQUENCER_120 0x3078 +#define WM8994_WRITE_SEQUENCER_121 0x3079 +#define WM8994_WRITE_SEQUENCER_122 0x307A +#define WM8994_WRITE_SEQUENCER_123 0x307B +#define WM8994_WRITE_SEQUENCER_124 0x307C +#define WM8994_WRITE_SEQUENCER_125 0x307D +#define WM8994_WRITE_SEQUENCER_126 0x307E +#define WM8994_WRITE_SEQUENCER_127 0x307F +#define WM8994_WRITE_SEQUENCER_128 0x3080 
+#define WM8994_WRITE_SEQUENCER_129 0x3081 +#define WM8994_WRITE_SEQUENCER_130 0x3082 +#define WM8994_WRITE_SEQUENCER_131 0x3083 +#define WM8994_WRITE_SEQUENCER_132 0x3084 +#define WM8994_WRITE_SEQUENCER_133 0x3085 +#define WM8994_WRITE_SEQUENCER_134 0x3086 +#define WM8994_WRITE_SEQUENCER_135 0x3087 +#define WM8994_WRITE_SEQUENCER_136 0x3088 +#define WM8994_WRITE_SEQUENCER_137 0x3089 +#define WM8994_WRITE_SEQUENCER_138 0x308A +#define WM8994_WRITE_SEQUENCER_139 0x308B +#define WM8994_WRITE_SEQUENCER_140 0x308C +#define WM8994_WRITE_SEQUENCER_141 0x308D +#define WM8994_WRITE_SEQUENCER_142 0x308E +#define WM8994_WRITE_SEQUENCER_143 0x308F +#define WM8994_WRITE_SEQUENCER_144 0x3090 +#define WM8994_WRITE_SEQUENCER_145 0x3091 +#define WM8994_WRITE_SEQUENCER_146 0x3092 +#define WM8994_WRITE_SEQUENCER_147 0x3093 +#define WM8994_WRITE_SEQUENCER_148 0x3094 +#define WM8994_WRITE_SEQUENCER_149 0x3095 +#define WM8994_WRITE_SEQUENCER_150 0x3096 +#define WM8994_WRITE_SEQUENCER_151 0x3097 +#define WM8994_WRITE_SEQUENCER_152 0x3098 +#define WM8994_WRITE_SEQUENCER_153 0x3099 +#define WM8994_WRITE_SEQUENCER_154 0x309A +#define WM8994_WRITE_SEQUENCER_155 0x309B +#define WM8994_WRITE_SEQUENCER_156 0x309C +#define WM8994_WRITE_SEQUENCER_157 0x309D +#define WM8994_WRITE_SEQUENCER_158 0x309E +#define WM8994_WRITE_SEQUENCER_159 0x309F +#define WM8994_WRITE_SEQUENCER_160 0x30A0 +#define WM8994_WRITE_SEQUENCER_161 0x30A1 +#define WM8994_WRITE_SEQUENCER_162 0x30A2 +#define WM8994_WRITE_SEQUENCER_163 0x30A3 +#define WM8994_WRITE_SEQUENCER_164 0x30A4 +#define WM8994_WRITE_SEQUENCER_165 0x30A5 +#define WM8994_WRITE_SEQUENCER_166 0x30A6 +#define WM8994_WRITE_SEQUENCER_167 0x30A7 +#define WM8994_WRITE_SEQUENCER_168 0x30A8 +#define WM8994_WRITE_SEQUENCER_169 0x30A9 +#define WM8994_WRITE_SEQUENCER_170 0x30AA +#define WM8994_WRITE_SEQUENCER_171 0x30AB +#define WM8994_WRITE_SEQUENCER_172 0x30AC +#define WM8994_WRITE_SEQUENCER_173 0x30AD +#define WM8994_WRITE_SEQUENCER_174 0x30AE +#define WM8994_WRITE_SEQUENCER_175 0x30AF +#define WM8994_WRITE_SEQUENCER_176 0x30B0 +#define WM8994_WRITE_SEQUENCER_177 0x30B1 +#define WM8994_WRITE_SEQUENCER_178 0x30B2 +#define WM8994_WRITE_SEQUENCER_179 0x30B3 +#define WM8994_WRITE_SEQUENCER_180 0x30B4 +#define WM8994_WRITE_SEQUENCER_181 0x30B5 +#define WM8994_WRITE_SEQUENCER_182 0x30B6 +#define WM8994_WRITE_SEQUENCER_183 0x30B7 +#define WM8994_WRITE_SEQUENCER_184 0x30B8 +#define WM8994_WRITE_SEQUENCER_185 0x30B9 +#define WM8994_WRITE_SEQUENCER_186 0x30BA +#define WM8994_WRITE_SEQUENCER_187 0x30BB +#define WM8994_WRITE_SEQUENCER_188 0x30BC +#define WM8994_WRITE_SEQUENCER_189 0x30BD +#define WM8994_WRITE_SEQUENCER_190 0x30BE +#define WM8994_WRITE_SEQUENCER_191 0x30BF +#define WM8994_WRITE_SEQUENCER_192 0x30C0 +#define WM8994_WRITE_SEQUENCER_193 0x30C1 +#define WM8994_WRITE_SEQUENCER_194 0x30C2 +#define WM8994_WRITE_SEQUENCER_195 0x30C3 +#define WM8994_WRITE_SEQUENCER_196 0x30C4 +#define WM8994_WRITE_SEQUENCER_197 0x30C5 +#define WM8994_WRITE_SEQUENCER_198 0x30C6 +#define WM8994_WRITE_SEQUENCER_199 0x30C7 +#define WM8994_WRITE_SEQUENCER_200 0x30C8 +#define WM8994_WRITE_SEQUENCER_201 0x30C9 +#define WM8994_WRITE_SEQUENCER_202 0x30CA +#define WM8994_WRITE_SEQUENCER_203 0x30CB +#define WM8994_WRITE_SEQUENCER_204 0x30CC +#define WM8994_WRITE_SEQUENCER_205 0x30CD +#define WM8994_WRITE_SEQUENCER_206 0x30CE +#define WM8994_WRITE_SEQUENCER_207 0x30CF +#define WM8994_WRITE_SEQUENCER_208 0x30D0 +#define WM8994_WRITE_SEQUENCER_209 0x30D1 +#define WM8994_WRITE_SEQUENCER_210 0x30D2 +#define 
WM8994_WRITE_SEQUENCER_211 0x30D3 +#define WM8994_WRITE_SEQUENCER_212 0x30D4 +#define WM8994_WRITE_SEQUENCER_213 0x30D5 +#define WM8994_WRITE_SEQUENCER_214 0x30D6 +#define WM8994_WRITE_SEQUENCER_215 0x30D7 +#define WM8994_WRITE_SEQUENCER_216 0x30D8 +#define WM8994_WRITE_SEQUENCER_217 0x30D9 +#define WM8994_WRITE_SEQUENCER_218 0x30DA +#define WM8994_WRITE_SEQUENCER_219 0x30DB +#define WM8994_WRITE_SEQUENCER_220 0x30DC +#define WM8994_WRITE_SEQUENCER_221 0x30DD +#define WM8994_WRITE_SEQUENCER_222 0x30DE +#define WM8994_WRITE_SEQUENCER_223 0x30DF +#define WM8994_WRITE_SEQUENCER_224 0x30E0 +#define WM8994_WRITE_SEQUENCER_225 0x30E1 +#define WM8994_WRITE_SEQUENCER_226 0x30E2 +#define WM8994_WRITE_SEQUENCER_227 0x30E3 +#define WM8994_WRITE_SEQUENCER_228 0x30E4 +#define WM8994_WRITE_SEQUENCER_229 0x30E5 +#define WM8994_WRITE_SEQUENCER_230 0x30E6 +#define WM8994_WRITE_SEQUENCER_231 0x30E7 +#define WM8994_WRITE_SEQUENCER_232 0x30E8 +#define WM8994_WRITE_SEQUENCER_233 0x30E9 +#define WM8994_WRITE_SEQUENCER_234 0x30EA +#define WM8994_WRITE_SEQUENCER_235 0x30EB +#define WM8994_WRITE_SEQUENCER_236 0x30EC +#define WM8994_WRITE_SEQUENCER_237 0x30ED +#define WM8994_WRITE_SEQUENCER_238 0x30EE +#define WM8994_WRITE_SEQUENCER_239 0x30EF +#define WM8994_WRITE_SEQUENCER_240 0x30F0 +#define WM8994_WRITE_SEQUENCER_241 0x30F1 +#define WM8994_WRITE_SEQUENCER_242 0x30F2 +#define WM8994_WRITE_SEQUENCER_243 0x30F3 +#define WM8994_WRITE_SEQUENCER_244 0x30F4 +#define WM8994_WRITE_SEQUENCER_245 0x30F5 +#define WM8994_WRITE_SEQUENCER_246 0x30F6 +#define WM8994_WRITE_SEQUENCER_247 0x30F7 +#define WM8994_WRITE_SEQUENCER_248 0x30F8 +#define WM8994_WRITE_SEQUENCER_249 0x30F9 +#define WM8994_WRITE_SEQUENCER_250 0x30FA +#define WM8994_WRITE_SEQUENCER_251 0x30FB +#define WM8994_WRITE_SEQUENCER_252 0x30FC +#define WM8994_WRITE_SEQUENCER_253 0x30FD +#define WM8994_WRITE_SEQUENCER_254 0x30FE +#define WM8994_WRITE_SEQUENCER_255 0x30FF +#define WM8994_WRITE_SEQUENCER_256 0x3100 +#define WM8994_WRITE_SEQUENCER_257 0x3101 +#define WM8994_WRITE_SEQUENCER_258 0x3102 +#define WM8994_WRITE_SEQUENCER_259 0x3103 +#define WM8994_WRITE_SEQUENCER_260 0x3104 +#define WM8994_WRITE_SEQUENCER_261 0x3105 +#define WM8994_WRITE_SEQUENCER_262 0x3106 +#define WM8994_WRITE_SEQUENCER_263 0x3107 +#define WM8994_WRITE_SEQUENCER_264 0x3108 +#define WM8994_WRITE_SEQUENCER_265 0x3109 +#define WM8994_WRITE_SEQUENCER_266 0x310A +#define WM8994_WRITE_SEQUENCER_267 0x310B +#define WM8994_WRITE_SEQUENCER_268 0x310C +#define WM8994_WRITE_SEQUENCER_269 0x310D +#define WM8994_WRITE_SEQUENCER_270 0x310E +#define WM8994_WRITE_SEQUENCER_271 0x310F +#define WM8994_WRITE_SEQUENCER_272 0x3110 +#define WM8994_WRITE_SEQUENCER_273 0x3111 +#define WM8994_WRITE_SEQUENCER_274 0x3112 +#define WM8994_WRITE_SEQUENCER_275 0x3113 +#define WM8994_WRITE_SEQUENCER_276 0x3114 +#define WM8994_WRITE_SEQUENCER_277 0x3115 +#define WM8994_WRITE_SEQUENCER_278 0x3116 +#define WM8994_WRITE_SEQUENCER_279 0x3117 +#define WM8994_WRITE_SEQUENCER_280 0x3118 +#define WM8994_WRITE_SEQUENCER_281 0x3119 +#define WM8994_WRITE_SEQUENCER_282 0x311A +#define WM8994_WRITE_SEQUENCER_283 0x311B +#define WM8994_WRITE_SEQUENCER_284 0x311C +#define WM8994_WRITE_SEQUENCER_285 0x311D +#define WM8994_WRITE_SEQUENCER_286 0x311E +#define WM8994_WRITE_SEQUENCER_287 0x311F +#define WM8994_WRITE_SEQUENCER_288 0x3120 +#define WM8994_WRITE_SEQUENCER_289 0x3121 +#define WM8994_WRITE_SEQUENCER_290 0x3122 +#define WM8994_WRITE_SEQUENCER_291 0x3123 +#define WM8994_WRITE_SEQUENCER_292 0x3124 +#define WM8994_WRITE_SEQUENCER_293 
0x3125 +#define WM8994_WRITE_SEQUENCER_294 0x3126 +#define WM8994_WRITE_SEQUENCER_295 0x3127 +#define WM8994_WRITE_SEQUENCER_296 0x3128 +#define WM8994_WRITE_SEQUENCER_297 0x3129 +#define WM8994_WRITE_SEQUENCER_298 0x312A +#define WM8994_WRITE_SEQUENCER_299 0x312B +#define WM8994_WRITE_SEQUENCER_300 0x312C +#define WM8994_WRITE_SEQUENCER_301 0x312D +#define WM8994_WRITE_SEQUENCER_302 0x312E +#define WM8994_WRITE_SEQUENCER_303 0x312F +#define WM8994_WRITE_SEQUENCER_304 0x3130 +#define WM8994_WRITE_SEQUENCER_305 0x3131 +#define WM8994_WRITE_SEQUENCER_306 0x3132 +#define WM8994_WRITE_SEQUENCER_307 0x3133 +#define WM8994_WRITE_SEQUENCER_308 0x3134 +#define WM8994_WRITE_SEQUENCER_309 0x3135 +#define WM8994_WRITE_SEQUENCER_310 0x3136 +#define WM8994_WRITE_SEQUENCER_311 0x3137 +#define WM8994_WRITE_SEQUENCER_312 0x3138 +#define WM8994_WRITE_SEQUENCER_313 0x3139 +#define WM8994_WRITE_SEQUENCER_314 0x313A +#define WM8994_WRITE_SEQUENCER_315 0x313B +#define WM8994_WRITE_SEQUENCER_316 0x313C +#define WM8994_WRITE_SEQUENCER_317 0x313D +#define WM8994_WRITE_SEQUENCER_318 0x313E +#define WM8994_WRITE_SEQUENCER_319 0x313F +#define WM8994_WRITE_SEQUENCER_320 0x3140 +#define WM8994_WRITE_SEQUENCER_321 0x3141 +#define WM8994_WRITE_SEQUENCER_322 0x3142 +#define WM8994_WRITE_SEQUENCER_323 0x3143 +#define WM8994_WRITE_SEQUENCER_324 0x3144 +#define WM8994_WRITE_SEQUENCER_325 0x3145 +#define WM8994_WRITE_SEQUENCER_326 0x3146 +#define WM8994_WRITE_SEQUENCER_327 0x3147 +#define WM8994_WRITE_SEQUENCER_328 0x3148 +#define WM8994_WRITE_SEQUENCER_329 0x3149 +#define WM8994_WRITE_SEQUENCER_330 0x314A +#define WM8994_WRITE_SEQUENCER_331 0x314B +#define WM8994_WRITE_SEQUENCER_332 0x314C +#define WM8994_WRITE_SEQUENCER_333 0x314D +#define WM8994_WRITE_SEQUENCER_334 0x314E +#define WM8994_WRITE_SEQUENCER_335 0x314F +#define WM8994_WRITE_SEQUENCER_336 0x3150 +#define WM8994_WRITE_SEQUENCER_337 0x3151 +#define WM8994_WRITE_SEQUENCER_338 0x3152 +#define WM8994_WRITE_SEQUENCER_339 0x3153 +#define WM8994_WRITE_SEQUENCER_340 0x3154 +#define WM8994_WRITE_SEQUENCER_341 0x3155 +#define WM8994_WRITE_SEQUENCER_342 0x3156 +#define WM8994_WRITE_SEQUENCER_343 0x3157 +#define WM8994_WRITE_SEQUENCER_344 0x3158 +#define WM8994_WRITE_SEQUENCER_345 0x3159 +#define WM8994_WRITE_SEQUENCER_346 0x315A +#define WM8994_WRITE_SEQUENCER_347 0x315B +#define WM8994_WRITE_SEQUENCER_348 0x315C +#define WM8994_WRITE_SEQUENCER_349 0x315D +#define WM8994_WRITE_SEQUENCER_350 0x315E +#define WM8994_WRITE_SEQUENCER_351 0x315F +#define WM8994_WRITE_SEQUENCER_352 0x3160 +#define WM8994_WRITE_SEQUENCER_353 0x3161 +#define WM8994_WRITE_SEQUENCER_354 0x3162 +#define WM8994_WRITE_SEQUENCER_355 0x3163 +#define WM8994_WRITE_SEQUENCER_356 0x3164 +#define WM8994_WRITE_SEQUENCER_357 0x3165 +#define WM8994_WRITE_SEQUENCER_358 0x3166 +#define WM8994_WRITE_SEQUENCER_359 0x3167 +#define WM8994_WRITE_SEQUENCER_360 0x3168 +#define WM8994_WRITE_SEQUENCER_361 0x3169 +#define WM8994_WRITE_SEQUENCER_362 0x316A +#define WM8994_WRITE_SEQUENCER_363 0x316B +#define WM8994_WRITE_SEQUENCER_364 0x316C +#define WM8994_WRITE_SEQUENCER_365 0x316D +#define WM8994_WRITE_SEQUENCER_366 0x316E +#define WM8994_WRITE_SEQUENCER_367 0x316F +#define WM8994_WRITE_SEQUENCER_368 0x3170 +#define WM8994_WRITE_SEQUENCER_369 0x3171 +#define WM8994_WRITE_SEQUENCER_370 0x3172 +#define WM8994_WRITE_SEQUENCER_371 0x3173 +#define WM8994_WRITE_SEQUENCER_372 0x3174 +#define WM8994_WRITE_SEQUENCER_373 0x3175 +#define WM8994_WRITE_SEQUENCER_374 0x3176 +#define WM8994_WRITE_SEQUENCER_375 0x3177 +#define 
WM8994_WRITE_SEQUENCER_376 0x3178 +#define WM8994_WRITE_SEQUENCER_377 0x3179 +#define WM8994_WRITE_SEQUENCER_378 0x317A +#define WM8994_WRITE_SEQUENCER_379 0x317B +#define WM8994_WRITE_SEQUENCER_380 0x317C +#define WM8994_WRITE_SEQUENCER_381 0x317D +#define WM8994_WRITE_SEQUENCER_382 0x317E +#define WM8994_WRITE_SEQUENCER_383 0x317F +#define WM8994_WRITE_SEQUENCER_384 0x3180 +#define WM8994_WRITE_SEQUENCER_385 0x3181 +#define WM8994_WRITE_SEQUENCER_386 0x3182 +#define WM8994_WRITE_SEQUENCER_387 0x3183 +#define WM8994_WRITE_SEQUENCER_388 0x3184 +#define WM8994_WRITE_SEQUENCER_389 0x3185 +#define WM8994_WRITE_SEQUENCER_390 0x3186 +#define WM8994_WRITE_SEQUENCER_391 0x3187 +#define WM8994_WRITE_SEQUENCER_392 0x3188 +#define WM8994_WRITE_SEQUENCER_393 0x3189 +#define WM8994_WRITE_SEQUENCER_394 0x318A +#define WM8994_WRITE_SEQUENCER_395 0x318B +#define WM8994_WRITE_SEQUENCER_396 0x318C +#define WM8994_WRITE_SEQUENCER_397 0x318D +#define WM8994_WRITE_SEQUENCER_398 0x318E +#define WM8994_WRITE_SEQUENCER_399 0x318F +#define WM8994_WRITE_SEQUENCER_400 0x3190 +#define WM8994_WRITE_SEQUENCER_401 0x3191 +#define WM8994_WRITE_SEQUENCER_402 0x3192 +#define WM8994_WRITE_SEQUENCER_403 0x3193 +#define WM8994_WRITE_SEQUENCER_404 0x3194 +#define WM8994_WRITE_SEQUENCER_405 0x3195 +#define WM8994_WRITE_SEQUENCER_406 0x3196 +#define WM8994_WRITE_SEQUENCER_407 0x3197 +#define WM8994_WRITE_SEQUENCER_408 0x3198 +#define WM8994_WRITE_SEQUENCER_409 0x3199 +#define WM8994_WRITE_SEQUENCER_410 0x319A +#define WM8994_WRITE_SEQUENCER_411 0x319B +#define WM8994_WRITE_SEQUENCER_412 0x319C +#define WM8994_WRITE_SEQUENCER_413 0x319D +#define WM8994_WRITE_SEQUENCER_414 0x319E +#define WM8994_WRITE_SEQUENCER_415 0x319F +#define WM8994_WRITE_SEQUENCER_416 0x31A0 +#define WM8994_WRITE_SEQUENCER_417 0x31A1 +#define WM8994_WRITE_SEQUENCER_418 0x31A2 +#define WM8994_WRITE_SEQUENCER_419 0x31A3 +#define WM8994_WRITE_SEQUENCER_420 0x31A4 +#define WM8994_WRITE_SEQUENCER_421 0x31A5 +#define WM8994_WRITE_SEQUENCER_422 0x31A6 +#define WM8994_WRITE_SEQUENCER_423 0x31A7 +#define WM8994_WRITE_SEQUENCER_424 0x31A8 +#define WM8994_WRITE_SEQUENCER_425 0x31A9 +#define WM8994_WRITE_SEQUENCER_426 0x31AA +#define WM8994_WRITE_SEQUENCER_427 0x31AB +#define WM8994_WRITE_SEQUENCER_428 0x31AC +#define WM8994_WRITE_SEQUENCER_429 0x31AD +#define WM8994_WRITE_SEQUENCER_430 0x31AE +#define WM8994_WRITE_SEQUENCER_431 0x31AF +#define WM8994_WRITE_SEQUENCER_432 0x31B0 +#define WM8994_WRITE_SEQUENCER_433 0x31B1 +#define WM8994_WRITE_SEQUENCER_434 0x31B2 +#define WM8994_WRITE_SEQUENCER_435 0x31B3 +#define WM8994_WRITE_SEQUENCER_436 0x31B4 +#define WM8994_WRITE_SEQUENCER_437 0x31B5 +#define WM8994_WRITE_SEQUENCER_438 0x31B6 +#define WM8994_WRITE_SEQUENCER_439 0x31B7 +#define WM8994_WRITE_SEQUENCER_440 0x31B8 +#define WM8994_WRITE_SEQUENCER_441 0x31B9 +#define WM8994_WRITE_SEQUENCER_442 0x31BA +#define WM8994_WRITE_SEQUENCER_443 0x31BB +#define WM8994_WRITE_SEQUENCER_444 0x31BC +#define WM8994_WRITE_SEQUENCER_445 0x31BD +#define WM8994_WRITE_SEQUENCER_446 0x31BE +#define WM8994_WRITE_SEQUENCER_447 0x31BF +#define WM8994_WRITE_SEQUENCER_448 0x31C0 +#define WM8994_WRITE_SEQUENCER_449 0x31C1 +#define WM8994_WRITE_SEQUENCER_450 0x31C2 +#define WM8994_WRITE_SEQUENCER_451 0x31C3 +#define WM8994_WRITE_SEQUENCER_452 0x31C4 +#define WM8994_WRITE_SEQUENCER_453 0x31C5 +#define WM8994_WRITE_SEQUENCER_454 0x31C6 +#define WM8994_WRITE_SEQUENCER_455 0x31C7 +#define WM8994_WRITE_SEQUENCER_456 0x31C8 +#define WM8994_WRITE_SEQUENCER_457 0x31C9 +#define WM8994_WRITE_SEQUENCER_458 
0x31CA +#define WM8994_WRITE_SEQUENCER_459 0x31CB +#define WM8994_WRITE_SEQUENCER_460 0x31CC +#define WM8994_WRITE_SEQUENCER_461 0x31CD +#define WM8994_WRITE_SEQUENCER_462 0x31CE +#define WM8994_WRITE_SEQUENCER_463 0x31CF +#define WM8994_WRITE_SEQUENCER_464 0x31D0 +#define WM8994_WRITE_SEQUENCER_465 0x31D1 +#define WM8994_WRITE_SEQUENCER_466 0x31D2 +#define WM8994_WRITE_SEQUENCER_467 0x31D3 +#define WM8994_WRITE_SEQUENCER_468 0x31D4 +#define WM8994_WRITE_SEQUENCER_469 0x31D5 +#define WM8994_WRITE_SEQUENCER_470 0x31D6 +#define WM8994_WRITE_SEQUENCER_471 0x31D7 +#define WM8994_WRITE_SEQUENCER_472 0x31D8 +#define WM8994_WRITE_SEQUENCER_473 0x31D9 +#define WM8994_WRITE_SEQUENCER_474 0x31DA +#define WM8994_WRITE_SEQUENCER_475 0x31DB +#define WM8994_WRITE_SEQUENCER_476 0x31DC +#define WM8994_WRITE_SEQUENCER_477 0x31DD +#define WM8994_WRITE_SEQUENCER_478 0x31DE +#define WM8994_WRITE_SEQUENCER_479 0x31DF +#define WM8994_WRITE_SEQUENCER_480 0x31E0 +#define WM8994_WRITE_SEQUENCER_481 0x31E1 +#define WM8994_WRITE_SEQUENCER_482 0x31E2 +#define WM8994_WRITE_SEQUENCER_483 0x31E3 +#define WM8994_WRITE_SEQUENCER_484 0x31E4 +#define WM8994_WRITE_SEQUENCER_485 0x31E5 +#define WM8994_WRITE_SEQUENCER_486 0x31E6 +#define WM8994_WRITE_SEQUENCER_487 0x31E7 +#define WM8994_WRITE_SEQUENCER_488 0x31E8 +#define WM8994_WRITE_SEQUENCER_489 0x31E9 +#define WM8994_WRITE_SEQUENCER_490 0x31EA +#define WM8994_WRITE_SEQUENCER_491 0x31EB +#define WM8994_WRITE_SEQUENCER_492 0x31EC +#define WM8994_WRITE_SEQUENCER_493 0x31ED +#define WM8994_WRITE_SEQUENCER_494 0x31EE +#define WM8994_WRITE_SEQUENCER_495 0x31EF +#define WM8994_WRITE_SEQUENCER_496 0x31F0 +#define WM8994_WRITE_SEQUENCER_497 0x31F1 +#define WM8994_WRITE_SEQUENCER_498 0x31F2 +#define WM8994_WRITE_SEQUENCER_499 0x31F3 +#define WM8994_WRITE_SEQUENCER_500 0x31F4 +#define WM8994_WRITE_SEQUENCER_501 0x31F5 +#define WM8994_WRITE_SEQUENCER_502 0x31F6 +#define WM8994_WRITE_SEQUENCER_503 0x31F7 +#define WM8994_WRITE_SEQUENCER_504 0x31F8 +#define WM8994_WRITE_SEQUENCER_505 0x31F9 +#define WM8994_WRITE_SEQUENCER_506 0x31FA +#define WM8994_WRITE_SEQUENCER_507 0x31FB +#define WM8994_WRITE_SEQUENCER_508 0x31FC +#define WM8994_WRITE_SEQUENCER_509 0x31FD +#define WM8994_WRITE_SEQUENCER_510 0x31FE +#define WM8994_WRITE_SEQUENCER_511 0x31FF + +#define WM8994_REGISTER_COUNT 736 +#define WM8994_MAX_REGISTER 0x31FF +#define WM8994_MAX_CACHED_REGISTER 0x749 + +/* + * Field Definitions. 
+ */ + +/* + * R0 (0x00) - Software Reset + */ +#define WM8994_SW_RESET_MASK 0xFFFF /* SW_RESET - [15:0] */ +#define WM8994_SW_RESET_SHIFT 0 /* SW_RESET - [15:0] */ +#define WM8994_SW_RESET_WIDTH 16 /* SW_RESET - [15:0] */ + +/* + * R1 (0x01) - Power Management (1) + */ +#define WM8994_SPKOUTR_ENA 0x2000 /* SPKOUTR_ENA */ +#define WM8994_SPKOUTR_ENA_MASK 0x2000 /* SPKOUTR_ENA */ +#define WM8994_SPKOUTR_ENA_SHIFT 13 /* SPKOUTR_ENA */ +#define WM8994_SPKOUTR_ENA_WIDTH 1 /* SPKOUTR_ENA */ +#define WM8994_SPKOUTL_ENA 0x1000 /* SPKOUTL_ENA */ +#define WM8994_SPKOUTL_ENA_MASK 0x1000 /* SPKOUTL_ENA */ +#define WM8994_SPKOUTL_ENA_SHIFT 12 /* SPKOUTL_ENA */ +#define WM8994_SPKOUTL_ENA_WIDTH 1 /* SPKOUTL_ENA */ +#define WM8994_HPOUT2_ENA 0x0800 /* HPOUT2_ENA */ +#define WM8994_HPOUT2_ENA_MASK 0x0800 /* HPOUT2_ENA */ +#define WM8994_HPOUT2_ENA_SHIFT 11 /* HPOUT2_ENA */ +#define WM8994_HPOUT2_ENA_WIDTH 1 /* HPOUT2_ENA */ +#define WM8994_HPOUT1L_ENA 0x0200 /* HPOUT1L_ENA */ +#define WM8994_HPOUT1L_ENA_MASK 0x0200 /* HPOUT1L_ENA */ +#define WM8994_HPOUT1L_ENA_SHIFT 9 /* HPOUT1L_ENA */ +#define WM8994_HPOUT1L_ENA_WIDTH 1 /* HPOUT1L_ENA */ +#define WM8994_HPOUT1R_ENA 0x0100 /* HPOUT1R_ENA */ +#define WM8994_HPOUT1R_ENA_MASK 0x0100 /* HPOUT1R_ENA */ +#define WM8994_HPOUT1R_ENA_SHIFT 8 /* HPOUT1R_ENA */ +#define WM8994_HPOUT1R_ENA_WIDTH 1 /* HPOUT1R_ENA */ +#define WM8994_MICB2_ENA 0x0020 /* MICB2_ENA */ +#define WM8994_MICB2_ENA_MASK 0x0020 /* MICB2_ENA */ +#define WM8994_MICB2_ENA_SHIFT 5 /* MICB2_ENA */ +#define WM8994_MICB2_ENA_WIDTH 1 /* MICB2_ENA */ +#define WM8994_MICB1_ENA 0x0010 /* MICB1_ENA */ +#define WM8994_MICB1_ENA_MASK 0x0010 /* MICB1_ENA */ +#define WM8994_MICB1_ENA_SHIFT 4 /* MICB1_ENA */ +#define WM8994_MICB1_ENA_WIDTH 1 /* MICB1_ENA */ +#define WM8994_VMID_SEL_MASK 0x0006 /* VMID_SEL - [2:1] */ +#define WM8994_VMID_SEL_SHIFT 1 /* VMID_SEL - [2:1] */ +#define WM8994_VMID_SEL_WIDTH 2 /* VMID_SEL - [2:1] */ +#define WM8994_BIAS_ENA 0x0001 /* BIAS_ENA */ +#define WM8994_BIAS_ENA_MASK 0x0001 /* BIAS_ENA */ +#define WM8994_BIAS_ENA_SHIFT 0 /* BIAS_ENA */ +#define WM8994_BIAS_ENA_WIDTH 1 /* BIAS_ENA */ + +/* + * R2 (0x02) - Power Management (2) + */ +#define WM8994_TSHUT_ENA 0x4000 /* TSHUT_ENA */ +#define WM8994_TSHUT_ENA_MASK 0x4000 /* TSHUT_ENA */ +#define WM8994_TSHUT_ENA_SHIFT 14 /* TSHUT_ENA */ +#define WM8994_TSHUT_ENA_WIDTH 1 /* TSHUT_ENA */ +#define WM8994_TSHUT_OPDIS 0x2000 /* TSHUT_OPDIS */ +#define WM8994_TSHUT_OPDIS_MASK 0x2000 /* TSHUT_OPDIS */ +#define WM8994_TSHUT_OPDIS_SHIFT 13 /* TSHUT_OPDIS */ +#define WM8994_TSHUT_OPDIS_WIDTH 1 /* TSHUT_OPDIS */ +#define WM8994_OPCLK_ENA 0x0800 /* OPCLK_ENA */ +#define WM8994_OPCLK_ENA_MASK 0x0800 /* OPCLK_ENA */ +#define WM8994_OPCLK_ENA_SHIFT 11 /* OPCLK_ENA */ +#define WM8994_OPCLK_ENA_WIDTH 1 /* OPCLK_ENA */ +#define WM8994_MIXINL_ENA 0x0200 /* MIXINL_ENA */ +#define WM8994_MIXINL_ENA_MASK 0x0200 /* MIXINL_ENA */ +#define WM8994_MIXINL_ENA_SHIFT 9 /* MIXINL_ENA */ +#define WM8994_MIXINL_ENA_WIDTH 1 /* MIXINL_ENA */ +#define WM8994_MIXINR_ENA 0x0100 /* MIXINR_ENA */ +#define WM8994_MIXINR_ENA_MASK 0x0100 /* MIXINR_ENA */ +#define WM8994_MIXINR_ENA_SHIFT 8 /* MIXINR_ENA */ +#define WM8994_MIXINR_ENA_WIDTH 1 /* MIXINR_ENA */ +#define WM8994_IN2L_ENA 0x0080 /* IN2L_ENA */ +#define WM8994_IN2L_ENA_MASK 0x0080 /* IN2L_ENA */ +#define WM8994_IN2L_ENA_SHIFT 7 /* IN2L_ENA */ +#define WM8994_IN2L_ENA_WIDTH 1 /* IN2L_ENA */ +#define WM8994_IN1L_ENA 0x0040 /* IN1L_ENA */ +#define WM8994_IN1L_ENA_MASK 0x0040 /* IN1L_ENA */ +#define 
WM8994_IN1L_ENA_SHIFT 6 /* IN1L_ENA */ +#define WM8994_IN1L_ENA_WIDTH 1 /* IN1L_ENA */ +#define WM8994_IN2R_ENA 0x0020 /* IN2R_ENA */ +#define WM8994_IN2R_ENA_MASK 0x0020 /* IN2R_ENA */ +#define WM8994_IN2R_ENA_SHIFT 5 /* IN2R_ENA */ +#define WM8994_IN2R_ENA_WIDTH 1 /* IN2R_ENA */ +#define WM8994_IN1R_ENA 0x0010 /* IN1R_ENA */ +#define WM8994_IN1R_ENA_MASK 0x0010 /* IN1R_ENA */ +#define WM8994_IN1R_ENA_SHIFT 4 /* IN1R_ENA */ +#define WM8994_IN1R_ENA_WIDTH 1 /* IN1R_ENA */ + +/* + * R3 (0x03) - Power Management (3) + */ +#define WM8994_LINEOUT1N_ENA 0x2000 /* LINEOUT1N_ENA */ +#define WM8994_LINEOUT1N_ENA_MASK 0x2000 /* LINEOUT1N_ENA */ +#define WM8994_LINEOUT1N_ENA_SHIFT 13 /* LINEOUT1N_ENA */ +#define WM8994_LINEOUT1N_ENA_WIDTH 1 /* LINEOUT1N_ENA */ +#define WM8994_LINEOUT1P_ENA 0x1000 /* LINEOUT1P_ENA */ +#define WM8994_LINEOUT1P_ENA_MASK 0x1000 /* LINEOUT1P_ENA */ +#define WM8994_LINEOUT1P_ENA_SHIFT 12 /* LINEOUT1P_ENA */ +#define WM8994_LINEOUT1P_ENA_WIDTH 1 /* LINEOUT1P_ENA */ +#define WM8994_LINEOUT2N_ENA 0x0800 /* LINEOUT2N_ENA */ +#define WM8994_LINEOUT2N_ENA_MASK 0x0800 /* LINEOUT2N_ENA */ +#define WM8994_LINEOUT2N_ENA_SHIFT 11 /* LINEOUT2N_ENA */ +#define WM8994_LINEOUT2N_ENA_WIDTH 1 /* LINEOUT2N_ENA */ +#define WM8994_LINEOUT2P_ENA 0x0400 /* LINEOUT2P_ENA */ +#define WM8994_LINEOUT2P_ENA_MASK 0x0400 /* LINEOUT2P_ENA */ +#define WM8994_LINEOUT2P_ENA_SHIFT 10 /* LINEOUT2P_ENA */ +#define WM8994_LINEOUT2P_ENA_WIDTH 1 /* LINEOUT2P_ENA */ +#define WM8994_SPKRVOL_ENA 0x0200 /* SPKRVOL_ENA */ +#define WM8994_SPKRVOL_ENA_MASK 0x0200 /* SPKRVOL_ENA */ +#define WM8994_SPKRVOL_ENA_SHIFT 9 /* SPKRVOL_ENA */ +#define WM8994_SPKRVOL_ENA_WIDTH 1 /* SPKRVOL_ENA */ +#define WM8994_SPKLVOL_ENA 0x0100 /* SPKLVOL_ENA */ +#define WM8994_SPKLVOL_ENA_MASK 0x0100 /* SPKLVOL_ENA */ +#define WM8994_SPKLVOL_ENA_SHIFT 8 /* SPKLVOL_ENA */ +#define WM8994_SPKLVOL_ENA_WIDTH 1 /* SPKLVOL_ENA */ +#define WM8994_MIXOUTLVOL_ENA 0x0080 /* MIXOUTLVOL_ENA */ +#define WM8994_MIXOUTLVOL_ENA_MASK 0x0080 /* MIXOUTLVOL_ENA */ +#define WM8994_MIXOUTLVOL_ENA_SHIFT 7 /* MIXOUTLVOL_ENA */ +#define WM8994_MIXOUTLVOL_ENA_WIDTH 1 /* MIXOUTLVOL_ENA */ +#define WM8994_MIXOUTRVOL_ENA 0x0040 /* MIXOUTRVOL_ENA */ +#define WM8994_MIXOUTRVOL_ENA_MASK 0x0040 /* MIXOUTRVOL_ENA */ +#define WM8994_MIXOUTRVOL_ENA_SHIFT 6 /* MIXOUTRVOL_ENA */ +#define WM8994_MIXOUTRVOL_ENA_WIDTH 1 /* MIXOUTRVOL_ENA */ +#define WM8994_MIXOUTL_ENA 0x0020 /* MIXOUTL_ENA */ +#define WM8994_MIXOUTL_ENA_MASK 0x0020 /* MIXOUTL_ENA */ +#define WM8994_MIXOUTL_ENA_SHIFT 5 /* MIXOUTL_ENA */ +#define WM8994_MIXOUTL_ENA_WIDTH 1 /* MIXOUTL_ENA */ +#define WM8994_MIXOUTR_ENA 0x0010 /* MIXOUTR_ENA */ +#define WM8994_MIXOUTR_ENA_MASK 0x0010 /* MIXOUTR_ENA */ +#define WM8994_MIXOUTR_ENA_SHIFT 4 /* MIXOUTR_ENA */ +#define WM8994_MIXOUTR_ENA_WIDTH 1 /* MIXOUTR_ENA */ + +/* + * R4 (0x04) - Power Management (4) + */ +#define WM8994_AIF2ADCL_ENA 0x2000 /* AIF2ADCL_ENA */ +#define WM8994_AIF2ADCL_ENA_MASK 0x2000 /* AIF2ADCL_ENA */ +#define WM8994_AIF2ADCL_ENA_SHIFT 13 /* AIF2ADCL_ENA */ +#define WM8994_AIF2ADCL_ENA_WIDTH 1 /* AIF2ADCL_ENA */ +#define WM8994_AIF2ADCR_ENA 0x1000 /* AIF2ADCR_ENA */ +#define WM8994_AIF2ADCR_ENA_MASK 0x1000 /* AIF2ADCR_ENA */ +#define WM8994_AIF2ADCR_ENA_SHIFT 12 /* AIF2ADCR_ENA */ +#define WM8994_AIF2ADCR_ENA_WIDTH 1 /* AIF2ADCR_ENA */ +#define WM8994_AIF1ADC2L_ENA 0x0800 /* AIF1ADC2L_ENA */ +#define WM8994_AIF1ADC2L_ENA_MASK 0x0800 /* AIF1ADC2L_ENA */ +#define WM8994_AIF1ADC2L_ENA_SHIFT 11 /* AIF1ADC2L_ENA */ +#define WM8994_AIF1ADC2L_ENA_WIDTH 1 /* 
AIF1ADC2L_ENA */ +#define WM8994_AIF1ADC2R_ENA 0x0400 /* AIF1ADC2R_ENA */ +#define WM8994_AIF1ADC2R_ENA_MASK 0x0400 /* AIF1ADC2R_ENA */ +#define WM8994_AIF1ADC2R_ENA_SHIFT 10 /* AIF1ADC2R_ENA */ +#define WM8994_AIF1ADC2R_ENA_WIDTH 1 /* AIF1ADC2R_ENA */ +#define WM8994_AIF1ADC1L_ENA 0x0200 /* AIF1ADC1L_ENA */ +#define WM8994_AIF1ADC1L_ENA_MASK 0x0200 /* AIF1ADC1L_ENA */ +#define WM8994_AIF1ADC1L_ENA_SHIFT 9 /* AIF1ADC1L_ENA */ +#define WM8994_AIF1ADC1L_ENA_WIDTH 1 /* AIF1ADC1L_ENA */ +#define WM8994_AIF1ADC1R_ENA 0x0100 /* AIF1ADC1R_ENA */ +#define WM8994_AIF1ADC1R_ENA_MASK 0x0100 /* AIF1ADC1R_ENA */ +#define WM8994_AIF1ADC1R_ENA_SHIFT 8 /* AIF1ADC1R_ENA */ +#define WM8994_AIF1ADC1R_ENA_WIDTH 1 /* AIF1ADC1R_ENA */ +#define WM8994_DMIC2L_ENA 0x0020 /* DMIC2L_ENA */ +#define WM8994_DMIC2L_ENA_MASK 0x0020 /* DMIC2L_ENA */ +#define WM8994_DMIC2L_ENA_SHIFT 5 /* DMIC2L_ENA */ +#define WM8994_DMIC2L_ENA_WIDTH 1 /* DMIC2L_ENA */ +#define WM8994_DMIC2R_ENA 0x0010 /* DMIC2R_ENA */ +#define WM8994_DMIC2R_ENA_MASK 0x0010 /* DMIC2R_ENA */ +#define WM8994_DMIC2R_ENA_SHIFT 4 /* DMIC2R_ENA */ +#define WM8994_DMIC2R_ENA_WIDTH 1 /* DMIC2R_ENA */ +#define WM8994_DMIC1L_ENA 0x0008 /* DMIC1L_ENA */ +#define WM8994_DMIC1L_ENA_MASK 0x0008 /* DMIC1L_ENA */ +#define WM8994_DMIC1L_ENA_SHIFT 3 /* DMIC1L_ENA */ +#define WM8994_DMIC1L_ENA_WIDTH 1 /* DMIC1L_ENA */ +#define WM8994_DMIC1R_ENA 0x0004 /* DMIC1R_ENA */ +#define WM8994_DMIC1R_ENA_MASK 0x0004 /* DMIC1R_ENA */ +#define WM8994_DMIC1R_ENA_SHIFT 2 /* DMIC1R_ENA */ +#define WM8994_DMIC1R_ENA_WIDTH 1 /* DMIC1R_ENA */ +#define WM8994_ADCL_ENA 0x0002 /* ADCL_ENA */ +#define WM8994_ADCL_ENA_MASK 0x0002 /* ADCL_ENA */ +#define WM8994_ADCL_ENA_SHIFT 1 /* ADCL_ENA */ +#define WM8994_ADCL_ENA_WIDTH 1 /* ADCL_ENA */ +#define WM8994_ADCR_ENA 0x0001 /* ADCR_ENA */ +#define WM8994_ADCR_ENA_MASK 0x0001 /* ADCR_ENA */ +#define WM8994_ADCR_ENA_SHIFT 0 /* ADCR_ENA */ +#define WM8994_ADCR_ENA_WIDTH 1 /* ADCR_ENA */ + +/* + * R5 (0x05) - Power Management (5) + */ +#define WM8994_AIF2DACL_ENA 0x2000 /* AIF2DACL_ENA */ +#define WM8994_AIF2DACL_ENA_MASK 0x2000 /* AIF2DACL_ENA */ +#define WM8994_AIF2DACL_ENA_SHIFT 13 /* AIF2DACL_ENA */ +#define WM8994_AIF2DACL_ENA_WIDTH 1 /* AIF2DACL_ENA */ +#define WM8994_AIF2DACR_ENA 0x1000 /* AIF2DACR_ENA */ +#define WM8994_AIF2DACR_ENA_MASK 0x1000 /* AIF2DACR_ENA */ +#define WM8994_AIF2DACR_ENA_SHIFT 12 /* AIF2DACR_ENA */ +#define WM8994_AIF2DACR_ENA_WIDTH 1 /* AIF2DACR_ENA */ +#define WM8994_AIF1DAC2L_ENA 0x0800 /* AIF1DAC2L_ENA */ +#define WM8994_AIF1DAC2L_ENA_MASK 0x0800 /* AIF1DAC2L_ENA */ +#define WM8994_AIF1DAC2L_ENA_SHIFT 11 /* AIF1DAC2L_ENA */ +#define WM8994_AIF1DAC2L_ENA_WIDTH 1 /* AIF1DAC2L_ENA */ +#define WM8994_AIF1DAC2R_ENA 0x0400 /* AIF1DAC2R_ENA */ +#define WM8994_AIF1DAC2R_ENA_MASK 0x0400 /* AIF1DAC2R_ENA */ +#define WM8994_AIF1DAC2R_ENA_SHIFT 10 /* AIF1DAC2R_ENA */ +#define WM8994_AIF1DAC2R_ENA_WIDTH 1 /* AIF1DAC2R_ENA */ +#define WM8994_AIF1DAC1L_ENA 0x0200 /* AIF1DAC1L_ENA */ +#define WM8994_AIF1DAC1L_ENA_MASK 0x0200 /* AIF1DAC1L_ENA */ +#define WM8994_AIF1DAC1L_ENA_SHIFT 9 /* AIF1DAC1L_ENA */ +#define WM8994_AIF1DAC1L_ENA_WIDTH 1 /* AIF1DAC1L_ENA */ +#define WM8994_AIF1DAC1R_ENA 0x0100 /* AIF1DAC1R_ENA */ +#define WM8994_AIF1DAC1R_ENA_MASK 0x0100 /* AIF1DAC1R_ENA */ +#define WM8994_AIF1DAC1R_ENA_SHIFT 8 /* AIF1DAC1R_ENA */ +#define WM8994_AIF1DAC1R_ENA_WIDTH 1 /* AIF1DAC1R_ENA */ +#define WM8994_DAC2L_ENA 0x0008 /* DAC2L_ENA */ +#define WM8994_DAC2L_ENA_MASK 0x0008 /* DAC2L_ENA */ +#define WM8994_DAC2L_ENA_SHIFT 3 /* DAC2L_ENA 
*/ +#define WM8994_DAC2L_ENA_WIDTH 1 /* DAC2L_ENA */ +#define WM8994_DAC2R_ENA 0x0004 /* DAC2R_ENA */ +#define WM8994_DAC2R_ENA_MASK 0x0004 /* DAC2R_ENA */ +#define WM8994_DAC2R_ENA_SHIFT 2 /* DAC2R_ENA */ +#define WM8994_DAC2R_ENA_WIDTH 1 /* DAC2R_ENA */ +#define WM8994_DAC1L_ENA 0x0002 /* DAC1L_ENA */ +#define WM8994_DAC1L_ENA_MASK 0x0002 /* DAC1L_ENA */ +#define WM8994_DAC1L_ENA_SHIFT 1 /* DAC1L_ENA */ +#define WM8994_DAC1L_ENA_WIDTH 1 /* DAC1L_ENA */ +#define WM8994_DAC1R_ENA 0x0001 /* DAC1R_ENA */ +#define WM8994_DAC1R_ENA_MASK 0x0001 /* DAC1R_ENA */ +#define WM8994_DAC1R_ENA_SHIFT 0 /* DAC1R_ENA */ +#define WM8994_DAC1R_ENA_WIDTH 1 /* DAC1R_ENA */ + +/* + * R6 (0x06) - Power Management (6) + */ +#define WM8994_AIF3_TRI 0x0020 /* AIF3_TRI */ +#define WM8994_AIF3_TRI_MASK 0x0020 /* AIF3_TRI */ +#define WM8994_AIF3_TRI_SHIFT 5 /* AIF3_TRI */ +#define WM8994_AIF3_TRI_WIDTH 1 /* AIF3_TRI */ +#define WM8994_AIF3_ADCDAT_SRC_MASK 0x0018 /* AIF3_ADCDAT_SRC - [4:3] */ +#define WM8994_AIF3_ADCDAT_SRC_SHIFT 3 /* AIF3_ADCDAT_SRC - [4:3] */ +#define WM8994_AIF3_ADCDAT_SRC_WIDTH 2 /* AIF3_ADCDAT_SRC - [4:3] */ +#define WM8994_AIF2_ADCDAT_SRC 0x0004 /* AIF2_ADCDAT_SRC */ +#define WM8994_AIF2_ADCDAT_SRC_MASK 0x0004 /* AIF2_ADCDAT_SRC */ +#define WM8994_AIF2_ADCDAT_SRC_SHIFT 2 /* AIF2_ADCDAT_SRC */ +#define WM8994_AIF2_ADCDAT_SRC_WIDTH 1 /* AIF2_ADCDAT_SRC */ +#define WM8994_AIF2_DACDAT_SRC 0x0002 /* AIF2_DACDAT_SRC */ +#define WM8994_AIF2_DACDAT_SRC_MASK 0x0002 /* AIF2_DACDAT_SRC */ +#define WM8994_AIF2_DACDAT_SRC_SHIFT 1 /* AIF2_DACDAT_SRC */ +#define WM8994_AIF2_DACDAT_SRC_WIDTH 1 /* AIF2_DACDAT_SRC */ +#define WM8994_AIF1_DACDAT_SRC 0x0001 /* AIF1_DACDAT_SRC */ +#define WM8994_AIF1_DACDAT_SRC_MASK 0x0001 /* AIF1_DACDAT_SRC */ +#define WM8994_AIF1_DACDAT_SRC_SHIFT 0 /* AIF1_DACDAT_SRC */ +#define WM8994_AIF1_DACDAT_SRC_WIDTH 1 /* AIF1_DACDAT_SRC */ + +/* + * R21 (0x15) - Input Mixer (1) + */ +#define WM8994_IN1RP_MIXINR_BOOST 0x0100 /* IN1RP_MIXINR_BOOST */ +#define WM8994_IN1RP_MIXINR_BOOST_MASK 0x0100 /* IN1RP_MIXINR_BOOST */ +#define WM8994_IN1RP_MIXINR_BOOST_SHIFT 8 /* IN1RP_MIXINR_BOOST */ +#define WM8994_IN1RP_MIXINR_BOOST_WIDTH 1 /* IN1RP_MIXINR_BOOST */ +#define WM8994_IN1LP_MIXINL_BOOST 0x0080 /* IN1LP_MIXINL_BOOST */ +#define WM8994_IN1LP_MIXINL_BOOST_MASK 0x0080 /* IN1LP_MIXINL_BOOST */ +#define WM8994_IN1LP_MIXINL_BOOST_SHIFT 7 /* IN1LP_MIXINL_BOOST */ +#define WM8994_IN1LP_MIXINL_BOOST_WIDTH 1 /* IN1LP_MIXINL_BOOST */ +#define WM8994_INPUTS_CLAMP 0x0040 /* INPUTS_CLAMP */ +#define WM8994_INPUTS_CLAMP_MASK 0x0040 /* INPUTS_CLAMP */ +#define WM8994_INPUTS_CLAMP_SHIFT 6 /* INPUTS_CLAMP */ +#define WM8994_INPUTS_CLAMP_WIDTH 1 /* INPUTS_CLAMP */ + +/* + * R24 (0x18) - Left Line Input 1&2 Volume + */ +#define WM8994_IN1_VU 0x0100 /* IN1_VU */ +#define WM8994_IN1_VU_MASK 0x0100 /* IN1_VU */ +#define WM8994_IN1_VU_SHIFT 8 /* IN1_VU */ +#define WM8994_IN1_VU_WIDTH 1 /* IN1_VU */ +#define WM8994_IN1L_MUTE 0x0080 /* IN1L_MUTE */ +#define WM8994_IN1L_MUTE_MASK 0x0080 /* IN1L_MUTE */ +#define WM8994_IN1L_MUTE_SHIFT 7 /* IN1L_MUTE */ +#define WM8994_IN1L_MUTE_WIDTH 1 /* IN1L_MUTE */ +#define WM8994_IN1L_ZC 0x0040 /* IN1L_ZC */ +#define WM8994_IN1L_ZC_MASK 0x0040 /* IN1L_ZC */ +#define WM8994_IN1L_ZC_SHIFT 6 /* IN1L_ZC */ +#define WM8994_IN1L_ZC_WIDTH 1 /* IN1L_ZC */ +#define WM8994_IN1L_VOL_MASK 0x001F /* IN1L_VOL - [4:0] */ +#define WM8994_IN1L_VOL_SHIFT 0 /* IN1L_VOL - [4:0] */ +#define WM8994_IN1L_VOL_WIDTH 5 /* IN1L_VOL - [4:0] */ + +/* + * R25 (0x19) - Left Line Input 3&4 Volume + */ +#define 
WM8994_IN2_VU 0x0100 /* IN2_VU */ +#define WM8994_IN2_VU_MASK 0x0100 /* IN2_VU */ +#define WM8994_IN2_VU_SHIFT 8 /* IN2_VU */ +#define WM8994_IN2_VU_WIDTH 1 /* IN2_VU */ +#define WM8994_IN2L_MUTE 0x0080 /* IN2L_MUTE */ +#define WM8994_IN2L_MUTE_MASK 0x0080 /* IN2L_MUTE */ +#define WM8994_IN2L_MUTE_SHIFT 7 /* IN2L_MUTE */ +#define WM8994_IN2L_MUTE_WIDTH 1 /* IN2L_MUTE */ +#define WM8994_IN2L_ZC 0x0040 /* IN2L_ZC */ +#define WM8994_IN2L_ZC_MASK 0x0040 /* IN2L_ZC */ +#define WM8994_IN2L_ZC_SHIFT 6 /* IN2L_ZC */ +#define WM8994_IN2L_ZC_WIDTH 1 /* IN2L_ZC */ +#define WM8994_IN2L_VOL_MASK 0x001F /* IN2L_VOL - [4:0] */ +#define WM8994_IN2L_VOL_SHIFT 0 /* IN2L_VOL - [4:0] */ +#define WM8994_IN2L_VOL_WIDTH 5 /* IN2L_VOL - [4:0] */ + +/* + * R26 (0x1A) - Right Line Input 1&2 Volume + */ +#define WM8994_IN1_VU 0x0100 /* IN1_VU */ +#define WM8994_IN1_VU_MASK 0x0100 /* IN1_VU */ +#define WM8994_IN1_VU_SHIFT 8 /* IN1_VU */ +#define WM8994_IN1_VU_WIDTH 1 /* IN1_VU */ +#define WM8994_IN1R_MUTE 0x0080 /* IN1R_MUTE */ +#define WM8994_IN1R_MUTE_MASK 0x0080 /* IN1R_MUTE */ +#define WM8994_IN1R_MUTE_SHIFT 7 /* IN1R_MUTE */ +#define WM8994_IN1R_MUTE_WIDTH 1 /* IN1R_MUTE */ +#define WM8994_IN1R_ZC 0x0040 /* IN1R_ZC */ +#define WM8994_IN1R_ZC_MASK 0x0040 /* IN1R_ZC */ +#define WM8994_IN1R_ZC_SHIFT 6 /* IN1R_ZC */ +#define WM8994_IN1R_ZC_WIDTH 1 /* IN1R_ZC */ +#define WM8994_IN1R_VOL_MASK 0x001F /* IN1R_VOL - [4:0] */ +#define WM8994_IN1R_VOL_SHIFT 0 /* IN1R_VOL - [4:0] */ +#define WM8994_IN1R_VOL_WIDTH 5 /* IN1R_VOL - [4:0] */ + +/* + * R27 (0x1B) - Right Line Input 3&4 Volume + */ +#define WM8994_IN2_VU 0x0100 /* IN2_VU */ +#define WM8994_IN2_VU_MASK 0x0100 /* IN2_VU */ +#define WM8994_IN2_VU_SHIFT 8 /* IN2_VU */ +#define WM8994_IN2_VU_WIDTH 1 /* IN2_VU */ +#define WM8994_IN2R_MUTE 0x0080 /* IN2R_MUTE */ +#define WM8994_IN2R_MUTE_MASK 0x0080 /* IN2R_MUTE */ +#define WM8994_IN2R_MUTE_SHIFT 7 /* IN2R_MUTE */ +#define WM8994_IN2R_MUTE_WIDTH 1 /* IN2R_MUTE */ +#define WM8994_IN2R_ZC 0x0040 /* IN2R_ZC */ +#define WM8994_IN2R_ZC_MASK 0x0040 /* IN2R_ZC */ +#define WM8994_IN2R_ZC_SHIFT 6 /* IN2R_ZC */ +#define WM8994_IN2R_ZC_WIDTH 1 /* IN2R_ZC */ +#define WM8994_IN2R_VOL_MASK 0x001F /* IN2R_VOL - [4:0] */ +#define WM8994_IN2R_VOL_SHIFT 0 /* IN2R_VOL - [4:0] */ +#define WM8994_IN2R_VOL_WIDTH 5 /* IN2R_VOL - [4:0] */ + +/* + * R28 (0x1C) - Left Output Volume + */ +#define WM8994_HPOUT1_VU 0x0100 /* HPOUT1_VU */ +#define WM8994_HPOUT1_VU_MASK 0x0100 /* HPOUT1_VU */ +#define WM8994_HPOUT1_VU_SHIFT 8 /* HPOUT1_VU */ +#define WM8994_HPOUT1_VU_WIDTH 1 /* HPOUT1_VU */ +#define WM8994_HPOUT1L_ZC 0x0080 /* HPOUT1L_ZC */ +#define WM8994_HPOUT1L_ZC_MASK 0x0080 /* HPOUT1L_ZC */ +#define WM8994_HPOUT1L_ZC_SHIFT 7 /* HPOUT1L_ZC */ +#define WM8994_HPOUT1L_ZC_WIDTH 1 /* HPOUT1L_ZC */ +#define WM8994_HPOUT1L_MUTE_N 0x0040 /* HPOUT1L_MUTE_N */ +#define WM8994_HPOUT1L_MUTE_N_MASK 0x0040 /* HPOUT1L_MUTE_N */ +#define WM8994_HPOUT1L_MUTE_N_SHIFT 6 /* HPOUT1L_MUTE_N */ +#define WM8994_HPOUT1L_MUTE_N_WIDTH 1 /* HPOUT1L_MUTE_N */ +#define WM8994_HPOUT1L_VOL_MASK 0x003F /* HPOUT1L_VOL - [5:0] */ +#define WM8994_HPOUT1L_VOL_SHIFT 0 /* HPOUT1L_VOL - [5:0] */ +#define WM8994_HPOUT1L_VOL_WIDTH 6 /* HPOUT1L_VOL - [5:0] */ + +/* + * R29 (0x1D) - Right Output Volume + */ +#define WM8994_HPOUT1_VU 0x0100 /* HPOUT1_VU */ +#define WM8994_HPOUT1_VU_MASK 0x0100 /* HPOUT1_VU */ +#define WM8994_HPOUT1_VU_SHIFT 8 /* HPOUT1_VU */ +#define WM8994_HPOUT1_VU_WIDTH 1 /* HPOUT1_VU */ +#define WM8994_HPOUT1R_ZC 0x0080 /* HPOUT1R_ZC */ +#define WM8994_HPOUT1R_ZC_MASK 
0x0080 /* HPOUT1R_ZC */ +#define WM8994_HPOUT1R_ZC_SHIFT 7 /* HPOUT1R_ZC */ +#define WM8994_HPOUT1R_ZC_WIDTH 1 /* HPOUT1R_ZC */ +#define WM8994_HPOUT1R_MUTE_N 0x0040 /* HPOUT1R_MUTE_N */ +#define WM8994_HPOUT1R_MUTE_N_MASK 0x0040 /* HPOUT1R_MUTE_N */ +#define WM8994_HPOUT1R_MUTE_N_SHIFT 6 /* HPOUT1R_MUTE_N */ +#define WM8994_HPOUT1R_MUTE_N_WIDTH 1 /* HPOUT1R_MUTE_N */ +#define WM8994_HPOUT1R_VOL_MASK 0x003F /* HPOUT1R_VOL - [5:0] */ +#define WM8994_HPOUT1R_VOL_SHIFT 0 /* HPOUT1R_VOL - [5:0] */ +#define WM8994_HPOUT1R_VOL_WIDTH 6 /* HPOUT1R_VOL - [5:0] */ + +/* + * R30 (0x1E) - Line Outputs Volume + */ +#define WM8994_LINEOUT1N_MUTE 0x0040 /* LINEOUT1N_MUTE */ +#define WM8994_LINEOUT1N_MUTE_MASK 0x0040 /* LINEOUT1N_MUTE */ +#define WM8994_LINEOUT1N_MUTE_SHIFT 6 /* LINEOUT1N_MUTE */ +#define WM8994_LINEOUT1N_MUTE_WIDTH 1 /* LINEOUT1N_MUTE */ +#define WM8994_LINEOUT1P_MUTE 0x0020 /* LINEOUT1P_MUTE */ +#define WM8994_LINEOUT1P_MUTE_MASK 0x0020 /* LINEOUT1P_MUTE */ +#define WM8994_LINEOUT1P_MUTE_SHIFT 5 /* LINEOUT1P_MUTE */ +#define WM8994_LINEOUT1P_MUTE_WIDTH 1 /* LINEOUT1P_MUTE */ +#define WM8994_LINEOUT1_VOL 0x0010 /* LINEOUT1_VOL */ +#define WM8994_LINEOUT1_VOL_MASK 0x0010 /* LINEOUT1_VOL */ +#define WM8994_LINEOUT1_VOL_SHIFT 4 /* LINEOUT1_VOL */ +#define WM8994_LINEOUT1_VOL_WIDTH 1 /* LINEOUT1_VOL */ +#define WM8994_LINEOUT2N_MUTE 0x0004 /* LINEOUT2N_MUTE */ +#define WM8994_LINEOUT2N_MUTE_MASK 0x0004 /* LINEOUT2N_MUTE */ +#define WM8994_LINEOUT2N_MUTE_SHIFT 2 /* LINEOUT2N_MUTE */ +#define WM8994_LINEOUT2N_MUTE_WIDTH 1 /* LINEOUT2N_MUTE */ +#define WM8994_LINEOUT2P_MUTE 0x0002 /* LINEOUT2P_MUTE */ +#define WM8994_LINEOUT2P_MUTE_MASK 0x0002 /* LINEOUT2P_MUTE */ +#define WM8994_LINEOUT2P_MUTE_SHIFT 1 /* LINEOUT2P_MUTE */ +#define WM8994_LINEOUT2P_MUTE_WIDTH 1 /* LINEOUT2P_MUTE */ +#define WM8994_LINEOUT2_VOL 0x0001 /* LINEOUT2_VOL */ +#define WM8994_LINEOUT2_VOL_MASK 0x0001 /* LINEOUT2_VOL */ +#define WM8994_LINEOUT2_VOL_SHIFT 0 /* LINEOUT2_VOL */ +#define WM8994_LINEOUT2_VOL_WIDTH 1 /* LINEOUT2_VOL */ + +/* + * R31 (0x1F) - HPOUT2 Volume + */ +#define WM8994_HPOUT2_MUTE 0x0020 /* HPOUT2_MUTE */ +#define WM8994_HPOUT2_MUTE_MASK 0x0020 /* HPOUT2_MUTE */ +#define WM8994_HPOUT2_MUTE_SHIFT 5 /* HPOUT2_MUTE */ +#define WM8994_HPOUT2_MUTE_WIDTH 1 /* HPOUT2_MUTE */ +#define WM8994_HPOUT2_VOL 0x0010 /* HPOUT2_VOL */ +#define WM8994_HPOUT2_VOL_MASK 0x0010 /* HPOUT2_VOL */ +#define WM8994_HPOUT2_VOL_SHIFT 4 /* HPOUT2_VOL */ +#define WM8994_HPOUT2_VOL_WIDTH 1 /* HPOUT2_VOL */ + +/* + * R32 (0x20) - Left OPGA Volume + */ +#define WM8994_MIXOUT_VU 0x0100 /* MIXOUT_VU */ +#define WM8994_MIXOUT_VU_MASK 0x0100 /* MIXOUT_VU */ +#define WM8994_MIXOUT_VU_SHIFT 8 /* MIXOUT_VU */ +#define WM8994_MIXOUT_VU_WIDTH 1 /* MIXOUT_VU */ +#define WM8994_MIXOUTL_ZC 0x0080 /* MIXOUTL_ZC */ +#define WM8994_MIXOUTL_ZC_MASK 0x0080 /* MIXOUTL_ZC */ +#define WM8994_MIXOUTL_ZC_SHIFT 7 /* MIXOUTL_ZC */ +#define WM8994_MIXOUTL_ZC_WIDTH 1 /* MIXOUTL_ZC */ +#define WM8994_MIXOUTL_MUTE_N 0x0040 /* MIXOUTL_MUTE_N */ +#define WM8994_MIXOUTL_MUTE_N_MASK 0x0040 /* MIXOUTL_MUTE_N */ +#define WM8994_MIXOUTL_MUTE_N_SHIFT 6 /* MIXOUTL_MUTE_N */ +#define WM8994_MIXOUTL_MUTE_N_WIDTH 1 /* MIXOUTL_MUTE_N */ +#define WM8994_MIXOUTL_VOL_MASK 0x003F /* MIXOUTL_VOL - [5:0] */ +#define WM8994_MIXOUTL_VOL_SHIFT 0 /* MIXOUTL_VOL - [5:0] */ +#define WM8994_MIXOUTL_VOL_WIDTH 6 /* MIXOUTL_VOL - [5:0] */ + +/* + * R33 (0x21) - Right OPGA Volume + */ +#define WM8994_MIXOUT_VU 0x0100 /* MIXOUT_VU */ +#define WM8994_MIXOUT_VU_MASK 0x0100 /* MIXOUT_VU */ 
+#define WM8994_MIXOUT_VU_SHIFT 8 /* MIXOUT_VU */ +#define WM8994_MIXOUT_VU_WIDTH 1 /* MIXOUT_VU */ +#define WM8994_MIXOUTR_ZC 0x0080 /* MIXOUTR_ZC */ +#define WM8994_MIXOUTR_ZC_MASK 0x0080 /* MIXOUTR_ZC */ +#define WM8994_MIXOUTR_ZC_SHIFT 7 /* MIXOUTR_ZC */ +#define WM8994_MIXOUTR_ZC_WIDTH 1 /* MIXOUTR_ZC */ +#define WM8994_MIXOUTR_MUTE_N 0x0040 /* MIXOUTR_MUTE_N */ +#define WM8994_MIXOUTR_MUTE_N_MASK 0x0040 /* MIXOUTR_MUTE_N */ +#define WM8994_MIXOUTR_MUTE_N_SHIFT 6 /* MIXOUTR_MUTE_N */ +#define WM8994_MIXOUTR_MUTE_N_WIDTH 1 /* MIXOUTR_MUTE_N */ +#define WM8994_MIXOUTR_VOL_MASK 0x003F /* MIXOUTR_VOL - [5:0] */ +#define WM8994_MIXOUTR_VOL_SHIFT 0 /* MIXOUTR_VOL - [5:0] */ +#define WM8994_MIXOUTR_VOL_WIDTH 6 /* MIXOUTR_VOL - [5:0] */ + +/* + * R34 (0x22) - SPKMIXL Attenuation + */ +#define WM8994_DAC2L_SPKMIXL_VOL 0x0040 /* DAC2L_SPKMIXL_VOL */ +#define WM8994_DAC2L_SPKMIXL_VOL_MASK 0x0040 /* DAC2L_SPKMIXL_VOL */ +#define WM8994_DAC2L_SPKMIXL_VOL_SHIFT 6 /* DAC2L_SPKMIXL_VOL */ +#define WM8994_DAC2L_SPKMIXL_VOL_WIDTH 1 /* DAC2L_SPKMIXL_VOL */ +#define WM8994_MIXINL_SPKMIXL_VOL 0x0020 /* MIXINL_SPKMIXL_VOL */ +#define WM8994_MIXINL_SPKMIXL_VOL_MASK 0x0020 /* MIXINL_SPKMIXL_VOL */ +#define WM8994_MIXINL_SPKMIXL_VOL_SHIFT 5 /* MIXINL_SPKMIXL_VOL */ +#define WM8994_MIXINL_SPKMIXL_VOL_WIDTH 1 /* MIXINL_SPKMIXL_VOL */ +#define WM8994_IN1LP_SPKMIXL_VOL 0x0010 /* IN1LP_SPKMIXL_VOL */ +#define WM8994_IN1LP_SPKMIXL_VOL_MASK 0x0010 /* IN1LP_SPKMIXL_VOL */ +#define WM8994_IN1LP_SPKMIXL_VOL_SHIFT 4 /* IN1LP_SPKMIXL_VOL */ +#define WM8994_IN1LP_SPKMIXL_VOL_WIDTH 1 /* IN1LP_SPKMIXL_VOL */ +#define WM8994_MIXOUTL_SPKMIXL_VOL 0x0008 /* MIXOUTL_SPKMIXL_VOL */ +#define WM8994_MIXOUTL_SPKMIXL_VOL_MASK 0x0008 /* MIXOUTL_SPKMIXL_VOL */ +#define WM8994_MIXOUTL_SPKMIXL_VOL_SHIFT 3 /* MIXOUTL_SPKMIXL_VOL */ +#define WM8994_MIXOUTL_SPKMIXL_VOL_WIDTH 1 /* MIXOUTL_SPKMIXL_VOL */ +#define WM8994_DAC1L_SPKMIXL_VOL 0x0004 /* DAC1L_SPKMIXL_VOL */ +#define WM8994_DAC1L_SPKMIXL_VOL_MASK 0x0004 /* DAC1L_SPKMIXL_VOL */ +#define WM8994_DAC1L_SPKMIXL_VOL_SHIFT 2 /* DAC1L_SPKMIXL_VOL */ +#define WM8994_DAC1L_SPKMIXL_VOL_WIDTH 1 /* DAC1L_SPKMIXL_VOL */ +#define WM8994_SPKMIXL_VOL_MASK 0x0003 /* SPKMIXL_VOL - [1:0] */ +#define WM8994_SPKMIXL_VOL_SHIFT 0 /* SPKMIXL_VOL - [1:0] */ +#define WM8994_SPKMIXL_VOL_WIDTH 2 /* SPKMIXL_VOL - [1:0] */ + +/* + * R35 (0x23) - SPKMIXR Attenuation + */ +#define WM8994_SPKOUT_CLASSAB 0x0100 /* SPKOUT_CLASSAB */ +#define WM8994_SPKOUT_CLASSAB_MASK 0x0100 /* SPKOUT_CLASSAB */ +#define WM8994_SPKOUT_CLASSAB_SHIFT 8 /* SPKOUT_CLASSAB */ +#define WM8994_SPKOUT_CLASSAB_WIDTH 1 /* SPKOUT_CLASSAB */ +#define WM8994_DAC2R_SPKMIXR_VOL 0x0040 /* DAC2R_SPKMIXR_VOL */ +#define WM8994_DAC2R_SPKMIXR_VOL_MASK 0x0040 /* DAC2R_SPKMIXR_VOL */ +#define WM8994_DAC2R_SPKMIXR_VOL_SHIFT 6 /* DAC2R_SPKMIXR_VOL */ +#define WM8994_DAC2R_SPKMIXR_VOL_WIDTH 1 /* DAC2R_SPKMIXR_VOL */ +#define WM8994_MIXINR_SPKMIXR_VOL 0x0020 /* MIXINR_SPKMIXR_VOL */ +#define WM8994_MIXINR_SPKMIXR_VOL_MASK 0x0020 /* MIXINR_SPKMIXR_VOL */ +#define WM8994_MIXINR_SPKMIXR_VOL_SHIFT 5 /* MIXINR_SPKMIXR_VOL */ +#define WM8994_MIXINR_SPKMIXR_VOL_WIDTH 1 /* MIXINR_SPKMIXR_VOL */ +#define WM8994_IN1RP_SPKMIXR_VOL 0x0010 /* IN1RP_SPKMIXR_VOL */ +#define WM8994_IN1RP_SPKMIXR_VOL_MASK 0x0010 /* IN1RP_SPKMIXR_VOL */ +#define WM8994_IN1RP_SPKMIXR_VOL_SHIFT 4 /* IN1RP_SPKMIXR_VOL */ +#define WM8994_IN1RP_SPKMIXR_VOL_WIDTH 1 /* IN1RP_SPKMIXR_VOL */ +#define WM8994_MIXOUTR_SPKMIXR_VOL 0x0008 /* MIXOUTR_SPKMIXR_VOL */ +#define WM8994_MIXOUTR_SPKMIXR_VOL_MASK 
0x0008 /* MIXOUTR_SPKMIXR_VOL */ +#define WM8994_MIXOUTR_SPKMIXR_VOL_SHIFT 3 /* MIXOUTR_SPKMIXR_VOL */ +#define WM8994_MIXOUTR_SPKMIXR_VOL_WIDTH 1 /* MIXOUTR_SPKMIXR_VOL */ +#define WM8994_DAC1R_SPKMIXR_VOL 0x0004 /* DAC1R_SPKMIXR_VOL */ +#define WM8994_DAC1R_SPKMIXR_VOL_MASK 0x0004 /* DAC1R_SPKMIXR_VOL */ +#define WM8994_DAC1R_SPKMIXR_VOL_SHIFT 2 /* DAC1R_SPKMIXR_VOL */ +#define WM8994_DAC1R_SPKMIXR_VOL_WIDTH 1 /* DAC1R_SPKMIXR_VOL */ +#define WM8994_SPKMIXR_VOL_MASK 0x0003 /* SPKMIXR_VOL - [1:0] */ +#define WM8994_SPKMIXR_VOL_SHIFT 0 /* SPKMIXR_VOL - [1:0] */ +#define WM8994_SPKMIXR_VOL_WIDTH 2 /* SPKMIXR_VOL - [1:0] */ + +/* + * R36 (0x24) - SPKOUT Mixers + */ +#define WM8994_IN2LRP_TO_SPKOUTL 0x0020 /* IN2LRP_TO_SPKOUTL */ +#define WM8994_IN2LRP_TO_SPKOUTL_MASK 0x0020 /* IN2LRP_TO_SPKOUTL */ +#define WM8994_IN2LRP_TO_SPKOUTL_SHIFT 5 /* IN2LRP_TO_SPKOUTL */ +#define WM8994_IN2LRP_TO_SPKOUTL_WIDTH 1 /* IN2LRP_TO_SPKOUTL */ +#define WM8994_SPKMIXL_TO_SPKOUTL 0x0010 /* SPKMIXL_TO_SPKOUTL */ +#define WM8994_SPKMIXL_TO_SPKOUTL_MASK 0x0010 /* SPKMIXL_TO_SPKOUTL */ +#define WM8994_SPKMIXL_TO_SPKOUTL_SHIFT 4 /* SPKMIXL_TO_SPKOUTL */ +#define WM8994_SPKMIXL_TO_SPKOUTL_WIDTH 1 /* SPKMIXL_TO_SPKOUTL */ +#define WM8994_SPKMIXR_TO_SPKOUTL 0x0008 /* SPKMIXR_TO_SPKOUTL */ +#define WM8994_SPKMIXR_TO_SPKOUTL_MASK 0x0008 /* SPKMIXR_TO_SPKOUTL */ +#define WM8994_SPKMIXR_TO_SPKOUTL_SHIFT 3 /* SPKMIXR_TO_SPKOUTL */ +#define WM8994_SPKMIXR_TO_SPKOUTL_WIDTH 1 /* SPKMIXR_TO_SPKOUTL */ +#define WM8994_IN2LRP_TO_SPKOUTR 0x0004 /* IN2LRP_TO_SPKOUTR */ +#define WM8994_IN2LRP_TO_SPKOUTR_MASK 0x0004 /* IN2LRP_TO_SPKOUTR */ +#define WM8994_IN2LRP_TO_SPKOUTR_SHIFT 2 /* IN2LRP_TO_SPKOUTR */ +#define WM8994_IN2LRP_TO_SPKOUTR_WIDTH 1 /* IN2LRP_TO_SPKOUTR */ +#define WM8994_SPKMIXL_TO_SPKOUTR 0x0002 /* SPKMIXL_TO_SPKOUTR */ +#define WM8994_SPKMIXL_TO_SPKOUTR_MASK 0x0002 /* SPKMIXL_TO_SPKOUTR */ +#define WM8994_SPKMIXL_TO_SPKOUTR_SHIFT 1 /* SPKMIXL_TO_SPKOUTR */ +#define WM8994_SPKMIXL_TO_SPKOUTR_WIDTH 1 /* SPKMIXL_TO_SPKOUTR */ +#define WM8994_SPKMIXR_TO_SPKOUTR 0x0001 /* SPKMIXR_TO_SPKOUTR */ +#define WM8994_SPKMIXR_TO_SPKOUTR_MASK 0x0001 /* SPKMIXR_TO_SPKOUTR */ +#define WM8994_SPKMIXR_TO_SPKOUTR_SHIFT 0 /* SPKMIXR_TO_SPKOUTR */ +#define WM8994_SPKMIXR_TO_SPKOUTR_WIDTH 1 /* SPKMIXR_TO_SPKOUTR */ + +/* + * R37 (0x25) - ClassD + */ +#define WM8994_SPKOUTL_BOOST_MASK 0x0038 /* SPKOUTL_BOOST - [5:3] */ +#define WM8994_SPKOUTL_BOOST_SHIFT 3 /* SPKOUTL_BOOST - [5:3] */ +#define WM8994_SPKOUTL_BOOST_WIDTH 3 /* SPKOUTL_BOOST - [5:3] */ +#define WM8994_SPKOUTR_BOOST_MASK 0x0007 /* SPKOUTR_BOOST - [2:0] */ +#define WM8994_SPKOUTR_BOOST_SHIFT 0 /* SPKOUTR_BOOST - [2:0] */ +#define WM8994_SPKOUTR_BOOST_WIDTH 3 /* SPKOUTR_BOOST - [2:0] */ + +/* + * R38 (0x26) - Speaker Volume Left + */ +#define WM8994_SPKOUT_VU 0x0100 /* SPKOUT_VU */ +#define WM8994_SPKOUT_VU_MASK 0x0100 /* SPKOUT_VU */ +#define WM8994_SPKOUT_VU_SHIFT 8 /* SPKOUT_VU */ +#define WM8994_SPKOUT_VU_WIDTH 1 /* SPKOUT_VU */ +#define WM8994_SPKOUTL_ZC 0x0080 /* SPKOUTL_ZC */ +#define WM8994_SPKOUTL_ZC_MASK 0x0080 /* SPKOUTL_ZC */ +#define WM8994_SPKOUTL_ZC_SHIFT 7 /* SPKOUTL_ZC */ +#define WM8994_SPKOUTL_ZC_WIDTH 1 /* SPKOUTL_ZC */ +#define WM8994_SPKOUTL_MUTE_N 0x0040 /* SPKOUTL_MUTE_N */ +#define WM8994_SPKOUTL_MUTE_N_MASK 0x0040 /* SPKOUTL_MUTE_N */ +#define WM8994_SPKOUTL_MUTE_N_SHIFT 6 /* SPKOUTL_MUTE_N */ +#define WM8994_SPKOUTL_MUTE_N_WIDTH 1 /* SPKOUTL_MUTE_N */ +#define WM8994_SPKOUTL_VOL_MASK 0x003F /* SPKOUTL_VOL - [5:0] */ +#define WM8994_SPKOUTL_VOL_SHIFT 0 /* 
SPKOUTL_VOL - [5:0] */ +#define WM8994_SPKOUTL_VOL_WIDTH 6 /* SPKOUTL_VOL - [5:0] */ + +/* + * R39 (0x27) - Speaker Volume Right + */ +#define WM8994_SPKOUT_VU 0x0100 /* SPKOUT_VU */ +#define WM8994_SPKOUT_VU_MASK 0x0100 /* SPKOUT_VU */ +#define WM8994_SPKOUT_VU_SHIFT 8 /* SPKOUT_VU */ +#define WM8994_SPKOUT_VU_WIDTH 1 /* SPKOUT_VU */ +#define WM8994_SPKOUTR_ZC 0x0080 /* SPKOUTR_ZC */ +#define WM8994_SPKOUTR_ZC_MASK 0x0080 /* SPKOUTR_ZC */ +#define WM8994_SPKOUTR_ZC_SHIFT 7 /* SPKOUTR_ZC */ +#define WM8994_SPKOUTR_ZC_WIDTH 1 /* SPKOUTR_ZC */ +#define WM8994_SPKOUTR_MUTE_N 0x0040 /* SPKOUTR_MUTE_N */ +#define WM8994_SPKOUTR_MUTE_N_MASK 0x0040 /* SPKOUTR_MUTE_N */ +#define WM8994_SPKOUTR_MUTE_N_SHIFT 6 /* SPKOUTR_MUTE_N */ +#define WM8994_SPKOUTR_MUTE_N_WIDTH 1 /* SPKOUTR_MUTE_N */ +#define WM8994_SPKOUTR_VOL_MASK 0x003F /* SPKOUTR_VOL - [5:0] */ +#define WM8994_SPKOUTR_VOL_SHIFT 0 /* SPKOUTR_VOL - [5:0] */ +#define WM8994_SPKOUTR_VOL_WIDTH 6 /* SPKOUTR_VOL - [5:0] */ + +/* + * R40 (0x28) - Input Mixer (2) + */ +#define WM8994_IN2LP_TO_IN2L 0x0080 /* IN2LP_TO_IN2L */ +#define WM8994_IN2LP_TO_IN2L_MASK 0x0080 /* IN2LP_TO_IN2L */ +#define WM8994_IN2LP_TO_IN2L_SHIFT 7 /* IN2LP_TO_IN2L */ +#define WM8994_IN2LP_TO_IN2L_WIDTH 1 /* IN2LP_TO_IN2L */ +#define WM8994_IN2LN_TO_IN2L 0x0040 /* IN2LN_TO_IN2L */ +#define WM8994_IN2LN_TO_IN2L_MASK 0x0040 /* IN2LN_TO_IN2L */ +#define WM8994_IN2LN_TO_IN2L_SHIFT 6 /* IN2LN_TO_IN2L */ +#define WM8994_IN2LN_TO_IN2L_WIDTH 1 /* IN2LN_TO_IN2L */ +#define WM8994_IN1LP_TO_IN1L 0x0020 /* IN1LP_TO_IN1L */ +#define WM8994_IN1LP_TO_IN1L_MASK 0x0020 /* IN1LP_TO_IN1L */ +#define WM8994_IN1LP_TO_IN1L_SHIFT 5 /* IN1LP_TO_IN1L */ +#define WM8994_IN1LP_TO_IN1L_WIDTH 1 /* IN1LP_TO_IN1L */ +#define WM8994_IN1LN_TO_IN1L 0x0010 /* IN1LN_TO_IN1L */ +#define WM8994_IN1LN_TO_IN1L_MASK 0x0010 /* IN1LN_TO_IN1L */ +#define WM8994_IN1LN_TO_IN1L_SHIFT 4 /* IN1LN_TO_IN1L */ +#define WM8994_IN1LN_TO_IN1L_WIDTH 1 /* IN1LN_TO_IN1L */ +#define WM8994_IN2RP_TO_IN2R 0x0008 /* IN2RP_TO_IN2R */ +#define WM8994_IN2RP_TO_IN2R_MASK 0x0008 /* IN2RP_TO_IN2R */ +#define WM8994_IN2RP_TO_IN2R_SHIFT 3 /* IN2RP_TO_IN2R */ +#define WM8994_IN2RP_TO_IN2R_WIDTH 1 /* IN2RP_TO_IN2R */ +#define WM8994_IN2RN_TO_IN2R 0x0004 /* IN2RN_TO_IN2R */ +#define WM8994_IN2RN_TO_IN2R_MASK 0x0004 /* IN2RN_TO_IN2R */ +#define WM8994_IN2RN_TO_IN2R_SHIFT 2 /* IN2RN_TO_IN2R */ +#define WM8994_IN2RN_TO_IN2R_WIDTH 1 /* IN2RN_TO_IN2R */ +#define WM8994_IN1RP_TO_IN1R 0x0002 /* IN1RP_TO_IN1R */ +#define WM8994_IN1RP_TO_IN1R_MASK 0x0002 /* IN1RP_TO_IN1R */ +#define WM8994_IN1RP_TO_IN1R_SHIFT 1 /* IN1RP_TO_IN1R */ +#define WM8994_IN1RP_TO_IN1R_WIDTH 1 /* IN1RP_TO_IN1R */ +#define WM8994_IN1RN_TO_IN1R 0x0001 /* IN1RN_TO_IN1R */ +#define WM8994_IN1RN_TO_IN1R_MASK 0x0001 /* IN1RN_TO_IN1R */ +#define WM8994_IN1RN_TO_IN1R_SHIFT 0 /* IN1RN_TO_IN1R */ +#define WM8994_IN1RN_TO_IN1R_WIDTH 1 /* IN1RN_TO_IN1R */ + +/* + * R41 (0x29) - Input Mixer (3) + */ +#define WM8994_IN2L_TO_MIXINL 0x0100 /* IN2L_TO_MIXINL */ +#define WM8994_IN2L_TO_MIXINL_MASK 0x0100 /* IN2L_TO_MIXINL */ +#define WM8994_IN2L_TO_MIXINL_SHIFT 8 /* IN2L_TO_MIXINL */ +#define WM8994_IN2L_TO_MIXINL_WIDTH 1 /* IN2L_TO_MIXINL */ +#define WM8994_IN2L_MIXINL_VOL 0x0080 /* IN2L_MIXINL_VOL */ +#define WM8994_IN2L_MIXINL_VOL_MASK 0x0080 /* IN2L_MIXINL_VOL */ +#define WM8994_IN2L_MIXINL_VOL_SHIFT 7 /* IN2L_MIXINL_VOL */ +#define WM8994_IN2L_MIXINL_VOL_WIDTH 1 /* IN2L_MIXINL_VOL */ +#define WM8994_IN1L_TO_MIXINL 0x0020 /* IN1L_TO_MIXINL */ +#define WM8994_IN1L_TO_MIXINL_MASK 0x0020 /* 
IN1L_TO_MIXINL */ +#define WM8994_IN1L_TO_MIXINL_SHIFT 5 /* IN1L_TO_MIXINL */ +#define WM8994_IN1L_TO_MIXINL_WIDTH 1 /* IN1L_TO_MIXINL */ +#define WM8994_IN1L_MIXINL_VOL 0x0010 /* IN1L_MIXINL_VOL */ +#define WM8994_IN1L_MIXINL_VOL_MASK 0x0010 /* IN1L_MIXINL_VOL */ +#define WM8994_IN1L_MIXINL_VOL_SHIFT 4 /* IN1L_MIXINL_VOL */ +#define WM8994_IN1L_MIXINL_VOL_WIDTH 1 /* IN1L_MIXINL_VOL */ +#define WM8994_MIXOUTL_MIXINL_VOL_MASK 0x0007 /* MIXOUTL_MIXINL_VOL - [2:0] */ +#define WM8994_MIXOUTL_MIXINL_VOL_SHIFT 0 /* MIXOUTL_MIXINL_VOL - [2:0] */ +#define WM8994_MIXOUTL_MIXINL_VOL_WIDTH 3 /* MIXOUTL_MIXINL_VOL - [2:0] */ + +/* + * R42 (0x2A) - Input Mixer (4) + */ +#define WM8994_IN2R_TO_MIXINR 0x0100 /* IN2R_TO_MIXINR */ +#define WM8994_IN2R_TO_MIXINR_MASK 0x0100 /* IN2R_TO_MIXINR */ +#define WM8994_IN2R_TO_MIXINR_SHIFT 8 /* IN2R_TO_MIXINR */ +#define WM8994_IN2R_TO_MIXINR_WIDTH 1 /* IN2R_TO_MIXINR */ +#define WM8994_IN2R_MIXINR_VOL 0x0080 /* IN2R_MIXINR_VOL */ +#define WM8994_IN2R_MIXINR_VOL_MASK 0x0080 /* IN2R_MIXINR_VOL */ +#define WM8994_IN2R_MIXINR_VOL_SHIFT 7 /* IN2R_MIXINR_VOL */ +#define WM8994_IN2R_MIXINR_VOL_WIDTH 1 /* IN2R_MIXINR_VOL */ +#define WM8994_IN1R_TO_MIXINR 0x0020 /* IN1R_TO_MIXINR */ +#define WM8994_IN1R_TO_MIXINR_MASK 0x0020 /* IN1R_TO_MIXINR */ +#define WM8994_IN1R_TO_MIXINR_SHIFT 5 /* IN1R_TO_MIXINR */ +#define WM8994_IN1R_TO_MIXINR_WIDTH 1 /* IN1R_TO_MIXINR */ +#define WM8994_IN1R_MIXINR_VOL 0x0010 /* IN1R_MIXINR_VOL */ +#define WM8994_IN1R_MIXINR_VOL_MASK 0x0010 /* IN1R_MIXINR_VOL */ +#define WM8994_IN1R_MIXINR_VOL_SHIFT 4 /* IN1R_MIXINR_VOL */ +#define WM8994_IN1R_MIXINR_VOL_WIDTH 1 /* IN1R_MIXINR_VOL */ +#define WM8994_MIXOUTR_MIXINR_VOL_MASK 0x0007 /* MIXOUTR_MIXINR_VOL - [2:0] */ +#define WM8994_MIXOUTR_MIXINR_VOL_SHIFT 0 /* MIXOUTR_MIXINR_VOL - [2:0] */ +#define WM8994_MIXOUTR_MIXINR_VOL_WIDTH 3 /* MIXOUTR_MIXINR_VOL - [2:0] */ + +/* + * R43 (0x2B) - Input Mixer (5) + */ +#define WM8994_IN1LP_MIXINL_VOL_MASK 0x01C0 /* IN1LP_MIXINL_VOL - [8:6] */ +#define WM8994_IN1LP_MIXINL_VOL_SHIFT 6 /* IN1LP_MIXINL_VOL - [8:6] */ +#define WM8994_IN1LP_MIXINL_VOL_WIDTH 3 /* IN1LP_MIXINL_VOL - [8:6] */ +#define WM8994_IN2LRP_MIXINL_VOL_MASK 0x0007 /* IN2LRP_MIXINL_VOL - [2:0] */ +#define WM8994_IN2LRP_MIXINL_VOL_SHIFT 0 /* IN2LRP_MIXINL_VOL - [2:0] */ +#define WM8994_IN2LRP_MIXINL_VOL_WIDTH 3 /* IN2LRP_MIXINL_VOL - [2:0] */ + +/* + * R44 (0x2C) - Input Mixer (6) + */ +#define WM8994_IN1RP_MIXINR_VOL_MASK 0x01C0 /* IN1RP_MIXINR_VOL - [8:6] */ +#define WM8994_IN1RP_MIXINR_VOL_SHIFT 6 /* IN1RP_MIXINR_VOL - [8:6] */ +#define WM8994_IN1RP_MIXINR_VOL_WIDTH 3 /* IN1RP_MIXINR_VOL - [8:6] */ +#define WM8994_IN2LRP_MIXINR_VOL_MASK 0x0007 /* IN2LRP_MIXINR_VOL - [2:0] */ +#define WM8994_IN2LRP_MIXINR_VOL_SHIFT 0 /* IN2LRP_MIXINR_VOL - [2:0] */ +#define WM8994_IN2LRP_MIXINR_VOL_WIDTH 3 /* IN2LRP_MIXINR_VOL - [2:0] */ + +/* + * R45 (0x2D) - Output Mixer (1) + */ +#define WM8994_DAC1L_TO_HPOUT1L 0x0100 /* DAC1L_TO_HPOUT1L */ +#define WM8994_DAC1L_TO_HPOUT1L_MASK 0x0100 /* DAC1L_TO_HPOUT1L */ +#define WM8994_DAC1L_TO_HPOUT1L_SHIFT 8 /* DAC1L_TO_HPOUT1L */ +#define WM8994_DAC1L_TO_HPOUT1L_WIDTH 1 /* DAC1L_TO_HPOUT1L */ +#define WM8994_MIXINR_TO_MIXOUTL 0x0080 /* MIXINR_TO_MIXOUTL */ +#define WM8994_MIXINR_TO_MIXOUTL_MASK 0x0080 /* MIXINR_TO_MIXOUTL */ +#define WM8994_MIXINR_TO_MIXOUTL_SHIFT 7 /* MIXINR_TO_MIXOUTL */ +#define WM8994_MIXINR_TO_MIXOUTL_WIDTH 1 /* MIXINR_TO_MIXOUTL */ +#define WM8994_MIXINL_TO_MIXOUTL 0x0040 /* MIXINL_TO_MIXOUTL */ +#define WM8994_MIXINL_TO_MIXOUTL_MASK 0x0040 /* 
MIXINL_TO_MIXOUTL */ +#define WM8994_MIXINL_TO_MIXOUTL_SHIFT 6 /* MIXINL_TO_MIXOUTL */ +#define WM8994_MIXINL_TO_MIXOUTL_WIDTH 1 /* MIXINL_TO_MIXOUTL */ +#define WM8994_IN2RN_TO_MIXOUTL 0x0020 /* IN2RN_TO_MIXOUTL */ +#define WM8994_IN2RN_TO_MIXOUTL_MASK 0x0020 /* IN2RN_TO_MIXOUTL */ +#define WM8994_IN2RN_TO_MIXOUTL_SHIFT 5 /* IN2RN_TO_MIXOUTL */ +#define WM8994_IN2RN_TO_MIXOUTL_WIDTH 1 /* IN2RN_TO_MIXOUTL */ +#define WM8994_IN2LN_TO_MIXOUTL 0x0010 /* IN2LN_TO_MIXOUTL */ +#define WM8994_IN2LN_TO_MIXOUTL_MASK 0x0010 /* IN2LN_TO_MIXOUTL */ +#define WM8994_IN2LN_TO_MIXOUTL_SHIFT 4 /* IN2LN_TO_MIXOUTL */ +#define WM8994_IN2LN_TO_MIXOUTL_WIDTH 1 /* IN2LN_TO_MIXOUTL */ +#define WM8994_IN1R_TO_MIXOUTL 0x0008 /* IN1R_TO_MIXOUTL */ +#define WM8994_IN1R_TO_MIXOUTL_MASK 0x0008 /* IN1R_TO_MIXOUTL */ +#define WM8994_IN1R_TO_MIXOUTL_SHIFT 3 /* IN1R_TO_MIXOUTL */ +#define WM8994_IN1R_TO_MIXOUTL_WIDTH 1 /* IN1R_TO_MIXOUTL */ +#define WM8994_IN1L_TO_MIXOUTL 0x0004 /* IN1L_TO_MIXOUTL */ +#define WM8994_IN1L_TO_MIXOUTL_MASK 0x0004 /* IN1L_TO_MIXOUTL */ +#define WM8994_IN1L_TO_MIXOUTL_SHIFT 2 /* IN1L_TO_MIXOUTL */ +#define WM8994_IN1L_TO_MIXOUTL_WIDTH 1 /* IN1L_TO_MIXOUTL */ +#define WM8994_IN2LP_TO_MIXOUTL 0x0002 /* IN2LP_TO_MIXOUTL */ +#define WM8994_IN2LP_TO_MIXOUTL_MASK 0x0002 /* IN2LP_TO_MIXOUTL */ +#define WM8994_IN2LP_TO_MIXOUTL_SHIFT 1 /* IN2LP_TO_MIXOUTL */ +#define WM8994_IN2LP_TO_MIXOUTL_WIDTH 1 /* IN2LP_TO_MIXOUTL */ +#define WM8994_DAC1L_TO_MIXOUTL 0x0001 /* DAC1L_TO_MIXOUTL */ +#define WM8994_DAC1L_TO_MIXOUTL_MASK 0x0001 /* DAC1L_TO_MIXOUTL */ +#define WM8994_DAC1L_TO_MIXOUTL_SHIFT 0 /* DAC1L_TO_MIXOUTL */ +#define WM8994_DAC1L_TO_MIXOUTL_WIDTH 1 /* DAC1L_TO_MIXOUTL */ + +/* + * R46 (0x2E) - Output Mixer (2) + */ +#define WM8994_DAC1R_TO_HPOUT1R 0x0100 /* DAC1R_TO_HPOUT1R */ +#define WM8994_DAC1R_TO_HPOUT1R_MASK 0x0100 /* DAC1R_TO_HPOUT1R */ +#define WM8994_DAC1R_TO_HPOUT1R_SHIFT 8 /* DAC1R_TO_HPOUT1R */ +#define WM8994_DAC1R_TO_HPOUT1R_WIDTH 1 /* DAC1R_TO_HPOUT1R */ +#define WM8994_MIXINL_TO_MIXOUTR 0x0080 /* MIXINL_TO_MIXOUTR */ +#define WM8994_MIXINL_TO_MIXOUTR_MASK 0x0080 /* MIXINL_TO_MIXOUTR */ +#define WM8994_MIXINL_TO_MIXOUTR_SHIFT 7 /* MIXINL_TO_MIXOUTR */ +#define WM8994_MIXINL_TO_MIXOUTR_WIDTH 1 /* MIXINL_TO_MIXOUTR */ +#define WM8994_MIXINR_TO_MIXOUTR 0x0040 /* MIXINR_TO_MIXOUTR */ +#define WM8994_MIXINR_TO_MIXOUTR_MASK 0x0040 /* MIXINR_TO_MIXOUTR */ +#define WM8994_MIXINR_TO_MIXOUTR_SHIFT 6 /* MIXINR_TO_MIXOUTR */ +#define WM8994_MIXINR_TO_MIXOUTR_WIDTH 1 /* MIXINR_TO_MIXOUTR */ +#define WM8994_IN2LN_TO_MIXOUTR 0x0020 /* IN2LN_TO_MIXOUTR */ +#define WM8994_IN2LN_TO_MIXOUTR_MASK 0x0020 /* IN2LN_TO_MIXOUTR */ +#define WM8994_IN2LN_TO_MIXOUTR_SHIFT 5 /* IN2LN_TO_MIXOUTR */ +#define WM8994_IN2LN_TO_MIXOUTR_WIDTH 1 /* IN2LN_TO_MIXOUTR */ +#define WM8994_IN2RN_TO_MIXOUTR 0x0010 /* IN2RN_TO_MIXOUTR */ +#define WM8994_IN2RN_TO_MIXOUTR_MASK 0x0010 /* IN2RN_TO_MIXOUTR */ +#define WM8994_IN2RN_TO_MIXOUTR_SHIFT 4 /* IN2RN_TO_MIXOUTR */ +#define WM8994_IN2RN_TO_MIXOUTR_WIDTH 1 /* IN2RN_TO_MIXOUTR */ +#define WM8994_IN1L_TO_MIXOUTR 0x0008 /* IN1L_TO_MIXOUTR */ +#define WM8994_IN1L_TO_MIXOUTR_MASK 0x0008 /* IN1L_TO_MIXOUTR */ +#define WM8994_IN1L_TO_MIXOUTR_SHIFT 3 /* IN1L_TO_MIXOUTR */ +#define WM8994_IN1L_TO_MIXOUTR_WIDTH 1 /* IN1L_TO_MIXOUTR */ +#define WM8994_IN1R_TO_MIXOUTR 0x0004 /* IN1R_TO_MIXOUTR */ +#define WM8994_IN1R_TO_MIXOUTR_MASK 0x0004 /* IN1R_TO_MIXOUTR */ +#define WM8994_IN1R_TO_MIXOUTR_SHIFT 2 /* IN1R_TO_MIXOUTR */ +#define WM8994_IN1R_TO_MIXOUTR_WIDTH 1 /* IN1R_TO_MIXOUTR */ 
+#define WM8994_IN2RP_TO_MIXOUTR 0x0002 /* IN2RP_TO_MIXOUTR */ +#define WM8994_IN2RP_TO_MIXOUTR_MASK 0x0002 /* IN2RP_TO_MIXOUTR */ +#define WM8994_IN2RP_TO_MIXOUTR_SHIFT 1 /* IN2RP_TO_MIXOUTR */ +#define WM8994_IN2RP_TO_MIXOUTR_WIDTH 1 /* IN2RP_TO_MIXOUTR */ +#define WM8994_DAC1R_TO_MIXOUTR 0x0001 /* DAC1R_TO_MIXOUTR */ +#define WM8994_DAC1R_TO_MIXOUTR_MASK 0x0001 /* DAC1R_TO_MIXOUTR */ +#define WM8994_DAC1R_TO_MIXOUTR_SHIFT 0 /* DAC1R_TO_MIXOUTR */ +#define WM8994_DAC1R_TO_MIXOUTR_WIDTH 1 /* DAC1R_TO_MIXOUTR */ + +/* + * R47 (0x2F) - Output Mixer (3) + */ +#define WM8994_IN2LP_MIXOUTL_VOL_MASK 0x0E00 /* IN2LP_MIXOUTL_VOL - [11:9] */ +#define WM8994_IN2LP_MIXOUTL_VOL_SHIFT 9 /* IN2LP_MIXOUTL_VOL - [11:9] */ +#define WM8994_IN2LP_MIXOUTL_VOL_WIDTH 3 /* IN2LP_MIXOUTL_VOL - [11:9] */ +#define WM8994_IN2LN_MIXOUTL_VOL_MASK 0x01C0 /* IN2LN_MIXOUTL_VOL - [8:6] */ +#define WM8994_IN2LN_MIXOUTL_VOL_SHIFT 6 /* IN2LN_MIXOUTL_VOL - [8:6] */ +#define WM8994_IN2LN_MIXOUTL_VOL_WIDTH 3 /* IN2LN_MIXOUTL_VOL - [8:6] */ +#define WM8994_IN1R_MIXOUTL_VOL_MASK 0x0038 /* IN1R_MIXOUTL_VOL - [5:3] */ +#define WM8994_IN1R_MIXOUTL_VOL_SHIFT 3 /* IN1R_MIXOUTL_VOL - [5:3] */ +#define WM8994_IN1R_MIXOUTL_VOL_WIDTH 3 /* IN1R_MIXOUTL_VOL - [5:3] */ +#define WM8994_IN1L_MIXOUTL_VOL_MASK 0x0007 /* IN1L_MIXOUTL_VOL - [2:0] */ +#define WM8994_IN1L_MIXOUTL_VOL_SHIFT 0 /* IN1L_MIXOUTL_VOL - [2:0] */ +#define WM8994_IN1L_MIXOUTL_VOL_WIDTH 3 /* IN1L_MIXOUTL_VOL - [2:0] */ + +/* + * R48 (0x30) - Output Mixer (4) + */ +#define WM8994_IN2RP_MIXOUTR_VOL_MASK 0x0E00 /* IN2RP_MIXOUTR_VOL - [11:9] */ +#define WM8994_IN2RP_MIXOUTR_VOL_SHIFT 9 /* IN2RP_MIXOUTR_VOL - [11:9] */ +#define WM8994_IN2RP_MIXOUTR_VOL_WIDTH 3 /* IN2RP_MIXOUTR_VOL - [11:9] */ +#define WM8994_IN2RN_MIXOUTR_VOL_MASK 0x01C0 /* IN2RN_MIXOUTR_VOL - [8:6] */ +#define WM8994_IN2RN_MIXOUTR_VOL_SHIFT 6 /* IN2RN_MIXOUTR_VOL - [8:6] */ +#define WM8994_IN2RN_MIXOUTR_VOL_WIDTH 3 /* IN2RN_MIXOUTR_VOL - [8:6] */ +#define WM8994_IN1L_MIXOUTR_VOL_MASK 0x0038 /* IN1L_MIXOUTR_VOL - [5:3] */ +#define WM8994_IN1L_MIXOUTR_VOL_SHIFT 3 /* IN1L_MIXOUTR_VOL - [5:3] */ +#define WM8994_IN1L_MIXOUTR_VOL_WIDTH 3 /* IN1L_MIXOUTR_VOL - [5:3] */ +#define WM8994_IN1R_MIXOUTR_VOL_MASK 0x0007 /* IN1R_MIXOUTR_VOL - [2:0] */ +#define WM8994_IN1R_MIXOUTR_VOL_SHIFT 0 /* IN1R_MIXOUTR_VOL - [2:0] */ +#define WM8994_IN1R_MIXOUTR_VOL_WIDTH 3 /* IN1R_MIXOUTR_VOL - [2:0] */ + +/* + * R49 (0x31) - Output Mixer (5) + */ +#define WM8994_DAC1L_MIXOUTL_VOL_MASK 0x0E00 /* DAC1L_MIXOUTL_VOL - [11:9] */ +#define WM8994_DAC1L_MIXOUTL_VOL_SHIFT 9 /* DAC1L_MIXOUTL_VOL - [11:9] */ +#define WM8994_DAC1L_MIXOUTL_VOL_WIDTH 3 /* DAC1L_MIXOUTL_VOL - [11:9] */ +#define WM8994_IN2RN_MIXOUTL_VOL_MASK 0x01C0 /* IN2RN_MIXOUTL_VOL - [8:6] */ +#define WM8994_IN2RN_MIXOUTL_VOL_SHIFT 6 /* IN2RN_MIXOUTL_VOL - [8:6] */ +#define WM8994_IN2RN_MIXOUTL_VOL_WIDTH 3 /* IN2RN_MIXOUTL_VOL - [8:6] */ +#define WM8994_MIXINR_MIXOUTL_VOL_MASK 0x0038 /* MIXINR_MIXOUTL_VOL - [5:3] */ +#define WM8994_MIXINR_MIXOUTL_VOL_SHIFT 3 /* MIXINR_MIXOUTL_VOL - [5:3] */ +#define WM8994_MIXINR_MIXOUTL_VOL_WIDTH 3 /* MIXINR_MIXOUTL_VOL - [5:3] */ +#define WM8994_MIXINL_MIXOUTL_VOL_MASK 0x0007 /* MIXINL_MIXOUTL_VOL - [2:0] */ +#define WM8994_MIXINL_MIXOUTL_VOL_SHIFT 0 /* MIXINL_MIXOUTL_VOL - [2:0] */ +#define WM8994_MIXINL_MIXOUTL_VOL_WIDTH 3 /* MIXINL_MIXOUTL_VOL - [2:0] */ + +/* + * R50 (0x32) - Output Mixer (6) + */ +#define WM8994_DAC1R_MIXOUTR_VOL_MASK 0x0E00 /* DAC1R_MIXOUTR_VOL - [11:9] */ +#define WM8994_DAC1R_MIXOUTR_VOL_SHIFT 9 /* DAC1R_MIXOUTR_VOL - 
[11:9] */ +#define WM8994_DAC1R_MIXOUTR_VOL_WIDTH 3 /* DAC1R_MIXOUTR_VOL - [11:9] */ +#define WM8994_IN2LN_MIXOUTR_VOL_MASK 0x01C0 /* IN2LN_MIXOUTR_VOL - [8:6] */ +#define WM8994_IN2LN_MIXOUTR_VOL_SHIFT 6 /* IN2LN_MIXOUTR_VOL - [8:6] */ +#define WM8994_IN2LN_MIXOUTR_VOL_WIDTH 3 /* IN2LN_MIXOUTR_VOL - [8:6] */ +#define WM8994_MIXINL_MIXOUTR_VOL_MASK 0x0038 /* MIXINL_MIXOUTR_VOL - [5:3] */ +#define WM8994_MIXINL_MIXOUTR_VOL_SHIFT 3 /* MIXINL_MIXOUTR_VOL - [5:3] */ +#define WM8994_MIXINL_MIXOUTR_VOL_WIDTH 3 /* MIXINL_MIXOUTR_VOL - [5:3] */ +#define WM8994_MIXINR_MIXOUTR_VOL_MASK 0x0007 /* MIXINR_MIXOUTR_VOL - [2:0] */ +#define WM8994_MIXINR_MIXOUTR_VOL_SHIFT 0 /* MIXINR_MIXOUTR_VOL - [2:0] */ +#define WM8994_MIXINR_MIXOUTR_VOL_WIDTH 3 /* MIXINR_MIXOUTR_VOL - [2:0] */ + +/* + * R51 (0x33) - HPOUT2 Mixer + */ +#define WM8994_IN2LRP_TO_HPOUT2 0x0020 /* IN2LRP_TO_HPOUT2 */ +#define WM8994_IN2LRP_TO_HPOUT2_MASK 0x0020 /* IN2LRP_TO_HPOUT2 */ +#define WM8994_IN2LRP_TO_HPOUT2_SHIFT 5 /* IN2LRP_TO_HPOUT2 */ +#define WM8994_IN2LRP_TO_HPOUT2_WIDTH 1 /* IN2LRP_TO_HPOUT2 */ +#define WM8994_MIXOUTLVOL_TO_HPOUT2 0x0010 /* MIXOUTLVOL_TO_HPOUT2 */ +#define WM8994_MIXOUTLVOL_TO_HPOUT2_MASK 0x0010 /* MIXOUTLVOL_TO_HPOUT2 */ +#define WM8994_MIXOUTLVOL_TO_HPOUT2_SHIFT 4 /* MIXOUTLVOL_TO_HPOUT2 */ +#define WM8994_MIXOUTLVOL_TO_HPOUT2_WIDTH 1 /* MIXOUTLVOL_TO_HPOUT2 */ +#define WM8994_MIXOUTRVOL_TO_HPOUT2 0x0008 /* MIXOUTRVOL_TO_HPOUT2 */ +#define WM8994_MIXOUTRVOL_TO_HPOUT2_MASK 0x0008 /* MIXOUTRVOL_TO_HPOUT2 */ +#define WM8994_MIXOUTRVOL_TO_HPOUT2_SHIFT 3 /* MIXOUTRVOL_TO_HPOUT2 */ +#define WM8994_MIXOUTRVOL_TO_HPOUT2_WIDTH 1 /* MIXOUTRVOL_TO_HPOUT2 */ + +/* + * R52 (0x34) - Line Mixer (1) + */ +#define WM8994_MIXOUTL_TO_LINEOUT1N 0x0040 /* MIXOUTL_TO_LINEOUT1N */ +#define WM8994_MIXOUTL_TO_LINEOUT1N_MASK 0x0040 /* MIXOUTL_TO_LINEOUT1N */ +#define WM8994_MIXOUTL_TO_LINEOUT1N_SHIFT 6 /* MIXOUTL_TO_LINEOUT1N */ +#define WM8994_MIXOUTL_TO_LINEOUT1N_WIDTH 1 /* MIXOUTL_TO_LINEOUT1N */ +#define WM8994_MIXOUTR_TO_LINEOUT1N 0x0020 /* MIXOUTR_TO_LINEOUT1N */ +#define WM8994_MIXOUTR_TO_LINEOUT1N_MASK 0x0020 /* MIXOUTR_TO_LINEOUT1N */ +#define WM8994_MIXOUTR_TO_LINEOUT1N_SHIFT 5 /* MIXOUTR_TO_LINEOUT1N */ +#define WM8994_MIXOUTR_TO_LINEOUT1N_WIDTH 1 /* MIXOUTR_TO_LINEOUT1N */ +#define WM8994_LINEOUT1_MODE 0x0010 /* LINEOUT1_MODE */ +#define WM8994_LINEOUT1_MODE_MASK 0x0010 /* LINEOUT1_MODE */ +#define WM8994_LINEOUT1_MODE_SHIFT 4 /* LINEOUT1_MODE */ +#define WM8994_LINEOUT1_MODE_WIDTH 1 /* LINEOUT1_MODE */ +#define WM8994_IN1R_TO_LINEOUT1P 0x0004 /* IN1R_TO_LINEOUT1P */ +#define WM8994_IN1R_TO_LINEOUT1P_MASK 0x0004 /* IN1R_TO_LINEOUT1P */ +#define WM8994_IN1R_TO_LINEOUT1P_SHIFT 2 /* IN1R_TO_LINEOUT1P */ +#define WM8994_IN1R_TO_LINEOUT1P_WIDTH 1 /* IN1R_TO_LINEOUT1P */ +#define WM8994_IN1L_TO_LINEOUT1P 0x0002 /* IN1L_TO_LINEOUT1P */ +#define WM8994_IN1L_TO_LINEOUT1P_MASK 0x0002 /* IN1L_TO_LINEOUT1P */ +#define WM8994_IN1L_TO_LINEOUT1P_SHIFT 1 /* IN1L_TO_LINEOUT1P */ +#define WM8994_IN1L_TO_LINEOUT1P_WIDTH 1 /* IN1L_TO_LINEOUT1P */ +#define WM8994_MIXOUTL_TO_LINEOUT1P 0x0001 /* MIXOUTL_TO_LINEOUT1P */ +#define WM8994_MIXOUTL_TO_LINEOUT1P_MASK 0x0001 /* MIXOUTL_TO_LINEOUT1P */ +#define WM8994_MIXOUTL_TO_LINEOUT1P_SHIFT 0 /* MIXOUTL_TO_LINEOUT1P */ +#define WM8994_MIXOUTL_TO_LINEOUT1P_WIDTH 1 /* MIXOUTL_TO_LINEOUT1P */ + +/* + * R53 (0x35) - Line Mixer (2) + */ +#define WM8994_MIXOUTR_TO_LINEOUT2N 0x0040 /* MIXOUTR_TO_LINEOUT2N */ +#define WM8994_MIXOUTR_TO_LINEOUT2N_MASK 0x0040 /* MIXOUTR_TO_LINEOUT2N */ +#define 
WM8994_MIXOUTR_TO_LINEOUT2N_SHIFT 6 /* MIXOUTR_TO_LINEOUT2N */ +#define WM8994_MIXOUTR_TO_LINEOUT2N_WIDTH 1 /* MIXOUTR_TO_LINEOUT2N */ +#define WM8994_MIXOUTL_TO_LINEOUT2N 0x0020 /* MIXOUTL_TO_LINEOUT2N */ +#define WM8994_MIXOUTL_TO_LINEOUT2N_MASK 0x0020 /* MIXOUTL_TO_LINEOUT2N */ +#define WM8994_MIXOUTL_TO_LINEOUT2N_SHIFT 5 /* MIXOUTL_TO_LINEOUT2N */ +#define WM8994_MIXOUTL_TO_LINEOUT2N_WIDTH 1 /* MIXOUTL_TO_LINEOUT2N */ +#define WM8994_LINEOUT2_MODE 0x0010 /* LINEOUT2_MODE */ +#define WM8994_LINEOUT2_MODE_MASK 0x0010 /* LINEOUT2_MODE */ +#define WM8994_LINEOUT2_MODE_SHIFT 4 /* LINEOUT2_MODE */ +#define WM8994_LINEOUT2_MODE_WIDTH 1 /* LINEOUT2_MODE */ +#define WM8994_IN1L_TO_LINEOUT2P 0x0004 /* IN1L_TO_LINEOUT2P */ +#define WM8994_IN1L_TO_LINEOUT2P_MASK 0x0004 /* IN1L_TO_LINEOUT2P */ +#define WM8994_IN1L_TO_LINEOUT2P_SHIFT 2 /* IN1L_TO_LINEOUT2P */ +#define WM8994_IN1L_TO_LINEOUT2P_WIDTH 1 /* IN1L_TO_LINEOUT2P */ +#define WM8994_IN1R_TO_LINEOUT2P 0x0002 /* IN1R_TO_LINEOUT2P */ +#define WM8994_IN1R_TO_LINEOUT2P_MASK 0x0002 /* IN1R_TO_LINEOUT2P */ +#define WM8994_IN1R_TO_LINEOUT2P_SHIFT 1 /* IN1R_TO_LINEOUT2P */ +#define WM8994_IN1R_TO_LINEOUT2P_WIDTH 1 /* IN1R_TO_LINEOUT2P */ +#define WM8994_MIXOUTR_TO_LINEOUT2P 0x0001 /* MIXOUTR_TO_LINEOUT2P */ +#define WM8994_MIXOUTR_TO_LINEOUT2P_MASK 0x0001 /* MIXOUTR_TO_LINEOUT2P */ +#define WM8994_MIXOUTR_TO_LINEOUT2P_SHIFT 0 /* MIXOUTR_TO_LINEOUT2P */ +#define WM8994_MIXOUTR_TO_LINEOUT2P_WIDTH 1 /* MIXOUTR_TO_LINEOUT2P */ + +/* + * R54 (0x36) - Speaker Mixer + */ +#define WM8994_DAC2L_TO_SPKMIXL 0x0200 /* DAC2L_TO_SPKMIXL */ +#define WM8994_DAC2L_TO_SPKMIXL_MASK 0x0200 /* DAC2L_TO_SPKMIXL */ +#define WM8994_DAC2L_TO_SPKMIXL_SHIFT 9 /* DAC2L_TO_SPKMIXL */ +#define WM8994_DAC2L_TO_SPKMIXL_WIDTH 1 /* DAC2L_TO_SPKMIXL */ +#define WM8994_DAC2R_TO_SPKMIXR 0x0100 /* DAC2R_TO_SPKMIXR */ +#define WM8994_DAC2R_TO_SPKMIXR_MASK 0x0100 /* DAC2R_TO_SPKMIXR */ +#define WM8994_DAC2R_TO_SPKMIXR_SHIFT 8 /* DAC2R_TO_SPKMIXR */ +#define WM8994_DAC2R_TO_SPKMIXR_WIDTH 1 /* DAC2R_TO_SPKMIXR */ +#define WM8994_MIXINL_TO_SPKMIXL 0x0080 /* MIXINL_TO_SPKMIXL */ +#define WM8994_MIXINL_TO_SPKMIXL_MASK 0x0080 /* MIXINL_TO_SPKMIXL */ +#define WM8994_MIXINL_TO_SPKMIXL_SHIFT 7 /* MIXINL_TO_SPKMIXL */ +#define WM8994_MIXINL_TO_SPKMIXL_WIDTH 1 /* MIXINL_TO_SPKMIXL */ +#define WM8994_MIXINR_TO_SPKMIXR 0x0040 /* MIXINR_TO_SPKMIXR */ +#define WM8994_MIXINR_TO_SPKMIXR_MASK 0x0040 /* MIXINR_TO_SPKMIXR */ +#define WM8994_MIXINR_TO_SPKMIXR_SHIFT 6 /* MIXINR_TO_SPKMIXR */ +#define WM8994_MIXINR_TO_SPKMIXR_WIDTH 1 /* MIXINR_TO_SPKMIXR */ +#define WM8994_IN1LP_TO_SPKMIXL 0x0020 /* IN1LP_TO_SPKMIXL */ +#define WM8994_IN1LP_TO_SPKMIXL_MASK 0x0020 /* IN1LP_TO_SPKMIXL */ +#define WM8994_IN1LP_TO_SPKMIXL_SHIFT 5 /* IN1LP_TO_SPKMIXL */ +#define WM8994_IN1LP_TO_SPKMIXL_WIDTH 1 /* IN1LP_TO_SPKMIXL */ +#define WM8994_IN1RP_TO_SPKMIXR 0x0010 /* IN1RP_TO_SPKMIXR */ +#define WM8994_IN1RP_TO_SPKMIXR_MASK 0x0010 /* IN1RP_TO_SPKMIXR */ +#define WM8994_IN1RP_TO_SPKMIXR_SHIFT 4 /* IN1RP_TO_SPKMIXR */ +#define WM8994_IN1RP_TO_SPKMIXR_WIDTH 1 /* IN1RP_TO_SPKMIXR */ +#define WM8994_MIXOUTL_TO_SPKMIXL 0x0008 /* MIXOUTL_TO_SPKMIXL */ +#define WM8994_MIXOUTL_TO_SPKMIXL_MASK 0x0008 /* MIXOUTL_TO_SPKMIXL */ +#define WM8994_MIXOUTL_TO_SPKMIXL_SHIFT 3 /* MIXOUTL_TO_SPKMIXL */ +#define WM8994_MIXOUTL_TO_SPKMIXL_WIDTH 1 /* MIXOUTL_TO_SPKMIXL */ +#define WM8994_MIXOUTR_TO_SPKMIXR 0x0004 /* MIXOUTR_TO_SPKMIXR */ +#define WM8994_MIXOUTR_TO_SPKMIXR_MASK 0x0004 /* MIXOUTR_TO_SPKMIXR */ +#define WM8994_MIXOUTR_TO_SPKMIXR_SHIFT 2 
/* MIXOUTR_TO_SPKMIXR */ +#define WM8994_MIXOUTR_TO_SPKMIXR_WIDTH 1 /* MIXOUTR_TO_SPKMIXR */ +#define WM8994_DAC1L_TO_SPKMIXL 0x0002 /* DAC1L_TO_SPKMIXL */ +#define WM8994_DAC1L_TO_SPKMIXL_MASK 0x0002 /* DAC1L_TO_SPKMIXL */ +#define WM8994_DAC1L_TO_SPKMIXL_SHIFT 1 /* DAC1L_TO_SPKMIXL */ +#define WM8994_DAC1L_TO_SPKMIXL_WIDTH 1 /* DAC1L_TO_SPKMIXL */ +#define WM8994_DAC1R_TO_SPKMIXR 0x0001 /* DAC1R_TO_SPKMIXR */ +#define WM8994_DAC1R_TO_SPKMIXR_MASK 0x0001 /* DAC1R_TO_SPKMIXR */ +#define WM8994_DAC1R_TO_SPKMIXR_SHIFT 0 /* DAC1R_TO_SPKMIXR */ +#define WM8994_DAC1R_TO_SPKMIXR_WIDTH 1 /* DAC1R_TO_SPKMIXR */ + +/* + * R55 (0x37) - Additional Control + */ +#define WM8994_LINEOUT1_FB 0x0080 /* LINEOUT1_FB */ +#define WM8994_LINEOUT1_FB_MASK 0x0080 /* LINEOUT1_FB */ +#define WM8994_LINEOUT1_FB_SHIFT 7 /* LINEOUT1_FB */ +#define WM8994_LINEOUT1_FB_WIDTH 1 /* LINEOUT1_FB */ +#define WM8994_LINEOUT2_FB 0x0040 /* LINEOUT2_FB */ +#define WM8994_LINEOUT2_FB_MASK 0x0040 /* LINEOUT2_FB */ +#define WM8994_LINEOUT2_FB_SHIFT 6 /* LINEOUT2_FB */ +#define WM8994_LINEOUT2_FB_WIDTH 1 /* LINEOUT2_FB */ +#define WM8994_VROI 0x0001 /* VROI */ +#define WM8994_VROI_MASK 0x0001 /* VROI */ +#define WM8994_VROI_SHIFT 0 /* VROI */ +#define WM8994_VROI_WIDTH 1 /* VROI */ + +/* + * R56 (0x38) - AntiPOP (1) + */ +#define WM8994_LINEOUT_VMID_BUF_ENA 0x0080 /* LINEOUT_VMID_BUF_ENA */ +#define WM8994_LINEOUT_VMID_BUF_ENA_MASK 0x0080 /* LINEOUT_VMID_BUF_ENA */ +#define WM8994_LINEOUT_VMID_BUF_ENA_SHIFT 7 /* LINEOUT_VMID_BUF_ENA */ +#define WM8994_LINEOUT_VMID_BUF_ENA_WIDTH 1 /* LINEOUT_VMID_BUF_ENA */ +#define WM8994_HPOUT2_IN_ENA 0x0040 /* HPOUT2_IN_ENA */ +#define WM8994_HPOUT2_IN_ENA_MASK 0x0040 /* HPOUT2_IN_ENA */ +#define WM8994_HPOUT2_IN_ENA_SHIFT 6 /* HPOUT2_IN_ENA */ +#define WM8994_HPOUT2_IN_ENA_WIDTH 1 /* HPOUT2_IN_ENA */ +#define WM8994_LINEOUT1_DISCH 0x0020 /* LINEOUT1_DISCH */ +#define WM8994_LINEOUT1_DISCH_MASK 0x0020 /* LINEOUT1_DISCH */ +#define WM8994_LINEOUT1_DISCH_SHIFT 5 /* LINEOUT1_DISCH */ +#define WM8994_LINEOUT1_DISCH_WIDTH 1 /* LINEOUT1_DISCH */ +#define WM8994_LINEOUT2_DISCH 0x0010 /* LINEOUT2_DISCH */ +#define WM8994_LINEOUT2_DISCH_MASK 0x0010 /* LINEOUT2_DISCH */ +#define WM8994_LINEOUT2_DISCH_SHIFT 4 /* LINEOUT2_DISCH */ +#define WM8994_LINEOUT2_DISCH_WIDTH 1 /* LINEOUT2_DISCH */ + +/* + * R57 (0x39) - AntiPOP (2) + */ +#define WM8994_MICB2_DISCH 0x0100 /* MICB2_DISCH */ +#define WM8994_MICB2_DISCH_MASK 0x0100 /* MICB2_DISCH */ +#define WM8994_MICB2_DISCH_SHIFT 8 /* MICB2_DISCH */ +#define WM8994_MICB2_DISCH_WIDTH 1 /* MICB2_DISCH */ +#define WM8994_MICB1_DISCH 0x0080 /* MICB1_DISCH */ +#define WM8994_MICB1_DISCH_MASK 0x0080 /* MICB1_DISCH */ +#define WM8994_MICB1_DISCH_SHIFT 7 /* MICB1_DISCH */ +#define WM8994_MICB1_DISCH_WIDTH 1 /* MICB1_DISCH */ +#define WM8994_VMID_RAMP_MASK 0x0060 /* VMID_RAMP - [6:5] */ +#define WM8994_VMID_RAMP_SHIFT 5 /* VMID_RAMP - [6:5] */ +#define WM8994_VMID_RAMP_WIDTH 2 /* VMID_RAMP - [6:5] */ +#define WM8994_VMID_BUF_ENA 0x0008 /* VMID_BUF_ENA */ +#define WM8994_VMID_BUF_ENA_MASK 0x0008 /* VMID_BUF_ENA */ +#define WM8994_VMID_BUF_ENA_SHIFT 3 /* VMID_BUF_ENA */ +#define WM8994_VMID_BUF_ENA_WIDTH 1 /* VMID_BUF_ENA */ +#define WM8994_STARTUP_BIAS_ENA 0x0004 /* STARTUP_BIAS_ENA */ +#define WM8994_STARTUP_BIAS_ENA_MASK 0x0004 /* STARTUP_BIAS_ENA */ +#define WM8994_STARTUP_BIAS_ENA_SHIFT 2 /* STARTUP_BIAS_ENA */ +#define WM8994_STARTUP_BIAS_ENA_WIDTH 1 /* STARTUP_BIAS_ENA */ +#define WM8994_BIAS_SRC 0x0002 /* BIAS_SRC */ +#define WM8994_BIAS_SRC_MASK 0x0002 /* BIAS_SRC */ 
+#define WM8994_BIAS_SRC_SHIFT 1 /* BIAS_SRC */ +#define WM8994_BIAS_SRC_WIDTH 1 /* BIAS_SRC */ +#define WM8994_VMID_DISCH 0x0001 /* VMID_DISCH */ +#define WM8994_VMID_DISCH_MASK 0x0001 /* VMID_DISCH */ +#define WM8994_VMID_DISCH_SHIFT 0 /* VMID_DISCH */ +#define WM8994_VMID_DISCH_WIDTH 1 /* VMID_DISCH */ + +/* + * R58 (0x3A) - MICBIAS + */ +#define WM8994_MICD_SCTHR_MASK 0x00C0 /* MICD_SCTHR - [7:6] */ +#define WM8994_MICD_SCTHR_SHIFT 6 /* MICD_SCTHR - [7:6] */ +#define WM8994_MICD_SCTHR_WIDTH 2 /* MICD_SCTHR - [7:6] */ +#define WM8994_MICD_THR_MASK 0x0038 /* MICD_THR - [5:3] */ +#define WM8994_MICD_THR_SHIFT 3 /* MICD_THR - [5:3] */ +#define WM8994_MICD_THR_WIDTH 3 /* MICD_THR - [5:3] */ +#define WM8994_MICD_ENA 0x0004 /* MICD_ENA */ +#define WM8994_MICD_ENA_MASK 0x0004 /* MICD_ENA */ +#define WM8994_MICD_ENA_SHIFT 2 /* MICD_ENA */ +#define WM8994_MICD_ENA_WIDTH 1 /* MICD_ENA */ +#define WM8994_MICB2_LVL 0x0002 /* MICB2_LVL */ +#define WM8994_MICB2_LVL_MASK 0x0002 /* MICB2_LVL */ +#define WM8994_MICB2_LVL_SHIFT 1 /* MICB2_LVL */ +#define WM8994_MICB2_LVL_WIDTH 1 /* MICB2_LVL */ +#define WM8994_MICB1_LVL 0x0001 /* MICB1_LVL */ +#define WM8994_MICB1_LVL_MASK 0x0001 /* MICB1_LVL */ +#define WM8994_MICB1_LVL_SHIFT 0 /* MICB1_LVL */ +#define WM8994_MICB1_LVL_WIDTH 1 /* MICB1_LVL */ + +/* + * R59 (0x3B) - LDO 1 + */ +#define WM8994_LDO1_VSEL_MASK 0x000E /* LDO1_VSEL - [3:1] */ +#define WM8994_LDO1_VSEL_SHIFT 1 /* LDO1_VSEL - [3:1] */ +#define WM8994_LDO1_VSEL_WIDTH 3 /* LDO1_VSEL - [3:1] */ +#define WM8994_LDO1_DISCH 0x0001 /* LDO1_DISCH */ +#define WM8994_LDO1_DISCH_MASK 0x0001 /* LDO1_DISCH */ +#define WM8994_LDO1_DISCH_SHIFT 0 /* LDO1_DISCH */ +#define WM8994_LDO1_DISCH_WIDTH 1 /* LDO1_DISCH */ + +/* + * R60 (0x3C) - LDO 2 + */ +#define WM8994_LDO2_VSEL_MASK 0x0006 /* LDO2_VSEL - [2:1] */ +#define WM8994_LDO2_VSEL_SHIFT 1 /* LDO2_VSEL - [2:1] */ +#define WM8994_LDO2_VSEL_WIDTH 2 /* LDO2_VSEL - [2:1] */ +#define WM8994_LDO2_DISCH 0x0001 /* LDO2_DISCH */ +#define WM8994_LDO2_DISCH_MASK 0x0001 /* LDO2_DISCH */ +#define WM8994_LDO2_DISCH_SHIFT 0 /* LDO2_DISCH */ +#define WM8994_LDO2_DISCH_WIDTH 1 /* LDO2_DISCH */ + +/* + * R76 (0x4C) - Charge Pump (1) + */ +#define WM8994_CP_ENA 0x8000 /* CP_ENA */ +#define WM8994_CP_ENA_MASK 0x8000 /* CP_ENA */ +#define WM8994_CP_ENA_SHIFT 15 /* CP_ENA */ +#define WM8994_CP_ENA_WIDTH 1 /* CP_ENA */ + +/* + * R81 (0x51) - Class W (1) + */ +#define WM8994_CP_DYN_SRC_SEL_MASK 0x0300 /* CP_DYN_SRC_SEL - [9:8] */ +#define WM8994_CP_DYN_SRC_SEL_SHIFT 8 /* CP_DYN_SRC_SEL - [9:8] */ +#define WM8994_CP_DYN_SRC_SEL_WIDTH 2 /* CP_DYN_SRC_SEL - [9:8] */ +#define WM8994_CP_DYN_PWR 0x0001 /* CP_DYN_PWR */ +#define WM8994_CP_DYN_PWR_MASK 0x0001 /* CP_DYN_PWR */ +#define WM8994_CP_DYN_PWR_SHIFT 0 /* CP_DYN_PWR */ +#define WM8994_CP_DYN_PWR_WIDTH 1 /* CP_DYN_PWR */ + +/* + * R84 (0x54) - DC Servo (1) + */ +#define WM8994_DCS_TRIG_SINGLE_1 0x2000 /* DCS_TRIG_SINGLE_1 */ +#define WM8994_DCS_TRIG_SINGLE_1_MASK 0x2000 /* DCS_TRIG_SINGLE_1 */ +#define WM8994_DCS_TRIG_SINGLE_1_SHIFT 13 /* DCS_TRIG_SINGLE_1 */ +#define WM8994_DCS_TRIG_SINGLE_1_WIDTH 1 /* DCS_TRIG_SINGLE_1 */ +#define WM8994_DCS_TRIG_SINGLE_0 0x1000 /* DCS_TRIG_SINGLE_0 */ +#define WM8994_DCS_TRIG_SINGLE_0_MASK 0x1000 /* DCS_TRIG_SINGLE_0 */ +#define WM8994_DCS_TRIG_SINGLE_0_SHIFT 12 /* DCS_TRIG_SINGLE_0 */ +#define WM8994_DCS_TRIG_SINGLE_0_WIDTH 1 /* DCS_TRIG_SINGLE_0 */ +#define WM8994_DCS_TRIG_SERIES_1 0x0200 /* DCS_TRIG_SERIES_1 */ +#define WM8994_DCS_TRIG_SERIES_1_MASK 0x0200 /* DCS_TRIG_SERIES_1 */ +#define 
WM8994_DCS_TRIG_SERIES_1_SHIFT 9 /* DCS_TRIG_SERIES_1 */ +#define WM8994_DCS_TRIG_SERIES_1_WIDTH 1 /* DCS_TRIG_SERIES_1 */ +#define WM8994_DCS_TRIG_SERIES_0 0x0100 /* DCS_TRIG_SERIES_0 */ +#define WM8994_DCS_TRIG_SERIES_0_MASK 0x0100 /* DCS_TRIG_SERIES_0 */ +#define WM8994_DCS_TRIG_SERIES_0_SHIFT 8 /* DCS_TRIG_SERIES_0 */ +#define WM8994_DCS_TRIG_SERIES_0_WIDTH 1 /* DCS_TRIG_SERIES_0 */ +#define WM8994_DCS_TRIG_STARTUP_1 0x0020 /* DCS_TRIG_STARTUP_1 */ +#define WM8994_DCS_TRIG_STARTUP_1_MASK 0x0020 /* DCS_TRIG_STARTUP_1 */ +#define WM8994_DCS_TRIG_STARTUP_1_SHIFT 5 /* DCS_TRIG_STARTUP_1 */ +#define WM8994_DCS_TRIG_STARTUP_1_WIDTH 1 /* DCS_TRIG_STARTUP_1 */ +#define WM8994_DCS_TRIG_STARTUP_0 0x0010 /* DCS_TRIG_STARTUP_0 */ +#define WM8994_DCS_TRIG_STARTUP_0_MASK 0x0010 /* DCS_TRIG_STARTUP_0 */ +#define WM8994_DCS_TRIG_STARTUP_0_SHIFT 4 /* DCS_TRIG_STARTUP_0 */ +#define WM8994_DCS_TRIG_STARTUP_0_WIDTH 1 /* DCS_TRIG_STARTUP_0 */ +#define WM8994_DCS_TRIG_DAC_WR_1 0x0008 /* DCS_TRIG_DAC_WR_1 */ +#define WM8994_DCS_TRIG_DAC_WR_1_MASK 0x0008 /* DCS_TRIG_DAC_WR_1 */ +#define WM8994_DCS_TRIG_DAC_WR_1_SHIFT 3 /* DCS_TRIG_DAC_WR_1 */ +#define WM8994_DCS_TRIG_DAC_WR_1_WIDTH 1 /* DCS_TRIG_DAC_WR_1 */ +#define WM8994_DCS_TRIG_DAC_WR_0 0x0004 /* DCS_TRIG_DAC_WR_0 */ +#define WM8994_DCS_TRIG_DAC_WR_0_MASK 0x0004 /* DCS_TRIG_DAC_WR_0 */ +#define WM8994_DCS_TRIG_DAC_WR_0_SHIFT 2 /* DCS_TRIG_DAC_WR_0 */ +#define WM8994_DCS_TRIG_DAC_WR_0_WIDTH 1 /* DCS_TRIG_DAC_WR_0 */ +#define WM8994_DCS_ENA_CHAN_1 0x0002 /* DCS_ENA_CHAN_1 */ +#define WM8994_DCS_ENA_CHAN_1_MASK 0x0002 /* DCS_ENA_CHAN_1 */ +#define WM8994_DCS_ENA_CHAN_1_SHIFT 1 /* DCS_ENA_CHAN_1 */ +#define WM8994_DCS_ENA_CHAN_1_WIDTH 1 /* DCS_ENA_CHAN_1 */ +#define WM8994_DCS_ENA_CHAN_0 0x0001 /* DCS_ENA_CHAN_0 */ +#define WM8994_DCS_ENA_CHAN_0_MASK 0x0001 /* DCS_ENA_CHAN_0 */ +#define WM8994_DCS_ENA_CHAN_0_SHIFT 0 /* DCS_ENA_CHAN_0 */ +#define WM8994_DCS_ENA_CHAN_0_WIDTH 1 /* DCS_ENA_CHAN_0 */ + +/* + * R85 (0x55) - DC Servo (2) + */ +#define WM8994_DCS_SERIES_NO_01_MASK 0x0FE0 /* DCS_SERIES_NO_01 - [11:5] */ +#define WM8994_DCS_SERIES_NO_01_SHIFT 5 /* DCS_SERIES_NO_01 - [11:5] */ +#define WM8994_DCS_SERIES_NO_01_WIDTH 7 /* DCS_SERIES_NO_01 - [11:5] */ +#define WM8994_DCS_TIMER_PERIOD_01_MASK 0x000F /* DCS_TIMER_PERIOD_01 - [3:0] */ +#define WM8994_DCS_TIMER_PERIOD_01_SHIFT 0 /* DCS_TIMER_PERIOD_01 - [3:0] */ +#define WM8994_DCS_TIMER_PERIOD_01_WIDTH 4 /* DCS_TIMER_PERIOD_01 - [3:0] */ + +/* + * R87 (0x57) - DC Servo (4) + */ +#define WM8994_DCS_DAC_WR_VAL_1_MASK 0xFF00 /* DCS_DAC_WR_VAL_1 - [15:8] */ +#define WM8994_DCS_DAC_WR_VAL_1_SHIFT 8 /* DCS_DAC_WR_VAL_1 - [15:8] */ +#define WM8994_DCS_DAC_WR_VAL_1_WIDTH 8 /* DCS_DAC_WR_VAL_1 - [15:8] */ +#define WM8994_DCS_DAC_WR_VAL_0_MASK 0x00FF /* DCS_DAC_WR_VAL_0 - [7:0] */ +#define WM8994_DCS_DAC_WR_VAL_0_SHIFT 0 /* DCS_DAC_WR_VAL_0 - [7:0] */ +#define WM8994_DCS_DAC_WR_VAL_0_WIDTH 8 /* DCS_DAC_WR_VAL_0 - [7:0] */ + +/* + * R88 (0x58) - DC Servo Readback + */ +#define WM8994_DCS_CAL_COMPLETE_MASK 0x0300 /* DCS_CAL_COMPLETE - [9:8] */ +#define WM8994_DCS_CAL_COMPLETE_SHIFT 8 /* DCS_CAL_COMPLETE - [9:8] */ +#define WM8994_DCS_CAL_COMPLETE_WIDTH 2 /* DCS_CAL_COMPLETE - [9:8] */ +#define WM8994_DCS_DAC_WR_COMPLETE_MASK 0x0030 /* DCS_DAC_WR_COMPLETE - [5:4] */ +#define WM8994_DCS_DAC_WR_COMPLETE_SHIFT 4 /* DCS_DAC_WR_COMPLETE - [5:4] */ +#define WM8994_DCS_DAC_WR_COMPLETE_WIDTH 2 /* DCS_DAC_WR_COMPLETE - [5:4] */ +#define WM8994_DCS_STARTUP_COMPLETE_MASK 0x0003 /* DCS_STARTUP_COMPLETE - [1:0] */ +#define 
WM8994_DCS_STARTUP_COMPLETE_SHIFT 0 /* DCS_STARTUP_COMPLETE - [1:0] */ +#define WM8994_DCS_STARTUP_COMPLETE_WIDTH 2 /* DCS_STARTUP_COMPLETE - [1:0] */ + +/* + * R96 (0x60) - Analogue HP (1) + */ +#define WM8994_HPOUT1L_RMV_SHORT 0x0080 /* HPOUT1L_RMV_SHORT */ +#define WM8994_HPOUT1L_RMV_SHORT_MASK 0x0080 /* HPOUT1L_RMV_SHORT */ +#define WM8994_HPOUT1L_RMV_SHORT_SHIFT 7 /* HPOUT1L_RMV_SHORT */ +#define WM8994_HPOUT1L_RMV_SHORT_WIDTH 1 /* HPOUT1L_RMV_SHORT */ +#define WM8994_HPOUT1L_OUTP 0x0040 /* HPOUT1L_OUTP */ +#define WM8994_HPOUT1L_OUTP_MASK 0x0040 /* HPOUT1L_OUTP */ +#define WM8994_HPOUT1L_OUTP_SHIFT 6 /* HPOUT1L_OUTP */ +#define WM8994_HPOUT1L_OUTP_WIDTH 1 /* HPOUT1L_OUTP */ +#define WM8994_HPOUT1L_DLY 0x0020 /* HPOUT1L_DLY */ +#define WM8994_HPOUT1L_DLY_MASK 0x0020 /* HPOUT1L_DLY */ +#define WM8994_HPOUT1L_DLY_SHIFT 5 /* HPOUT1L_DLY */ +#define WM8994_HPOUT1L_DLY_WIDTH 1 /* HPOUT1L_DLY */ +#define WM8994_HPOUT1R_RMV_SHORT 0x0008 /* HPOUT1R_RMV_SHORT */ +#define WM8994_HPOUT1R_RMV_SHORT_MASK 0x0008 /* HPOUT1R_RMV_SHORT */ +#define WM8994_HPOUT1R_RMV_SHORT_SHIFT 3 /* HPOUT1R_RMV_SHORT */ +#define WM8994_HPOUT1R_RMV_SHORT_WIDTH 1 /* HPOUT1R_RMV_SHORT */ +#define WM8994_HPOUT1R_OUTP 0x0004 /* HPOUT1R_OUTP */ +#define WM8994_HPOUT1R_OUTP_MASK 0x0004 /* HPOUT1R_OUTP */ +#define WM8994_HPOUT1R_OUTP_SHIFT 2 /* HPOUT1R_OUTP */ +#define WM8994_HPOUT1R_OUTP_WIDTH 1 /* HPOUT1R_OUTP */ +#define WM8994_HPOUT1R_DLY 0x0002 /* HPOUT1R_DLY */ +#define WM8994_HPOUT1R_DLY_MASK 0x0002 /* HPOUT1R_DLY */ +#define WM8994_HPOUT1R_DLY_SHIFT 1 /* HPOUT1R_DLY */ +#define WM8994_HPOUT1R_DLY_WIDTH 1 /* HPOUT1R_DLY */ + +/* + * R256 (0x100) - Chip Revision + */ +#define WM8994_CHIP_REV_MASK 0x000F /* CHIP_REV - [3:0] */ +#define WM8994_CHIP_REV_SHIFT 0 /* CHIP_REV - [3:0] */ +#define WM8994_CHIP_REV_WIDTH 4 /* CHIP_REV - [3:0] */ + +/* + * R257 (0x101) - Control Interface + */ +#define WM8994_SPI_CONTRD 0x0040 /* SPI_CONTRD */ +#define WM8994_SPI_CONTRD_MASK 0x0040 /* SPI_CONTRD */ +#define WM8994_SPI_CONTRD_SHIFT 6 /* SPI_CONTRD */ +#define WM8994_SPI_CONTRD_WIDTH 1 /* SPI_CONTRD */ +#define WM8994_SPI_4WIRE 0x0020 /* SPI_4WIRE */ +#define WM8994_SPI_4WIRE_MASK 0x0020 /* SPI_4WIRE */ +#define WM8994_SPI_4WIRE_SHIFT 5 /* SPI_4WIRE */ +#define WM8994_SPI_4WIRE_WIDTH 1 /* SPI_4WIRE */ +#define WM8994_SPI_CFG 0x0010 /* SPI_CFG */ +#define WM8994_SPI_CFG_MASK 0x0010 /* SPI_CFG */ +#define WM8994_SPI_CFG_SHIFT 4 /* SPI_CFG */ +#define WM8994_SPI_CFG_WIDTH 1 /* SPI_CFG */ +#define WM8994_AUTO_INC 0x0004 /* AUTO_INC */ +#define WM8994_AUTO_INC_MASK 0x0004 /* AUTO_INC */ +#define WM8994_AUTO_INC_SHIFT 2 /* AUTO_INC */ +#define WM8994_AUTO_INC_WIDTH 1 /* AUTO_INC */ + +/* + * R272 (0x110) - Write Sequencer Ctrl (1) + */ +#define WM8994_WSEQ_ENA 0x8000 /* WSEQ_ENA */ +#define WM8994_WSEQ_ENA_MASK 0x8000 /* WSEQ_ENA */ +#define WM8994_WSEQ_ENA_SHIFT 15 /* WSEQ_ENA */ +#define WM8994_WSEQ_ENA_WIDTH 1 /* WSEQ_ENA */ +#define WM8994_WSEQ_ABORT 0x0200 /* WSEQ_ABORT */ +#define WM8994_WSEQ_ABORT_MASK 0x0200 /* WSEQ_ABORT */ +#define WM8994_WSEQ_ABORT_SHIFT 9 /* WSEQ_ABORT */ +#define WM8994_WSEQ_ABORT_WIDTH 1 /* WSEQ_ABORT */ +#define WM8994_WSEQ_START 0x0100 /* WSEQ_START */ +#define WM8994_WSEQ_START_MASK 0x0100 /* WSEQ_START */ +#define WM8994_WSEQ_START_SHIFT 8 /* WSEQ_START */ +#define WM8994_WSEQ_START_WIDTH 1 /* WSEQ_START */ +#define WM8994_WSEQ_START_INDEX_MASK 0x007F /* WSEQ_START_INDEX - [6:0] */ +#define WM8994_WSEQ_START_INDEX_SHIFT 0 /* WSEQ_START_INDEX - [6:0] */ +#define WM8994_WSEQ_START_INDEX_WIDTH 7 /* 
WSEQ_START_INDEX - [6:0] */ + +/* + * R273 (0x111) - Write Sequencer Ctrl (2) + */ +#define WM8994_WSEQ_BUSY 0x0100 /* WSEQ_BUSY */ +#define WM8994_WSEQ_BUSY_MASK 0x0100 /* WSEQ_BUSY */ +#define WM8994_WSEQ_BUSY_SHIFT 8 /* WSEQ_BUSY */ +#define WM8994_WSEQ_BUSY_WIDTH 1 /* WSEQ_BUSY */ +#define WM8994_WSEQ_CURRENT_INDEX_MASK 0x007F /* WSEQ_CURRENT_INDEX - [6:0] */ +#define WM8994_WSEQ_CURRENT_INDEX_SHIFT 0 /* WSEQ_CURRENT_INDEX - [6:0] */ +#define WM8994_WSEQ_CURRENT_INDEX_WIDTH 7 /* WSEQ_CURRENT_INDEX - [6:0] */ + +/* + * R512 (0x200) - AIF1 Clocking (1) + */ +#define WM8994_AIF1CLK_SRC_MASK 0x0018 /* AIF1CLK_SRC - [4:3] */ +#define WM8994_AIF1CLK_SRC_SHIFT 3 /* AIF1CLK_SRC - [4:3] */ +#define WM8994_AIF1CLK_SRC_WIDTH 2 /* AIF1CLK_SRC - [4:3] */ +#define WM8994_AIF1CLK_INV 0x0004 /* AIF1CLK_INV */ +#define WM8994_AIF1CLK_INV_MASK 0x0004 /* AIF1CLK_INV */ +#define WM8994_AIF1CLK_INV_SHIFT 2 /* AIF1CLK_INV */ +#define WM8994_AIF1CLK_INV_WIDTH 1 /* AIF1CLK_INV */ +#define WM8994_AIF1CLK_DIV 0x0002 /* AIF1CLK_DIV */ +#define WM8994_AIF1CLK_DIV_MASK 0x0002 /* AIF1CLK_DIV */ +#define WM8994_AIF1CLK_DIV_SHIFT 1 /* AIF1CLK_DIV */ +#define WM8994_AIF1CLK_DIV_WIDTH 1 /* AIF1CLK_DIV */ +#define WM8994_AIF1CLK_ENA 0x0001 /* AIF1CLK_ENA */ +#define WM8994_AIF1CLK_ENA_MASK 0x0001 /* AIF1CLK_ENA */ +#define WM8994_AIF1CLK_ENA_SHIFT 0 /* AIF1CLK_ENA */ +#define WM8994_AIF1CLK_ENA_WIDTH 1 /* AIF1CLK_ENA */ + +/* + * R513 (0x201) - AIF1 Clocking (2) + */ +#define WM8994_AIF1DAC_DIV_MASK 0x0038 /* AIF1DAC_DIV - [5:3] */ +#define WM8994_AIF1DAC_DIV_SHIFT 3 /* AIF1DAC_DIV - [5:3] */ +#define WM8994_AIF1DAC_DIV_WIDTH 3 /* AIF1DAC_DIV - [5:3] */ +#define WM8994_AIF1ADC_DIV_MASK 0x0007 /* AIF1ADC_DIV - [2:0] */ +#define WM8994_AIF1ADC_DIV_SHIFT 0 /* AIF1ADC_DIV - [2:0] */ +#define WM8994_AIF1ADC_DIV_WIDTH 3 /* AIF1ADC_DIV - [2:0] */ + +/* + * R516 (0x204) - AIF2 Clocking (1) + */ +#define WM8994_AIF2CLK_SRC_MASK 0x0018 /* AIF2CLK_SRC - [4:3] */ +#define WM8994_AIF2CLK_SRC_SHIFT 3 /* AIF2CLK_SRC - [4:3] */ +#define WM8994_AIF2CLK_SRC_WIDTH 2 /* AIF2CLK_SRC - [4:3] */ +#define WM8994_AIF2CLK_INV 0x0004 /* AIF2CLK_INV */ +#define WM8994_AIF2CLK_INV_MASK 0x0004 /* AIF2CLK_INV */ +#define WM8994_AIF2CLK_INV_SHIFT 2 /* AIF2CLK_INV */ +#define WM8994_AIF2CLK_INV_WIDTH 1 /* AIF2CLK_INV */ +#define WM8994_AIF2CLK_DIV 0x0002 /* AIF2CLK_DIV */ +#define WM8994_AIF2CLK_DIV_MASK 0x0002 /* AIF2CLK_DIV */ +#define WM8994_AIF2CLK_DIV_SHIFT 1 /* AIF2CLK_DIV */ +#define WM8994_AIF2CLK_DIV_WIDTH 1 /* AIF2CLK_DIV */ +#define WM8994_AIF2CLK_ENA 0x0001 /* AIF2CLK_ENA */ +#define WM8994_AIF2CLK_ENA_MASK 0x0001 /* AIF2CLK_ENA */ +#define WM8994_AIF2CLK_ENA_SHIFT 0 /* AIF2CLK_ENA */ +#define WM8994_AIF2CLK_ENA_WIDTH 1 /* AIF2CLK_ENA */ + +/* + * R517 (0x205) - AIF2 Clocking (2) + */ +#define WM8994_AIF2DAC_DIV_MASK 0x0038 /* AIF2DAC_DIV - [5:3] */ +#define WM8994_AIF2DAC_DIV_SHIFT 3 /* AIF2DAC_DIV - [5:3] */ +#define WM8994_AIF2DAC_DIV_WIDTH 3 /* AIF2DAC_DIV - [5:3] */ +#define WM8994_AIF2ADC_DIV_MASK 0x0007 /* AIF2ADC_DIV - [2:0] */ +#define WM8994_AIF2ADC_DIV_SHIFT 0 /* AIF2ADC_DIV - [2:0] */ +#define WM8994_AIF2ADC_DIV_WIDTH 3 /* AIF2ADC_DIV - [2:0] */ + +/* + * R520 (0x208) - Clocking (1) + */ +#define WM8994_TOCLK_ENA 0x0010 /* TOCLK_ENA */ +#define WM8994_TOCLK_ENA_MASK 0x0010 /* TOCLK_ENA */ +#define WM8994_TOCLK_ENA_SHIFT 4 /* TOCLK_ENA */ +#define WM8994_TOCLK_ENA_WIDTH 1 /* TOCLK_ENA */ +#define WM8994_AIF1DSPCLK_ENA 0x0008 /* AIF1DSPCLK_ENA */ +#define WM8994_AIF1DSPCLK_ENA_MASK 0x0008 /* AIF1DSPCLK_ENA */ +#define 
WM8994_AIF1DSPCLK_ENA_SHIFT 3 /* AIF1DSPCLK_ENA */ +#define WM8994_AIF1DSPCLK_ENA_WIDTH 1 /* AIF1DSPCLK_ENA */ +#define WM8994_AIF2DSPCLK_ENA 0x0004 /* AIF2DSPCLK_ENA */ +#define WM8994_AIF2DSPCLK_ENA_MASK 0x0004 /* AIF2DSPCLK_ENA */ +#define WM8994_AIF2DSPCLK_ENA_SHIFT 2 /* AIF2DSPCLK_ENA */ +#define WM8994_AIF2DSPCLK_ENA_WIDTH 1 /* AIF2DSPCLK_ENA */ +#define WM8994_SYSDSPCLK_ENA 0x0002 /* SYSDSPCLK_ENA */ +#define WM8994_SYSDSPCLK_ENA_MASK 0x0002 /* SYSDSPCLK_ENA */ +#define WM8994_SYSDSPCLK_ENA_SHIFT 1 /* SYSDSPCLK_ENA */ +#define WM8994_SYSDSPCLK_ENA_WIDTH 1 /* SYSDSPCLK_ENA */ +#define WM8994_SYSCLK_SRC 0x0001 /* SYSCLK_SRC */ +#define WM8994_SYSCLK_SRC_MASK 0x0001 /* SYSCLK_SRC */ +#define WM8994_SYSCLK_SRC_SHIFT 0 /* SYSCLK_SRC */ +#define WM8994_SYSCLK_SRC_WIDTH 1 /* SYSCLK_SRC */ + +/* + * R521 (0x209) - Clocking (2) + */ +#define WM8994_TOCLK_DIV_MASK 0x0700 /* TOCLK_DIV - [10:8] */ +#define WM8994_TOCLK_DIV_SHIFT 8 /* TOCLK_DIV - [10:8] */ +#define WM8994_TOCLK_DIV_WIDTH 3 /* TOCLK_DIV - [10:8] */ +#define WM8994_DBCLK_DIV_MASK 0x0070 /* DBCLK_DIV - [6:4] */ +#define WM8994_DBCLK_DIV_SHIFT 4 /* DBCLK_DIV - [6:4] */ +#define WM8994_DBCLK_DIV_WIDTH 3 /* DBCLK_DIV - [6:4] */ +#define WM8994_OPCLK_DIV_MASK 0x0007 /* OPCLK_DIV - [2:0] */ +#define WM8994_OPCLK_DIV_SHIFT 0 /* OPCLK_DIV - [2:0] */ +#define WM8994_OPCLK_DIV_WIDTH 3 /* OPCLK_DIV - [2:0] */ + +/* + * R528 (0x210) - AIF1 Rate + */ +#define WM8994_AIF1_SR_MASK 0x00F0 /* AIF1_SR - [7:4] */ +#define WM8994_AIF1_SR_SHIFT 4 /* AIF1_SR - [7:4] */ +#define WM8994_AIF1_SR_WIDTH 4 /* AIF1_SR - [7:4] */ +#define WM8994_AIF1CLK_RATE_MASK 0x000F /* AIF1CLK_RATE - [3:0] */ +#define WM8994_AIF1CLK_RATE_SHIFT 0 /* AIF1CLK_RATE - [3:0] */ +#define WM8994_AIF1CLK_RATE_WIDTH 4 /* AIF1CLK_RATE - [3:0] */ + +/* + * R529 (0x211) - AIF2 Rate + */ +#define WM8994_AIF2_SR_MASK 0x00F0 /* AIF2_SR - [7:4] */ +#define WM8994_AIF2_SR_SHIFT 4 /* AIF2_SR - [7:4] */ +#define WM8994_AIF2_SR_WIDTH 4 /* AIF2_SR - [7:4] */ +#define WM8994_AIF2CLK_RATE_MASK 0x000F /* AIF2CLK_RATE - [3:0] */ +#define WM8994_AIF2CLK_RATE_SHIFT 0 /* AIF2CLK_RATE - [3:0] */ +#define WM8994_AIF2CLK_RATE_WIDTH 4 /* AIF2CLK_RATE - [3:0] */ + +/* + * R530 (0x212) - Rate Status + */ +#define WM8994_SR_ERROR_MASK 0x000F /* SR_ERROR - [3:0] */ +#define WM8994_SR_ERROR_SHIFT 0 /* SR_ERROR - [3:0] */ +#define WM8994_SR_ERROR_WIDTH 4 /* SR_ERROR - [3:0] */ + +/* + * R544 (0x220) - FLL1 Control (1) + */ +#define WM8994_FLL1_FRAC 0x0004 /* FLL1_FRAC */ +#define WM8994_FLL1_FRAC_MASK 0x0004 /* FLL1_FRAC */ +#define WM8994_FLL1_FRAC_SHIFT 2 /* FLL1_FRAC */ +#define WM8994_FLL1_FRAC_WIDTH 1 /* FLL1_FRAC */ +#define WM8994_FLL1_OSC_ENA 0x0002 /* FLL1_OSC_ENA */ +#define WM8994_FLL1_OSC_ENA_MASK 0x0002 /* FLL1_OSC_ENA */ +#define WM8994_FLL1_OSC_ENA_SHIFT 1 /* FLL1_OSC_ENA */ +#define WM8994_FLL1_OSC_ENA_WIDTH 1 /* FLL1_OSC_ENA */ +#define WM8994_FLL1_ENA 0x0001 /* FLL1_ENA */ +#define WM8994_FLL1_ENA_MASK 0x0001 /* FLL1_ENA */ +#define WM8994_FLL1_ENA_SHIFT 0 /* FLL1_ENA */ +#define WM8994_FLL1_ENA_WIDTH 1 /* FLL1_ENA */ + +/* + * R545 (0x221) - FLL1 Control (2) + */ +#define WM8994_FLL1_OUTDIV_MASK 0x3F00 /* FLL1_OUTDIV - [13:8] */ +#define WM8994_FLL1_OUTDIV_SHIFT 8 /* FLL1_OUTDIV - [13:8] */ +#define WM8994_FLL1_OUTDIV_WIDTH 6 /* FLL1_OUTDIV - [13:8] */ +#define WM8994_FLL1_CTRL_RATE_MASK 0x0070 /* FLL1_CTRL_RATE - [6:4] */ +#define WM8994_FLL1_CTRL_RATE_SHIFT 4 /* FLL1_CTRL_RATE - [6:4] */ +#define WM8994_FLL1_CTRL_RATE_WIDTH 3 /* FLL1_CTRL_RATE - [6:4] */ +#define WM8994_FLL1_FRATIO_MASK 
0x0007 /* FLL1_FRATIO - [2:0] */ +#define WM8994_FLL1_FRATIO_SHIFT 0 /* FLL1_FRATIO - [2:0] */ +#define WM8994_FLL1_FRATIO_WIDTH 3 /* FLL1_FRATIO - [2:0] */ + +/* + * R546 (0x222) - FLL1 Control (3) + */ +#define WM8994_FLL1_K_MASK 0xFFFF /* FLL1_K - [15:0] */ +#define WM8994_FLL1_K_SHIFT 0 /* FLL1_K - [15:0] */ +#define WM8994_FLL1_K_WIDTH 16 /* FLL1_K - [15:0] */ + +/* + * R547 (0x223) - FLL1 Control (4) + */ +#define WM8994_FLL1_N_MASK 0x7FE0 /* FLL1_N - [14:5] */ +#define WM8994_FLL1_N_SHIFT 5 /* FLL1_N - [14:5] */ +#define WM8994_FLL1_N_WIDTH 10 /* FLL1_N - [14:5] */ +#define WM8994_FLL1_LOOP_GAIN_MASK 0x000F /* FLL1_LOOP_GAIN - [3:0] */ +#define WM8994_FLL1_LOOP_GAIN_SHIFT 0 /* FLL1_LOOP_GAIN - [3:0] */ +#define WM8994_FLL1_LOOP_GAIN_WIDTH 4 /* FLL1_LOOP_GAIN - [3:0] */ + +/* + * R548 (0x224) - FLL1 Control (5) + */ +#define WM8994_FLL1_FRC_NCO_VAL_MASK 0x1F80 /* FLL1_FRC_NCO_VAL - [12:7] */ +#define WM8994_FLL1_FRC_NCO_VAL_SHIFT 7 /* FLL1_FRC_NCO_VAL - [12:7] */ +#define WM8994_FLL1_FRC_NCO_VAL_WIDTH 6 /* FLL1_FRC_NCO_VAL - [12:7] */ +#define WM8994_FLL1_FRC_NCO 0x0040 /* FLL1_FRC_NCO */ +#define WM8994_FLL1_FRC_NCO_MASK 0x0040 /* FLL1_FRC_NCO */ +#define WM8994_FLL1_FRC_NCO_SHIFT 6 /* FLL1_FRC_NCO */ +#define WM8994_FLL1_FRC_NCO_WIDTH 1 /* FLL1_FRC_NCO */ +#define WM8994_FLL1_REFCLK_DIV_MASK 0x0018 /* FLL1_REFCLK_DIV - [4:3] */ +#define WM8994_FLL1_REFCLK_DIV_SHIFT 3 /* FLL1_REFCLK_DIV - [4:3] */ +#define WM8994_FLL1_REFCLK_DIV_WIDTH 2 /* FLL1_REFCLK_DIV - [4:3] */ +#define WM8994_FLL1_REFCLK_SRC_MASK 0x0003 /* FLL1_REFCLK_SRC - [1:0] */ +#define WM8994_FLL1_REFCLK_SRC_SHIFT 0 /* FLL1_REFCLK_SRC - [1:0] */ +#define WM8994_FLL1_REFCLK_SRC_WIDTH 2 /* FLL1_REFCLK_SRC - [1:0] */ + +/* + * R576 (0x240) - FLL2 Control (1) + */ +#define WM8994_FLL2_FRAC 0x0004 /* FLL2_FRAC */ +#define WM8994_FLL2_FRAC_MASK 0x0004 /* FLL2_FRAC */ +#define WM8994_FLL2_FRAC_SHIFT 2 /* FLL2_FRAC */ +#define WM8994_FLL2_FRAC_WIDTH 1 /* FLL2_FRAC */ +#define WM8994_FLL2_OSC_ENA 0x0002 /* FLL2_OSC_ENA */ +#define WM8994_FLL2_OSC_ENA_MASK 0x0002 /* FLL2_OSC_ENA */ +#define WM8994_FLL2_OSC_ENA_SHIFT 1 /* FLL2_OSC_ENA */ +#define WM8994_FLL2_OSC_ENA_WIDTH 1 /* FLL2_OSC_ENA */ +#define WM8994_FLL2_ENA 0x0001 /* FLL2_ENA */ +#define WM8994_FLL2_ENA_MASK 0x0001 /* FLL2_ENA */ +#define WM8994_FLL2_ENA_SHIFT 0 /* FLL2_ENA */ +#define WM8994_FLL2_ENA_WIDTH 1 /* FLL2_ENA */ + +/* + * R577 (0x241) - FLL2 Control (2) + */ +#define WM8994_FLL2_OUTDIV_MASK 0x3F00 /* FLL2_OUTDIV - [13:8] */ +#define WM8994_FLL2_OUTDIV_SHIFT 8 /* FLL2_OUTDIV - [13:8] */ +#define WM8994_FLL2_OUTDIV_WIDTH 6 /* FLL2_OUTDIV - [13:8] */ +#define WM8994_FLL2_CTRL_RATE_MASK 0x0070 /* FLL2_CTRL_RATE - [6:4] */ +#define WM8994_FLL2_CTRL_RATE_SHIFT 4 /* FLL2_CTRL_RATE - [6:4] */ +#define WM8994_FLL2_CTRL_RATE_WIDTH 3 /* FLL2_CTRL_RATE - [6:4] */ +#define WM8994_FLL2_FRATIO_MASK 0x0007 /* FLL2_FRATIO - [2:0] */ +#define WM8994_FLL2_FRATIO_SHIFT 0 /* FLL2_FRATIO - [2:0] */ +#define WM8994_FLL2_FRATIO_WIDTH 3 /* FLL2_FRATIO - [2:0] */ + +/* + * R578 (0x242) - FLL2 Control (3) + */ +#define WM8994_FLL2_K_MASK 0xFFFF /* FLL2_K - [15:0] */ +#define WM8994_FLL2_K_SHIFT 0 /* FLL2_K - [15:0] */ +#define WM8994_FLL2_K_WIDTH 16 /* FLL2_K - [15:0] */ + +/* + * R579 (0x243) - FLL2 Control (4) + */ +#define WM8994_FLL2_N_MASK 0x7FE0 /* FLL2_N - [14:5] */ +#define WM8994_FLL2_N_SHIFT 5 /* FLL2_N - [14:5] */ +#define WM8994_FLL2_N_WIDTH 10 /* FLL2_N - [14:5] */ +#define WM8994_FLL2_LOOP_GAIN_MASK 0x000F /* FLL2_LOOP_GAIN - [3:0] */ +#define WM8994_FLL2_LOOP_GAIN_SHIFT 0 
/* FLL2_LOOP_GAIN - [3:0] */ +#define WM8994_FLL2_LOOP_GAIN_WIDTH 4 /* FLL2_LOOP_GAIN - [3:0] */ + +/* + * R580 (0x244) - FLL2 Control (5) + */ +#define WM8994_FLL2_FRC_NCO_VAL_MASK 0x1F80 /* FLL2_FRC_NCO_VAL - [12:7] */ +#define WM8994_FLL2_FRC_NCO_VAL_SHIFT 7 /* FLL2_FRC_NCO_VAL - [12:7] */ +#define WM8994_FLL2_FRC_NCO_VAL_WIDTH 6 /* FLL2_FRC_NCO_VAL - [12:7] */ +#define WM8994_FLL2_FRC_NCO 0x0040 /* FLL2_FRC_NCO */ +#define WM8994_FLL2_FRC_NCO_MASK 0x0040 /* FLL2_FRC_NCO */ +#define WM8994_FLL2_FRC_NCO_SHIFT 6 /* FLL2_FRC_NCO */ +#define WM8994_FLL2_FRC_NCO_WIDTH 1 /* FLL2_FRC_NCO */ +#define WM8994_FLL2_REFCLK_DIV_MASK 0x0018 /* FLL2_REFCLK_DIV - [4:3] */ +#define WM8994_FLL2_REFCLK_DIV_SHIFT 3 /* FLL2_REFCLK_DIV - [4:3] */ +#define WM8994_FLL2_REFCLK_DIV_WIDTH 2 /* FLL2_REFCLK_DIV - [4:3] */ +#define WM8994_FLL2_REFCLK_SRC_MASK 0x0003 /* FLL2_REFCLK_SRC - [1:0] */ +#define WM8994_FLL2_REFCLK_SRC_SHIFT 0 /* FLL2_REFCLK_SRC - [1:0] */ +#define WM8994_FLL2_REFCLK_SRC_WIDTH 2 /* FLL2_REFCLK_SRC - [1:0] */ + +/* + * R768 (0x300) - AIF1 Control (1) + */ +#define WM8994_AIF1ADCL_SRC 0x8000 /* AIF1ADCL_SRC */ +#define WM8994_AIF1ADCL_SRC_MASK 0x8000 /* AIF1ADCL_SRC */ +#define WM8994_AIF1ADCL_SRC_SHIFT 15 /* AIF1ADCL_SRC */ +#define WM8994_AIF1ADCL_SRC_WIDTH 1 /* AIF1ADCL_SRC */ +#define WM8994_AIF1ADCR_SRC 0x4000 /* AIF1ADCR_SRC */ +#define WM8994_AIF1ADCR_SRC_MASK 0x4000 /* AIF1ADCR_SRC */ +#define WM8994_AIF1ADCR_SRC_SHIFT 14 /* AIF1ADCR_SRC */ +#define WM8994_AIF1ADCR_SRC_WIDTH 1 /* AIF1ADCR_SRC */ +#define WM8994_AIF1ADC_TDM 0x2000 /* AIF1ADC_TDM */ +#define WM8994_AIF1ADC_TDM_MASK 0x2000 /* AIF1ADC_TDM */ +#define WM8994_AIF1ADC_TDM_SHIFT 13 /* AIF1ADC_TDM */ +#define WM8994_AIF1ADC_TDM_WIDTH 1 /* AIF1ADC_TDM */ +#define WM8994_AIF1_BCLK_INV 0x0100 /* AIF1_BCLK_INV */ +#define WM8994_AIF1_BCLK_INV_MASK 0x0100 /* AIF1_BCLK_INV */ +#define WM8994_AIF1_BCLK_INV_SHIFT 8 /* AIF1_BCLK_INV */ +#define WM8994_AIF1_BCLK_INV_WIDTH 1 /* AIF1_BCLK_INV */ +#define WM8994_AIF1_LRCLK_INV 0x0080 /* AIF1_LRCLK_INV */ +#define WM8994_AIF1_LRCLK_INV_MASK 0x0080 /* AIF1_LRCLK_INV */ +#define WM8994_AIF1_LRCLK_INV_SHIFT 7 /* AIF1_LRCLK_INV */ +#define WM8994_AIF1_LRCLK_INV_WIDTH 1 /* AIF1_LRCLK_INV */ +#define WM8994_AIF1_WL_MASK 0x0060 /* AIF1_WL - [6:5] */ +#define WM8994_AIF1_WL_SHIFT 5 /* AIF1_WL - [6:5] */ +#define WM8994_AIF1_WL_WIDTH 2 /* AIF1_WL - [6:5] */ +#define WM8994_AIF1_FMT_MASK 0x0018 /* AIF1_FMT - [4:3] */ +#define WM8994_AIF1_FMT_SHIFT 3 /* AIF1_FMT - [4:3] */ +#define WM8994_AIF1_FMT_WIDTH 2 /* AIF1_FMT - [4:3] */ + +/* + * R769 (0x301) - AIF1 Control (2) + */ +#define WM8994_AIF1DACL_SRC 0x8000 /* AIF1DACL_SRC */ +#define WM8994_AIF1DACL_SRC_MASK 0x8000 /* AIF1DACL_SRC */ +#define WM8994_AIF1DACL_SRC_SHIFT 15 /* AIF1DACL_SRC */ +#define WM8994_AIF1DACL_SRC_WIDTH 1 /* AIF1DACL_SRC */ +#define WM8994_AIF1DACR_SRC 0x4000 /* AIF1DACR_SRC */ +#define WM8994_AIF1DACR_SRC_MASK 0x4000 /* AIF1DACR_SRC */ +#define WM8994_AIF1DACR_SRC_SHIFT 14 /* AIF1DACR_SRC */ +#define WM8994_AIF1DACR_SRC_WIDTH 1 /* AIF1DACR_SRC */ +#define WM8994_AIF1DAC_BOOST_MASK 0x0C00 /* AIF1DAC_BOOST - [11:10] */ +#define WM8994_AIF1DAC_BOOST_SHIFT 10 /* AIF1DAC_BOOST - [11:10] */ +#define WM8994_AIF1DAC_BOOST_WIDTH 2 /* AIF1DAC_BOOST - [11:10] */ +#define WM8994_AIF1_MONO 0x0100 /* AIF1_MONO */ +#define WM8994_AIF1_MONO_MASK 0x0100 /* AIF1_MONO */ +#define WM8994_AIF1_MONO_SHIFT 8 /* AIF1_MONO */ +#define WM8994_AIF1_MONO_WIDTH 1 /* AIF1_MONO */ +#define WM8994_AIF1DAC_COMP 0x0010 /* AIF1DAC_COMP */ +#define 
WM8994_AIF1DAC_COMP_MASK 0x0010 /* AIF1DAC_COMP */ +#define WM8994_AIF1DAC_COMP_SHIFT 4 /* AIF1DAC_COMP */ +#define WM8994_AIF1DAC_COMP_WIDTH 1 /* AIF1DAC_COMP */ +#define WM8994_AIF1DAC_COMPMODE 0x0008 /* AIF1DAC_COMPMODE */ +#define WM8994_AIF1DAC_COMPMODE_MASK 0x0008 /* AIF1DAC_COMPMODE */ +#define WM8994_AIF1DAC_COMPMODE_SHIFT 3 /* AIF1DAC_COMPMODE */ +#define WM8994_AIF1DAC_COMPMODE_WIDTH 1 /* AIF1DAC_COMPMODE */ +#define WM8994_AIF1ADC_COMP 0x0004 /* AIF1ADC_COMP */ +#define WM8994_AIF1ADC_COMP_MASK 0x0004 /* AIF1ADC_COMP */ +#define WM8994_AIF1ADC_COMP_SHIFT 2 /* AIF1ADC_COMP */ +#define WM8994_AIF1ADC_COMP_WIDTH 1 /* AIF1ADC_COMP */ +#define WM8994_AIF1ADC_COMPMODE 0x0002 /* AIF1ADC_COMPMODE */ +#define WM8994_AIF1ADC_COMPMODE_MASK 0x0002 /* AIF1ADC_COMPMODE */ +#define WM8994_AIF1ADC_COMPMODE_SHIFT 1 /* AIF1ADC_COMPMODE */ +#define WM8994_AIF1ADC_COMPMODE_WIDTH 1 /* AIF1ADC_COMPMODE */ +#define WM8994_AIF1_LOOPBACK 0x0001 /* AIF1_LOOPBACK */ +#define WM8994_AIF1_LOOPBACK_MASK 0x0001 /* AIF1_LOOPBACK */ +#define WM8994_AIF1_LOOPBACK_SHIFT 0 /* AIF1_LOOPBACK */ +#define WM8994_AIF1_LOOPBACK_WIDTH 1 /* AIF1_LOOPBACK */ + +/* + * R770 (0x302) - AIF1 Master/Slave + */ +#define WM8994_AIF1_TRI 0x8000 /* AIF1_TRI */ +#define WM8994_AIF1_TRI_MASK 0x8000 /* AIF1_TRI */ +#define WM8994_AIF1_TRI_SHIFT 15 /* AIF1_TRI */ +#define WM8994_AIF1_TRI_WIDTH 1 /* AIF1_TRI */ +#define WM8994_AIF1_MSTR 0x4000 /* AIF1_MSTR */ +#define WM8994_AIF1_MSTR_MASK 0x4000 /* AIF1_MSTR */ +#define WM8994_AIF1_MSTR_SHIFT 14 /* AIF1_MSTR */ +#define WM8994_AIF1_MSTR_WIDTH 1 /* AIF1_MSTR */ +#define WM8994_AIF1_CLK_FRC 0x2000 /* AIF1_CLK_FRC */ +#define WM8994_AIF1_CLK_FRC_MASK 0x2000 /* AIF1_CLK_FRC */ +#define WM8994_AIF1_CLK_FRC_SHIFT 13 /* AIF1_CLK_FRC */ +#define WM8994_AIF1_CLK_FRC_WIDTH 1 /* AIF1_CLK_FRC */ +#define WM8994_AIF1_LRCLK_FRC 0x1000 /* AIF1_LRCLK_FRC */ +#define WM8994_AIF1_LRCLK_FRC_MASK 0x1000 /* AIF1_LRCLK_FRC */ +#define WM8994_AIF1_LRCLK_FRC_SHIFT 12 /* AIF1_LRCLK_FRC */ +#define WM8994_AIF1_LRCLK_FRC_WIDTH 1 /* AIF1_LRCLK_FRC */ + +/* + * R771 (0x303) - AIF1 BCLK + */ +#define WM8994_AIF1_BCLK_DIV_MASK 0x01F0 /* AIF1_BCLK_DIV - [8:4] */ +#define WM8994_AIF1_BCLK_DIV_SHIFT 4 /* AIF1_BCLK_DIV - [8:4] */ +#define WM8994_AIF1_BCLK_DIV_WIDTH 5 /* AIF1_BCLK_DIV - [8:4] */ + +/* + * R772 (0x304) - AIF1ADC LRCLK + */ +#define WM8994_AIF1ADC_LRCLK_DIR 0x0800 /* AIF1ADC_LRCLK_DIR */ +#define WM8994_AIF1ADC_LRCLK_DIR_MASK 0x0800 /* AIF1ADC_LRCLK_DIR */ +#define WM8994_AIF1ADC_LRCLK_DIR_SHIFT 11 /* AIF1ADC_LRCLK_DIR */ +#define WM8994_AIF1ADC_LRCLK_DIR_WIDTH 1 /* AIF1ADC_LRCLK_DIR */ +#define WM8994_AIF1ADC_RATE_MASK 0x07FF /* AIF1ADC_RATE - [10:0] */ +#define WM8994_AIF1ADC_RATE_SHIFT 0 /* AIF1ADC_RATE - [10:0] */ +#define WM8994_AIF1ADC_RATE_WIDTH 11 /* AIF1ADC_RATE - [10:0] */ + +/* + * R773 (0x305) - AIF1DAC LRCLK + */ +#define WM8994_AIF1DAC_LRCLK_DIR 0x0800 /* AIF1DAC_LRCLK_DIR */ +#define WM8994_AIF1DAC_LRCLK_DIR_MASK 0x0800 /* AIF1DAC_LRCLK_DIR */ +#define WM8994_AIF1DAC_LRCLK_DIR_SHIFT 11 /* AIF1DAC_LRCLK_DIR */ +#define WM8994_AIF1DAC_LRCLK_DIR_WIDTH 1 /* AIF1DAC_LRCLK_DIR */ +#define WM8994_AIF1DAC_RATE_MASK 0x07FF /* AIF1DAC_RATE - [10:0] */ +#define WM8994_AIF1DAC_RATE_SHIFT 0 /* AIF1DAC_RATE - [10:0] */ +#define WM8994_AIF1DAC_RATE_WIDTH 11 /* AIF1DAC_RATE - [10:0] */ + +/* + * R774 (0x306) - AIF1DAC Data + */ +#define WM8994_AIF1DACL_DAT_INV 0x0002 /* AIF1DACL_DAT_INV */ +#define WM8994_AIF1DACL_DAT_INV_MASK 0x0002 /* AIF1DACL_DAT_INV */ +#define WM8994_AIF1DACL_DAT_INV_SHIFT 1 /* 
AIF1DACL_DAT_INV */ +#define WM8994_AIF1DACL_DAT_INV_WIDTH 1 /* AIF1DACL_DAT_INV */ +#define WM8994_AIF1DACR_DAT_INV 0x0001 /* AIF1DACR_DAT_INV */ +#define WM8994_AIF1DACR_DAT_INV_MASK 0x0001 /* AIF1DACR_DAT_INV */ +#define WM8994_AIF1DACR_DAT_INV_SHIFT 0 /* AIF1DACR_DAT_INV */ +#define WM8994_AIF1DACR_DAT_INV_WIDTH 1 /* AIF1DACR_DAT_INV */ + +/* + * R775 (0x307) - AIF1ADC Data + */ +#define WM8994_AIF1ADCL_DAT_INV 0x0002 /* AIF1ADCL_DAT_INV */ +#define WM8994_AIF1ADCL_DAT_INV_MASK 0x0002 /* AIF1ADCL_DAT_INV */ +#define WM8994_AIF1ADCL_DAT_INV_SHIFT 1 /* AIF1ADCL_DAT_INV */ +#define WM8994_AIF1ADCL_DAT_INV_WIDTH 1 /* AIF1ADCL_DAT_INV */ +#define WM8994_AIF1ADCR_DAT_INV 0x0001 /* AIF1ADCR_DAT_INV */ +#define WM8994_AIF1ADCR_DAT_INV_MASK 0x0001 /* AIF1ADCR_DAT_INV */ +#define WM8994_AIF1ADCR_DAT_INV_SHIFT 0 /* AIF1ADCR_DAT_INV */ +#define WM8994_AIF1ADCR_DAT_INV_WIDTH 1 /* AIF1ADCR_DAT_INV */ + +/* + * R784 (0x310) - AIF2 Control (1) + */ +#define WM8994_AIF2ADCL_SRC 0x8000 /* AIF2ADCL_SRC */ +#define WM8994_AIF2ADCL_SRC_MASK 0x8000 /* AIF2ADCL_SRC */ +#define WM8994_AIF2ADCL_SRC_SHIFT 15 /* AIF2ADCL_SRC */ +#define WM8994_AIF2ADCL_SRC_WIDTH 1 /* AIF2ADCL_SRC */ +#define WM8994_AIF2ADCR_SRC 0x4000 /* AIF2ADCR_SRC */ +#define WM8994_AIF2ADCR_SRC_MASK 0x4000 /* AIF2ADCR_SRC */ +#define WM8994_AIF2ADCR_SRC_SHIFT 14 /* AIF2ADCR_SRC */ +#define WM8994_AIF2ADCR_SRC_WIDTH 1 /* AIF2ADCR_SRC */ +#define WM8994_AIF2ADC_TDM 0x2000 /* AIF2ADC_TDM */ +#define WM8994_AIF2ADC_TDM_MASK 0x2000 /* AIF2ADC_TDM */ +#define WM8994_AIF2ADC_TDM_SHIFT 13 /* AIF2ADC_TDM */ +#define WM8994_AIF2ADC_TDM_WIDTH 1 /* AIF2ADC_TDM */ +#define WM8994_AIF2ADC_TDM_CHAN 0x1000 /* AIF2ADC_TDM_CHAN */ +#define WM8994_AIF2ADC_TDM_CHAN_MASK 0x1000 /* AIF2ADC_TDM_CHAN */ +#define WM8994_AIF2ADC_TDM_CHAN_SHIFT 12 /* AIF2ADC_TDM_CHAN */ +#define WM8994_AIF2ADC_TDM_CHAN_WIDTH 1 /* AIF2ADC_TDM_CHAN */ +#define WM8994_AIF2_BCLK_INV 0x0100 /* AIF2_BCLK_INV */ +#define WM8994_AIF2_BCLK_INV_MASK 0x0100 /* AIF2_BCLK_INV */ +#define WM8994_AIF2_BCLK_INV_SHIFT 8 /* AIF2_BCLK_INV */ +#define WM8994_AIF2_BCLK_INV_WIDTH 1 /* AIF2_BCLK_INV */ +#define WM8994_AIF2_LRCLK_INV 0x0080 /* AIF2_LRCLK_INV */ +#define WM8994_AIF2_LRCLK_INV_MASK 0x0080 /* AIF2_LRCLK_INV */ +#define WM8994_AIF2_LRCLK_INV_SHIFT 7 /* AIF2_LRCLK_INV */ +#define WM8994_AIF2_LRCLK_INV_WIDTH 1 /* AIF2_LRCLK_INV */ +#define WM8994_AIF2_WL_MASK 0x0060 /* AIF2_WL - [6:5] */ +#define WM8994_AIF2_WL_SHIFT 5 /* AIF2_WL - [6:5] */ +#define WM8994_AIF2_WL_WIDTH 2 /* AIF2_WL - [6:5] */ +#define WM8994_AIF2_FMT_MASK 0x0018 /* AIF2_FMT - [4:3] */ +#define WM8994_AIF2_FMT_SHIFT 3 /* AIF2_FMT - [4:3] */ +#define WM8994_AIF2_FMT_WIDTH 2 /* AIF2_FMT - [4:3] */ + +/* + * R785 (0x311) - AIF2 Control (2) + */ +#define WM8994_AIF2DACL_SRC 0x8000 /* AIF2DACL_SRC */ +#define WM8994_AIF2DACL_SRC_MASK 0x8000 /* AIF2DACL_SRC */ +#define WM8994_AIF2DACL_SRC_SHIFT 15 /* AIF2DACL_SRC */ +#define WM8994_AIF2DACL_SRC_WIDTH 1 /* AIF2DACL_SRC */ +#define WM8994_AIF2DACR_SRC 0x4000 /* AIF2DACR_SRC */ +#define WM8994_AIF2DACR_SRC_MASK 0x4000 /* AIF2DACR_SRC */ +#define WM8994_AIF2DACR_SRC_SHIFT 14 /* AIF2DACR_SRC */ +#define WM8994_AIF2DACR_SRC_WIDTH 1 /* AIF2DACR_SRC */ +#define WM8994_AIF2DAC_TDM 0x2000 /* AIF2DAC_TDM */ +#define WM8994_AIF2DAC_TDM_MASK 0x2000 /* AIF2DAC_TDM */ +#define WM8994_AIF2DAC_TDM_SHIFT 13 /* AIF2DAC_TDM */ +#define WM8994_AIF2DAC_TDM_WIDTH 1 /* AIF2DAC_TDM */ +#define WM8994_AIF2DAC_TDM_CHAN 0x1000 /* AIF2DAC_TDM_CHAN */ +#define WM8994_AIF2DAC_TDM_CHAN_MASK 0x1000 /* AIF2DAC_TDM_CHAN 
*/ +#define WM8994_AIF2DAC_TDM_CHAN_SHIFT 12 /* AIF2DAC_TDM_CHAN */ +#define WM8994_AIF2DAC_TDM_CHAN_WIDTH 1 /* AIF2DAC_TDM_CHAN */ +#define WM8994_AIF2DAC_BOOST_MASK 0x0C00 /* AIF2DAC_BOOST - [11:10] */ +#define WM8994_AIF2DAC_BOOST_SHIFT 10 /* AIF2DAC_BOOST - [11:10] */ +#define WM8994_AIF2DAC_BOOST_WIDTH 2 /* AIF2DAC_BOOST - [11:10] */ +#define WM8994_AIF2_MONO 0x0100 /* AIF2_MONO */ +#define WM8994_AIF2_MONO_MASK 0x0100 /* AIF2_MONO */ +#define WM8994_AIF2_MONO_SHIFT 8 /* AIF2_MONO */ +#define WM8994_AIF2_MONO_WIDTH 1 /* AIF2_MONO */ +#define WM8994_AIF2DAC_COMP 0x0010 /* AIF2DAC_COMP */ +#define WM8994_AIF2DAC_COMP_MASK 0x0010 /* AIF2DAC_COMP */ +#define WM8994_AIF2DAC_COMP_SHIFT 4 /* AIF2DAC_COMP */ +#define WM8994_AIF2DAC_COMP_WIDTH 1 /* AIF2DAC_COMP */ +#define WM8994_AIF2DAC_COMPMODE 0x0008 /* AIF2DAC_COMPMODE */ +#define WM8994_AIF2DAC_COMPMODE_MASK 0x0008 /* AIF2DAC_COMPMODE */ +#define WM8994_AIF2DAC_COMPMODE_SHIFT 3 /* AIF2DAC_COMPMODE */ +#define WM8994_AIF2DAC_COMPMODE_WIDTH 1 /* AIF2DAC_COMPMODE */ +#define WM8994_AIF2ADC_COMP 0x0004 /* AIF2ADC_COMP */ +#define WM8994_AIF2ADC_COMP_MASK 0x0004 /* AIF2ADC_COMP */ +#define WM8994_AIF2ADC_COMP_SHIFT 2 /* AIF2ADC_COMP */ +#define WM8994_AIF2ADC_COMP_WIDTH 1 /* AIF2ADC_COMP */ +#define WM8994_AIF2ADC_COMPMODE 0x0002 /* AIF2ADC_COMPMODE */ +#define WM8994_AIF2ADC_COMPMODE_MASK 0x0002 /* AIF2ADC_COMPMODE */ +#define WM8994_AIF2ADC_COMPMODE_SHIFT 1 /* AIF2ADC_COMPMODE */ +#define WM8994_AIF2ADC_COMPMODE_WIDTH 1 /* AIF2ADC_COMPMODE */ +#define WM8994_AIF2_LOOPBACK 0x0001 /* AIF2_LOOPBACK */ +#define WM8994_AIF2_LOOPBACK_MASK 0x0001 /* AIF2_LOOPBACK */ +#define WM8994_AIF2_LOOPBACK_SHIFT 0 /* AIF2_LOOPBACK */ +#define WM8994_AIF2_LOOPBACK_WIDTH 1 /* AIF2_LOOPBACK */ + +/* + * R786 (0x312) - AIF2 Master/Slave + */ +#define WM8994_AIF2_TRI 0x8000 /* AIF2_TRI */ +#define WM8994_AIF2_TRI_MASK 0x8000 /* AIF2_TRI */ +#define WM8994_AIF2_TRI_SHIFT 15 /* AIF2_TRI */ +#define WM8994_AIF2_TRI_WIDTH 1 /* AIF2_TRI */ +#define WM8994_AIF2_MSTR 0x4000 /* AIF2_MSTR */ +#define WM8994_AIF2_MSTR_MASK 0x4000 /* AIF2_MSTR */ +#define WM8994_AIF2_MSTR_SHIFT 14 /* AIF2_MSTR */ +#define WM8994_AIF2_MSTR_WIDTH 1 /* AIF2_MSTR */ +#define WM8994_AIF2_CLK_FRC 0x2000 /* AIF2_CLK_FRC */ +#define WM8994_AIF2_CLK_FRC_MASK 0x2000 /* AIF2_CLK_FRC */ +#define WM8994_AIF2_CLK_FRC_SHIFT 13 /* AIF2_CLK_FRC */ +#define WM8994_AIF2_CLK_FRC_WIDTH 1 /* AIF2_CLK_FRC */ +#define WM8994_AIF2_LRCLK_FRC 0x1000 /* AIF2_LRCLK_FRC */ +#define WM8994_AIF2_LRCLK_FRC_MASK 0x1000 /* AIF2_LRCLK_FRC */ +#define WM8994_AIF2_LRCLK_FRC_SHIFT 12 /* AIF2_LRCLK_FRC */ +#define WM8994_AIF2_LRCLK_FRC_WIDTH 1 /* AIF2_LRCLK_FRC */ + +/* + * R787 (0x313) - AIF2 BCLK + */ +#define WM8994_AIF2_BCLK_DIV_MASK 0x01F0 /* AIF2_BCLK_DIV - [8:4] */ +#define WM8994_AIF2_BCLK_DIV_SHIFT 4 /* AIF2_BCLK_DIV - [8:4] */ +#define WM8994_AIF2_BCLK_DIV_WIDTH 5 /* AIF2_BCLK_DIV - [8:4] */ + +/* + * R788 (0x314) - AIF2ADC LRCLK + */ +#define WM8994_AIF2ADC_LRCLK_DIR 0x0800 /* AIF2ADC_LRCLK_DIR */ +#define WM8994_AIF2ADC_LRCLK_DIR_MASK 0x0800 /* AIF2ADC_LRCLK_DIR */ +#define WM8994_AIF2ADC_LRCLK_DIR_SHIFT 11 /* AIF2ADC_LRCLK_DIR */ +#define WM8994_AIF2ADC_LRCLK_DIR_WIDTH 1 /* AIF2ADC_LRCLK_DIR */ +#define WM8994_AIF2ADC_RATE_MASK 0x07FF /* AIF2ADC_RATE - [10:0] */ +#define WM8994_AIF2ADC_RATE_SHIFT 0 /* AIF2ADC_RATE - [10:0] */ +#define WM8994_AIF2ADC_RATE_WIDTH 11 /* AIF2ADC_RATE - [10:0] */ + +/* + * R789 (0x315) - AIF2DAC LRCLK + */ +#define WM8994_AIF2DAC_LRCLK_DIR 0x0800 /* AIF2DAC_LRCLK_DIR */ +#define 
WM8994_AIF2DAC_LRCLK_DIR_MASK 0x0800 /* AIF2DAC_LRCLK_DIR */ +#define WM8994_AIF2DAC_LRCLK_DIR_SHIFT 11 /* AIF2DAC_LRCLK_DIR */ +#define WM8994_AIF2DAC_LRCLK_DIR_WIDTH 1 /* AIF2DAC_LRCLK_DIR */ +#define WM8994_AIF2DAC_RATE_MASK 0x07FF /* AIF2DAC_RATE - [10:0] */ +#define WM8994_AIF2DAC_RATE_SHIFT 0 /* AIF2DAC_RATE - [10:0] */ +#define WM8994_AIF2DAC_RATE_WIDTH 11 /* AIF2DAC_RATE - [10:0] */ + +/* + * R790 (0x316) - AIF2DAC Data + */ +#define WM8994_AIF2DACL_DAT_INV 0x0002 /* AIF2DACL_DAT_INV */ +#define WM8994_AIF2DACL_DAT_INV_MASK 0x0002 /* AIF2DACL_DAT_INV */ +#define WM8994_AIF2DACL_DAT_INV_SHIFT 1 /* AIF2DACL_DAT_INV */ +#define WM8994_AIF2DACL_DAT_INV_WIDTH 1 /* AIF2DACL_DAT_INV */ +#define WM8994_AIF2DACR_DAT_INV 0x0001 /* AIF2DACR_DAT_INV */ +#define WM8994_AIF2DACR_DAT_INV_MASK 0x0001 /* AIF2DACR_DAT_INV */ +#define WM8994_AIF2DACR_DAT_INV_SHIFT 0 /* AIF2DACR_DAT_INV */ +#define WM8994_AIF2DACR_DAT_INV_WIDTH 1 /* AIF2DACR_DAT_INV */ + +/* + * R791 (0x317) - AIF2ADC Data + */ +#define WM8994_AIF2ADCL_DAT_INV 0x0002 /* AIF2ADCL_DAT_INV */ +#define WM8994_AIF2ADCL_DAT_INV_MASK 0x0002 /* AIF2ADCL_DAT_INV */ +#define WM8994_AIF2ADCL_DAT_INV_SHIFT 1 /* AIF2ADCL_DAT_INV */ +#define WM8994_AIF2ADCL_DAT_INV_WIDTH 1 /* AIF2ADCL_DAT_INV */ +#define WM8994_AIF2ADCR_DAT_INV 0x0001 /* AIF2ADCR_DAT_INV */ +#define WM8994_AIF2ADCR_DAT_INV_MASK 0x0001 /* AIF2ADCR_DAT_INV */ +#define WM8994_AIF2ADCR_DAT_INV_SHIFT 0 /* AIF2ADCR_DAT_INV */ +#define WM8994_AIF2ADCR_DAT_INV_WIDTH 1 /* AIF2ADCR_DAT_INV */ + +/* + * R1024 (0x400) - AIF1 ADC1 Left Volume + */ +#define WM8994_AIF1ADC1_VU 0x0100 /* AIF1ADC1_VU */ +#define WM8994_AIF1ADC1_VU_MASK 0x0100 /* AIF1ADC1_VU */ +#define WM8994_AIF1ADC1_VU_SHIFT 8 /* AIF1ADC1_VU */ +#define WM8994_AIF1ADC1_VU_WIDTH 1 /* AIF1ADC1_VU */ +#define WM8994_AIF1ADC1L_VOL_MASK 0x00FF /* AIF1ADC1L_VOL - [7:0] */ +#define WM8994_AIF1ADC1L_VOL_SHIFT 0 /* AIF1ADC1L_VOL - [7:0] */ +#define WM8994_AIF1ADC1L_VOL_WIDTH 8 /* AIF1ADC1L_VOL - [7:0] */ + +/* + * R1025 (0x401) - AIF1 ADC1 Right Volume + */ +#define WM8994_AIF1ADC1_VU 0x0100 /* AIF1ADC1_VU */ +#define WM8994_AIF1ADC1_VU_MASK 0x0100 /* AIF1ADC1_VU */ +#define WM8994_AIF1ADC1_VU_SHIFT 8 /* AIF1ADC1_VU */ +#define WM8994_AIF1ADC1_VU_WIDTH 1 /* AIF1ADC1_VU */ +#define WM8994_AIF1ADC1R_VOL_MASK 0x00FF /* AIF1ADC1R_VOL - [7:0] */ +#define WM8994_AIF1ADC1R_VOL_SHIFT 0 /* AIF1ADC1R_VOL - [7:0] */ +#define WM8994_AIF1ADC1R_VOL_WIDTH 8 /* AIF1ADC1R_VOL - [7:0] */ + +/* + * R1026 (0x402) - AIF1 DAC1 Left Volume + */ +#define WM8994_AIF1DAC1_VU 0x0100 /* AIF1DAC1_VU */ +#define WM8994_AIF1DAC1_VU_MASK 0x0100 /* AIF1DAC1_VU */ +#define WM8994_AIF1DAC1_VU_SHIFT 8 /* AIF1DAC1_VU */ +#define WM8994_AIF1DAC1_VU_WIDTH 1 /* AIF1DAC1_VU */ +#define WM8994_AIF1DAC1L_VOL_MASK 0x00FF /* AIF1DAC1L_VOL - [7:0] */ +#define WM8994_AIF1DAC1L_VOL_SHIFT 0 /* AIF1DAC1L_VOL - [7:0] */ +#define WM8994_AIF1DAC1L_VOL_WIDTH 8 /* AIF1DAC1L_VOL - [7:0] */ + +/* + * R1027 (0x403) - AIF1 DAC1 Right Volume + */ +#define WM8994_AIF1DAC1_VU 0x0100 /* AIF1DAC1_VU */ +#define WM8994_AIF1DAC1_VU_MASK 0x0100 /* AIF1DAC1_VU */ +#define WM8994_AIF1DAC1_VU_SHIFT 8 /* AIF1DAC1_VU */ +#define WM8994_AIF1DAC1_VU_WIDTH 1 /* AIF1DAC1_VU */ +#define WM8994_AIF1DAC1R_VOL_MASK 0x00FF /* AIF1DAC1R_VOL - [7:0] */ +#define WM8994_AIF1DAC1R_VOL_SHIFT 0 /* AIF1DAC1R_VOL - [7:0] */ +#define WM8994_AIF1DAC1R_VOL_WIDTH 8 /* AIF1DAC1R_VOL - [7:0] */ + +/* + * R1028 (0x404) - AIF1 ADC2 Left Volume + */ +#define WM8994_AIF1ADC2_VU 0x0100 /* AIF1ADC2_VU */ +#define WM8994_AIF1ADC2_VU_MASK 
0x0100 /* AIF1ADC2_VU */ +#define WM8994_AIF1ADC2_VU_SHIFT 8 /* AIF1ADC2_VU */ +#define WM8994_AIF1ADC2_VU_WIDTH 1 /* AIF1ADC2_VU */ +#define WM8994_AIF1ADC2L_VOL_MASK 0x00FF /* AIF1ADC2L_VOL - [7:0] */ +#define WM8994_AIF1ADC2L_VOL_SHIFT 0 /* AIF1ADC2L_VOL - [7:0] */ +#define WM8994_AIF1ADC2L_VOL_WIDTH 8 /* AIF1ADC2L_VOL - [7:0] */ + +/* + * R1029 (0x405) - AIF1 ADC2 Right Volume + */ +#define WM8994_AIF1ADC2_VU 0x0100 /* AIF1ADC2_VU */ +#define WM8994_AIF1ADC2_VU_MASK 0x0100 /* AIF1ADC2_VU */ +#define WM8994_AIF1ADC2_VU_SHIFT 8 /* AIF1ADC2_VU */ +#define WM8994_AIF1ADC2_VU_WIDTH 1 /* AIF1ADC2_VU */ +#define WM8994_AIF1ADC2R_VOL_MASK 0x00FF /* AIF1ADC2R_VOL - [7:0] */ +#define WM8994_AIF1ADC2R_VOL_SHIFT 0 /* AIF1ADC2R_VOL - [7:0] */ +#define WM8994_AIF1ADC2R_VOL_WIDTH 8 /* AIF1ADC2R_VOL - [7:0] */ + +/* + * R1030 (0x406) - AIF1 DAC2 Left Volume + */ +#define WM8994_AIF1DAC2_VU 0x0100 /* AIF1DAC2_VU */ +#define WM8994_AIF1DAC2_VU_MASK 0x0100 /* AIF1DAC2_VU */ +#define WM8994_AIF1DAC2_VU_SHIFT 8 /* AIF1DAC2_VU */ +#define WM8994_AIF1DAC2_VU_WIDTH 1 /* AIF1DAC2_VU */ +#define WM8994_AIF1DAC2L_VOL_MASK 0x00FF /* AIF1DAC2L_VOL - [7:0] */ +#define WM8994_AIF1DAC2L_VOL_SHIFT 0 /* AIF1DAC2L_VOL - [7:0] */ +#define WM8994_AIF1DAC2L_VOL_WIDTH 8 /* AIF1DAC2L_VOL - [7:0] */ + +/* + * R1031 (0x407) - AIF1 DAC2 Right Volume + */ +#define WM8994_AIF1DAC2_VU 0x0100 /* AIF1DAC2_VU */ +#define WM8994_AIF1DAC2_VU_MASK 0x0100 /* AIF1DAC2_VU */ +#define WM8994_AIF1DAC2_VU_SHIFT 8 /* AIF1DAC2_VU */ +#define WM8994_AIF1DAC2_VU_WIDTH 1 /* AIF1DAC2_VU */ +#define WM8994_AIF1DAC2R_VOL_MASK 0x00FF /* AIF1DAC2R_VOL - [7:0] */ +#define WM8994_AIF1DAC2R_VOL_SHIFT 0 /* AIF1DAC2R_VOL - [7:0] */ +#define WM8994_AIF1DAC2R_VOL_WIDTH 8 /* AIF1DAC2R_VOL - [7:0] */ + +/* + * R1040 (0x410) - AIF1 ADC1 Filters + */ +#define WM8994_AIF1ADC_4FS 0x8000 /* AIF1ADC_4FS */ +#define WM8994_AIF1ADC_4FS_MASK 0x8000 /* AIF1ADC_4FS */ +#define WM8994_AIF1ADC_4FS_SHIFT 15 /* AIF1ADC_4FS */ +#define WM8994_AIF1ADC_4FS_WIDTH 1 /* AIF1ADC_4FS */ +#define WM8994_AIF1ADC1_HPF_CUT_MASK 0x6000 /* AIF1ADC1_HPF_CUT - [14:13] */ +#define WM8994_AIF1ADC1_HPF_CUT_SHIFT 13 /* AIF1ADC1_HPF_CUT - [14:13] */ +#define WM8994_AIF1ADC1_HPF_CUT_WIDTH 2 /* AIF1ADC1_HPF_CUT - [14:13] */ +#define WM8994_AIF1ADC1L_HPF 0x1000 /* AIF1ADC1L_HPF */ +#define WM8994_AIF1ADC1L_HPF_MASK 0x1000 /* AIF1ADC1L_HPF */ +#define WM8994_AIF1ADC1L_HPF_SHIFT 12 /* AIF1ADC1L_HPF */ +#define WM8994_AIF1ADC1L_HPF_WIDTH 1 /* AIF1ADC1L_HPF */ +#define WM8994_AIF1ADC1R_HPF 0x0800 /* AIF1ADC1R_HPF */ +#define WM8994_AIF1ADC1R_HPF_MASK 0x0800 /* AIF1ADC1R_HPF */ +#define WM8994_AIF1ADC1R_HPF_SHIFT 11 /* AIF1ADC1R_HPF */ +#define WM8994_AIF1ADC1R_HPF_WIDTH 1 /* AIF1ADC1R_HPF */ + +/* + * R1041 (0x411) - AIF1 ADC2 Filters + */ +#define WM8994_AIF1ADC2_HPF_CUT_MASK 0x6000 /* AIF1ADC2_HPF_CUT - [14:13] */ +#define WM8994_AIF1ADC2_HPF_CUT_SHIFT 13 /* AIF1ADC2_HPF_CUT - [14:13] */ +#define WM8994_AIF1ADC2_HPF_CUT_WIDTH 2 /* AIF1ADC2_HPF_CUT - [14:13] */ +#define WM8994_AIF1ADC2L_HPF 0x1000 /* AIF1ADC2L_HPF */ +#define WM8994_AIF1ADC2L_HPF_MASK 0x1000 /* AIF1ADC2L_HPF */ +#define WM8994_AIF1ADC2L_HPF_SHIFT 12 /* AIF1ADC2L_HPF */ +#define WM8994_AIF1ADC2L_HPF_WIDTH 1 /* AIF1ADC2L_HPF */ +#define WM8994_AIF1ADC2R_HPF 0x0800 /* AIF1ADC2R_HPF */ +#define WM8994_AIF1ADC2R_HPF_MASK 0x0800 /* AIF1ADC2R_HPF */ +#define WM8994_AIF1ADC2R_HPF_SHIFT 11 /* AIF1ADC2R_HPF */ +#define WM8994_AIF1ADC2R_HPF_WIDTH 1 /* AIF1ADC2R_HPF */ + +/* + * R1056 (0x420) - AIF1 DAC1 Filters (1) + */ +#define 
WM8994_AIF1DAC1_MUTE 0x0200 /* AIF1DAC1_MUTE */ +#define WM8994_AIF1DAC1_MUTE_MASK 0x0200 /* AIF1DAC1_MUTE */ +#define WM8994_AIF1DAC1_MUTE_SHIFT 9 /* AIF1DAC1_MUTE */ +#define WM8994_AIF1DAC1_MUTE_WIDTH 1 /* AIF1DAC1_MUTE */ +#define WM8994_AIF1DAC1_MONO 0x0080 /* AIF1DAC1_MONO */ +#define WM8994_AIF1DAC1_MONO_MASK 0x0080 /* AIF1DAC1_MONO */ +#define WM8994_AIF1DAC1_MONO_SHIFT 7 /* AIF1DAC1_MONO */ +#define WM8994_AIF1DAC1_MONO_WIDTH 1 /* AIF1DAC1_MONO */ +#define WM8994_AIF1DAC1_MUTERATE 0x0020 /* AIF1DAC1_MUTERATE */ +#define WM8994_AIF1DAC1_MUTERATE_MASK 0x0020 /* AIF1DAC1_MUTERATE */ +#define WM8994_AIF1DAC1_MUTERATE_SHIFT 5 /* AIF1DAC1_MUTERATE */ +#define WM8994_AIF1DAC1_MUTERATE_WIDTH 1 /* AIF1DAC1_MUTERATE */ +#define WM8994_AIF1DAC1_UNMUTE_RAMP 0x0010 /* AIF1DAC1_UNMUTE_RAMP */ +#define WM8994_AIF1DAC1_UNMUTE_RAMP_MASK 0x0010 /* AIF1DAC1_UNMUTE_RAMP */ +#define WM8994_AIF1DAC1_UNMUTE_RAMP_SHIFT 4 /* AIF1DAC1_UNMUTE_RAMP */ +#define WM8994_AIF1DAC1_UNMUTE_RAMP_WIDTH 1 /* AIF1DAC1_UNMUTE_RAMP */ +#define WM8994_AIF1DAC1_DEEMP_MASK 0x0006 /* AIF1DAC1_DEEMP - [2:1] */ +#define WM8994_AIF1DAC1_DEEMP_SHIFT 1 /* AIF1DAC1_DEEMP - [2:1] */ +#define WM8994_AIF1DAC1_DEEMP_WIDTH 2 /* AIF1DAC1_DEEMP - [2:1] */ + +/* + * R1057 (0x421) - AIF1 DAC1 Filters (2) + */ +#define WM8994_AIF1DAC1_3D_GAIN_MASK 0x3E00 /* AIF1DAC1_3D_GAIN - [13:9] */ +#define WM8994_AIF1DAC1_3D_GAIN_SHIFT 9 /* AIF1DAC1_3D_GAIN - [13:9] */ +#define WM8994_AIF1DAC1_3D_GAIN_WIDTH 5 /* AIF1DAC1_3D_GAIN - [13:9] */ +#define WM8994_AIF1DAC1_3D_ENA 0x0100 /* AIF1DAC1_3D_ENA */ +#define WM8994_AIF1DAC1_3D_ENA_MASK 0x0100 /* AIF1DAC1_3D_ENA */ +#define WM8994_AIF1DAC1_3D_ENA_SHIFT 8 /* AIF1DAC1_3D_ENA */ +#define WM8994_AIF1DAC1_3D_ENA_WIDTH 1 /* AIF1DAC1_3D_ENA */ + +/* + * R1058 (0x422) - AIF1 DAC2 Filters (1) + */ +#define WM8994_AIF1DAC2_MUTE 0x0200 /* AIF1DAC2_MUTE */ +#define WM8994_AIF1DAC2_MUTE_MASK 0x0200 /* AIF1DAC2_MUTE */ +#define WM8994_AIF1DAC2_MUTE_SHIFT 9 /* AIF1DAC2_MUTE */ +#define WM8994_AIF1DAC2_MUTE_WIDTH 1 /* AIF1DAC2_MUTE */ +#define WM8994_AIF1DAC2_MONO 0x0080 /* AIF1DAC2_MONO */ +#define WM8994_AIF1DAC2_MONO_MASK 0x0080 /* AIF1DAC2_MONO */ +#define WM8994_AIF1DAC2_MONO_SHIFT 7 /* AIF1DAC2_MONO */ +#define WM8994_AIF1DAC2_MONO_WIDTH 1 /* AIF1DAC2_MONO */ +#define WM8994_AIF1DAC2_MUTERATE 0x0020 /* AIF1DAC2_MUTERATE */ +#define WM8994_AIF1DAC2_MUTERATE_MASK 0x0020 /* AIF1DAC2_MUTERATE */ +#define WM8994_AIF1DAC2_MUTERATE_SHIFT 5 /* AIF1DAC2_MUTERATE */ +#define WM8994_AIF1DAC2_MUTERATE_WIDTH 1 /* AIF1DAC2_MUTERATE */ +#define WM8994_AIF1DAC2_UNMUTE_RAMP 0x0010 /* AIF1DAC2_UNMUTE_RAMP */ +#define WM8994_AIF1DAC2_UNMUTE_RAMP_MASK 0x0010 /* AIF1DAC2_UNMUTE_RAMP */ +#define WM8994_AIF1DAC2_UNMUTE_RAMP_SHIFT 4 /* AIF1DAC2_UNMUTE_RAMP */ +#define WM8994_AIF1DAC2_UNMUTE_RAMP_WIDTH 1 /* AIF1DAC2_UNMUTE_RAMP */ +#define WM8994_AIF1DAC2_DEEMP_MASK 0x0006 /* AIF1DAC2_DEEMP - [2:1] */ +#define WM8994_AIF1DAC2_DEEMP_SHIFT 1 /* AIF1DAC2_DEEMP - [2:1] */ +#define WM8994_AIF1DAC2_DEEMP_WIDTH 2 /* AIF1DAC2_DEEMP - [2:1] */ + +/* + * R1059 (0x423) - AIF1 DAC2 Filters (2) + */ +#define WM8994_AIF1DAC2_3D_GAIN_MASK 0x3E00 /* AIF1DAC2_3D_GAIN - [13:9] */ +#define WM8994_AIF1DAC2_3D_GAIN_SHIFT 9 /* AIF1DAC2_3D_GAIN - [13:9] */ +#define WM8994_AIF1DAC2_3D_GAIN_WIDTH 5 /* AIF1DAC2_3D_GAIN - [13:9] */ +#define WM8994_AIF1DAC2_3D_ENA 0x0100 /* AIF1DAC2_3D_ENA */ +#define WM8994_AIF1DAC2_3D_ENA_MASK 0x0100 /* AIF1DAC2_3D_ENA */ +#define WM8994_AIF1DAC2_3D_ENA_SHIFT 8 /* AIF1DAC2_3D_ENA */ +#define WM8994_AIF1DAC2_3D_ENA_WIDTH 1 /* 
AIF1DAC2_3D_ENA */ + +/* + * R1088 (0x440) - AIF1 DRC1 (1) + */ +#define WM8994_AIF1DRC1_SIG_DET_RMS_MASK 0xF800 /* AIF1DRC1_SIG_DET_RMS - [15:11] */ +#define WM8994_AIF1DRC1_SIG_DET_RMS_SHIFT 11 /* AIF1DRC1_SIG_DET_RMS - [15:11] */ +#define WM8994_AIF1DRC1_SIG_DET_RMS_WIDTH 5 /* AIF1DRC1_SIG_DET_RMS - [15:11] */ +#define WM8994_AIF1DRC1_SIG_DET_PK_MASK 0x0600 /* AIF1DRC1_SIG_DET_PK - [10:9] */ +#define WM8994_AIF1DRC1_SIG_DET_PK_SHIFT 9 /* AIF1DRC1_SIG_DET_PK - [10:9] */ +#define WM8994_AIF1DRC1_SIG_DET_PK_WIDTH 2 /* AIF1DRC1_SIG_DET_PK - [10:9] */ +#define WM8994_AIF1DRC1_NG_ENA 0x0100 /* AIF1DRC1_NG_ENA */ +#define WM8994_AIF1DRC1_NG_ENA_MASK 0x0100 /* AIF1DRC1_NG_ENA */ +#define WM8994_AIF1DRC1_NG_ENA_SHIFT 8 /* AIF1DRC1_NG_ENA */ +#define WM8994_AIF1DRC1_NG_ENA_WIDTH 1 /* AIF1DRC1_NG_ENA */ +#define WM8994_AIF1DRC1_SIG_DET_MODE 0x0080 /* AIF1DRC1_SIG_DET_MODE */ +#define WM8994_AIF1DRC1_SIG_DET_MODE_MASK 0x0080 /* AIF1DRC1_SIG_DET_MODE */ +#define WM8994_AIF1DRC1_SIG_DET_MODE_SHIFT 7 /* AIF1DRC1_SIG_DET_MODE */ +#define WM8994_AIF1DRC1_SIG_DET_MODE_WIDTH 1 /* AIF1DRC1_SIG_DET_MODE */ +#define WM8994_AIF1DRC1_SIG_DET 0x0040 /* AIF1DRC1_SIG_DET */ +#define WM8994_AIF1DRC1_SIG_DET_MASK 0x0040 /* AIF1DRC1_SIG_DET */ +#define WM8994_AIF1DRC1_SIG_DET_SHIFT 6 /* AIF1DRC1_SIG_DET */ +#define WM8994_AIF1DRC1_SIG_DET_WIDTH 1 /* AIF1DRC1_SIG_DET */ +#define WM8994_AIF1DRC1_KNEE2_OP_ENA 0x0020 /* AIF1DRC1_KNEE2_OP_ENA */ +#define WM8994_AIF1DRC1_KNEE2_OP_ENA_MASK 0x0020 /* AIF1DRC1_KNEE2_OP_ENA */ +#define WM8994_AIF1DRC1_KNEE2_OP_ENA_SHIFT 5 /* AIF1DRC1_KNEE2_OP_ENA */ +#define WM8994_AIF1DRC1_KNEE2_OP_ENA_WIDTH 1 /* AIF1DRC1_KNEE2_OP_ENA */ +#define WM8994_AIF1DRC1_QR 0x0010 /* AIF1DRC1_QR */ +#define WM8994_AIF1DRC1_QR_MASK 0x0010 /* AIF1DRC1_QR */ +#define WM8994_AIF1DRC1_QR_SHIFT 4 /* AIF1DRC1_QR */ +#define WM8994_AIF1DRC1_QR_WIDTH 1 /* AIF1DRC1_QR */ +#define WM8994_AIF1DRC1_ANTICLIP 0x0008 /* AIF1DRC1_ANTICLIP */ +#define WM8994_AIF1DRC1_ANTICLIP_MASK 0x0008 /* AIF1DRC1_ANTICLIP */ +#define WM8994_AIF1DRC1_ANTICLIP_SHIFT 3 /* AIF1DRC1_ANTICLIP */ +#define WM8994_AIF1DRC1_ANTICLIP_WIDTH 1 /* AIF1DRC1_ANTICLIP */ +#define WM8994_AIF1DAC1_DRC_ENA 0x0004 /* AIF1DAC1_DRC_ENA */ +#define WM8994_AIF1DAC1_DRC_ENA_MASK 0x0004 /* AIF1DAC1_DRC_ENA */ +#define WM8994_AIF1DAC1_DRC_ENA_SHIFT 2 /* AIF1DAC1_DRC_ENA */ +#define WM8994_AIF1DAC1_DRC_ENA_WIDTH 1 /* AIF1DAC1_DRC_ENA */ +#define WM8994_AIF1ADC1L_DRC_ENA 0x0002 /* AIF1ADC1L_DRC_ENA */ +#define WM8994_AIF1ADC1L_DRC_ENA_MASK 0x0002 /* AIF1ADC1L_DRC_ENA */ +#define WM8994_AIF1ADC1L_DRC_ENA_SHIFT 1 /* AIF1ADC1L_DRC_ENA */ +#define WM8994_AIF1ADC1L_DRC_ENA_WIDTH 1 /* AIF1ADC1L_DRC_ENA */ +#define WM8994_AIF1ADC1R_DRC_ENA 0x0001 /* AIF1ADC1R_DRC_ENA */ +#define WM8994_AIF1ADC1R_DRC_ENA_MASK 0x0001 /* AIF1ADC1R_DRC_ENA */ +#define WM8994_AIF1ADC1R_DRC_ENA_SHIFT 0 /* AIF1ADC1R_DRC_ENA */ +#define WM8994_AIF1ADC1R_DRC_ENA_WIDTH 1 /* AIF1ADC1R_DRC_ENA */ + +/* + * R1089 (0x441) - AIF1 DRC1 (2) + */ +#define WM8994_AIF1DRC1_ATK_MASK 0x1E00 /* AIF1DRC1_ATK - [12:9] */ +#define WM8994_AIF1DRC1_ATK_SHIFT 9 /* AIF1DRC1_ATK - [12:9] */ +#define WM8994_AIF1DRC1_ATK_WIDTH 4 /* AIF1DRC1_ATK - [12:9] */ +#define WM8994_AIF1DRC1_DCY_MASK 0x01E0 /* AIF1DRC1_DCY - [8:5] */ +#define WM8994_AIF1DRC1_DCY_SHIFT 5 /* AIF1DRC1_DCY - [8:5] */ +#define WM8994_AIF1DRC1_DCY_WIDTH 4 /* AIF1DRC1_DCY - [8:5] */ +#define WM8994_AIF1DRC1_MINGAIN_MASK 0x001C /* AIF1DRC1_MINGAIN - [4:2] */ +#define WM8994_AIF1DRC1_MINGAIN_SHIFT 2 /* AIF1DRC1_MINGAIN - [4:2] */ +#define 
WM8994_AIF1DRC1_MINGAIN_WIDTH 3 /* AIF1DRC1_MINGAIN - [4:2] */ +#define WM8994_AIF1DRC1_MAXGAIN_MASK 0x0003 /* AIF1DRC1_MAXGAIN - [1:0] */ +#define WM8994_AIF1DRC1_MAXGAIN_SHIFT 0 /* AIF1DRC1_MAXGAIN - [1:0] */ +#define WM8994_AIF1DRC1_MAXGAIN_WIDTH 2 /* AIF1DRC1_MAXGAIN - [1:0] */ + +/* + * R1090 (0x442) - AIF1 DRC1 (3) + */ +#define WM8994_AIF1DRC1_NG_MINGAIN_MASK 0xF000 /* AIF1DRC1_NG_MINGAIN - [15:12] */ +#define WM8994_AIF1DRC1_NG_MINGAIN_SHIFT 12 /* AIF1DRC1_NG_MINGAIN - [15:12] */ +#define WM8994_AIF1DRC1_NG_MINGAIN_WIDTH 4 /* AIF1DRC1_NG_MINGAIN - [15:12] */ +#define WM8994_AIF1DRC1_NG_EXP_MASK 0x0C00 /* AIF1DRC1_NG_EXP - [11:10] */ +#define WM8994_AIF1DRC1_NG_EXP_SHIFT 10 /* AIF1DRC1_NG_EXP - [11:10] */ +#define WM8994_AIF1DRC1_NG_EXP_WIDTH 2 /* AIF1DRC1_NG_EXP - [11:10] */ +#define WM8994_AIF1DRC1_QR_THR_MASK 0x0300 /* AIF1DRC1_QR_THR - [9:8] */ +#define WM8994_AIF1DRC1_QR_THR_SHIFT 8 /* AIF1DRC1_QR_THR - [9:8] */ +#define WM8994_AIF1DRC1_QR_THR_WIDTH 2 /* AIF1DRC1_QR_THR - [9:8] */ +#define WM8994_AIF1DRC1_QR_DCY_MASK 0x00C0 /* AIF1DRC1_QR_DCY - [7:6] */ +#define WM8994_AIF1DRC1_QR_DCY_SHIFT 6 /* AIF1DRC1_QR_DCY - [7:6] */ +#define WM8994_AIF1DRC1_QR_DCY_WIDTH 2 /* AIF1DRC1_QR_DCY - [7:6] */ +#define WM8994_AIF1DRC1_HI_COMP_MASK 0x0038 /* AIF1DRC1_HI_COMP - [5:3] */ +#define WM8994_AIF1DRC1_HI_COMP_SHIFT 3 /* AIF1DRC1_HI_COMP - [5:3] */ +#define WM8994_AIF1DRC1_HI_COMP_WIDTH 3 /* AIF1DRC1_HI_COMP - [5:3] */ +#define WM8994_AIF1DRC1_LO_COMP_MASK 0x0007 /* AIF1DRC1_LO_COMP - [2:0] */ +#define WM8994_AIF1DRC1_LO_COMP_SHIFT 0 /* AIF1DRC1_LO_COMP - [2:0] */ +#define WM8994_AIF1DRC1_LO_COMP_WIDTH 3 /* AIF1DRC1_LO_COMP - [2:0] */ + +/* + * R1091 (0x443) - AIF1 DRC1 (4) + */ +#define WM8994_AIF1DRC1_KNEE_IP_MASK 0x07E0 /* AIF1DRC1_KNEE_IP - [10:5] */ +#define WM8994_AIF1DRC1_KNEE_IP_SHIFT 5 /* AIF1DRC1_KNEE_IP - [10:5] */ +#define WM8994_AIF1DRC1_KNEE_IP_WIDTH 6 /* AIF1DRC1_KNEE_IP - [10:5] */ +#define WM8994_AIF1DRC1_KNEE_OP_MASK 0x001F /* AIF1DRC1_KNEE_OP - [4:0] */ +#define WM8994_AIF1DRC1_KNEE_OP_SHIFT 0 /* AIF1DRC1_KNEE_OP - [4:0] */ +#define WM8994_AIF1DRC1_KNEE_OP_WIDTH 5 /* AIF1DRC1_KNEE_OP - [4:0] */ + +/* + * R1092 (0x444) - AIF1 DRC1 (5) + */ +#define WM8994_AIF1DRC1_KNEE2_IP_MASK 0x03E0 /* AIF1DRC1_KNEE2_IP - [9:5] */ +#define WM8994_AIF1DRC1_KNEE2_IP_SHIFT 5 /* AIF1DRC1_KNEE2_IP - [9:5] */ +#define WM8994_AIF1DRC1_KNEE2_IP_WIDTH 5 /* AIF1DRC1_KNEE2_IP - [9:5] */ +#define WM8994_AIF1DRC1_KNEE2_OP_MASK 0x001F /* AIF1DRC1_KNEE2_OP - [4:0] */ +#define WM8994_AIF1DRC1_KNEE2_OP_SHIFT 0 /* AIF1DRC1_KNEE2_OP - [4:0] */ +#define WM8994_AIF1DRC1_KNEE2_OP_WIDTH 5 /* AIF1DRC1_KNEE2_OP - [4:0] */ + +/* + * R1104 (0x450) - AIF1 DRC2 (1) + */ +#define WM8994_AIF1DRC2_SIG_DET_RMS_MASK 0xF800 /* AIF1DRC2_SIG_DET_RMS - [15:11] */ +#define WM8994_AIF1DRC2_SIG_DET_RMS_SHIFT 11 /* AIF1DRC2_SIG_DET_RMS - [15:11] */ +#define WM8994_AIF1DRC2_SIG_DET_RMS_WIDTH 5 /* AIF1DRC2_SIG_DET_RMS - [15:11] */ +#define WM8994_AIF1DRC2_SIG_DET_PK_MASK 0x0600 /* AIF1DRC2_SIG_DET_PK - [10:9] */ +#define WM8994_AIF1DRC2_SIG_DET_PK_SHIFT 9 /* AIF1DRC2_SIG_DET_PK - [10:9] */ +#define WM8994_AIF1DRC2_SIG_DET_PK_WIDTH 2 /* AIF1DRC2_SIG_DET_PK - [10:9] */ +#define WM8994_AIF1DRC2_NG_ENA 0x0100 /* AIF1DRC2_NG_ENA */ +#define WM8994_AIF1DRC2_NG_ENA_MASK 0x0100 /* AIF1DRC2_NG_ENA */ +#define WM8994_AIF1DRC2_NG_ENA_SHIFT 8 /* AIF1DRC2_NG_ENA */ +#define WM8994_AIF1DRC2_NG_ENA_WIDTH 1 /* AIF1DRC2_NG_ENA */ +#define WM8994_AIF1DRC2_SIG_DET_MODE 0x0080 /* AIF1DRC2_SIG_DET_MODE */ +#define WM8994_AIF1DRC2_SIG_DET_MODE_MASK 
0x0080 /* AIF1DRC2_SIG_DET_MODE */ +#define WM8994_AIF1DRC2_SIG_DET_MODE_SHIFT 7 /* AIF1DRC2_SIG_DET_MODE */ +#define WM8994_AIF1DRC2_SIG_DET_MODE_WIDTH 1 /* AIF1DRC2_SIG_DET_MODE */ +#define WM8994_AIF1DRC2_SIG_DET 0x0040 /* AIF1DRC2_SIG_DET */ +#define WM8994_AIF1DRC2_SIG_DET_MASK 0x0040 /* AIF1DRC2_SIG_DET */ +#define WM8994_AIF1DRC2_SIG_DET_SHIFT 6 /* AIF1DRC2_SIG_DET */ +#define WM8994_AIF1DRC2_SIG_DET_WIDTH 1 /* AIF1DRC2_SIG_DET */ +#define WM8994_AIF1DRC2_KNEE2_OP_ENA 0x0020 /* AIF1DRC2_KNEE2_OP_ENA */ +#define WM8994_AIF1DRC2_KNEE2_OP_ENA_MASK 0x0020 /* AIF1DRC2_KNEE2_OP_ENA */ +#define WM8994_AIF1DRC2_KNEE2_OP_ENA_SHIFT 5 /* AIF1DRC2_KNEE2_OP_ENA */ +#define WM8994_AIF1DRC2_KNEE2_OP_ENA_WIDTH 1 /* AIF1DRC2_KNEE2_OP_ENA */ +#define WM8994_AIF1DRC2_QR 0x0010 /* AIF1DRC2_QR */ +#define WM8994_AIF1DRC2_QR_MASK 0x0010 /* AIF1DRC2_QR */ +#define WM8994_AIF1DRC2_QR_SHIFT 4 /* AIF1DRC2_QR */ +#define WM8994_AIF1DRC2_QR_WIDTH 1 /* AIF1DRC2_QR */ +#define WM8994_AIF1DRC2_ANTICLIP 0x0008 /* AIF1DRC2_ANTICLIP */ +#define WM8994_AIF1DRC2_ANTICLIP_MASK 0x0008 /* AIF1DRC2_ANTICLIP */ +#define WM8994_AIF1DRC2_ANTICLIP_SHIFT 3 /* AIF1DRC2_ANTICLIP */ +#define WM8994_AIF1DRC2_ANTICLIP_WIDTH 1 /* AIF1DRC2_ANTICLIP */ +#define WM8994_AIF1DAC2_DRC_ENA 0x0004 /* AIF1DAC2_DRC_ENA */ +#define WM8994_AIF1DAC2_DRC_ENA_MASK 0x0004 /* AIF1DAC2_DRC_ENA */ +#define WM8994_AIF1DAC2_DRC_ENA_SHIFT 2 /* AIF1DAC2_DRC_ENA */ +#define WM8994_AIF1DAC2_DRC_ENA_WIDTH 1 /* AIF1DAC2_DRC_ENA */ +#define WM8994_AIF1ADC2L_DRC_ENA 0x0002 /* AIF1ADC2L_DRC_ENA */ +#define WM8994_AIF1ADC2L_DRC_ENA_MASK 0x0002 /* AIF1ADC2L_DRC_ENA */ +#define WM8994_AIF1ADC2L_DRC_ENA_SHIFT 1 /* AIF1ADC2L_DRC_ENA */ +#define WM8994_AIF1ADC2L_DRC_ENA_WIDTH 1 /* AIF1ADC2L_DRC_ENA */ +#define WM8994_AIF1ADC2R_DRC_ENA 0x0001 /* AIF1ADC2R_DRC_ENA */ +#define WM8994_AIF1ADC2R_DRC_ENA_MASK 0x0001 /* AIF1ADC2R_DRC_ENA */ +#define WM8994_AIF1ADC2R_DRC_ENA_SHIFT 0 /* AIF1ADC2R_DRC_ENA */ +#define WM8994_AIF1ADC2R_DRC_ENA_WIDTH 1 /* AIF1ADC2R_DRC_ENA */ + +/* + * R1105 (0x451) - AIF1 DRC2 (2) + */ +#define WM8994_AIF1DRC2_ATK_MASK 0x1E00 /* AIF1DRC2_ATK - [12:9] */ +#define WM8994_AIF1DRC2_ATK_SHIFT 9 /* AIF1DRC2_ATK - [12:9] */ +#define WM8994_AIF1DRC2_ATK_WIDTH 4 /* AIF1DRC2_ATK - [12:9] */ +#define WM8994_AIF1DRC2_DCY_MASK 0x01E0 /* AIF1DRC2_DCY - [8:5] */ +#define WM8994_AIF1DRC2_DCY_SHIFT 5 /* AIF1DRC2_DCY - [8:5] */ +#define WM8994_AIF1DRC2_DCY_WIDTH 4 /* AIF1DRC2_DCY - [8:5] */ +#define WM8994_AIF1DRC2_MINGAIN_MASK 0x001C /* AIF1DRC2_MINGAIN - [4:2] */ +#define WM8994_AIF1DRC2_MINGAIN_SHIFT 2 /* AIF1DRC2_MINGAIN - [4:2] */ +#define WM8994_AIF1DRC2_MINGAIN_WIDTH 3 /* AIF1DRC2_MINGAIN - [4:2] */ +#define WM8994_AIF1DRC2_MAXGAIN_MASK 0x0003 /* AIF1DRC2_MAXGAIN - [1:0] */ +#define WM8994_AIF1DRC2_MAXGAIN_SHIFT 0 /* AIF1DRC2_MAXGAIN - [1:0] */ +#define WM8994_AIF1DRC2_MAXGAIN_WIDTH 2 /* AIF1DRC2_MAXGAIN - [1:0] */ + +/* + * R1106 (0x452) - AIF1 DRC2 (3) + */ +#define WM8994_AIF1DRC2_NG_MINGAIN_MASK 0xF000 /* AIF1DRC2_NG_MINGAIN - [15:12] */ +#define WM8994_AIF1DRC2_NG_MINGAIN_SHIFT 12 /* AIF1DRC2_NG_MINGAIN - [15:12] */ +#define WM8994_AIF1DRC2_NG_MINGAIN_WIDTH 4 /* AIF1DRC2_NG_MINGAIN - [15:12] */ +#define WM8994_AIF1DRC2_NG_EXP_MASK 0x0C00 /* AIF1DRC2_NG_EXP - [11:10] */ +#define WM8994_AIF1DRC2_NG_EXP_SHIFT 10 /* AIF1DRC2_NG_EXP - [11:10] */ +#define WM8994_AIF1DRC2_NG_EXP_WIDTH 2 /* AIF1DRC2_NG_EXP - [11:10] */ +#define WM8994_AIF1DRC2_QR_THR_MASK 0x0300 /* AIF1DRC2_QR_THR - [9:8] */ +#define WM8994_AIF1DRC2_QR_THR_SHIFT 8 /* AIF1DRC2_QR_THR - [9:8] */ 
+#define WM8994_AIF1DRC2_QR_THR_WIDTH 2 /* AIF1DRC2_QR_THR - [9:8] */
+#define WM8994_AIF1DRC2_QR_DCY_MASK 0x00C0 /* AIF1DRC2_QR_DCY - [7:6] */
+#define WM8994_AIF1DRC2_QR_DCY_SHIFT 6 /* AIF1DRC2_QR_DCY - [7:6] */
+#define WM8994_AIF1DRC2_QR_DCY_WIDTH 2 /* AIF1DRC2_QR_DCY - [7:6] */
+#define WM8994_AIF1DRC2_HI_COMP_MASK 0x0038 /* AIF1DRC2_HI_COMP - [5:3] */
+#define WM8994_AIF1DRC2_HI_COMP_SHIFT 3 /* AIF1DRC2_HI_COMP - [5:3] */
+#define WM8994_AIF1DRC2_HI_COMP_WIDTH 3 /* AIF1DRC2_HI_COMP - [5:3] */
+#define WM8994_AIF1DRC2_LO_COMP_MASK 0x0007 /* AIF1DRC2_LO_COMP - [2:0] */
+#define WM8994_AIF1DRC2_LO_COMP_SHIFT 0 /* AIF1DRC2_LO_COMP - [2:0] */
+#define WM8994_AIF1DRC2_LO_COMP_WIDTH 3 /* AIF1DRC2_LO_COMP - [2:0] */
+
+/*
+ * R1107 (0x453) - AIF1 DRC2 (4)
+ */
+#define WM8994_AIF1DRC2_KNEE_IP_MASK 0x07E0 /* AIF1DRC2_KNEE_IP - [10:5] */
+#define WM8994_AIF1DRC2_KNEE_IP_SHIFT 5 /* AIF1DRC2_KNEE_IP - [10:5] */
+#define WM8994_AIF1DRC2_KNEE_IP_WIDTH 6 /* AIF1DRC2_KNEE_IP - [10:5] */
+#define WM8994_AIF1DRC2_KNEE_OP_MASK 0x001F /* AIF1DRC2_KNEE_OP - [4:0] */
+#define WM8994_AIF1DRC2_KNEE_OP_SHIFT 0 /* AIF1DRC2_KNEE_OP - [4:0] */
+#define WM8994_AIF1DRC2_KNEE_OP_WIDTH 5 /* AIF1DRC2_KNEE_OP - [4:0] */
+
+/*
+ * R1108 (0x454) - AIF1 DRC2 (5)
+ */
+#define WM8994_AIF1DRC2_KNEE2_IP_MASK 0x03E0 /* AIF1DRC2_KNEE2_IP - [9:5] */
+#define WM8994_AIF1DRC2_KNEE2_IP_SHIFT 5 /* AIF1DRC2_KNEE2_IP - [9:5] */
+#define WM8994_AIF1DRC2_KNEE2_IP_WIDTH 5 /* AIF1DRC2_KNEE2_IP - [9:5] */
+#define WM8994_AIF1DRC2_KNEE2_OP_MASK 0x001F /* AIF1DRC2_KNEE2_OP - [4:0] */
+#define WM8994_AIF1DRC2_KNEE2_OP_SHIFT 0 /* AIF1DRC2_KNEE2_OP - [4:0] */
+#define WM8994_AIF1DRC2_KNEE2_OP_WIDTH 5 /* AIF1DRC2_KNEE2_OP - [4:0] */
+
+/*
+ * R1152 (0x480) - AIF1 DAC1 EQ Gains (1)
+ */
+#define WM8994_AIF1DAC1_EQ_B1_GAIN_MASK 0xF800 /* AIF1DAC1_EQ_B1_GAIN - [15:11] */
+#define WM8994_AIF1DAC1_EQ_B1_GAIN_SHIFT 11 /* AIF1DAC1_EQ_B1_GAIN - [15:11] */
+#define WM8994_AIF1DAC1_EQ_B1_GAIN_WIDTH 5 /* AIF1DAC1_EQ_B1_GAIN - [15:11] */
+#define WM8994_AIF1DAC1_EQ_B2_GAIN_MASK 0x07C0 /* AIF1DAC1_EQ_B2_GAIN - [10:6] */
+#define WM8994_AIF1DAC1_EQ_B2_GAIN_SHIFT 6 /* AIF1DAC1_EQ_B2_GAIN - [10:6] */
+#define WM8994_AIF1DAC1_EQ_B2_GAIN_WIDTH 5 /* AIF1DAC1_EQ_B2_GAIN - [10:6] */
+#define WM8994_AIF1DAC1_EQ_B3_GAIN_MASK 0x003E /* AIF1DAC1_EQ_B3_GAIN - [5:1] */
+#define WM8994_AIF1DAC1_EQ_B3_GAIN_SHIFT 1 /* AIF1DAC1_EQ_B3_GAIN - [5:1] */
+#define WM8994_AIF1DAC1_EQ_B3_GAIN_WIDTH 5 /* AIF1DAC1_EQ_B3_GAIN - [5:1] */
+#define WM8994_AIF1DAC1_EQ_ENA 0x0001 /* AIF1DAC1_EQ_ENA */
+#define WM8994_AIF1DAC1_EQ_ENA_MASK 0x0001 /* AIF1DAC1_EQ_ENA */
+#define WM8994_AIF1DAC1_EQ_ENA_SHIFT 0 /* AIF1DAC1_EQ_ENA */
+#define WM8994_AIF1DAC1_EQ_ENA_WIDTH 1 /* AIF1DAC1_EQ_ENA */
+
+/*
+ * R1153 (0x481) - AIF1 DAC1 EQ Gains (2)
+ */
+#define WM8994_AIF1DAC1_EQ_B4_GAIN_MASK 0xF800 /* AIF1DAC1_EQ_B4_GAIN - [15:11] */
+#define WM8994_AIF1DAC1_EQ_B4_GAIN_SHIFT 11 /* AIF1DAC1_EQ_B4_GAIN - [15:11] */
+#define WM8994_AIF1DAC1_EQ_B4_GAIN_WIDTH 5 /* AIF1DAC1_EQ_B4_GAIN - [15:11] */
+#define WM8994_AIF1DAC1_EQ_B5_GAIN_MASK 0x07C0 /* AIF1DAC1_EQ_B5_GAIN - [10:6] */
+#define WM8994_AIF1DAC1_EQ_B5_GAIN_SHIFT 6 /* AIF1DAC1_EQ_B5_GAIN - [10:6] */
+#define WM8994_AIF1DAC1_EQ_B5_GAIN_WIDTH 5 /* AIF1DAC1_EQ_B5_GAIN - [10:6] */
+
+/*
+ * R1154 (0x482) - AIF1 DAC1 EQ Band 1 A
+ */
+#define WM8994_AIF1DAC1_EQ_B1_A_MASK 0xFFFF /* AIF1DAC1_EQ_B1_A - [15:0] */
+#define WM8994_AIF1DAC1_EQ_B1_A_SHIFT 0 /* AIF1DAC1_EQ_B1_A - [15:0] */
+#define WM8994_AIF1DAC1_EQ_B1_A_WIDTH 16 /* AIF1DAC1_EQ_B1_A - [15:0] */
+ +/* + * R1155 (0x483) - AIF1 DAC1 EQ Band 1 B + */ +#define WM8994_AIF1DAC1_EQ_B1_B_MASK 0xFFFF /* AIF1DAC1_EQ_B1_B - [15:0] */ +#define WM8994_AIF1DAC1_EQ_B1_B_SHIFT 0 /* AIF1DAC1_EQ_B1_B - [15:0] */ +#define WM8994_AIF1DAC1_EQ_B1_B_WIDTH 16 /* AIF1DAC1_EQ_B1_B - [15:0] */ + +/* + * R1156 (0x484) - AIF1 DAC1 EQ Band 1 PG + */ +#define WM8994_AIF1DAC1_EQ_B1_PG_MASK 0xFFFF /* AIF1DAC1_EQ_B1_PG - [15:0] */ +#define WM8994_AIF1DAC1_EQ_B1_PG_SHIFT 0 /* AIF1DAC1_EQ_B1_PG - [15:0] */ +#define WM8994_AIF1DAC1_EQ_B1_PG_WIDTH 16 /* AIF1DAC1_EQ_B1_PG - [15:0] */ + +/* + * R1157 (0x485) - AIF1 DAC1 EQ Band 2 A + */ +#define WM8994_AIF1DAC1_EQ_B2_A_MASK 0xFFFF /* AIF1DAC1_EQ_B2_A - [15:0] */ +#define WM8994_AIF1DAC1_EQ_B2_A_SHIFT 0 /* AIF1DAC1_EQ_B2_A - [15:0] */ +#define WM8994_AIF1DAC1_EQ_B2_A_WIDTH 16 /* AIF1DAC1_EQ_B2_A - [15:0] */ + +/* + * R1158 (0x486) - AIF1 DAC1 EQ Band 2 B + */ +#define WM8994_AIF1DAC1_EQ_B2_B_MASK 0xFFFF /* AIF1DAC1_EQ_B2_B - [15:0] */ +#define WM8994_AIF1DAC1_EQ_B2_B_SHIFT 0 /* AIF1DAC1_EQ_B2_B - [15:0] */ +#define WM8994_AIF1DAC1_EQ_B2_B_WIDTH 16 /* AIF1DAC1_EQ_B2_B - [15:0] */ + +/* + * R1159 (0x487) - AIF1 DAC1 EQ Band 2 C + */ +#define WM8994_AIF1DAC1_EQ_B2_C_MASK 0xFFFF /* AIF1DAC1_EQ_B2_C - [15:0] */ +#define WM8994_AIF1DAC1_EQ_B2_C_SHIFT 0 /* AIF1DAC1_EQ_B2_C - [15:0] */ +#define WM8994_AIF1DAC1_EQ_B2_C_WIDTH 16 /* AIF1DAC1_EQ_B2_C - [15:0] */ + +/* + * R1160 (0x488) - AIF1 DAC1 EQ Band 2 PG + */ +#define WM8994_AIF1DAC1_EQ_B2_PG_MASK 0xFFFF /* AIF1DAC1_EQ_B2_PG - [15:0] */ +#define WM8994_AIF1DAC1_EQ_B2_PG_SHIFT 0 /* AIF1DAC1_EQ_B2_PG - [15:0] */ +#define WM8994_AIF1DAC1_EQ_B2_PG_WIDTH 16 /* AIF1DAC1_EQ_B2_PG - [15:0] */ + +/* + * R1161 (0x489) - AIF1 DAC1 EQ Band 3 A + */ +#define WM8994_AIF1DAC1_EQ_B3_A_MASK 0xFFFF /* AIF1DAC1_EQ_B3_A - [15:0] */ +#define WM8994_AIF1DAC1_EQ_B3_A_SHIFT 0 /* AIF1DAC1_EQ_B3_A - [15:0] */ +#define WM8994_AIF1DAC1_EQ_B3_A_WIDTH 16 /* AIF1DAC1_EQ_B3_A - [15:0] */ + +/* + * R1162 (0x48A) - AIF1 DAC1 EQ Band 3 B + */ +#define WM8994_AIF1DAC1_EQ_B3_B_MASK 0xFFFF /* AIF1DAC1_EQ_B3_B - [15:0] */ +#define WM8994_AIF1DAC1_EQ_B3_B_SHIFT 0 /* AIF1DAC1_EQ_B3_B - [15:0] */ +#define WM8994_AIF1DAC1_EQ_B3_B_WIDTH 16 /* AIF1DAC1_EQ_B3_B - [15:0] */ + +/* + * R1163 (0x48B) - AIF1 DAC1 EQ Band 3 C + */ +#define WM8994_AIF1DAC1_EQ_B3_C_MASK 0xFFFF /* AIF1DAC1_EQ_B3_C - [15:0] */ +#define WM8994_AIF1DAC1_EQ_B3_C_SHIFT 0 /* AIF1DAC1_EQ_B3_C - [15:0] */ +#define WM8994_AIF1DAC1_EQ_B3_C_WIDTH 16 /* AIF1DAC1_EQ_B3_C - [15:0] */ + +/* + * R1164 (0x48C) - AIF1 DAC1 EQ Band 3 PG + */ +#define WM8994_AIF1DAC1_EQ_B3_PG_MASK 0xFFFF /* AIF1DAC1_EQ_B3_PG - [15:0] */ +#define WM8994_AIF1DAC1_EQ_B3_PG_SHIFT 0 /* AIF1DAC1_EQ_B3_PG - [15:0] */ +#define WM8994_AIF1DAC1_EQ_B3_PG_WIDTH 16 /* AIF1DAC1_EQ_B3_PG - [15:0] */ + +/* + * R1165 (0x48D) - AIF1 DAC1 EQ Band 4 A + */ +#define WM8994_AIF1DAC1_EQ_B4_A_MASK 0xFFFF /* AIF1DAC1_EQ_B4_A - [15:0] */ +#define WM8994_AIF1DAC1_EQ_B4_A_SHIFT 0 /* AIF1DAC1_EQ_B4_A - [15:0] */ +#define WM8994_AIF1DAC1_EQ_B4_A_WIDTH 16 /* AIF1DAC1_EQ_B4_A - [15:0] */ + +/* + * R1166 (0x48E) - AIF1 DAC1 EQ Band 4 B + */ +#define WM8994_AIF1DAC1_EQ_B4_B_MASK 0xFFFF /* AIF1DAC1_EQ_B4_B - [15:0] */ +#define WM8994_AIF1DAC1_EQ_B4_B_SHIFT 0 /* AIF1DAC1_EQ_B4_B - [15:0] */ +#define WM8994_AIF1DAC1_EQ_B4_B_WIDTH 16 /* AIF1DAC1_EQ_B4_B - [15:0] */ + +/* + * R1167 (0x48F) - AIF1 DAC1 EQ Band 4 C + */ +#define WM8994_AIF1DAC1_EQ_B4_C_MASK 0xFFFF /* AIF1DAC1_EQ_B4_C - [15:0] */ +#define WM8994_AIF1DAC1_EQ_B4_C_SHIFT 0 /* AIF1DAC1_EQ_B4_C - [15:0] */ 
+#define WM8994_AIF1DAC1_EQ_B4_C_WIDTH 16 /* AIF1DAC1_EQ_B4_C - [15:0] */ + +/* + * R1168 (0x490) - AIF1 DAC1 EQ Band 4 PG + */ +#define WM8994_AIF1DAC1_EQ_B4_PG_MASK 0xFFFF /* AIF1DAC1_EQ_B4_PG - [15:0] */ +#define WM8994_AIF1DAC1_EQ_B4_PG_SHIFT 0 /* AIF1DAC1_EQ_B4_PG - [15:0] */ +#define WM8994_AIF1DAC1_EQ_B4_PG_WIDTH 16 /* AIF1DAC1_EQ_B4_PG - [15:0] */ + +/* + * R1169 (0x491) - AIF1 DAC1 EQ Band 5 A + */ +#define WM8994_AIF1DAC1_EQ_B5_A_MASK 0xFFFF /* AIF1DAC1_EQ_B5_A - [15:0] */ +#define WM8994_AIF1DAC1_EQ_B5_A_SHIFT 0 /* AIF1DAC1_EQ_B5_A - [15:0] */ +#define WM8994_AIF1DAC1_EQ_B5_A_WIDTH 16 /* AIF1DAC1_EQ_B5_A - [15:0] */ + +/* + * R1170 (0x492) - AIF1 DAC1 EQ Band 5 B + */ +#define WM8994_AIF1DAC1_EQ_B5_B_MASK 0xFFFF /* AIF1DAC1_EQ_B5_B - [15:0] */ +#define WM8994_AIF1DAC1_EQ_B5_B_SHIFT 0 /* AIF1DAC1_EQ_B5_B - [15:0] */ +#define WM8994_AIF1DAC1_EQ_B5_B_WIDTH 16 /* AIF1DAC1_EQ_B5_B - [15:0] */ + +/* + * R1171 (0x493) - AIF1 DAC1 EQ Band 5 PG + */ +#define WM8994_AIF1DAC1_EQ_B5_PG_MASK 0xFFFF /* AIF1DAC1_EQ_B5_PG - [15:0] */ +#define WM8994_AIF1DAC1_EQ_B5_PG_SHIFT 0 /* AIF1DAC1_EQ_B5_PG - [15:0] */ +#define WM8994_AIF1DAC1_EQ_B5_PG_WIDTH 16 /* AIF1DAC1_EQ_B5_PG - [15:0] */ + +/* + * R1184 (0x4A0) - AIF1 DAC2 EQ Gains (1) + */ +#define WM8994_AIF1DAC2_EQ_B1_GAIN_MASK 0xF800 /* AIF1DAC2_EQ_B1_GAIN - [15:11] */ +#define WM8994_AIF1DAC2_EQ_B1_GAIN_SHIFT 11 /* AIF1DAC2_EQ_B1_GAIN - [15:11] */ +#define WM8994_AIF1DAC2_EQ_B1_GAIN_WIDTH 5 /* AIF1DAC2_EQ_B1_GAIN - [15:11] */ +#define WM8994_AIF1DAC2_EQ_B2_GAIN_MASK 0x07C0 /* AIF1DAC2_EQ_B2_GAIN - [10:6] */ +#define WM8994_AIF1DAC2_EQ_B2_GAIN_SHIFT 6 /* AIF1DAC2_EQ_B2_GAIN - [10:6] */ +#define WM8994_AIF1DAC2_EQ_B2_GAIN_WIDTH 5 /* AIF1DAC2_EQ_B2_GAIN - [10:6] */ +#define WM8994_AIF1DAC2_EQ_B3_GAIN_MASK 0x003E /* AIF1DAC2_EQ_B3_GAIN - [5:1] */ +#define WM8994_AIF1DAC2_EQ_B3_GAIN_SHIFT 1 /* AIF1DAC2_EQ_B3_GAIN - [5:1] */ +#define WM8994_AIF1DAC2_EQ_B3_GAIN_WIDTH 5 /* AIF1DAC2_EQ_B3_GAIN - [5:1] */ +#define WM8994_AIF1DAC2_EQ_ENA 0x0001 /* AIF1DAC2_EQ_ENA */ +#define WM8994_AIF1DAC2_EQ_ENA_MASK 0x0001 /* AIF1DAC2_EQ_ENA */ +#define WM8994_AIF1DAC2_EQ_ENA_SHIFT 0 /* AIF1DAC2_EQ_ENA */ +#define WM8994_AIF1DAC2_EQ_ENA_WIDTH 1 /* AIF1DAC2_EQ_ENA */ + +/* + * R1185 (0x4A1) - AIF1 DAC2 EQ Gains (2) + */ +#define WM8994_AIF1DAC2_EQ_B4_GAIN_MASK 0xF800 /* AIF1DAC2_EQ_B4_GAIN - [15:11] */ +#define WM8994_AIF1DAC2_EQ_B4_GAIN_SHIFT 11 /* AIF1DAC2_EQ_B4_GAIN - [15:11] */ +#define WM8994_AIF1DAC2_EQ_B4_GAIN_WIDTH 5 /* AIF1DAC2_EQ_B4_GAIN - [15:11] */ +#define WM8994_AIF1DAC2_EQ_B5_GAIN_MASK 0x07C0 /* AIF1DAC2_EQ_B5_GAIN - [10:6] */ +#define WM8994_AIF1DAC2_EQ_B5_GAIN_SHIFT 6 /* AIF1DAC2_EQ_B5_GAIN - [10:6] */ +#define WM8994_AIF1DAC2_EQ_B5_GAIN_WIDTH 5 /* AIF1DAC2_EQ_B5_GAIN - [10:6] */ + +/* + * R1186 (0x4A2) - AIF1 DAC2 EQ Band 1 A + */ +#define WM8994_AIF1DAC2_EQ_B1_A_MASK 0xFFFF /* AIF1DAC2_EQ_B1_A - [15:0] */ +#define WM8994_AIF1DAC2_EQ_B1_A_SHIFT 0 /* AIF1DAC2_EQ_B1_A - [15:0] */ +#define WM8994_AIF1DAC2_EQ_B1_A_WIDTH 16 /* AIF1DAC2_EQ_B1_A - [15:0] */ + +/* + * R1187 (0x4A3) - AIF1 DAC2 EQ Band 1 B + */ +#define WM8994_AIF1DAC2_EQ_B1_B_MASK 0xFFFF /* AIF1DAC2_EQ_B1_B - [15:0] */ +#define WM8994_AIF1DAC2_EQ_B1_B_SHIFT 0 /* AIF1DAC2_EQ_B1_B - [15:0] */ +#define WM8994_AIF1DAC2_EQ_B1_B_WIDTH 16 /* AIF1DAC2_EQ_B1_B - [15:0] */ + +/* + * R1188 (0x4A4) - AIF1 DAC2 EQ Band 1 PG + */ +#define WM8994_AIF1DAC2_EQ_B1_PG_MASK 0xFFFF /* AIF1DAC2_EQ_B1_PG - [15:0] */ +#define WM8994_AIF1DAC2_EQ_B1_PG_SHIFT 0 /* AIF1DAC2_EQ_B1_PG - [15:0] */ +#define 
WM8994_AIF1DAC2_EQ_B1_PG_WIDTH 16 /* AIF1DAC2_EQ_B1_PG - [15:0] */ + +/* + * R1189 (0x4A5) - AIF1 DAC2 EQ Band 2 A + */ +#define WM8994_AIF1DAC2_EQ_B2_A_MASK 0xFFFF /* AIF1DAC2_EQ_B2_A - [15:0] */ +#define WM8994_AIF1DAC2_EQ_B2_A_SHIFT 0 /* AIF1DAC2_EQ_B2_A - [15:0] */ +#define WM8994_AIF1DAC2_EQ_B2_A_WIDTH 16 /* AIF1DAC2_EQ_B2_A - [15:0] */ + +/* + * R1190 (0x4A6) - AIF1 DAC2 EQ Band 2 B + */ +#define WM8994_AIF1DAC2_EQ_B2_B_MASK 0xFFFF /* AIF1DAC2_EQ_B2_B - [15:0] */ +#define WM8994_AIF1DAC2_EQ_B2_B_SHIFT 0 /* AIF1DAC2_EQ_B2_B - [15:0] */ +#define WM8994_AIF1DAC2_EQ_B2_B_WIDTH 16 /* AIF1DAC2_EQ_B2_B - [15:0] */ + +/* + * R1191 (0x4A7) - AIF1 DAC2 EQ Band 2 C + */ +#define WM8994_AIF1DAC2_EQ_B2_C_MASK 0xFFFF /* AIF1DAC2_EQ_B2_C - [15:0] */ +#define WM8994_AIF1DAC2_EQ_B2_C_SHIFT 0 /* AIF1DAC2_EQ_B2_C - [15:0] */ +#define WM8994_AIF1DAC2_EQ_B2_C_WIDTH 16 /* AIF1DAC2_EQ_B2_C - [15:0] */ + +/* + * R1192 (0x4A8) - AIF1 DAC2 EQ Band 2 PG + */ +#define WM8994_AIF1DAC2_EQ_B2_PG_MASK 0xFFFF /* AIF1DAC2_EQ_B2_PG - [15:0] */ +#define WM8994_AIF1DAC2_EQ_B2_PG_SHIFT 0 /* AIF1DAC2_EQ_B2_PG - [15:0] */ +#define WM8994_AIF1DAC2_EQ_B2_PG_WIDTH 16 /* AIF1DAC2_EQ_B2_PG - [15:0] */ + +/* + * R1193 (0x4A9) - AIF1 DAC2 EQ Band 3 A + */ +#define WM8994_AIF1DAC2_EQ_B3_A_MASK 0xFFFF /* AIF1DAC2_EQ_B3_A - [15:0] */ +#define WM8994_AIF1DAC2_EQ_B3_A_SHIFT 0 /* AIF1DAC2_EQ_B3_A - [15:0] */ +#define WM8994_AIF1DAC2_EQ_B3_A_WIDTH 16 /* AIF1DAC2_EQ_B3_A - [15:0] */ + +/* + * R1194 (0x4AA) - AIF1 DAC2 EQ Band 3 B + */ +#define WM8994_AIF1DAC2_EQ_B3_B_MASK 0xFFFF /* AIF1DAC2_EQ_B3_B - [15:0] */ +#define WM8994_AIF1DAC2_EQ_B3_B_SHIFT 0 /* AIF1DAC2_EQ_B3_B - [15:0] */ +#define WM8994_AIF1DAC2_EQ_B3_B_WIDTH 16 /* AIF1DAC2_EQ_B3_B - [15:0] */ + +/* + * R1195 (0x4AB) - AIF1 DAC2 EQ Band 3 C + */ +#define WM8994_AIF1DAC2_EQ_B3_C_MASK 0xFFFF /* AIF1DAC2_EQ_B3_C - [15:0] */ +#define WM8994_AIF1DAC2_EQ_B3_C_SHIFT 0 /* AIF1DAC2_EQ_B3_C - [15:0] */ +#define WM8994_AIF1DAC2_EQ_B3_C_WIDTH 16 /* AIF1DAC2_EQ_B3_C - [15:0] */ + +/* + * R1196 (0x4AC) - AIF1 DAC2 EQ Band 3 PG + */ +#define WM8994_AIF1DAC2_EQ_B3_PG_MASK 0xFFFF /* AIF1DAC2_EQ_B3_PG - [15:0] */ +#define WM8994_AIF1DAC2_EQ_B3_PG_SHIFT 0 /* AIF1DAC2_EQ_B3_PG - [15:0] */ +#define WM8994_AIF1DAC2_EQ_B3_PG_WIDTH 16 /* AIF1DAC2_EQ_B3_PG - [15:0] */ + +/* + * R1197 (0x4AD) - AIF1 DAC2 EQ Band 4 A + */ +#define WM8994_AIF1DAC2_EQ_B4_A_MASK 0xFFFF /* AIF1DAC2_EQ_B4_A - [15:0] */ +#define WM8994_AIF1DAC2_EQ_B4_A_SHIFT 0 /* AIF1DAC2_EQ_B4_A - [15:0] */ +#define WM8994_AIF1DAC2_EQ_B4_A_WIDTH 16 /* AIF1DAC2_EQ_B4_A - [15:0] */ + +/* + * R1198 (0x4AE) - AIF1 DAC2 EQ Band 4 B + */ +#define WM8994_AIF1DAC2_EQ_B4_B_MASK 0xFFFF /* AIF1DAC2_EQ_B4_B - [15:0] */ +#define WM8994_AIF1DAC2_EQ_B4_B_SHIFT 0 /* AIF1DAC2_EQ_B4_B - [15:0] */ +#define WM8994_AIF1DAC2_EQ_B4_B_WIDTH 16 /* AIF1DAC2_EQ_B4_B - [15:0] */ + +/* + * R1199 (0x4AF) - AIF1 DAC2 EQ Band 4 C + */ +#define WM8994_AIF1DAC2_EQ_B4_C_MASK 0xFFFF /* AIF1DAC2_EQ_B4_C - [15:0] */ +#define WM8994_AIF1DAC2_EQ_B4_C_SHIFT 0 /* AIF1DAC2_EQ_B4_C - [15:0] */ +#define WM8994_AIF1DAC2_EQ_B4_C_WIDTH 16 /* AIF1DAC2_EQ_B4_C - [15:0] */ + +/* + * R1200 (0x4B0) - AIF1 DAC2 EQ Band 4 PG + */ +#define WM8994_AIF1DAC2_EQ_B4_PG_MASK 0xFFFF /* AIF1DAC2_EQ_B4_PG - [15:0] */ +#define WM8994_AIF1DAC2_EQ_B4_PG_SHIFT 0 /* AIF1DAC2_EQ_B4_PG - [15:0] */ +#define WM8994_AIF1DAC2_EQ_B4_PG_WIDTH 16 /* AIF1DAC2_EQ_B4_PG - [15:0] */ + +/* + * R1201 (0x4B1) - AIF1 DAC2 EQ Band 5 A + */ +#define WM8994_AIF1DAC2_EQ_B5_A_MASK 0xFFFF /* AIF1DAC2_EQ_B5_A - [15:0] */ +#define 
WM8994_AIF1DAC2_EQ_B5_A_SHIFT 0 /* AIF1DAC2_EQ_B5_A - [15:0] */ +#define WM8994_AIF1DAC2_EQ_B5_A_WIDTH 16 /* AIF1DAC2_EQ_B5_A - [15:0] */ + +/* + * R1202 (0x4B2) - AIF1 DAC2 EQ Band 5 B + */ +#define WM8994_AIF1DAC2_EQ_B5_B_MASK 0xFFFF /* AIF1DAC2_EQ_B5_B - [15:0] */ +#define WM8994_AIF1DAC2_EQ_B5_B_SHIFT 0 /* AIF1DAC2_EQ_B5_B - [15:0] */ +#define WM8994_AIF1DAC2_EQ_B5_B_WIDTH 16 /* AIF1DAC2_EQ_B5_B - [15:0] */ + +/* + * R1203 (0x4B3) - AIF1 DAC2 EQ Band 5 PG + */ +#define WM8994_AIF1DAC2_EQ_B5_PG_MASK 0xFFFF /* AIF1DAC2_EQ_B5_PG - [15:0] */ +#define WM8994_AIF1DAC2_EQ_B5_PG_SHIFT 0 /* AIF1DAC2_EQ_B5_PG - [15:0] */ +#define WM8994_AIF1DAC2_EQ_B5_PG_WIDTH 16 /* AIF1DAC2_EQ_B5_PG - [15:0] */ + +/* + * R1280 (0x500) - AIF2 ADC Left Volume + */ +#define WM8994_AIF2ADC_VU 0x0100 /* AIF2ADC_VU */ +#define WM8994_AIF2ADC_VU_MASK 0x0100 /* AIF2ADC_VU */ +#define WM8994_AIF2ADC_VU_SHIFT 8 /* AIF2ADC_VU */ +#define WM8994_AIF2ADC_VU_WIDTH 1 /* AIF2ADC_VU */ +#define WM8994_AIF2ADCL_VOL_MASK 0x00FF /* AIF2ADCL_VOL - [7:0] */ +#define WM8994_AIF2ADCL_VOL_SHIFT 0 /* AIF2ADCL_VOL - [7:0] */ +#define WM8994_AIF2ADCL_VOL_WIDTH 8 /* AIF2ADCL_VOL - [7:0] */ + +/* + * R1281 (0x501) - AIF2 ADC Right Volume + */ +#define WM8994_AIF2ADC_VU 0x0100 /* AIF2ADC_VU */ +#define WM8994_AIF2ADC_VU_MASK 0x0100 /* AIF2ADC_VU */ +#define WM8994_AIF2ADC_VU_SHIFT 8 /* AIF2ADC_VU */ +#define WM8994_AIF2ADC_VU_WIDTH 1 /* AIF2ADC_VU */ +#define WM8994_AIF2ADCR_VOL_MASK 0x00FF /* AIF2ADCR_VOL - [7:0] */ +#define WM8994_AIF2ADCR_VOL_SHIFT 0 /* AIF2ADCR_VOL - [7:0] */ +#define WM8994_AIF2ADCR_VOL_WIDTH 8 /* AIF2ADCR_VOL - [7:0] */ + +/* + * R1282 (0x502) - AIF2 DAC Left Volume + */ +#define WM8994_AIF2DAC_VU 0x0100 /* AIF2DAC_VU */ +#define WM8994_AIF2DAC_VU_MASK 0x0100 /* AIF2DAC_VU */ +#define WM8994_AIF2DAC_VU_SHIFT 8 /* AIF2DAC_VU */ +#define WM8994_AIF2DAC_VU_WIDTH 1 /* AIF2DAC_VU */ +#define WM8994_AIF2DACL_VOL_MASK 0x00FF /* AIF2DACL_VOL - [7:0] */ +#define WM8994_AIF2DACL_VOL_SHIFT 0 /* AIF2DACL_VOL - [7:0] */ +#define WM8994_AIF2DACL_VOL_WIDTH 8 /* AIF2DACL_VOL - [7:0] */ + +/* + * R1283 (0x503) - AIF2 DAC Right Volume + */ +#define WM8994_AIF2DAC_VU 0x0100 /* AIF2DAC_VU */ +#define WM8994_AIF2DAC_VU_MASK 0x0100 /* AIF2DAC_VU */ +#define WM8994_AIF2DAC_VU_SHIFT 8 /* AIF2DAC_VU */ +#define WM8994_AIF2DAC_VU_WIDTH 1 /* AIF2DAC_VU */ +#define WM8994_AIF2DACR_VOL_MASK 0x00FF /* AIF2DACR_VOL - [7:0] */ +#define WM8994_AIF2DACR_VOL_SHIFT 0 /* AIF2DACR_VOL - [7:0] */ +#define WM8994_AIF2DACR_VOL_WIDTH 8 /* AIF2DACR_VOL - [7:0] */ + +/* + * R1296 (0x510) - AIF2 ADC Filters + */ +#define WM8994_AIF2ADC_4FS 0x8000 /* AIF2ADC_4FS */ +#define WM8994_AIF2ADC_4FS_MASK 0x8000 /* AIF2ADC_4FS */ +#define WM8994_AIF2ADC_4FS_SHIFT 15 /* AIF2ADC_4FS */ +#define WM8994_AIF2ADC_4FS_WIDTH 1 /* AIF2ADC_4FS */ +#define WM8994_AIF2ADC_HPF_CUT_MASK 0x6000 /* AIF2ADC_HPF_CUT - [14:13] */ +#define WM8994_AIF2ADC_HPF_CUT_SHIFT 13 /* AIF2ADC_HPF_CUT - [14:13] */ +#define WM8994_AIF2ADC_HPF_CUT_WIDTH 2 /* AIF2ADC_HPF_CUT - [14:13] */ +#define WM8994_AIF2ADCL_HPF 0x1000 /* AIF2ADCL_HPF */ +#define WM8994_AIF2ADCL_HPF_MASK 0x1000 /* AIF2ADCL_HPF */ +#define WM8994_AIF2ADCL_HPF_SHIFT 12 /* AIF2ADCL_HPF */ +#define WM8994_AIF2ADCL_HPF_WIDTH 1 /* AIF2ADCL_HPF */ +#define WM8994_AIF2ADCR_HPF 0x0800 /* AIF2ADCR_HPF */ +#define WM8994_AIF2ADCR_HPF_MASK 0x0800 /* AIF2ADCR_HPF */ +#define WM8994_AIF2ADCR_HPF_SHIFT 11 /* AIF2ADCR_HPF */ +#define WM8994_AIF2ADCR_HPF_WIDTH 1 /* AIF2ADCR_HPF */ + +/* + * R1312 (0x520) - AIF2 DAC Filters (1) + */ +#define 
WM8994_AIF2DAC_MUTE 0x0200 /* AIF2DAC_MUTE */ +#define WM8994_AIF2DAC_MUTE_MASK 0x0200 /* AIF2DAC_MUTE */ +#define WM8994_AIF2DAC_MUTE_SHIFT 9 /* AIF2DAC_MUTE */ +#define WM8994_AIF2DAC_MUTE_WIDTH 1 /* AIF2DAC_MUTE */ +#define WM8994_AIF2DAC_MONO 0x0080 /* AIF2DAC_MONO */ +#define WM8994_AIF2DAC_MONO_MASK 0x0080 /* AIF2DAC_MONO */ +#define WM8994_AIF2DAC_MONO_SHIFT 7 /* AIF2DAC_MONO */ +#define WM8994_AIF2DAC_MONO_WIDTH 1 /* AIF2DAC_MONO */ +#define WM8994_AIF2DAC_MUTERATE 0x0020 /* AIF2DAC_MUTERATE */ +#define WM8994_AIF2DAC_MUTERATE_MASK 0x0020 /* AIF2DAC_MUTERATE */ +#define WM8994_AIF2DAC_MUTERATE_SHIFT 5 /* AIF2DAC_MUTERATE */ +#define WM8994_AIF2DAC_MUTERATE_WIDTH 1 /* AIF2DAC_MUTERATE */ +#define WM8994_AIF2DAC_UNMUTE_RAMP 0x0010 /* AIF2DAC_UNMUTE_RAMP */ +#define WM8994_AIF2DAC_UNMUTE_RAMP_MASK 0x0010 /* AIF2DAC_UNMUTE_RAMP */ +#define WM8994_AIF2DAC_UNMUTE_RAMP_SHIFT 4 /* AIF2DAC_UNMUTE_RAMP */ +#define WM8994_AIF2DAC_UNMUTE_RAMP_WIDTH 1 /* AIF2DAC_UNMUTE_RAMP */ +#define WM8994_AIF2DAC_DEEMP_MASK 0x0006 /* AIF2DAC_DEEMP - [2:1] */ +#define WM8994_AIF2DAC_DEEMP_SHIFT 1 /* AIF2DAC_DEEMP - [2:1] */ +#define WM8994_AIF2DAC_DEEMP_WIDTH 2 /* AIF2DAC_DEEMP - [2:1] */ + +/* + * R1313 (0x521) - AIF2 DAC Filters (2) + */ +#define WM8994_AIF2DAC_3D_GAIN_MASK 0x3E00 /* AIF2DAC_3D_GAIN - [13:9] */ +#define WM8994_AIF2DAC_3D_GAIN_SHIFT 9 /* AIF2DAC_3D_GAIN - [13:9] */ +#define WM8994_AIF2DAC_3D_GAIN_WIDTH 5 /* AIF2DAC_3D_GAIN - [13:9] */ +#define WM8994_AIF2DAC_3D_ENA 0x0100 /* AIF2DAC_3D_ENA */ +#define WM8994_AIF2DAC_3D_ENA_MASK 0x0100 /* AIF2DAC_3D_ENA */ +#define WM8994_AIF2DAC_3D_ENA_SHIFT 8 /* AIF2DAC_3D_ENA */ +#define WM8994_AIF2DAC_3D_ENA_WIDTH 1 /* AIF2DAC_3D_ENA */ + +/* + * R1344 (0x540) - AIF2 DRC (1) + */ +#define WM8994_AIF2DRC_SIG_DET_RMS_MASK 0xF800 /* AIF2DRC_SIG_DET_RMS - [15:11] */ +#define WM8994_AIF2DRC_SIG_DET_RMS_SHIFT 11 /* AIF2DRC_SIG_DET_RMS - [15:11] */ +#define WM8994_AIF2DRC_SIG_DET_RMS_WIDTH 5 /* AIF2DRC_SIG_DET_RMS - [15:11] */ +#define WM8994_AIF2DRC_SIG_DET_PK_MASK 0x0600 /* AIF2DRC_SIG_DET_PK - [10:9] */ +#define WM8994_AIF2DRC_SIG_DET_PK_SHIFT 9 /* AIF2DRC_SIG_DET_PK - [10:9] */ +#define WM8994_AIF2DRC_SIG_DET_PK_WIDTH 2 /* AIF2DRC_SIG_DET_PK - [10:9] */ +#define WM8994_AIF2DRC_NG_ENA 0x0100 /* AIF2DRC_NG_ENA */ +#define WM8994_AIF2DRC_NG_ENA_MASK 0x0100 /* AIF2DRC_NG_ENA */ +#define WM8994_AIF2DRC_NG_ENA_SHIFT 8 /* AIF2DRC_NG_ENA */ +#define WM8994_AIF2DRC_NG_ENA_WIDTH 1 /* AIF2DRC_NG_ENA */ +#define WM8994_AIF2DRC_SIG_DET_MODE 0x0080 /* AIF2DRC_SIG_DET_MODE */ +#define WM8994_AIF2DRC_SIG_DET_MODE_MASK 0x0080 /* AIF2DRC_SIG_DET_MODE */ +#define WM8994_AIF2DRC_SIG_DET_MODE_SHIFT 7 /* AIF2DRC_SIG_DET_MODE */ +#define WM8994_AIF2DRC_SIG_DET_MODE_WIDTH 1 /* AIF2DRC_SIG_DET_MODE */ +#define WM8994_AIF2DRC_SIG_DET 0x0040 /* AIF2DRC_SIG_DET */ +#define WM8994_AIF2DRC_SIG_DET_MASK 0x0040 /* AIF2DRC_SIG_DET */ +#define WM8994_AIF2DRC_SIG_DET_SHIFT 6 /* AIF2DRC_SIG_DET */ +#define WM8994_AIF2DRC_SIG_DET_WIDTH 1 /* AIF2DRC_SIG_DET */ +#define WM8994_AIF2DRC_KNEE2_OP_ENA 0x0020 /* AIF2DRC_KNEE2_OP_ENA */ +#define WM8994_AIF2DRC_KNEE2_OP_ENA_MASK 0x0020 /* AIF2DRC_KNEE2_OP_ENA */ +#define WM8994_AIF2DRC_KNEE2_OP_ENA_SHIFT 5 /* AIF2DRC_KNEE2_OP_ENA */ +#define WM8994_AIF2DRC_KNEE2_OP_ENA_WIDTH 1 /* AIF2DRC_KNEE2_OP_ENA */ +#define WM8994_AIF2DRC_QR 0x0010 /* AIF2DRC_QR */ +#define WM8994_AIF2DRC_QR_MASK 0x0010 /* AIF2DRC_QR */ +#define WM8994_AIF2DRC_QR_SHIFT 4 /* AIF2DRC_QR */ +#define WM8994_AIF2DRC_QR_WIDTH 1 /* AIF2DRC_QR */ +#define WM8994_AIF2DRC_ANTICLIP 0x0008 /* 
AIF2DRC_ANTICLIP */ +#define WM8994_AIF2DRC_ANTICLIP_MASK 0x0008 /* AIF2DRC_ANTICLIP */ +#define WM8994_AIF2DRC_ANTICLIP_SHIFT 3 /* AIF2DRC_ANTICLIP */ +#define WM8994_AIF2DRC_ANTICLIP_WIDTH 1 /* AIF2DRC_ANTICLIP */ +#define WM8994_AIF2DAC_DRC_ENA 0x0004 /* AIF2DAC_DRC_ENA */ +#define WM8994_AIF2DAC_DRC_ENA_MASK 0x0004 /* AIF2DAC_DRC_ENA */ +#define WM8994_AIF2DAC_DRC_ENA_SHIFT 2 /* AIF2DAC_DRC_ENA */ +#define WM8994_AIF2DAC_DRC_ENA_WIDTH 1 /* AIF2DAC_DRC_ENA */ +#define WM8994_AIF2ADCL_DRC_ENA 0x0002 /* AIF2ADCL_DRC_ENA */ +#define WM8994_AIF2ADCL_DRC_ENA_MASK 0x0002 /* AIF2ADCL_DRC_ENA */ +#define WM8994_AIF2ADCL_DRC_ENA_SHIFT 1 /* AIF2ADCL_DRC_ENA */ +#define WM8994_AIF2ADCL_DRC_ENA_WIDTH 1 /* AIF2ADCL_DRC_ENA */ +#define WM8994_AIF2ADCR_DRC_ENA 0x0001 /* AIF2ADCR_DRC_ENA */ +#define WM8994_AIF2ADCR_DRC_ENA_MASK 0x0001 /* AIF2ADCR_DRC_ENA */ +#define WM8994_AIF2ADCR_DRC_ENA_SHIFT 0 /* AIF2ADCR_DRC_ENA */ +#define WM8994_AIF2ADCR_DRC_ENA_WIDTH 1 /* AIF2ADCR_DRC_ENA */ + +/* + * R1345 (0x541) - AIF2 DRC (2) + */ +#define WM8994_AIF2DRC_ATK_MASK 0x1E00 /* AIF2DRC_ATK - [12:9] */ +#define WM8994_AIF2DRC_ATK_SHIFT 9 /* AIF2DRC_ATK - [12:9] */ +#define WM8994_AIF2DRC_ATK_WIDTH 4 /* AIF2DRC_ATK - [12:9] */ +#define WM8994_AIF2DRC_DCY_MASK 0x01E0 /* AIF2DRC_DCY - [8:5] */ +#define WM8994_AIF2DRC_DCY_SHIFT 5 /* AIF2DRC_DCY - [8:5] */ +#define WM8994_AIF2DRC_DCY_WIDTH 4 /* AIF2DRC_DCY - [8:5] */ +#define WM8994_AIF2DRC_MINGAIN_MASK 0x001C /* AIF2DRC_MINGAIN - [4:2] */ +#define WM8994_AIF2DRC_MINGAIN_SHIFT 2 /* AIF2DRC_MINGAIN - [4:2] */ +#define WM8994_AIF2DRC_MINGAIN_WIDTH 3 /* AIF2DRC_MINGAIN - [4:2] */ +#define WM8994_AIF2DRC_MAXGAIN_MASK 0x0003 /* AIF2DRC_MAXGAIN - [1:0] */ +#define WM8994_AIF2DRC_MAXGAIN_SHIFT 0 /* AIF2DRC_MAXGAIN - [1:0] */ +#define WM8994_AIF2DRC_MAXGAIN_WIDTH 2 /* AIF2DRC_MAXGAIN - [1:0] */ + +/* + * R1346 (0x542) - AIF2 DRC (3) + */ +#define WM8994_AIF2DRC_NG_MINGAIN_MASK 0xF000 /* AIF2DRC_NG_MINGAIN - [15:12] */ +#define WM8994_AIF2DRC_NG_MINGAIN_SHIFT 12 /* AIF2DRC_NG_MINGAIN - [15:12] */ +#define WM8994_AIF2DRC_NG_MINGAIN_WIDTH 4 /* AIF2DRC_NG_MINGAIN - [15:12] */ +#define WM8994_AIF2DRC_NG_EXP_MASK 0x0C00 /* AIF2DRC_NG_EXP - [11:10] */ +#define WM8994_AIF2DRC_NG_EXP_SHIFT 10 /* AIF2DRC_NG_EXP - [11:10] */ +#define WM8994_AIF2DRC_NG_EXP_WIDTH 2 /* AIF2DRC_NG_EXP - [11:10] */ +#define WM8994_AIF2DRC_QR_THR_MASK 0x0300 /* AIF2DRC_QR_THR - [9:8] */ +#define WM8994_AIF2DRC_QR_THR_SHIFT 8 /* AIF2DRC_QR_THR - [9:8] */ +#define WM8994_AIF2DRC_QR_THR_WIDTH 2 /* AIF2DRC_QR_THR - [9:8] */ +#define WM8994_AIF2DRC_QR_DCY_MASK 0x00C0 /* AIF2DRC_QR_DCY - [7:6] */ +#define WM8994_AIF2DRC_QR_DCY_SHIFT 6 /* AIF2DRC_QR_DCY - [7:6] */ +#define WM8994_AIF2DRC_QR_DCY_WIDTH 2 /* AIF2DRC_QR_DCY - [7:6] */ +#define WM8994_AIF2DRC_HI_COMP_MASK 0x0038 /* AIF2DRC_HI_COMP - [5:3] */ +#define WM8994_AIF2DRC_HI_COMP_SHIFT 3 /* AIF2DRC_HI_COMP - [5:3] */ +#define WM8994_AIF2DRC_HI_COMP_WIDTH 3 /* AIF2DRC_HI_COMP - [5:3] */ +#define WM8994_AIF2DRC_LO_COMP_MASK 0x0007 /* AIF2DRC_LO_COMP - [2:0] */ +#define WM8994_AIF2DRC_LO_COMP_SHIFT 0 /* AIF2DRC_LO_COMP - [2:0] */ +#define WM8994_AIF2DRC_LO_COMP_WIDTH 3 /* AIF2DRC_LO_COMP - [2:0] */ + +/* + * R1347 (0x543) - AIF2 DRC (4) + */ +#define WM8994_AIF2DRC_KNEE_IP_MASK 0x07E0 /* AIF2DRC_KNEE_IP - [10:5] */ +#define WM8994_AIF2DRC_KNEE_IP_SHIFT 5 /* AIF2DRC_KNEE_IP - [10:5] */ +#define WM8994_AIF2DRC_KNEE_IP_WIDTH 6 /* AIF2DRC_KNEE_IP - [10:5] */ +#define WM8994_AIF2DRC_KNEE_OP_MASK 0x001F /* AIF2DRC_KNEE_OP - [4:0] */ +#define WM8994_AIF2DRC_KNEE_OP_SHIFT 
0 /* AIF2DRC_KNEE_OP - [4:0] */ +#define WM8994_AIF2DRC_KNEE_OP_WIDTH 5 /* AIF2DRC_KNEE_OP - [4:0] */ + +/* + * R1348 (0x544) - AIF2 DRC (5) + */ +#define WM8994_AIF2DRC_KNEE2_IP_MASK 0x03E0 /* AIF2DRC_KNEE2_IP - [9:5] */ +#define WM8994_AIF2DRC_KNEE2_IP_SHIFT 5 /* AIF2DRC_KNEE2_IP - [9:5] */ +#define WM8994_AIF2DRC_KNEE2_IP_WIDTH 5 /* AIF2DRC_KNEE2_IP - [9:5] */ +#define WM8994_AIF2DRC_KNEE2_OP_MASK 0x001F /* AIF2DRC_KNEE2_OP - [4:0] */ +#define WM8994_AIF2DRC_KNEE2_OP_SHIFT 0 /* AIF2DRC_KNEE2_OP - [4:0] */ +#define WM8994_AIF2DRC_KNEE2_OP_WIDTH 5 /* AIF2DRC_KNEE2_OP - [4:0] */ + +/* + * R1408 (0x580) - AIF2 EQ Gains (1) + */ +#define WM8994_AIF2DAC_EQ_B1_GAIN_MASK 0xF800 /* AIF2DAC_EQ_B1_GAIN - [15:11] */ +#define WM8994_AIF2DAC_EQ_B1_GAIN_SHIFT 11 /* AIF2DAC_EQ_B1_GAIN - [15:11] */ +#define WM8994_AIF2DAC_EQ_B1_GAIN_WIDTH 5 /* AIF2DAC_EQ_B1_GAIN - [15:11] */ +#define WM8994_AIF2DAC_EQ_B2_GAIN_MASK 0x07C0 /* AIF2DAC_EQ_B2_GAIN - [10:6] */ +#define WM8994_AIF2DAC_EQ_B2_GAIN_SHIFT 6 /* AIF2DAC_EQ_B2_GAIN - [10:6] */ +#define WM8994_AIF2DAC_EQ_B2_GAIN_WIDTH 5 /* AIF2DAC_EQ_B2_GAIN - [10:6] */ +#define WM8994_AIF2DAC_EQ_B3_GAIN_MASK 0x003E /* AIF2DAC_EQ_B3_GAIN - [5:1] */ +#define WM8994_AIF2DAC_EQ_B3_GAIN_SHIFT 1 /* AIF2DAC_EQ_B3_GAIN - [5:1] */ +#define WM8994_AIF2DAC_EQ_B3_GAIN_WIDTH 5 /* AIF2DAC_EQ_B3_GAIN - [5:1] */ +#define WM8994_AIF2DAC_EQ_ENA 0x0001 /* AIF2DAC_EQ_ENA */ +#define WM8994_AIF2DAC_EQ_ENA_MASK 0x0001 /* AIF2DAC_EQ_ENA */ +#define WM8994_AIF2DAC_EQ_ENA_SHIFT 0 /* AIF2DAC_EQ_ENA */ +#define WM8994_AIF2DAC_EQ_ENA_WIDTH 1 /* AIF2DAC_EQ_ENA */ + +/* + * R1409 (0x581) - AIF2 EQ Gains (2) + */ +#define WM8994_AIF2DAC_EQ_B4_GAIN_MASK 0xF800 /* AIF2DAC_EQ_B4_GAIN - [15:11] */ +#define WM8994_AIF2DAC_EQ_B4_GAIN_SHIFT 11 /* AIF2DAC_EQ_B4_GAIN - [15:11] */ +#define WM8994_AIF2DAC_EQ_B4_GAIN_WIDTH 5 /* AIF2DAC_EQ_B4_GAIN - [15:11] */ +#define WM8994_AIF2DAC_EQ_B5_GAIN_MASK 0x07C0 /* AIF2DAC_EQ_B5_GAIN - [10:6] */ +#define WM8994_AIF2DAC_EQ_B5_GAIN_SHIFT 6 /* AIF2DAC_EQ_B5_GAIN - [10:6] */ +#define WM8994_AIF2DAC_EQ_B5_GAIN_WIDTH 5 /* AIF2DAC_EQ_B5_GAIN - [10:6] */ + +/* + * R1410 (0x582) - AIF2 EQ Band 1 A + */ +#define WM8994_AIF2DAC_EQ_B1_A_MASK 0xFFFF /* AIF2DAC_EQ_B1_A - [15:0] */ +#define WM8994_AIF2DAC_EQ_B1_A_SHIFT 0 /* AIF2DAC_EQ_B1_A - [15:0] */ +#define WM8994_AIF2DAC_EQ_B1_A_WIDTH 16 /* AIF2DAC_EQ_B1_A - [15:0] */ + +/* + * R1411 (0x583) - AIF2 EQ Band 1 B + */ +#define WM8994_AIF2DAC_EQ_B1_B_MASK 0xFFFF /* AIF2DAC_EQ_B1_B - [15:0] */ +#define WM8994_AIF2DAC_EQ_B1_B_SHIFT 0 /* AIF2DAC_EQ_B1_B - [15:0] */ +#define WM8994_AIF2DAC_EQ_B1_B_WIDTH 16 /* AIF2DAC_EQ_B1_B - [15:0] */ + +/* + * R1412 (0x584) - AIF2 EQ Band 1 PG + */ +#define WM8994_AIF2DAC_EQ_B1_PG_MASK 0xFFFF /* AIF2DAC_EQ_B1_PG - [15:0] */ +#define WM8994_AIF2DAC_EQ_B1_PG_SHIFT 0 /* AIF2DAC_EQ_B1_PG - [15:0] */ +#define WM8994_AIF2DAC_EQ_B1_PG_WIDTH 16 /* AIF2DAC_EQ_B1_PG - [15:0] */ + +/* + * R1413 (0x585) - AIF2 EQ Band 2 A + */ +#define WM8994_AIF2DAC_EQ_B2_A_MASK 0xFFFF /* AIF2DAC_EQ_B2_A - [15:0] */ +#define WM8994_AIF2DAC_EQ_B2_A_SHIFT 0 /* AIF2DAC_EQ_B2_A - [15:0] */ +#define WM8994_AIF2DAC_EQ_B2_A_WIDTH 16 /* AIF2DAC_EQ_B2_A - [15:0] */ + +/* + * R1414 (0x586) - AIF2 EQ Band 2 B + */ +#define WM8994_AIF2DAC_EQ_B2_B_MASK 0xFFFF /* AIF2DAC_EQ_B2_B - [15:0] */ +#define WM8994_AIF2DAC_EQ_B2_B_SHIFT 0 /* AIF2DAC_EQ_B2_B - [15:0] */ +#define WM8994_AIF2DAC_EQ_B2_B_WIDTH 16 /* AIF2DAC_EQ_B2_B - [15:0] */ + +/* + * R1415 (0x587) - AIF2 EQ Band 2 C + */ +#define WM8994_AIF2DAC_EQ_B2_C_MASK 0xFFFF /* AIF2DAC_EQ_B2_C 
- [15:0] */ +#define WM8994_AIF2DAC_EQ_B2_C_SHIFT 0 /* AIF2DAC_EQ_B2_C - [15:0] */ +#define WM8994_AIF2DAC_EQ_B2_C_WIDTH 16 /* AIF2DAC_EQ_B2_C - [15:0] */ + +/* + * R1416 (0x588) - AIF2 EQ Band 2 PG + */ +#define WM8994_AIF2DAC_EQ_B2_PG_MASK 0xFFFF /* AIF2DAC_EQ_B2_PG - [15:0] */ +#define WM8994_AIF2DAC_EQ_B2_PG_SHIFT 0 /* AIF2DAC_EQ_B2_PG - [15:0] */ +#define WM8994_AIF2DAC_EQ_B2_PG_WIDTH 16 /* AIF2DAC_EQ_B2_PG - [15:0] */ + +/* + * R1417 (0x589) - AIF2 EQ Band 3 A + */ +#define WM8994_AIF2DAC_EQ_B3_A_MASK 0xFFFF /* AIF2DAC_EQ_B3_A - [15:0] */ +#define WM8994_AIF2DAC_EQ_B3_A_SHIFT 0 /* AIF2DAC_EQ_B3_A - [15:0] */ +#define WM8994_AIF2DAC_EQ_B3_A_WIDTH 16 /* AIF2DAC_EQ_B3_A - [15:0] */ + +/* + * R1418 (0x58A) - AIF2 EQ Band 3 B + */ +#define WM8994_AIF2DAC_EQ_B3_B_MASK 0xFFFF /* AIF2DAC_EQ_B3_B - [15:0] */ +#define WM8994_AIF2DAC_EQ_B3_B_SHIFT 0 /* AIF2DAC_EQ_B3_B - [15:0] */ +#define WM8994_AIF2DAC_EQ_B3_B_WIDTH 16 /* AIF2DAC_EQ_B3_B - [15:0] */ + +/* + * R1419 (0x58B) - AIF2 EQ Band 3 C + */ +#define WM8994_AIF2DAC_EQ_B3_C_MASK 0xFFFF /* AIF2DAC_EQ_B3_C - [15:0] */ +#define WM8994_AIF2DAC_EQ_B3_C_SHIFT 0 /* AIF2DAC_EQ_B3_C - [15:0] */ +#define WM8994_AIF2DAC_EQ_B3_C_WIDTH 16 /* AIF2DAC_EQ_B3_C - [15:0] */ + +/* + * R1420 (0x58C) - AIF2 EQ Band 3 PG + */ +#define WM8994_AIF2DAC_EQ_B3_PG_MASK 0xFFFF /* AIF2DAC_EQ_B3_PG - [15:0] */ +#define WM8994_AIF2DAC_EQ_B3_PG_SHIFT 0 /* AIF2DAC_EQ_B3_PG - [15:0] */ +#define WM8994_AIF2DAC_EQ_B3_PG_WIDTH 16 /* AIF2DAC_EQ_B3_PG - [15:0] */ + +/* + * R1421 (0x58D) - AIF2 EQ Band 4 A + */ +#define WM8994_AIF2DAC_EQ_B4_A_MASK 0xFFFF /* AIF2DAC_EQ_B4_A - [15:0] */ +#define WM8994_AIF2DAC_EQ_B4_A_SHIFT 0 /* AIF2DAC_EQ_B4_A - [15:0] */ +#define WM8994_AIF2DAC_EQ_B4_A_WIDTH 16 /* AIF2DAC_EQ_B4_A - [15:0] */ + +/* + * R1422 (0x58E) - AIF2 EQ Band 4 B + */ +#define WM8994_AIF2DAC_EQ_B4_B_MASK 0xFFFF /* AIF2DAC_EQ_B4_B - [15:0] */ +#define WM8994_AIF2DAC_EQ_B4_B_SHIFT 0 /* AIF2DAC_EQ_B4_B - [15:0] */ +#define WM8994_AIF2DAC_EQ_B4_B_WIDTH 16 /* AIF2DAC_EQ_B4_B - [15:0] */ + +/* + * R1423 (0x58F) - AIF2 EQ Band 4 C + */ +#define WM8994_AIF2DAC_EQ_B4_C_MASK 0xFFFF /* AIF2DAC_EQ_B4_C - [15:0] */ +#define WM8994_AIF2DAC_EQ_B4_C_SHIFT 0 /* AIF2DAC_EQ_B4_C - [15:0] */ +#define WM8994_AIF2DAC_EQ_B4_C_WIDTH 16 /* AIF2DAC_EQ_B4_C - [15:0] */ + +/* + * R1424 (0x590) - AIF2 EQ Band 4 PG + */ +#define WM8994_AIF2DAC_EQ_B4_PG_MASK 0xFFFF /* AIF2DAC_EQ_B4_PG - [15:0] */ +#define WM8994_AIF2DAC_EQ_B4_PG_SHIFT 0 /* AIF2DAC_EQ_B4_PG - [15:0] */ +#define WM8994_AIF2DAC_EQ_B4_PG_WIDTH 16 /* AIF2DAC_EQ_B4_PG - [15:0] */ + +/* + * R1425 (0x591) - AIF2 EQ Band 5 A + */ +#define WM8994_AIF2DAC_EQ_B5_A_MASK 0xFFFF /* AIF2DAC_EQ_B5_A - [15:0] */ +#define WM8994_AIF2DAC_EQ_B5_A_SHIFT 0 /* AIF2DAC_EQ_B5_A - [15:0] */ +#define WM8994_AIF2DAC_EQ_B5_A_WIDTH 16 /* AIF2DAC_EQ_B5_A - [15:0] */ + +/* + * R1426 (0x592) - AIF2 EQ Band 5 B + */ +#define WM8994_AIF2DAC_EQ_B5_B_MASK 0xFFFF /* AIF2DAC_EQ_B5_B - [15:0] */ +#define WM8994_AIF2DAC_EQ_B5_B_SHIFT 0 /* AIF2DAC_EQ_B5_B - [15:0] */ +#define WM8994_AIF2DAC_EQ_B5_B_WIDTH 16 /* AIF2DAC_EQ_B5_B - [15:0] */ + +/* + * R1427 (0x593) - AIF2 EQ Band 5 PG + */ +#define WM8994_AIF2DAC_EQ_B5_PG_MASK 0xFFFF /* AIF2DAC_EQ_B5_PG - [15:0] */ +#define WM8994_AIF2DAC_EQ_B5_PG_SHIFT 0 /* AIF2DAC_EQ_B5_PG - [15:0] */ +#define WM8994_AIF2DAC_EQ_B5_PG_WIDTH 16 /* AIF2DAC_EQ_B5_PG - [15:0] */ + +/* + * R1536 (0x600) - DAC1 Mixer Volumes + */ +#define WM8994_ADCR_DAC1_VOL_MASK 0x01E0 /* ADCR_DAC1_VOL - [8:5] */ +#define WM8994_ADCR_DAC1_VOL_SHIFT 5 /* ADCR_DAC1_VOL - 
[8:5] */ +#define WM8994_ADCR_DAC1_VOL_WIDTH 4 /* ADCR_DAC1_VOL - [8:5] */ +#define WM8994_ADCL_DAC1_VOL_MASK 0x000F /* ADCL_DAC1_VOL - [3:0] */ +#define WM8994_ADCL_DAC1_VOL_SHIFT 0 /* ADCL_DAC1_VOL - [3:0] */ +#define WM8994_ADCL_DAC1_VOL_WIDTH 4 /* ADCL_DAC1_VOL - [3:0] */ + +/* + * R1537 (0x601) - DAC1 Left Mixer Routing + */ +#define WM8994_ADCR_TO_DAC1L 0x0020 /* ADCR_TO_DAC1L */ +#define WM8994_ADCR_TO_DAC1L_MASK 0x0020 /* ADCR_TO_DAC1L */ +#define WM8994_ADCR_TO_DAC1L_SHIFT 5 /* ADCR_TO_DAC1L */ +#define WM8994_ADCR_TO_DAC1L_WIDTH 1 /* ADCR_TO_DAC1L */ +#define WM8994_ADCL_TO_DAC1L 0x0010 /* ADCL_TO_DAC1L */ +#define WM8994_ADCL_TO_DAC1L_MASK 0x0010 /* ADCL_TO_DAC1L */ +#define WM8994_ADCL_TO_DAC1L_SHIFT 4 /* ADCL_TO_DAC1L */ +#define WM8994_ADCL_TO_DAC1L_WIDTH 1 /* ADCL_TO_DAC1L */ +#define WM8994_AIF2DACL_TO_DAC1L 0x0004 /* AIF2DACL_TO_DAC1L */ +#define WM8994_AIF2DACL_TO_DAC1L_MASK 0x0004 /* AIF2DACL_TO_DAC1L */ +#define WM8994_AIF2DACL_TO_DAC1L_SHIFT 2 /* AIF2DACL_TO_DAC1L */ +#define WM8994_AIF2DACL_TO_DAC1L_WIDTH 1 /* AIF2DACL_TO_DAC1L */ +#define WM8994_AIF1DAC2L_TO_DAC1L 0x0002 /* AIF1DAC2L_TO_DAC1L */ +#define WM8994_AIF1DAC2L_TO_DAC1L_MASK 0x0002 /* AIF1DAC2L_TO_DAC1L */ +#define WM8994_AIF1DAC2L_TO_DAC1L_SHIFT 1 /* AIF1DAC2L_TO_DAC1L */ +#define WM8994_AIF1DAC2L_TO_DAC1L_WIDTH 1 /* AIF1DAC2L_TO_DAC1L */ +#define WM8994_AIF1DAC1L_TO_DAC1L 0x0001 /* AIF1DAC1L_TO_DAC1L */ +#define WM8994_AIF1DAC1L_TO_DAC1L_MASK 0x0001 /* AIF1DAC1L_TO_DAC1L */ +#define WM8994_AIF1DAC1L_TO_DAC1L_SHIFT 0 /* AIF1DAC1L_TO_DAC1L */ +#define WM8994_AIF1DAC1L_TO_DAC1L_WIDTH 1 /* AIF1DAC1L_TO_DAC1L */ + +/* + * R1538 (0x602) - DAC1 Right Mixer Routing + */ +#define WM8994_ADCR_TO_DAC1R 0x0020 /* ADCR_TO_DAC1R */ +#define WM8994_ADCR_TO_DAC1R_MASK 0x0020 /* ADCR_TO_DAC1R */ +#define WM8994_ADCR_TO_DAC1R_SHIFT 5 /* ADCR_TO_DAC1R */ +#define WM8994_ADCR_TO_DAC1R_WIDTH 1 /* ADCR_TO_DAC1R */ +#define WM8994_ADCL_TO_DAC1R 0x0010 /* ADCL_TO_DAC1R */ +#define WM8994_ADCL_TO_DAC1R_MASK 0x0010 /* ADCL_TO_DAC1R */ +#define WM8994_ADCL_TO_DAC1R_SHIFT 4 /* ADCL_TO_DAC1R */ +#define WM8994_ADCL_TO_DAC1R_WIDTH 1 /* ADCL_TO_DAC1R */ +#define WM8994_AIF2DACR_TO_DAC1R 0x0004 /* AIF2DACR_TO_DAC1R */ +#define WM8994_AIF2DACR_TO_DAC1R_MASK 0x0004 /* AIF2DACR_TO_DAC1R */ +#define WM8994_AIF2DACR_TO_DAC1R_SHIFT 2 /* AIF2DACR_TO_DAC1R */ +#define WM8994_AIF2DACR_TO_DAC1R_WIDTH 1 /* AIF2DACR_TO_DAC1R */ +#define WM8994_AIF1DAC2R_TO_DAC1R 0x0002 /* AIF1DAC2R_TO_DAC1R */ +#define WM8994_AIF1DAC2R_TO_DAC1R_MASK 0x0002 /* AIF1DAC2R_TO_DAC1R */ +#define WM8994_AIF1DAC2R_TO_DAC1R_SHIFT 1 /* AIF1DAC2R_TO_DAC1R */ +#define WM8994_AIF1DAC2R_TO_DAC1R_WIDTH 1 /* AIF1DAC2R_TO_DAC1R */ +#define WM8994_AIF1DAC1R_TO_DAC1R 0x0001 /* AIF1DAC1R_TO_DAC1R */ +#define WM8994_AIF1DAC1R_TO_DAC1R_MASK 0x0001 /* AIF1DAC1R_TO_DAC1R */ +#define WM8994_AIF1DAC1R_TO_DAC1R_SHIFT 0 /* AIF1DAC1R_TO_DAC1R */ +#define WM8994_AIF1DAC1R_TO_DAC1R_WIDTH 1 /* AIF1DAC1R_TO_DAC1R */ + +/* + * R1539 (0x603) - DAC2 Mixer Volumes + */ +#define WM8994_ADCR_DAC2_VOL_MASK 0x01E0 /* ADCR_DAC2_VOL - [8:5] */ +#define WM8994_ADCR_DAC2_VOL_SHIFT 5 /* ADCR_DAC2_VOL - [8:5] */ +#define WM8994_ADCR_DAC2_VOL_WIDTH 4 /* ADCR_DAC2_VOL - [8:5] */ +#define WM8994_ADCL_DAC2_VOL_MASK 0x000F /* ADCL_DAC2_VOL - [3:0] */ +#define WM8994_ADCL_DAC2_VOL_SHIFT 0 /* ADCL_DAC2_VOL - [3:0] */ +#define WM8994_ADCL_DAC2_VOL_WIDTH 4 /* ADCL_DAC2_VOL - [3:0] */ + +/* + * R1540 (0x604) - DAC2 Left Mixer Routing + */ +#define WM8994_ADCR_TO_DAC2L 0x0020 /* ADCR_TO_DAC2L */ +#define 
WM8994_ADCR_TO_DAC2L_MASK 0x0020 /* ADCR_TO_DAC2L */ +#define WM8994_ADCR_TO_DAC2L_SHIFT 5 /* ADCR_TO_DAC2L */ +#define WM8994_ADCR_TO_DAC2L_WIDTH 1 /* ADCR_TO_DAC2L */ +#define WM8994_ADCL_TO_DAC2L 0x0010 /* ADCL_TO_DAC2L */ +#define WM8994_ADCL_TO_DAC2L_MASK 0x0010 /* ADCL_TO_DAC2L */ +#define WM8994_ADCL_TO_DAC2L_SHIFT 4 /* ADCL_TO_DAC2L */ +#define WM8994_ADCL_TO_DAC2L_WIDTH 1 /* ADCL_TO_DAC2L */ +#define WM8994_AIF2DACL_TO_DAC2L 0x0004 /* AIF2DACL_TO_DAC2L */ +#define WM8994_AIF2DACL_TO_DAC2L_MASK 0x0004 /* AIF2DACL_TO_DAC2L */ +#define WM8994_AIF2DACL_TO_DAC2L_SHIFT 2 /* AIF2DACL_TO_DAC2L */ +#define WM8994_AIF2DACL_TO_DAC2L_WIDTH 1 /* AIF2DACL_TO_DAC2L */ +#define WM8994_AIF1DAC2L_TO_DAC2L 0x0002 /* AIF1DAC2L_TO_DAC2L */ +#define WM8994_AIF1DAC2L_TO_DAC2L_MASK 0x0002 /* AIF1DAC2L_TO_DAC2L */ +#define WM8994_AIF1DAC2L_TO_DAC2L_SHIFT 1 /* AIF1DAC2L_TO_DAC2L */ +#define WM8994_AIF1DAC2L_TO_DAC2L_WIDTH 1 /* AIF1DAC2L_TO_DAC2L */ +#define WM8994_AIF1DAC1L_TO_DAC2L 0x0001 /* AIF1DAC1L_TO_DAC2L */ +#define WM8994_AIF1DAC1L_TO_DAC2L_MASK 0x0001 /* AIF1DAC1L_TO_DAC2L */ +#define WM8994_AIF1DAC1L_TO_DAC2L_SHIFT 0 /* AIF1DAC1L_TO_DAC2L */ +#define WM8994_AIF1DAC1L_TO_DAC2L_WIDTH 1 /* AIF1DAC1L_TO_DAC2L */ + +/* + * R1541 (0x605) - DAC2 Right Mixer Routing + */ +#define WM8994_ADCR_TO_DAC2R 0x0020 /* ADCR_TO_DAC2R */ +#define WM8994_ADCR_TO_DAC2R_MASK 0x0020 /* ADCR_TO_DAC2R */ +#define WM8994_ADCR_TO_DAC2R_SHIFT 5 /* ADCR_TO_DAC2R */ +#define WM8994_ADCR_TO_DAC2R_WIDTH 1 /* ADCR_TO_DAC2R */ +#define WM8994_ADCL_TO_DAC2R 0x0010 /* ADCL_TO_DAC2R */ +#define WM8994_ADCL_TO_DAC2R_MASK 0x0010 /* ADCL_TO_DAC2R */ +#define WM8994_ADCL_TO_DAC2R_SHIFT 4 /* ADCL_TO_DAC2R */ +#define WM8994_ADCL_TO_DAC2R_WIDTH 1 /* ADCL_TO_DAC2R */ +#define WM8994_AIF2DACR_TO_DAC2R 0x0004 /* AIF2DACR_TO_DAC2R */ +#define WM8994_AIF2DACR_TO_DAC2R_MASK 0x0004 /* AIF2DACR_TO_DAC2R */ +#define WM8994_AIF2DACR_TO_DAC2R_SHIFT 2 /* AIF2DACR_TO_DAC2R */ +#define WM8994_AIF2DACR_TO_DAC2R_WIDTH 1 /* AIF2DACR_TO_DAC2R */ +#define WM8994_AIF1DAC2R_TO_DAC2R 0x0002 /* AIF1DAC2R_TO_DAC2R */ +#define WM8994_AIF1DAC2R_TO_DAC2R_MASK 0x0002 /* AIF1DAC2R_TO_DAC2R */ +#define WM8994_AIF1DAC2R_TO_DAC2R_SHIFT 1 /* AIF1DAC2R_TO_DAC2R */ +#define WM8994_AIF1DAC2R_TO_DAC2R_WIDTH 1 /* AIF1DAC2R_TO_DAC2R */ +#define WM8994_AIF1DAC1R_TO_DAC2R 0x0001 /* AIF1DAC1R_TO_DAC2R */ +#define WM8994_AIF1DAC1R_TO_DAC2R_MASK 0x0001 /* AIF1DAC1R_TO_DAC2R */ +#define WM8994_AIF1DAC1R_TO_DAC2R_SHIFT 0 /* AIF1DAC1R_TO_DAC2R */ +#define WM8994_AIF1DAC1R_TO_DAC2R_WIDTH 1 /* AIF1DAC1R_TO_DAC2R */ + +/* + * R1542 (0x606) - AIF1 ADC1 Left Mixer Routing + */ +#define WM8994_ADC1L_TO_AIF1ADC1L 0x0002 /* ADC1L_TO_AIF1ADC1L */ +#define WM8994_ADC1L_TO_AIF1ADC1L_MASK 0x0002 /* ADC1L_TO_AIF1ADC1L */ +#define WM8994_ADC1L_TO_AIF1ADC1L_SHIFT 1 /* ADC1L_TO_AIF1ADC1L */ +#define WM8994_ADC1L_TO_AIF1ADC1L_WIDTH 1 /* ADC1L_TO_AIF1ADC1L */ +#define WM8994_AIF2DACL_TO_AIF1ADC1L 0x0001 /* AIF2DACL_TO_AIF1ADC1L */ +#define WM8994_AIF2DACL_TO_AIF1ADC1L_MASK 0x0001 /* AIF2DACL_TO_AIF1ADC1L */ +#define WM8994_AIF2DACL_TO_AIF1ADC1L_SHIFT 0 /* AIF2DACL_TO_AIF1ADC1L */ +#define WM8994_AIF2DACL_TO_AIF1ADC1L_WIDTH 1 /* AIF2DACL_TO_AIF1ADC1L */ + +/* + * R1543 (0x607) - AIF1 ADC1 Right Mixer Routing + */ +#define WM8994_ADC1R_TO_AIF1ADC1R 0x0002 /* ADC1R_TO_AIF1ADC1R */ +#define WM8994_ADC1R_TO_AIF1ADC1R_MASK 0x0002 /* ADC1R_TO_AIF1ADC1R */ +#define WM8994_ADC1R_TO_AIF1ADC1R_SHIFT 1 /* ADC1R_TO_AIF1ADC1R */ +#define WM8994_ADC1R_TO_AIF1ADC1R_WIDTH 1 /* ADC1R_TO_AIF1ADC1R */ +#define 
WM8994_AIF2DACR_TO_AIF1ADC1R 0x0001 /* AIF2DACR_TO_AIF1ADC1R */ +#define WM8994_AIF2DACR_TO_AIF1ADC1R_MASK 0x0001 /* AIF2DACR_TO_AIF1ADC1R */ +#define WM8994_AIF2DACR_TO_AIF1ADC1R_SHIFT 0 /* AIF2DACR_TO_AIF1ADC1R */ +#define WM8994_AIF2DACR_TO_AIF1ADC1R_WIDTH 1 /* AIF2DACR_TO_AIF1ADC1R */ + +/* + * R1544 (0x608) - AIF1 ADC2 Left Mixer Routing + */ +#define WM8994_ADC2L_TO_AIF1ADC2L 0x0002 /* ADC2L_TO_AIF1ADC2L */ +#define WM8994_ADC2L_TO_AIF1ADC2L_MASK 0x0002 /* ADC2L_TO_AIF1ADC2L */ +#define WM8994_ADC2L_TO_AIF1ADC2L_SHIFT 1 /* ADC2L_TO_AIF1ADC2L */ +#define WM8994_ADC2L_TO_AIF1ADC2L_WIDTH 1 /* ADC2L_TO_AIF1ADC2L */ +#define WM8994_AIF2DACL_TO_AIF1ADC2L 0x0001 /* AIF2DACL_TO_AIF1ADC2L */ +#define WM8994_AIF2DACL_TO_AIF1ADC2L_MASK 0x0001 /* AIF2DACL_TO_AIF1ADC2L */ +#define WM8994_AIF2DACL_TO_AIF1ADC2L_SHIFT 0 /* AIF2DACL_TO_AIF1ADC2L */ +#define WM8994_AIF2DACL_TO_AIF1ADC2L_WIDTH 1 /* AIF2DACL_TO_AIF1ADC2L */ + +/* + * R1545 (0x609) - AIF1 ADC2 Right mixer Routing + */ +#define WM8994_ADC2R_TO_AIF1ADC2R 0x0002 /* ADC2R_TO_AIF1ADC2R */ +#define WM8994_ADC2R_TO_AIF1ADC2R_MASK 0x0002 /* ADC2R_TO_AIF1ADC2R */ +#define WM8994_ADC2R_TO_AIF1ADC2R_SHIFT 1 /* ADC2R_TO_AIF1ADC2R */ +#define WM8994_ADC2R_TO_AIF1ADC2R_WIDTH 1 /* ADC2R_TO_AIF1ADC2R */ +#define WM8994_AIF2DACR_TO_AIF1ADC2R 0x0001 /* AIF2DACR_TO_AIF1ADC2R */ +#define WM8994_AIF2DACR_TO_AIF1ADC2R_MASK 0x0001 /* AIF2DACR_TO_AIF1ADC2R */ +#define WM8994_AIF2DACR_TO_AIF1ADC2R_SHIFT 0 /* AIF2DACR_TO_AIF1ADC2R */ +#define WM8994_AIF2DACR_TO_AIF1ADC2R_WIDTH 1 /* AIF2DACR_TO_AIF1ADC2R */ + +/* + * R1552 (0x610) - DAC1 Left Volume + */ +#define WM8994_DAC1L_MUTE 0x0200 /* DAC1L_MUTE */ +#define WM8994_DAC1L_MUTE_MASK 0x0200 /* DAC1L_MUTE */ +#define WM8994_DAC1L_MUTE_SHIFT 9 /* DAC1L_MUTE */ +#define WM8994_DAC1L_MUTE_WIDTH 1 /* DAC1L_MUTE */ +#define WM8994_DAC1_VU 0x0100 /* DAC1_VU */ +#define WM8994_DAC1_VU_MASK 0x0100 /* DAC1_VU */ +#define WM8994_DAC1_VU_SHIFT 8 /* DAC1_VU */ +#define WM8994_DAC1_VU_WIDTH 1 /* DAC1_VU */ +#define WM8994_DAC1L_VOL_MASK 0x00FF /* DAC1L_VOL - [7:0] */ +#define WM8994_DAC1L_VOL_SHIFT 0 /* DAC1L_VOL - [7:0] */ +#define WM8994_DAC1L_VOL_WIDTH 8 /* DAC1L_VOL - [7:0] */ + +/* + * R1553 (0x611) - DAC1 Right Volume + */ +#define WM8994_DAC1R_MUTE 0x0200 /* DAC1R_MUTE */ +#define WM8994_DAC1R_MUTE_MASK 0x0200 /* DAC1R_MUTE */ +#define WM8994_DAC1R_MUTE_SHIFT 9 /* DAC1R_MUTE */ +#define WM8994_DAC1R_MUTE_WIDTH 1 /* DAC1R_MUTE */ +#define WM8994_DAC1_VU 0x0100 /* DAC1_VU */ +#define WM8994_DAC1_VU_MASK 0x0100 /* DAC1_VU */ +#define WM8994_DAC1_VU_SHIFT 8 /* DAC1_VU */ +#define WM8994_DAC1_VU_WIDTH 1 /* DAC1_VU */ +#define WM8994_DAC1R_VOL_MASK 0x00FF /* DAC1R_VOL - [7:0] */ +#define WM8994_DAC1R_VOL_SHIFT 0 /* DAC1R_VOL - [7:0] */ +#define WM8994_DAC1R_VOL_WIDTH 8 /* DAC1R_VOL - [7:0] */ + +/* + * R1554 (0x612) - DAC2 Left Volume + */ +#define WM8994_DAC2L_MUTE 0x0200 /* DAC2L_MUTE */ +#define WM8994_DAC2L_MUTE_MASK 0x0200 /* DAC2L_MUTE */ +#define WM8994_DAC2L_MUTE_SHIFT 9 /* DAC2L_MUTE */ +#define WM8994_DAC2L_MUTE_WIDTH 1 /* DAC2L_MUTE */ +#define WM8994_DAC2_VU 0x0100 /* DAC2_VU */ +#define WM8994_DAC2_VU_MASK 0x0100 /* DAC2_VU */ +#define WM8994_DAC2_VU_SHIFT 8 /* DAC2_VU */ +#define WM8994_DAC2_VU_WIDTH 1 /* DAC2_VU */ +#define WM8994_DAC2L_VOL_MASK 0x00FF /* DAC2L_VOL - [7:0] */ +#define WM8994_DAC2L_VOL_SHIFT 0 /* DAC2L_VOL - [7:0] */ +#define WM8994_DAC2L_VOL_WIDTH 8 /* DAC2L_VOL - [7:0] */ + +/* + * R1555 (0x613) - DAC2 Right Volume + */ +#define WM8994_DAC2R_MUTE 0x0200 /* DAC2R_MUTE */ +#define 
WM8994_DAC2R_MUTE_MASK 0x0200 /* DAC2R_MUTE */ +#define WM8994_DAC2R_MUTE_SHIFT 9 /* DAC2R_MUTE */ +#define WM8994_DAC2R_MUTE_WIDTH 1 /* DAC2R_MUTE */ +#define WM8994_DAC2_VU 0x0100 /* DAC2_VU */ +#define WM8994_DAC2_VU_MASK 0x0100 /* DAC2_VU */ +#define WM8994_DAC2_VU_SHIFT 8 /* DAC2_VU */ +#define WM8994_DAC2_VU_WIDTH 1 /* DAC2_VU */ +#define WM8994_DAC2R_VOL_MASK 0x00FF /* DAC2R_VOL - [7:0] */ +#define WM8994_DAC2R_VOL_SHIFT 0 /* DAC2R_VOL - [7:0] */ +#define WM8994_DAC2R_VOL_WIDTH 8 /* DAC2R_VOL - [7:0] */ + +/* + * R1556 (0x614) - DAC Softmute + */ +#define WM8994_DAC_SOFTMUTEMODE 0x0002 /* DAC_SOFTMUTEMODE */ +#define WM8994_DAC_SOFTMUTEMODE_MASK 0x0002 /* DAC_SOFTMUTEMODE */ +#define WM8994_DAC_SOFTMUTEMODE_SHIFT 1 /* DAC_SOFTMUTEMODE */ +#define WM8994_DAC_SOFTMUTEMODE_WIDTH 1 /* DAC_SOFTMUTEMODE */ +#define WM8994_DAC_MUTERATE 0x0001 /* DAC_MUTERATE */ +#define WM8994_DAC_MUTERATE_MASK 0x0001 /* DAC_MUTERATE */ +#define WM8994_DAC_MUTERATE_SHIFT 0 /* DAC_MUTERATE */ +#define WM8994_DAC_MUTERATE_WIDTH 1 /* DAC_MUTERATE */ + +/* + * R1568 (0x620) - Oversampling + */ +#define WM8994_ADC_OSR128 0x0002 /* ADC_OSR128 */ +#define WM8994_ADC_OSR128_MASK 0x0002 /* ADC_OSR128 */ +#define WM8994_ADC_OSR128_SHIFT 1 /* ADC_OSR128 */ +#define WM8994_ADC_OSR128_WIDTH 1 /* ADC_OSR128 */ +#define WM8994_DAC_OSR128 0x0001 /* DAC_OSR128 */ +#define WM8994_DAC_OSR128_MASK 0x0001 /* DAC_OSR128 */ +#define WM8994_DAC_OSR128_SHIFT 0 /* DAC_OSR128 */ +#define WM8994_DAC_OSR128_WIDTH 1 /* DAC_OSR128 */ + +/* + * R1569 (0x621) - Sidetone + */ +#define WM8994_ST_HPF_CUT_MASK 0x0380 /* ST_HPF_CUT - [9:7] */ +#define WM8994_ST_HPF_CUT_SHIFT 7 /* ST_HPF_CUT - [9:7] */ +#define WM8994_ST_HPF_CUT_WIDTH 3 /* ST_HPF_CUT - [9:7] */ +#define WM8994_ST_HPF 0x0040 /* ST_HPF */ +#define WM8994_ST_HPF_MASK 0x0040 /* ST_HPF */ +#define WM8994_ST_HPF_SHIFT 6 /* ST_HPF */ +#define WM8994_ST_HPF_WIDTH 1 /* ST_HPF */ +#define WM8994_STR_SEL 0x0002 /* STR_SEL */ +#define WM8994_STR_SEL_MASK 0x0002 /* STR_SEL */ +#define WM8994_STR_SEL_SHIFT 1 /* STR_SEL */ +#define WM8994_STR_SEL_WIDTH 1 /* STR_SEL */ +#define WM8994_STL_SEL 0x0001 /* STL_SEL */ +#define WM8994_STL_SEL_MASK 0x0001 /* STL_SEL */ +#define WM8994_STL_SEL_SHIFT 0 /* STL_SEL */ +#define WM8994_STL_SEL_WIDTH 1 /* STL_SEL */ + +/* + * R1824 (0x720) - Pull Control (1) + */ +#define WM8994_DMICDAT2_PU 0x0800 /* DMICDAT2_PU */ +#define WM8994_DMICDAT2_PU_MASK 0x0800 /* DMICDAT2_PU */ +#define WM8994_DMICDAT2_PU_SHIFT 11 /* DMICDAT2_PU */ +#define WM8994_DMICDAT2_PU_WIDTH 1 /* DMICDAT2_PU */ +#define WM8994_DMICDAT2_PD 0x0400 /* DMICDAT2_PD */ +#define WM8994_DMICDAT2_PD_MASK 0x0400 /* DMICDAT2_PD */ +#define WM8994_DMICDAT2_PD_SHIFT 10 /* DMICDAT2_PD */ +#define WM8994_DMICDAT2_PD_WIDTH 1 /* DMICDAT2_PD */ +#define WM8994_DMICDAT1_PU 0x0200 /* DMICDAT1_PU */ +#define WM8994_DMICDAT1_PU_MASK 0x0200 /* DMICDAT1_PU */ +#define WM8994_DMICDAT1_PU_SHIFT 9 /* DMICDAT1_PU */ +#define WM8994_DMICDAT1_PU_WIDTH 1 /* DMICDAT1_PU */ +#define WM8994_DMICDAT1_PD 0x0100 /* DMICDAT1_PD */ +#define WM8994_DMICDAT1_PD_MASK 0x0100 /* DMICDAT1_PD */ +#define WM8994_DMICDAT1_PD_SHIFT 8 /* DMICDAT1_PD */ +#define WM8994_DMICDAT1_PD_WIDTH 1 /* DMICDAT1_PD */ +#define WM8994_MCLK1_PU 0x0080 /* MCLK1_PU */ +#define WM8994_MCLK1_PU_MASK 0x0080 /* MCLK1_PU */ +#define WM8994_MCLK1_PU_SHIFT 7 /* MCLK1_PU */ +#define WM8994_MCLK1_PU_WIDTH 1 /* MCLK1_PU */ +#define WM8994_MCLK1_PD 0x0040 /* MCLK1_PD */ +#define WM8994_MCLK1_PD_MASK 0x0040 /* MCLK1_PD */ +#define WM8994_MCLK1_PD_SHIFT 6 /* MCLK1_PD 
*/ +#define WM8994_MCLK1_PD_WIDTH 1 /* MCLK1_PD */ +#define WM8994_DACDAT1_PU 0x0020 /* DACDAT1_PU */ +#define WM8994_DACDAT1_PU_MASK 0x0020 /* DACDAT1_PU */ +#define WM8994_DACDAT1_PU_SHIFT 5 /* DACDAT1_PU */ +#define WM8994_DACDAT1_PU_WIDTH 1 /* DACDAT1_PU */ +#define WM8994_DACDAT1_PD 0x0010 /* DACDAT1_PD */ +#define WM8994_DACDAT1_PD_MASK 0x0010 /* DACDAT1_PD */ +#define WM8994_DACDAT1_PD_SHIFT 4 /* DACDAT1_PD */ +#define WM8994_DACDAT1_PD_WIDTH 1 /* DACDAT1_PD */ +#define WM8994_DACLRCLK1_PU 0x0008 /* DACLRCLK1_PU */ +#define WM8994_DACLRCLK1_PU_MASK 0x0008 /* DACLRCLK1_PU */ +#define WM8994_DACLRCLK1_PU_SHIFT 3 /* DACLRCLK1_PU */ +#define WM8994_DACLRCLK1_PU_WIDTH 1 /* DACLRCLK1_PU */ +#define WM8994_DACLRCLK1_PD 0x0004 /* DACLRCLK1_PD */ +#define WM8994_DACLRCLK1_PD_MASK 0x0004 /* DACLRCLK1_PD */ +#define WM8994_DACLRCLK1_PD_SHIFT 2 /* DACLRCLK1_PD */ +#define WM8994_DACLRCLK1_PD_WIDTH 1 /* DACLRCLK1_PD */ +#define WM8994_BCLK1_PU 0x0002 /* BCLK1_PU */ +#define WM8994_BCLK1_PU_MASK 0x0002 /* BCLK1_PU */ +#define WM8994_BCLK1_PU_SHIFT 1 /* BCLK1_PU */ +#define WM8994_BCLK1_PU_WIDTH 1 /* BCLK1_PU */ +#define WM8994_BCLK1_PD 0x0001 /* BCLK1_PD */ +#define WM8994_BCLK1_PD_MASK 0x0001 /* BCLK1_PD */ +#define WM8994_BCLK1_PD_SHIFT 0 /* BCLK1_PD */ +#define WM8994_BCLK1_PD_WIDTH 1 /* BCLK1_PD */ + +/* + * R1825 (0x721) - Pull Control (2) + */ +#define WM8994_CSNADDR_PD 0x0100 /* CSNADDR_PD */ +#define WM8994_CSNADDR_PD_MASK 0x0100 /* CSNADDR_PD */ +#define WM8994_CSNADDR_PD_SHIFT 8 /* CSNADDR_PD */ +#define WM8994_CSNADDR_PD_WIDTH 1 /* CSNADDR_PD */ +#define WM8994_LDO2ENA_PD 0x0040 /* LDO2ENA_PD */ +#define WM8994_LDO2ENA_PD_MASK 0x0040 /* LDO2ENA_PD */ +#define WM8994_LDO2ENA_PD_SHIFT 6 /* LDO2ENA_PD */ +#define WM8994_LDO2ENA_PD_WIDTH 1 /* LDO2ENA_PD */ +#define WM8994_LDO1ENA_PD 0x0010 /* LDO1ENA_PD */ +#define WM8994_LDO1ENA_PD_MASK 0x0010 /* LDO1ENA_PD */ +#define WM8994_LDO1ENA_PD_SHIFT 4 /* LDO1ENA_PD */ +#define WM8994_LDO1ENA_PD_WIDTH 1 /* LDO1ENA_PD */ +#define WM8994_CIFMODE_PD 0x0004 /* CIFMODE_PD */ +#define WM8994_CIFMODE_PD_MASK 0x0004 /* CIFMODE_PD */ +#define WM8994_CIFMODE_PD_SHIFT 2 /* CIFMODE_PD */ +#define WM8994_CIFMODE_PD_WIDTH 1 /* CIFMODE_PD */ +#define WM8994_SPKMODE_PU 0x0002 /* SPKMODE_PU */ +#define WM8994_SPKMODE_PU_MASK 0x0002 /* SPKMODE_PU */ +#define WM8994_SPKMODE_PU_SHIFT 1 /* SPKMODE_PU */ +#define WM8994_SPKMODE_PU_WIDTH 1 /* SPKMODE_PU */ + +/* + * R1840 (0x730) - Interrupt Status 1 + */ +#define WM8994_GP11_EINT 0x0400 /* GP11_EINT */ +#define WM8994_GP11_EINT_MASK 0x0400 /* GP11_EINT */ +#define WM8994_GP11_EINT_SHIFT 10 /* GP11_EINT */ +#define WM8994_GP11_EINT_WIDTH 1 /* GP11_EINT */ +#define WM8994_GP10_EINT 0x0200 /* GP10_EINT */ +#define WM8994_GP10_EINT_MASK 0x0200 /* GP10_EINT */ +#define WM8994_GP10_EINT_SHIFT 9 /* GP10_EINT */ +#define WM8994_GP10_EINT_WIDTH 1 /* GP10_EINT */ +#define WM8994_GP9_EINT 0x0100 /* GP9_EINT */ +#define WM8994_GP9_EINT_MASK 0x0100 /* GP9_EINT */ +#define WM8994_GP9_EINT_SHIFT 8 /* GP9_EINT */ +#define WM8994_GP9_EINT_WIDTH 1 /* GP9_EINT */ +#define WM8994_GP8_EINT 0x0080 /* GP8_EINT */ +#define WM8994_GP8_EINT_MASK 0x0080 /* GP8_EINT */ +#define WM8994_GP8_EINT_SHIFT 7 /* GP8_EINT */ +#define WM8994_GP8_EINT_WIDTH 1 /* GP8_EINT */ +#define WM8994_GP7_EINT 0x0040 /* GP7_EINT */ +#define WM8994_GP7_EINT_MASK 0x0040 /* GP7_EINT */ +#define WM8994_GP7_EINT_SHIFT 6 /* GP7_EINT */ +#define WM8994_GP7_EINT_WIDTH 1 /* GP7_EINT */ +#define WM8994_GP6_EINT 0x0020 /* GP6_EINT */ +#define WM8994_GP6_EINT_MASK 0x0020 /* 
GP6_EINT */ +#define WM8994_GP6_EINT_SHIFT 5 /* GP6_EINT */ +#define WM8994_GP6_EINT_WIDTH 1 /* GP6_EINT */ +#define WM8994_GP5_EINT 0x0010 /* GP5_EINT */ +#define WM8994_GP5_EINT_MASK 0x0010 /* GP5_EINT */ +#define WM8994_GP5_EINT_SHIFT 4 /* GP5_EINT */ +#define WM8994_GP5_EINT_WIDTH 1 /* GP5_EINT */ +#define WM8994_GP4_EINT 0x0008 /* GP4_EINT */ +#define WM8994_GP4_EINT_MASK 0x0008 /* GP4_EINT */ +#define WM8994_GP4_EINT_SHIFT 3 /* GP4_EINT */ +#define WM8994_GP4_EINT_WIDTH 1 /* GP4_EINT */ +#define WM8994_GP3_EINT 0x0004 /* GP3_EINT */ +#define WM8994_GP3_EINT_MASK 0x0004 /* GP3_EINT */ +#define WM8994_GP3_EINT_SHIFT 2 /* GP3_EINT */ +#define WM8994_GP3_EINT_WIDTH 1 /* GP3_EINT */ +#define WM8994_GP2_EINT 0x0002 /* GP2_EINT */ +#define WM8994_GP2_EINT_MASK 0x0002 /* GP2_EINT */ +#define WM8994_GP2_EINT_SHIFT 1 /* GP2_EINT */ +#define WM8994_GP2_EINT_WIDTH 1 /* GP2_EINT */ +#define WM8994_GP1_EINT 0x0001 /* GP1_EINT */ +#define WM8994_GP1_EINT_MASK 0x0001 /* GP1_EINT */ +#define WM8994_GP1_EINT_SHIFT 0 /* GP1_EINT */ +#define WM8994_GP1_EINT_WIDTH 1 /* GP1_EINT */ + +/* + * R1841 (0x731) - Interrupt Status 2 + */ +#define WM8994_TEMP_WARN_EINT 0x8000 /* TEMP_WARN_EINT */ +#define WM8994_TEMP_WARN_EINT_MASK 0x8000 /* TEMP_WARN_EINT */ +#define WM8994_TEMP_WARN_EINT_SHIFT 15 /* TEMP_WARN_EINT */ +#define WM8994_TEMP_WARN_EINT_WIDTH 1 /* TEMP_WARN_EINT */ +#define WM8994_DCS_DONE_EINT 0x4000 /* DCS_DONE_EINT */ +#define WM8994_DCS_DONE_EINT_MASK 0x4000 /* DCS_DONE_EINT */ +#define WM8994_DCS_DONE_EINT_SHIFT 14 /* DCS_DONE_EINT */ +#define WM8994_DCS_DONE_EINT_WIDTH 1 /* DCS_DONE_EINT */ +#define WM8994_WSEQ_DONE_EINT 0x2000 /* WSEQ_DONE_EINT */ +#define WM8994_WSEQ_DONE_EINT_MASK 0x2000 /* WSEQ_DONE_EINT */ +#define WM8994_WSEQ_DONE_EINT_SHIFT 13 /* WSEQ_DONE_EINT */ +#define WM8994_WSEQ_DONE_EINT_WIDTH 1 /* WSEQ_DONE_EINT */ +#define WM8994_FIFOS_ERR_EINT 0x1000 /* FIFOS_ERR_EINT */ +#define WM8994_FIFOS_ERR_EINT_MASK 0x1000 /* FIFOS_ERR_EINT */ +#define WM8994_FIFOS_ERR_EINT_SHIFT 12 /* FIFOS_ERR_EINT */ +#define WM8994_FIFOS_ERR_EINT_WIDTH 1 /* FIFOS_ERR_EINT */ +#define WM8994_AIF2DRC_SIG_DET_EINT 0x0800 /* AIF2DRC_SIG_DET_EINT */ +#define WM8994_AIF2DRC_SIG_DET_EINT_MASK 0x0800 /* AIF2DRC_SIG_DET_EINT */ +#define WM8994_AIF2DRC_SIG_DET_EINT_SHIFT 11 /* AIF2DRC_SIG_DET_EINT */ +#define WM8994_AIF2DRC_SIG_DET_EINT_WIDTH 1 /* AIF2DRC_SIG_DET_EINT */ +#define WM8994_AIF1DRC2_SIG_DET_EINT 0x0400 /* AIF1DRC2_SIG_DET_EINT */ +#define WM8994_AIF1DRC2_SIG_DET_EINT_MASK 0x0400 /* AIF1DRC2_SIG_DET_EINT */ +#define WM8994_AIF1DRC2_SIG_DET_EINT_SHIFT 10 /* AIF1DRC2_SIG_DET_EINT */ +#define WM8994_AIF1DRC2_SIG_DET_EINT_WIDTH 1 /* AIF1DRC2_SIG_DET_EINT */ +#define WM8994_AIF1DRC1_SIG_DET_EINT 0x0200 /* AIF1DRC1_SIG_DET_EINT */ +#define WM8994_AIF1DRC1_SIG_DET_EINT_MASK 0x0200 /* AIF1DRC1_SIG_DET_EINT */ +#define WM8994_AIF1DRC1_SIG_DET_EINT_SHIFT 9 /* AIF1DRC1_SIG_DET_EINT */ +#define WM8994_AIF1DRC1_SIG_DET_EINT_WIDTH 1 /* AIF1DRC1_SIG_DET_EINT */ +#define WM8994_SRC2_LOCK_EINT 0x0100 /* SRC2_LOCK_EINT */ +#define WM8994_SRC2_LOCK_EINT_MASK 0x0100 /* SRC2_LOCK_EINT */ +#define WM8994_SRC2_LOCK_EINT_SHIFT 8 /* SRC2_LOCK_EINT */ +#define WM8994_SRC2_LOCK_EINT_WIDTH 1 /* SRC2_LOCK_EINT */ +#define WM8994_SRC1_LOCK_EINT 0x0080 /* SRC1_LOCK_EINT */ +#define WM8994_SRC1_LOCK_EINT_MASK 0x0080 /* SRC1_LOCK_EINT */ +#define WM8994_SRC1_LOCK_EINT_SHIFT 7 /* SRC1_LOCK_EINT */ +#define WM8994_SRC1_LOCK_EINT_WIDTH 1 /* SRC1_LOCK_EINT */ +#define WM8994_FLL2_LOCK_EINT 0x0040 /* FLL2_LOCK_EINT */ +#define 
WM8994_FLL2_LOCK_EINT_MASK 0x0040 /* FLL2_LOCK_EINT */ +#define WM8994_FLL2_LOCK_EINT_SHIFT 6 /* FLL2_LOCK_EINT */ +#define WM8994_FLL2_LOCK_EINT_WIDTH 1 /* FLL2_LOCK_EINT */ +#define WM8994_FLL1_LOCK_EINT 0x0020 /* FLL1_LOCK_EINT */ +#define WM8994_FLL1_LOCK_EINT_MASK 0x0020 /* FLL1_LOCK_EINT */ +#define WM8994_FLL1_LOCK_EINT_SHIFT 5 /* FLL1_LOCK_EINT */ +#define WM8994_FLL1_LOCK_EINT_WIDTH 1 /* FLL1_LOCK_EINT */ +#define WM8994_MIC2_SHRT_EINT 0x0010 /* MIC2_SHRT_EINT */ +#define WM8994_MIC2_SHRT_EINT_MASK 0x0010 /* MIC2_SHRT_EINT */ +#define WM8994_MIC2_SHRT_EINT_SHIFT 4 /* MIC2_SHRT_EINT */ +#define WM8994_MIC2_SHRT_EINT_WIDTH 1 /* MIC2_SHRT_EINT */ +#define WM8994_MIC2_DET_EINT 0x0008 /* MIC2_DET_EINT */ +#define WM8994_MIC2_DET_EINT_MASK 0x0008 /* MIC2_DET_EINT */ +#define WM8994_MIC2_DET_EINT_SHIFT 3 /* MIC2_DET_EINT */ +#define WM8994_MIC2_DET_EINT_WIDTH 1 /* MIC2_DET_EINT */ +#define WM8994_MIC1_SHRT_EINT 0x0004 /* MIC1_SHRT_EINT */ +#define WM8994_MIC1_SHRT_EINT_MASK 0x0004 /* MIC1_SHRT_EINT */ +#define WM8994_MIC1_SHRT_EINT_SHIFT 2 /* MIC1_SHRT_EINT */ +#define WM8994_MIC1_SHRT_EINT_WIDTH 1 /* MIC1_SHRT_EINT */ +#define WM8994_MIC1_DET_EINT 0x0002 /* MIC1_DET_EINT */ +#define WM8994_MIC1_DET_EINT_MASK 0x0002 /* MIC1_DET_EINT */ +#define WM8994_MIC1_DET_EINT_SHIFT 1 /* MIC1_DET_EINT */ +#define WM8994_MIC1_DET_EINT_WIDTH 1 /* MIC1_DET_EINT */ +#define WM8994_TEMP_SHUT_EINT 0x0001 /* TEMP_SHUT_EINT */ +#define WM8994_TEMP_SHUT_EINT_MASK 0x0001 /* TEMP_SHUT_EINT */ +#define WM8994_TEMP_SHUT_EINT_SHIFT 0 /* TEMP_SHUT_EINT */ +#define WM8994_TEMP_SHUT_EINT_WIDTH 1 /* TEMP_SHUT_EINT */ + +/* + * R1842 (0x732) - Interrupt Raw Status 2 + */ +#define WM8994_TEMP_WARN_STS 0x8000 /* TEMP_WARN_STS */ +#define WM8994_TEMP_WARN_STS_MASK 0x8000 /* TEMP_WARN_STS */ +#define WM8994_TEMP_WARN_STS_SHIFT 15 /* TEMP_WARN_STS */ +#define WM8994_TEMP_WARN_STS_WIDTH 1 /* TEMP_WARN_STS */ +#define WM8994_DCS_DONE_STS 0x4000 /* DCS_DONE_STS */ +#define WM8994_DCS_DONE_STS_MASK 0x4000 /* DCS_DONE_STS */ +#define WM8994_DCS_DONE_STS_SHIFT 14 /* DCS_DONE_STS */ +#define WM8994_DCS_DONE_STS_WIDTH 1 /* DCS_DONE_STS */ +#define WM8994_WSEQ_DONE_STS 0x2000 /* WSEQ_DONE_STS */ +#define WM8994_WSEQ_DONE_STS_MASK 0x2000 /* WSEQ_DONE_STS */ +#define WM8994_WSEQ_DONE_STS_SHIFT 13 /* WSEQ_DONE_STS */ +#define WM8994_WSEQ_DONE_STS_WIDTH 1 /* WSEQ_DONE_STS */ +#define WM8994_FIFOS_ERR_STS 0x1000 /* FIFOS_ERR_STS */ +#define WM8994_FIFOS_ERR_STS_MASK 0x1000 /* FIFOS_ERR_STS */ +#define WM8994_FIFOS_ERR_STS_SHIFT 12 /* FIFOS_ERR_STS */ +#define WM8994_FIFOS_ERR_STS_WIDTH 1 /* FIFOS_ERR_STS */ +#define WM8994_AIF2DRC_SIG_DET_STS 0x0800 /* AIF2DRC_SIG_DET_STS */ +#define WM8994_AIF2DRC_SIG_DET_STS_MASK 0x0800 /* AIF2DRC_SIG_DET_STS */ +#define WM8994_AIF2DRC_SIG_DET_STS_SHIFT 11 /* AIF2DRC_SIG_DET_STS */ +#define WM8994_AIF2DRC_SIG_DET_STS_WIDTH 1 /* AIF2DRC_SIG_DET_STS */ +#define WM8994_AIF1DRC2_SIG_DET_STS 0x0400 /* AIF1DRC2_SIG_DET_STS */ +#define WM8994_AIF1DRC2_SIG_DET_STS_MASK 0x0400 /* AIF1DRC2_SIG_DET_STS */ +#define WM8994_AIF1DRC2_SIG_DET_STS_SHIFT 10 /* AIF1DRC2_SIG_DET_STS */ +#define WM8994_AIF1DRC2_SIG_DET_STS_WIDTH 1 /* AIF1DRC2_SIG_DET_STS */ +#define WM8994_AIF1DRC1_SIG_DET_STS 0x0200 /* AIF1DRC1_SIG_DET_STS */ +#define WM8994_AIF1DRC1_SIG_DET_STS_MASK 0x0200 /* AIF1DRC1_SIG_DET_STS */ +#define WM8994_AIF1DRC1_SIG_DET_STS_SHIFT 9 /* AIF1DRC1_SIG_DET_STS */ +#define WM8994_AIF1DRC1_SIG_DET_STS_WIDTH 1 /* AIF1DRC1_SIG_DET_STS */ +#define WM8994_SRC2_LOCK_STS 0x0100 /* SRC2_LOCK_STS */ +#define 
WM8994_SRC2_LOCK_STS_MASK 0x0100 /* SRC2_LOCK_STS */ +#define WM8994_SRC2_LOCK_STS_SHIFT 8 /* SRC2_LOCK_STS */ +#define WM8994_SRC2_LOCK_STS_WIDTH 1 /* SRC2_LOCK_STS */ +#define WM8994_SRC1_LOCK_STS 0x0080 /* SRC1_LOCK_STS */ +#define WM8994_SRC1_LOCK_STS_MASK 0x0080 /* SRC1_LOCK_STS */ +#define WM8994_SRC1_LOCK_STS_SHIFT 7 /* SRC1_LOCK_STS */ +#define WM8994_SRC1_LOCK_STS_WIDTH 1 /* SRC1_LOCK_STS */ +#define WM8994_FLL2_LOCK_STS 0x0040 /* FLL2_LOCK_STS */ +#define WM8994_FLL2_LOCK_STS_MASK 0x0040 /* FLL2_LOCK_STS */ +#define WM8994_FLL2_LOCK_STS_SHIFT 6 /* FLL2_LOCK_STS */ +#define WM8994_FLL2_LOCK_STS_WIDTH 1 /* FLL2_LOCK_STS */ +#define WM8994_FLL1_LOCK_STS 0x0020 /* FLL1_LOCK_STS */ +#define WM8994_FLL1_LOCK_STS_MASK 0x0020 /* FLL1_LOCK_STS */ +#define WM8994_FLL1_LOCK_STS_SHIFT 5 /* FLL1_LOCK_STS */ +#define WM8994_FLL1_LOCK_STS_WIDTH 1 /* FLL1_LOCK_STS */ +#define WM8994_MIC2_SHRT_STS 0x0010 /* MIC2_SHRT_STS */ +#define WM8994_MIC2_SHRT_STS_MASK 0x0010 /* MIC2_SHRT_STS */ +#define WM8994_MIC2_SHRT_STS_SHIFT 4 /* MIC2_SHRT_STS */ +#define WM8994_MIC2_SHRT_STS_WIDTH 1 /* MIC2_SHRT_STS */ +#define WM8994_MIC2_DET_STS 0x0008 /* MIC2_DET_STS */ +#define WM8994_MIC2_DET_STS_MASK 0x0008 /* MIC2_DET_STS */ +#define WM8994_MIC2_DET_STS_SHIFT 3 /* MIC2_DET_STS */ +#define WM8994_MIC2_DET_STS_WIDTH 1 /* MIC2_DET_STS */ +#define WM8994_MIC1_SHRT_STS 0x0004 /* MIC1_SHRT_STS */ +#define WM8994_MIC1_SHRT_STS_MASK 0x0004 /* MIC1_SHRT_STS */ +#define WM8994_MIC1_SHRT_STS_SHIFT 2 /* MIC1_SHRT_STS */ +#define WM8994_MIC1_SHRT_STS_WIDTH 1 /* MIC1_SHRT_STS */ +#define WM8994_MIC1_DET_STS 0x0002 /* MIC1_DET_STS */ +#define WM8994_MIC1_DET_STS_MASK 0x0002 /* MIC1_DET_STS */ +#define WM8994_MIC1_DET_STS_SHIFT 1 /* MIC1_DET_STS */ +#define WM8994_MIC1_DET_STS_WIDTH 1 /* MIC1_DET_STS */ +#define WM8994_TEMP_SHUT_STS 0x0001 /* TEMP_SHUT_STS */ +#define WM8994_TEMP_SHUT_STS_MASK 0x0001 /* TEMP_SHUT_STS */ +#define WM8994_TEMP_SHUT_STS_SHIFT 0 /* TEMP_SHUT_STS */ +#define WM8994_TEMP_SHUT_STS_WIDTH 1 /* TEMP_SHUT_STS */ + +/* + * R1848 (0x738) - Interrupt Status 1 Mask + */ +#define WM8994_IM_GP11_EINT 0x0400 /* IM_GP11_EINT */ +#define WM8994_IM_GP11_EINT_MASK 0x0400 /* IM_GP11_EINT */ +#define WM8994_IM_GP11_EINT_SHIFT 10 /* IM_GP11_EINT */ +#define WM8994_IM_GP11_EINT_WIDTH 1 /* IM_GP11_EINT */ +#define WM8994_IM_GP10_EINT 0x0200 /* IM_GP10_EINT */ +#define WM8994_IM_GP10_EINT_MASK 0x0200 /* IM_GP10_EINT */ +#define WM8994_IM_GP10_EINT_SHIFT 9 /* IM_GP10_EINT */ +#define WM8994_IM_GP10_EINT_WIDTH 1 /* IM_GP10_EINT */ +#define WM8994_IM_GP9_EINT 0x0100 /* IM_GP9_EINT */ +#define WM8994_IM_GP9_EINT_MASK 0x0100 /* IM_GP9_EINT */ +#define WM8994_IM_GP9_EINT_SHIFT 8 /* IM_GP9_EINT */ +#define WM8994_IM_GP9_EINT_WIDTH 1 /* IM_GP9_EINT */ +#define WM8994_IM_GP8_EINT 0x0080 /* IM_GP8_EINT */ +#define WM8994_IM_GP8_EINT_MASK 0x0080 /* IM_GP8_EINT */ +#define WM8994_IM_GP8_EINT_SHIFT 7 /* IM_GP8_EINT */ +#define WM8994_IM_GP8_EINT_WIDTH 1 /* IM_GP8_EINT */ +#define WM8994_IM_GP7_EINT 0x0040 /* IM_GP7_EINT */ +#define WM8994_IM_GP7_EINT_MASK 0x0040 /* IM_GP7_EINT */ +#define WM8994_IM_GP7_EINT_SHIFT 6 /* IM_GP7_EINT */ +#define WM8994_IM_GP7_EINT_WIDTH 1 /* IM_GP7_EINT */ +#define WM8994_IM_GP6_EINT 0x0020 /* IM_GP6_EINT */ +#define WM8994_IM_GP6_EINT_MASK 0x0020 /* IM_GP6_EINT */ +#define WM8994_IM_GP6_EINT_SHIFT 5 /* IM_GP6_EINT */ +#define WM8994_IM_GP6_EINT_WIDTH 1 /* IM_GP6_EINT */ +#define WM8994_IM_GP5_EINT 0x0010 /* IM_GP5_EINT */ +#define WM8994_IM_GP5_EINT_MASK 0x0010 /* IM_GP5_EINT */ +#define 
WM8994_IM_GP5_EINT_SHIFT 4 /* IM_GP5_EINT */ +#define WM8994_IM_GP5_EINT_WIDTH 1 /* IM_GP5_EINT */ +#define WM8994_IM_GP4_EINT 0x0008 /* IM_GP4_EINT */ +#define WM8994_IM_GP4_EINT_MASK 0x0008 /* IM_GP4_EINT */ +#define WM8994_IM_GP4_EINT_SHIFT 3 /* IM_GP4_EINT */ +#define WM8994_IM_GP4_EINT_WIDTH 1 /* IM_GP4_EINT */ +#define WM8994_IM_GP3_EINT 0x0004 /* IM_GP3_EINT */ +#define WM8994_IM_GP3_EINT_MASK 0x0004 /* IM_GP3_EINT */ +#define WM8994_IM_GP3_EINT_SHIFT 2 /* IM_GP3_EINT */ +#define WM8994_IM_GP3_EINT_WIDTH 1 /* IM_GP3_EINT */ +#define WM8994_IM_GP2_EINT 0x0002 /* IM_GP2_EINT */ +#define WM8994_IM_GP2_EINT_MASK 0x0002 /* IM_GP2_EINT */ +#define WM8994_IM_GP2_EINT_SHIFT 1 /* IM_GP2_EINT */ +#define WM8994_IM_GP2_EINT_WIDTH 1 /* IM_GP2_EINT */ +#define WM8994_IM_GP1_EINT 0x0001 /* IM_GP1_EINT */ +#define WM8994_IM_GP1_EINT_MASK 0x0001 /* IM_GP1_EINT */ +#define WM8994_IM_GP1_EINT_SHIFT 0 /* IM_GP1_EINT */ +#define WM8994_IM_GP1_EINT_WIDTH 1 /* IM_GP1_EINT */ + +/* + * R1849 (0x739) - Interrupt Status 2 Mask + */ +#define WM8994_IM_TEMP_WARN_EINT 0x8000 /* IM_TEMP_WARN_EINT */ +#define WM8994_IM_TEMP_WARN_EINT_MASK 0x8000 /* IM_TEMP_WARN_EINT */ +#define WM8994_IM_TEMP_WARN_EINT_SHIFT 15 /* IM_TEMP_WARN_EINT */ +#define WM8994_IM_TEMP_WARN_EINT_WIDTH 1 /* IM_TEMP_WARN_EINT */ +#define WM8994_IM_DCS_DONE_EINT 0x4000 /* IM_DCS_DONE_EINT */ +#define WM8994_IM_DCS_DONE_EINT_MASK 0x4000 /* IM_DCS_DONE_EINT */ +#define WM8994_IM_DCS_DONE_EINT_SHIFT 14 /* IM_DCS_DONE_EINT */ +#define WM8994_IM_DCS_DONE_EINT_WIDTH 1 /* IM_DCS_DONE_EINT */ +#define WM8994_IM_WSEQ_DONE_EINT 0x2000 /* IM_WSEQ_DONE_EINT */ +#define WM8994_IM_WSEQ_DONE_EINT_MASK 0x2000 /* IM_WSEQ_DONE_EINT */ +#define WM8994_IM_WSEQ_DONE_EINT_SHIFT 13 /* IM_WSEQ_DONE_EINT */ +#define WM8994_IM_WSEQ_DONE_EINT_WIDTH 1 /* IM_WSEQ_DONE_EINT */ +#define WM8994_IM_FIFOS_ERR_EINT 0x1000 /* IM_FIFOS_ERR_EINT */ +#define WM8994_IM_FIFOS_ERR_EINT_MASK 0x1000 /* IM_FIFOS_ERR_EINT */ +#define WM8994_IM_FIFOS_ERR_EINT_SHIFT 12 /* IM_FIFOS_ERR_EINT */ +#define WM8994_IM_FIFOS_ERR_EINT_WIDTH 1 /* IM_FIFOS_ERR_EINT */ +#define WM8994_IM_AIF2DRC_SIG_DET_EINT 0x0800 /* IM_AIF2DRC_SIG_DET_EINT */ +#define WM8994_IM_AIF2DRC_SIG_DET_EINT_MASK 0x0800 /* IM_AIF2DRC_SIG_DET_EINT */ +#define WM8994_IM_AIF2DRC_SIG_DET_EINT_SHIFT 11 /* IM_AIF2DRC_SIG_DET_EINT */ +#define WM8994_IM_AIF2DRC_SIG_DET_EINT_WIDTH 1 /* IM_AIF2DRC_SIG_DET_EINT */ +#define WM8994_IM_AIF1DRC2_SIG_DET_EINT 0x0400 /* IM_AIF1DRC2_SIG_DET_EINT */ +#define WM8994_IM_AIF1DRC2_SIG_DET_EINT_MASK 0x0400 /* IM_AIF1DRC2_SIG_DET_EINT */ +#define WM8994_IM_AIF1DRC2_SIG_DET_EINT_SHIFT 10 /* IM_AIF1DRC2_SIG_DET_EINT */ +#define WM8994_IM_AIF1DRC2_SIG_DET_EINT_WIDTH 1 /* IM_AIF1DRC2_SIG_DET_EINT */ +#define WM8994_IM_AIF1DRC1_SIG_DET_EINT 0x0200 /* IM_AIF1DRC1_SIG_DET_EINT */ +#define WM8994_IM_AIF1DRC1_SIG_DET_EINT_MASK 0x0200 /* IM_AIF1DRC1_SIG_DET_EINT */ +#define WM8994_IM_AIF1DRC1_SIG_DET_EINT_SHIFT 9 /* IM_AIF1DRC1_SIG_DET_EINT */ +#define WM8994_IM_AIF1DRC1_SIG_DET_EINT_WIDTH 1 /* IM_AIF1DRC1_SIG_DET_EINT */ +#define WM8994_IM_SRC2_LOCK_EINT 0x0100 /* IM_SRC2_LOCK_EINT */ +#define WM8994_IM_SRC2_LOCK_EINT_MASK 0x0100 /* IM_SRC2_LOCK_EINT */ +#define WM8994_IM_SRC2_LOCK_EINT_SHIFT 8 /* IM_SRC2_LOCK_EINT */ +#define WM8994_IM_SRC2_LOCK_EINT_WIDTH 1 /* IM_SRC2_LOCK_EINT */ +#define WM8994_IM_SRC1_LOCK_EINT 0x0080 /* IM_SRC1_LOCK_EINT */ +#define WM8994_IM_SRC1_LOCK_EINT_MASK 0x0080 /* IM_SRC1_LOCK_EINT */ +#define WM8994_IM_SRC1_LOCK_EINT_SHIFT 7 /* IM_SRC1_LOCK_EINT */ +#define 
WM8994_IM_SRC1_LOCK_EINT_WIDTH 1 /* IM_SRC1_LOCK_EINT */ +#define WM8994_IM_FLL2_LOCK_EINT 0x0040 /* IM_FLL2_LOCK_EINT */ +#define WM8994_IM_FLL2_LOCK_EINT_MASK 0x0040 /* IM_FLL2_LOCK_EINT */ +#define WM8994_IM_FLL2_LOCK_EINT_SHIFT 6 /* IM_FLL2_LOCK_EINT */ +#define WM8994_IM_FLL2_LOCK_EINT_WIDTH 1 /* IM_FLL2_LOCK_EINT */ +#define WM8994_IM_FLL1_LOCK_EINT 0x0020 /* IM_FLL1_LOCK_EINT */ +#define WM8994_IM_FLL1_LOCK_EINT_MASK 0x0020 /* IM_FLL1_LOCK_EINT */ +#define WM8994_IM_FLL1_LOCK_EINT_SHIFT 5 /* IM_FLL1_LOCK_EINT */ +#define WM8994_IM_FLL1_LOCK_EINT_WIDTH 1 /* IM_FLL1_LOCK_EINT */ +#define WM8994_IM_MIC2_SHRT_EINT 0x0010 /* IM_MIC2_SHRT_EINT */ +#define WM8994_IM_MIC2_SHRT_EINT_MASK 0x0010 /* IM_MIC2_SHRT_EINT */ +#define WM8994_IM_MIC2_SHRT_EINT_SHIFT 4 /* IM_MIC2_SHRT_EINT */ +#define WM8994_IM_MIC2_SHRT_EINT_WIDTH 1 /* IM_MIC2_SHRT_EINT */ +#define WM8994_IM_MIC2_DET_EINT 0x0008 /* IM_MIC2_DET_EINT */ +#define WM8994_IM_MIC2_DET_EINT_MASK 0x0008 /* IM_MIC2_DET_EINT */ +#define WM8994_IM_MIC2_DET_EINT_SHIFT 3 /* IM_MIC2_DET_EINT */ +#define WM8994_IM_MIC2_DET_EINT_WIDTH 1 /* IM_MIC2_DET_EINT */ +#define WM8994_IM_MIC1_SHRT_EINT 0x0004 /* IM_MIC1_SHRT_EINT */ +#define WM8994_IM_MIC1_SHRT_EINT_MASK 0x0004 /* IM_MIC1_SHRT_EINT */ +#define WM8994_IM_MIC1_SHRT_EINT_SHIFT 2 /* IM_MIC1_SHRT_EINT */ +#define WM8994_IM_MIC1_SHRT_EINT_WIDTH 1 /* IM_MIC1_SHRT_EINT */ +#define WM8994_IM_MIC1_DET_EINT 0x0002 /* IM_MIC1_DET_EINT */ +#define WM8994_IM_MIC1_DET_EINT_MASK 0x0002 /* IM_MIC1_DET_EINT */ +#define WM8994_IM_MIC1_DET_EINT_SHIFT 1 /* IM_MIC1_DET_EINT */ +#define WM8994_IM_MIC1_DET_EINT_WIDTH 1 /* IM_MIC1_DET_EINT */ +#define WM8994_IM_TEMP_SHUT_EINT 0x0001 /* IM_TEMP_SHUT_EINT */ +#define WM8994_IM_TEMP_SHUT_EINT_MASK 0x0001 /* IM_TEMP_SHUT_EINT */ +#define WM8994_IM_TEMP_SHUT_EINT_SHIFT 0 /* IM_TEMP_SHUT_EINT */ +#define WM8994_IM_TEMP_SHUT_EINT_WIDTH 1 /* IM_TEMP_SHUT_EINT */ + +/* + * R1856 (0x740) - Interrupt Control + */ +#define WM8994_IM_IRQ 0x0001 /* IM_IRQ */ +#define WM8994_IM_IRQ_MASK 0x0001 /* IM_IRQ */ +#define WM8994_IM_IRQ_SHIFT 0 /* IM_IRQ */ +#define WM8994_IM_IRQ_WIDTH 1 /* IM_IRQ */ + +/* + * R1864 (0x748) - IRQ Debounce + */ +#define WM8994_TEMP_WARN_DB 0x0020 /* TEMP_WARN_DB */ +#define WM8994_TEMP_WARN_DB_MASK 0x0020 /* TEMP_WARN_DB */ +#define WM8994_TEMP_WARN_DB_SHIFT 5 /* TEMP_WARN_DB */ +#define WM8994_TEMP_WARN_DB_WIDTH 1 /* TEMP_WARN_DB */ +#define WM8994_MIC2_SHRT_DB 0x0010 /* MIC2_SHRT_DB */ +#define WM8994_MIC2_SHRT_DB_MASK 0x0010 /* MIC2_SHRT_DB */ +#define WM8994_MIC2_SHRT_DB_SHIFT 4 /* MIC2_SHRT_DB */ +#define WM8994_MIC2_SHRT_DB_WIDTH 1 /* MIC2_SHRT_DB */ +#define WM8994_MIC2_DET_DB 0x0008 /* MIC2_DET_DB */ +#define WM8994_MIC2_DET_DB_MASK 0x0008 /* MIC2_DET_DB */ +#define WM8994_MIC2_DET_DB_SHIFT 3 /* MIC2_DET_DB */ +#define WM8994_MIC2_DET_DB_WIDTH 1 /* MIC2_DET_DB */ +#define WM8994_MIC1_SHRT_DB 0x0004 /* MIC1_SHRT_DB */ +#define WM8994_MIC1_SHRT_DB_MASK 0x0004 /* MIC1_SHRT_DB */ +#define WM8994_MIC1_SHRT_DB_SHIFT 2 /* MIC1_SHRT_DB */ +#define WM8994_MIC1_SHRT_DB_WIDTH 1 /* MIC1_SHRT_DB */ +#define WM8994_MIC1_DET_DB 0x0002 /* MIC1_DET_DB */ +#define WM8994_MIC1_DET_DB_MASK 0x0002 /* MIC1_DET_DB */ +#define WM8994_MIC1_DET_DB_SHIFT 1 /* MIC1_DET_DB */ +#define WM8994_MIC1_DET_DB_WIDTH 1 /* MIC1_DET_DB */ +#define WM8994_TEMP_SHUT_DB 0x0001 /* TEMP_SHUT_DB */ +#define WM8994_TEMP_SHUT_DB_MASK 0x0001 /* TEMP_SHUT_DB */ +#define WM8994_TEMP_SHUT_DB_SHIFT 0 /* TEMP_SHUT_DB */ +#define WM8994_TEMP_SHUT_DB_WIDTH 1 /* TEMP_SHUT_DB */ + +#endif -- cgit v1.2.3 
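Note: the _MASK/_SHIFT/_WIDTH constants above are intended to be combined with read-modify-write accesses to the control registers. The fragment below is an illustrative sketch only and is not part of the series; it assumes the struct wm8994 and wm8994_set_bits() helper introduced by the following patch, the usual include paths for this series, and an arbitrary example volume code of 0xC0.

#include <linux/mfd/wm8994/core.h>       /* struct wm8994, wm8994_set_bits() -- added by the next patch */
#include <linux/mfd/wm8994/registers.h>  /* assumed location of the register definitions above */

/*
 * Sketch: program DAC1 left volume (R1552, 0x610) to the example code
 * 0xC0 and latch it with the volume-update bit.  wm8994_set_bits()
 * performs a locked read-modify-write, so only the bits covered by the
 * mask argument are changed; the rest of the register is preserved.
 */
static int example_set_dac1_left_volume(struct wm8994 *wm8994)
{
        return wm8994_set_bits(wm8994, 0x610,
                               WM8994_DAC1L_VOL_MASK | WM8994_DAC1_VU_MASK,
                               (0xC0 << WM8994_DAC1L_VOL_SHIFT) | WM8994_DAC1_VU);
}

The same pattern applies to any other field defined above; the _WIDTH constants are mainly useful for range-checking a value before shifting it into place.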
From 9e50108668a70a9927257298bd4e679300124420 Mon Sep 17 00:00:00 2001
From: Mark Brown
Date: Fri, 29 Jan 2010 18:20:29 +0000
Subject: mfd: Add initial WM8994 support

The WM8994 is a highly integrated ultra low power audio hub CODEC. Since
it includes on-board regulators and GPIOs it is represented as a
multi-function device, though the overwhelming majority of the
functionality is provided by the ASoC CODEC driver.

Signed-off-by: Mark Brown
Signed-off-by: Samuel Ortiz
---
 drivers/mfd/Kconfig              |  12 +
 drivers/mfd/Makefile             |   1 +
 drivers/mfd/wm8994-core.c        | 537 +++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/wm8994/core.h  |  54 ++++
 include/linux/mfd/wm8994/gpio.h  |  72 ++++++
 include/linux/mfd/wm8994/pdata.h |  97 +++++++
 6 files changed, 773 insertions(+)
 create mode 100644 drivers/mfd/wm8994-core.c
 create mode 100644 include/linux/mfd/wm8994/core.h
 create mode 100644 include/linux/mfd/wm8994/gpio.h
 create mode 100644 include/linux/mfd/wm8994/pdata.h

(limited to 'include')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index a760dbefd27a..64fbe3334eb3 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -295,6 +295,18 @@ config MFD_WM8350_I2C
           I2C as the control interface. Additional options must be
           selected to enable support for the functionality of the chip.
 
+config MFD_WM8994
+        tristate "Support Wolfson Microelectronics WM8994"
+        select MFD_CORE
+        depends on I2C
+        help
+          The WM8994 is a highly integrated hi-fi CODEC designed for
+          smartphone applications. As well as audio functionality it
+          has on board GPIO and regulator functionality which is
+          supported via the relevant subsystems. This driver provides
+          core support for the WM8994; in order to use the actual
+          functionality of the device other drivers must be enabled.
+
 config MFD_PCF50633
         tristate "Support for NXP PCF50633"
         depends on I2C
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 142d31202b14..878cac691067 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -25,6 +25,7 @@ wm8350-objs := wm8350-core.o wm8350-regmap.o wm8350-gpio.o
 wm8350-objs += wm8350-irq.o
 obj-$(CONFIG_MFD_WM8350) += wm8350.o
 obj-$(CONFIG_MFD_WM8350_I2C) += wm8350-i2c.o
+obj-$(CONFIG_MFD_WM8994) += wm8994-core.o
 obj-$(CONFIG_TPS65010) += tps65010.o
 obj-$(CONFIG_MENELAUS) += menelaus.o
diff --git a/drivers/mfd/wm8994-core.c b/drivers/mfd/wm8994-core.c
new file mode 100644
index 000000000000..299c1af1334e
--- /dev/null
+++ b/drivers/mfd/wm8994-core.c
@@ -0,0 +1,537 @@
+/*
+ * wm8994-core.c -- Device access for Wolfson WM8994
+ *
+ * Copyright 2009 Wolfson Microelectronics PLC.
+ *
+ * Author: Mark Brown
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/i2c.h>
+#include <linux/delay.h>
+#include <linux/mfd/core.h>
+#include <linux/regulator/consumer.h>
+
+#include <linux/mfd/wm8994/core.h>
+#include <linux/mfd/wm8994/pdata.h>
+#include <linux/mfd/wm8994/registers.h>
+
+static int wm8994_read(struct wm8994 *wm8994, unsigned short reg,
+                       int bytes, void *dest)
+{
+        int ret, i;
+        u16 *buf = dest;
+
+        BUG_ON(bytes % 2);
+        BUG_ON(bytes <= 0);
+
+        ret = wm8994->read_dev(wm8994, reg, bytes, dest);
+        if (ret < 0)
+                return ret;
+
+        for (i = 0; i < bytes / 2; i++) {
+                buf[i] = be16_to_cpu(buf[i]);
+
+                dev_vdbg(wm8994->dev, "Read %04x from R%d(0x%x)\n",
+                         buf[i], reg + i, reg + i);
+        }
+
+        return 0;
+}
+
+/**
+ * wm8994_reg_read: Read a single WM8994 register.
+ *
+ * @wm8994: Device to read from.
+ * @reg: Register to read. + */ +int wm8994_reg_read(struct wm8994 *wm8994, unsigned short reg) +{ + unsigned short val; + int ret; + + mutex_lock(&wm8994->io_lock); + + ret = wm8994_read(wm8994, reg, 2, &val); + + mutex_unlock(&wm8994->io_lock); + + if (ret < 0) + return ret; + else + return val; +} +EXPORT_SYMBOL_GPL(wm8994_reg_read); + +/** + * wm8994_bulk_read: Read multiple WM8994 registers + * + * @wm8994: Device to read from + * @reg: First register + * @count: Number of registers + * @buf: Buffer to fill. + */ +int wm8994_bulk_read(struct wm8994 *wm8994, unsigned short reg, + int count, u16 *buf) +{ + int ret; + + mutex_lock(&wm8994->io_lock); + + ret = wm8994_read(wm8994, reg, count * 2, buf); + + mutex_unlock(&wm8994->io_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(wm8994_bulk_read); + +static int wm8994_write(struct wm8994 *wm8994, unsigned short reg, + int bytes, void *src) +{ + u16 *buf = src; + int i; + + BUG_ON(bytes % 2); + BUG_ON(bytes <= 0); + + for (i = 0; i < bytes / 2; i++) { + dev_vdbg(wm8994->dev, "Write %04x to R%d(0x%x)\n", + buf[i], reg + i, reg + i); + + buf[i] = cpu_to_be16(buf[i]); + } + + return wm8994->write_dev(wm8994, reg, bytes, src); +} + +/** + * wm8994_reg_write: Write a single WM8994 register. + * + * @wm8994: Device to write to. + * @reg: Register to write to. + * @val: Value to write. + */ +int wm8994_reg_write(struct wm8994 *wm8994, unsigned short reg, + unsigned short val) +{ + int ret; + + mutex_lock(&wm8994->io_lock); + + ret = wm8994_write(wm8994, reg, 2, &val); + + mutex_unlock(&wm8994->io_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(wm8994_reg_write); + +/** + * wm8994_set_bits: Set the value of a bitfield in a WM8994 register + * + * @wm8994: Device to write to. + * @reg: Register to write to. + * @mask: Mask of bits to set. + * @val: Value to set (unshifted) + */ +int wm8994_set_bits(struct wm8994 *wm8994, unsigned short reg, + unsigned short mask, unsigned short val) +{ + int ret; + u16 r; + + mutex_lock(&wm8994->io_lock); + + ret = wm8994_read(wm8994, reg, 2, &r); + if (ret < 0) + goto out; + + r &= ~mask; + r |= val; + + ret = wm8994_write(wm8994, reg, 2, &r); + +out: + mutex_unlock(&wm8994->io_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(wm8994_set_bits); + +static struct mfd_cell wm8994_regulator_devs[] = { + { .name = "wm8994-ldo", .id = 1 }, + { .name = "wm8994-ldo", .id = 2 }, +}; + +static struct mfd_cell wm8994_devs[] = { + { .name = "wm8994-codec" }, + { .name = "wm8994-gpio" }, +}; + +/* + * Supplies for the main bulk of CODEC; the LDO supplies are ignored + * and should be handled via the standard regulator API supply + * management. + */ +static const char *wm8994_main_supplies[] = { + "DBVDD", + "DCVDD", + "AVDD1", + "AVDD2", + "CPVDD", + "SPKVDD1", + "SPKVDD2", +}; + +#ifdef CONFIG_PM +static int wm8994_device_suspend(struct device *dev) +{ + struct wm8994 *wm8994 = dev_get_drvdata(dev); + int ret; + + /* GPIO configuration state is saved here since we may be configuring + * the GPIO alternate functions even if we're not using the gpiolib + * driver for them. 
+ */ + ret = wm8994_read(wm8994, WM8994_GPIO_1, WM8994_NUM_GPIO_REGS * 2, + &wm8994->gpio_regs); + if (ret < 0) + dev_err(dev, "Failed to save GPIO registers: %d\n", ret); + + /* For similar reasons we also stash the regulator states */ + ret = wm8994_read(wm8994, WM8994_LDO_1, WM8994_NUM_LDO_REGS * 2, + &wm8994->ldo_regs); + if (ret < 0) + dev_err(dev, "Failed to save LDO registers: %d\n", ret); + + ret = regulator_bulk_disable(ARRAY_SIZE(wm8994_main_supplies), + wm8994->supplies); + if (ret != 0) { + dev_err(dev, "Failed to disable supplies: %d\n", ret); + return ret; + } + + return 0; +} + +static int wm8994_device_resume(struct device *dev) +{ + struct wm8994 *wm8994 = dev_get_drvdata(dev); + int ret; + + ret = regulator_bulk_enable(ARRAY_SIZE(wm8994_main_supplies), + wm8994->supplies); + if (ret != 0) { + dev_err(dev, "Failed to enable supplies: %d\n", ret); + return ret; + } + + ret = wm8994_write(wm8994, WM8994_LDO_1, WM8994_NUM_LDO_REGS * 2, + &wm8994->ldo_regs); + if (ret < 0) + dev_err(dev, "Failed to restore LDO registers: %d\n", ret); + + ret = wm8994_write(wm8994, WM8994_GPIO_1, WM8994_NUM_GPIO_REGS * 2, + &wm8994->gpio_regs); + if (ret < 0) + dev_err(dev, "Failed to restore GPIO registers: %d\n", ret); + + return 0; +} +#endif + +#ifdef CONFIG_REGULATOR +static int wm8994_ldo_in_use(struct wm8994_pdata *pdata, int ldo) +{ + struct wm8994_ldo_pdata *ldo_pdata; + + if (!pdata) + return 0; + + ldo_pdata = &pdata->ldo[ldo]; + + if (!ldo_pdata->init_data) + return 0; + + return ldo_pdata->init_data->num_consumer_supplies != 0; +} +#else +static int wm8994_ldo_in_use(struct wm8994_pdata *pdata, int ldo) +{ + return 0; +} +#endif + +/* + * Instantiate the generic non-control parts of the device. + */ +static int wm8994_device_init(struct wm8994 *wm8994, unsigned long id, int irq) +{ + struct wm8994_pdata *pdata = wm8994->dev->platform_data; + int ret, i; + + mutex_init(&wm8994->io_lock); + dev_set_drvdata(wm8994->dev, wm8994); + + /* Add the on-chip regulators first for bootstrapping */ + ret = mfd_add_devices(wm8994->dev, -1, + wm8994_regulator_devs, + ARRAY_SIZE(wm8994_regulator_devs), + NULL, 0); + if (ret != 0) { + dev_err(wm8994->dev, "Failed to add children: %d\n", ret); + goto err; + } + + wm8994->supplies = kzalloc(sizeof(struct regulator_bulk_data) * + ARRAY_SIZE(wm8994_main_supplies), + GFP_KERNEL); + if (!wm8994->supplies) + goto err; + + for (i = 0; i < ARRAY_SIZE(wm8994_main_supplies); i++) + wm8994->supplies[i].supply = wm8994_main_supplies[i]; + + ret = regulator_bulk_get(wm8994->dev, ARRAY_SIZE(wm8994_main_supplies), + wm8994->supplies); + if (ret != 0) { + dev_err(wm8994->dev, "Failed to get supplies: %d\n", ret); + goto err_get; + } + + ret = regulator_bulk_enable(ARRAY_SIZE(wm8994_main_supplies), + wm8994->supplies); + if (ret != 0) { + dev_err(wm8994->dev, "Failed to enable supplies: %d\n", ret); + goto err_supplies; + } + + ret = wm8994_reg_read(wm8994, WM8994_SOFTWARE_RESET); + if (ret < 0) { + dev_err(wm8994->dev, "Failed to read ID register\n"); + goto err_enable; + } + if (ret != 0x8994) { + dev_err(wm8994->dev, "Device is not a WM8994, ID is %x\n", + ret); + ret = -EINVAL; + goto err_enable; + } + + ret = wm8994_reg_read(wm8994, WM8994_CHIP_REVISION); + if (ret < 0) { + dev_err(wm8994->dev, "Failed to read revision register: %d\n", + ret); + goto err_enable; + } + + switch (ret) { + case 0: + case 1: + dev_warn(wm8994->dev, "revision %c not fully supported\n", + 'A' + ret); + break; + default: + dev_info(wm8994->dev, "revision %c\n", 'A' + ret); + break; + 
} + + + if (pdata) { + wm8994->gpio_base = pdata->gpio_base; + + /* GPIO configuration is only applied if it's non-zero */ + for (i = 0; i < ARRAY_SIZE(pdata->gpio_defaults); i++) { + if (pdata->gpio_defaults[i]) { + wm8994_set_bits(wm8994, WM8994_GPIO_1 + i, + 0xffff, + pdata->gpio_defaults[i]); + } + } + } + + /* In some system designs where the regulators are not in use, + * we can achieve a small reduction in leakage currents by + * floating LDO outputs. This bit makes no difference if the + * LDOs are enabled, it only affects cases where the LDOs were + * in operation and are then disabled. + */ + for (i = 0; i < WM8994_NUM_LDO_REGS; i++) { + if (wm8994_ldo_in_use(pdata, i)) + wm8994_set_bits(wm8994, WM8994_LDO_1 + i, + WM8994_LDO1_DISCH, WM8994_LDO1_DISCH); + else + wm8994_set_bits(wm8994, WM8994_LDO_1 + i, + WM8994_LDO1_DISCH, 0); + } + + ret = mfd_add_devices(wm8994->dev, -1, + wm8994_devs, ARRAY_SIZE(wm8994_devs), + NULL, 0); + if (ret != 0) { + dev_err(wm8994->dev, "Failed to add children: %d\n", ret); + goto err_enable; + } + + return 0; + +err_enable: + regulator_bulk_disable(ARRAY_SIZE(wm8994_main_supplies), + wm8994->supplies); +err_get: + regulator_bulk_free(ARRAY_SIZE(wm8994_main_supplies), wm8994->supplies); +err_supplies: + kfree(wm8994->supplies); +err: + mfd_remove_devices(wm8994->dev); + kfree(wm8994); + return ret; +} + +static void wm8994_device_exit(struct wm8994 *wm8994) +{ + mfd_remove_devices(wm8994->dev); + regulator_bulk_disable(ARRAY_SIZE(wm8994_main_supplies), + wm8994->supplies); + regulator_bulk_free(ARRAY_SIZE(wm8994_main_supplies), wm8994->supplies); + kfree(wm8994->supplies); + kfree(wm8994); +} + +static int wm8994_i2c_read_device(struct wm8994 *wm8994, unsigned short reg, + int bytes, void *dest) +{ + struct i2c_client *i2c = wm8994->control_data; + int ret; + u16 r = cpu_to_be16(reg); + + ret = i2c_master_send(i2c, (unsigned char *)&r, 2); + if (ret < 0) + return ret; + if (ret != 2) + return -EIO; + + ret = i2c_master_recv(i2c, dest, bytes); + if (ret < 0) + return ret; + if (ret != bytes) + return -EIO; + return 0; +} + +/* Currently we allocate the write buffer on the stack; this is OK for + * small writes - if we need to do large writes this will need to be + * revised. 
+ */ +static int wm8994_i2c_write_device(struct wm8994 *wm8994, unsigned short reg, + int bytes, void *src) +{ + struct i2c_client *i2c = wm8994->control_data; + unsigned char msg[bytes + 2]; + int ret; + + reg = cpu_to_be16(reg); + memcpy(&msg[0], ®, 2); + memcpy(&msg[2], src, bytes); + + ret = i2c_master_send(i2c, msg, bytes + 2); + if (ret < 0) + return ret; + if (ret < bytes + 2) + return -EIO; + + return 0; +} + +static int wm8994_i2c_probe(struct i2c_client *i2c, + const struct i2c_device_id *id) +{ + struct wm8994 *wm8994; + + wm8994 = kzalloc(sizeof(struct wm8994), GFP_KERNEL); + if (wm8994 == NULL) { + kfree(i2c); + return -ENOMEM; + } + + i2c_set_clientdata(i2c, wm8994); + wm8994->dev = &i2c->dev; + wm8994->control_data = i2c; + wm8994->read_dev = wm8994_i2c_read_device; + wm8994->write_dev = wm8994_i2c_write_device; + + return wm8994_device_init(wm8994, id->driver_data, i2c->irq); +} + +static int wm8994_i2c_remove(struct i2c_client *i2c) +{ + struct wm8994 *wm8994 = i2c_get_clientdata(i2c); + + wm8994_device_exit(wm8994); + + return 0; +} + +#ifdef CONFIG_PM +static int wm8994_i2c_suspend(struct i2c_client *i2c, pm_message_t state) +{ + return wm8994_device_suspend(&i2c->dev); +} + +static int wm8994_i2c_resume(struct i2c_client *i2c) +{ + return wm8994_device_resume(&i2c->dev); +} +#else +#define wm8994_i2c_suspend NULL +#define wm8994_i2c_resume NULL +#endif + +static const struct i2c_device_id wm8994_i2c_id[] = { + { "wm8994", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, wm8994_i2c_id); + +static struct i2c_driver wm8994_i2c_driver = { + .driver = { + .name = "wm8994", + .owner = THIS_MODULE, + }, + .probe = wm8994_i2c_probe, + .remove = wm8994_i2c_remove, + .suspend = wm8994_i2c_suspend, + .resume = wm8994_i2c_resume, + .id_table = wm8994_i2c_id, +}; + +static int __init wm8994_i2c_init(void) +{ + int ret; + + ret = i2c_add_driver(&wm8994_i2c_driver); + if (ret != 0) + pr_err("Failed to register wm8994 I2C driver: %d\n", ret); + + return ret; +} +module_init(wm8994_i2c_init); + +static void __exit wm8994_i2c_exit(void) +{ + i2c_del_driver(&wm8994_i2c_driver); +} +module_exit(wm8994_i2c_exit); + +MODULE_DESCRIPTION("Core support for the WM8994 audio CODEC"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Mark Brown "); diff --git a/include/linux/mfd/wm8994/core.h b/include/linux/mfd/wm8994/core.h new file mode 100644 index 000000000000..b06ff2846748 --- /dev/null +++ b/include/linux/mfd/wm8994/core.h @@ -0,0 +1,54 @@ +/* + * include/linux/mfd/wm8994/core.h -- Core interface for WM8994 + * + * Copyright 2009 Wolfson Microelectronics PLC. + * + * Author: Mark Brown + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ * + */ + +#ifndef __MFD_WM8994_CORE_H__ +#define __MFD_WM8994_CORE_H__ + +struct regulator_dev; +struct regulator_bulk_data; + +#define WM8994_NUM_GPIO_REGS 11 +#define WM8994_NUM_LDO_REGS 2 + +struct wm8994 { + struct mutex io_lock; + + struct device *dev; + int (*read_dev)(struct wm8994 *wm8994, unsigned short reg, + int bytes, void *dest); + int (*write_dev)(struct wm8994 *wm8994, unsigned short reg, + int bytes, void *src); + + void *control_data; + + int gpio_base; + + /* Used over suspend/resume */ + u16 ldo_regs[WM8994_NUM_LDO_REGS]; + u16 gpio_regs[WM8994_NUM_GPIO_REGS]; + + struct regulator_dev *dbvdd; + struct regulator_bulk_data *supplies; +}; + +/* Device I/O API */ +int wm8994_reg_read(struct wm8994 *wm8994, unsigned short reg); +int wm8994_reg_write(struct wm8994 *wm8994, unsigned short reg, + unsigned short val); +int wm8994_set_bits(struct wm8994 *wm8994, unsigned short reg, + unsigned short mask, unsigned short val); +int wm8994_bulk_read(struct wm8994 *wm8994, unsigned short reg, + int count, u16 *buf); + +#endif diff --git a/include/linux/mfd/wm8994/gpio.h b/include/linux/mfd/wm8994/gpio.h new file mode 100644 index 000000000000..b4d4c22991e8 --- /dev/null +++ b/include/linux/mfd/wm8994/gpio.h @@ -0,0 +1,72 @@ +/* + * include/linux/mfd/wm8994/gpio.h - GPIO configuration for WM8994 + * + * Copyright 2009 Wolfson Microelectronics PLC. + * + * Author: Mark Brown + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#ifndef __MFD_WM8994_GPIO_H__ +#define __MFD_WM8994_GPIO_H__ + +#define WM8994_GPIO_MAX 11 + +#define WM8994_GP_FN_PIN_SPECIFIC 0 +#define WM8994_GP_FN_GPIO 1 +#define WM8994_GP_FN_SDOUT 2 +#define WM8994_GP_FN_IRQ 3 +#define WM8994_GP_FN_TEMPERATURE 4 +#define WM8994_GP_FN_MICBIAS1_DET 5 +#define WM8994_GP_FN_MICBIAS1_SHORT 6 +#define WM8994_GP_FN_MICBIAS2_DET 7 +#define WM8994_GP_FN_MICBIAS2_SHORT 8 +#define WM8994_GP_FN_FLL1_LOCK 9 +#define WM8994_GP_FN_FLL2_LOCK 10 +#define WM8994_GP_FN_SRC1_LOCK 11 +#define WM8994_GP_FN_SRC2_LOCK 12 +#define WM8994_GP_FN_DRC1_ACT 13 +#define WM8994_GP_FN_DRC2_ACT 14 +#define WM8994_GP_FN_DRC3_ACT 15 +#define WM8994_GP_FN_WSEQ_STATUS 16 +#define WM8994_GP_FN_FIFO_ERROR 17 +#define WM8994_GP_FN_OPCLK 18 + +#define WM8994_GPN_DIR 0x8000 /* GPN_DIR */ +#define WM8994_GPN_DIR_MASK 0x8000 /* GPN_DIR */ +#define WM8994_GPN_DIR_SHIFT 15 /* GPN_DIR */ +#define WM8994_GPN_DIR_WIDTH 1 /* GPN_DIR */ +#define WM8994_GPN_PU 0x4000 /* GPN_PU */ +#define WM8994_GPN_PU_MASK 0x4000 /* GPN_PU */ +#define WM8994_GPN_PU_SHIFT 14 /* GPN_PU */ +#define WM8994_GPN_PU_WIDTH 1 /* GPN_PU */ +#define WM8994_GPN_PD 0x2000 /* GPN_PD */ +#define WM8994_GPN_PD_MASK 0x2000 /* GPN_PD */ +#define WM8994_GPN_PD_SHIFT 13 /* GPN_PD */ +#define WM8994_GPN_PD_WIDTH 1 /* GPN_PD */ +#define WM8994_GPN_POL 0x0400 /* GPN_POL */ +#define WM8994_GPN_POL_MASK 0x0400 /* GPN_POL */ +#define WM8994_GPN_POL_SHIFT 10 /* GPN_POL */ +#define WM8994_GPN_POL_WIDTH 1 /* GPN_POL */ +#define WM8994_GPN_OP_CFG 0x0200 /* GPN_OP_CFG */ +#define WM8994_GPN_OP_CFG_MASK 0x0200 /* GPN_OP_CFG */ +#define WM8994_GPN_OP_CFG_SHIFT 9 /* GPN_OP_CFG */ +#define WM8994_GPN_OP_CFG_WIDTH 1 /* GPN_OP_CFG */ +#define WM8994_GPN_DB 0x0100 /* GPN_DB */ +#define WM8994_GPN_DB_MASK 0x0100 /* GPN_DB */ +#define WM8994_GPN_DB_SHIFT 8 /* GPN_DB */ +#define WM8994_GPN_DB_WIDTH 1 /* GPN_DB */ 
+#define WM8994_GPN_LVL 0x0040 /* GPN_LVL */ +#define WM8994_GPN_LVL_MASK 0x0040 /* GPN_LVL */ +#define WM8994_GPN_LVL_SHIFT 6 /* GPN_LVL */ +#define WM8994_GPN_LVL_WIDTH 1 /* GPN_LVL */ +#define WM8994_GPN_FN_MASK 0x001F /* GPN_FN - [4:0] */ +#define WM8994_GPN_FN_SHIFT 0 /* GPN_FN - [4:0] */ +#define WM8994_GPN_FN_WIDTH 5 /* GPN_FN - [4:0] */ + +#endif diff --git a/include/linux/mfd/wm8994/pdata.h b/include/linux/mfd/wm8994/pdata.h new file mode 100644 index 000000000000..70d6a8687dc5 --- /dev/null +++ b/include/linux/mfd/wm8994/pdata.h @@ -0,0 +1,97 @@ +/* + * include/linux/mfd/wm8994/pdata.h -- Platform data for WM8994 + * + * Copyright 2009 Wolfson Microelectronics PLC. + * + * Author: Mark Brown + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#ifndef __MFD_WM8994_PDATA_H__ +#define __MFD_WM8994_PDATA_H__ + +#define WM8994_NUM_LDO 2 +#define WM8994_NUM_GPIO 11 + +struct wm8994_ldo_pdata { + /** GPIOs to enable regulator, 0 or less if not available */ + int enable; + + const char *supply; + struct regulator_init_data *init_data; +}; + +#define WM8994_CONFIGURE_GPIO 0x8000 + +#define WM8994_DRC_REGS 5 +#define WM8994_EQ_REGS 19 + +/** + * DRC configurations are specified with a label and a set of register + * values to write (the enable bits will be ignored). At runtime an + * enumerated control will be presented for each DRC block allowing + * the user to choose the configration to use. + * + * Configurations may be generated by hand or by using the DRC control + * panel provided by the WISCE - see http://www.wolfsonmicro.com/wisce/ + * for details. + */ +struct wm8994_drc_cfg { + const char *name; + u16 regs[WM8994_DRC_REGS]; +}; + +/** + * ReTune Mobile configurations are specified with a label, sample + * rate and set of values to write (the enable bits will be ignored). + * + * Configurations are expected to be generated using the ReTune Mobile + * control panel in WISCE - see http://www.wolfsonmicro.com/wisce/ + */ +struct wm8994_retune_mobile_cfg { + const char *name; + unsigned int rate; + u16 regs[WM8994_EQ_REGS]; +}; + +struct wm8994_pdata { + int gpio_base; + + /** + * Default values for GPIOs if non-zero, WM8994_CONFIGURE_GPIO + * can be used for all zero values. + */ + int gpio_defaults[WM8994_NUM_GPIO]; + + struct wm8994_ldo_pdata ldo[WM8994_NUM_LDO]; + + + int num_drc_cfgs; + struct wm8994_drc_cfg *drc_cfgs; + + int num_retune_mobile_cfgs; + struct wm8994_retune_mobile_cfg *retune_mobile_cfgs; + + /* LINEOUT can be differential or single ended */ + unsigned int lineout1_diff:1; + unsigned int lineout2_diff:1; + + /* Common mode feedback */ + unsigned int lineout1fb:1; + unsigned int lineout2fb:1; + + /* Microphone biases: 0=0.9*AVDD1 1=0.65*AVVD1 */ + unsigned int micbias1_lvl:1; + unsigned int micbias2_lvl:1; + + /* Jack detect threashold levels, see datasheet for values */ + unsigned int jd_scthr:2; + unsigned int jd_thr:2; +}; + +#endif -- cgit v1.2.3 From 1f1cf8f98cf6588365efeaab8e7e7758aaa77f6e Mon Sep 17 00:00:00 2001 From: Haojian Zhuang Date: Fri, 5 Feb 2010 16:07:54 +0100 Subject: mfd: Update irq handler in max8925 Update the threaded irq handler. Simplify the interface for using the threaded irq.
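
As a rough illustration of how a platform hooks into the reworked interrupt handling, board support code passes the two new fields through max8925_platform_data. The sketch below is only indicative: the BOARD_* numbers and the 0x3c I2C address are placeholders invented for the example, and only the irq_base and tsc_irq fields come from this patch.

    #include <linux/i2c.h>
    #include <linux/mfd/max8925.h>

    /* Placeholder values for the sketch only */
    #define BOARD_IRQ_BASE	160	/* first free virtual IRQ on this board */
    #define BOARD_PMIC_IRQ	17	/* line the core PMIC interrupt is wired to */
    #define BOARD_TSC_IRQ	18	/* line the TSC interrupt is wired to */

    static struct max8925_platform_data board_max8925_pdata = {
    	.irq_base	= BOARD_IRQ_BASE,	/* base for the chip's IRQs */
    	.tsc_irq	= BOARD_TSC_IRQ,	/* separate TSC interrupt */
    };

    static struct i2c_board_info board_i2c_devs[] __initdata = {
    	{
    		I2C_BOARD_INFO("max8925", 0x3c),	/* address illustrative */
    		.platform_data	= &board_max8925_pdata,
    		.irq		= BOARD_PMIC_IRQ,	/* core interrupt line */
    	},
    };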
Signed-off-by: Haojian Zhuang Acked-by: Mark Brown Signed-off-by: Samuel Ortiz --- drivers/mfd/Kconfig | 4 +- drivers/mfd/max8925-core.c | 678 ++++++++++++++++++++++++++++++-------------- include/linux/mfd/max8925.h | 139 ++++++--- 3 files changed, 564 insertions(+), 257 deletions(-) (limited to 'include') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 64fbe3334eb3..84a68d260772 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -204,8 +204,8 @@ config PMIC_ADP5520 under the corresponding menus. config MFD_MAX8925 - tristate "Maxim Semiconductor MAX8925 PMIC Support" - depends on I2C + bool "Maxim Semiconductor MAX8925 PMIC Support" + depends on I2C=y select MFD_CORE help Say yes here to support for Maxim Semiconductor MAX8925. This is diff --git a/drivers/mfd/max8925-core.c b/drivers/mfd/max8925-core.c index f36c494b80f1..85d63c04749b 100644 --- a/drivers/mfd/max8925-core.c +++ b/drivers/mfd/max8925-core.c @@ -1,7 +1,7 @@ /* * Base driver for Maxim MAX8925 * - * Copyright (C) 2009 Marvell International Ltd. + * Copyright (C) 2009-2010 Marvell International Ltd. * Haojian Zhuang * * This program is free software; you can redistribute it and/or modify @@ -12,14 +12,12 @@ #include #include #include +#include #include #include #include #include -#define IRQ_MODE_STATUS 0 -#define IRQ_MODE_MASK 1 - static struct resource backlight_resources[] = { { .name = "max8925-backlight", @@ -56,6 +54,42 @@ static struct mfd_cell touch_devs[] = { }, }; +static struct resource power_supply_resources[] = { + { + .name = "max8925-power", + .start = MAX8925_CHG_IRQ1, + .end = MAX8925_CHG_IRQ1_MASK, + .flags = IORESOURCE_IO, + }, +}; + +static struct mfd_cell power_devs[] = { + { + .name = "max8925-power", + .num_resources = 1, + .resources = &power_supply_resources[0], + .id = -1, + }, +}; + +static struct resource rtc_resources[] = { + { + .name = "max8925-rtc", + .start = MAX8925_RTC_IRQ, + .end = MAX8925_RTC_IRQ_MASK, + .flags = IORESOURCE_IO, + }, +}; + +static struct mfd_cell rtc_devs[] = { + { + .name = "max8925-rtc", + .num_resources = 1, + .resources = &rtc_resources[0], + .id = -1, + }, +}; + #define MAX8925_REG_RESOURCE(_start, _end) \ { \ .start = MAX8925_##_start, \ @@ -123,203 +157,450 @@ static struct mfd_cell regulator_devs[] = { MAX8925_REG_DEVS(LDO20), }; -static int __get_irq_offset(struct max8925_chip *chip, int irq, int mode, - int *offset, int *bit) +enum { + FLAGS_ADC = 1, /* register in ADC component */ + FLAGS_RTC, /* register in RTC component */ +}; + +struct max8925_irq_data { + int reg; + int mask_reg; + int enable; /* enable or not */ + int offs; /* bit offset in mask register */ + int flags; + int tsc_irq; +}; + +static struct max8925_irq_data max8925_irqs[] = { + [MAX8925_IRQ_VCHG_DC_OVP] = { + .reg = MAX8925_CHG_IRQ1, + .mask_reg = MAX8925_CHG_IRQ1_MASK, + .offs = 1 << 0, + }, + [MAX8925_IRQ_VCHG_DC_F] = { + .reg = MAX8925_CHG_IRQ1, + .mask_reg = MAX8925_CHG_IRQ1_MASK, + .offs = 1 << 1, + }, + [MAX8925_IRQ_VCHG_DC_R] = { + .reg = MAX8925_CHG_IRQ1, + .mask_reg = MAX8925_CHG_IRQ1_MASK, + .offs = 1 << 2, + }, + [MAX8925_IRQ_VCHG_USB_OVP] = { + .reg = MAX8925_CHG_IRQ1, + .mask_reg = MAX8925_CHG_IRQ1_MASK, + .offs = 1 << 3, + }, + [MAX8925_IRQ_VCHG_USB_F] = { + .reg = MAX8925_CHG_IRQ1, + .mask_reg = MAX8925_CHG_IRQ1_MASK, + .offs = 1 << 4, + }, + [MAX8925_IRQ_VCHG_USB_R] = { + .reg = MAX8925_CHG_IRQ1, + .mask_reg = MAX8925_CHG_IRQ1_MASK, + .offs = 1 << 5, + }, + [MAX8925_IRQ_VCHG_THM_OK_R] = { + .reg = MAX8925_CHG_IRQ2, + .mask_reg = MAX8925_CHG_IRQ2_MASK, + 
.offs = 1 << 0, + }, + [MAX8925_IRQ_VCHG_THM_OK_F] = { + .reg = MAX8925_CHG_IRQ2, + .mask_reg = MAX8925_CHG_IRQ2_MASK, + .offs = 1 << 1, + }, + [MAX8925_IRQ_VCHG_SYSLOW_F] = { + .reg = MAX8925_CHG_IRQ2, + .mask_reg = MAX8925_CHG_IRQ2_MASK, + .offs = 1 << 2, + }, + [MAX8925_IRQ_VCHG_SYSLOW_R] = { + .reg = MAX8925_CHG_IRQ2, + .mask_reg = MAX8925_CHG_IRQ2_MASK, + .offs = 1 << 3, + }, + [MAX8925_IRQ_VCHG_RST] = { + .reg = MAX8925_CHG_IRQ2, + .mask_reg = MAX8925_CHG_IRQ2_MASK, + .offs = 1 << 4, + }, + [MAX8925_IRQ_VCHG_DONE] = { + .reg = MAX8925_CHG_IRQ2, + .mask_reg = MAX8925_CHG_IRQ2_MASK, + .offs = 1 << 5, + }, + [MAX8925_IRQ_VCHG_TOPOFF] = { + .reg = MAX8925_CHG_IRQ2, + .mask_reg = MAX8925_CHG_IRQ2_MASK, + .offs = 1 << 6, + }, + [MAX8925_IRQ_VCHG_TMR_FAULT] = { + .reg = MAX8925_CHG_IRQ2, + .mask_reg = MAX8925_CHG_IRQ2_MASK, + .offs = 1 << 7, + }, + [MAX8925_IRQ_GPM_RSTIN] = { + .reg = MAX8925_ON_OFF_IRQ1, + .mask_reg = MAX8925_ON_OFF_IRQ1_MASK, + .offs = 1 << 0, + }, + [MAX8925_IRQ_GPM_MPL] = { + .reg = MAX8925_ON_OFF_IRQ1, + .mask_reg = MAX8925_ON_OFF_IRQ1_MASK, + .offs = 1 << 1, + }, + [MAX8925_IRQ_GPM_SW_3SEC] = { + .reg = MAX8925_ON_OFF_IRQ1, + .mask_reg = MAX8925_ON_OFF_IRQ1_MASK, + .offs = 1 << 2, + }, + [MAX8925_IRQ_GPM_EXTON_F] = { + .reg = MAX8925_ON_OFF_IRQ1, + .mask_reg = MAX8925_ON_OFF_IRQ1_MASK, + .offs = 1 << 3, + }, + [MAX8925_IRQ_GPM_EXTON_R] = { + .reg = MAX8925_ON_OFF_IRQ1, + .mask_reg = MAX8925_ON_OFF_IRQ1_MASK, + .offs = 1 << 4, + }, + [MAX8925_IRQ_GPM_SW_1SEC] = { + .reg = MAX8925_ON_OFF_IRQ1, + .mask_reg = MAX8925_ON_OFF_IRQ1_MASK, + .offs = 1 << 5, + }, + [MAX8925_IRQ_GPM_SW_F] = { + .reg = MAX8925_ON_OFF_IRQ1, + .mask_reg = MAX8925_ON_OFF_IRQ1_MASK, + .offs = 1 << 6, + }, + [MAX8925_IRQ_GPM_SW_R] = { + .reg = MAX8925_ON_OFF_IRQ1, + .mask_reg = MAX8925_ON_OFF_IRQ1_MASK, + .offs = 1 << 7, + }, + [MAX8925_IRQ_GPM_SYSCKEN_F] = { + .reg = MAX8925_ON_OFF_IRQ2, + .mask_reg = MAX8925_ON_OFF_IRQ2_MASK, + .offs = 1 << 0, + }, + [MAX8925_IRQ_GPM_SYSCKEN_R] = { + .reg = MAX8925_ON_OFF_IRQ2, + .mask_reg = MAX8925_ON_OFF_IRQ2_MASK, + .offs = 1 << 1, + }, + [MAX8925_IRQ_RTC_ALARM1] = { + .reg = MAX8925_RTC_IRQ, + .mask_reg = MAX8925_RTC_IRQ_MASK, + .offs = 1 << 2, + .flags = FLAGS_RTC, + }, + [MAX8925_IRQ_RTC_ALARM0] = { + .reg = MAX8925_RTC_IRQ, + .mask_reg = MAX8925_RTC_IRQ_MASK, + .offs = 1 << 3, + .flags = FLAGS_RTC, + }, + [MAX8925_IRQ_TSC_STICK] = { + .reg = MAX8925_TSC_IRQ, + .mask_reg = MAX8925_TSC_IRQ_MASK, + .offs = 1 << 0, + .flags = FLAGS_ADC, + .tsc_irq = 1, + }, + [MAX8925_IRQ_TSC_NSTICK] = { + .reg = MAX8925_TSC_IRQ, + .mask_reg = MAX8925_TSC_IRQ_MASK, + .offs = 1 << 1, + .flags = FLAGS_ADC, + .tsc_irq = 1, + }, +}; + +static inline struct max8925_irq_data *irq_to_max8925(struct max8925_chip *chip, + int irq) { - if (!offset || !bit) - return -EINVAL; + return &max8925_irqs[irq - chip->irq_base]; +} - switch (chip->chip_id) { - case MAX8925_GPM: - *bit = irq % BITS_PER_BYTE; - if (irq < (BITS_PER_BYTE << 1)) { /* irq = [0,15] */ - *offset = (mode) ? MAX8925_CHG_IRQ1_MASK - : MAX8925_CHG_IRQ1; - if (irq >= BITS_PER_BYTE) - (*offset)++; - } else { /* irq = [16,31] */ - *offset = (mode) ? 
MAX8925_ON_OFF_IRQ1_MASK - : MAX8925_ON_OFF_IRQ1; - if (irq >= (BITS_PER_BYTE * 3)) - (*offset)++; +static irqreturn_t max8925_irq(int irq, void *data) +{ + struct max8925_chip *chip = data; + struct max8925_irq_data *irq_data; + struct i2c_client *i2c; + int read_reg = -1, value = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(max8925_irqs); i++) { + irq_data = &max8925_irqs[i]; + /* TSC IRQ should be serviced in max8925_tsc_irq() */ + if (irq_data->tsc_irq) + continue; + if (irq_data->flags == FLAGS_RTC) + i2c = chip->rtc; + else if (irq_data->flags == FLAGS_ADC) + i2c = chip->adc; + else + i2c = chip->i2c; + if (read_reg != irq_data->reg) { + read_reg = irq_data->reg; + value = max8925_reg_read(i2c, irq_data->reg); } - break; - case MAX8925_ADC: - *bit = irq % BITS_PER_BYTE; - *offset = (mode) ? MAX8925_TSC_IRQ_MASK : MAX8925_TSC_IRQ; - break; - default: - goto out; + if (value & irq_data->enable) + handle_nested_irq(chip->irq_base + i); } - return 0; -out: - dev_err(chip->dev, "Wrong irq #%d is assigned\n", irq); - return -EINVAL; + return IRQ_HANDLED; } -static int __check_irq(int irq) +static irqreturn_t max8925_tsc_irq(int irq, void *data) { - if ((irq < 0) || (irq >= MAX8925_NUM_IRQ)) - return -EINVAL; - return 0; + struct max8925_chip *chip = data; + struct max8925_irq_data *irq_data; + struct i2c_client *i2c; + int read_reg = -1, value = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(max8925_irqs); i++) { + irq_data = &max8925_irqs[i]; + /* non TSC IRQ should be serviced in max8925_irq() */ + if (!irq_data->tsc_irq) + continue; + if (irq_data->flags == FLAGS_RTC) + i2c = chip->rtc; + else if (irq_data->flags == FLAGS_ADC) + i2c = chip->adc; + else + i2c = chip->i2c; + if (read_reg != irq_data->reg) { + read_reg = irq_data->reg; + value = max8925_reg_read(i2c, irq_data->reg); + } + if (value & irq_data->enable) + handle_nested_irq(chip->irq_base + i); + } + return IRQ_HANDLED; } -int max8925_mask_irq(struct max8925_chip *chip, int irq) +static void max8925_irq_lock(unsigned int irq) { - int offset, bit, ret; + struct max8925_chip *chip = get_irq_chip_data(irq); - ret = __get_irq_offset(chip, irq, IRQ_MODE_MASK, &offset, &bit); - if (ret < 0) - return ret; - ret = max8925_set_bits(chip->i2c, offset, 1 << bit, 1 << bit); - return ret; + mutex_lock(&chip->irq_lock); } -int max8925_unmask_irq(struct max8925_chip *chip, int irq) +static void max8925_irq_sync_unlock(unsigned int irq) { - int offset, bit, ret; + struct max8925_chip *chip = get_irq_chip_data(irq); + struct max8925_irq_data *irq_data; + static unsigned char cache_chg[2] = {0xff, 0xff}; + static unsigned char cache_on[2] = {0xff, 0xff}; + static unsigned char cache_rtc = 0xff, cache_tsc = 0xff; + unsigned char irq_chg[2], irq_on[2]; + unsigned char irq_rtc, irq_tsc; + int i; + + /* Load cached value. 
In initial, all IRQs are masked */ + irq_chg[0] = cache_chg[0]; + irq_chg[1] = cache_chg[1]; + irq_on[0] = cache_on[0]; + irq_on[1] = cache_on[1]; + irq_rtc = cache_rtc; + irq_tsc = cache_tsc; + for (i = 0; i < ARRAY_SIZE(max8925_irqs); i++) { + irq_data = &max8925_irqs[i]; + switch (irq_data->mask_reg) { + case MAX8925_CHG_IRQ1_MASK: + irq_chg[0] &= irq_data->enable; + break; + case MAX8925_CHG_IRQ2_MASK: + irq_chg[1] &= irq_data->enable; + break; + case MAX8925_ON_OFF_IRQ1_MASK: + irq_on[0] &= irq_data->enable; + break; + case MAX8925_ON_OFF_IRQ2_MASK: + irq_on[1] &= irq_data->enable; + break; + case MAX8925_RTC_IRQ_MASK: + irq_rtc &= irq_data->enable; + break; + case MAX8925_TSC_IRQ_MASK: + irq_tsc &= irq_data->enable; + break; + default: + dev_err(chip->dev, "wrong IRQ\n"); + break; + } + } + /* update mask into registers */ + if (cache_chg[0] != irq_chg[0]) { + cache_chg[0] = irq_chg[0]; + max8925_reg_write(chip->i2c, MAX8925_CHG_IRQ1_MASK, + irq_chg[0]); + } + if (cache_chg[1] != irq_chg[1]) { + cache_chg[1] = irq_chg[1]; + max8925_reg_write(chip->i2c, MAX8925_CHG_IRQ2_MASK, + irq_chg[1]); + } + if (cache_on[0] != irq_on[0]) { + cache_on[0] = irq_on[0]; + max8925_reg_write(chip->i2c, MAX8925_ON_OFF_IRQ1_MASK, + irq_on[0]); + } + if (cache_on[1] != irq_on[1]) { + cache_on[1] = irq_on[1]; + max8925_reg_write(chip->i2c, MAX8925_ON_OFF_IRQ2_MASK, + irq_on[1]); + } + if (cache_rtc != irq_rtc) { + cache_rtc = irq_rtc; + max8925_reg_write(chip->rtc, MAX8925_RTC_IRQ_MASK, irq_rtc); + } + if (cache_tsc != irq_tsc) { + cache_tsc = irq_tsc; + max8925_reg_write(chip->adc, MAX8925_TSC_IRQ_MASK, irq_tsc); + } - ret = __get_irq_offset(chip, irq, IRQ_MODE_MASK, &offset, &bit); - if (ret < 0) - return ret; - ret = max8925_set_bits(chip->i2c, offset, 1 << bit, 0); - return ret; + mutex_unlock(&chip->irq_lock); } -#define INT_STATUS_NUM (MAX8925_NUM_IRQ / BITS_PER_BYTE) - -static irqreturn_t max8925_irq_thread(int irq, void *data) +static void max8925_irq_enable(unsigned int irq) { - struct max8925_chip *chip = data; - unsigned long irq_status[INT_STATUS_NUM]; - unsigned char status_buf[INT_STATUS_NUM << 1]; - int i, ret; - - memset(irq_status, 0, sizeof(unsigned long) * INT_STATUS_NUM); - - /* all these interrupt status registers are read-only */ - switch (chip->chip_id) { - case MAX8925_GPM: - ret = max8925_bulk_read(chip->i2c, MAX8925_CHG_IRQ1, - 4, status_buf); - if (ret < 0) - goto out; - ret = max8925_bulk_read(chip->i2c, MAX8925_ON_OFF_IRQ1, - 2, &status_buf[4]); - if (ret < 0) - goto out; - ret = max8925_bulk_read(chip->i2c, MAX8925_ON_OFF_IRQ2, - 2, &status_buf[6]); - if (ret < 0) - goto out; - /* clear masked interrupt status */ - status_buf[0] &= (~status_buf[2] & CHG_IRQ1_MASK); - irq_status[0] |= status_buf[0]; - status_buf[1] &= (~status_buf[3] & CHG_IRQ2_MASK); - irq_status[0] |= (status_buf[1] << BITS_PER_BYTE); - status_buf[4] &= (~status_buf[5] & ON_OFF_IRQ1_MASK); - irq_status[0] |= (status_buf[4] << (BITS_PER_BYTE * 2)); - status_buf[6] &= (~status_buf[7] & ON_OFF_IRQ2_MASK); - irq_status[0] |= (status_buf[6] << (BITS_PER_BYTE * 3)); - break; - case MAX8925_ADC: - ret = max8925_bulk_read(chip->i2c, MAX8925_TSC_IRQ, - 2, status_buf); - if (ret < 0) - goto out; - /* clear masked interrupt status */ - status_buf[0] &= (~status_buf[1] & TSC_IRQ_MASK); - irq_status[0] |= status_buf[0]; - break; - default: - goto out; - } - - for_each_bit(i, &irq_status[0], MAX8925_NUM_IRQ) { - clear_bit(i, irq_status); - dev_dbg(chip->dev, "Servicing IRQ #%d in %s\n", i, chip->name); - - 
mutex_lock(&chip->irq_lock); - if (chip->irq[i].handler) - chip->irq[i].handler(i, chip->irq[i].data); - else { - max8925_mask_irq(chip, i); - dev_err(chip->dev, "Noboday cares IRQ #%d in %s. " - "Now mask it.\n", i, chip->name); - } - mutex_unlock(&chip->irq_lock); - } -out: - return IRQ_HANDLED; + struct max8925_chip *chip = get_irq_chip_data(irq); + max8925_irqs[irq - chip->irq_base].enable + = max8925_irqs[irq - chip->irq_base].offs; } -int max8925_request_irq(struct max8925_chip *chip, int irq, - irq_handler_t handler, void *data) +static void max8925_irq_disable(unsigned int irq) { - if ((__check_irq(irq) < 0) || !handler) - return -EINVAL; - - mutex_lock(&chip->irq_lock); - chip->irq[irq].handler = handler; - chip->irq[irq].data = data; - mutex_unlock(&chip->irq_lock); - return 0; + struct max8925_chip *chip = get_irq_chip_data(irq); + max8925_irqs[irq - chip->irq_base].enable = 0; } -EXPORT_SYMBOL(max8925_request_irq); -int max8925_free_irq(struct max8925_chip *chip, int irq) +static struct irq_chip max8925_irq_chip = { + .name = "max8925", + .bus_lock = max8925_irq_lock, + .bus_sync_unlock = max8925_irq_sync_unlock, + .enable = max8925_irq_enable, + .disable = max8925_irq_disable, +}; + +static int max8925_irq_init(struct max8925_chip *chip, int irq, + struct max8925_platform_data *pdata) { - if (__check_irq(irq) < 0) + unsigned long flags = IRQF_TRIGGER_FALLING | IRQF_ONESHOT; + struct irq_desc *desc; + int i, ret; + int __irq; + + if (!pdata || !pdata->irq_base) { + dev_warn(chip->dev, "No interrupt support on IRQ base\n"); return -EINVAL; + } + /* clear all interrupts */ + max8925_reg_read(chip->i2c, MAX8925_CHG_IRQ1); + max8925_reg_read(chip->i2c, MAX8925_CHG_IRQ2); + max8925_reg_read(chip->i2c, MAX8925_ON_OFF_IRQ1); + max8925_reg_read(chip->i2c, MAX8925_ON_OFF_IRQ2); + max8925_reg_read(chip->rtc, MAX8925_RTC_IRQ); + max8925_reg_read(chip->adc, MAX8925_TSC_IRQ); + /* mask all interrupts */ + max8925_reg_write(chip->rtc, MAX8925_ALARM0_CNTL, 0); + max8925_reg_write(chip->rtc, MAX8925_ALARM1_CNTL, 0); + max8925_reg_write(chip->i2c, MAX8925_CHG_IRQ1_MASK, 0xff); + max8925_reg_write(chip->i2c, MAX8925_CHG_IRQ2_MASK, 0xff); + max8925_reg_write(chip->i2c, MAX8925_ON_OFF_IRQ1_MASK, 0xff); + max8925_reg_write(chip->i2c, MAX8925_ON_OFF_IRQ2_MASK, 0xff); + max8925_reg_write(chip->rtc, MAX8925_RTC_IRQ_MASK, 0xff); + max8925_reg_write(chip->adc, MAX8925_TSC_IRQ_MASK, 0xff); + + mutex_init(&chip->irq_lock); + chip->core_irq = irq; + chip->irq_base = pdata->irq_base; + desc = irq_to_desc(chip->core_irq); + + /* register with genirq */ + for (i = 0; i < ARRAY_SIZE(max8925_irqs); i++) { + __irq = i + chip->irq_base; + set_irq_chip_data(__irq, chip); + set_irq_chip_and_handler(__irq, &max8925_irq_chip, + handle_edge_irq); + set_irq_nested_thread(__irq, 1); +#ifdef CONFIG_ARM + set_irq_flags(__irq, IRQF_VALID); +#else + set_irq_noprobe(__irq); +#endif + } + if (!irq) { + dev_warn(chip->dev, "No interrupt support on core IRQ\n"); + goto tsc_irq; + } - mutex_lock(&chip->irq_lock); - chip->irq[irq].handler = NULL; - chip->irq[irq].data = NULL; - mutex_unlock(&chip->irq_lock); + ret = request_threaded_irq(irq, NULL, max8925_irq, flags, + "max8925", chip); + if (ret) { + dev_err(chip->dev, "Failed to request core IRQ: %d\n", ret); + chip->core_irq = 0; + } +tsc_irq: + if (!pdata->tsc_irq) { + dev_warn(chip->dev, "No interrupt support on TSC IRQ\n"); + return 0; + } + chip->tsc_irq = pdata->tsc_irq; + + ret = request_threaded_irq(chip->tsc_irq, NULL, max8925_tsc_irq, + flags, "max8925-tsc", chip); + 
if (ret) { + dev_err(chip->dev, "Failed to request TSC IRQ: %d\n", ret); + chip->tsc_irq = 0; + } return 0; } -EXPORT_SYMBOL(max8925_free_irq); -static int __devinit device_gpm_init(struct max8925_chip *chip, - struct i2c_client *i2c, - struct max8925_platform_data *pdata) +int __devinit max8925_device_init(struct max8925_chip *chip, + struct max8925_platform_data *pdata) { int ret; - /* mask all IRQs */ - ret = max8925_set_bits(i2c, MAX8925_CHG_IRQ1_MASK, 0x7, 0x7); - if (ret < 0) - goto out; - ret = max8925_set_bits(i2c, MAX8925_CHG_IRQ2_MASK, 0xff, 0xff); - if (ret < 0) - goto out; - ret = max8925_set_bits(i2c, MAX8925_ON_OFF_IRQ1_MASK, 0xff, 0xff); - if (ret < 0) - goto out; - ret = max8925_set_bits(i2c, MAX8925_ON_OFF_IRQ2_MASK, 0x3, 0x3); - if (ret < 0) - goto out; - - chip->name = "GPM"; - memset(chip->irq, 0, sizeof(struct max8925_irq) * MAX8925_NUM_IRQ); - ret = request_threaded_irq(i2c->irq, NULL, max8925_irq_thread, - IRQF_ONESHOT | IRQF_TRIGGER_LOW, - "max8925-gpm", chip); - if (ret < 0) { - dev_err(chip->dev, "Failed to request IRQ #%d.\n", i2c->irq); - goto out; + max8925_irq_init(chip, chip->i2c->irq, pdata); + + if (pdata && (pdata->power || pdata->touch)) { + /* enable ADC to control internal reference */ + max8925_set_bits(chip->i2c, MAX8925_RESET_CNFG, 1, 1); + /* enable internal reference for ADC */ + max8925_set_bits(chip->adc, MAX8925_TSC_CNFG1, 3, 2); + /* check for internal reference IRQ */ + do { + ret = max8925_reg_read(chip->adc, MAX8925_TSC_IRQ); + } while (ret & MAX8925_NREF_OK); + /* enaable ADC scheduler, interval is 1 second */ + max8925_set_bits(chip->adc, MAX8925_ADC_SCHED, 3, 2); } - chip->chip_irq = i2c->irq; - /* enable hard-reset for ONKEY power-off */ - max8925_set_bits(i2c, MAX8925_SYSENSEL, 0x80, 0x80); + /* enable Momentary Power Loss */ + max8925_set_bits(chip->rtc, MAX8925_MPL_CNTL, 1 << 4, 1 << 4); - ret = mfd_add_devices(chip->dev, 0, ®ulator_devs[0], - ARRAY_SIZE(regulator_devs), - ®ulator_resources[0], 0); + ret = mfd_add_devices(chip->dev, 0, &rtc_devs[0], + ARRAY_SIZE(rtc_devs), + &rtc_resources[0], 0); if (ret < 0) { - dev_err(chip->dev, "Failed to add regulator subdev\n"); - goto out_irq; + dev_err(chip->dev, "Failed to add rtc subdev\n"); + goto out; + } + if (pdata && pdata->regulator[0]) { + ret = mfd_add_devices(chip->dev, 0, ®ulator_devs[0], + ARRAY_SIZE(regulator_devs), + ®ulator_resources[0], 0); + if (ret < 0) { + dev_err(chip->dev, "Failed to add regulator subdev\n"); + goto out_dev; + } } if (pdata && pdata->backlight) { @@ -331,35 +612,17 @@ static int __devinit device_gpm_init(struct max8925_chip *chip, goto out_dev; } } - return 0; -out_dev: - mfd_remove_devices(chip->dev); -out_irq: - if (chip->chip_irq) - free_irq(chip->chip_irq, chip); -out: - return ret; -} - -static int __devinit device_adc_init(struct max8925_chip *chip, - struct i2c_client *i2c, - struct max8925_platform_data *pdata) -{ - int ret; - - /* mask all IRQs */ - ret = max8925_set_bits(i2c, MAX8925_TSC_IRQ_MASK, 3, 3); - chip->name = "ADC"; - memset(chip->irq, 0, sizeof(struct max8925_irq) * MAX8925_NUM_IRQ); - ret = request_threaded_irq(i2c->irq, NULL, max8925_irq_thread, - IRQF_ONESHOT | IRQF_TRIGGER_LOW, - "max8925-adc", chip); - if (ret < 0) { - dev_err(chip->dev, "Failed to request IRQ #%d.\n", i2c->irq); - goto out; + if (pdata && pdata->power) { + ret = mfd_add_devices(chip->dev, 0, &power_devs[0], + ARRAY_SIZE(power_devs), + &power_supply_resources[0], 0); + if (ret < 0) { + dev_err(chip->dev, "Failed to add power supply " + "subdev\n"); + goto 
out_dev; + } } - chip->chip_irq = i2c->irq; if (pdata && pdata->touch) { ret = mfd_add_devices(chip->dev, 0, &touch_devs[0], @@ -367,38 +630,27 @@ static int __devinit device_adc_init(struct max8925_chip *chip, &touch_resources[0], 0); if (ret < 0) { dev_err(chip->dev, "Failed to add touch subdev\n"); - goto out_irq; + goto out_dev; } } + return 0; -out_irq: - if (chip->chip_irq) - free_irq(chip->chip_irq, chip); +out_dev: + mfd_remove_devices(chip->dev); out: return ret; } -int __devinit max8925_device_init(struct max8925_chip *chip, - struct max8925_platform_data *pdata) +void __devexit max8925_device_exit(struct max8925_chip *chip) { - switch (chip->chip_id) { - case MAX8925_GPM: - device_gpm_init(chip, chip->i2c, pdata); - break; - case MAX8925_ADC: - device_adc_init(chip, chip->i2c, pdata); - break; - } - return 0; -} - -void max8925_device_exit(struct max8925_chip *chip) -{ - if (chip->chip_irq >= 0) - free_irq(chip->chip_irq, chip); + if (chip->core_irq) + free_irq(chip->core_irq, chip); + if (chip->tsc_irq) + free_irq(chip->tsc_irq, chip); mfd_remove_devices(chip->dev); } + MODULE_DESCRIPTION("PMIC Driver for Maxim MAX8925"); MODULE_AUTHOR("Haojian Zhuang #include /* Unified sub device IDs for MAX8925 */ @@ -39,6 +40,30 @@ enum { MAX8925_ID_LDO18, MAX8925_ID_LDO19, MAX8925_ID_LDO20, + MAX8925_ID_MAX, +}; + +enum { + /* + * Charging current threshold trigger going from fast charge + * to TOPOFF charge. From 5% to 20% of fasting charging current. + */ + MAX8925_TOPOFF_THR_5PER, + MAX8925_TOPOFF_THR_10PER, + MAX8925_TOPOFF_THR_15PER, + MAX8925_TOPOFF_THR_20PER, +}; + +enum { + /* Fast charging current */ + MAX8925_FCHG_85MA, + MAX8925_FCHG_300MA, + MAX8925_FCHG_460MA, + MAX8925_FCHG_600MA, + MAX8925_FCHG_700MA, + MAX8925_FCHG_800MA, + MAX8925_FCHG_900MA, + MAX8925_FCHG_1000MA, }; /* Charger registers */ @@ -46,12 +71,13 @@ enum { #define MAX8925_CHG_IRQ2 (0x7f) #define MAX8925_CHG_IRQ1_MASK (0x80) #define MAX8925_CHG_IRQ2_MASK (0x81) +#define MAX8925_CHG_STATUS (0x82) /* GPM registers */ #define MAX8925_SYSENSEL (0x00) #define MAX8925_ON_OFF_IRQ1 (0x01) #define MAX8925_ON_OFF_IRQ1_MASK (0x02) -#define MAX8925_ON_OFF_STAT (0x03) +#define MAX8925_ON_OFF_STATUS (0x03) #define MAX8925_ON_OFF_IRQ2 (0x0d) #define MAX8925_ON_OFF_IRQ2_MASK (0x0e) #define MAX8925_RESET_CNFG (0x0f) @@ -59,12 +85,18 @@ enum { /* Touch registers */ #define MAX8925_TSC_IRQ (0x00) #define MAX8925_TSC_IRQ_MASK (0x01) +#define MAX8925_TSC_CNFG1 (0x02) +#define MAX8925_ADC_SCHED (0x10) #define MAX8925_ADC_RES_END (0x6f) +#define MAX8925_NREF_OK (1 << 4) + /* RTC registers */ -#define MAX8925_RTC_STATUS (0x1a) +#define MAX8925_ALARM0_CNTL (0x18) +#define MAX8925_ALARM1_CNTL (0x19) #define MAX8925_RTC_IRQ (0x1c) #define MAX8925_RTC_IRQ_MASK (0x1d) +#define MAX8925_MPL_CNTL (0x1e) /* WLED registers */ #define MAX8925_WLED_MODE_CNTL (0x84) @@ -126,45 +158,48 @@ enum { #define TSC_IRQ_MASK (0x03) #define RTC_IRQ_MASK (0x0c) -#define MAX8925_NUM_IRQ (32) +#define MAX8925_GPM_NUM_IRQ (40) +#define MAX8925_ADC_NUM_IRQ (8) +#define MAX8925_NUM_IRQ (MAX8925_GPM_NUM_IRQ \ + + MAX8925_ADC_NUM_IRQ) + +#define MAX8925_MAX_REGULATOR (23) #define MAX8925_NAME_SIZE (32) +/* IRQ definitions */ enum { - MAX8925_INVALID = 0, - MAX8925_RTC, - MAX8925_ADC, - MAX8925_GPM, /* general power management */ - MAX8925_MAX, + MAX8925_IRQ_VCHG_DC_OVP, + MAX8925_IRQ_VCHG_DC_F, + MAX8925_IRQ_VCHG_DC_R, + MAX8925_IRQ_VCHG_USB_OVP, + MAX8925_IRQ_VCHG_USB_F, + MAX8925_IRQ_VCHG_USB_R, + MAX8925_IRQ_VCHG_THM_OK_R, + MAX8925_IRQ_VCHG_THM_OK_F, + 
MAX8925_IRQ_VCHG_SYSLOW_F, + MAX8925_IRQ_VCHG_SYSLOW_R, + MAX8925_IRQ_VCHG_RST, + MAX8925_IRQ_VCHG_DONE, + MAX8925_IRQ_VCHG_TOPOFF, + MAX8925_IRQ_VCHG_TMR_FAULT, + MAX8925_IRQ_GPM_RSTIN, + MAX8925_IRQ_GPM_MPL, + MAX8925_IRQ_GPM_SW_3SEC, + MAX8925_IRQ_GPM_EXTON_F, + MAX8925_IRQ_GPM_EXTON_R, + MAX8925_IRQ_GPM_SW_1SEC, + MAX8925_IRQ_GPM_SW_F, + MAX8925_IRQ_GPM_SW_R, + MAX8925_IRQ_GPM_SYSCKEN_F, + MAX8925_IRQ_GPM_SYSCKEN_R, + MAX8925_IRQ_RTC_ALARM1, + MAX8925_IRQ_RTC_ALARM0, + MAX8925_IRQ_TSC_STICK, + MAX8925_IRQ_TSC_NSTICK, + MAX8925_NR_IRQS, }; -#define MAX8925_IRQ_VCHG_OVP (0) -#define MAX8925_IRQ_VCHG_F (1) -#define MAX8925_IRQ_VCHG_R (2) -#define MAX8925_IRQ_VCHG_THM_OK_R (8) -#define MAX8925_IRQ_VCHG_THM_OK_F (9) -#define MAX8925_IRQ_VCHG_BATTLOW_F (10) -#define MAX8925_IRQ_VCHG_BATTLOW_R (11) -#define MAX8925_IRQ_VCHG_RST (12) -#define MAX8925_IRQ_VCHG_DONE (13) -#define MAX8925_IRQ_VCHG_TOPOFF (14) -#define MAX8925_IRQ_VCHG_TMR_FAULT (15) -#define MAX8925_IRQ_GPM_RSTIN (16) -#define MAX8925_IRQ_GPM_MPL (17) -#define MAX8925_IRQ_GPM_SW_3SEC (18) -#define MAX8925_IRQ_GPM_EXTON_F (19) -#define MAX8925_IRQ_GPM_EXTON_R (20) -#define MAX8925_IRQ_GPM_SW_1SEC (21) -#define MAX8925_IRQ_GPM_SW_F (22) -#define MAX8925_IRQ_GPM_SW_R (23) -#define MAX8925_IRQ_GPM_SYSCKEN_F (24) -#define MAX8925_IRQ_GPM_SYSCKEN_R (25) - -#define MAX8925_IRQ_TSC_STICK (0) -#define MAX8925_IRQ_TSC_NSTICK (1) - -#define MAX8925_MAX_REGULATOR (23) - struct max8925_irq { irq_handler_t handler; void *data; @@ -172,14 +207,16 @@ struct max8925_irq { struct max8925_chip { struct device *dev; + struct i2c_client *i2c; + struct i2c_client *adc; + struct i2c_client *rtc; + struct max8925_irq irqs[MAX8925_NUM_IRQ]; struct mutex io_lock; struct mutex irq_lock; - struct i2c_client *i2c; - struct max8925_irq irq[MAX8925_NUM_IRQ]; - const char *name; - int chip_id; - int chip_irq; + int irq_base; + int core_irq; + int tsc_irq; }; struct max8925_backlight_pdata { @@ -192,13 +229,25 @@ struct max8925_touch_pdata { unsigned int flags; }; +struct max8925_power_pdata { + int (*set_charger)(int); + unsigned batt_detect:1; + unsigned topoff_threshold:2; + unsigned fast_charge:3; /* charge current */ +}; + +/* + * irq_base: stores IRQ base number of MAX8925 in platform + * tsc_irq: stores IRQ number of MAX8925 TSC + */ struct max8925_platform_data { struct max8925_backlight_pdata *backlight; struct max8925_touch_pdata *touch; + struct max8925_power_pdata *power; struct regulator_init_data *regulator[MAX8925_MAX_REGULATOR]; - int chip_id; - int chip_irq; + int irq_base; + int tsc_irq; }; extern int max8925_reg_read(struct i2c_client *, int); @@ -208,6 +257,12 @@ extern int max8925_bulk_write(struct i2c_client *, int, int, unsigned char *); extern int max8925_set_bits(struct i2c_client *, int, unsigned char, unsigned char); +extern int max8925_request_irq(struct max8925_chip *, int, + irq_handler_t, void *); +extern int max8925_free_irq(struct max8925_chip *, int); +extern int max8925_mask_irq(struct max8925_chip *, int); +extern int max8925_unmask_irq(struct max8925_chip *, int); + extern int max8925_device_init(struct max8925_chip *, struct max8925_platform_data *); extern void max8925_device_exit(struct max8925_chip *); -- cgit v1.2.3 From 34c9120805ff4b3f7a8053bd64157ba564774433 Mon Sep 17 00:00:00 2001 From: Haojian Zhuang Date: Wed, 3 Feb 2010 15:37:23 -0500 Subject: mfd: Clean code in max8925 Remove unused definitions. 
Signed-off-by: Haojian Zhuang Signed-off-by: Samuel Ortiz --- include/linux/mfd/max8925.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include') diff --git a/include/linux/mfd/max8925.h b/include/linux/mfd/max8925.h index 18c1844ed472..5259dfe8c585 100644 --- a/include/linux/mfd/max8925.h +++ b/include/linux/mfd/max8925.h @@ -158,11 +158,6 @@ enum { #define TSC_IRQ_MASK (0x03) #define RTC_IRQ_MASK (0x0c) -#define MAX8925_GPM_NUM_IRQ (40) -#define MAX8925_ADC_NUM_IRQ (8) -#define MAX8925_NUM_IRQ (MAX8925_GPM_NUM_IRQ \ - + MAX8925_ADC_NUM_IRQ) - #define MAX8925_MAX_REGULATOR (23) #define MAX8925_NAME_SIZE (32) @@ -200,17 +195,11 @@ enum { MAX8925_NR_IRQS, }; -struct max8925_irq { - irq_handler_t handler; - void *data; -}; - struct max8925_chip { struct device *dev; struct i2c_client *i2c; struct i2c_client *adc; struct i2c_client *rtc; - struct max8925_irq irqs[MAX8925_NUM_IRQ]; struct mutex io_lock; struct mutex irq_lock; @@ -257,12 +246,6 @@ extern int max8925_bulk_write(struct i2c_client *, int, int, unsigned char *); extern int max8925_set_bits(struct i2c_client *, int, unsigned char, unsigned char); -extern int max8925_request_irq(struct max8925_chip *, int, - irq_handler_t, void *); -extern int max8925_free_irq(struct max8925_chip *, int); -extern int max8925_mask_irq(struct max8925_chip *, int); -extern int max8925_unmask_irq(struct max8925_chip *, int); - extern int max8925_device_init(struct max8925_chip *, struct max8925_platform_data *); extern void max8925_device_exit(struct max8925_chip *); -- cgit v1.2.3 From 2afa62ea76027b00e472ddb672191e6e15425b43 Mon Sep 17 00:00:00 2001 From: Haojian Zhuang Date: Mon, 8 Feb 2010 05:02:00 -0500 Subject: mfd: Use genirq in 88pm860x Use genirq to simplify IRQ handling in 88pm860x. Remove the interface for masking/freeing IRQs on 88pm860x; all of this work is now handled by genirq. Update the touchscreen driver of 88pm860x since the IRQ handling is changed.
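
The consumer-side pattern is small enough to show in one piece. Instead of the chip-private pm860x_request_irq()/pm860x_free_irq() calls, a sub-driver offsets its local interrupt number by chip->irq_base and uses the ordinary genirq API, which is exactly what the touchscreen conversion below does. A minimal sketch, with error handling left out and the "example-pen" name chosen purely for illustration:

    #include <linux/interrupt.h>
    #include <linux/mfd/88pm860x.h>

    /* Request the pen-down interrupt through genirq: a NULL primary handler
     * plus IRQF_ONESHOT gives a nested threaded handler, and the local
     * PM8607_IRQ_PEN number is offset by the chip's irq_base. */
    static int pen_irq_request(struct pm860x_chip *chip,
    			   irq_handler_t thread_fn, void *data)
    {
    	int irq = chip->irq_base + PM8607_IRQ_PEN;

    	return request_threaded_irq(irq, NULL, thread_fn,
    				    IRQF_ONESHOT, "example-pen", data);
    }

    /* Teardown is plain free_irq() rather than pm860x_free_irq(). */
    static void pen_irq_free(struct pm860x_chip *chip, void *data)
    {
    	free_irq(chip->irq_base + PM8607_IRQ_PEN, data);
    }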
Signed-off-by: Haojian Zhuang Signed-off-by: Samuel Ortiz --- drivers/input/touchscreen/88pm860x-ts.c | 15 +- drivers/mfd/88pm860x-core.c | 408 +++++++++++++++++++++++--------- include/linux/mfd/88pm860x.h | 30 +-- 3 files changed, 311 insertions(+), 142 deletions(-) (limited to 'include') diff --git a/drivers/input/touchscreen/88pm860x-ts.c b/drivers/input/touchscreen/88pm860x-ts.c index 56254d2a1f6e..286bb490a9f2 100644 --- a/drivers/input/touchscreen/88pm860x-ts.c +++ b/drivers/input/touchscreen/88pm860x-ts.c @@ -54,7 +54,6 @@ static irqreturn_t pm860x_touch_handler(int irq, void *data) int z1, z2, rt = 0; int ret; - pm860x_mask_irq(chip, irq); ret = pm860x_bulk_read(touch->i2c, MEAS_TSIX_1, MEAS_LEN, buf); if (ret < 0) goto out; @@ -83,7 +82,6 @@ static irqreturn_t pm860x_touch_handler(int irq, void *data) dev_dbg(chip->dev, "pen release\n"); } input_sync(touch->idev); - pm860x_unmask_irq(chip, irq); out: return IRQ_HANDLED; @@ -92,7 +90,6 @@ out: static int pm860x_touch_open(struct input_dev *dev) { struct pm860x_touch *touch = input_get_drvdata(dev); - struct pm860x_chip *chip = touch->chip; int data, ret; data = MEAS_PD_EN | MEAS_TSIX_EN | MEAS_TSIY_EN @@ -100,7 +97,6 @@ static int pm860x_touch_open(struct input_dev *dev) ret = pm860x_set_bits(touch->i2c, MEAS_EN3, data, data); if (ret < 0) goto out; - pm860x_unmask_irq(chip, touch->irq); return 0; out: return ret; @@ -109,13 +105,11 @@ out: static void pm860x_touch_close(struct input_dev *dev) { struct pm860x_touch *touch = input_get_drvdata(dev); - struct pm860x_chip *chip = touch->chip; int data; data = MEAS_PD_EN | MEAS_TSIX_EN | MEAS_TSIY_EN | MEAS_TSIZ1_EN | MEAS_TSIZ2_EN; pm860x_set_bits(touch->i2c, MEAS_EN3, data, 0); - pm860x_mask_irq(chip, touch->irq); } static int __devinit pm860x_touch_probe(struct platform_device *pdev) @@ -164,11 +158,12 @@ static int __devinit pm860x_touch_probe(struct platform_device *pdev) touch->idev->close = pm860x_touch_close; touch->chip = chip; touch->i2c = (chip->id == CHIP_PM8607) ? 
chip->client : chip->companion; - touch->irq = irq; + touch->irq = irq + chip->irq_base; touch->res_x = pdata->res_x; input_set_drvdata(touch->idev, touch); - ret = pm860x_request_irq(chip, irq, pm860x_touch_handler, touch); + ret = request_threaded_irq(touch->irq, NULL, pm860x_touch_handler, + IRQF_ONESHOT, "touch", touch); if (ret < 0) goto out_irq; @@ -194,7 +189,7 @@ static int __devinit pm860x_touch_probe(struct platform_device *pdev) platform_set_drvdata(pdev, touch); return 0; out_rg: - pm860x_free_irq(chip, irq); + free_irq(touch->irq, touch); out_irq: input_free_device(touch->idev); out: @@ -207,7 +202,7 @@ static int __devexit pm860x_touch_remove(struct platform_device *pdev) struct pm860x_touch *touch = platform_get_drvdata(pdev); input_unregister_device(touch->idev); - pm860x_free_irq(touch->chip, touch->irq); + free_irq(touch->irq, touch); platform_set_drvdata(pdev, NULL); kfree(touch); return 0; diff --git a/drivers/mfd/88pm860x-core.c b/drivers/mfd/88pm860x-core.c index 16f0dca707a7..6a14d2b1ccf0 100644 --- a/drivers/mfd/88pm860x-core.c +++ b/drivers/mfd/88pm860x-core.c @@ -12,11 +12,14 @@ #include #include #include +#include #include #include #include #include +#define INT_STATUS_NUM 3 + char pm860x_backlight_name[][MFD_NAME_SIZE] = { "backlight-0", "backlight-1", @@ -119,6 +122,42 @@ static struct mfd_cell touch_devs[] = { .flags = IORESOURCE_IO, \ } +static struct resource power_supply_resources[] = { + { + .name = "88pm860x-power", + .start = PM8607_IRQ_CHG, + .end = PM8607_IRQ_CHG, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct mfd_cell power_devs[] = { + { + .name = "88pm860x-power", + .num_resources = 1, + .resources = &power_supply_resources[0], + .id = -1, + }, +}; + +static struct resource onkey_resources[] = { + { + .name = "88pm860x-onkey", + .start = PM8607_IRQ_ONKEY, + .end = PM8607_IRQ_ONKEY, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct mfd_cell onkey_devs[] = { + { + .name = "88pm860x-onkey", + .num_resources = 1, + .resources = &onkey_resources[0], + .id = -1, + }, +}; + static struct resource regulator_resources[] = { PM8607_REG_RESOURCE(BUCK1, BUCK1), PM8607_REG_RESOURCE(BUCK2, BUCK2), @@ -163,129 +202,224 @@ static struct mfd_cell regulator_devs[] = { PM8607_REG_DEVS(ldo14, LDO14), }; -#define CHECK_IRQ(irq) \ -do { \ - if ((irq < 0) || (irq >= PM860X_NUM_IRQ)) \ - return -EINVAL; \ -} while (0) - -/* IRQs only occur on 88PM8607 */ -int pm860x_mask_irq(struct pm860x_chip *chip, int irq) -{ - struct i2c_client *i2c = (chip->id == CHIP_PM8607) ? 
chip->client \ - : chip->companion; - int offset, data, ret; - - CHECK_IRQ(irq); - - offset = (irq >> 3) + PM8607_INT_MASK_1; - data = 1 << (irq % 8); - ret = pm860x_set_bits(i2c, offset, data, 0); +struct pm860x_irq_data { + int reg; + int mask_reg; + int enable; /* enable or not */ + int offs; /* bit offset in mask register */ +}; - return ret; -} -EXPORT_SYMBOL(pm860x_mask_irq); +static struct pm860x_irq_data pm860x_irqs[] = { + [PM8607_IRQ_ONKEY] = { + .reg = PM8607_INT_STATUS1, + .mask_reg = PM8607_INT_MASK_1, + .offs = 1 << 0, + }, + [PM8607_IRQ_EXTON] = { + .reg = PM8607_INT_STATUS1, + .mask_reg = PM8607_INT_MASK_1, + .offs = 1 << 1, + }, + [PM8607_IRQ_CHG] = { + .reg = PM8607_INT_STATUS1, + .mask_reg = PM8607_INT_MASK_1, + .offs = 1 << 2, + }, + [PM8607_IRQ_BAT] = { + .reg = PM8607_INT_STATUS1, + .mask_reg = PM8607_INT_MASK_1, + .offs = 1 << 3, + }, + [PM8607_IRQ_RTC] = { + .reg = PM8607_INT_STATUS1, + .mask_reg = PM8607_INT_MASK_1, + .offs = 1 << 4, + }, + [PM8607_IRQ_CC] = { + .reg = PM8607_INT_STATUS1, + .mask_reg = PM8607_INT_MASK_1, + .offs = 1 << 5, + }, + [PM8607_IRQ_VBAT] = { + .reg = PM8607_INT_STATUS2, + .mask_reg = PM8607_INT_MASK_2, + .offs = 1 << 0, + }, + [PM8607_IRQ_VCHG] = { + .reg = PM8607_INT_STATUS2, + .mask_reg = PM8607_INT_MASK_2, + .offs = 1 << 1, + }, + [PM8607_IRQ_VSYS] = { + .reg = PM8607_INT_STATUS2, + .mask_reg = PM8607_INT_MASK_2, + .offs = 1 << 2, + }, + [PM8607_IRQ_TINT] = { + .reg = PM8607_INT_STATUS2, + .mask_reg = PM8607_INT_MASK_2, + .offs = 1 << 3, + }, + [PM8607_IRQ_GPADC0] = { + .reg = PM8607_INT_STATUS2, + .mask_reg = PM8607_INT_MASK_2, + .offs = 1 << 4, + }, + [PM8607_IRQ_GPADC1] = { + .reg = PM8607_INT_STATUS2, + .mask_reg = PM8607_INT_MASK_2, + .offs = 1 << 5, + }, + [PM8607_IRQ_GPADC2] = { + .reg = PM8607_INT_STATUS2, + .mask_reg = PM8607_INT_MASK_2, + .offs = 1 << 6, + }, + [PM8607_IRQ_GPADC3] = { + .reg = PM8607_INT_STATUS2, + .mask_reg = PM8607_INT_MASK_2, + .offs = 1 << 7, + }, + [PM8607_IRQ_AUDIO_SHORT] = { + .reg = PM8607_INT_STATUS3, + .mask_reg = PM8607_INT_MASK_3, + .offs = 1 << 0, + }, + [PM8607_IRQ_PEN] = { + .reg = PM8607_INT_STATUS3, + .mask_reg = PM8607_INT_MASK_3, + .offs = 1 << 1, + }, + [PM8607_IRQ_HEADSET] = { + .reg = PM8607_INT_STATUS3, + .mask_reg = PM8607_INT_MASK_3, + .offs = 1 << 2, + }, + [PM8607_IRQ_HOOK] = { + .reg = PM8607_INT_STATUS3, + .mask_reg = PM8607_INT_MASK_3, + .offs = 1 << 3, + }, + [PM8607_IRQ_MICIN] = { + .reg = PM8607_INT_STATUS3, + .mask_reg = PM8607_INT_MASK_3, + .offs = 1 << 4, + }, + [PM8607_IRQ_CHG_FAIL] = { + .reg = PM8607_INT_STATUS3, + .mask_reg = PM8607_INT_MASK_3, + .offs = 1 << 5, + }, + [PM8607_IRQ_CHG_DONE] = { + .reg = PM8607_INT_STATUS3, + .mask_reg = PM8607_INT_MASK_3, + .offs = 1 << 6, + }, + [PM8607_IRQ_CHG_FAULT] = { + .reg = PM8607_INT_STATUS3, + .mask_reg = PM8607_INT_MASK_3, + .offs = 1 << 7, + }, +}; -int pm860x_unmask_irq(struct pm860x_chip *chip, int irq) +static inline struct pm860x_irq_data *irq_to_pm860x(struct pm860x_chip *chip, + int irq) { - struct i2c_client *i2c = (chip->id == CHIP_PM8607) ? 
chip->client \ - : chip->companion; - int offset, data, ret; - - CHECK_IRQ(irq); - - offset = (irq >> 3) + PM8607_INT_MASK_1; - data = 1 << (irq % 8); - ret = pm860x_set_bits(i2c, offset, data, data); - - return ret; + return &pm860x_irqs[irq - chip->irq_base]; } -EXPORT_SYMBOL(pm860x_unmask_irq); -#define INT_STATUS_NUM (3) - -static irqreturn_t pm8607_irq_thread(int irq, void *data) +static irqreturn_t pm860x_irq(int irq, void *data) { - DECLARE_BITMAP(irq_status, PM860X_NUM_IRQ); struct pm860x_chip *chip = data; - struct i2c_client *i2c = (chip->id == CHIP_PM8607) ? chip->client \ - : chip->companion; - unsigned char status_buf[INT_STATUS_NUM << 1]; - unsigned long value; - int i, ret; - - irq_status[0] = 0; - - /* read out status register */ - ret = pm860x_bulk_read(i2c, PM8607_INT_STATUS1, - INT_STATUS_NUM << 1, status_buf); - if (ret < 0) - goto out; - if (chip->irq_mode) { - /* 0, clear by read. 1, clear by write */ - ret = pm860x_bulk_write(i2c, PM8607_INT_STATUS1, - INT_STATUS_NUM, status_buf); - if (ret < 0) - goto out; - } - - /* clear masked interrupt status */ - for (i = 0, value = 0; i < INT_STATUS_NUM; i++) { - status_buf[i] &= status_buf[i + INT_STATUS_NUM]; - irq_status[0] |= status_buf[i] << (i * 8); - } - - while (!bitmap_empty(irq_status, PM860X_NUM_IRQ)) { - irq = find_first_bit(irq_status, PM860X_NUM_IRQ); - clear_bit(irq, irq_status); - dev_dbg(chip->dev, "Servicing IRQ #%d\n", irq); - - mutex_lock(&chip->irq_lock); - if (chip->irq[irq].handler) - chip->irq[irq].handler(irq, chip->irq[irq].data); - else { - pm860x_mask_irq(chip, irq); - dev_err(chip->dev, "Nobody cares IRQ %d. " - "Now mask it.\n", irq); - for (i = 0; i < (INT_STATUS_NUM << 1); i++) { - dev_err(chip->dev, "status[%d]:%x\n", i, - status_buf[i]); - } + struct pm860x_irq_data *irq_data; + struct i2c_client *i2c; + int read_reg = -1, value = 0; + int i; + + i2c = (chip->id == CHIP_PM8607) ? chip->client : chip->companion; + for (i = 0; i < ARRAY_SIZE(pm860x_irqs); i++) { + irq_data = &pm860x_irqs[i]; + if (read_reg != irq_data->reg) { + read_reg = irq_data->reg; + value = pm860x_reg_read(i2c, irq_data->reg); } - mutex_unlock(&chip->irq_lock); + if (value & irq_data->enable) + handle_nested_irq(chip->irq_base + i); } -out: return IRQ_HANDLED; } -int pm860x_request_irq(struct pm860x_chip *chip, int irq, - irq_handler_t handler, void *data) +static void pm860x_irq_lock(unsigned int irq) { - CHECK_IRQ(irq); - if (!handler) - return -EINVAL; + struct pm860x_chip *chip = get_irq_chip_data(irq); mutex_lock(&chip->irq_lock); - chip->irq[irq].handler = handler; - chip->irq[irq].data = data; - mutex_unlock(&chip->irq_lock); - - return 0; } -EXPORT_SYMBOL(pm860x_request_irq); -int pm860x_free_irq(struct pm860x_chip *chip, int irq) +static void pm860x_irq_sync_unlock(unsigned int irq) { - CHECK_IRQ(irq); + struct pm860x_chip *chip = get_irq_chip_data(irq); + struct pm860x_irq_data *irq_data; + struct i2c_client *i2c; + static unsigned char cached[3] = {0x0, 0x0, 0x0}; + unsigned char mask[3]; + int i; + + i2c = (chip->id == CHIP_PM8607) ? chip->client : chip->companion; + /* Load cached value. 
In initial, all IRQs are masked */ + for (i = 0; i < 3; i++) + mask[i] = cached[i]; + for (i = 0; i < ARRAY_SIZE(pm860x_irqs); i++) { + irq_data = &pm860x_irqs[i]; + switch (irq_data->mask_reg) { + case PM8607_INT_MASK_1: + mask[0] &= ~irq_data->offs; + mask[0] |= irq_data->enable; + break; + case PM8607_INT_MASK_2: + mask[1] &= ~irq_data->offs; + mask[1] |= irq_data->enable; + break; + case PM8607_INT_MASK_3: + mask[2] &= ~irq_data->offs; + mask[2] |= irq_data->enable; + break; + default: + dev_err(chip->dev, "wrong IRQ\n"); + break; + } + } + /* update mask into registers */ + for (i = 0; i < 3; i++) { + if (mask[i] != cached[i]) { + cached[i] = mask[i]; + pm860x_reg_write(i2c, PM8607_INT_MASK_1 + i, mask[i]); + } + } - mutex_lock(&chip->irq_lock); - chip->irq[irq].handler = NULL; - chip->irq[irq].data = NULL; mutex_unlock(&chip->irq_lock); +} - return 0; +static void pm860x_irq_enable(unsigned int irq) +{ + struct pm860x_chip *chip = get_irq_chip_data(irq); + pm860x_irqs[irq - chip->irq_base].enable + = pm860x_irqs[irq - chip->irq_base].offs; } -EXPORT_SYMBOL(pm860x_free_irq); + +static void pm860x_irq_disable(unsigned int irq) +{ + struct pm860x_chip *chip = get_irq_chip_data(irq); + pm860x_irqs[irq - chip->irq_base].enable = 0; +} + +static struct irq_chip pm860x_irq_chip = { + .name = "88pm860x", + .bus_lock = pm860x_irq_lock, + .bus_sync_unlock = pm860x_irq_sync_unlock, + .enable = pm860x_irq_enable, + .disable = pm860x_irq_disable, +}; static int __devinit device_gpadc_init(struct pm860x_chip *chip, struct pm860x_platform_data *pdata) @@ -348,9 +482,15 @@ static int __devinit device_irq_init(struct pm860x_chip *chip, struct i2c_client *i2c = (chip->id == CHIP_PM8607) ? chip->client \ : chip->companion; unsigned char status_buf[INT_STATUS_NUM]; - int data, mask, ret = -EINVAL; + unsigned long flags = IRQF_TRIGGER_FALLING | IRQF_ONESHOT; + struct irq_desc *desc; + int i, data, mask, ret = -EINVAL; + int __irq; - mutex_init(&chip->irq_lock); + if (!pdata || !pdata->irq_base) { + dev_warn(chip->dev, "No interrupt support on IRQ base\n"); + return -EINVAL; + } mask = PM8607_B0_MISC1_INV_INT | PM8607_B0_MISC1_INT_CLEAR | PM8607_B0_MISC1_INT_MASK; @@ -389,25 +529,45 @@ static int __devinit device_irq_init(struct pm860x_chip *chip, if (ret < 0) goto out; - memset(chip->irq, 0, sizeof(struct pm860x_irq) * PM860X_NUM_IRQ); - - ret = request_threaded_irq(i2c->irq, NULL, pm8607_irq_thread, - IRQF_ONESHOT | IRQF_TRIGGER_LOW, - "88PM8607", chip); - if (ret < 0) { - dev_err(chip->dev, "Failed to request IRQ #%d.\n", i2c->irq); + mutex_init(&chip->irq_lock); + chip->irq_base = pdata->irq_base; + chip->core_irq = i2c->irq; + if (!chip->core_irq) goto out; + + desc = irq_to_desc(chip->core_irq); + + /* register IRQ by genirq */ + for (i = 0; i < ARRAY_SIZE(pm860x_irqs); i++) { + __irq = i + chip->irq_base; + set_irq_chip_data(__irq, chip); + set_irq_chip_and_handler(__irq, &pm860x_irq_chip, + handle_edge_irq); + set_irq_nested_thread(__irq, 1); +#ifdef CONFIG_ARM + set_irq_flags(__irq, IRQF_VALID); +#else + set_irq_noprobe(__irq); +#endif } - chip->chip_irq = i2c->irq; + + ret = request_threaded_irq(chip->core_irq, NULL, pm860x_irq, flags, + "88pm860x", chip); + if (ret) { + dev_err(chip->dev, "Failed to request IRQ: %d\n", ret); + chip->core_irq = 0; + } + return 0; out: + chip->core_irq = 0; return ret; } static void __devexit device_irq_exit(struct pm860x_chip *chip) { - if (chip->chip_irq >= 0) - free_irq(chip->chip_irq, chip); + if (chip->core_irq) + free_irq(chip->core_irq, chip); } static 
void __devinit device_8606_init(struct pm860x_chip *chip, @@ -513,6 +673,26 @@ static void __devinit device_8607_init(struct pm860x_chip *chip, goto out_dev; } } + + if (pdata && pdata->power) { + ret = mfd_add_devices(chip->dev, 0, &power_devs[0], + ARRAY_SIZE(power_devs), + &power_supply_resources[0], 0); + if (ret < 0) { + dev_err(chip->dev, "Failed to add power supply " + "subdev\n"); + goto out_dev; + } + } + + ret = mfd_add_devices(chip->dev, 0, &onkey_devs[0], + ARRAY_SIZE(onkey_devs), + &onkey_resources[0], 0); + if (ret < 0) { + dev_err(chip->dev, "Failed to add onkey subdev\n"); + goto out_dev; + } + return; out_dev: mfd_remove_devices(chip->dev); @@ -524,7 +704,7 @@ out: int pm860x_device_init(struct pm860x_chip *chip, struct pm860x_platform_data *pdata) { - chip->chip_irq = -EINVAL; + chip->core_irq = 0; switch (chip->id) { case CHIP_PM8606: diff --git a/include/linux/mfd/88pm860x.h b/include/linux/mfd/88pm860x.h index 80bc82a7ac96..73f92c5feea2 100644 --- a/include/linux/mfd/88pm860x.h +++ b/include/linux/mfd/88pm860x.h @@ -262,12 +262,13 @@ enum { /* Interrupt Number in 88PM8607 */ enum { - PM8607_IRQ_ONKEY = 0, + PM8607_IRQ_ONKEY, PM8607_IRQ_EXTON, PM8607_IRQ_CHG, PM8607_IRQ_BAT, PM8607_IRQ_RTC, - PM8607_IRQ_VBAT = 8, + PM8607_IRQ_CC, + PM8607_IRQ_VBAT, PM8607_IRQ_VCHG, PM8607_IRQ_VSYS, PM8607_IRQ_TINT, @@ -275,7 +276,7 @@ enum { PM8607_IRQ_GPADC1, PM8607_IRQ_GPADC2, PM8607_IRQ_GPADC3, - PM8607_IRQ_AUDIO_SHORT = 16, + PM8607_IRQ_AUDIO_SHORT, PM8607_IRQ_PEN, PM8607_IRQ_HEADSET, PM8607_IRQ_HOOK, @@ -291,26 +292,19 @@ enum { PM8607_CHIP_B0 = 0x48, }; -#define PM860X_NUM_IRQ 24 - -struct pm860x_irq { - irq_handler_t handler; - void *data; -}; - struct pm860x_chip { struct device *dev; struct mutex io_lock; struct mutex irq_lock; struct i2c_client *client; struct i2c_client *companion; /* companion chip client */ - struct pm860x_irq irq[PM860X_NUM_IRQ]; int buck3_double; /* DVC ramp slope double */ unsigned short companion_addr; int id; int irq_mode; - int chip_irq; + int irq_base; + int core_irq; unsigned char chip_version; }; @@ -347,14 +341,20 @@ struct pm860x_touch_pdata { unsigned long flags; }; +struct pm860x_power_pdata { + unsigned fast_charge; /* charge current */ +}; + struct pm860x_platform_data { struct pm860x_backlight_pdata *backlight; struct pm860x_led_pdata *led; struct pm860x_touch_pdata *touch; + struct pm860x_power_pdata *power; unsigned short companion_addr; /* I2C address of companion chip */ int i2c_port; /* Controlled by GI2C or PI2C */ int irq_mode; /* Clear interrupt by read/write(0/1) */ + int irq_base; /* IRQ base number of 88pm860x */ struct regulator_init_data *regulator[PM8607_MAX_REGULATOR]; }; @@ -368,12 +368,6 @@ extern int pm860x_bulk_write(struct i2c_client *, int, int, unsigned char *); extern int pm860x_set_bits(struct i2c_client *, int, unsigned char, unsigned char); -extern int pm860x_mask_irq(struct pm860x_chip *, int); -extern int pm860x_unmask_irq(struct pm860x_chip *, int); -extern int pm860x_request_irq(struct pm860x_chip *, int, - irq_handler_t handler, void *); -extern int pm860x_free_irq(struct pm860x_chip *, int); - extern int pm860x_device_init(struct pm860x_chip *chip, struct pm860x_platform_data *pdata); extern void pm860x_device_exit(struct pm860x_chip *chip); -- cgit v1.2.3 From a29aaf55cd6faa75e35abfe00bd3ffc537490485 Mon Sep 17 00:00:00 2001 From: Moiz Sonasath Date: Tue, 16 Feb 2010 18:57:21 -0600 Subject: mfd: Disable TWL4030/5030 I2C1/I2C4 internal pull-ups This patch disables TWL4030/5030 I2C1 adn I2C4(SR) internal 
pull-up, to use only the external HW resistor >=470 Ohm for the assured functionality in HS mode. While testing the I2C in High Speed mode, it was discovered that without a proper pull-up resistor, there is data corruption during multi-byte transfer. RTC(time_set) test case was used for testing. From the analysis done, it was concluded that ideally we need a pull-up of 1.6k Ohm(recomended) or atleast 470 Ohm or greater for assured performance in HS mode. Signed-off-by: Moiz Sonasath Signed-off-by: Allen Pais Signed-off-by: Samuel Ortiz --- drivers/mfd/twl-core.c | 13 +++++++++++++ include/linux/i2c/twl.h | 15 +++++++++++++++ 2 files changed, 28 insertions(+) (limited to 'include') diff --git a/drivers/mfd/twl-core.c b/drivers/mfd/twl-core.c index 942a1e837819..7ccc39f3aa48 100644 --- a/drivers/mfd/twl-core.c +++ b/drivers/mfd/twl-core.c @@ -958,6 +958,7 @@ twl_probe(struct i2c_client *client, const struct i2c_device_id *id) int status; unsigned i; struct twl4030_platform_data *pdata = client->dev.platform_data; + u8 temp; if (!pdata) { dev_dbg(&client->dev, "no platform data?\n"); @@ -1025,6 +1026,18 @@ twl_probe(struct i2c_client *client, const struct i2c_device_id *id) goto fail; } + /* Disable TWL4030/TWL5030 I2C Pull-up on I2C1 and I2C4(SR) interface. + * Program I2C_SCL_CTRL_PU(bit 0)=0, I2C_SDA_CTRL_PU (bit 2)=0, + * SR_I2C_SCL_CTRL_PU(bit 4)=0 and SR_I2C_SDA_CTRL_PU(bit 6)=0. + */ + + if (twl_class_is_4030()) { + twl_i2c_read_u8(TWL4030_MODULE_INTBR, &temp, REG_GPPUPDCTR1); + temp &= ~(SR_I2C_SDA_CTRL_PU | SR_I2C_SCL_CTRL_PU | \ + I2C_SDA_CTRL_PU | I2C_SCL_CTRL_PU); + twl_i2c_write_u8(TWL4030_MODULE_INTBR, temp, REG_GPPUPDCTR1); + } + status = add_children(pdata, id->driver_data); fail: if (status < 0) diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h index 9733e9e53f2b..e28d4c0e45bd 100644 --- a/include/linux/i2c/twl.h +++ b/include/linux/i2c/twl.h @@ -239,6 +239,21 @@ int twl6030_interrupt_mask(u8 bit_mask, u8 offset); /*----------------------------------------------------------------------*/ +/*Interface Bit Register (INTBR) offsets + *(Use TWL_4030_MODULE_INTBR) + */ + +#define REG_GPPUPDCTR1 0x0F + +/*I2C1 and I2C4(SR) SDA/SCL pull-up control bits */ + +#define I2C_SCL_CTRL_PU BIT(0) +#define I2C_SDA_CTRL_PU BIT(2) +#define SR_I2C_SCL_CTRL_PU BIT(4) +#define SR_I2C_SDA_CTRL_PU BIT(6) + +/*----------------------------------------------------------------------*/ + /* * Keypad register offsets (use TWL4030_MODULE_KEYPAD) * ... 
SIH/interrupt only -- cgit v1.2.3 From fa0d976298b25d090fafc3460c63fee1c8eea854 Mon Sep 17 00:00:00 2001 From: Balaji T K Date: Fri, 19 Feb 2010 12:39:38 +0100 Subject: mfd: Add twl6030 base addr for ID0, ID1, ID2 Add base address for generic slave ID0, ID1, ID2 and introduced one more entry to align RTC module number between twl4030 and twl6030 Signed-off-by: Balaji T K Signed-off-by: Samuel Ortiz --- drivers/mfd/twl-core.c | 7 +++++-- include/linux/i2c/twl.h | 5 +++++ 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/mfd/twl-core.c b/drivers/mfd/twl-core.c index 7ccc39f3aa48..562cd4935e17 100644 --- a/drivers/mfd/twl-core.c +++ b/drivers/mfd/twl-core.c @@ -198,6 +198,7 @@ /* subchip/slave 3 0x4B - AUDIO */ #define TWL6030_BASEADD_AUDIO 0x0000 #define TWL6030_BASEADD_RSV 0x0000 +#define TWL6030_BASEADD_ZERO 0x0000 /* Few power values */ #define R_CFG_BOOT 0x05 @@ -313,9 +314,11 @@ static struct twl_mapping twl6030_map[] = { { SUB_CHIP_ID1, TWL6030_BASEADD_CHARGER }, { SUB_CHIP_ID1, TWL6030_BASEADD_GASGAUGE }, { SUB_CHIP_ID1, TWL6030_BASEADD_PWM }, - { SUB_CHIP_ID2, TWL6030_BASEADD_RSV }, - { SUB_CHIP_ID2, TWL6030_BASEADD_RSV }, + { SUB_CHIP_ID0, TWL6030_BASEADD_ZERO }, + { SUB_CHIP_ID1, TWL6030_BASEADD_ZERO }, + { SUB_CHIP_ID2, TWL6030_BASEADD_ZERO }, + { SUB_CHIP_ID2, TWL6030_BASEADD_ZERO }, { SUB_CHIP_ID2, TWL6030_BASEADD_RSV }, { SUB_CHIP_ID2, TWL6030_BASEADD_RSV }, { SUB_CHIP_ID2, TWL6030_BASEADD_RSV }, diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h index e28d4c0e45bd..70d4caf48571 100644 --- a/include/linux/i2c/twl.h +++ b/include/linux/i2c/twl.h @@ -80,6 +80,11 @@ #define TWL_MODULE_PM_MASTER TWL4030_MODULE_PM_MASTER #define TWL_MODULE_PM_RECEIVER TWL4030_MODULE_PM_RECEIVER #define TWL_MODULE_RTC TWL4030_MODULE_RTC +#define TWL_MODULE_PWM TWL4030_MODULE_PWM0 + +#define TWL6030_MODULE_ID0 0x0D +#define TWL6030_MODULE_ID1 0x0E +#define TWL6030_MODULE_ID2 0x0F #define GPIO_INTR_OFFSET 0 #define KEYPAD_INTR_OFFSET 1 -- cgit v1.2.3 From b741d440a97c376af309e902eeb2f3c5673d2c92 Mon Sep 17 00:00:00 2001 From: Yusuke Goda Date: Wed, 17 Feb 2010 16:37:55 +0900 Subject: tmio_mmc: Add MMC_CAP_MMC_HIGHSPEED support V2 Enable MMC_CAP_XX support in the tmio_mmc driver if pdata->capabilities is set. 
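As a hedged illustration (not part of the patch itself), a board file could route an extra capability through the new field roughly as follows; the structure member names come from the hunks below, while the variable name and clock rate are invented:

#include <linux/mfd/tmio.h>
#include <linux/mmc/host.h>

/* Hypothetical platform data: anything placed in .capabilities is
 * OR-ed into mmc->caps by tmio_mmc_probe(). */
static struct tmio_mmc_data example_tmio_mmc_data = {
	.hclk		= 24000000,	/* invented example clock rate */
	.capabilities	= MMC_CAP_MMC_HIGHSPEED,
};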
Signed-off-by: Yusuke Goda Signed-off-by: Magnus Damm Signed-off-by: Samuel Ortiz --- drivers/mmc/host/tmio_mmc.c | 1 + include/linux/mfd/tmio.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/drivers/mmc/host/tmio_mmc.c b/drivers/mmc/host/tmio_mmc.c index e22c3fa3516a..e2c0cc9a0ca6 100644 --- a/drivers/mmc/host/tmio_mmc.c +++ b/drivers/mmc/host/tmio_mmc.c @@ -550,6 +550,7 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev) mmc->ops = &tmio_mmc_ops; mmc->caps = MMC_CAP_4_BIT_DATA; + mmc->caps |= pdata->capabilities; mmc->f_max = pdata->hclk; mmc->f_min = mmc->f_max / 512; mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34; diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index 9cb1834deffa..37d941420ce4 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -60,6 +60,7 @@ void tmio_core_mmc_clk_div(void __iomem *cnf, int shift, int state); */ struct tmio_mmc_data { const unsigned int hclk; + unsigned long capabilities; void (*set_pwr)(struct platform_device *host, int state); void (*set_clk_div)(struct platform_device *host, int state); }; -- cgit v1.2.3 From 707f0b2fbc65876e8abd94d26d8d0620600c05d4 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 17 Feb 2010 16:38:14 +0900 Subject: tmio_mmc: Remove const from platform data V3 Remove const from the tmio-mmc platform data hclk V3. This change makes it possible to remove the type cast from the sh_mobile_sdhi driver which is using the clock framework to get the clock rate. Signed-off-by: Magnus Damm Signed-off-by: Samuel Ortiz --- include/linux/mfd/tmio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index 37d941420ce4..c3f7dff8effc 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -59,7 +59,7 @@ void tmio_core_mmc_clk_div(void __iomem *cnf, int shift, int state); * data for the MMC controller */ struct tmio_mmc_data { - const unsigned int hclk; + unsigned int hclk; unsigned long capabilities; void (*set_pwr)(struct platform_device *host, int state); void (*set_clk_div)(struct platform_device *host, int state); -- cgit v1.2.3 From f92e8f8144243a3651b2e350b706ea2d04931f8c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 17 Feb 2010 18:45:25 +0000 Subject: mfd: Add WM831x revision B support Revision B of the WM831x devices changes the sense of the tristate bit for GPIO configuration, inverting it to become an enable instead. Take account of this in the gpiolib driver. A current sink regulation status bit has also been added in revision B, add a flag indicating if it's present but don't use it yet. This revision also adds an interrupt on key up for the ON pin event which the existing code is able to take advantage of. 
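A minimal sketch of the resulting revision check, mirroring the gpiolib hunks below (the helper name is invented; the flag and bit names are taken from the patch):

#include <linux/mfd/wm831x/core.h>
#include <linux/mfd/wm831x/gpio.h>

/* On revision A the 0x0080 bit tristates the GPIO when set (GPN_TRI);
 * on revision B the same bit becomes an output enable (GPN_ENA), so
 * the sense is inverted when has_gpio_ena is set. */
static bool example_gpio_is_tristated(struct wm831x *wm831x, u16 reg)
{
	bool tristated = reg & WM831X_GPN_TRI;

	if (wm831x->has_gpio_ena)	/* revision B or later */
		tristated = !tristated;

	return tristated;
}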
Signed-off-by: Mark Brown Signed-off-by: Samuel Ortiz --- drivers/gpio/wm831x-gpio.c | 21 ++++++++++++++++----- drivers/mfd/wm831x-core.c | 15 +++++++++++++++ include/linux/mfd/wm831x/core.h | 4 ++++ include/linux/mfd/wm831x/gpio.h | 4 ++++ 4 files changed, 39 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/gpio/wm831x-gpio.c b/drivers/gpio/wm831x-gpio.c index 2554180534a1..5b8dc098d80f 100644 --- a/drivers/gpio/wm831x-gpio.c +++ b/drivers/gpio/wm831x-gpio.c @@ -38,10 +38,13 @@ static int wm831x_gpio_direction_in(struct gpio_chip *chip, unsigned offset) { struct wm831x_gpio *wm831x_gpio = to_wm831x_gpio(chip); struct wm831x *wm831x = wm831x_gpio->wm831x; + int val = WM831X_GPN_DIR; + + if (wm831x->has_gpio_ena) + val |= WM831X_GPN_TRI; return wm831x_set_bits(wm831x, WM831X_GPIO1_CONTROL + offset, - WM831X_GPN_DIR | WM831X_GPN_TRI, - WM831X_GPN_DIR); + WM831X_GPN_DIR | WM831X_GPN_TRI, val); } static int wm831x_gpio_get(struct gpio_chip *chip, unsigned offset) @@ -74,10 +77,14 @@ static int wm831x_gpio_direction_out(struct gpio_chip *chip, { struct wm831x_gpio *wm831x_gpio = to_wm831x_gpio(chip); struct wm831x *wm831x = wm831x_gpio->wm831x; + int val = 0; int ret; + if (wm831x->has_gpio_ena) + val |= WM831X_GPN_TRI; + ret = wm831x_set_bits(wm831x, WM831X_GPIO1_CONTROL + offset, - WM831X_GPN_DIR | WM831X_GPN_TRI, 0); + WM831X_GPN_DIR | WM831X_GPN_TRI, val); if (ret < 0) return ret; @@ -103,7 +110,7 @@ static void wm831x_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip) { struct wm831x_gpio *wm831x_gpio = to_wm831x_gpio(chip); struct wm831x *wm831x = wm831x_gpio->wm831x; - int i; + int i, tristated; for (i = 0; i < chip->ngpio; i++) { int gpio = i + chip->base; @@ -170,6 +177,10 @@ static void wm831x_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip) break; } + tristated = reg & WM831X_GPN_TRI; + if (wm831x->has_gpio_ena) + tristated = !tristated; + seq_printf(s, " %s %s %s %s%s\n" " %s%s (0x%4x)\n", reg & WM831X_GPN_DIR ? "in" : "out", @@ -178,7 +189,7 @@ static void wm831x_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip) powerdomain, reg & WM831X_GPN_POL ? "" : " inverted", reg & WM831X_GPN_OD ? "open-drain" : "CMOS", - reg & WM831X_GPN_TRI ? " tristated" : "", + tristated ? 
" tristated" : "", reg); } } diff --git a/drivers/mfd/wm831x-core.c b/drivers/mfd/wm831x-core.c index 4b2021af1d96..c428d9f918fc 100644 --- a/drivers/mfd/wm831x-core.c +++ b/drivers/mfd/wm831x-core.c @@ -1449,18 +1449,33 @@ static int wm831x_device_init(struct wm831x *wm831x, unsigned long id, int irq) case WM8310: parent = WM8310; wm831x->num_gpio = 16; + if (rev > 0) { + wm831x->has_gpio_ena = 1; + wm831x->has_cs_sts = 1; + } + dev_info(wm831x->dev, "WM8310 revision %c\n", 'A' + rev); break; case WM8311: parent = WM8311; wm831x->num_gpio = 16; + if (rev > 0) { + wm831x->has_gpio_ena = 1; + wm831x->has_cs_sts = 1; + } + dev_info(wm831x->dev, "WM8311 revision %c\n", 'A' + rev); break; case WM8312: parent = WM8312; wm831x->num_gpio = 16; + if (rev > 0) { + wm831x->has_gpio_ena = 1; + wm831x->has_cs_sts = 1; + } + dev_info(wm831x->dev, "WM8312 revision %c\n", 'A' + rev); break; diff --git a/include/linux/mfd/wm831x/core.h b/include/linux/mfd/wm831x/core.h index 5184b79c700b..53580b592bc9 100644 --- a/include/linux/mfd/wm831x/core.h +++ b/include/linux/mfd/wm831x/core.h @@ -254,6 +254,10 @@ struct wm831x { int irq_masks_cur[WM831X_NUM_IRQ_REGS]; /* Currently active value */ int irq_masks_cache[WM831X_NUM_IRQ_REGS]; /* Cached hardware value */ + /* Chip revision based flags */ + unsigned has_gpio_ena:1; /* Has GPIO enable bit */ + unsigned has_cs_sts:1; /* Has current sink status bit */ + int num_gpio; struct mutex auxadc_lock; diff --git a/include/linux/mfd/wm831x/gpio.h b/include/linux/mfd/wm831x/gpio.h index 2835614af0e3..9b163c58865f 100644 --- a/include/linux/mfd/wm831x/gpio.h +++ b/include/linux/mfd/wm831x/gpio.h @@ -41,6 +41,10 @@ #define WM831X_GPN_OD_MASK 0x0200 /* GPN_OD */ #define WM831X_GPN_OD_SHIFT 9 /* GPN_OD */ #define WM831X_GPN_OD_WIDTH 1 /* GPN_OD */ +#define WM831X_GPN_ENA 0x0080 /* GPN_ENA */ +#define WM831X_GPN_ENA_MASK 0x0080 /* GPN_ENA */ +#define WM831X_GPN_ENA_SHIFT 7 /* GPN_ENA */ +#define WM831X_GPN_ENA_WIDTH 1 /* GPN_ENA */ #define WM831X_GPN_TRI 0x0080 /* GPN_TRI */ #define WM831X_GPN_TRI_MASK 0x0080 /* GPN_TRI */ #define WM831X_GPN_TRI_SHIFT 7 /* GPN_TRI */ -- cgit v1.2.3 From 11a441ce82d6ffecfd39b324024de0cd630b36c1 Mon Sep 17 00:00:00 2001 From: Mike Turquette Date: Mon, 22 Feb 2010 11:16:30 -0600 Subject: mfd: Introduce remove_script function for twl4030 New function twl4030_remove_script(u8 flags) takes a script type as defined in twl.h and prevents any script already loaded in that position from running. This is accomplished by programming SEQ_ADD_* to 0x3f, the END_OF_SCRIPT value, where SEQ_ADD_* is determined by flags. (Future) users of this function include OMAP board files for machines facing a race condition between sleep and warm reset. 
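A hedged usage sketch of the new helper (the board function and error handling are invented; only twl4030_remove_script() and TWL4030_SLEEP_SCRIPT come from the patch):

#include <linux/kernel.h>
#include <linux/i2c/twl.h>

/* Hypothetical OMAP board fixup: stop the already-loaded sleep script
 * from running on boards hit by the sleep/warm-reset race. */
static void example_board_twl4030_fixup(void)
{
	int err = twl4030_remove_script(TWL4030_SLEEP_SCRIPT);

	if (err)
		pr_err("board: removing TWL4030 sleep script failed: %d\n", err);
}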
Signed-off-by: Mike Turquette Signed-off-by: Samuel Ortiz --- drivers/mfd/twl4030-power.c | 50 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/i2c/twl.h | 1 + 2 files changed, 51 insertions(+) (limited to 'include') diff --git a/drivers/mfd/twl4030-power.c b/drivers/mfd/twl4030-power.c index 5b045ff4a2c2..7efa8789a3a2 100644 --- a/drivers/mfd/twl4030-power.c +++ b/drivers/mfd/twl4030-power.c @@ -461,6 +461,56 @@ out: return err; } +int twl4030_remove_script(u8 flags) +{ + int err = 0; + + err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, R_KEY_1, + R_PROTECT_KEY); + if (err) { + pr_err("twl4030: unable to unlock PROTECT_KEY\n"); + return err; + } + + err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, R_KEY_2, + R_PROTECT_KEY); + if (err) { + pr_err("twl4030: unable to unlock PROTECT_KEY\n"); + return err; + } + + if (flags & TWL4030_WRST_SCRIPT) { + err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, END_OF_SCRIPT, + R_SEQ_ADD_WARM); + if (err) + return err; + } + if (flags & TWL4030_WAKEUP12_SCRIPT) { + if (err) + err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, END_OF_SCRIPT, + R_SEQ_ADD_S2A12); + return err; + } + if (flags & TWL4030_WAKEUP3_SCRIPT) { + err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, END_OF_SCRIPT, + R_SEQ_ADD_S2A3); + if (err) + return err; + } + if (flags & TWL4030_SLEEP_SCRIPT) { + err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, END_OF_SCRIPT, + R_SEQ_ADD_A2S); + if (err) + return err; + } + + err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, 0, R_PROTECT_KEY); + if (err) + pr_err("TWL4030 Unable to relock registers\n"); + + return err; +} + void __init twl4030_power_init(struct twl4030_power_data *twl4030_scripts) { int err = 0; diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h index 70d4caf48571..fb6784e86d5f 100644 --- a/include/linux/i2c/twl.h +++ b/include/linux/i2c/twl.h @@ -550,6 +550,7 @@ struct twl4030_power_data { }; extern void twl4030_power_init(struct twl4030_power_data *triton2_scripts); +extern int twl4030_remove_script(u8 flags); struct twl4030_codec_audio_data { unsigned int audio_mclk; -- cgit v1.2.3 From d19663ac61a6e36eec655d3c84a106686ebddd2c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 23 Feb 2010 11:08:05 +0000 Subject: mfd: Use completion interrupt for WM835x AUXADC Use the completion interrupt generated by the device rather than polling for conversions to complete. As a backup we still check the state of the AUXADC if we don't get a completion, mostly for systems that don't have the WM8350 interrupt infrastructure hooked up. 
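Stripped of driver detail, the pattern this change adopts looks roughly like the sketch below (generic names, not the wm8350 code itself):

#include <linux/completion.h>
#include <linux/interrupt.h>
#include <linux/jiffies.h>

/* IRQ-signalled completion with a polled fallback: the handler
 * completes, the reader waits briefly and then re-reads the hardware
 * status, so a missing IRQ hookup only costs the timeout. */
static DECLARE_COMPLETION(conv_done);

static irqreturn_t conv_done_irq(int irq, void *data)
{
	complete(&conv_done);
	return IRQ_HANDLED;
}

static void wait_for_conversion(void)
{
	/* Return value deliberately ignored; the caller checks the
	 * conversion status register afterwards. */
	wait_for_completion_timeout(&conv_done, msecs_to_jiffies(5));
}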
Signed-off-by: Mark Brown Signed-off-by: Samuel Ortiz --- drivers/mfd/wm8350-core.c | 35 +++++++++++++++++++++++++++++------ include/linux/mfd/wm8350/core.h | 2 ++ 2 files changed, 31 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/mfd/wm8350-core.c b/drivers/mfd/wm8350-core.c index 9a970bd68775..bd75807d5302 100644 --- a/drivers/mfd/wm8350-core.c +++ b/drivers/mfd/wm8350-core.c @@ -339,7 +339,6 @@ EXPORT_SYMBOL_GPL(wm8350_reg_unlock); int wm8350_read_auxadc(struct wm8350 *wm8350, int channel, int scale, int vref) { u16 reg, result = 0; - int tries = 5; if (channel < WM8350_AUXADC_AUX1 || channel > WM8350_AUXADC_TEMP) return -EINVAL; @@ -363,12 +362,13 @@ int wm8350_read_auxadc(struct wm8350 *wm8350, int channel, int scale, int vref) reg |= 1 << channel | WM8350_AUXADC_POLL; wm8350_reg_write(wm8350, WM8350_DIGITISER_CONTROL_1, reg); - do { - schedule_timeout_interruptible(1); - reg = wm8350_reg_read(wm8350, WM8350_DIGITISER_CONTROL_1); - } while ((reg & WM8350_AUXADC_POLL) && --tries); + /* We ignore the result of the completion and just check for a + * conversion result, allowing us to soldier on if the IRQ + * infrastructure is not set up for the chip. */ + wait_for_completion_timeout(&wm8350->auxadc_done, msecs_to_jiffies(5)); - if (!tries) + reg = wm8350_reg_read(wm8350, WM8350_DIGITISER_CONTROL_1); + if (reg & WM8350_AUXADC_POLL) dev_err(wm8350->dev, "adc chn %d read timeout\n", channel); else result = wm8350_reg_read(wm8350, @@ -385,6 +385,15 @@ int wm8350_read_auxadc(struct wm8350 *wm8350, int channel, int scale, int vref) } EXPORT_SYMBOL_GPL(wm8350_read_auxadc); +static irqreturn_t wm8350_auxadc_irq(int irq, void *irq_data) +{ + struct wm8350 *wm8350 = irq_data; + + complete(&wm8350->auxadc_done); + + return IRQ_HANDLED; +} + /* * Cache is always host endian. */ @@ -682,11 +691,22 @@ int wm8350_device_init(struct wm8350 *wm8350, int irq, } mutex_init(&wm8350->auxadc_mutex); + init_completion(&wm8350->auxadc_done); ret = wm8350_irq_init(wm8350, irq, pdata); if (ret < 0) goto err; + if (wm8350->irq_base) { + ret = request_threaded_irq(wm8350->irq_base + + WM8350_IRQ_AUXADC_DATARDY, + NULL, wm8350_auxadc_irq, 0, + "auxadc", wm8350); + if (ret < 0) + dev_warn(wm8350->dev, + "Failed to request AUXADC IRQ: %d\n", ret); + } + if (pdata && pdata->init) { ret = pdata->init(wm8350); if (ret != 0) { @@ -736,6 +756,9 @@ void wm8350_device_exit(struct wm8350 *wm8350) platform_device_unregister(wm8350->gpio.pdev); platform_device_unregister(wm8350->codec.pdev); + if (wm8350->irq_base) + free_irq(wm8350->irq_base + WM8350_IRQ_AUXADC_DATARDY, wm8350); + wm8350_irq_exit(wm8350); kfree(wm8350->reg_cache); diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h index fae08aa65413..98fcc977e82b 100644 --- a/include/linux/mfd/wm8350/core.h +++ b/include/linux/mfd/wm8350/core.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -621,6 +622,7 @@ struct wm8350 { u16 *reg_cache; struct mutex auxadc_mutex; + struct completion auxadc_done; /* Interrupt handling */ struct mutex irq_lock; -- cgit v1.2.3 From 473fe73650b9f92114edbedfbb616561c1a0026c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 23 Feb 2010 11:08:06 +0000 Subject: mfd: Use completion interrupt for WM831x AUXADC Use the completion interrupt generated by the device rather than polling for conversions to complete. 
As a backup we still check the status of the AUXADC if we don't get a completion, mostly for systems that don't have the WM831x interrupt infrastructure hooked up. Also reduce the timeout for completion of conversions to 5ms from the previous 10ms, the lower timeout should be sufficient. Signed-off-by: Mark Brown Signed-off-by: Samuel Ortiz --- drivers/mfd/wm831x-core.c | 36 +++++++++++++++++++++++++++++------- include/linux/mfd/wm831x/core.h | 2 ++ 2 files changed, 31 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/mfd/wm831x-core.c b/drivers/mfd/wm831x-core.c index c428d9f918fc..07101e9e1cba 100644 --- a/drivers/mfd/wm831x-core.c +++ b/drivers/mfd/wm831x-core.c @@ -321,7 +321,6 @@ EXPORT_SYMBOL_GPL(wm831x_set_bits); */ int wm831x_auxadc_read(struct wm831x *wm831x, enum wm831x_auxadc input) { - int tries = 10; int ret, src; mutex_lock(&wm831x->auxadc_lock); @@ -349,13 +348,14 @@ int wm831x_auxadc_read(struct wm831x *wm831x, enum wm831x_auxadc input) goto disable; } - do { - msleep(1); + /* Ignore the result to allow us to soldier on without IRQ hookup */ + wait_for_completion_timeout(&wm831x->auxadc_done, msecs_to_jiffies(5)); - ret = wm831x_reg_read(wm831x, WM831X_AUXADC_CONTROL); - if (ret < 0) - ret = WM831X_AUX_CVT_ENA; - } while ((ret & WM831X_AUX_CVT_ENA) && --tries); + ret = wm831x_reg_read(wm831x, WM831X_AUXADC_CONTROL); + if (ret < 0) { + dev_err(wm831x->dev, "AUXADC status read failed: %d\n", ret); + goto disable; + } if (ret & WM831X_AUX_CVT_ENA) { dev_err(wm831x->dev, "Timed out reading AUXADC\n"); @@ -390,6 +390,15 @@ out: } EXPORT_SYMBOL_GPL(wm831x_auxadc_read); +static irqreturn_t wm831x_auxadc_irq(int irq, void *irq_data) +{ + struct wm831x *wm831x = irq_data; + + complete(&wm831x->auxadc_done); + + return IRQ_HANDLED; +} + /** * wm831x_auxadc_read_uv: Read a voltage from the WM831x AUXADC * @@ -1411,6 +1420,7 @@ static int wm831x_device_init(struct wm831x *wm831x, unsigned long id, int irq) mutex_init(&wm831x->io_lock); mutex_init(&wm831x->key_lock); mutex_init(&wm831x->auxadc_lock); + init_completion(&wm831x->auxadc_done); dev_set_drvdata(wm831x->dev, wm831x); ret = wm831x_reg_read(wm831x, WM831X_PARENT_ID); @@ -1523,6 +1533,16 @@ static int wm831x_device_init(struct wm831x *wm831x, unsigned long id, int irq) if (ret != 0) goto err; + if (wm831x->irq_base) { + ret = request_threaded_irq(wm831x->irq_base + + WM831X_IRQ_AUXADC_DATA, + NULL, wm831x_auxadc_irq, 0, + "auxadc", wm831x); + if (ret < 0) + dev_err(wm831x->dev, "AUXADC IRQ request failed: %d\n", + ret); + } + /* The core device is up, instantiate the subdevices. */ switch (parent) { case WM8310: @@ -1593,6 +1613,8 @@ static void wm831x_device_exit(struct wm831x *wm831x) { wm831x_otp_exit(wm831x); mfd_remove_devices(wm831x->dev); + if (wm831x->irq_base) + free_irq(wm831x->irq_base + WM831X_IRQ_AUXADC_DATA, wm831x); wm831x_irq_exit(wm831x); kfree(wm831x); } diff --git a/include/linux/mfd/wm831x/core.h b/include/linux/mfd/wm831x/core.h index 53580b592bc9..5915f6e3d9ab 100644 --- a/include/linux/mfd/wm831x/core.h +++ b/include/linux/mfd/wm831x/core.h @@ -15,6 +15,7 @@ #ifndef __MFD_WM831X_CORE_H__ #define __MFD_WM831X_CORE_H__ +#include #include /* @@ -261,6 +262,7 @@ struct wm831x { int num_gpio; struct mutex auxadc_lock; + struct completion auxadc_done; /* The WM831x has a security key blocking access to certain * registers. 
The mutex is taken by the accessors for locking -- cgit v1.2.3 From 2c08583c6a6b4c5f5dea4cb0931eca82af7db6fe Mon Sep 17 00:00:00 2001 From: Peter Huewe Date: Sat, 6 Mar 2010 14:36:38 +0100 Subject: mfd: Fix ucb1x00 build failure for collie_defconfig This patch fixes a build failure[1], by adding the missing semaphore.h include References: [1] http://kisskb.ellerman.id.au/kisskb/buildresult/2234322/ Signed-off-by: Peter Huewe Signed-off-by: Samuel Ortiz --- drivers/mfd/ucb1x00-core.c | 1 + include/linux/mfd/ucb1x00.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/drivers/mfd/ucb1x00-core.c b/drivers/mfd/ucb1x00-core.c index 252b74188ec2..b281217334eb 100644 --- a/drivers/mfd/ucb1x00-core.c +++ b/drivers/mfd/ucb1x00-core.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include diff --git a/include/linux/mfd/ucb1x00.h b/include/linux/mfd/ucb1x00.h index aa9c3789bed4..4321f044d1e4 100644 --- a/include/linux/mfd/ucb1x00.h +++ b/include/linux/mfd/ucb1x00.h @@ -12,6 +12,7 @@ #include #include +#include #define UCB_IO_DATA 0x00 #define UCB_IO_DIR 0x01 -- cgit v1.2.3 From 0c9a2ac1f8a2e55b3382dfc27256878a58ea49e9 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki / 吉藤英明 Date: Sun, 7 Mar 2010 00:14:44 +0000 Subject: ipv6: Optmize translation between IPV6_PREFER_SRC_xxx and RT6_LOOKUP_F_xxx. IPV6_PREFER_SRC_xxx definitions: | #define IPV6_PREFER_SRC_TMP 0x0001 | #define IPV6_PREFER_SRC_PUBLIC 0x0002 | #define IPV6_PREFER_SRC_COA 0x0004 RT6_LOOKUP_F_xxx definitions: | #define RT6_LOOKUP_F_SRCPREF_TMP 0x00000008 | #define RT6_LOOKUP_F_SRCPREF_PUBLIC 0x00000010 | #define RT6_LOOKUP_F_SRCPREF_COA 0x00000020 So, we can translate between these two groups by shift operation instead of multiple 'if's. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/ip6_route.h | 18 ++++++++++++++++++ net/ipv6/fib6_rules.c | 11 ++--------- net/ipv6/route.c | 11 ++--------- 3 files changed, 22 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 4a808de7c0f6..68f67836e146 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -37,6 +37,24 @@ struct route_info { #define RT6_LOOKUP_F_SRCPREF_PUBLIC 0x00000010 #define RT6_LOOKUP_F_SRCPREF_COA 0x00000020 +/* + * rt6_srcprefs2flags() and rt6_flags2srcprefs() translate + * between IPV6_ADDR_PREFERENCES socket option values + * IPV6_PREFER_SRC_TMP = 0x1 + * IPV6_PREFER_SRC_PUBLIC = 0x2 + * IPV6_PREFER_SRC_COA = 0x4 + * and above RT6_LOOKUP_F_SRCPREF_xxx flags. + */ +static inline int rt6_srcprefs2flags(unsigned int srcprefs) +{ + /* No need to bitmask because srcprefs have only 3 bits. 
*/ + return srcprefs << 3; +} + +static inline unsigned int rt6_flags2srcprefs(int flags) +{ + return (flags >> 3) & 7; +} extern void ip6_route_input(struct sk_buff *skb); diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 551882b9dfd6..5e463c43fcc2 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -84,18 +84,11 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, if ((rule->flags & FIB_RULE_FIND_SADDR) && r->src.plen && !(flags & RT6_LOOKUP_F_HAS_SADDR)) { struct in6_addr saddr; - unsigned int srcprefs = 0; - - if (flags & RT6_LOOKUP_F_SRCPREF_TMP) - srcprefs |= IPV6_PREFER_SRC_TMP; - if (flags & RT6_LOOKUP_F_SRCPREF_PUBLIC) - srcprefs |= IPV6_PREFER_SRC_PUBLIC; - if (flags & RT6_LOOKUP_F_SRCPREF_COA) - srcprefs |= IPV6_PREFER_SRC_COA; if (ipv6_dev_get_saddr(net, ip6_dst_idev(&rt->u.dst)->dev, - &flp->fl6_dst, srcprefs, + &flp->fl6_dst, + rt6_flags2srcprefs(flags), &saddr)) goto again; if (!ipv6_prefix_equal(&saddr, &r->src.addr, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index b08879e97f22..52cd3eff31dc 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -819,15 +819,8 @@ struct dst_entry * ip6_route_output(struct net *net, struct sock *sk, if (!ipv6_addr_any(&fl->fl6_src)) flags |= RT6_LOOKUP_F_HAS_SADDR; - else if (sk) { - unsigned int prefs = inet6_sk(sk)->srcprefs; - if (prefs & IPV6_PREFER_SRC_TMP) - flags |= RT6_LOOKUP_F_SRCPREF_TMP; - if (prefs & IPV6_PREFER_SRC_PUBLIC) - flags |= RT6_LOOKUP_F_SRCPREF_PUBLIC; - if (prefs & IPV6_PREFER_SRC_COA) - flags |= RT6_LOOKUP_F_SRCPREF_COA; - } + else if (sk) + flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs); return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output); } -- cgit v1.2.3 From ecdf6ceb8cf4756bd4214bf9755755752b6015f5 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 29 Dec 2009 20:11:20 -0800 Subject: Driver core: add platform_create_bundle() helper Many legacy-style module create singleton platform devices themselves, along with corresponding platform driver. Instead of replicating error handling code in all such drivers, provide a helper that allocates and registers a single platform device and a driver and binds them together. Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/base/platform.c | 58 +++++++++++++++++++++++++++++++++++++++++ include/linux/platform_device.h | 5 ++++ 2 files changed, 63 insertions(+) (limited to 'include') diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 58efaf2f1259..937d58021d1b 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -548,6 +548,64 @@ int __init_or_module platform_driver_probe(struct platform_driver *drv, } EXPORT_SYMBOL_GPL(platform_driver_probe); +/** + * platform_create_bundle - register driver and create corresponding device + * @driver: platform driver structure + * @probe: the driver probe routine, probably from an __init section + * @res: set of resources that needs to be allocated for the device + * @n_res: number of resources + * @data: platform specific data for this platform device + * @size: size of platform specific data + * + * Use this in legacy-style modules that probe hardware directly and + * register a single platform device and corresponding platform driver. 
+ */ +struct platform_device * __init_or_module platform_create_bundle( + struct platform_driver *driver, + int (*probe)(struct platform_device *), + struct resource *res, unsigned int n_res, + const void *data, size_t size) +{ + struct platform_device *pdev; + int error; + + pdev = platform_device_alloc(driver->driver.name, -1); + if (!pdev) { + error = -ENOMEM; + goto err_out; + } + + if (res) { + error = platform_device_add_resources(pdev, res, n_res); + if (error) + goto err_pdev_put; + } + + if (data) { + error = platform_device_add_data(pdev, data, size); + if (error) + goto err_pdev_put; + } + + error = platform_device_add(pdev); + if (error) + goto err_pdev_put; + + error = platform_driver_probe(driver, probe); + if (error) + goto err_pdev_del; + + return pdev; + +err_pdev_del: + platform_device_del(pdev); +err_pdev_put: + platform_device_put(pdev); +err_out: + return ERR_PTR(error); +} +EXPORT_SYMBOL_GPL(platform_create_bundle); + /* modalias support enables more hands-off userspace setup: * (a) environment variable lets new-style hotplug events work once system is * fully running: "modprobe $MODALIAS" diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 71ff887ca44e..25e64b43e644 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -77,6 +77,11 @@ extern int platform_driver_probe(struct platform_driver *driver, #define platform_get_drvdata(_dev) dev_get_drvdata(&(_dev)->dev) #define platform_set_drvdata(_dev,data) dev_set_drvdata(&(_dev)->dev, (data)) +extern struct platform_device *platform_create_bundle(struct platform_driver *driver, + int (*probe)(struct platform_device *), + struct resource *res, unsigned int n_res, + const void *data, size_t size); + /* early platform driver interface */ struct early_platform_driver { const char *class_str; -- cgit v1.2.3 From 3d03ba4d1dd2246adff5a9ff1194a539b3bc05a7 Mon Sep 17 00:00:00 2001 From: Eric Miao Date: Fri, 1 Jan 2010 15:43:28 +0800 Subject: driver core: make platform_device_id table const The platform ID table is normally const, force that by adding the attribute. Signed-off-by: Eric Miao Signed-off-by: Greg Kroah-Hartman --- include/linux/platform_device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 25e64b43e644..2c2d035bfb92 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -21,7 +21,7 @@ struct platform_device { u32 num_resources; struct resource * resource; - struct platform_device_id *id_entry; + const struct platform_device_id *id_entry; /* arch specific additions */ struct pdev_archdata archdata; -- cgit v1.2.3 From c9be0a36f9bf392a7984473124a67a12964df11f Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 5 Jan 2010 12:47:58 +0100 Subject: sysdev: Pass attribute in sysdev_class attributes show/store Passing the attribute to the low level IO functions allows all kinds of cleanups, by sharing low level IO code without requiring an own function for every piece of data. Also drivers can extend the attributes with own data fields and use that in the low level function. Similar to sysdev_attributes and normal attributes. This is a tree-wide sweep, converting everything in one go. No functional changes in this patch other than passing the new argument everywhere. Tested on x86, the non x86 parts are uncompiled. 
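For reference, a converted attribute now looks roughly like this minimal sketch (names invented):

#include <linux/kernel.h>
#include <linux/sysdev.h>

/* The extra attribute argument lets a single show routine serve
 * several attributes, e.g. by keying off attr->attr.name. */
static ssize_t example_show(struct sysdev_class *class,
			    struct sysdev_class_attribute *attr,
			    char *buf)
{
	return sprintf(buf, "%s\n", attr->attr.name);
}

static SYSDEV_CLASS_ATTR(example, 0444, example_show, NULL);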
Signed-off-by: Andi Kleen Signed-off-by: Greg Kroah-Hartman --- arch/mips/txx9/generic/7segled.c | 5 +++- arch/s390/kernel/smp.c | 8 +++++-- arch/s390/kernel/time.c | 49 ++++++++++++++++++++++++++++++---------- drivers/base/cpu.c | 9 +++++--- drivers/base/node.c | 17 ++++++++++---- drivers/base/sys.c | 4 ++-- drivers/cpuidle/sysfs.c | 4 ++++ include/linux/sysdev.h | 6 +++-- kernel/perf_event.c | 13 ++++++++--- kernel/sched.c | 4 ++++ 10 files changed, 89 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/arch/mips/txx9/generic/7segled.c b/arch/mips/txx9/generic/7segled.c index 727ab21b6618..7f8416f86222 100644 --- a/arch/mips/txx9/generic/7segled.c +++ b/arch/mips/txx9/generic/7segled.c @@ -58,13 +58,16 @@ static ssize_t raw_store(struct sys_device *dev, static SYSDEV_ATTR(ascii, 0200, NULL, ascii_store); static SYSDEV_ATTR(raw, 0200, NULL, raw_store); -static ssize_t map_seg7_show(struct sysdev_class *class, char *buf) +static ssize_t map_seg7_show(struct sysdev_class *class, + struct sysdev_class_attribute *attr, + char *buf) { memcpy(buf, &txx9_seg7map, sizeof(txx9_seg7map)); return sizeof(txx9_seg7map); } static ssize_t map_seg7_store(struct sysdev_class *class, + struct sysdev_class_attribute *attr, const char *buf, size_t size) { if (size != sizeof(txx9_seg7map)) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 8b10127c00ad..e2121099f03b 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -1020,7 +1020,9 @@ out: return rc; } -static ssize_t __ref rescan_store(struct sysdev_class *class, const char *buf, +static ssize_t __ref rescan_store(struct sysdev_class *class, + struct sysdev_class_attribute *attr, + const char *buf, size_t count) { int rc; @@ -1041,7 +1043,9 @@ static ssize_t dispatching_show(struct sysdev_class *class, char *buf) return count; } -static ssize_t dispatching_store(struct sysdev_class *dev, const char *buf, +static ssize_t dispatching_store(struct sysdev_class *dev, + struct sysdev_class_attribute *attr, + const char *buf, size_t count) { int val, rc; diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index a8f93f1705ad..75894c281710 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -1116,14 +1116,18 @@ static struct sys_device etr_port1_dev = { /* * ETR class attributes */ -static ssize_t etr_stepping_port_show(struct sysdev_class *class, char *buf) +static ssize_t etr_stepping_port_show(struct sysdev_class *class, + struct sysdev_class_attribute *attr, + char *buf) { return sprintf(buf, "%i\n", etr_port0.esw.p); } static SYSDEV_CLASS_ATTR(stepping_port, 0400, etr_stepping_port_show, NULL); -static ssize_t etr_stepping_mode_show(struct sysdev_class *class, char *buf) +static ssize_t etr_stepping_mode_show(struct sysdev_class *class, + struct sysdev_class_attribute *attr, + char *buf) { char *mode_str; @@ -1584,7 +1588,9 @@ static struct sysdev_class stp_sysclass = { .name = "stp", }; -static ssize_t stp_ctn_id_show(struct sysdev_class *class, char *buf) +static ssize_t stp_ctn_id_show(struct sysdev_class *class, + struct sysdev_class_attribute *attr, + char *buf) { if (!stp_online) return -ENODATA; @@ -1594,7 +1600,9 @@ static ssize_t stp_ctn_id_show(struct sysdev_class *class, char *buf) static SYSDEV_CLASS_ATTR(ctn_id, 0400, stp_ctn_id_show, NULL); -static ssize_t stp_ctn_type_show(struct sysdev_class *class, char *buf) +static ssize_t stp_ctn_type_show(struct sysdev_class *class, + struct sysdev_class_attribute *attr, + char *buf) { if (!stp_online) return -ENODATA; @@ -1603,7 
+1611,9 @@ static ssize_t stp_ctn_type_show(struct sysdev_class *class, char *buf) static SYSDEV_CLASS_ATTR(ctn_type, 0400, stp_ctn_type_show, NULL); -static ssize_t stp_dst_offset_show(struct sysdev_class *class, char *buf) +static ssize_t stp_dst_offset_show(struct sysdev_class *class, + struct sysdev_class_attribute *attr, + char *buf) { if (!stp_online || !(stp_info.vbits & 0x2000)) return -ENODATA; @@ -1612,7 +1622,9 @@ static ssize_t stp_dst_offset_show(struct sysdev_class *class, char *buf) static SYSDEV_CLASS_ATTR(dst_offset, 0400, stp_dst_offset_show, NULL); -static ssize_t stp_leap_seconds_show(struct sysdev_class *class, char *buf) +static ssize_t stp_leap_seconds_show(struct sysdev_class *class, + struct sysdev_class_attribute *attr, + char *buf) { if (!stp_online || !(stp_info.vbits & 0x8000)) return -ENODATA; @@ -1621,7 +1633,9 @@ static ssize_t stp_leap_seconds_show(struct sysdev_class *class, char *buf) static SYSDEV_CLASS_ATTR(leap_seconds, 0400, stp_leap_seconds_show, NULL); -static ssize_t stp_stratum_show(struct sysdev_class *class, char *buf) +static ssize_t stp_stratum_show(struct sysdev_class *class, + struct sysdev_class_attribute *attr, + char *buf) { if (!stp_online) return -ENODATA; @@ -1630,7 +1644,9 @@ static ssize_t stp_stratum_show(struct sysdev_class *class, char *buf) static SYSDEV_CLASS_ATTR(stratum, 0400, stp_stratum_show, NULL); -static ssize_t stp_time_offset_show(struct sysdev_class *class, char *buf) +static ssize_t stp_time_offset_show(struct sysdev_class *class, + struct sysdev_class_attribute *attr, + char *buf) { if (!stp_online || !(stp_info.vbits & 0x0800)) return -ENODATA; @@ -1639,7 +1655,9 @@ static ssize_t stp_time_offset_show(struct sysdev_class *class, char *buf) static SYSDEV_CLASS_ATTR(time_offset, 0400, stp_time_offset_show, NULL); -static ssize_t stp_time_zone_offset_show(struct sysdev_class *class, char *buf) +static ssize_t stp_time_zone_offset_show(struct sysdev_class *class, + struct sysdev_class_attribute *attr, + char *buf) { if (!stp_online || !(stp_info.vbits & 0x4000)) return -ENODATA; @@ -1649,7 +1667,9 @@ static ssize_t stp_time_zone_offset_show(struct sysdev_class *class, char *buf) static SYSDEV_CLASS_ATTR(time_zone_offset, 0400, stp_time_zone_offset_show, NULL); -static ssize_t stp_timing_mode_show(struct sysdev_class *class, char *buf) +static ssize_t stp_timing_mode_show(struct sysdev_class *class, + struct sysdev_class_attribute *attr, + char *buf) { if (!stp_online) return -ENODATA; @@ -1658,7 +1678,9 @@ static ssize_t stp_timing_mode_show(struct sysdev_class *class, char *buf) static SYSDEV_CLASS_ATTR(timing_mode, 0400, stp_timing_mode_show, NULL); -static ssize_t stp_timing_state_show(struct sysdev_class *class, char *buf) +static ssize_t stp_timing_state_show(struct sysdev_class *class, + struct sysdev_class_attribute *attr, + char *buf) { if (!stp_online) return -ENODATA; @@ -1667,12 +1689,15 @@ static ssize_t stp_timing_state_show(struct sysdev_class *class, char *buf) static SYSDEV_CLASS_ATTR(timing_state, 0400, stp_timing_state_show, NULL); -static ssize_t stp_online_show(struct sysdev_class *class, char *buf) +static ssize_t stp_online_show(struct sysdev_class *class, + struct sysdev_class_attribute *attr, + char *buf) { return sprintf(buf, "%i\n", stp_online); } static ssize_t stp_online_store(struct sysdev_class *class, + struct sysdev_class_attribute *attr, const char *buf, size_t count) { unsigned int value; diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 958bd1540c30..fd1b2f9b7b8f 100644 --- 
a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -151,7 +151,8 @@ static ssize_t print_cpus_map(char *buf, const struct cpumask *map) } #define print_cpus_func(type) \ -static ssize_t print_cpus_##type(struct sysdev_class *class, char *buf) \ +static ssize_t print_cpus_##type(struct sysdev_class *class, \ + struct sysdev_class_attribute *attr, char *buf) \ { \ return print_cpus_map(buf, cpu_##type##_mask); \ } \ @@ -165,7 +166,8 @@ print_cpus_func(present); /* * Print values for NR_CPUS and offlined cpus */ -static ssize_t print_cpus_kernel_max(struct sysdev_class *class, char *buf) +static ssize_t print_cpus_kernel_max(struct sysdev_class *class, + struct sysdev_class_attribute *attr, char *buf) { int n = snprintf(buf, PAGE_SIZE-2, "%d\n", NR_CPUS - 1); return n; @@ -175,7 +177,8 @@ static SYSDEV_CLASS_ATTR(kernel_max, 0444, print_cpus_kernel_max, NULL); /* arch-optional setting to enable display of offline cpus >= nr_cpu_ids */ unsigned int total_cpus; -static ssize_t print_cpus_offline(struct sysdev_class *class, char *buf) +static ssize_t print_cpus_offline(struct sysdev_class *class, + struct sysdev_class_attribute *attr, char *buf) { int n = 0, len = PAGE_SIZE-2; cpumask_var_t offline; diff --git a/drivers/base/node.c b/drivers/base/node.c index 70122791683d..85c9d30d7004 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -544,23 +544,29 @@ static ssize_t print_nodes_state(enum node_states state, char *buf) return n; } -static ssize_t print_nodes_possible(struct sysdev_class *class, char *buf) +static ssize_t print_nodes_possible(struct sysdev_class *class, + struct sysdev_class_attribute *attr, char *buf) { return print_nodes_state(N_POSSIBLE, buf); } -static ssize_t print_nodes_online(struct sysdev_class *class, char *buf) +static ssize_t print_nodes_online(struct sysdev_class *class, + struct sysdev_class_attribute *attr, + char *buf) { return print_nodes_state(N_ONLINE, buf); } static ssize_t print_nodes_has_normal_memory(struct sysdev_class *class, - char *buf) + struct sysdev_class_attribute *attr, + char *buf) { return print_nodes_state(N_NORMAL_MEMORY, buf); } -static ssize_t print_nodes_has_cpu(struct sysdev_class *class, char *buf) +static ssize_t print_nodes_has_cpu(struct sysdev_class *class, + struct sysdev_class_attribute *attr, + char *buf) { return print_nodes_state(N_CPU, buf); } @@ -573,7 +579,8 @@ static SYSDEV_CLASS_ATTR(has_cpu, 0444, print_nodes_has_cpu, NULL); #ifdef CONFIG_HIGHMEM static ssize_t print_nodes_has_high_memory(struct sysdev_class *class, - char *buf) + struct sysdev_class_attribute *attr, + char *buf) { return print_nodes_state(N_HIGH_MEMORY, buf); } diff --git a/drivers/base/sys.c b/drivers/base/sys.c index 0d903909af7e..a38445c0f8c5 100644 --- a/drivers/base/sys.c +++ b/drivers/base/sys.c @@ -89,7 +89,7 @@ static ssize_t sysdev_class_show(struct kobject *kobj, struct attribute *attr, struct sysdev_class_attribute *class_attr = to_sysdev_class_attr(attr); if (class_attr->show) - return class_attr->show(class, buffer); + return class_attr->show(class, class_attr, buffer); return -EIO; } @@ -100,7 +100,7 @@ static ssize_t sysdev_class_store(struct kobject *kobj, struct attribute *attr, struct sysdev_class_attribute *class_attr = to_sysdev_class_attr(attr); if (class_attr->store) - return class_attr->store(class, buffer, count); + return class_attr->store(class, class_attr, buffer, count); return -EIO; } diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index 97b003839fb6..c9cefacabf37 100644 --- a/drivers/cpuidle/sysfs.c +++ 
b/drivers/cpuidle/sysfs.c @@ -22,6 +22,7 @@ static int __init cpuidle_sysfs_setup(char *unused) __setup("cpuidle_sysfs_switch", cpuidle_sysfs_setup); static ssize_t show_available_governors(struct sysdev_class *class, + struct sysdev_class_attribute *attr, char *buf) { ssize_t i = 0; @@ -41,6 +42,7 @@ out: } static ssize_t show_current_driver(struct sysdev_class *class, + struct sysdev_class_attribute *attr, char *buf) { ssize_t ret; @@ -56,6 +58,7 @@ static ssize_t show_current_driver(struct sysdev_class *class, } static ssize_t show_current_governor(struct sysdev_class *class, + struct sysdev_class_attribute *attr, char *buf) { ssize_t ret; @@ -71,6 +74,7 @@ static ssize_t show_current_governor(struct sysdev_class *class, } static ssize_t store_current_governor(struct sysdev_class *class, + struct sysdev_class_attribute *attr, const char *buf, size_t count) { char gov_name[CPUIDLE_NAME_LEN]; diff --git a/include/linux/sysdev.h b/include/linux/sysdev.h index f395bb3fa2f2..c2458fa8376c 100644 --- a/include/linux/sysdev.h +++ b/include/linux/sysdev.h @@ -41,8 +41,10 @@ struct sysdev_class { struct sysdev_class_attribute { struct attribute attr; - ssize_t (*show)(struct sysdev_class *, char *); - ssize_t (*store)(struct sysdev_class *, const char *, size_t); + ssize_t (*show)(struct sysdev_class *, struct sysdev_class_attribute *, + char *); + ssize_t (*store)(struct sysdev_class *, struct sysdev_class_attribute *, + const char *, size_t); }; #define _SYSDEV_CLASS_ATTR(_name,_mode,_show,_store) \ diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 8e352c756ba7..f40560b86544 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -5481,13 +5481,16 @@ void __init perf_event_init(void) register_cpu_notifier(&perf_cpu_nb); } -static ssize_t perf_show_reserve_percpu(struct sysdev_class *class, char *buf) +static ssize_t perf_show_reserve_percpu(struct sysdev_class *class, + struct sysdev_class_attribute *attr, + char *buf) { return sprintf(buf, "%d\n", perf_reserved_percpu); } static ssize_t perf_set_reserve_percpu(struct sysdev_class *class, + struct sysdev_class_attribute *attr, const char *buf, size_t count) { @@ -5516,13 +5519,17 @@ perf_set_reserve_percpu(struct sysdev_class *class, return count; } -static ssize_t perf_show_overcommit(struct sysdev_class *class, char *buf) +static ssize_t perf_show_overcommit(struct sysdev_class *class, + struct sysdev_class_attribute *attr, + char *buf) { return sprintf(buf, "%d\n", perf_overcommit); } static ssize_t -perf_set_overcommit(struct sysdev_class *class, const char *buf, size_t count) +perf_set_overcommit(struct sysdev_class *class, + struct sysdev_class_attribute *attr, + const char *buf, size_t count) { unsigned long val; int err; diff --git a/kernel/sched.c b/kernel/sched.c index b47ceeec1a91..150b6988de49 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -7406,11 +7406,13 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt) #ifdef CONFIG_SCHED_MC static ssize_t sched_mc_power_savings_show(struct sysdev_class *class, + struct sysdev_class_attribute *attr, char *page) { return sprintf(page, "%u\n", sched_mc_power_savings); } static ssize_t sched_mc_power_savings_store(struct sysdev_class *class, + struct sysdev_class_attribute *attr, const char *buf, size_t count) { return sched_power_savings_store(buf, count, 0); @@ -7422,11 +7424,13 @@ static SYSDEV_CLASS_ATTR(sched_mc_power_savings, 0644, #ifdef CONFIG_SCHED_SMT static ssize_t sched_smt_power_savings_show(struct sysdev_class *dev, + struct 
sysdev_class_attribute *attr, char *page) { return sprintf(page, "%u\n", sched_smt_power_savings); } static ssize_t sched_smt_power_savings_store(struct sysdev_class *dev, + struct sysdev_class_attribute *attr, const char *buf, size_t count) { return sched_power_savings_store(buf, count, 1); -- cgit v1.2.3 From 1c205ae18db53ff72985dd79f3baaf2dbaba6db7 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 5 Jan 2010 12:48:01 +0100 Subject: sysfs: Add sysfs_add/remove_files utility functions Adding/Removing a whole array of attributes is very common. Add a standard utility function to do this with a simple function call, instead of requiring drivers to open code this. Signed-off-by: Andi Kleen Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/file.c | 20 ++++++++++++++++++++ include/linux/sysfs.h | 14 ++++++++++++++ 2 files changed, 34 insertions(+) (limited to 'include') diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index dc30d9e31683..50b725bcc3f3 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -542,6 +542,18 @@ int sysfs_create_file(struct kobject * kobj, const struct attribute * attr) } +int sysfs_create_files(struct kobject *kobj, const struct attribute **ptr) +{ + int err = 0; + int i; + + for (i = 0; ptr[i] && !err; i++) + err = sysfs_create_file(kobj, ptr[i]); + if (err) + while (--i >= 0) + sysfs_remove_file(kobj, ptr[i]); + return err; +} /** * sysfs_add_file_to_group - add an attribute file to a pre-existing group. @@ -614,6 +626,12 @@ void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr) sysfs_hash_and_remove(kobj->sd, attr->name); } +void sysfs_remove_files(struct kobject * kobj, const struct attribute **ptr) +{ + int i; + for (i = 0; ptr[i]; i++) + sysfs_remove_file(kobj, ptr[i]); +} /** * sysfs_remove_file_from_group - remove an attribute file from a group. 
@@ -732,3 +750,5 @@ EXPORT_SYMBOL_GPL(sysfs_schedule_callback); EXPORT_SYMBOL_GPL(sysfs_create_file); EXPORT_SYMBOL_GPL(sysfs_remove_file); +EXPORT_SYMBOL_GPL(sysfs_remove_files); +EXPORT_SYMBOL_GPL(sysfs_create_files); diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index cfa83083a2d4..3e8526582146 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -94,9 +94,12 @@ int __must_check sysfs_move_dir(struct kobject *kobj, int __must_check sysfs_create_file(struct kobject *kobj, const struct attribute *attr); +int __must_check sysfs_create_files(struct kobject *kobj, + const struct attribute **attr); int __must_check sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode); void sysfs_remove_file(struct kobject *kobj, const struct attribute *attr); +void sysfs_remove_files(struct kobject *kobj, const struct attribute **attr); int __must_check sysfs_create_bin_file(struct kobject *kobj, const struct bin_attribute *attr); @@ -164,6 +167,12 @@ static inline int sysfs_create_file(struct kobject *kobj, return 0; } +static inline int sysfs_create_files(struct kobject *kobj, + const struct attribute **attr) +{ + return 0; +} + static inline int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode) { @@ -175,6 +184,11 @@ static inline void sysfs_remove_file(struct kobject *kobj, { } +static inline void sysfs_remove_files(struct kobject *kobj, + const struct attribute **attr) +{ +} + static inline int sysfs_create_bin_file(struct kobject *kobj, const struct bin_attribute *attr) { -- cgit v1.2.3 From 38457ab3a0d36320370c715145ba6da514127194 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 5 Jan 2010 12:48:02 +0100 Subject: sysfs: Add attribute array to sysdev classes Add a attribute array that is automatically registered and unregistered to struct sysdev_class. This is similar to what struct class has. A lot of drivers add list of attributes, so it's better to do this easily in the common sysdev layer. This adds a new field to struct sysdev_class. I audited the whole tree and there are no dynamically allocated sysdev classes, so this is fully compatible. 
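A hedged sketch of a driver using the new field (the attribute name is invented, SYSDEV_CLASS_ATTR() is assumed to define "attr_foo" as in the drivers converted earlier in this series, and the array must be NULL terminated):

#include <linux/kernel.h>
#include <linux/sysdev.h>

static ssize_t foo_show(struct sysdev_class *class,
			struct sysdev_class_attribute *attr, char *buf)
{
	return sprintf(buf, "foo\n");
}

static SYSDEV_CLASS_ATTR(foo, 0444, foo_show, NULL);

/* Registered and unregistered automatically by
 * sysdev_class_register()/sysdev_class_unregister(). */
static struct sysdev_class_attribute *example_class_attrs[] = {
	&attr_foo,
	NULL,
};

static struct sysdev_class example_sysdev_class = {
	.name	= "example",
	.attrs	= example_class_attrs,
};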
Signed-off-by: Andi Kleen Signed-off-by: Greg Kroah-Hartman --- drivers/base/sys.c | 9 ++++++++- include/linux/sysdev.h | 2 ++ 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/base/sys.c b/drivers/base/sys.c index a38445c0f8c5..747c99e0568b 100644 --- a/drivers/base/sys.c +++ b/drivers/base/sys.c @@ -145,13 +145,20 @@ int sysdev_class_register(struct sysdev_class *cls) if (retval) return retval; - return kset_register(&cls->kset); + retval = kset_register(&cls->kset); + if (!retval && cls->attrs) + retval = sysfs_create_files(&cls->kset.kobj, + (const struct attribute **)cls->attrs); + return retval; } void sysdev_class_unregister(struct sysdev_class *cls) { pr_debug("Unregistering sysdev class '%s'\n", kobject_name(&cls->kset.kobj)); + if (cls->attrs) + sysfs_remove_files(&cls->kset.kobj, + (const struct attribute **)cls->attrs); kset_unregister(&cls->kset); } diff --git a/include/linux/sysdev.h b/include/linux/sysdev.h index c2458fa8376c..b6244f9b533f 100644 --- a/include/linux/sysdev.h +++ b/include/linux/sysdev.h @@ -27,10 +27,12 @@ struct sys_device; +struct sysdev_class_attribute; struct sysdev_class { const char *name; struct list_head drivers; + struct sysdev_class_attribute **attrs; /* Default operations for these types of devices */ int (*shutdown)(struct sys_device *); -- cgit v1.2.3 From 1e395ab3d9b6aa09c5f0aa46a1b0a6fc5bd33133 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 5 Jan 2010 12:48:05 +0100 Subject: sysdev: Add sysdev_create/remove_files Allow to create/remove arrays of sysdev attributes Just wrappers around sysfs_create/move_files Will be used later to clean up some drivers. Signed-off-by: Andi Kleen Signed-off-by: Greg Kroah-Hartman --- include/linux/sysdev.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/sysdev.h b/include/linux/sysdev.h index b6244f9b533f..1154c29f4101 100644 --- a/include/linux/sysdev.h +++ b/include/linux/sysdev.h @@ -123,6 +123,19 @@ struct sysdev_attribute { extern int sysdev_create_file(struct sys_device *, struct sysdev_attribute *); extern void sysdev_remove_file(struct sys_device *, struct sysdev_attribute *); +/* Create/remove NULL terminated attribute list */ +static inline int +sysdev_create_files(struct sys_device *d, struct sysdev_attribute **a) +{ + return sysfs_create_files(&d->kobj, (const struct attribute **)a); +} + +static inline void +sysdev_remove_files(struct sys_device *d, struct sysdev_attribute **a) +{ + return sysfs_remove_files(&d->kobj, (const struct attribute **)a); +} + struct sysdev_ext_attribute { struct sysdev_attribute attr; void *var; -- cgit v1.2.3 From 28812fe11a21826ba4c97c6c7971a619987cd912 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 5 Jan 2010 12:48:07 +0100 Subject: driver-core: Add attribute argument to class_attribute show/store Passing the attribute to the low level IO functions allows all kinds of cleanups, by sharing low level IO code without requiring an own function for every piece of data. Also drivers can extend the attributes with own data fields and use that in the low level function. This makes the class attributes the same as sysdev_class attributes and plain attributes. This will allow further cleanups in drivers. Full tree sweep converting all users. 
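A minimal converted class attribute, sketched with invented names:

#include <linux/device.h>
#include <linux/kernel.h>

/* The attribute pointer is now passed through, matching sysdev class
 * attributes and plain sysfs attributes. */
static ssize_t example_version_show(struct class *class,
				    struct class_attribute *attr, char *buf)
{
	return sprintf(buf, "1.0\n");
}

static CLASS_ATTR(example_version, 0444, example_version_show, NULL);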
Signed-off-by: Andi Kleen Signed-off-by: Greg Kroah-Hartman --- drivers/base/class.c | 4 ++-- drivers/base/cpu.c | 8 ++++++-- drivers/base/firmware_class.c | 8 ++++++-- drivers/base/memory.c | 11 ++++++++--- drivers/block/osdblk.c | 12 +++++++++--- drivers/block/pktcdvd.c | 12 +++++++++--- drivers/gpio/gpiolib.c | 8 ++++++-- drivers/gpu/drm/drm_sysfs.c | 3 ++- drivers/infiniband/core/ucm.c | 4 +++- drivers/infiniband/core/user_mad.c | 4 +++- drivers/infiniband/core/uverbs_main.c | 4 +++- drivers/misc/phantom.c | 2 +- drivers/mtd/ubi/build.c | 3 ++- drivers/net/bonding/bond_sysfs.c | 5 ++++- drivers/staging/asus_oled/asus_oled.c | 4 +++- drivers/uwb/driver.c | 5 ++++- include/linux/device.h | 6 ++++-- net/bluetooth/l2cap.c | 4 +++- net/bluetooth/rfcomm/core.c | 4 +++- net/bluetooth/rfcomm/sock.c | 4 +++- net/bluetooth/sco.c | 4 +++- 21 files changed, 87 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/drivers/base/class.c b/drivers/base/class.c index 6e2c3b064f53..34a2de9c5385 100644 --- a/drivers/base/class.c +++ b/drivers/base/class.c @@ -31,7 +31,7 @@ static ssize_t class_attr_show(struct kobject *kobj, struct attribute *attr, ssize_t ret = -EIO; if (class_attr->show) - ret = class_attr->show(cp->class, buf); + ret = class_attr->show(cp->class, class_attr, buf); return ret; } @@ -43,7 +43,7 @@ static ssize_t class_attr_store(struct kobject *kobj, struct attribute *attr, ssize_t ret = -EIO; if (class_attr->store) - ret = class_attr->store(cp->class, buf, count); + ret = class_attr->store(cp->class, class_attr, buf, count); return ret; } diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index fb456b729803..9121c77b77fa 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -79,13 +79,17 @@ void unregister_cpu(struct cpu *cpu) } #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE -static ssize_t cpu_probe_store(struct class *class, const char *buf, +static ssize_t cpu_probe_store(struct class *class, + struct class_attribute *attr, + const char *buf, size_t count) { return arch_cpu_probe(buf, count); } -static ssize_t cpu_release_store(struct class *class, const char *buf, +static ssize_t cpu_release_store(struct class *class, + struct class_attribute *attr, + const char *buf, size_t count) { return arch_cpu_release(buf, count); diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index a95024166b66..6604fb33d072 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -69,7 +69,9 @@ fw_load_abort(struct firmware_priv *fw_priv) } static ssize_t -firmware_timeout_show(struct class *class, char *buf) +firmware_timeout_show(struct class *class, + struct class_attribute *attr, + char *buf) { return sprintf(buf, "%d\n", loading_timeout); } @@ -87,7 +89,9 @@ firmware_timeout_show(struct class *class, char *buf) * Note: zero means 'wait forever'. 
**/ static ssize_t -firmware_timeout_store(struct class *class, const char *buf, size_t count) +firmware_timeout_store(struct class *class, + struct class_attribute *attr, + const char *buf, size_t count) { loading_timeout = simple_strtol(buf, NULL, 10); if (loading_timeout < 0) diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 563656ad75a1..495f15e92d4c 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -331,7 +331,8 @@ static int block_size_init(void) */ #ifdef CONFIG_ARCH_MEMORY_PROBE static ssize_t -memory_probe_store(struct class *class, const char *buf, size_t count) +memory_probe_store(struct class *class, struct class_attribute *attr, + const char *buf, size_t count) { u64 phys_addr; int nid; @@ -368,7 +369,9 @@ static inline int memory_probe_init(void) /* Soft offline a page */ static ssize_t -store_soft_offline_page(struct class *class, const char *buf, size_t count) +store_soft_offline_page(struct class *class, + struct class_attribute *attr, + const char *buf, size_t count) { int ret; u64 pfn; @@ -385,7 +388,9 @@ store_soft_offline_page(struct class *class, const char *buf, size_t count) /* Forcibly offline a page, including killing processes. */ static ssize_t -store_hard_offline_page(struct class *class, const char *buf, size_t count) +store_hard_offline_page(struct class *class, + struct class_attribute *attr, + const char *buf, size_t count) { int ret; u64 pfn; diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c index a808b1530b3b..eb2091aa1c19 100644 --- a/drivers/block/osdblk.c +++ b/drivers/block/osdblk.c @@ -476,7 +476,9 @@ static void class_osdblk_release(struct class *cls) kfree(cls); } -static ssize_t class_osdblk_list(struct class *c, char *data) +static ssize_t class_osdblk_list(struct class *c, + struct class_attribute *attr, + char *data) { int n = 0; struct list_head *tmp; @@ -500,7 +502,9 @@ static ssize_t class_osdblk_list(struct class *c, char *data) return n; } -static ssize_t class_osdblk_add(struct class *c, const char *buf, size_t count) +static ssize_t class_osdblk_add(struct class *c, + struct class_attribute *attr, + const char *buf, size_t count) { struct osdblk_device *osdev; ssize_t rc; @@ -592,7 +596,9 @@ err_out_mod: return rc; } -static ssize_t class_osdblk_remove(struct class *c, const char *buf, +static ssize_t class_osdblk_remove(struct class *c, + struct class_attribute *attr, + const char *buf, size_t count) { struct osdblk_device *osdev = NULL; diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index b72935b8f203..73d815d3f1b2 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -337,7 +337,9 @@ static void class_pktcdvd_release(struct class *cls) { kfree(cls); } -static ssize_t class_pktcdvd_show_map(struct class *c, char *data) +static ssize_t class_pktcdvd_show_map(struct class *c, + struct class_attribute *attr, + char *data) { int n = 0; int idx; @@ -356,7 +358,9 @@ static ssize_t class_pktcdvd_show_map(struct class *c, char *data) return n; } -static ssize_t class_pktcdvd_store_add(struct class *c, const char *buf, +static ssize_t class_pktcdvd_store_add(struct class *c, + struct class_attribute *attr, + const char *buf, size_t count) { unsigned int major, minor; @@ -376,7 +380,9 @@ static ssize_t class_pktcdvd_store_add(struct class *c, const char *buf, return -EINVAL; } -static ssize_t class_pktcdvd_store_remove(struct class *c, const char *buf, +static ssize_t class_pktcdvd_store_remove(struct class *c, + struct class_attribute *attr, + const char *buf, size_t 
count) { unsigned int major, minor; diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 9006fdb26fea..6d1b86661e63 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -623,7 +623,9 @@ static const struct attribute_group gpiochip_attr_group = { * /sys/class/gpio/unexport ... write-only * integer N ... number of GPIO to unexport */ -static ssize_t export_store(struct class *class, const char *buf, size_t len) +static ssize_t export_store(struct class *class, + struct class_attribute *attr, + const char *buf, size_t len) { long gpio; int status; @@ -653,7 +655,9 @@ done: return status ? : len; } -static ssize_t unexport_store(struct class *class, const char *buf, size_t len) +static ssize_t unexport_store(struct class *class, + struct class_attribute *attr, + const char *buf, size_t len) { long gpio; int status; diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c index 7e42b7e9d43a..b95aaf23596e 100644 --- a/drivers/gpu/drm/drm_sysfs.c +++ b/drivers/gpu/drm/drm_sysfs.c @@ -71,7 +71,8 @@ static int drm_class_resume(struct device *dev) } /* Display the version of drm_core. This doesn't work right in current design */ -static ssize_t version_show(struct class *dev, char *buf) +static ssize_t version_show(struct class *dev, struct class_attribute *attr, + char *buf) { return sprintf(buf, "%s %d.%d.%d %s\n", CORE_NAME, CORE_MAJOR, CORE_MINOR, CORE_PATCHLEVEL, CORE_DATE); diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 1b09b735c5a8..02e209ff33fd 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -1336,7 +1336,9 @@ static void ib_ucm_remove_one(struct ib_device *device) device_unregister(&ucm_dev->dev); } -static ssize_t show_abi_version(struct class *class, char *buf) +static ssize_t show_abi_version(struct class *class, + struct class_attribute *attr, + char *buf) { return sprintf(buf, "%d\n", IB_USER_CM_ABI_VERSION); } diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 02d360cfc2f7..d0de8f265f45 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -965,7 +965,9 @@ static ssize_t show_port(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR(port, S_IRUGO, show_port, NULL); -static ssize_t show_abi_version(struct class *class, char *buf) +static ssize_t show_abi_version(struct class *class, + struct class_attribute *attr, + char *buf) { return sprintf(buf, "%d\n", IB_USER_MAD_ABI_VERSION); } diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 4fa2e6516441..60879399207a 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -691,7 +691,9 @@ static ssize_t show_dev_abi_version(struct device *device, } static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL); -static ssize_t show_abi_version(struct class *class, char *buf) +static ssize_t show_abi_version(struct class *class, + struct class_attribute *attr, + char *buf) { return sprintf(buf, "%d\n", IB_USER_VERBS_ABI_VERSION); } diff --git a/drivers/misc/phantom.c b/drivers/misc/phantom.c index 04c27266f567..d30ae9560309 100644 --- a/drivers/misc/phantom.c +++ b/drivers/misc/phantom.c @@ -497,7 +497,7 @@ static struct pci_driver phantom_pci_driver = { .resume = phantom_resume }; -static ssize_t phantom_show_version(struct class *cls, char *buf) +static ssize_t phantom_show_version(struct class *cls, struct class_attribute *attr, char *buf) { 
return sprintf(buf, PHANTOM_VERSION "\n"); } diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index bc45ef9af17d..fad40aa6f099 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -89,7 +89,8 @@ DEFINE_MUTEX(ubi_devices_mutex); static DEFINE_SPINLOCK(ubi_devices_lock); /* "Show" method for files in '//class/ubi/' */ -static ssize_t ubi_version_show(struct class *class, char *buf) +static ssize_t ubi_version_show(struct class *class, struct class_attribute *attr, + char *buf) { return sprintf(buf, "%d\n", UBI_VERSION); } diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index 5acd557cea9b..b8bec086daa1 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -51,7 +51,9 @@ * "show" function for the bond_masters attribute. * The class parameter is ignored. */ -static ssize_t bonding_show_bonds(struct class *cls, char *buf) +static ssize_t bonding_show_bonds(struct class *cls, + struct class_attribute *attr, + char *buf) { struct net *net = current->nsproxy->net_ns; struct bond_net *bn = net_generic(net, bond_net_id); @@ -98,6 +100,7 @@ static struct net_device *bond_get_by_name(struct net *net, const char *ifname) */ static ssize_t bonding_store_bonds(struct class *cls, + struct class_attribute *attr, const char *buffer, size_t count) { struct net *net = current->nsproxy->net_ns; diff --git a/drivers/staging/asus_oled/asus_oled.c b/drivers/staging/asus_oled/asus_oled.c index cadb6f7321ad..7d93f50a0a64 100644 --- a/drivers/staging/asus_oled/asus_oled.c +++ b/drivers/staging/asus_oled/asus_oled.c @@ -770,7 +770,9 @@ static struct usb_driver oled_driver = { .id_table = id_table, }; -static ssize_t version_show(struct class *dev, char *buf) +static ssize_t version_show(struct class *dev, + struct class_attribute *attr, + char *buf) { return sprintf(buf, ASUS_OLED_UNDERSCORE_NAME " %s\n", ASUS_OLED_VERSION); diff --git a/drivers/uwb/driver.c b/drivers/uwb/driver.c index da77e41de990..08bd6dbfd4a6 100644 --- a/drivers/uwb/driver.c +++ b/drivers/uwb/driver.c @@ -74,13 +74,16 @@ unsigned long beacon_timeout_ms = 500; static -ssize_t beacon_timeout_ms_show(struct class *class, char *buf) +ssize_t beacon_timeout_ms_show(struct class *class, + struct class_attribute *attr, + char *buf) { return scnprintf(buf, PAGE_SIZE, "%lu\n", beacon_timeout_ms); } static ssize_t beacon_timeout_ms_store(struct class *class, + struct class_attribute *attr, const char *buf, size_t size) { unsigned long bt; diff --git a/include/linux/device.h b/include/linux/device.h index b30527db3ac0..190f8d30d1d3 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -251,8 +251,10 @@ extern struct device *class_find_device(struct class *class, struct class_attribute { struct attribute attr; - ssize_t (*show)(struct class *class, char *buf); - ssize_t (*store)(struct class *class, const char *buf, size_t count); + ssize_t (*show)(struct class *class, struct class_attribute *attr, + char *buf); + ssize_t (*store)(struct class *class, struct class_attribute *attr, + const char *buf, size_t count); }; #define CLASS_ATTR(_name, _mode, _show, _store) \ diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 400efa26ddba..4db7ae2fe07d 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -3937,7 +3937,9 @@ drop: return 0; } -static ssize_t l2cap_sysfs_show(struct class *dev, char *buf) +static ssize_t l2cap_sysfs_show(struct class *dev, + struct class_attribute *attr, + char *buf) { struct sock *sk; struct 
hlist_node *node; } diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 89f4a59eb82b..db8a68e1a5ba 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -2098,7 +2098,9 @@ static struct hci_cb rfcomm_cb = { .security_cfm = rfcomm_security_cfm }; -static ssize_t rfcomm_dlc_sysfs_show(struct class *dev, char *buf) +static ssize_t rfcomm_dlc_sysfs_show(struct class *dev, + struct class_attribute *attr, + char *buf) { struct rfcomm_session *s; struct list_head *pp, *p; diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 4b5968dda673..ca87d6ac6a20 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -1061,7 +1061,9 @@ done: return result; } -static ssize_t rfcomm_sock_sysfs_show(struct class *dev, char *buf) +static ssize_t rfcomm_sock_sysfs_show(struct class *dev, + struct class_attribute *attr, + char *buf) { struct sock *sk; struct hlist_node *node; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index dd8f6ec57dce..f93b939539bc 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -953,7 +953,9 @@ drop: return 0; } -static ssize_t sco_sysfs_show(struct class *dev, char *buf) +static ssize_t sco_sysfs_show(struct class *dev, + struct class_attribute *attr, + char *buf) { struct sock *sk; struct hlist_node *node; -- cgit v1.2.3 From 869dfc875e32fd832385fd52ce54525a10401ed6 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 5 Jan 2010 12:48:08 +0100 Subject: driver core: Add class_attr_string for simple read-only string Several drivers just export a static string as a class attribute. Use the new extensible attribute support to define a simple CLASS_ATTR_STRING() macro for this. This will allow code to be removed from drivers in follow-on patches.
Signed-off-by: Andi Kleen Signed-off-by: Greg Kroah-Hartman --- drivers/base/class.c | 10 ++++++++++ include/linux/device.h | 17 +++++++++++++++++ 2 files changed, 27 insertions(+) (limited to 'include') diff --git a/drivers/base/class.c b/drivers/base/class.c index 34a2de9c5385..2e297cc4cd3d 100644 --- a/drivers/base/class.c +++ b/drivers/base/class.c @@ -490,6 +490,16 @@ void class_interface_unregister(struct class_interface *class_intf) class_put(parent); } +ssize_t show_class_attr_string(struct class *class, struct class_attribute *attr, + char *buf) +{ + struct class_attribute_string *cs; + cs = container_of(attr, struct class_attribute_string, attr); + return snprintf(buf, PAGE_SIZE, "%s\n", cs->str); +} + +EXPORT_SYMBOL_GPL(show_class_attr_string); + struct class_compat { struct kobject *kobj; }; diff --git a/include/linux/device.h b/include/linux/device.h index 190f8d30d1d3..f95d5bfe8248 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -265,6 +265,23 @@ extern int __must_check class_create_file(struct class *class, extern void class_remove_file(struct class *class, const struct class_attribute *attr); +/* Simple class attribute that is just a static string */ + +struct class_attribute_string { + struct class_attribute attr; + char *str; +}; + +/* Currently read-only only */ +#define _CLASS_ATTR_STRING(_name, _mode, _str) \ + { __ATTR(_name, _mode, show_class_attr_string, NULL), _str } +#define CLASS_ATTR_STRING(_name, _mode, _str) \ + struct class_attribute_string class_attr_##_name = \ + _CLASS_ATTR_STRING(_name, _mode, _str) + +extern ssize_t show_class_attr_string(struct class *class, struct class_attribute *attr, + char *buf); + struct class_interface { struct list_head node; struct class *class; -- cgit v1.2.3 From 9cd43611ccfb46632bfa7d19f688924ea93f1613 Mon Sep 17 00:00:00 2001 From: Emese Revfy Date: Thu, 31 Dec 2009 14:52:51 +0100 Subject: kobject: Constify struct kset_uevent_ops Constify struct kset_uevent_ops. This is part of the ops structure constification effort started by Arjan van de Ven et al. 
Benefits of this constification: * prevents modification of data that is shared (referenced) by many other structure instances at runtime * detects/prevents accidental (but not intentional) modification attempts on archs that enforce read-only kernel data at runtime * potentially better optimized code as the compiler can assume that the const data cannot be changed * the compiler/linker move const data into .rodata and therefore exclude them from false sharing Signed-off-by: Emese Revfy Signed-off-by: Greg Kroah-Hartman --- drivers/base/bus.c | 2 +- drivers/base/core.c | 2 +- drivers/base/memory.c | 2 +- fs/gfs2/sys.c | 2 +- include/linux/kobject.h | 10 +++++----- kernel/params.c | 2 +- lib/kobject.c | 4 ++-- lib/kobject_uevent.c | 2 +- mm/slub.c | 2 +- 9 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/drivers/base/bus.c b/drivers/base/bus.c index c0c5a43d9fb3..2afe599eb35d 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -154,7 +154,7 @@ static int bus_uevent_filter(struct kset *kset, struct kobject *kobj) return 0; } -static struct kset_uevent_ops bus_uevent_ops = { +static const struct kset_uevent_ops bus_uevent_ops = { .filter = bus_uevent_filter, }; diff --git a/drivers/base/core.c b/drivers/base/core.c index f6c73a9e3d95..58ec1069f4b0 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -252,7 +252,7 @@ static int dev_uevent(struct kset *kset, struct kobject *kobj, return retval; } -static struct kset_uevent_ops device_uevent_ops = { +static const struct kset_uevent_ops device_uevent_ops = { .filter = dev_uevent_filter, .name = dev_uevent_name, .uevent = dev_uevent, diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 495f15e92d4c..2f8691511190 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -44,7 +44,7 @@ static int memory_uevent(struct kset *kset, struct kobject *obj, struct kobj_uev return retval; } -static struct kset_uevent_ops memory_uevent_ops = { +static const struct kset_uevent_ops memory_uevent_ops = { .name = memory_uevent_name, .uevent = memory_uevent, }; diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index b5f1a46133c8..543503010ed0 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -574,7 +574,7 @@ static int gfs2_uevent(struct kset *kset, struct kobject *kobj, return 0; } -static struct kset_uevent_ops gfs2_uevent_ops = { +static const struct kset_uevent_ops gfs2_uevent_ops = { .uevent = gfs2_uevent, }; diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 58ae8e00fcdd..57a1eaae9096 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -118,9 +118,9 @@ struct kobj_uevent_env { }; struct kset_uevent_ops { - int (*filter)(struct kset *kset, struct kobject *kobj); - const char *(*name)(struct kset *kset, struct kobject *kobj); - int (*uevent)(struct kset *kset, struct kobject *kobj, + int (* const filter)(struct kset *kset, struct kobject *kobj); + const char *(* const name)(struct kset *kset, struct kobject *kobj); + int (* const uevent)(struct kset *kset, struct kobject *kobj, struct kobj_uevent_env *env); }; @@ -155,14 +155,14 @@ struct kset { struct list_head list; spinlock_t list_lock; struct kobject kobj; - struct kset_uevent_ops *uevent_ops; + const struct kset_uevent_ops *uevent_ops; }; extern void kset_init(struct kset *kset); extern int __must_check kset_register(struct kset *kset); extern void kset_unregister(struct kset *kset); extern struct kset * __must_check kset_create_and_add(const char *name, - struct kset_uevent_ops *u, + const struct 
kset_uevent_ops *u, struct kobject *parent_kobj); static inline struct kset *to_kset(struct kobject *kobj) diff --git a/kernel/params.c b/kernel/params.c index 8d95f5451b22..48370be3c0a1 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -736,7 +736,7 @@ static int uevent_filter(struct kset *kset, struct kobject *kobj) return 0; } -static struct kset_uevent_ops module_uevent_ops = { +static const struct kset_uevent_ops module_uevent_ops = { .filter = uevent_filter, }; diff --git a/lib/kobject.c b/lib/kobject.c index b512b746d2af..cecf5a0ef6e1 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -789,7 +789,7 @@ static struct kobj_type kset_ktype = { * If the kset was not able to be created, NULL will be returned. */ static struct kset *kset_create(const char *name, - struct kset_uevent_ops *uevent_ops, + const struct kset_uevent_ops *uevent_ops, struct kobject *parent_kobj) { struct kset *kset; @@ -832,7 +832,7 @@ static struct kset *kset_create(const char *name, * If the kset was not able to be created, NULL will be returned. */ struct kset *kset_create_and_add(const char *name, - struct kset_uevent_ops *uevent_ops, + const struct kset_uevent_ops *uevent_ops, struct kobject *parent_kobj) { struct kset *kset; diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 920a3ca6e259..c9d3a3e8405d 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -95,7 +95,7 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, const char *subsystem; struct kobject *top_kobj; struct kset *kset; - struct kset_uevent_ops *uevent_ops; + const struct kset_uevent_ops *uevent_ops; u64 seq; int i = 0; int retval = 0; diff --git a/mm/slub.c b/mm/slub.c index 0bfd3863d521..a26753c12dcd 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4409,7 +4409,7 @@ static int uevent_filter(struct kset *kset, struct kobject *kobj) return 0; } -static struct kset_uevent_ops slab_uevent_ops = { +static const struct kset_uevent_ops slab_uevent_ops = { .filter = uevent_filter, }; -- cgit v1.2.3 From 52cf25d0ab7f78eeecc59ac652ed5090f69b619e Mon Sep 17 00:00:00 2001 From: Emese Revfy Date: Tue, 19 Jan 2010 02:58:23 +0100 Subject: Driver core: Constify struct sysfs_ops in struct kobj_type Constify struct sysfs_ops. This is part of the ops structure constification effort started by Arjan van de Ven et al. Benefits of this constification: * prevents modification of data that is shared (referenced) by many other structure instances at runtime * detects/prevents accidental (but not intentional) modification attempts on archs that enforce read-only kernel data at runtime * potentially better optimized code as the compiler can assume that the const data cannot be changed * the compiler/linker move const data into .rodata and therefore exclude them from false sharing Signed-off-by: Emese Revfy Acked-by: David Teigland Acked-by: Matt Domsch Acked-by: Maciej Sosnowski Acked-by: Hans J. 
Koch Acked-by: Pekka Enberg Acked-by: Jens Axboe Acked-by: Stephen Hemminger Signed-off-by: Greg Kroah-Hartman --- Documentation/kobject.txt | 2 +- arch/ia64/kernel/topology.c | 2 +- arch/powerpc/kernel/cacheinfo.c | 2 +- arch/sh/kernel/cpu/sh4/sq.c | 2 +- arch/x86/kernel/cpu/intel_cacheinfo.c | 2 +- arch/x86/kernel/cpu/mcheck/mce_amd.c | 2 +- block/blk-integrity.c | 2 +- block/blk-sysfs.c | 2 +- block/elevator.c | 2 +- drivers/base/bus.c | 4 ++-- drivers/base/class.c | 2 +- drivers/base/core.c | 2 +- drivers/base/sys.c | 4 ++-- drivers/block/pktcdvd.c | 2 +- drivers/cpufreq/cpufreq.c | 2 +- drivers/cpuidle/sysfs.c | 4 ++-- drivers/dma/ioat/dma.c | 2 +- drivers/dma/ioat/dma.h | 2 +- drivers/edac/edac_device_sysfs.c | 6 +++--- drivers/edac/edac_mc_sysfs.c | 4 ++-- drivers/edac/edac_pci_sysfs.c | 4 ++-- drivers/firmware/edd.c | 2 +- drivers/firmware/efivars.c | 2 +- drivers/firmware/iscsi_ibft.c | 2 +- drivers/firmware/memmap.c | 2 +- drivers/gpu/drm/ttm/ttm_bo.c | 2 +- drivers/gpu/drm/ttm/ttm_memory.c | 2 +- drivers/infiniband/core/cm.c | 2 +- drivers/infiniband/core/sysfs.c | 2 +- drivers/md/dm-sysfs.c | 2 +- drivers/md/md.c | 4 ++-- drivers/net/ibmveth.c | 2 +- drivers/net/iseries_veth.c | 4 ++-- drivers/parisc/pdc_stable.c | 2 +- drivers/pci/hotplug/fakephp.c | 2 +- drivers/pci/slot.c | 2 +- drivers/uio/uio.c | 4 ++-- drivers/uwb/wlp/sysfs.c | 3 +-- drivers/video/omap2/dss/manager.c | 2 +- drivers/video/omap2/dss/overlay.c | 2 +- drivers/xen/sys-hypervisor.c | 2 +- fs/btrfs/sysfs.c | 4 ++-- fs/dlm/lockspace.c | 2 +- fs/ext4/super.c | 2 +- fs/gfs2/sys.c | 2 +- fs/ocfs2/cluster/masklog.c | 2 +- fs/sysfs/file.c | 8 ++++---- include/linux/kobject.h | 4 ++-- kernel/params.c | 2 +- lib/kobject.c | 2 +- mm/slub.c | 2 +- net/bridge/br_private.h | 2 +- net/bridge/br_sysfs_if.c | 2 +- samples/kobject/kset-example.c | 2 +- 54 files changed, 69 insertions(+), 70 deletions(-) (limited to 'include') diff --git a/Documentation/kobject.txt b/Documentation/kobject.txt index c79ab996dada..bdb13817e1e9 100644 --- a/Documentation/kobject.txt +++ b/Documentation/kobject.txt @@ -266,7 +266,7 @@ kobj_type: struct kobj_type { void (*release)(struct kobject *); - struct sysfs_ops *sysfs_ops; + const struct sysfs_ops *sysfs_ops; struct attribute **default_attrs; }; diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index 8f060352e129..b3a5818088d9 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -282,7 +282,7 @@ static ssize_t cache_show(struct kobject * kobj, struct attribute * attr, char * return ret; } -static struct sysfs_ops cache_sysfs_ops = { +static const struct sysfs_ops cache_sysfs_ops = { .show = cache_show }; diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index bb37b1d19a58..01fe9ce28379 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -642,7 +642,7 @@ static struct kobj_attribute *cache_index_opt_attrs[] = { &cache_assoc_attr, }; -static struct sysfs_ops cache_index_ops = { +static const struct sysfs_ops cache_index_ops = { .show = cache_index_show, }; diff --git a/arch/sh/kernel/cpu/sh4/sq.c b/arch/sh/kernel/cpu/sh4/sq.c index fc065f9da6e5..14726eef1ce0 100644 --- a/arch/sh/kernel/cpu/sh4/sq.c +++ b/arch/sh/kernel/cpu/sh4/sq.c @@ -326,7 +326,7 @@ static struct attribute *sq_sysfs_attrs[] = { NULL, }; -static struct sysfs_ops sq_sysfs_ops = { +static const struct sysfs_ops sq_sysfs_ops = { .show = sq_sysfs_show, .store = sq_sysfs_store, }; diff --git 
a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index eddb1bdd1b8f..b3eeb66c0a51 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -903,7 +903,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr, return ret; } -static struct sysfs_ops sysfs_ops = { +static const struct sysfs_ops sysfs_ops = { .show = show, .store = store, }; diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 83a3d1f4efca..cda932ca3ade 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -388,7 +388,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr, return ret; } -static struct sysfs_ops threshold_ops = { +static const struct sysfs_ops threshold_ops = { .show = show, .store = store, }; diff --git a/block/blk-integrity.c b/block/blk-integrity.c index 15c630813b1c..96e83c2bdb94 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -278,7 +278,7 @@ static struct attribute *integrity_attrs[] = { NULL, }; -static struct sysfs_ops integrity_ops = { +static const struct sysfs_ops integrity_ops = { .show = &integrity_attr_show, .store = &integrity_attr_store, }; diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index e85442415db3..2ae2cb3f362f 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -450,7 +450,7 @@ static void blk_release_queue(struct kobject *kobj) kmem_cache_free(blk_requestq_cachep, q); } -static struct sysfs_ops queue_sysfs_ops = { +static const struct sysfs_ops queue_sysfs_ops = { .show = queue_attr_show, .store = queue_attr_store, }; diff --git a/block/elevator.c b/block/elevator.c index ee3a883840f2..df75676f6671 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -892,7 +892,7 @@ elv_attr_store(struct kobject *kobj, struct attribute *attr, return error; } -static struct sysfs_ops elv_sysfs_ops = { +static const struct sysfs_ops elv_sysfs_ops = { .show = elv_attr_show, .store = elv_attr_store, }; diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 2afe599eb35d..cca1aa10054c 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -70,7 +70,7 @@ static ssize_t drv_attr_store(struct kobject *kobj, struct attribute *attr, return ret; } -static struct sysfs_ops driver_sysfs_ops = { +static const struct sysfs_ops driver_sysfs_ops = { .show = drv_attr_show, .store = drv_attr_store, }; @@ -115,7 +115,7 @@ static ssize_t bus_attr_store(struct kobject *kobj, struct attribute *attr, return ret; } -static struct sysfs_ops bus_sysfs_ops = { +static const struct sysfs_ops bus_sysfs_ops = { .show = bus_attr_show, .store = bus_attr_store, }; diff --git a/drivers/base/class.c b/drivers/base/class.c index 2e297cc4cd3d..0147f476b8a9 100644 --- a/drivers/base/class.c +++ b/drivers/base/class.c @@ -63,7 +63,7 @@ static void class_release(struct kobject *kobj) kfree(cp); } -static struct sysfs_ops class_sysfs_ops = { +static const struct sysfs_ops class_sysfs_ops = { .show = class_attr_show, .store = class_attr_store, }; diff --git a/drivers/base/core.c b/drivers/base/core.c index 58ec1069f4b0..b0d6646a2814 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -100,7 +100,7 @@ static ssize_t dev_attr_store(struct kobject *kobj, struct attribute *attr, return ret; } -static struct sysfs_ops dev_sysfs_ops = { +static const struct sysfs_ops dev_sysfs_ops = { .show = dev_attr_show, .store = dev_attr_store, }; diff --git a/drivers/base/sys.c b/drivers/base/sys.c index 747c99e0568b..8980feec5d14 
100644 --- a/drivers/base/sys.c +++ b/drivers/base/sys.c @@ -54,7 +54,7 @@ sysdev_store(struct kobject *kobj, struct attribute *attr, return -EIO; } -static struct sysfs_ops sysfs_ops = { +static const struct sysfs_ops sysfs_ops = { .show = sysdev_show, .store = sysdev_store, }; @@ -104,7 +104,7 @@ static ssize_t sysdev_class_store(struct kobject *kobj, struct attribute *attr, return -EIO; } -static struct sysfs_ops sysfs_class_ops = { +static const struct sysfs_ops sysfs_class_ops = { .show = sysdev_class_show, .store = sysdev_class_store, }; diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 73d815d3f1b2..39c8514442eb 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -284,7 +284,7 @@ static ssize_t kobj_pkt_store(struct kobject *kobj, return len; } -static struct sysfs_ops kobj_pkt_ops = { +static const struct sysfs_ops kobj_pkt_ops = { .show = kobj_pkt_show, .store = kobj_pkt_store }; diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 67bc2ece7b4b..2d5d575e889d 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -766,7 +766,7 @@ static void cpufreq_sysfs_release(struct kobject *kobj) complete(&policy->kobj_unregister); } -static struct sysfs_ops sysfs_ops = { +static const struct sysfs_ops sysfs_ops = { .show = show, .store = store, }; diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index c9cefacabf37..8719b36e1a4d 100644 --- a/drivers/cpuidle/sysfs.c +++ b/drivers/cpuidle/sysfs.c @@ -195,7 +195,7 @@ static ssize_t cpuidle_store(struct kobject * kobj, struct attribute * attr, return ret; } -static struct sysfs_ops cpuidle_sysfs_ops = { +static const struct sysfs_ops cpuidle_sysfs_ops = { .show = cpuidle_show, .store = cpuidle_store, }; @@ -281,7 +281,7 @@ static ssize_t cpuidle_state_show(struct kobject * kobj, return ret; } -static struct sysfs_ops cpuidle_state_sysfs_ops = { +static const struct sysfs_ops cpuidle_state_sysfs_ops = { .show = cpuidle_state_show, }; diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index af14c9a5b8d4..0099340b9616 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -1138,7 +1138,7 @@ ioat_attr_show(struct kobject *kobj, struct attribute *attr, char *page) return entry->show(&chan->common, page); } -struct sysfs_ops ioat_sysfs_ops = { +const struct sysfs_ops ioat_sysfs_ops = { .show = ioat_attr_show, }; diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index 4f747a254074..86b97ac8774e 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -346,7 +346,7 @@ bool ioat_cleanup_preamble(struct ioat_chan_common *chan, unsigned long *phys_complete); void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type); void ioat_kobject_del(struct ioatdma_device *device); -extern struct sysfs_ops ioat_sysfs_ops; +extern const struct sysfs_ops ioat_sysfs_ops; extern struct ioat_sysfs_entry ioat_version_attr; extern struct ioat_sysfs_entry ioat_cap_attr; #endif /* IOATDMA_H */ diff --git a/drivers/edac/edac_device_sysfs.c b/drivers/edac/edac_device_sysfs.c index 53764577035f..5fdedbc0f545 100644 --- a/drivers/edac/edac_device_sysfs.c +++ b/drivers/edac/edac_device_sysfs.c @@ -137,7 +137,7 @@ static ssize_t edac_dev_ctl_info_store(struct kobject *kobj, } /* edac_dev file operations for an 'ctl_info' */ -static struct sysfs_ops device_ctl_info_ops = { +static const struct sysfs_ops device_ctl_info_ops = { .show = edac_dev_ctl_info_show, .store = edac_dev_ctl_info_store }; @@ -373,7 +373,7 @@ static ssize_t 
edac_dev_instance_store(struct kobject *kobj, } /* edac_dev file operations for an 'instance' */ -static struct sysfs_ops device_instance_ops = { +static const struct sysfs_ops device_instance_ops = { .show = edac_dev_instance_show, .store = edac_dev_instance_store }; @@ -476,7 +476,7 @@ static ssize_t edac_dev_block_store(struct kobject *kobj, } /* edac_dev file operations for a 'block' */ -static struct sysfs_ops device_block_ops = { +static const struct sysfs_ops device_block_ops = { .show = edac_dev_block_show, .store = edac_dev_block_store }; diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index e1d4ce083481..88840e9fa3e0 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -245,7 +245,7 @@ static ssize_t csrowdev_store(struct kobject *kobj, struct attribute *attr, return -EIO; } -static struct sysfs_ops csrowfs_ops = { +static const struct sysfs_ops csrowfs_ops = { .show = csrowdev_show, .store = csrowdev_store }; @@ -575,7 +575,7 @@ static ssize_t mcidev_store(struct kobject *kobj, struct attribute *attr, } /* Intermediate show/store table */ -static struct sysfs_ops mci_ops = { +static const struct sysfs_ops mci_ops = { .show = mcidev_show, .store = mcidev_store }; diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c index fb60a877d768..bef94e3d9944 100644 --- a/drivers/edac/edac_pci_sysfs.c +++ b/drivers/edac/edac_pci_sysfs.c @@ -121,7 +121,7 @@ static ssize_t edac_pci_instance_store(struct kobject *kobj, } /* fs_ops table */ -static struct sysfs_ops pci_instance_ops = { +static const struct sysfs_ops pci_instance_ops = { .show = edac_pci_instance_show, .store = edac_pci_instance_store }; @@ -261,7 +261,7 @@ static ssize_t edac_pci_dev_store(struct kobject *kobj, return -EIO; } -static struct sysfs_ops edac_pci_sysfs_ops = { +static const struct sysfs_ops edac_pci_sysfs_ops = { .show = edac_pci_dev_show, .store = edac_pci_dev_store }; diff --git a/drivers/firmware/edd.c b/drivers/firmware/edd.c index 9e4f59dc7f1e..110e24e50883 100644 --- a/drivers/firmware/edd.c +++ b/drivers/firmware/edd.c @@ -122,7 +122,7 @@ edd_attr_show(struct kobject * kobj, struct attribute *attr, char *buf) return ret; } -static struct sysfs_ops edd_attr_ops = { +static const struct sysfs_ops edd_attr_ops = { .show = edd_attr_show, }; diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index f4f709d1370b..082f06ecd327 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -362,7 +362,7 @@ static ssize_t efivar_attr_store(struct kobject *kobj, struct attribute *attr, return ret; } -static struct sysfs_ops efivar_attr_ops = { +static const struct sysfs_ops efivar_attr_ops = { .show = efivar_attr_show, .store = efivar_attr_store, }; diff --git a/drivers/firmware/iscsi_ibft.c b/drivers/firmware/iscsi_ibft.c index a3600e3ed0fa..ed2801c378de 100644 --- a/drivers/firmware/iscsi_ibft.c +++ b/drivers/firmware/iscsi_ibft.c @@ -519,7 +519,7 @@ static ssize_t ibft_show_attribute(struct kobject *kobj, return ret; } -static struct sysfs_ops ibft_attr_ops = { +static const struct sysfs_ops ibft_attr_ops = { .show = ibft_show_attribute, }; diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c index 20f645743ead..d59f7cad2269 100644 --- a/drivers/firmware/memmap.c +++ b/drivers/firmware/memmap.c @@ -74,7 +74,7 @@ static struct attribute *def_attrs[] = { NULL }; -static struct sysfs_ops memmap_attr_ops = { +static const struct sysfs_ops memmap_attr_ops = { .show = memmap_attr_show, }; diff --git 
a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index c7320ce4567d..89c38c49066f 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -128,7 +128,7 @@ static struct attribute *ttm_bo_global_attrs[] = { NULL }; -static struct sysfs_ops ttm_bo_global_ops = { +static const struct sysfs_ops ttm_bo_global_ops = { .show = &ttm_bo_global_show }; diff --git a/drivers/gpu/drm/ttm/ttm_memory.c b/drivers/gpu/drm/ttm/ttm_memory.c index f5245c02b8fd..eb143e04d402 100644 --- a/drivers/gpu/drm/ttm/ttm_memory.c +++ b/drivers/gpu/drm/ttm/ttm_memory.c @@ -152,7 +152,7 @@ static struct attribute *ttm_mem_zone_attrs[] = { NULL }; -static struct sysfs_ops ttm_mem_zone_ops = { +static const struct sysfs_ops ttm_mem_zone_ops = { .show = &ttm_mem_zone_show, .store = &ttm_mem_zone_store }; diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 5130fc55b8e2..764787ebe8d8 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -3597,7 +3597,7 @@ static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr, atomic_long_read(&group->counter[cm_attr->index])); } -static struct sysfs_ops cm_counter_ops = { +static const struct sysfs_ops cm_counter_ops = { .show = cm_show_counter }; diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 158a214da2f7..1558bb7fc74d 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -79,7 +79,7 @@ static ssize_t port_attr_show(struct kobject *kobj, return port_attr->show(p, port_attr, buf); } -static struct sysfs_ops port_sysfs_ops = { +static const struct sysfs_ops port_sysfs_ops = { .show = port_attr_show }; diff --git a/drivers/md/dm-sysfs.c b/drivers/md/dm-sysfs.c index f91b40942e07..84d2b91e4efb 100644 --- a/drivers/md/dm-sysfs.c +++ b/drivers/md/dm-sysfs.c @@ -75,7 +75,7 @@ static struct attribute *dm_attrs[] = { NULL, }; -static struct sysfs_ops dm_sysfs_ops = { +static const struct sysfs_ops dm_sysfs_ops = { .show = dm_attr_show, }; diff --git a/drivers/md/md.c b/drivers/md/md.c index a20a71e5efd3..fdc1890b6ac5 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2642,7 +2642,7 @@ static void rdev_free(struct kobject *ko) mdk_rdev_t *rdev = container_of(ko, mdk_rdev_t, kobj); kfree(rdev); } -static struct sysfs_ops rdev_sysfs_ops = { +static const struct sysfs_ops rdev_sysfs_ops = { .show = rdev_attr_show, .store = rdev_attr_store, }; @@ -4059,7 +4059,7 @@ static void md_free(struct kobject *ko) kfree(mddev); } -static struct sysfs_ops md_sysfs_ops = { +static const struct sysfs_ops md_sysfs_ops = { .show = md_attr_show, .store = md_attr_store, }; diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c index f2b937966950..0bc777bac9b4 100644 --- a/drivers/net/ibmveth.c +++ b/drivers/net/ibmveth.c @@ -1577,7 +1577,7 @@ static struct attribute * veth_pool_attrs[] = { NULL, }; -static struct sysfs_ops veth_pool_ops = { +static const struct sysfs_ops veth_pool_ops = { .show = veth_pool_show, .store = veth_pool_store, }; diff --git a/drivers/net/iseries_veth.c b/drivers/net/iseries_veth.c index 966de5d69521..e6e972d9b7ca 100644 --- a/drivers/net/iseries_veth.c +++ b/drivers/net/iseries_veth.c @@ -384,7 +384,7 @@ static struct attribute *veth_cnx_default_attrs[] = { NULL }; -static struct sysfs_ops veth_cnx_sysfs_ops = { +static const struct sysfs_ops veth_cnx_sysfs_ops = { .show = veth_cnx_attribute_show }; @@ -441,7 +441,7 @@ static struct attribute *veth_port_default_attrs[] = { NULL }; -static struct sysfs_ops 
veth_port_sysfs_ops = { +static const struct sysfs_ops veth_port_sysfs_ops = { .show = veth_port_attribute_show }; diff --git a/drivers/parisc/pdc_stable.c b/drivers/parisc/pdc_stable.c index 0bc5d474b168..1062b8ffe244 100644 --- a/drivers/parisc/pdc_stable.c +++ b/drivers/parisc/pdc_stable.c @@ -481,7 +481,7 @@ pdcspath_attr_store(struct kobject *kobj, struct attribute *attr, return ret; } -static struct sysfs_ops pdcspath_attr_ops = { +static const struct sysfs_ops pdcspath_attr_ops = { .show = pdcspath_attr_show, .store = pdcspath_attr_store, }; diff --git a/drivers/pci/hotplug/fakephp.c b/drivers/pci/hotplug/fakephp.c index 6151389fd903..0a894efd4b9b 100644 --- a/drivers/pci/hotplug/fakephp.c +++ b/drivers/pci/hotplug/fakephp.c @@ -73,7 +73,7 @@ static void legacy_release(struct kobject *kobj) } static struct kobj_type legacy_ktype = { - .sysfs_ops = &(struct sysfs_ops){ + .sysfs_ops = &(const struct sysfs_ops){ .store = legacy_store, .show = legacy_show }, .release = &legacy_release, diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c index 49c9e6c9779a..f75a44d37fbe 100644 --- a/drivers/pci/slot.c +++ b/drivers/pci/slot.c @@ -29,7 +29,7 @@ static ssize_t pci_slot_attr_store(struct kobject *kobj, return attribute->store ? attribute->store(slot, buf, len) : -EIO; } -static struct sysfs_ops pci_slot_sysfs_ops = { +static const struct sysfs_ops pci_slot_sysfs_ops = { .show = pci_slot_attr_show, .store = pci_slot_attr_store, }; diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index e941367dd28f..4de382acd8f2 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -129,7 +129,7 @@ static ssize_t map_type_show(struct kobject *kobj, struct attribute *attr, return entry->show(mem, buf); } -static struct sysfs_ops map_sysfs_ops = { +static const struct sysfs_ops map_sysfs_ops = { .show = map_type_show, }; @@ -217,7 +217,7 @@ static ssize_t portio_type_show(struct kobject *kobj, struct attribute *attr, return entry->show(port, buf); } -static struct sysfs_ops portio_sysfs_ops = { +static const struct sysfs_ops portio_sysfs_ops = { .show = portio_type_show, }; diff --git a/drivers/uwb/wlp/sysfs.c b/drivers/uwb/wlp/sysfs.c index 0370399ff4bb..6627c94cc854 100644 --- a/drivers/uwb/wlp/sysfs.c +++ b/drivers/uwb/wlp/sysfs.c @@ -615,8 +615,7 @@ ssize_t wlp_wss_attr_store(struct kobject *kobj, struct attribute *attr, return ret; } -static -struct sysfs_ops wss_sysfs_ops = { +static const struct sysfs_ops wss_sysfs_ops = { .show = wlp_wss_attr_show, .store = wlp_wss_attr_store, }; diff --git a/drivers/video/omap2/dss/manager.c b/drivers/video/omap2/dss/manager.c index 913142d4cab1..9acef00c47ea 100644 --- a/drivers/video/omap2/dss/manager.c +++ b/drivers/video/omap2/dss/manager.c @@ -341,7 +341,7 @@ static ssize_t manager_attr_store(struct kobject *kobj, struct attribute *attr, return manager_attr->store(manager, buf, size); } -static struct sysfs_ops manager_sysfs_ops = { +static const struct sysfs_ops manager_sysfs_ops = { .show = manager_attr_show, .store = manager_attr_store, }; diff --git a/drivers/video/omap2/dss/overlay.c b/drivers/video/omap2/dss/overlay.c index 0c5bea263ac6..aed3f3194347 100644 --- a/drivers/video/omap2/dss/overlay.c +++ b/drivers/video/omap2/dss/overlay.c @@ -320,7 +320,7 @@ static ssize_t overlay_attr_store(struct kobject *kobj, struct attribute *attr, return overlay_attr->store(overlay, buf, size); } -static struct sysfs_ops overlay_sysfs_ops = { +static const struct sysfs_ops overlay_sysfs_ops = { .show = overlay_attr_show, .store = overlay_attr_store, }; diff --git 
a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c index ae5cb05a1a1c..bb71ab2336c8 100644 --- a/drivers/xen/sys-hypervisor.c +++ b/drivers/xen/sys-hypervisor.c @@ -426,7 +426,7 @@ static ssize_t hyp_sysfs_store(struct kobject *kobj, return 0; } -static struct sysfs_ops hyp_sysfs_ops = { +static const struct sysfs_ops hyp_sysfs_ops = { .show = hyp_sysfs_show, .store = hyp_sysfs_store, }; diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index a240b6fa81df..4ce16ef702a3 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -164,12 +164,12 @@ static void btrfs_root_release(struct kobject *kobj) complete(&root->kobj_unregister); } -static struct sysfs_ops btrfs_super_attr_ops = { +static const struct sysfs_ops btrfs_super_attr_ops = { .show = btrfs_super_attr_show, .store = btrfs_super_attr_store, }; -static struct sysfs_ops btrfs_root_attr_ops = { +static const struct sysfs_ops btrfs_root_attr_ops = { .show = btrfs_root_attr_show, .store = btrfs_root_attr_store, }; diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 26a8bd40400a..f994a7dfda85 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -148,7 +148,7 @@ static void lockspace_kobj_release(struct kobject *k) kfree(ls); } -static struct sysfs_ops dlm_attr_ops = { +static const struct sysfs_ops dlm_attr_ops = { .show = dlm_attr_show, .store = dlm_attr_store, }; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 2b83b96cb2eb..ce84a6ed4a48 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2358,7 +2358,7 @@ static void ext4_sb_release(struct kobject *kobj) } -static struct sysfs_ops ext4_attr_ops = { +static const struct sysfs_ops ext4_attr_ops = { .show = ext4_attr_show, .store = ext4_attr_store, }; diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 543503010ed0..419042f7f0b6 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -49,7 +49,7 @@ static ssize_t gfs2_attr_store(struct kobject *kobj, struct attribute *attr, return a->store ? 
a->store(sdp, buf, len) : len; } -static struct sysfs_ops gfs2_attr_ops = { +static const struct sysfs_ops gfs2_attr_ops = { .show = gfs2_attr_show, .store = gfs2_attr_store, }; diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c index b39da877b12f..3bb928a2bf7d 100644 --- a/fs/ocfs2/cluster/masklog.c +++ b/fs/ocfs2/cluster/masklog.c @@ -136,7 +136,7 @@ static ssize_t mlog_store(struct kobject *obj, struct attribute *attr, return mlog_mask_store(mlog_attr->mask, buf, count); } -static struct sysfs_ops mlog_attr_ops = { +static const struct sysfs_ops mlog_attr_ops = { .show = mlog_show, .store = mlog_store, }; diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 50b725bcc3f3..ced2299f1c9a 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -53,7 +53,7 @@ struct sysfs_buffer { size_t count; loff_t pos; char * page; - struct sysfs_ops * ops; + const struct sysfs_ops * ops; struct mutex mutex; int needs_read_fill; int event; @@ -75,7 +75,7 @@ static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer { struct sysfs_dirent *attr_sd = dentry->d_fsdata; struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; - struct sysfs_ops * ops = buffer->ops; + const struct sysfs_ops * ops = buffer->ops; int ret = 0; ssize_t count; @@ -199,7 +199,7 @@ flush_write_buffer(struct dentry * dentry, struct sysfs_buffer * buffer, size_t { struct sysfs_dirent *attr_sd = dentry->d_fsdata; struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; - struct sysfs_ops * ops = buffer->ops; + const struct sysfs_ops * ops = buffer->ops; int rc; /* need attr_sd for attr and ops, its parent for kobj */ @@ -335,7 +335,7 @@ static int sysfs_open_file(struct inode *inode, struct file *file) struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; struct sysfs_buffer *buffer; - struct sysfs_ops *ops; + const struct sysfs_ops *ops; int error = -EACCES; char *p; diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 57a1eaae9096..3950d3c2850d 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -106,7 +106,7 @@ extern char *kobject_get_path(struct kobject *kobj, gfp_t flag); struct kobj_type { void (*release)(struct kobject *kobj); - struct sysfs_ops *sysfs_ops; + const struct sysfs_ops *sysfs_ops; struct attribute **default_attrs; }; @@ -132,7 +132,7 @@ struct kobj_attribute { const char *buf, size_t count); }; -extern struct sysfs_ops kobj_sysfs_ops; +extern const struct sysfs_ops kobj_sysfs_ops; /** * struct kset - a set of kobjects of a specific type, belonging to a specific subsystem. 
diff --git a/kernel/params.c b/kernel/params.c index 48370be3c0a1..68396d73c838 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -722,7 +722,7 @@ static ssize_t module_attr_store(struct kobject *kobj, return ret; } -static struct sysfs_ops module_sysfs_ops = { +static const struct sysfs_ops module_sysfs_ops = { .show = module_attr_show, .store = module_attr_store, }; diff --git a/lib/kobject.c b/lib/kobject.c index cecf5a0ef6e1..8115eb1bbf4d 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -700,7 +700,7 @@ static ssize_t kobj_attr_store(struct kobject *kobj, struct attribute *attr, return ret; } -struct sysfs_ops kobj_sysfs_ops = { +const struct sysfs_ops kobj_sysfs_ops = { .show = kobj_attr_show, .store = kobj_attr_store, }; diff --git a/mm/slub.c b/mm/slub.c index a26753c12dcd..a2b8969ba6d0 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4390,7 +4390,7 @@ static void kmem_cache_release(struct kobject *kobj) kfree(s); } -static struct sysfs_ops slab_sysfs_ops = { +static const struct sysfs_ops slab_sysfs_ops = { .show = slab_attr_show, .store = slab_attr_store, }; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 1cf2cef78584..fef0384e3c0b 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -423,7 +423,7 @@ extern void br_ifinfo_notify(int event, struct net_bridge_port *port); #ifdef CONFIG_SYSFS /* br_sysfs_if.c */ -extern struct sysfs_ops brport_sysfs_ops; +extern const struct sysfs_ops brport_sysfs_ops; extern int br_sysfs_addif(struct net_bridge_port *p); /* br_sysfs_br.c */ diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 696596cd3384..0b9916489d6b 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -238,7 +238,7 @@ static ssize_t brport_store(struct kobject * kobj, return ret; } -struct sysfs_ops brport_sysfs_ops = { +const struct sysfs_ops brport_sysfs_ops = { .show = brport_show, .store = brport_store, }; diff --git a/samples/kobject/kset-example.c b/samples/kobject/kset-example.c index 7c6088140528..3b126d1f8599 100644 --- a/samples/kobject/kset-example.c +++ b/samples/kobject/kset-example.c @@ -87,7 +87,7 @@ static ssize_t foo_attr_store(struct kobject *kobj, } /* Our custom sysfs_ops that we will associate with our ktype later on */ -static struct sysfs_ops foo_sysfs_ops = { +static const struct sysfs_ops foo_sysfs_ops = { .show = foo_attr_show, .store = foo_attr_store, }; -- cgit v1.2.3 From 831fad2f75f0d7bfc339de81173e7068a3c72276 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Tue, 26 Jan 2010 09:35:00 +0100 Subject: Driver core: make struct platform_driver.id_table const MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes a warning on several pxa based machines: arch/arm/mach-pxa/ssp.c:475: warning: initialization discards qualifiers from pointer target type Signed-off-by: Uwe Kleine-König Acked-by: Vikram Dhillon Acked-by: Eric Miao Signed-off-by: Greg Kroah-Hartman --- drivers/base/platform.c | 2 +- include/linux/platform_device.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 937d58021d1b..575e08bc6630 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -636,7 +636,7 @@ static int platform_uevent(struct device *dev, struct kobj_uevent_env *env) } static const struct platform_device_id *platform_match_id( - struct platform_device_id *id, + const struct platform_device_id *id, struct platform_device *pdev) { while 
(id->name[0]) { diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 2c2d035bfb92..212da17d06af 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -62,7 +62,7 @@ struct platform_driver { int (*suspend)(struct platform_device *, pm_message_t state); int (*resume)(struct platform_device *); struct device_driver driver; - struct platform_device_id *id_table; + const struct platform_device_id *id_table; }; extern int platform_driver_register(struct platform_driver *); -- cgit v1.2.3 From 6992f5334995af474c2b58d010d08bc597f0f2fe Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 11 Feb 2010 15:21:53 -0800 Subject: sysfs: Use one lockdep class per sysfs attribute. Acknowledge that the logical sysfs rwsem has one instance per sysfs attribute with different locking dependencies for different attributes. There is a sysfs idiom where writing to one sysfs file causes the addition or removal of other sysfs files. Lumping all of the sysfs attributes together in one lock class causes lockdep to generate lots of false positives. This introduces the requirement that non-static sysfs attributes need to be initialized with sysfs_attr_init or sysfs_bin_attr_init. Strictly speaking this requirement only exists when lockdep is enabled, and when lockdep is enabled we get a big fat warning if this requirement is not met. Signed-off-by: Eric W. Biederman Acked-by: WANG Cong Cc: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/sysfs.h | 7 +++++-- include/linux/sysfs.h | 18 ++++++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index 7db6884f4206..37e0e086233c 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -92,9 +92,12 @@ static inline unsigned int sysfs_type(struct sysfs_dirent *sd) #ifdef CONFIG_DEBUG_LOCK_ALLOC #define sysfs_dirent_init_lockdep(sd) \ do { \ - static struct lock_class_key __key; \ + struct attribute *attr = sd->s_attr.attr; \ + struct lock_class_key *key = attr->key; \ + if (!key) \ + key = &attr->skey; \ \ - lockdep_init_map(&sd->dep_map, "s_active", &__key, 0); \ + lockdep_init_map(&sd->dep_map, "s_active", key, 0); \ } while(0) #else #define sysfs_dirent_init_lockdep(sd) do {} while(0) diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 3e8526582146..006c359e63c0 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -15,6 +15,7 @@ #include #include #include +#include #include struct kobject; @@ -29,8 +30,23 @@ struct attribute { const char *name; struct module *owner; mode_t mode; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lock_class_key *key; + struct lock_class_key skey; +#endif }; +#ifdef CONFIG_DEBUG_LOCK_ALLOC +#define sysfs_attr_init(attr) \ +do { \ + static struct lock_class_key __key; \ + \ + (attr)->key = &__key; \ +} while(0) +#else +#define sysfs_attr_init(attr) do {} while(0) +#endif + struct attribute_group { const char *name; mode_t (*is_visible)(struct kobject *, @@ -74,6 +90,8 @@ struct bin_attribute { struct vm_area_struct *vma); }; +#define sysfs_bin_attr_init(bin_attr) sysfs_attr_init(&bin_attr->attr) + struct sysfs_ops { ssize_t (*show)(struct kobject *, struct attribute *,char *); ssize_t (*store)(struct kobject *,struct attribute *,const char *, size_t); }; -- cgit v1.2.3 From 35960258ed388cdcebdb71df35fd5126978ca325 Mon Sep 17 00:00:00 2001 From: "Eric W. 
Biederman" Date: Fri, 12 Feb 2010 04:35:32 -0800 Subject: sysfs: Document sysfs_attr_init and sysfs_bin_attr_init I have added a new requirement to the external sysfs interface that dynamically allocated sysfs attributes must call sysfs_attr_init if lockdep is enabled. For the time being callying sysfs_attr_init is only mandatory if lockdep is enabled, so we can live with a few unconverted instances until we find them all. As this is part of the public interface of sysfs it is a good idea to document these pseudo functions so someone inspeciting the code can find out what has happened. Signed-off-by: Eric W. Biederman Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 006c359e63c0..5b8f80f5aca6 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -36,6 +36,16 @@ struct attribute { #endif }; +/** + * sysfs_attr_init - initialize a dynamically allocated sysfs attribute + * @attr: struct attribute to initialize + * + * Initialize a dynamically allocated struct attribute so we can + * make lockdep happy. This is a new requirement for attributes + * and initially this is only needed when lockdep is enabled. + * Lockdep gives a nice error when your attribute is added to + * sysfs if you don't have this. + */ #ifdef CONFIG_DEBUG_LOCK_ALLOC #define sysfs_attr_init(attr) \ do { \ @@ -90,6 +100,16 @@ struct bin_attribute { struct vm_area_struct *vma); }; +/** + * sysfs_bin_attr_init - initialize a dynamically allocated bin_attribute + * @attr: struct bin_attribute to initialize + * + * Initialize a dynamically allocated struct bin_attribute so we + * can make lockdep happy. This is a new requirement for + * attributes and initially this is only needed when lockdep is + * enabled. Lockdep gives a nice error when your attribute is + * added to sysfs if you don't have this. + */ #define sysfs_bin_attr_init(bin_attr) sysfs_attr_init(&bin_attr->attr) struct sysfs_ops { -- cgit v1.2.3 From 7cb32942d91a501b2df944928ccc9e6590ab237b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 12 Feb 2010 19:22:25 -0800 Subject: sysfs: Implement sysfs_rename_link Because of rename ordering problems we occassionally give false warnings about invalid sysfs operations. So using sysfs_rename create a sysfs_rename_link function that doesn't need strange workarounds. Cc: Benjamin Thery Cc: Daniel Lezcano Acked-by: Serge Hallyn Acked-by: Tejun Heo Signed-off-by: Eric W. Biederman Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/symlink.c | 38 ++++++++++++++++++++++++++++++++++++++ include/linux/sysfs.h | 9 +++++++++ 2 files changed, 47 insertions(+) (limited to 'include') diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index c5eff49fa41b..1b9a3a1e8a17 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c @@ -123,6 +123,44 @@ void sysfs_remove_link(struct kobject * kobj, const char * name) sysfs_hash_and_remove(parent_sd, name); } +/** + * sysfs_rename_link - rename symlink in object's directory. + * @kobj: object we're acting for. + * @targ: object we're pointing to. + * @old: previous name of the symlink. + * @new: new name of the symlink. + * + * A helper function for the common rename symlink idiom. 
+ */ +int sysfs_rename_link(struct kobject *kobj, struct kobject *targ, + const char *old, const char *new) +{ + struct sysfs_dirent *parent_sd, *sd = NULL; + int result; + + if (!kobj) + parent_sd = &sysfs_root; + else + parent_sd = kobj->sd; + + result = -ENOENT; + sd = sysfs_get_dirent(parent_sd, old); + if (!sd) + goto out; + + result = -EINVAL; + if (sysfs_type(sd) != SYSFS_KOBJ_LINK) + goto out; + if (sd->s_symlink.target_sd->s_dir.kobj != targ) + goto out; + + result = sysfs_rename(sd, parent_sd, new); + +out: + sysfs_put(sd); + return result; +} + static int sysfs_get_target_path(struct sysfs_dirent *parent_sd, struct sysfs_dirent *target_sd, char *path) { diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 5b8f80f5aca6..d77cde6d0498 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -151,6 +151,9 @@ int __must_check sysfs_create_link_nowarn(struct kobject *kobj, const char *name); void sysfs_remove_link(struct kobject *kobj, const char *name); +int sysfs_rename_link(struct kobject *kobj, struct kobject *target, + const char *old_name, const char *new_name); + int __must_check sysfs_create_group(struct kobject *kobj, const struct attribute_group *grp); int sysfs_update_group(struct kobject *kobj, @@ -255,6 +258,12 @@ static inline void sysfs_remove_link(struct kobject *kobj, const char *name) { } +static inline int sysfs_rename_link(struct kobject *k, struct kobject *t, + const char *old_name, const char *new_name) +{ + return 0; +} + static inline int sysfs_create_group(struct kobject *kobj, const struct attribute_group *grp) { -- cgit v1.2.3 From 62e877b893e6350c900d381f353aa62ed48dcc97 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Mon, 1 Mar 2010 20:38:36 +1100 Subject: sysfs: fix for thinko with sysfs_bin_attr_init() After merging the final tree, today's linux-next build (powerpc allyesconfig) failed like this: drivers/pci/pci-sysfs.c: In function 'pci_create_legacy_files': drivers/pci/pci-sysfs.c:645: error: lvalue required as unary '&' operand drivers/pci/pci-sysfs.c:658: error: lvalue required as unary '&' operand Caused by commit "sysfs: Use sysfs_attr_init and sysfs_bin_attr_init on dynamic attributes" interacting with commit "sysfs: Use one lockdep class per sysfs attribute") both from the driver-core tree. Signed-off-by: Stephen Rothwell Cc: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci-sysfs.c | 4 ++-- include/linux/sysfs.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 9fa183cfb0e9..de296452c957 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -642,7 +642,7 @@ void pci_create_legacy_files(struct pci_bus *b) if (!b->legacy_io) goto kzalloc_err; - sysfs_bin_attr_init(&b->legacy_io); + sysfs_bin_attr_init(b->legacy_io); b->legacy_io->attr.name = "legacy_io"; b->legacy_io->size = 0xffff; b->legacy_io->attr.mode = S_IRUSR | S_IWUSR; @@ -655,7 +655,7 @@ void pci_create_legacy_files(struct pci_bus *b) goto legacy_io_err; /* Allocated above after the legacy_io struct */ - sysfs_bin_attr_init(&b->legacy_mem); + sysfs_bin_attr_init(b->legacy_mem); b->legacy_mem = b->legacy_io + 1; b->legacy_mem->attr.name = "legacy_mem"; b->legacy_mem->size = 1024*1024; diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index d77cde6d0498..f0496b3d1811 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -110,7 +110,7 @@ struct bin_attribute { * enabled. 
Lockdep gives a nice error when your attribute is * added to sysfs if you don't have this. */ -#define sysfs_bin_attr_init(bin_attr) sysfs_attr_init(&bin_attr->attr) +#define sysfs_bin_attr_init(bin_attr) sysfs_attr_init(&(bin_attr)->attr) struct sysfs_ops { ssize_t (*show)(struct kobject *, struct attribute *,char *); -- cgit v1.2.3 From 8e9394ce2412254ec69fd2a4f3e44a66eade2297 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 17 Feb 2010 10:57:05 -0800 Subject: Driver core: create lock/unlock functions for struct device In the future, we are going to be changing the lock type for struct device (once we get the lockdep infrastructure properly worked out). To make that changeover easier, and to possibly bury the lock in a different part of struct device, let's create some functions to lock and unlock a device so that no out-of-core code needs to be changed in the future. This patch creates the device_lock/unlock/trylock() functions, and converts all in-tree users to them. Cc: Thomas Gleixner Cc: Jean Delvare Cc: Dave Young Cc: Ming Lei Cc: Jiri Kosina Cc: Phil Carmody Cc: Arjan van de Ven Cc: Cornelia Huck Cc: Rafael J. Wysocki Cc: Pavel Machek Cc: Len Brown Cc: Magnus Damm Cc: Alan Stern Cc: Randy Dunlap Cc: Stefan Richter Cc: David Brownell Cc: Vegard Nossum Cc: Jesse Barnes Cc: Alex Chiang Cc: Kenji Kaneshige Cc: Andrew Morton Cc: Andrew Patterson Cc: Yu Zhao Cc: Dominik Brodowski Cc: Samuel Ortiz Cc: Wolfram Sang Cc: CHENG Renquan Cc: Oliver Neukum Cc: Frans Pop Cc: David Vrabel Cc: Kay Sievers Cc: Sarah Sharp Signed-off-by: Greg Kroah-Hartman --- drivers/base/bus.c | 20 ++++++++++---------- drivers/base/dd.c | 38 +++++++++++++++++++------------------- drivers/base/power/main.c | 20 ++++++++++---------- drivers/firewire/core-device.c | 5 ++--- drivers/ieee1394/nodemgr.c | 5 ++--- drivers/pci/bus.c | 4 ++-- drivers/pci/pci.c | 4 ++-- drivers/pcmcia/ds.c | 8 ++++---- drivers/usb/core/driver.c | 4 ++-- drivers/uwb/umc-bus.c | 4 ++-- drivers/uwb/uwb-internal.h | 4 ++-- include/linux/device.h | 17 ++++++++++++++++- include/linux/usb.h | 6 +++--- 13 files changed, 76 insertions(+), 63 deletions(-) (limited to 'include') diff --git a/drivers/base/bus.c b/drivers/base/bus.c index cca1aa10054c..71f6af5c8b0b 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -173,10 +173,10 @@ static ssize_t driver_unbind(struct device_driver *drv, dev = bus_find_device_by_name(bus, NULL, buf); if (dev && dev->driver == drv) { if (dev->parent) /* Needed for USB */ - down(&dev->parent->sem); + device_lock(dev->parent); device_release_driver(dev); if (dev->parent) - up(&dev->parent->sem); + device_unlock(dev->parent); err = count; } put_device(dev); @@ -200,12 +200,12 @@ static ssize_t driver_bind(struct device_driver *drv, dev = bus_find_device_by_name(bus, NULL, buf); if (dev && dev->driver == NULL && driver_match_device(drv, dev)) { if (dev->parent) /* Needed for USB */ - down(&dev->parent->sem); - down(&dev->sem); + device_lock(dev->parent); + device_lock(dev); err = driver_probe_device(drv, dev); - up(&dev->sem); + device_unlock(dev); if (dev->parent) - up(&dev->parent->sem); + device_unlock(dev->parent); if (err > 0) { /* success */ @@ -744,10 +744,10 @@ static int __must_check bus_rescan_devices_helper(struct device *dev, if (!dev->driver) { if (dev->parent) /* Needed for USB */ - down(&dev->parent->sem); + device_lock(dev->parent); ret = device_attach(dev); if (dev->parent) - up(&dev->parent->sem); + device_unlock(dev->parent); } return ret < 0 ? 
ret : 0; } @@ -779,10 +779,10 @@ int device_reprobe(struct device *dev) { if (dev->driver) { if (dev->parent) /* Needed for USB */ - down(&dev->parent->sem); + device_lock(dev->parent); device_release_driver(dev); if (dev->parent) - up(&dev->parent->sem); + device_unlock(dev->parent); } return bus_rescan_devices_helper(dev, NULL); } diff --git a/drivers/base/dd.c b/drivers/base/dd.c index ee95c76bfd3d..c89291f8a16b 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -85,7 +85,7 @@ static void driver_sysfs_remove(struct device *dev) * for before calling this. (It is ok to call with no other effort * from a driver's probe() method.) * - * This function must be called with @dev->sem held. + * This function must be called with the device lock held. */ int device_bind_driver(struct device *dev) { @@ -190,8 +190,8 @@ EXPORT_SYMBOL_GPL(wait_for_device_probe); * This function returns -ENODEV if the device is not registered, * 1 if the device is bound successfully and 0 otherwise. * - * This function must be called with @dev->sem held. When called for a - * USB interface, @dev->parent->sem must be held as well. + * This function must be called with @dev lock held. When called for a + * USB interface, @dev->parent lock must be held as well. */ int driver_probe_device(struct device_driver *drv, struct device *dev) { @@ -233,13 +233,13 @@ static int __device_attach(struct device_driver *drv, void *data) * 0 if no matching driver was found; * -ENODEV if the device is not registered. * - * When called for a USB interface, @dev->parent->sem must be held. + * When called for a USB interface, @dev->parent lock must be held. */ int device_attach(struct device *dev) { int ret = 0; - down(&dev->sem); + device_lock(dev); if (dev->driver) { ret = device_bind_driver(dev); if (ret == 0) @@ -253,7 +253,7 @@ int device_attach(struct device *dev) ret = bus_for_each_drv(dev->bus, NULL, dev, __device_attach); pm_runtime_put_sync(dev); } - up(&dev->sem); + device_unlock(dev); return ret; } EXPORT_SYMBOL_GPL(device_attach); @@ -276,13 +276,13 @@ static int __driver_attach(struct device *dev, void *data) return 0; if (dev->parent) /* Needed for USB */ - down(&dev->parent->sem); - down(&dev->sem); + device_lock(dev->parent); + device_lock(dev); if (!dev->driver) driver_probe_device(drv, dev); - up(&dev->sem); + device_unlock(dev); if (dev->parent) - up(&dev->parent->sem); + device_unlock(dev->parent); return 0; } @@ -303,8 +303,8 @@ int driver_attach(struct device_driver *drv) EXPORT_SYMBOL_GPL(driver_attach); /* - * __device_release_driver() must be called with @dev->sem held. - * When called for a USB interface, @dev->parent->sem must be held as well. + * __device_release_driver() must be called with @dev lock held. + * When called for a USB interface, @dev->parent lock must be held as well. */ static void __device_release_driver(struct device *dev) { @@ -343,7 +343,7 @@ static void __device_release_driver(struct device *dev) * @dev: device. * * Manually detach device from driver. - * When called for a USB interface, @dev->parent->sem must be held. + * When called for a USB interface, @dev->parent lock must be held. */ void device_release_driver(struct device *dev) { @@ -352,9 +352,9 @@ void device_release_driver(struct device *dev) * within their ->remove callback for the same device, they * will deadlock right here. 
*/ - down(&dev->sem); + device_lock(dev); __device_release_driver(dev); - up(&dev->sem); + device_unlock(dev); } EXPORT_SYMBOL_GPL(device_release_driver); @@ -381,13 +381,13 @@ void driver_detach(struct device_driver *drv) spin_unlock(&drv->p->klist_devices.k_lock); if (dev->parent) /* Needed for USB */ - down(&dev->parent->sem); - down(&dev->sem); + device_lock(dev->parent); + device_lock(dev); if (dev->driver == drv) __device_release_driver(dev); - up(&dev->sem); + device_unlock(dev); if (dev->parent) - up(&dev->parent->sem); + device_unlock(dev->parent); put_device(dev); } } diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 0e26a6f6fd48..d477f4dc5e51 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -35,8 +35,8 @@ * because children are guaranteed to be discovered after parents, and * are inserted at the back of the list on discovery. * - * Since device_pm_add() may be called with a device semaphore held, - * we must never try to acquire a device semaphore while holding + * Since device_pm_add() may be called with a device lock held, + * we must never try to acquire a device lock while holding * dpm_list_mutex. */ @@ -508,7 +508,7 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) TRACE_RESUME(0); dpm_wait(dev->parent, async); - down(&dev->sem); + device_lock(dev); dev->power.status = DPM_RESUMING; @@ -543,7 +543,7 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) } } End: - up(&dev->sem); + device_unlock(dev); complete_all(&dev->power.completion); TRACE_RESUME(error); @@ -629,7 +629,7 @@ static void dpm_resume(pm_message_t state) */ static void device_complete(struct device *dev, pm_message_t state) { - down(&dev->sem); + device_lock(dev); if (dev->class && dev->class->pm && dev->class->pm->complete) { pm_dev_dbg(dev, state, "completing class "); @@ -646,7 +646,7 @@ static void device_complete(struct device *dev, pm_message_t state) dev->bus->pm->complete(dev); } - up(&dev->sem); + device_unlock(dev); } /** @@ -809,7 +809,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) int error = 0; dpm_wait_for_children(dev, async); - down(&dev->sem); + device_lock(dev); if (async_error) goto End; @@ -849,7 +849,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) dev->power.status = DPM_OFF; End: - up(&dev->sem); + device_unlock(dev); complete_all(&dev->power.completion); return error; @@ -938,7 +938,7 @@ static int device_prepare(struct device *dev, pm_message_t state) { int error = 0; - down(&dev->sem); + device_lock(dev); if (dev->bus && dev->bus->pm && dev->bus->pm->prepare) { pm_dev_dbg(dev, state, "preparing "); @@ -962,7 +962,7 @@ static int device_prepare(struct device *dev, pm_message_t state) suspend_report_result(dev->class->pm->prepare, error); } End: - up(&dev->sem); + device_unlock(dev); return error; } diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c index 014cabd3afda..5db0518c66da 100644 --- a/drivers/firewire/core-device.c +++ b/drivers/firewire/core-device.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include #include @@ -828,9 +827,9 @@ static int update_unit(struct device *dev, void *data) struct fw_driver *driver = (struct fw_driver *)dev->driver; if (is_fw_unit(dev) && driver != NULL && driver->update != NULL) { - down(&dev->sem); + device_lock(dev); driver->update(unit); - up(&dev->sem); + device_unlock(dev); } return 0; diff --git 
a/drivers/ieee1394/nodemgr.c b/drivers/ieee1394/nodemgr.c index 5122b5a8aa2d..18350213479e 100644 --- a/drivers/ieee1394/nodemgr.c +++ b/drivers/ieee1394/nodemgr.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include "csr.h" @@ -1397,9 +1396,9 @@ static int update_pdrv(struct device *dev, void *data) pdrv = container_of(drv, struct hpsb_protocol_driver, driver); if (pdrv->update) { - down(&ud->device.sem); + device_lock(&ud->device); error = pdrv->update(ud); - up(&ud->device.sem); + device_unlock(&ud->device); } if (error) device_release_driver(&ud->device); diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 712250f5874a..26301cb25e7f 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -288,9 +288,9 @@ void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), next = dev->bus_list.next; /* Run device routines with the device locked */ - down(&dev->dev.sem); + device_lock(&dev->dev); retval = cb(dev, userdata); - up(&dev->dev.sem); + device_unlock(&dev->dev); if (retval) break; } diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 77b493b3d97b..897fa5ccdb78 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2486,7 +2486,7 @@ static int pci_dev_reset(struct pci_dev *dev, int probe) if (!probe) { pci_block_user_cfg_access(dev); /* block PM suspend, driver probe, etc. */ - down(&dev->dev.sem); + device_lock(&dev->dev); } rc = pci_dev_specific_reset(dev, probe); @@ -2508,7 +2508,7 @@ static int pci_dev_reset(struct pci_dev *dev, int probe) rc = pci_parent_bus_reset(dev, probe); done: if (!probe) { - up(&dev->dev.sem); + device_unlock(&dev->dev); pci_unblock_user_cfg_access(dev); } diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c index 0f98be4450b7..ad93ebd7b2a2 100644 --- a/drivers/pcmcia/ds.c +++ b/drivers/pcmcia/ds.c @@ -971,9 +971,9 @@ static int runtime_suspend(struct device *dev) { int rc; - down(&dev->sem); + device_lock(dev); rc = pcmcia_dev_suspend(dev, PMSG_SUSPEND); - up(&dev->sem); + device_unlock(dev); return rc; } @@ -981,9 +981,9 @@ static int runtime_resume(struct device *dev) { int rc; - down(&dev->sem); + device_lock(dev); rc = pcmcia_dev_resume(dev); - up(&dev->sem); + device_unlock(dev); return rc; } diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index a7037bf81688..f3c233806fa3 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -489,10 +489,10 @@ void usb_driver_release_interface(struct usb_driver *driver, if (device_is_registered(dev)) { device_release_driver(dev); } else { - down(&dev->sem); + device_lock(dev); usb_unbind_interface(dev); dev->driver = NULL; - up(&dev->sem); + device_unlock(dev); } } EXPORT_SYMBOL_GPL(usb_driver_release_interface); diff --git a/drivers/uwb/umc-bus.c b/drivers/uwb/umc-bus.c index cdd6c8efc9f8..5fad4e791b3e 100644 --- a/drivers/uwb/umc-bus.c +++ b/drivers/uwb/umc-bus.c @@ -62,12 +62,12 @@ int umc_controller_reset(struct umc_dev *umc) struct device *parent = umc->dev.parent; int ret = 0; - if(down_trylock(&parent->sem)) + if (device_trylock(parent)) return -EAGAIN; ret = device_for_each_child(parent, parent, umc_bus_pre_reset_helper); if (ret >= 0) ret = device_for_each_child(parent, parent, umc_bus_post_reset_helper); - up(&parent->sem); + device_unlock(parent); return ret; } diff --git a/drivers/uwb/uwb-internal.h b/drivers/uwb/uwb-internal.h index d5bcfc1c227a..157485c862c0 100644 --- a/drivers/uwb/uwb-internal.h +++ b/drivers/uwb/uwb-internal.h @@ -366,12 +366,12 @@ struct dentry *uwb_dbg_create_pal_dir(struct uwb_pal *pal); 
static inline void uwb_dev_lock(struct uwb_dev *uwb_dev) { - down(&uwb_dev->dev.sem); + device_lock(&uwb_dev->dev); } static inline void uwb_dev_unlock(struct uwb_dev *uwb_dev) { - up(&uwb_dev->dev.sem); + device_unlock(&uwb_dev->dev); } #endif /* #ifndef __UWB_INTERNAL_H__ */ diff --git a/include/linux/device.h b/include/linux/device.h index f95d5bfe8248..182192892d45 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -106,7 +106,7 @@ extern int bus_unregister_notifier(struct bus_type *bus, /* All 4 notifers below get called with the target struct device * * as an argument. Note that those functions are likely to be called - * with the device semaphore held in the core, so be careful. + * with the device lock held in the core, so be careful. */ #define BUS_NOTIFY_ADD_DEVICE 0x00000001 /* device added */ #define BUS_NOTIFY_DEL_DEVICE 0x00000002 /* device removed */ @@ -508,6 +508,21 @@ static inline bool device_async_suspend_enabled(struct device *dev) return !!dev->power.async_suspend; } +static inline void device_lock(struct device *dev) +{ + down(&dev->sem); +} + +static inline int device_trylock(struct device *dev) +{ + return down_trylock(&dev->sem); +} + +static inline void device_unlock(struct device *dev) +{ + up(&dev->sem); +} + void driver_init(void); /* diff --git a/include/linux/usb.h b/include/linux/usb.h index 3492abf82e75..8c9f053111bb 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -512,9 +512,9 @@ extern struct usb_device *usb_get_dev(struct usb_device *dev); extern void usb_put_dev(struct usb_device *dev); /* USB device locking */ -#define usb_lock_device(udev) down(&(udev)->dev.sem) -#define usb_unlock_device(udev) up(&(udev)->dev.sem) -#define usb_trylock_device(udev) down_trylock(&(udev)->dev.sem) +#define usb_lock_device(udev) device_lock(&(udev)->dev) +#define usb_unlock_device(udev) device_unlock(&(udev)->dev) +#define usb_trylock_device(udev) device_trylock(&(udev)->dev) extern int usb_lock_device_for_reset(struct usb_device *udev, const struct usb_interface *iface); -- cgit v1.2.3 From 4045635318538d3ddd2007720412fdc4b08f6a62 Mon Sep 17 00:00:00 2001 From: Zhu Yi Date: Sun, 7 Mar 2010 16:21:39 +0000 Subject: net: add __must_check to sk_add_backlog Add the "__must_check" tag to sk_add_backlog() so that any failure to check and drop packets will be warned about. Signed-off-by: Zhu Yi Signed-off-by: David S. Miller --- include/net/sock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 170353dd9570..092b0551e77f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -604,7 +604,7 @@ static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb) } /* The per-socket spinlock must be held here. */ -static inline int sk_add_backlog(struct sock *sk, struct sk_buff *skb) +static inline __must_check int sk_add_backlog(struct sock *sk, struct sk_buff *skb) { if (sk->sk_backlog.len >= max(sk->sk_backlog.limit, sk->sk_rcvbuf << 1)) return -ENOBUFS; -- cgit v1.2.3 From 6cce09f87a04797fae5b947ef2626c14a78f0b49 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 7 Mar 2010 23:21:57 +0000 Subject: tcp: Add SNMP counters for backlog and min_ttl drops Commit 6b03a53a (tcp: use limited socket backlog) added the possibility of dropping frames when backlog queue is full. Commit d218d111 (tcp: Generalized TTL Security Mechanism) added the possibility of dropping frames when TTL is under a given limit. 
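The two new counters are bumped right where those drops happen. In sketch form (illustrative only, mirroring the tcp_v4_rcv()/tcp_v6_rcv() hunks further down in this patch; not meant to build on its own):

	/* TTL below the socket's minimum: count the drop, then discard */
	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto discard_and_relse;
	}

	/* backlog queue full (the else-branch after tcp_prequeue() in the
	 * real code): count the drop, then discard */
	if (unlikely(sk_add_backlog(sk, skb))) {
		bh_unlock_sock(sk);
		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
		goto discard_and_relse;
	}
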
This patch adds new SNMP MIB entries, named TCPBacklogDrop and TCPMinTTLDrop, published in /proc/net/netstat in TcpExt: line netstat -s | egrep "TCPBacklogDrop|TCPMinTTLDrop" TCPBacklogDrop: 0 TCPMinTTLDrop: 0 Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/snmp.h | 2 ++ net/ipv4/proc.c | 2 ++ net/ipv4/tcp_ipv4.c | 7 +++++-- net/ipv6/tcp_ipv6.c | 3 ++- 4 files changed, 11 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/snmp.h b/include/linux/snmp.h index e28f5a0182e8..4435d1084755 100644 --- a/include/linux/snmp.h +++ b/include/linux/snmp.h @@ -225,6 +225,8 @@ enum LINUX_MIB_SACKSHIFTED, LINUX_MIB_SACKMERGED, LINUX_MIB_SACKSHIFTFALLBACK, + LINUX_MIB_TCPBACKLOGDROP, + LINUX_MIB_TCPMINTTLDROP, /* RFC 5082 */ __LINUX_MIB_MAX }; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 242ed2307370..4f1f337f4337 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -249,6 +249,8 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPSackShifted", LINUX_MIB_SACKSHIFTED), SNMP_MIB_ITEM("TCPSackMerged", LINUX_MIB_SACKMERGED), SNMP_MIB_ITEM("TCPSackShiftFallback", LINUX_MIB_SACKSHIFTFALLBACK), + SNMP_MIB_ITEM("TCPBacklogDrop", LINUX_MIB_TCPBACKLOGDROP), + SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1915f7dc30e6..8d51d39ad1bb 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1651,8 +1651,10 @@ int tcp_v4_rcv(struct sk_buff *skb) if (!sk) goto no_tcp_socket; - if (iph->ttl < inet_sk(sk)->min_ttl) + if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { + NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); goto discard_and_relse; + } process: if (sk->sk_state == TCP_TIME_WAIT) @@ -1682,8 +1684,9 @@ process: if (!tcp_prequeue(sk, skb)) ret = tcp_v4_do_rcv(sk, skb); } - } else if (sk_add_backlog(sk, skb)) { + } else if (unlikely(sk_add_backlog(sk, skb))) { bh_unlock_sock(sk); + NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP); goto discard_and_relse; } bh_unlock_sock(sk); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2c378b1bd5cf..9b6dbba80d31 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1740,8 +1740,9 @@ process: if (!tcp_prequeue(sk, skb)) ret = tcp_v6_do_rcv(sk, skb); } - } else if (sk_add_backlog(sk, skb)) { + } else if (unlikely(sk_add_backlog(sk, skb))) { bh_unlock_sock(sk); + NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP); goto discard_and_relse; } bh_unlock_sock(sk); -- cgit v1.2.3 From 58b939959d228681208ba997595411fddc860849 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 8 Mar 2010 22:37:10 -0800 Subject: Input: scancode in get/set_keycodes should be unsigned MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The HID layer has some scan codes of the form 0xffbc0000 for logitech devices which do not work if scancode is typed as signed int, so we need to switch to unsigned it instead. While at it keycode being signed does not make much sense either. 
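To make the failure mode concrete, here is a minimal user-space illustration (not part of the patch) of why an extended scancode such as 0xffbc0000 cannot be carried in a signed int: it turns negative, so it used to trip the "scancode < 0" sanity check that this patch removes from input_get_keycode().

	#include <stdio.h>

	int main(void)
	{
		unsigned int scancode = 0xffbc0000;	/* Logitech-style HID usage */
		int as_signed = (int)scancode;		/* negative on Linux targets */

		if (as_signed < 0)
			printf("0x%x seen as int is %d -> old code returned -EINVAL\n",
			       scancode, as_signed);

		printf("seen as unsigned int it survives: 0x%x\n", scancode);
		return 0;
	}
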
Acked-by: Márton Németh Acked-by: Matthew Garrett Acked-by: Jiri Kosina Signed-off-by: Dmitry Torokhov --- drivers/hid/hid-input.c | 24 ++++++++++++------------ drivers/input/evdev.c | 2 +- drivers/input/input.c | 20 +++++++++----------- drivers/input/misc/ati_remote2.c | 14 +++++++------- drivers/input/misc/winbond-cir.c | 12 +++++------- drivers/input/sparse-keymap.c | 6 ++++-- drivers/media/IR/ir-keytable.c | 4 ++-- drivers/media/dvb/dvb-usb/dvb-usb-remote.c | 4 ++-- drivers/platform/x86/dell-wmi.c | 16 +++++++--------- drivers/platform/x86/hp-wmi.c | 15 +++++++-------- drivers/platform/x86/panasonic-laptop.c | 15 +++++++-------- drivers/platform/x86/topstar-laptop.c | 13 ++++++------- drivers/platform/x86/toshiba_acpi.c | 17 +++++++---------- include/linux/input.h | 20 ++++++++++++-------- 14 files changed, 88 insertions(+), 94 deletions(-) (limited to 'include') diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 79d9edd0bdfa..7a0d2e4661a1 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -68,22 +68,25 @@ static const struct { #define map_key_clear(c) hid_map_usage_clear(hidinput, usage, &bit, \ &max, EV_KEY, (c)) -static inline int match_scancode(int code, int scancode) +static inline int match_scancode(unsigned int code, unsigned int scancode) { if (scancode == 0) return 1; - return ((code & (HID_USAGE_PAGE | HID_USAGE)) == scancode); + + return (code & (HID_USAGE_PAGE | HID_USAGE)) == scancode; } -static inline int match_keycode(int code, int keycode) +static inline int match_keycode(unsigned int code, unsigned int keycode) { if (keycode == 0) return 1; - return (code == keycode); + + return code == keycode; } static struct hid_usage *hidinput_find_key(struct hid_device *hid, - int scancode, int keycode) + unsigned int scancode, + unsigned int keycode) { int i, j, k; struct hid_report *report; @@ -105,8 +108,8 @@ static struct hid_usage *hidinput_find_key(struct hid_device *hid, return NULL; } -static int hidinput_getkeycode(struct input_dev *dev, int scancode, - int *keycode) +static int hidinput_getkeycode(struct input_dev *dev, + unsigned int scancode, unsigned int *keycode) { struct hid_device *hid = input_get_drvdata(dev); struct hid_usage *usage; @@ -119,16 +122,13 @@ static int hidinput_getkeycode(struct input_dev *dev, int scancode, return -EINVAL; } -static int hidinput_setkeycode(struct input_dev *dev, int scancode, - int keycode) +static int hidinput_setkeycode(struct input_dev *dev, + unsigned int scancode, unsigned int keycode) { struct hid_device *hid = input_get_drvdata(dev); struct hid_usage *usage; int old_keycode; - if (keycode < 0 || keycode > KEY_MAX) - return -EINVAL; - usage = hidinput_find_key(hid, scancode, 0); if (usage) { old_keycode = usage->code; diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index 9f9816baeb97..2ee6c7a68bdc 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -515,7 +515,7 @@ static long evdev_do_ioctl(struct file *file, unsigned int cmd, struct input_absinfo abs; struct ff_effect effect; int __user *ip = (int __user *)p; - int i, t, u, v; + unsigned int i, t, u, v; int error; switch (cmd) { diff --git a/drivers/input/input.c b/drivers/input/input.c index 41168d5f8c17..e2dd8858e19d 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -582,7 +582,8 @@ static int input_fetch_keycode(struct input_dev *dev, int scancode) } static int input_default_getkeycode(struct input_dev *dev, - int scancode, int *keycode) + unsigned int scancode, + unsigned int *keycode) { if 
(!dev->keycodesize) return -EINVAL; @@ -596,7 +597,8 @@ static int input_default_getkeycode(struct input_dev *dev, } static int input_default_setkeycode(struct input_dev *dev, - int scancode, int keycode) + unsigned int scancode, + unsigned int keycode) { int old_keycode; int i; @@ -654,11 +656,9 @@ static int input_default_setkeycode(struct input_dev *dev, * This function should be called by anyone interested in retrieving current * keymap. Presently keyboard and evdev handlers use it. */ -int input_get_keycode(struct input_dev *dev, int scancode, int *keycode) +int input_get_keycode(struct input_dev *dev, + unsigned int scancode, unsigned int *keycode) { - if (scancode < 0) - return -EINVAL; - return dev->getkeycode(dev, scancode, keycode); } EXPORT_SYMBOL(input_get_keycode); @@ -672,16 +672,14 @@ EXPORT_SYMBOL(input_get_keycode); * This function should be called by anyone needing to update current * keymap. Presently keyboard and evdev handlers use it. */ -int input_set_keycode(struct input_dev *dev, int scancode, int keycode) +int input_set_keycode(struct input_dev *dev, + unsigned int scancode, unsigned int keycode) { unsigned long flags; int old_keycode; int retval; - if (scancode < 0) - return -EINVAL; - - if (keycode < 0 || keycode > KEY_MAX) + if (keycode > KEY_MAX) return -EINVAL; spin_lock_irqsave(&dev->event_lock, flags); diff --git a/drivers/input/misc/ati_remote2.c b/drivers/input/misc/ati_remote2.c index 0501f0e65157..15be5430bc6d 100644 --- a/drivers/input/misc/ati_remote2.c +++ b/drivers/input/misc/ati_remote2.c @@ -474,10 +474,11 @@ static void ati_remote2_complete_key(struct urb *urb) } static int ati_remote2_getkeycode(struct input_dev *idev, - int scancode, int *keycode) + unsigned int scancode, unsigned int *keycode) { struct ati_remote2 *ar2 = input_get_drvdata(idev); - int index, mode; + unsigned int mode; + int index; mode = scancode >> 8; if (mode > ATI_REMOTE2_PC || !((1 << mode) & ar2->mode_mask)) @@ -491,10 +492,12 @@ static int ati_remote2_getkeycode(struct input_dev *idev, return 0; } -static int ati_remote2_setkeycode(struct input_dev *idev, int scancode, int keycode) +static int ati_remote2_setkeycode(struct input_dev *idev, + unsigned int scancode, unsigned int keycode) { struct ati_remote2 *ar2 = input_get_drvdata(idev); - int index, mode, old_keycode; + unsigned int mode, old_keycode; + int index; mode = scancode >> 8; if (mode > ATI_REMOTE2_PC || !((1 << mode) & ar2->mode_mask)) @@ -504,9 +507,6 @@ static int ati_remote2_setkeycode(struct input_dev *idev, int scancode, int keyc if (index < 0) return -EINVAL; - if (keycode < KEY_RESERVED || keycode > KEY_MAX) - return -EINVAL; - old_keycode = ar2->keycode[mode][index]; ar2->keycode[mode][index] = keycode; __set_bit(keycode, idev->keybit); diff --git a/drivers/input/misc/winbond-cir.c b/drivers/input/misc/winbond-cir.c index cbec3dfdd42b..9c155a43abc2 100644 --- a/drivers/input/misc/winbond-cir.c +++ b/drivers/input/misc/winbond-cir.c @@ -385,26 +385,24 @@ wbcir_do_getkeycode(struct wbcir_data *data, u32 scancode) } static int -wbcir_getkeycode(struct input_dev *dev, int scancode, int *keycode) +wbcir_getkeycode(struct input_dev *dev, + unsigned int scancode, unsigned int *keycode) { struct wbcir_data *data = input_get_drvdata(dev); - *keycode = (int)wbcir_do_getkeycode(data, (u32)scancode); + *keycode = wbcir_do_getkeycode(data, scancode); return 0; } static int -wbcir_setkeycode(struct input_dev *dev, int sscancode, int keycode) +wbcir_setkeycode(struct input_dev *dev, + unsigned int scancode, unsigned 
int keycode) { struct wbcir_data *data = input_get_drvdata(dev); struct wbcir_keyentry *keyentry; struct wbcir_keyentry *new_keyentry; unsigned long flags; unsigned int old_keycode = KEY_RESERVED; - u32 scancode = (u32)sscancode; - - if (keycode < 0 || keycode > KEY_MAX) - return -EINVAL; new_keyentry = kmalloc(sizeof(*new_keyentry), GFP_KERNEL); if (!new_keyentry) diff --git a/drivers/input/sparse-keymap.c b/drivers/input/sparse-keymap.c index fbd3987af57f..e6bde55e5203 100644 --- a/drivers/input/sparse-keymap.c +++ b/drivers/input/sparse-keymap.c @@ -64,7 +64,8 @@ struct key_entry *sparse_keymap_entry_from_keycode(struct input_dev *dev, EXPORT_SYMBOL(sparse_keymap_entry_from_keycode); static int sparse_keymap_getkeycode(struct input_dev *dev, - int scancode, int *keycode) + unsigned int scancode, + unsigned int *keycode) { const struct key_entry *key = sparse_keymap_entry_from_scancode(dev, scancode); @@ -78,7 +79,8 @@ static int sparse_keymap_getkeycode(struct input_dev *dev, } static int sparse_keymap_setkeycode(struct input_dev *dev, - int scancode, int keycode) + unsigned int scancode, + unsigned int keycode) { struct key_entry *key; int old_keycode; diff --git a/drivers/media/IR/ir-keytable.c b/drivers/media/IR/ir-keytable.c index 0903f539bf68..0a3b4ed38e48 100644 --- a/drivers/media/IR/ir-keytable.c +++ b/drivers/media/IR/ir-keytable.c @@ -123,7 +123,7 @@ static int ir_copy_table(struct ir_scancode_table *destin, * If the key is not found, returns -EINVAL, otherwise, returns 0. */ static int ir_getkeycode(struct input_dev *dev, - int scancode, int *keycode) + unsigned int scancode, unsigned int *keycode) { int elem; struct ir_input_dev *ir_dev = input_get_drvdata(dev); @@ -291,7 +291,7 @@ static int ir_insert_key(struct ir_scancode_table *rc_tab, * If the key is not found, returns -EINVAL, otherwise, returns 0. 
*/ static int ir_setkeycode(struct input_dev *dev, - int scancode, int keycode) + unsigned int scancode, unsigned int keycode) { int rc = 0; struct ir_input_dev *ir_dev = input_get_drvdata(dev); diff --git a/drivers/media/dvb/dvb-usb/dvb-usb-remote.c b/drivers/media/dvb/dvb-usb/dvb-usb-remote.c index a03ef7efec9a..852fe89539cf 100644 --- a/drivers/media/dvb/dvb-usb/dvb-usb-remote.c +++ b/drivers/media/dvb/dvb-usb/dvb-usb-remote.c @@ -9,7 +9,7 @@ #include static int dvb_usb_getkeycode(struct input_dev *dev, - int scancode, int *keycode) + unsigned int scancode, unsigned int *keycode) { struct dvb_usb_device *d = input_get_drvdata(dev); @@ -39,7 +39,7 @@ static int dvb_usb_getkeycode(struct input_dev *dev, } static int dvb_usb_setkeycode(struct input_dev *dev, - int scancode, int keycode) + unsigned int scancode, unsigned int keycode) { struct dvb_usb_device *d = input_get_drvdata(dev); diff --git a/drivers/platform/x86/dell-wmi.c b/drivers/platform/x86/dell-wmi.c index 1b1dddbd5744..bed764e3ea2a 100644 --- a/drivers/platform/x86/dell-wmi.c +++ b/drivers/platform/x86/dell-wmi.c @@ -142,7 +142,7 @@ static struct key_entry *dell_wmi_keymap = dell_legacy_wmi_keymap; static struct input_dev *dell_wmi_input_dev; -static struct key_entry *dell_wmi_get_entry_by_scancode(int code) +static struct key_entry *dell_wmi_get_entry_by_scancode(unsigned int code) { struct key_entry *key; @@ -153,7 +153,7 @@ static struct key_entry *dell_wmi_get_entry_by_scancode(int code) return NULL; } -static struct key_entry *dell_wmi_get_entry_by_keycode(int keycode) +static struct key_entry *dell_wmi_get_entry_by_keycode(unsigned int keycode) { struct key_entry *key; @@ -164,8 +164,8 @@ static struct key_entry *dell_wmi_get_entry_by_keycode(int keycode) return NULL; } -static int dell_wmi_getkeycode(struct input_dev *dev, int scancode, - int *keycode) +static int dell_wmi_getkeycode(struct input_dev *dev, + unsigned int scancode, unsigned int *keycode) { struct key_entry *key = dell_wmi_get_entry_by_scancode(scancode); @@ -177,13 +177,11 @@ static int dell_wmi_getkeycode(struct input_dev *dev, int scancode, return -EINVAL; } -static int dell_wmi_setkeycode(struct input_dev *dev, int scancode, int keycode) +static int dell_wmi_setkeycode(struct input_dev *dev, + unsigned int scancode, unsigned int keycode) { struct key_entry *key; - int old_keycode; - - if (keycode < 0 || keycode > KEY_MAX) - return -EINVAL; + unsigned int old_keycode; key = dell_wmi_get_entry_by_scancode(scancode); if (key && key->type == KE_KEY) { diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index 7ccf33c08967..56086363becc 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ -278,7 +278,7 @@ static DEVICE_ATTR(als, S_IRUGO | S_IWUSR, show_als, set_als); static DEVICE_ATTR(dock, S_IRUGO, show_dock, NULL); static DEVICE_ATTR(tablet, S_IRUGO, show_tablet, NULL); -static struct key_entry *hp_wmi_get_entry_by_scancode(int code) +static struct key_entry *hp_wmi_get_entry_by_scancode(unsigned int code) { struct key_entry *key; @@ -289,7 +289,7 @@ static struct key_entry *hp_wmi_get_entry_by_scancode(int code) return NULL; } -static struct key_entry *hp_wmi_get_entry_by_keycode(int keycode) +static struct key_entry *hp_wmi_get_entry_by_keycode(unsigned int keycode) { struct key_entry *key; @@ -300,7 +300,8 @@ static struct key_entry *hp_wmi_get_entry_by_keycode(int keycode) return NULL; } -static int hp_wmi_getkeycode(struct input_dev *dev, int scancode, int *keycode) +static int 
hp_wmi_getkeycode(struct input_dev *dev, + unsigned int scancode, unsigned int *keycode) { struct key_entry *key = hp_wmi_get_entry_by_scancode(scancode); @@ -312,13 +313,11 @@ static int hp_wmi_getkeycode(struct input_dev *dev, int scancode, int *keycode) return -EINVAL; } -static int hp_wmi_setkeycode(struct input_dev *dev, int scancode, int keycode) +static int hp_wmi_setkeycode(struct input_dev *dev, + unsigned int scancode, unsigned int keycode) { struct key_entry *key; - int old_keycode; - - if (keycode < 0 || keycode > KEY_MAX) - return -EINVAL; + unsigned int old_keycode; key = hp_wmi_get_entry_by_scancode(scancode); if (key && key->type == KE_KEY) { diff --git a/drivers/platform/x86/panasonic-laptop.c b/drivers/platform/x86/panasonic-laptop.c index fe7cf0188acc..c9fc479fc290 100644 --- a/drivers/platform/x86/panasonic-laptop.c +++ b/drivers/platform/x86/panasonic-laptop.c @@ -200,7 +200,7 @@ static struct acpi_driver acpi_pcc_driver = { }; #define KEYMAP_SIZE 11 -static const int initial_keymap[KEYMAP_SIZE] = { +static const unsigned int initial_keymap[KEYMAP_SIZE] = { /* 0 */ KEY_RESERVED, /* 1 */ KEY_BRIGHTNESSDOWN, /* 2 */ KEY_BRIGHTNESSUP, @@ -222,7 +222,7 @@ struct pcc_acpi { struct acpi_device *device; struct input_dev *input_dev; struct backlight_device *backlight; - int keymap[KEYMAP_SIZE]; + unsigned int keymap[KEYMAP_SIZE]; }; struct pcc_keyinput { @@ -445,7 +445,8 @@ static struct attribute_group pcc_attr_group = { /* hotkey input device driver */ -static int pcc_getkeycode(struct input_dev *dev, int scancode, int *keycode) +static int pcc_getkeycode(struct input_dev *dev, + unsigned int scancode, unsigned int *keycode) { struct pcc_acpi *pcc = input_get_drvdata(dev); @@ -457,7 +458,7 @@ static int pcc_getkeycode(struct input_dev *dev, int scancode, int *keycode) return 0; } -static int keymap_get_by_keycode(struct pcc_acpi *pcc, int keycode) +static int keymap_get_by_keycode(struct pcc_acpi *pcc, unsigned int keycode) { int i; @@ -469,7 +470,8 @@ static int keymap_get_by_keycode(struct pcc_acpi *pcc, int keycode) return 0; } -static int pcc_setkeycode(struct input_dev *dev, int scancode, int keycode) +static int pcc_setkeycode(struct input_dev *dev, + unsigned int scancode, unsigned int keycode) { struct pcc_acpi *pcc = input_get_drvdata(dev); int oldkeycode; @@ -477,9 +479,6 @@ static int pcc_setkeycode(struct input_dev *dev, int scancode, int keycode) if (scancode >= ARRAY_SIZE(pcc->keymap)) return -EINVAL; - if (keycode < 0 || keycode > KEY_MAX) - return -EINVAL; - oldkeycode = pcc->keymap[scancode]; pcc->keymap[scancode] = keycode; diff --git a/drivers/platform/x86/topstar-laptop.c b/drivers/platform/x86/topstar-laptop.c index 02f3d4e9e666..4d6516fded7e 100644 --- a/drivers/platform/x86/topstar-laptop.c +++ b/drivers/platform/x86/topstar-laptop.c @@ -46,7 +46,7 @@ static struct tps_key_entry topstar_keymap[] = { { } }; -static struct tps_key_entry *tps_get_key_by_scancode(int code) +static struct tps_key_entry *tps_get_key_by_scancode(unsigned int code) { struct tps_key_entry *key; @@ -57,7 +57,7 @@ static struct tps_key_entry *tps_get_key_by_scancode(int code) return NULL; } -static struct tps_key_entry *tps_get_key_by_keycode(int code) +static struct tps_key_entry *tps_get_key_by_keycode(unsigned int code) { struct tps_key_entry *key; @@ -126,7 +126,8 @@ static int acpi_topstar_fncx_switch(struct acpi_device *device, bool state) return 0; } -static int topstar_getkeycode(struct input_dev *dev, int scancode, int *keycode) +static int topstar_getkeycode(struct 
input_dev *dev, + unsigned int scancode, unsigned int *keycode) { struct tps_key_entry *key = tps_get_key_by_scancode(scancode); @@ -137,14 +138,12 @@ static int topstar_getkeycode(struct input_dev *dev, int scancode, int *keycode) return 0; } -static int topstar_setkeycode(struct input_dev *dev, int scancode, int keycode) +static int topstar_setkeycode(struct input_dev *dev, + unsigned int scancode, unsigned int keycode) { struct tps_key_entry *key; int old_keycode; - if (keycode < 0 || keycode > KEY_MAX) - return -EINVAL; - key = tps_get_key_by_scancode(scancode); if (!key) diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c index 405b969734d6..789240d1b577 100644 --- a/drivers/platform/x86/toshiba_acpi.c +++ b/drivers/platform/x86/toshiba_acpi.c @@ -745,7 +745,7 @@ static struct backlight_ops toshiba_backlight_data = { .update_status = set_lcd_status, }; -static struct key_entry *toshiba_acpi_get_entry_by_scancode(int code) +static struct key_entry *toshiba_acpi_get_entry_by_scancode(unsigned int code) { struct key_entry *key; @@ -756,7 +756,7 @@ static struct key_entry *toshiba_acpi_get_entry_by_scancode(int code) return NULL; } -static struct key_entry *toshiba_acpi_get_entry_by_keycode(int code) +static struct key_entry *toshiba_acpi_get_entry_by_keycode(unsigned int code) { struct key_entry *key; @@ -767,8 +767,8 @@ static struct key_entry *toshiba_acpi_get_entry_by_keycode(int code) return NULL; } -static int toshiba_acpi_getkeycode(struct input_dev *dev, int scancode, - int *keycode) +static int toshiba_acpi_getkeycode(struct input_dev *dev, + unsigned int scancode, unsigned int *keycode) { struct key_entry *key = toshiba_acpi_get_entry_by_scancode(scancode); @@ -780,14 +780,11 @@ static int toshiba_acpi_getkeycode(struct input_dev *dev, int scancode, return -EINVAL; } -static int toshiba_acpi_setkeycode(struct input_dev *dev, int scancode, - int keycode) +static int toshiba_acpi_setkeycode(struct input_dev *dev, + unsigned int scancode, unsigned int keycode) { struct key_entry *key; - int old_keycode; - - if (keycode < 0 || keycode > KEY_MAX) - return -EINVAL; + unsigned int old_keycode; key = toshiba_acpi_get_entry_by_scancode(scancode); if (key && key->type == KE_KEY) { diff --git a/include/linux/input.h b/include/linux/input.h index dc24effb6d0e..7ed2251b33f1 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -58,10 +58,10 @@ struct input_absinfo { #define EVIOCGVERSION _IOR('E', 0x01, int) /* get driver version */ #define EVIOCGID _IOR('E', 0x02, struct input_id) /* get device ID */ -#define EVIOCGREP _IOR('E', 0x03, int[2]) /* get repeat settings */ -#define EVIOCSREP _IOW('E', 0x03, int[2]) /* set repeat settings */ -#define EVIOCGKEYCODE _IOR('E', 0x04, int[2]) /* get keycode */ -#define EVIOCSKEYCODE _IOW('E', 0x04, int[2]) /* set keycode */ +#define EVIOCGREP _IOR('E', 0x03, unsigned int[2]) /* get repeat settings */ +#define EVIOCSREP _IOW('E', 0x03, unsigned int[2]) /* set repeat settings */ +#define EVIOCGKEYCODE _IOR('E', 0x04, unsigned int[2]) /* get keycode */ +#define EVIOCSKEYCODE _IOW('E', 0x04, unsigned int[2]) /* set keycode */ #define EVIOCGNAME(len) _IOC(_IOC_READ, 'E', 0x06, len) /* get device name */ #define EVIOCGPHYS(len) _IOC(_IOC_READ, 'E', 0x07, len) /* get physical location */ @@ -1142,8 +1142,10 @@ struct input_dev { unsigned int keycodemax; unsigned int keycodesize; void *keycode; - int (*setkeycode)(struct input_dev *dev, int scancode, int keycode); - int (*getkeycode)(struct input_dev *dev, int 
scancode, int *keycode); + int (*setkeycode)(struct input_dev *dev, + unsigned int scancode, unsigned int keycode); + int (*getkeycode)(struct input_dev *dev, + unsigned int scancode, unsigned int *keycode); struct ff_device *ff; @@ -1415,8 +1417,10 @@ static inline void input_set_abs_params(struct input_dev *dev, int axis, int min dev->absbit[BIT_WORD(axis)] |= BIT_MASK(axis); } -int input_get_keycode(struct input_dev *dev, int scancode, int *keycode); -int input_set_keycode(struct input_dev *dev, int scancode, int keycode); +int input_get_keycode(struct input_dev *dev, + unsigned int scancode, unsigned int *keycode); +int input_set_keycode(struct input_dev *dev, + unsigned int scancode, unsigned int keycode); extern struct class input_class; -- cgit v1.2.3 From 04e82ffb0f02e645f3dde5128ef39d425a8b3c6d Mon Sep 17 00:00:00 2001 From: Peter Huewe Date: Wed, 10 Mar 2010 11:55:05 +0900 Subject: serial: sh-sci: Fix build failure for non-sh architectures. This patch fixes a build failure for various arm based defconfigs [1][2][3] and maybe other architectures/configs. The build failure was introduced by the sh specific patch [4] "serial: sh-sci: Add DMA support" by Guennadi Liakhovetski Patch against linux-next of 20100309 References: [1] http://kisskb.ellerman.id.au/kisskb/buildresult/2248992/ [2] http://kisskb.ellerman.id.au/kisskb/buildresult/2248996/ [3] http://kisskb.ellerman.id.au/kisskb/buildresult/2248998/ [4] http://git.kernel.org/?p=linux/kernel/git/sfr/linux-next.git;a=commit;h=73a19e4c0301908ce6346715fd08a74308451f5a Signed-off-by: Peter Huewe Signed-off-by: Paul Mundt --- include/linux/serial_sci.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h index 1b177d29a7f0..193d4bfe42ff 100644 --- a/include/linux/serial_sci.h +++ b/include/linux/serial_sci.h @@ -2,7 +2,9 @@ #define __LINUX_SERIAL_SCI_H #include +#ifdef CONFIG_SERIAL_SH_SCI_DMA #include +#endif /* * Generic header for SuperH SCI(F) (used by sh/sh64/h8300 and related parts) @@ -30,8 +32,10 @@ struct plat_sci_port { upf_t flags; /* UPF_* flags */ char *clk; /* clock string */ struct device *dma_dev; +#ifdef CONFIG_SERIAL_SH_SCI_DMA enum sh_dmae_slave_chan_id dma_slave_tx; enum sh_dmae_slave_chan_id dma_slave_rx; +#endif }; #endif /* __LINUX_SERIAL_SCI_H */ -- cgit v1.2.3 From 06a09124b5ec65f81df66c56695d9a9ae04a0114 Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Tue, 9 Mar 2010 20:38:45 -0800 Subject: Input: ads7846 - add support for AD7843 parts The AD7873 is almost identical to the ADS7846; the only difference is related to the Power Management bits PD0 and PD1. This results in a slightly different PENIRQ enable behavior. For the AD7873, VREF should be turned off during differential measurements. So, add the AD7873/43 to the list of driver supported devices, and prevent VREF usage during differential/ratiometric conversion modes. Signed-off-by: Michael Hennerich Signed-off-by: Mike Frysinger Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/Kconfig | 9 +++++---- drivers/input/touchscreen/ads7846.c | 10 ++++++++++ include/linux/spi/ads7846.h | 2 +- 3 files changed, 16 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig index 7208654a94ae..8a8fa4d2d6a8 100644 --- a/drivers/input/touchscreen/Kconfig +++ b/drivers/input/touchscreen/Kconfig @@ -24,17 +24,18 @@ config TOUCHSCREEN_88PM860X module will be called 88pm860x-ts. 
config TOUCHSCREEN_ADS7846 - tristate "ADS7846/TSC2046 and ADS7843 based touchscreens" + tristate "ADS7846/TSC2046/AD7873 and AD(S)7843 based touchscreens" depends on SPI_MASTER depends on HWMON = n || HWMON help Say Y here if you have a touchscreen interface using the - ADS7846/TSC2046 or ADS7843 controller, and your board-specific - setup code includes that in its table of SPI devices. + ADS7846/TSC2046/AD7873 or ADS7843/AD7843 controller, + and your board-specific setup code includes that in its + table of SPI devices. If HWMON is selected, and the driver is told the reference voltage on your board, you will also get hwmon interfaces for the voltage - (and on ads7846/tsc2046, temperature) sensors of this chip. + (and on ads7846/tsc2046/ad7873, temperature) sensors of this chip. If unsure, say N (but it's safe to say "Y"). diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c index 8b05d8e97543..d187be05955f 100644 --- a/drivers/input/touchscreen/ads7846.c +++ b/drivers/input/touchscreen/ads7846.c @@ -36,6 +36,7 @@ * TSC2046 is just newer ads7846 silicon. * Support for ads7843 tested on Atmel at91sam926x-EK. * Support for ads7845 has only been stubbed in. + * Support for Analog Devices AD7873 and AD7843 tested. * * IRQ handling needs a workaround because of a shortcoming in handling * edge triggered IRQs on some platforms like the OMAP1/2. These @@ -984,6 +985,15 @@ static int __devinit ads7846_probe(struct spi_device *spi) vref = pdata->keep_vref_on; + if (ts->model == 7873) { + /* The AD7873 is almost identical to the ADS7846 + * keep VREF off during differential/ratiometric + * conversion modes + */ + ts->model = 7846; + vref = 0; + } + /* set up the transfers to read touchscreen state; this assumes we * use formula #2 for pressure, not #3. */ diff --git a/include/linux/spi/ads7846.h b/include/linux/spi/ads7846.h index 51948eb6927a..5710c15d394a 100644 --- a/include/linux/spi/ads7846.h +++ b/include/linux/spi/ads7846.h @@ -12,7 +12,7 @@ enum ads7846_filter { }; struct ads7846_platform_data { - u16 model; /* 7843, 7845, 7846. */ + u16 model; /* 7843, 7845, 7846, 7873. */ u16 vref_delay_usecs; /* 0 for external vref; etc */ u16 vref_mv; /* external vref value, milliVolts */ bool keep_vref_on; /* set to keep vref on for differential -- cgit v1.2.3 From dc1d628a67a8f042e711ea5accc0beedc3ef0092 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 3 Mar 2010 15:55:04 +0100 Subject: perf: Provide generic perf_sample_data initialization This makes it easier to extend perf_sample_data and fixes a bug on arm and sparc, which failed to set ->raw to NULL, which can cause crashes when combined with PERF_SAMPLE_RAW. It also optimizes PowerPC and tracepoint, because the struct initialization is forced to zero out the whole structure. Signed-off-by: Peter Zijlstra Acked-by: Jean Pihet Reviewed-by: Frederic Weisbecker Acked-by: David S. 
Miller Cc: Jamie Iles Cc: Paul Mackerras Cc: Stephane Eranian Cc: stable@kernel.org LKML-Reference: <20100304140100.315416040@chello.nl> Signed-off-by: Ingo Molnar --- arch/arm/kernel/perf_event.c | 4 ++-- arch/powerpc/kernel/perf_event.c | 8 ++++---- arch/sparc/kernel/perf_event.c | 2 +- arch/x86/kernel/cpu/perf_event.c | 3 +-- arch/x86/kernel/cpu/perf_event_intel.c | 6 ++---- include/linux/perf_event.h | 7 +++++++ kernel/perf_event.c | 21 ++++++++------------- 7 files changed, 25 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index c54ceb3d1f97..3875d99cc40f 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -965,7 +965,7 @@ armv6pmu_handle_irq(int irq_num, */ armv6_pmcr_write(pmcr); - data.addr = 0; + perf_sample_data_init(&data, 0); cpuc = &__get_cpu_var(cpu_hw_events); for (idx = 0; idx <= armpmu->num_events; ++idx) { @@ -1945,7 +1945,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) */ regs = get_irq_regs(); - data.addr = 0; + perf_sample_data_init(&data, 0); cpuc = &__get_cpu_var(cpu_hw_events); for (idx = 0; idx <= armpmu->num_events; ++idx) { diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index b6cf8f1f4d35..5120bd44f69a 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -1164,10 +1164,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val, * Finally record data if requested. */ if (record) { - struct perf_sample_data data = { - .addr = ~0ULL, - .period = event->hw.last_period, - }; + struct perf_sample_data data; + + perf_sample_data_init(&data, ~0ULL); + data.period = event->hw.last_period; if (event->attr.sample_type & PERF_SAMPLE_ADDR) perf_get_data_addr(regs, &data.addr); diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 9f2b2bac8b2b..6504208f375f 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1189,7 +1189,7 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, regs = args->regs; - data.addr = 0; + perf_sample_data_init(&data, 0); cpuc = &__get_cpu_var(cpu_hw_events); diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 97cddbf32936..42aafd11e170 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1097,8 +1097,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) int idx, handled = 0; u64 val; - data.addr = 0; - data.raw = NULL; + perf_sample_data_init(&data, 0); cpuc = &__get_cpu_var(cpu_hw_events); diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 73102df8bfc1..44b60c852107 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -590,10 +590,9 @@ static void intel_pmu_drain_bts_buffer(void) ds->bts_index = ds->bts_buffer_base; + perf_sample_data_init(&data, 0); data.period = event->hw.last_period; - data.addr = 0; - data.raw = NULL; regs.ip = 0; /* @@ -742,8 +741,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) int bit, loops; u64 ack, status; - data.addr = 0; - data.raw = NULL; + perf_sample_data_init(&data, 0); cpuc = &__get_cpu_var(cpu_hw_events); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 90e0521b1690..6f8cd7da1a01 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -801,6 +801,13 @@ struct perf_sample_data { struct 
perf_raw_record *raw; }; +static inline +void perf_sample_data_init(struct perf_sample_data *data, u64 addr) +{ + data->addr = addr; + data->raw = NULL; +} + extern void perf_output_sample(struct perf_output_handle *handle, struct perf_event_header *header, struct perf_sample_data *data, diff --git a/kernel/perf_event.c b/kernel/perf_event.c index e68745053013..4393b9e73740 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -4108,8 +4108,7 @@ void __perf_sw_event(u32 event_id, u64 nr, int nmi, if (rctx < 0) return; - data.addr = addr; - data.raw = NULL; + perf_sample_data_init(&data, addr); do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs); @@ -4154,11 +4153,10 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) struct perf_event *event; u64 period; - event = container_of(hrtimer, struct perf_event, hw.hrtimer); + event = container_of(hrtimer, struct perf_event, hw.hrtimer); event->pmu->read(event); - data.addr = 0; - data.raw = NULL; + perf_sample_data_init(&data, 0); data.period = event->hw.last_period; regs = get_irq_regs(); /* @@ -4322,17 +4320,15 @@ static const struct pmu perf_ops_task_clock = { void perf_tp_event(int event_id, u64 addr, u64 count, void *record, int entry_size) { + struct pt_regs *regs = get_irq_regs(); + struct perf_sample_data data; struct perf_raw_record raw = { .size = entry_size, .data = record, }; - struct perf_sample_data data = { - .addr = addr, - .raw = &raw, - }; - - struct pt_regs *regs = get_irq_regs(); + perf_sample_data_init(&data, addr); + data.raw = &raw; if (!regs) regs = task_pt_regs(current); @@ -4448,8 +4444,7 @@ void perf_bp_event(struct perf_event *bp, void *data) struct perf_sample_data sample; struct pt_regs *regs = data; - sample.raw = NULL; - sample.addr = bp->attr.bp_addr; + perf_sample_data_init(&sample, bp->attr.bp_addr); if (!perf_exclude_event(bp, regs)) perf_swevent_add(bp, 1, 1, &sample, regs); -- cgit v1.2.3 From 3f6da3905398826d85731247e7fbcf53400c18bd Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 5 Mar 2010 13:01:18 +0100 Subject: perf: Rework and fix the arch CPU-hotplug hooks Remove the hw_perf_event_*() hotplug hooks in favour of per PMU hotplug notifiers. This has the advantage of reducing the static weak interface as well as exposing all hotplug actions to the PMU. Use this to fix x86 hotplug usage where we did things in ONLINE which should have been done in UP_PREPARE or STARTING. 
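The per-PMU notifier that each architecture now registers looks roughly like the sketch below (illustrative only; the function name is made up here, and the real callbacks are the power_pmu_notifier(), sh_pmu_notifier() and x86_pmu_notifier() hunks in the diff that follows):

	static int __cpuinit
	example_pmu_notifier(struct notifier_block *self, unsigned long action,
			     void *hcpu)
	{
		unsigned int cpu = (long)hcpu;

		switch (action & ~CPU_TASKS_FROZEN) {
		case CPU_UP_PREPARE:
			/* allocate/clear the PMU state for 'cpu' here */
			break;
		case CPU_STARTING:
			/* runs on the incoming CPU before it takes interrupts */
			break;
		case CPU_DEAD:
			/* tear down the PMU state for 'cpu' here */
			break;
		default:
			break;
		}

		return NOTIFY_OK;
	}

	/* registered once from the PMU init path via the new helper: */
	/* perf_cpu_notifier(example_pmu_notifier); */
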
Signed-off-by: Peter Zijlstra Cc: Paul Mundt Cc: paulus@samba.org Cc: eranian@google.com Cc: robert.richter@amd.com Cc: fweisbec@gmail.com Cc: Arnaldo Carvalho de Melo LKML-Reference: <20100305154128.736225361@chello.nl> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_event.c | 21 +++++++++- arch/sh/kernel/perf_event.c | 20 +++++++++- arch/x86/kernel/cpu/perf_event.c | 70 ++++++++++++++++++++-------------- arch/x86/kernel/cpu/perf_event_amd.c | 60 ++++++++++++----------------- arch/x86/kernel/cpu/perf_event_intel.c | 5 ++- include/linux/perf_event.h | 16 ++++++++ kernel/perf_event.c | 15 -------- 7 files changed, 126 insertions(+), 81 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index 5120bd44f69a..fbe101d7505d 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -1287,7 +1287,7 @@ static void perf_event_interrupt(struct pt_regs *regs) irq_exit(); } -void hw_perf_event_setup(int cpu) +static void power_pmu_setup(int cpu) { struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); @@ -1297,6 +1297,23 @@ void hw_perf_event_setup(int cpu) cpuhw->mmcr[0] = MMCR0_FC; } +static int __cpuinit +power_pmu_notify(struct notifier_block *self, unsigned long action, void *hcpu) +{ + unsigned int cpu = (long)hcpu; + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_UP_PREPARE: + power_pmu_setup(cpu); + break; + + default: + break; + } + + return NOTIFY_OK; +} + int register_power_pmu(struct power_pmu *pmu) { if (ppmu) @@ -1314,5 +1331,7 @@ int register_power_pmu(struct power_pmu *pmu) freeze_events_kernel = MMCR0_FCHV; #endif /* CONFIG_PPC64 */ + perf_cpu_notifier(power_pmu_notifier); + return 0; } diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c index 7ff0943e7a08..9f253e9cce01 100644 --- a/arch/sh/kernel/perf_event.c +++ b/arch/sh/kernel/perf_event.c @@ -275,13 +275,30 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) return &pmu; } -void hw_perf_event_setup(int cpu) +static void sh_pmu_setup(int cpu) { struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); memset(cpuhw, 0, sizeof(struct cpu_hw_events)); } +static int __cpuinit +sh_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) +{ + unsigned int cpu = (long)hcpu; + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_UP_PREPARE: + sh_pmu_setup(cpu); + break; + + default: + break; + } + + return NOTIFY_OK; +} + void hw_perf_enable(void) { if (!sh_pmu_initialized()) @@ -308,5 +325,6 @@ int register_sh_pmu(struct sh_pmu *pmu) WARN_ON(pmu->num_events > MAX_HWEVENTS); + perf_cpu_notifier(sh_pmu_notifier); return 0; } diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 42aafd11e170..585d5608ae6b 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -157,6 +157,11 @@ struct x86_pmu { void (*put_event_constraints)(struct cpu_hw_events *cpuc, struct perf_event *event); struct event_constraint *event_constraints; + + void (*cpu_prepare)(int cpu); + void (*cpu_starting)(int cpu); + void (*cpu_dying)(int cpu); + void (*cpu_dead)(int cpu); }; static struct x86_pmu x86_pmu __read_mostly; @@ -293,7 +298,7 @@ static inline bool bts_available(void) return x86_pmu.enable_bts != NULL; } -static inline void init_debug_store_on_cpu(int cpu) +static void init_debug_store_on_cpu(int cpu) { struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; @@ -305,7 +310,7 @@ static inline void init_debug_store_on_cpu(int cpu) 
(u32)((u64)(unsigned long)ds >> 32)); } -static inline void fini_debug_store_on_cpu(int cpu) +static void fini_debug_store_on_cpu(int cpu) { if (!per_cpu(cpu_hw_events, cpu).ds) return; @@ -1337,6 +1342,39 @@ undo: #include "perf_event_p6.c" #include "perf_event_intel.c" +static int __cpuinit +x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) +{ + unsigned int cpu = (long)hcpu; + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_UP_PREPARE: + if (x86_pmu.cpu_prepare) + x86_pmu.cpu_prepare(cpu); + break; + + case CPU_STARTING: + if (x86_pmu.cpu_starting) + x86_pmu.cpu_starting(cpu); + break; + + case CPU_DYING: + if (x86_pmu.cpu_dying) + x86_pmu.cpu_dying(cpu); + break; + + case CPU_DEAD: + if (x86_pmu.cpu_dead) + x86_pmu.cpu_dead(cpu); + break; + + default: + break; + } + + return NOTIFY_OK; +} + static void __init pmu_check_apic(void) { if (cpu_has_apic) @@ -1415,6 +1453,8 @@ void __init init_hw_perf_events(void) pr_info("... max period: %016Lx\n", x86_pmu.max_period); pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed); pr_info("... event mask: %016Lx\n", perf_event_mask); + + perf_cpu_notifier(x86_pmu_notifier); } static inline void x86_pmu_read(struct perf_event *event) @@ -1674,29 +1714,3 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) return entry; } - -void hw_perf_event_setup_online(int cpu) -{ - init_debug_store_on_cpu(cpu); - - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_AMD: - amd_pmu_cpu_online(cpu); - break; - default: - return; - } -} - -void hw_perf_event_setup_offline(int cpu) -{ - init_debug_store_on_cpu(cpu); - - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_AMD: - amd_pmu_cpu_offline(cpu); - break; - default: - return; - } -} diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 8f3dbfda3c4f..014528ba7d57 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -271,28 +271,6 @@ done: return &emptyconstraint; } -static __initconst struct x86_pmu amd_pmu = { - .name = "AMD", - .handle_irq = x86_pmu_handle_irq, - .disable_all = x86_pmu_disable_all, - .enable_all = x86_pmu_enable_all, - .enable = x86_pmu_enable_event, - .disable = x86_pmu_disable_event, - .eventsel = MSR_K7_EVNTSEL0, - .perfctr = MSR_K7_PERFCTR0, - .event_map = amd_pmu_event_map, - .raw_event = amd_pmu_raw_event, - .max_events = ARRAY_SIZE(amd_perfmon_event_map), - .num_events = 4, - .event_bits = 48, - .event_mask = (1ULL << 48) - 1, - .apic = 1, - /* use highest bit to detect overflow */ - .max_period = (1ULL << 47) - 1, - .get_event_constraints = amd_get_event_constraints, - .put_event_constraints = amd_put_event_constraints -}; - static struct amd_nb *amd_alloc_nb(int cpu, int nb_id) { struct amd_nb *nb; @@ -378,6 +356,31 @@ static void amd_pmu_cpu_offline(int cpu) raw_spin_unlock(&amd_nb_lock); } +static __initconst struct x86_pmu amd_pmu = { + .name = "AMD", + .handle_irq = x86_pmu_handle_irq, + .disable_all = x86_pmu_disable_all, + .enable_all = x86_pmu_enable_all, + .enable = x86_pmu_enable_event, + .disable = x86_pmu_disable_event, + .eventsel = MSR_K7_EVNTSEL0, + .perfctr = MSR_K7_PERFCTR0, + .event_map = amd_pmu_event_map, + .raw_event = amd_pmu_raw_event, + .max_events = ARRAY_SIZE(amd_perfmon_event_map), + .num_events = 4, + .event_bits = 48, + .event_mask = (1ULL << 48) - 1, + .apic = 1, + /* use highest bit to detect overflow */ + .max_period = (1ULL << 47) - 1, + .get_event_constraints = amd_get_event_constraints, + 
.put_event_constraints = amd_put_event_constraints, + + .cpu_prepare = amd_pmu_cpu_online, + .cpu_dead = amd_pmu_cpu_offline, +}; + static __init int amd_pmu_init(void) { /* Performance-monitoring supported from K7 and later: */ @@ -390,11 +393,6 @@ static __init int amd_pmu_init(void) memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, sizeof(hw_cache_event_ids)); - /* - * explicitly initialize the boot cpu, other cpus will get - * the cpu hotplug callbacks from smp_init() - */ - amd_pmu_cpu_online(smp_processor_id()); return 0; } @@ -405,12 +403,4 @@ static int amd_pmu_init(void) return 0; } -static void amd_pmu_cpu_online(int cpu) -{ -} - -static void amd_pmu_cpu_offline(int cpu) -{ -} - #endif diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 44b60c852107..12e811a7d747 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -870,7 +870,10 @@ static __initconst struct x86_pmu intel_pmu = { .max_period = (1ULL << 31) - 1, .enable_bts = intel_pmu_enable_bts, .disable_bts = intel_pmu_disable_bts, - .get_event_constraints = intel_get_event_constraints + .get_event_constraints = intel_get_event_constraints, + + .cpu_starting = init_debug_store_on_cpu, + .cpu_dying = fini_debug_store_on_cpu, }; static __init int intel_pmu_init(void) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 6f8cd7da1a01..80acbf3d5de1 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -936,5 +936,21 @@ static inline void perf_event_disable(struct perf_event *event) { } #define perf_output_put(handle, x) \ perf_output_copy((handle), &(x), sizeof(x)) +/* + * This has to have a higher priority than migration_notifier in sched.c. + */ +#define perf_cpu_notifier(fn) \ +do { \ + static struct notifier_block fn##_nb __cpuinitdata = \ + { .notifier_call = fn, .priority = 20 }; \ + fn(&fn##_nb, (unsigned long)CPU_UP_PREPARE, \ + (void *)(unsigned long)smp_processor_id()); \ + fn(&fn##_nb, (unsigned long)CPU_STARTING, \ + (void *)(unsigned long)smp_processor_id()); \ + fn(&fn##_nb, (unsigned long)CPU_ONLINE, \ + (void *)(unsigned long)smp_processor_id()); \ + register_cpu_notifier(&fn##_nb); \ +} while (0) + #endif /* __KERNEL__ */ #endif /* _LINUX_PERF_EVENT_H */ diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 4393b9e73740..73329dedb5ad 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -81,10 +81,6 @@ extern __weak const struct pmu *hw_perf_event_init(struct perf_event *event) void __weak hw_perf_disable(void) { barrier(); } void __weak hw_perf_enable(void) { barrier(); } -void __weak hw_perf_event_setup(int cpu) { barrier(); } -void __weak hw_perf_event_setup_online(int cpu) { barrier(); } -void __weak hw_perf_event_setup_offline(int cpu) { barrier(); } - int __weak hw_perf_group_sched_in(struct perf_event *group_leader, struct perf_cpu_context *cpuctx, @@ -5382,8 +5378,6 @@ static void __cpuinit perf_event_init_cpu(int cpu) spin_lock(&perf_resource_lock); cpuctx->max_pertask = perf_max_events - perf_reserved_percpu; spin_unlock(&perf_resource_lock); - - hw_perf_event_setup(cpu); } #ifdef CONFIG_HOTPLUG_CPU @@ -5423,20 +5417,11 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) perf_event_init_cpu(cpu); break; - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - hw_perf_event_setup_online(cpu); - break; - case CPU_DOWN_PREPARE: case CPU_DOWN_PREPARE_FROZEN: perf_event_exit_cpu(cpu); break; - case CPU_DEAD: - 
hw_perf_event_setup_offline(cpu); - break; - default: break; } -- cgit v1.2.3 From 5331d7b84613b8325362dde53dc2bff2fb87d351 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 4 Mar 2010 21:15:56 +0100 Subject: perf: Introduce new perf_fetch_caller_regs() for hot regs snapshot Events that trigger overflows by interrupting a context can use get_irq_regs() or task_pt_regs() to retrieve the state when the event triggered. But this is not the case for some other class of events like trace events as tracepoints are executed in the same context than the code that triggered the event. It means we need a different api to capture the regs there, namely we need a hot snapshot to get the most important informations for perf: the instruction pointer to get the event origin, the frame pointer for the callchain, the code segment for user_mode() tests (we always use __KERNEL_CS as trace events always occur from the kernel) and the eflags for further purposes. v2: rename perf_save_regs to perf_fetch_caller_regs as per Masami's suggestion. Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Thomas Gleixner Cc: H. Peter Anvin Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Steven Rostedt Cc: Arnaldo Carvalho de Melo Cc: Masami Hiramatsu Cc: Jason Baron Cc: Archs --- arch/x86/kernel/cpu/perf_event.c | 12 ++++++++++++ arch/x86/kernel/dumpstack.h | 15 ++++++++++++++ include/linux/perf_event.h | 42 +++++++++++++++++++++++++++++++++++++++- kernel/perf_event.c | 5 +++++ 4 files changed, 73 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 1d665a0b202c..c6bde7d7afdc 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1707,3 +1707,15 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) return entry; } + +void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip) +{ + regs->ip = ip; + /* + * perf_arch_fetch_caller_regs adds another call, we need to increment + * the skip level + */ + regs->bp = rewind_frame_pointer(skip + 1); + regs->cs = __KERNEL_CS; + local_save_flags(regs->flags); +} diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h index 4fd1420faffa..29e5f7c845b2 100644 --- a/arch/x86/kernel/dumpstack.h +++ b/arch/x86/kernel/dumpstack.h @@ -29,4 +29,19 @@ struct stack_frame { struct stack_frame *next_frame; unsigned long return_address; }; + +static inline unsigned long rewind_frame_pointer(int n) +{ + struct stack_frame *frame; + + get_bp(frame); + +#ifdef CONFIG_FRAME_POINTER + while (n--) + frame = frame->next_frame; #endif + + return (unsigned long)frame; +} + +#endif /* DUMPSTACK_H */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 80acbf3d5de1..70cffd052c04 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -452,6 +452,7 @@ enum perf_callchain_context { #include #include #include +#include #include #define PERF_MAX_STACK_DEPTH 255 @@ -847,6 +848,44 @@ perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr) __perf_sw_event(event_id, nr, nmi, regs, addr); } +extern void +perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip); + +/* + * Take a snapshot of the regs. Skip ip and frame pointer to + * the nth caller. 
We only need a few of the regs: + * - ip for PERF_SAMPLE_IP + * - cs for user_mode() tests + * - bp for callchains + * - eflags, for future purposes, just in case + */ +static inline void perf_fetch_caller_regs(struct pt_regs *regs, int skip) +{ + unsigned long ip; + + memset(regs, 0, sizeof(*regs)); + + switch (skip) { + case 1 : + ip = CALLER_ADDR0; + break; + case 2 : + ip = CALLER_ADDR1; + break; + case 3 : + ip = CALLER_ADDR2; + break; + case 4: + ip = CALLER_ADDR3; + break; + /* No need to support further for now */ + default: + ip = 0; + } + + return perf_arch_fetch_caller_regs(regs, ip, skip); +} + extern void __perf_event_mmap(struct vm_area_struct *vma); static inline void perf_event_mmap(struct vm_area_struct *vma) @@ -880,7 +919,8 @@ static inline bool perf_paranoid_kernel(void) } extern void perf_event_init(void); -extern void perf_tp_event(int event_id, u64 addr, u64 count, void *record, int entry_size); +extern void perf_tp_event(int event_id, u64 addr, u64 count, void *record, + int entry_size, struct pt_regs *regs); extern void perf_bp_event(struct perf_event *event, void *data); #ifndef perf_misc_flags diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 52c69a34d697..359d7f690c2b 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -2786,6 +2786,11 @@ __weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) return NULL; } +__weak +void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip) +{ +} + /* * Output */ -- cgit v1.2.3 From c530665c31c0140b74ca7689e7f836177796e5bd Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 3 Mar 2010 07:16:16 +0100 Subject: perf: Take a hot regs snapshot for trace events We are taking a wrong regs snapshot when a trace event triggers. Either we use get_irq_regs(), which gives us the interrupted registers if we are in an interrupt, or we use task_pt_regs() which gives us the state before we entered the kernel, assuming we are lucky enough to be no kernel thread, in which case task_pt_regs() returns the initial set of regs when the kernel thread was started. What we want is different. We need a hot snapshot of the regs, so that we can get the instruction pointer to record in the sample, the frame pointer for the callchain, and some other things. Let's use the new perf_fetch_caller_regs() for that. Comparison with perf record -e lock: -R -a -f -g Before: perf [kernel] [k] __do_softirq | --- __do_softirq | |--55.16%-- __open | --44.84%-- __write_nocancel After: perf [kernel] [k] perf_tp_event | --- perf_tp_event | |--41.07%-- lock_acquire | | | |--39.36%-- _raw_spin_lock | | | | | |--7.81%-- hrtimer_interrupt | | | smp_apic_timer_interrupt | | | apic_timer_interrupt The old case was producing unreliable callchains. Now having right frame and instruction pointers, we have the trace we want. Also syscalls and kprobe events already have the right regs, let's use them instead of wasting a retrieval. v2: Follow the rename perf_save_regs() -> perf_fetch_caller_regs() Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Thomas Gleixner Cc: H. 
Peter Anvin Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Steven Rostedt Cc: Arnaldo Carvalho de Melo Cc: Masami Hiramatsu Cc: Jason Baron Cc: Archs --- include/linux/ftrace_event.h | 7 +++++-- include/trace/ftrace.h | 6 +++++- kernel/perf_event.c | 8 ++------ kernel/trace/trace_event_profile.c | 3 ++- kernel/trace/trace_kprobe.c | 5 +++-- kernel/trace/trace_syscalls.c | 4 ++-- 6 files changed, 19 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 6b7c444ab8f6..ac424f18ce63 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -187,6 +187,9 @@ do { \ #ifdef CONFIG_PERF_EVENTS struct perf_event; + +DECLARE_PER_CPU(struct pt_regs, perf_trace_regs); + extern int ftrace_profile_enable(int event_id); extern void ftrace_profile_disable(int event_id); extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, @@ -198,11 +201,11 @@ ftrace_perf_buf_prepare(int size, unsigned short type, int *rctxp, static inline void ftrace_perf_buf_submit(void *raw_data, int size, int rctx, u64 addr, - u64 count, unsigned long irq_flags) + u64 count, unsigned long irq_flags, struct pt_regs *regs) { struct trace_entry *entry = raw_data; - perf_tp_event(entry->type, addr, count, raw_data, size); + perf_tp_event(entry->type, addr, count, raw_data, size, regs); perf_swevent_put_recursion_context(rctx); local_irq_restore(irq_flags); } diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 0804cd594803..f31bb8b9777c 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -764,6 +764,7 @@ ftrace_profile_templ_##call(struct ftrace_event_call *event_call, \ struct ftrace_raw_##call *entry; \ u64 __addr = 0, __count = 1; \ unsigned long irq_flags; \ + struct pt_regs *__regs; \ int __entry_size; \ int __data_size; \ int rctx; \ @@ -784,8 +785,11 @@ ftrace_profile_templ_##call(struct ftrace_event_call *event_call, \ \ { assign; } \ \ + __regs = &__get_cpu_var(perf_trace_regs); \ + perf_fetch_caller_regs(__regs, 2); \ + \ ftrace_perf_buf_submit(entry, __entry_size, rctx, __addr, \ - __count, irq_flags); \ + __count, irq_flags, __regs); \ } #undef DEFINE_EVENT diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 359d7f690c2b..45b4b6e55891 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -4318,9 +4318,8 @@ static const struct pmu perf_ops_task_clock = { #ifdef CONFIG_EVENT_TRACING void perf_tp_event(int event_id, u64 addr, u64 count, void *record, - int entry_size) + int entry_size, struct pt_regs *regs) { - struct pt_regs *regs = get_irq_regs(); struct perf_sample_data data; struct perf_raw_record raw = { .size = entry_size, @@ -4330,12 +4329,9 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record, perf_sample_data_init(&data, addr); data.raw = &raw; - if (!regs) - regs = task_pt_regs(current); - /* Trace events already protected against recursion */ do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, - &data, regs); + &data, regs); } EXPORT_SYMBOL_GPL(perf_tp_event); diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c index f0d693005075..e66d21e15a0f 100644 --- a/kernel/trace/trace_event_profile.c +++ b/kernel/trace/trace_event_profile.c @@ -2,13 +2,14 @@ * trace event based perf counter profiling * * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra - * + * Copyright (C) 2009-2010 Frederic Weisbecker */ #include #include #include "trace.h" +DEFINE_PER_CPU(struct pt_regs, perf_trace_regs); static char 
*perf_trace_buf; static char *perf_trace_buf_nmi; diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 505c92273b1a..f7a20a8bfb31 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1240,7 +1240,7 @@ static __kprobes void kprobe_profile_func(struct kprobe *kp, for (i = 0; i < tp->nr_args; i++) entry->args[i] = call_fetch(&tp->args[i].fetch, regs); - ftrace_perf_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags); + ftrace_perf_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags, regs); } /* Kretprobe profile handler */ @@ -1271,7 +1271,8 @@ static __kprobes void kretprobe_profile_func(struct kretprobe_instance *ri, for (i = 0; i < tp->nr_args; i++) entry->args[i] = call_fetch(&tp->args[i].fetch, regs); - ftrace_perf_buf_submit(entry, size, rctx, entry->ret_ip, 1, irq_flags); + ftrace_perf_buf_submit(entry, size, rctx, entry->ret_ip, 1, + irq_flags, regs); } static int probe_profile_enable(struct ftrace_event_call *call) diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index cba47d7935cc..7e6e84fb7b6c 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -467,7 +467,7 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) rec->nr = syscall_nr; syscall_get_arguments(current, regs, 0, sys_data->nb_args, (unsigned long *)&rec->args); - ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags); + ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags, regs); } int prof_sysenter_enable(struct ftrace_event_call *call) @@ -542,7 +542,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) rec->nr = syscall_nr; rec->ret = syscall_get_return_value(current, regs); - ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags); + ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags, regs); } int prof_sysexit_enable(struct ftrace_event_call *call) -- cgit v1.2.3 From 97d5a22005f38057b4bc0d95f81cd26510268794 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 5 Mar 2010 05:35:37 +0100 Subject: perf: Drop the obsolete profile naming for trace events Drop the obsolete "profile" naming used by perf for trace events. Perf can now do more than simple events counting, so generalize the API naming. 
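For orientation, the renames in this patch are mechanical; summarizing from the diff below:

    ftrace_profile_enable()/ftrace_profile_disable()    ->  perf_trace_enable()/perf_trace_disable()
    ftrace_perf_buf_prepare()/ftrace_perf_buf_submit()  ->  perf_trace_buf_prepare()/perf_trace_buf_submit()
    FTRACE_MAX_PROFILE_SIZE                             ->  PERF_MAX_TRACE_SIZE
    ->profile_enable/->profile_disable/->profile_count  ->  ->perf_event_enable/->perf_event_disable/->perf_refcount
    kernel/trace/trace_event_profile.c                  ->  kernel/trace/trace_event_perf.c

A typical call site therefore changes only in the symbol name, for example in the kprobe handler:

    entry = perf_trace_buf_prepare(size, call->id, &rctx, &irq_flags);

where it previously called ftrace_perf_buf_prepare() with the same arguments.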
Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Steven Rostedt Cc: Masami Hiramatsu Cc: Jason Baron --- include/linux/ftrace_event.h | 16 ++-- include/linux/syscalls.h | 24 +++--- include/trace/ftrace.h | 38 ++++----- include/trace/syscall.h | 8 +- kernel/perf_event.c | 4 +- kernel/trace/Makefile | 2 +- kernel/trace/trace_event_perf.c | 165 +++++++++++++++++++++++++++++++++++++ kernel/trace/trace_event_profile.c | 165 ------------------------------------- kernel/trace/trace_events.c | 2 +- kernel/trace/trace_kprobe.c | 28 +++---- kernel/trace/trace_syscalls.c | 72 ++++++++-------- 11 files changed, 262 insertions(+), 262 deletions(-) create mode 100644 kernel/trace/trace_event_perf.c delete mode 100644 kernel/trace/trace_event_profile.c (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index ac424f18ce63..c0f4b364c711 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -131,12 +131,12 @@ struct ftrace_event_call { void *mod; void *data; - int profile_count; - int (*profile_enable)(struct ftrace_event_call *); - void (*profile_disable)(struct ftrace_event_call *); + int perf_refcount; + int (*perf_event_enable)(struct ftrace_event_call *); + void (*perf_event_disable)(struct ftrace_event_call *); }; -#define FTRACE_MAX_PROFILE_SIZE 2048 +#define PERF_MAX_TRACE_SIZE 2048 #define MAX_FILTER_PRED 32 #define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ @@ -190,17 +190,17 @@ struct perf_event; DECLARE_PER_CPU(struct pt_regs, perf_trace_regs); -extern int ftrace_profile_enable(int event_id); -extern void ftrace_profile_disable(int event_id); +extern int perf_trace_enable(int event_id); +extern void perf_trace_disable(int event_id); extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, char *filter_str); extern void ftrace_profile_free_filter(struct perf_event *event); extern void * -ftrace_perf_buf_prepare(int size, unsigned short type, int *rctxp, +perf_trace_buf_prepare(int size, unsigned short type, int *rctxp, unsigned long *irq_flags); static inline void -ftrace_perf_buf_submit(void *raw_data, int size, int rctx, u64 addr, +perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr, u64 count, unsigned long irq_flags, struct pt_regs *regs) { struct trace_entry *entry = raw_data; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 8126f239edf0..51435bcc3460 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -101,18 +101,18 @@ struct perf_event_attr; #ifdef CONFIG_PERF_EVENTS -#define TRACE_SYS_ENTER_PROFILE_INIT(sname) \ - .profile_enable = prof_sysenter_enable, \ - .profile_disable = prof_sysenter_disable, +#define TRACE_SYS_ENTER_PERF_INIT(sname) \ + .perf_event_enable = perf_sysenter_enable, \ + .perf_event_disable = perf_sysenter_disable, -#define TRACE_SYS_EXIT_PROFILE_INIT(sname) \ - .profile_enable = prof_sysexit_enable, \ - .profile_disable = prof_sysexit_disable, +#define TRACE_SYS_EXIT_PERF_INIT(sname) \ + .perf_event_enable = perf_sysexit_enable, \ + .perf_event_disable = perf_sysexit_disable, #else -#define TRACE_SYS_ENTER_PROFILE(sname) -#define TRACE_SYS_ENTER_PROFILE_INIT(sname) -#define TRACE_SYS_EXIT_PROFILE(sname) -#define TRACE_SYS_EXIT_PROFILE_INIT(sname) +#define TRACE_SYS_ENTER_PERF(sname) +#define TRACE_SYS_ENTER_PERF_INIT(sname) +#define TRACE_SYS_EXIT_PERF(sname) +#define TRACE_SYS_EXIT_PERF_INIT(sname) #endif /* CONFIG_PERF_EVENTS */ #ifdef CONFIG_FTRACE_SYSCALLS @@ 
-149,7 +149,7 @@ struct perf_event_attr; .regfunc = reg_event_syscall_enter, \ .unregfunc = unreg_event_syscall_enter, \ .data = (void *)&__syscall_meta_##sname,\ - TRACE_SYS_ENTER_PROFILE_INIT(sname) \ + TRACE_SYS_ENTER_PERF_INIT(sname) \ } #define SYSCALL_TRACE_EXIT_EVENT(sname) \ @@ -171,7 +171,7 @@ struct perf_event_attr; .regfunc = reg_event_syscall_exit, \ .unregfunc = unreg_event_syscall_exit, \ .data = (void *)&__syscall_meta_##sname,\ - TRACE_SYS_EXIT_PROFILE_INIT(sname) \ + TRACE_SYS_EXIT_PERF_INIT(sname) \ } #define SYSCALL_METADATA(sname, nb) \ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index f31bb8b9777c..25ab56f75d65 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -401,18 +401,18 @@ static inline notrace int ftrace_get_offsets_##call( \ #undef DEFINE_EVENT #define DEFINE_EVENT(template, name, proto, args) \ \ -static void ftrace_profile_##name(proto); \ +static void perf_trace_##name(proto); \ \ static notrace int \ -ftrace_profile_enable_##name(struct ftrace_event_call *unused) \ +perf_trace_enable_##name(struct ftrace_event_call *unused) \ { \ - return register_trace_##name(ftrace_profile_##name); \ + return register_trace_##name(perf_trace_##name); \ } \ \ static notrace void \ -ftrace_profile_disable_##name(struct ftrace_event_call *unused) \ +perf_trace_disable_##name(struct ftrace_event_call *unused) \ { \ - unregister_trace_##name(ftrace_profile_##name); \ + unregister_trace_##name(perf_trace_##name); \ } #undef DEFINE_EVENT_PRINT @@ -507,12 +507,12 @@ ftrace_profile_disable_##name(struct ftrace_event_call *unused) \ #ifdef CONFIG_PERF_EVENTS -#define _TRACE_PROFILE_INIT(call) \ - .profile_enable = ftrace_profile_enable_##call, \ - .profile_disable = ftrace_profile_disable_##call, +#define _TRACE_PERF_INIT(call) \ + .perf_event_enable = perf_trace_enable_##call, \ + .perf_event_disable = perf_trace_disable_##call, #else -#define _TRACE_PROFILE_INIT(call) +#define _TRACE_PERF_INIT(call) #endif /* CONFIG_PERF_EVENTS */ #undef __entry @@ -638,7 +638,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .unregfunc = ftrace_raw_unreg_event_##call, \ .print_fmt = print_fmt_##template, \ .define_fields = ftrace_define_fields_##template, \ - _TRACE_PROFILE_INIT(call) \ + _TRACE_PERF_INIT(call) \ } #undef DEFINE_EVENT_PRINT @@ -657,18 +657,18 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .unregfunc = ftrace_raw_unreg_event_##call, \ .print_fmt = print_fmt_##call, \ .define_fields = ftrace_define_fields_##template, \ - _TRACE_PROFILE_INIT(call) \ + _TRACE_PERF_INIT(call) \ } #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* - * Define the insertion callback to profile events + * Define the insertion callback to perf events * * The job is very similar to ftrace_raw_event_ except that we don't * insert in the ring buffer but in a perf counter. 
* - * static void ftrace_profile_(proto) + * static void ftrace_perf_(proto) * { * struct ftrace_data_offsets_ __maybe_unused __data_offsets; * struct ftrace_event_call *event_call = &event_; @@ -757,7 +757,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ static notrace void \ -ftrace_profile_templ_##call(struct ftrace_event_call *event_call, \ +perf_trace_templ_##call(struct ftrace_event_call *event_call, \ proto) \ { \ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ @@ -774,10 +774,10 @@ ftrace_profile_templ_##call(struct ftrace_event_call *event_call, \ sizeof(u64)); \ __entry_size -= sizeof(u32); \ \ - if (WARN_ONCE(__entry_size > FTRACE_MAX_PROFILE_SIZE, \ + if (WARN_ONCE(__entry_size > PERF_MAX_TRACE_SIZE, \ "profile buffer not large enough")) \ return; \ - entry = (struct ftrace_raw_##call *)ftrace_perf_buf_prepare( \ + entry = (struct ftrace_raw_##call *)perf_trace_buf_prepare( \ __entry_size, event_call->id, &rctx, &irq_flags); \ if (!entry) \ return; \ @@ -788,17 +788,17 @@ ftrace_profile_templ_##call(struct ftrace_event_call *event_call, \ __regs = &__get_cpu_var(perf_trace_regs); \ perf_fetch_caller_regs(__regs, 2); \ \ - ftrace_perf_buf_submit(entry, __entry_size, rctx, __addr, \ + perf_trace_buf_submit(entry, __entry_size, rctx, __addr, \ __count, irq_flags, __regs); \ } #undef DEFINE_EVENT #define DEFINE_EVENT(template, call, proto, args) \ -static notrace void ftrace_profile_##call(proto) \ +static notrace void perf_trace_##call(proto) \ { \ struct ftrace_event_call *event_call = &event_##call; \ \ - ftrace_profile_templ_##template(event_call, args); \ + perf_trace_templ_##template(event_call, args); \ } #undef DEFINE_EVENT_PRINT diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 0387100752f0..e5e5f48dbfb3 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -47,10 +47,10 @@ enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags); #endif #ifdef CONFIG_PERF_EVENTS -int prof_sysenter_enable(struct ftrace_event_call *call); -void prof_sysenter_disable(struct ftrace_event_call *call); -int prof_sysexit_enable(struct ftrace_event_call *call); -void prof_sysexit_disable(struct ftrace_event_call *call); +int perf_sysenter_enable(struct ftrace_event_call *call); +void perf_sysenter_disable(struct ftrace_event_call *call); +int perf_sysexit_enable(struct ftrace_event_call *call); +void perf_sysexit_disable(struct ftrace_event_call *call); #endif #endif /* _TRACE_SYSCALL_H */ diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 45b4b6e55891..c502b18594cc 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -4347,7 +4347,7 @@ static int perf_tp_event_match(struct perf_event *event, static void tp_perf_event_destroy(struct perf_event *event) { - ftrace_profile_disable(event->attr.config); + perf_trace_disable(event->attr.config); } static const struct pmu *tp_perf_event_init(struct perf_event *event) @@ -4361,7 +4361,7 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event) !capable(CAP_SYS_ADMIN)) return ERR_PTR(-EPERM); - if (ftrace_profile_enable(event->attr.config)) + if (perf_trace_enable(event->attr.config)) return NULL; event->destroy = tp_perf_event_destroy; diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index d00c6fe23f54..78edc6490038 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -52,7 +52,7 @@ obj-$(CONFIG_EVENT_TRACING) 
+= trace_events.o obj-$(CONFIG_EVENT_TRACING) += trace_export.o obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o ifeq ($(CONFIG_PERF_EVENTS),y) -obj-$(CONFIG_EVENT_TRACING) += trace_event_profile.o +obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o endif obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c new file mode 100644 index 000000000000..f315b12a41d8 --- /dev/null +++ b/kernel/trace/trace_event_perf.c @@ -0,0 +1,165 @@ +/* + * trace event based perf event profiling/tracing + * + * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra + * Copyright (C) 2009-2010 Frederic Weisbecker + */ + +#include +#include +#include "trace.h" + +DEFINE_PER_CPU(struct pt_regs, perf_trace_regs); + +static char *perf_trace_buf; +static char *perf_trace_buf_nmi; + +typedef typeof(char [PERF_MAX_TRACE_SIZE]) perf_trace_t ; + +/* Count the events in use (per event id, not per instance) */ +static int total_ref_count; + +static int perf_trace_event_enable(struct ftrace_event_call *event) +{ + char *buf; + int ret = -ENOMEM; + + if (event->perf_refcount++ > 0) + return 0; + + if (!total_ref_count) { + buf = (char *)alloc_percpu(perf_trace_t); + if (!buf) + goto fail_buf; + + rcu_assign_pointer(perf_trace_buf, buf); + + buf = (char *)alloc_percpu(perf_trace_t); + if (!buf) + goto fail_buf_nmi; + + rcu_assign_pointer(perf_trace_buf_nmi, buf); + } + + ret = event->perf_event_enable(event); + if (!ret) { + total_ref_count++; + return 0; + } + +fail_buf_nmi: + if (!total_ref_count) { + free_percpu(perf_trace_buf_nmi); + free_percpu(perf_trace_buf); + perf_trace_buf_nmi = NULL; + perf_trace_buf = NULL; + } +fail_buf: + event->perf_refcount--; + + return ret; +} + +int perf_trace_enable(int event_id) +{ + struct ftrace_event_call *event; + int ret = -EINVAL; + + mutex_lock(&event_mutex); + list_for_each_entry(event, &ftrace_events, list) { + if (event->id == event_id && event->perf_event_enable && + try_module_get(event->mod)) { + ret = perf_trace_event_enable(event); + break; + } + } + mutex_unlock(&event_mutex); + + return ret; +} + +static void perf_trace_event_disable(struct ftrace_event_call *event) +{ + char *buf, *nmi_buf; + + if (--event->perf_refcount > 0) + return; + + event->perf_event_disable(event); + + if (!--total_ref_count) { + buf = perf_trace_buf; + rcu_assign_pointer(perf_trace_buf, NULL); + + nmi_buf = perf_trace_buf_nmi; + rcu_assign_pointer(perf_trace_buf_nmi, NULL); + + /* + * Ensure every events in profiling have finished before + * releasing the buffers + */ + synchronize_sched(); + + free_percpu(buf); + free_percpu(nmi_buf); + } +} + +void perf_trace_disable(int event_id) +{ + struct ftrace_event_call *event; + + mutex_lock(&event_mutex); + list_for_each_entry(event, &ftrace_events, list) { + if (event->id == event_id) { + perf_trace_event_disable(event); + module_put(event->mod); + break; + } + } + mutex_unlock(&event_mutex); +} + +__kprobes void *perf_trace_buf_prepare(int size, unsigned short type, + int *rctxp, unsigned long *irq_flags) +{ + struct trace_entry *entry; + char *trace_buf, *raw_data; + int pc, cpu; + + pc = preempt_count(); + + /* Protect the per cpu buffer, begin the rcu read side */ + local_irq_save(*irq_flags); + + *rctxp = perf_swevent_get_recursion_context(); + if (*rctxp < 0) + goto err_recursion; + + cpu = smp_processor_id(); + + if (in_nmi()) + trace_buf = rcu_dereference(perf_trace_buf_nmi); + else + trace_buf = 
rcu_dereference(perf_trace_buf); + + if (!trace_buf) + goto err; + + raw_data = per_cpu_ptr(trace_buf, cpu); + + /* zero the dead bytes from align to not leak stack to user */ + *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; + + entry = (struct trace_entry *)raw_data; + tracing_generic_entry_update(entry, *irq_flags, pc); + entry->type = type; + + return raw_data; +err: + perf_swevent_put_recursion_context(*rctxp); +err_recursion: + local_irq_restore(*irq_flags); + return NULL; +} +EXPORT_SYMBOL_GPL(perf_trace_buf_prepare); diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c deleted file mode 100644 index e66d21e15a0f..000000000000 --- a/kernel/trace/trace_event_profile.c +++ /dev/null @@ -1,165 +0,0 @@ -/* - * trace event based perf counter profiling - * - * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra - * Copyright (C) 2009-2010 Frederic Weisbecker - */ - -#include -#include -#include "trace.h" - -DEFINE_PER_CPU(struct pt_regs, perf_trace_regs); - -static char *perf_trace_buf; -static char *perf_trace_buf_nmi; - -typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ; - -/* Count the events in use (per event id, not per instance) */ -static int total_profile_count; - -static int ftrace_profile_enable_event(struct ftrace_event_call *event) -{ - char *buf; - int ret = -ENOMEM; - - if (event->profile_count++ > 0) - return 0; - - if (!total_profile_count) { - buf = (char *)alloc_percpu(perf_trace_t); - if (!buf) - goto fail_buf; - - rcu_assign_pointer(perf_trace_buf, buf); - - buf = (char *)alloc_percpu(perf_trace_t); - if (!buf) - goto fail_buf_nmi; - - rcu_assign_pointer(perf_trace_buf_nmi, buf); - } - - ret = event->profile_enable(event); - if (!ret) { - total_profile_count++; - return 0; - } - -fail_buf_nmi: - if (!total_profile_count) { - free_percpu(perf_trace_buf_nmi); - free_percpu(perf_trace_buf); - perf_trace_buf_nmi = NULL; - perf_trace_buf = NULL; - } -fail_buf: - event->profile_count--; - - return ret; -} - -int ftrace_profile_enable(int event_id) -{ - struct ftrace_event_call *event; - int ret = -EINVAL; - - mutex_lock(&event_mutex); - list_for_each_entry(event, &ftrace_events, list) { - if (event->id == event_id && event->profile_enable && - try_module_get(event->mod)) { - ret = ftrace_profile_enable_event(event); - break; - } - } - mutex_unlock(&event_mutex); - - return ret; -} - -static void ftrace_profile_disable_event(struct ftrace_event_call *event) -{ - char *buf, *nmi_buf; - - if (--event->profile_count > 0) - return; - - event->profile_disable(event); - - if (!--total_profile_count) { - buf = perf_trace_buf; - rcu_assign_pointer(perf_trace_buf, NULL); - - nmi_buf = perf_trace_buf_nmi; - rcu_assign_pointer(perf_trace_buf_nmi, NULL); - - /* - * Ensure every events in profiling have finished before - * releasing the buffers - */ - synchronize_sched(); - - free_percpu(buf); - free_percpu(nmi_buf); - } -} - -void ftrace_profile_disable(int event_id) -{ - struct ftrace_event_call *event; - - mutex_lock(&event_mutex); - list_for_each_entry(event, &ftrace_events, list) { - if (event->id == event_id) { - ftrace_profile_disable_event(event); - module_put(event->mod); - break; - } - } - mutex_unlock(&event_mutex); -} - -__kprobes void *ftrace_perf_buf_prepare(int size, unsigned short type, - int *rctxp, unsigned long *irq_flags) -{ - struct trace_entry *entry; - char *trace_buf, *raw_data; - int pc, cpu; - - pc = preempt_count(); - - /* Protect the per cpu buffer, begin the rcu read side */ - local_irq_save(*irq_flags); - - *rctxp = 
perf_swevent_get_recursion_context(); - if (*rctxp < 0) - goto err_recursion; - - cpu = smp_processor_id(); - - if (in_nmi()) - trace_buf = rcu_dereference(perf_trace_buf_nmi); - else - trace_buf = rcu_dereference(perf_trace_buf); - - if (!trace_buf) - goto err; - - raw_data = per_cpu_ptr(trace_buf, cpu); - - /* zero the dead bytes from align to not leak stack to user */ - *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; - - entry = (struct trace_entry *)raw_data; - tracing_generic_entry_update(entry, *irq_flags, pc); - entry->type = type; - - return raw_data; -err: - perf_swevent_put_recursion_context(*rctxp); -err_recursion: - local_irq_restore(*irq_flags); - return NULL; -} -EXPORT_SYMBOL_GPL(ftrace_perf_buf_prepare); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 3f972ad98d04..beab8bf2f310 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -938,7 +938,7 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events, trace_create_file("enable", 0644, call->dir, call, enable); - if (call->id && call->profile_enable) + if (call->id && call->perf_event_enable) trace_create_file("id", 0444, call->dir, call, id); diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index f7a20a8bfb31..1251e367bae9 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1214,7 +1214,7 @@ static int set_print_fmt(struct trace_probe *tp) #ifdef CONFIG_PERF_EVENTS /* Kprobe profile handler */ -static __kprobes void kprobe_profile_func(struct kprobe *kp, +static __kprobes void kprobe_perf_func(struct kprobe *kp, struct pt_regs *regs) { struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); @@ -1227,11 +1227,11 @@ static __kprobes void kprobe_profile_func(struct kprobe *kp, __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); size = ALIGN(__size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); - if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, + if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough")) return; - entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags); + entry = perf_trace_buf_prepare(size, call->id, &rctx, &irq_flags); if (!entry) return; @@ -1240,11 +1240,11 @@ static __kprobes void kprobe_profile_func(struct kprobe *kp, for (i = 0; i < tp->nr_args; i++) entry->args[i] = call_fetch(&tp->args[i].fetch, regs); - ftrace_perf_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags, regs); + perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags, regs); } /* Kretprobe profile handler */ -static __kprobes void kretprobe_profile_func(struct kretprobe_instance *ri, +static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri, struct pt_regs *regs) { struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); @@ -1257,11 +1257,11 @@ static __kprobes void kretprobe_profile_func(struct kretprobe_instance *ri, __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); size = ALIGN(__size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); - if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, + if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough")) return; - entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags); + entry = perf_trace_buf_prepare(size, call->id, &rctx, &irq_flags); if (!entry) return; @@ -1271,11 +1271,11 @@ static __kprobes void kretprobe_profile_func(struct kretprobe_instance *ri, for (i = 0; i < tp->nr_args; i++) entry->args[i] = call_fetch(&tp->args[i].fetch, regs); - 
ftrace_perf_buf_submit(entry, size, rctx, entry->ret_ip, 1, + perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, irq_flags, regs); } -static int probe_profile_enable(struct ftrace_event_call *call) +static int probe_perf_enable(struct ftrace_event_call *call) { struct trace_probe *tp = (struct trace_probe *)call->data; @@ -1287,7 +1287,7 @@ static int probe_profile_enable(struct ftrace_event_call *call) return enable_kprobe(&tp->rp.kp); } -static void probe_profile_disable(struct ftrace_event_call *call) +static void probe_perf_disable(struct ftrace_event_call *call) { struct trace_probe *tp = (struct trace_probe *)call->data; @@ -1312,7 +1312,7 @@ int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) kprobe_trace_func(kp, regs); #ifdef CONFIG_PERF_EVENTS if (tp->flags & TP_FLAG_PROFILE) - kprobe_profile_func(kp, regs); + kprobe_perf_func(kp, regs); #endif return 0; /* We don't tweek kernel, so just return 0 */ } @@ -1326,7 +1326,7 @@ int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs) kretprobe_trace_func(ri, regs); #ifdef CONFIG_PERF_EVENTS if (tp->flags & TP_FLAG_PROFILE) - kretprobe_profile_func(ri, regs); + kretprobe_perf_func(ri, regs); #endif return 0; /* We don't tweek kernel, so just return 0 */ } @@ -1359,8 +1359,8 @@ static int register_probe_event(struct trace_probe *tp) call->unregfunc = probe_event_disable; #ifdef CONFIG_PERF_EVENTS - call->profile_enable = probe_profile_enable; - call->profile_disable = probe_profile_disable; + call->perf_event_enable = probe_perf_enable; + call->perf_event_disable = probe_perf_disable; #endif call->data = tp; ret = trace_add_event_call(call); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 7e6e84fb7b6c..33c2a5b769dc 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -428,12 +428,12 @@ core_initcall(init_ftrace_syscalls); #ifdef CONFIG_PERF_EVENTS -static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls); -static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls); -static int sys_prof_refcount_enter; -static int sys_prof_refcount_exit; +static DECLARE_BITMAP(enabled_perf_enter_syscalls, NR_syscalls); +static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls); +static int sys_perf_refcount_enter; +static int sys_perf_refcount_exit; -static void prof_syscall_enter(struct pt_regs *regs, long id) +static void perf_syscall_enter(struct pt_regs *regs, long id) { struct syscall_metadata *sys_data; struct syscall_trace_enter *rec; @@ -443,7 +443,7 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) int size; syscall_nr = syscall_get_nr(current, regs); - if (!test_bit(syscall_nr, enabled_prof_enter_syscalls)) + if (!test_bit(syscall_nr, enabled_perf_enter_syscalls)) return; sys_data = syscall_nr_to_meta(syscall_nr); @@ -455,11 +455,11 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) size = ALIGN(size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); - if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, - "profile buffer not large enough")) + if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, + "perf buffer not large enough")) return; - rec = (struct syscall_trace_enter *)ftrace_perf_buf_prepare(size, + rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size, sys_data->enter_event->id, &rctx, &flags); if (!rec) return; @@ -467,10 +467,10 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) rec->nr = syscall_nr; syscall_get_arguments(current, regs, 0, sys_data->nb_args, (unsigned long 
*)&rec->args); - ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags, regs); + perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs); } -int prof_sysenter_enable(struct ftrace_event_call *call) +int perf_sysenter_enable(struct ftrace_event_call *call) { int ret = 0; int num; @@ -478,34 +478,34 @@ int prof_sysenter_enable(struct ftrace_event_call *call) num = ((struct syscall_metadata *)call->data)->syscall_nr; mutex_lock(&syscall_trace_lock); - if (!sys_prof_refcount_enter) - ret = register_trace_sys_enter(prof_syscall_enter); + if (!sys_perf_refcount_enter) + ret = register_trace_sys_enter(perf_syscall_enter); if (ret) { pr_info("event trace: Could not activate" "syscall entry trace point"); } else { - set_bit(num, enabled_prof_enter_syscalls); - sys_prof_refcount_enter++; + set_bit(num, enabled_perf_enter_syscalls); + sys_perf_refcount_enter++; } mutex_unlock(&syscall_trace_lock); return ret; } -void prof_sysenter_disable(struct ftrace_event_call *call) +void perf_sysenter_disable(struct ftrace_event_call *call) { int num; num = ((struct syscall_metadata *)call->data)->syscall_nr; mutex_lock(&syscall_trace_lock); - sys_prof_refcount_enter--; - clear_bit(num, enabled_prof_enter_syscalls); - if (!sys_prof_refcount_enter) - unregister_trace_sys_enter(prof_syscall_enter); + sys_perf_refcount_enter--; + clear_bit(num, enabled_perf_enter_syscalls); + if (!sys_perf_refcount_enter) + unregister_trace_sys_enter(perf_syscall_enter); mutex_unlock(&syscall_trace_lock); } -static void prof_syscall_exit(struct pt_regs *regs, long ret) +static void perf_syscall_exit(struct pt_regs *regs, long ret) { struct syscall_metadata *sys_data; struct syscall_trace_exit *rec; @@ -515,7 +515,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) int size; syscall_nr = syscall_get_nr(current, regs); - if (!test_bit(syscall_nr, enabled_prof_exit_syscalls)) + if (!test_bit(syscall_nr, enabled_perf_exit_syscalls)) return; sys_data = syscall_nr_to_meta(syscall_nr); @@ -530,11 +530,11 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) * Impossible, but be paranoid with the future * How to put this check outside runtime? 
*/ - if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, - "exit event has grown above profile buffer size")) + if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, + "exit event has grown above perf buffer size")) return; - rec = (struct syscall_trace_exit *)ftrace_perf_buf_prepare(size, + rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size, sys_data->exit_event->id, &rctx, &flags); if (!rec) return; @@ -542,10 +542,10 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) rec->nr = syscall_nr; rec->ret = syscall_get_return_value(current, regs); - ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags, regs); + perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs); } -int prof_sysexit_enable(struct ftrace_event_call *call) +int perf_sysexit_enable(struct ftrace_event_call *call) { int ret = 0; int num; @@ -553,30 +553,30 @@ int prof_sysexit_enable(struct ftrace_event_call *call) num = ((struct syscall_metadata *)call->data)->syscall_nr; mutex_lock(&syscall_trace_lock); - if (!sys_prof_refcount_exit) - ret = register_trace_sys_exit(prof_syscall_exit); + if (!sys_perf_refcount_exit) + ret = register_trace_sys_exit(perf_syscall_exit); if (ret) { pr_info("event trace: Could not activate" "syscall exit trace point"); } else { - set_bit(num, enabled_prof_exit_syscalls); - sys_prof_refcount_exit++; + set_bit(num, enabled_perf_exit_syscalls); + sys_perf_refcount_exit++; } mutex_unlock(&syscall_trace_lock); return ret; } -void prof_sysexit_disable(struct ftrace_event_call *call) +void perf_sysexit_disable(struct ftrace_event_call *call) { int num; num = ((struct syscall_metadata *)call->data)->syscall_nr; mutex_lock(&syscall_trace_lock); - sys_prof_refcount_exit--; - clear_bit(num, enabled_prof_exit_syscalls); - if (!sys_prof_refcount_exit) - unregister_trace_sys_exit(prof_syscall_exit); + sys_perf_refcount_exit--; + clear_bit(num, enabled_perf_exit_syscalls); + if (!sys_perf_refcount_exit) + unregister_trace_sys_exit(perf_syscall_exit); mutex_unlock(&syscall_trace_lock); } -- cgit v1.2.3 From 2b4c32972b9bcfee29d5e2c1b6f261dda5ef2a21 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki / 吉藤英明 Date: Tue, 9 Mar 2010 16:47:52 +0000 Subject: ipv6 ip6_tunnel: eliminate unused recursion field from ip6_tnl{}. Commit a43912ab19... ("tunnel: eliminate recursion field") eliminated use of recursion field from tunnel structures, but its definition still exists in ip6_tnl{}. Let's remove that unused field. Signed-off-by: YOSHIFUJI Hideaki Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip6_tunnel.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 83b4e008b16d..fbf9d1cda27b 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -15,7 +15,6 @@ struct ip6_tnl { struct ip6_tnl *next; /* next tunnel in list */ struct net_device *dev; /* virtual device associated with tunnel */ - int recursion; /* depth of hard_start_xmit recursion */ struct ip6_tnl_parm parms; /* tunnel configuration parameters */ struct flowi fl; /* flowi template for xmit */ struct dst_entry *dst_cache; /* cached dst */ -- cgit v1.2.3 From fdba2bb1f2eed85085a0fe154e1acb82de3239f7 Mon Sep 17 00:00:00 2001 From: Ranjith Lohithakshan Date: Wed, 10 Mar 2010 23:41:22 -0800 Subject: Input: ads7846 - add wakeup support Add wakeup support to the ads7846 driver. Platforms can enable wakeup capability by setting the wakeup flag in ads7846_platform_data. With this patch the ads7846 driver can be used to wake the system from suspend. 
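As an illustration only (this snippet is not part of the patch and the surrounding fields are placeholders), a board file opts in by setting the new flag in its ads7846_platform_data:

    static struct ads7846_platform_data board_ts_info = {
    	/* ... the board's usual pendown, filter and sync hooks ... */
    	.wakeup = true,	/* let the touch controller interrupt wake the system */
    };

With the flag set, the driver calls device_init_wakeup() at probe time and toggles enable_irq_wake()/disable_irq_wake() across suspend/resume, as the diff below shows.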
Signed-off-by: Ranjith Lohithakshan Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/ads7846.c | 10 ++++++++++ include/linux/spi/ads7846.h | 1 + 2 files changed, 11 insertions(+) (limited to 'include') diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c index d187be05955f..532279cda0e4 100644 --- a/drivers/input/touchscreen/ads7846.c +++ b/drivers/input/touchscreen/ads7846.c @@ -822,6 +822,9 @@ static int ads7846_suspend(struct spi_device *spi, pm_message_t message) spin_unlock_irq(&ts->lock); + if (device_may_wakeup(&ts->spi->dev)) + enable_irq_wake(ts->spi->irq); + return 0; } @@ -830,6 +833,9 @@ static int ads7846_resume(struct spi_device *spi) { struct ads7846 *ts = dev_get_drvdata(&spi->dev); + if (device_may_wakeup(&ts->spi->dev)) + disable_irq_wake(ts->spi->irq); + spin_lock_irq(&ts->lock); ts->is_suspended = 0; @@ -1201,6 +1207,8 @@ static int __devinit ads7846_probe(struct spi_device *spi) if (err) goto err_remove_attr_group; + device_init_wakeup(&spi->dev, pdata->wakeup); + return 0; err_remove_attr_group: @@ -1230,6 +1238,8 @@ static int __devexit ads7846_remove(struct spi_device *spi) { struct ads7846 *ts = dev_get_drvdata(&spi->dev); + device_init_wakeup(&spi->dev, false); + ads784x_hwmon_unregister(spi, ts); input_unregister_device(ts->input); diff --git a/include/linux/spi/ads7846.h b/include/linux/spi/ads7846.h index 5710c15d394a..b4ae570d3c98 100644 --- a/include/linux/spi/ads7846.h +++ b/include/linux/spi/ads7846.h @@ -53,5 +53,6 @@ struct ads7846_platform_data { int (*filter) (void *filter_data, int data_idx, int *val); void (*filter_cleanup)(void *filter_data); void (*wait_for_sync)(void); + bool wakeup; }; -- cgit v1.2.3 From 85cfabbcd10f8d112feee6e2ec64ee78033b6d3c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Mar 2010 13:06:56 +0100 Subject: perf, ppc: Fix compile error due to new cpu notifiers Fix: arch/powerpc/kernel/perf_event.c:1334: error: 'power_pmu_notifier' undeclared (first use in this function) arch/powerpc/kernel/perf_event.c:1334: error: (Each undeclared identifier is reported only once arch/powerpc/kernel/perf_event.c:1334: error: for each function it appears in.) arch/powerpc/kernel/perf_event.c:1334: error: implicit declaration of function 'power_pmu_notifier' arch/powerpc/kernel/perf_event.c:1334: error: implicit declaration of function 'register_cpu_notifier' Due to commit 3f6da390 (perf: Rework and fix the arch CPU-hotplug hooks). 
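The likely shape of the breakage, stated as an assumption since the powerpc hunk added by commit 3f6da390 is not part of this series: that commit registers the powerpc hotplug callback through the new macro, roughly

    perf_cpu_notifier(power_pmu_notifier);
    	/* expands to a struct notifier_block power_pmu_notifier_nb with
    	 * .notifier_call = power_pmu_notifier, then register_cpu_notifier() */

so the callback has to exist under exactly that name and the CPU notifier declarations have to be visible wherever linux/perf_event.h is included. Renaming power_pmu_notify() to power_pmu_notifier() and adding the missing include to perf_event.h, as below, addresses both errors.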
Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_event.c | 2 +- include/linux/perf_event.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index fbe101d7505d..08460a2e9f41 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -1298,7 +1298,7 @@ static void power_pmu_setup(int cpu) } static int __cpuinit -power_pmu_notify(struct notifier_block *self, unsigned long action, void *hcpu) +power_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) { unsigned int cpu = (long)hcpu; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 70cffd052c04..95477038a72a 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -453,6 +453,7 @@ enum perf_callchain_context { #include #include #include +#include #include #define PERF_MAX_STACK_DEPTH 255 -- cgit v1.2.3 From 80a05b9ffa7dc13f6693902dd8999a2b61a3a0d7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 Mar 2010 17:34:14 +0100 Subject: clockevents: Sanitize min_delta_ns adjustment and prevent overflows The current logic which handles clock events programming failures can increase min_delta_ns unlimited and even can cause overflows. Sanitize it by: - prevent zero increase when min_delta_ns == 1 - limiting min_delta_ns to a jiffie - bail out if the jiffie limit is hit - add retries stats for /proc/timer_list so we can gather data Reported-by: Uwe Kleine-Koenig Signed-off-by: Thomas Gleixner --- include/linux/clockchips.h | 2 ++ kernel/time/tick-oneshot.c | 52 +++++++++++++++++++++++++++++++++++----------- kernel/time/timer_list.c | 3 ++- 3 files changed, 44 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 0cf725bdd2a1..fc53492b6ad7 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -73,6 +73,7 @@ enum clock_event_nofitiers { * @list: list head for the management code * @mode: operating mode assigned by the management code * @next_event: local storage for the next event in oneshot mode + * @retries: number of forced programming retries */ struct clock_event_device { const char *name; @@ -93,6 +94,7 @@ struct clock_event_device { struct list_head list; enum clock_event_mode mode; ktime_t next_event; + unsigned long retries; }; /* diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c index 0a8a213016f0..aada0e52680a 100644 --- a/kernel/time/tick-oneshot.c +++ b/kernel/time/tick-oneshot.c @@ -22,6 +22,29 @@ #include "tick-internal.h" +/* Limit min_delta to a jiffie */ +#define MIN_DELTA_LIMIT (NSEC_PER_SEC / HZ) + +static int tick_increase_min_delta(struct clock_event_device *dev) +{ + /* Nothing to do if we already reached the limit */ + if (dev->min_delta_ns >= MIN_DELTA_LIMIT) + return -ETIME; + + if (dev->min_delta_ns < 5000) + dev->min_delta_ns = 5000; + else + dev->min_delta_ns += dev->min_delta_ns >> 1; + + if (dev->min_delta_ns > MIN_DELTA_LIMIT) + dev->min_delta_ns = MIN_DELTA_LIMIT; + + printk(KERN_WARNING "CE: %s increased min_delta_ns to %llu nsec\n", + dev->name ? 
dev->name : "?", + (unsigned long long) dev->min_delta_ns); + return 0; +} + /** * tick_program_event internal worker function */ @@ -37,23 +60,28 @@ int tick_dev_program_event(struct clock_event_device *dev, ktime_t expires, if (!ret || !force) return ret; + dev->retries++; /* - * We tried 2 times to program the device with the given - * min_delta_ns. If that's not working then we double it + * We tried 3 times to program the device with the given + * min_delta_ns. If that's not working then we increase it * and emit a warning. */ if (++i > 2) { /* Increase the min. delta and try again */ - if (!dev->min_delta_ns) - dev->min_delta_ns = 5000; - else - dev->min_delta_ns += dev->min_delta_ns >> 1; - - printk(KERN_WARNING - "CE: %s increasing min_delta_ns to %llu nsec\n", - dev->name ? dev->name : "?", - (unsigned long long) dev->min_delta_ns << 1); - + if (tick_increase_min_delta(dev)) { + /* + * Get out of the loop if min_delta_ns + * hit the limit already. That's + * better than staying here forever. + * + * We clear next_event so we have a + * chance that the box survives. + */ + printk(KERN_WARNING + "CE: Reprogramming failure. Giving up\n"); + dev->next_event.tv64 = KTIME_MAX; + return -ETIME; + } i = 0; } diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index bdfb8dd1050c..1a4a7dd78777 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -228,6 +228,7 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu) SEQ_printf(m, " event_handler: "); print_name_offset(m, dev->event_handler); SEQ_printf(m, "\n"); + SEQ_printf(m, " retries: %lu\n", dev->retries); } static void timer_list_show_tickdevices(struct seq_file *m) @@ -257,7 +258,7 @@ static int timer_list_show(struct seq_file *m, void *v) u64 now = ktime_to_ns(ktime_get()); int cpu; - SEQ_printf(m, "Timer List Version: v0.5\n"); + SEQ_printf(m, "Timer List Version: v0.6\n"); SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES); SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now); -- cgit v1.2.3 From d0ab4a4d5094e5d17b103dc5073529a04f00a469 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Wed, 10 Mar 2010 15:20:35 -0800 Subject: rtc/hctosys: only claim the RTC provided the system time if it did MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without this patch /sys/class/rtc/$CONFIG_RTC_HCTOSYS_DEVICE/hctosys contains a 1 (meaning "This rtc was used to initialize the system clock") even if reading the time at bootup failed. Moreover change error handling in rtc_hctosys() to use goto and so reduce the indention level. Signed-off-by: Uwe Kleine-König Cc: Paul Gortmaker Acked-by: Alessandro Zummo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/hctosys.c | 59 ++++++++++++++++++++++++++++--------------------- drivers/rtc/rtc-sysfs.c | 5 +++-- include/linux/rtc.h | 6 +++++ 3 files changed, 43 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/drivers/rtc/hctosys.c b/drivers/rtc/hctosys.c index 33c0e98243ee..bc90b091f195 100644 --- a/drivers/rtc/hctosys.c +++ b/drivers/rtc/hctosys.c @@ -22,48 +22,57 @@ * the best guess is to add 0.5s. 
*/ +int rtc_hctosys_ret = -ENODEV; + static int __init rtc_hctosys(void) { - int err; + int err = -ENODEV; struct rtc_time tm; + struct timespec tv = { + .tv_nsec = NSEC_PER_SEC >> 1, + }; struct rtc_device *rtc = rtc_class_open(CONFIG_RTC_HCTOSYS_DEVICE); if (rtc == NULL) { - printk("%s: unable to open rtc device (%s)\n", + pr_err("%s: unable to open rtc device (%s)\n", __FILE__, CONFIG_RTC_HCTOSYS_DEVICE); - return -ENODEV; + goto err_open; } err = rtc_read_time(rtc, &tm); - if (err == 0) { - err = rtc_valid_tm(&tm); - if (err == 0) { - struct timespec tv; + if (err) { + dev_err(rtc->dev.parent, + "hctosys: unable to read the hardware clock\n"); + goto err_read; - tv.tv_nsec = NSEC_PER_SEC >> 1; + } - rtc_tm_to_time(&tm, &tv.tv_sec); + err = rtc_valid_tm(&tm); + if (err) { + dev_err(rtc->dev.parent, + "hctosys: invalid date/time\n"); + goto err_invalid; + } - do_settimeofday(&tv); + rtc_tm_to_time(&tm, &tv.tv_sec); - dev_info(rtc->dev.parent, - "setting system clock to " - "%d-%02d-%02d %02d:%02d:%02d UTC (%u)\n", - tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, - tm.tm_hour, tm.tm_min, tm.tm_sec, - (unsigned int) tv.tv_sec); - } - else - dev_err(rtc->dev.parent, - "hctosys: invalid date/time\n"); - } - else - dev_err(rtc->dev.parent, - "hctosys: unable to read the hardware clock\n"); + do_settimeofday(&tv); + dev_info(rtc->dev.parent, + "setting system clock to " + "%d-%02d-%02d %02d:%02d:%02d UTC (%u)\n", + tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec, + (unsigned int) tv.tv_sec); + +err_invalid: +err_read: rtc_class_close(rtc); - return 0; +err_open: + rtc_hctosys_ret = err; + + return err; } late_initcall(rtc_hctosys); diff --git a/drivers/rtc/rtc-sysfs.c b/drivers/rtc/rtc-sysfs.c index 7dd23a6fc825..380083ca572f 100644 --- a/drivers/rtc/rtc-sysfs.c +++ b/drivers/rtc/rtc-sysfs.c @@ -107,8 +107,9 @@ rtc_sysfs_show_hctosys(struct device *dev, struct device_attribute *attr, char *buf) { #ifdef CONFIG_RTC_HCTOSYS_DEVICE - if (strcmp(dev_name(&to_rtc_device(dev)->dev), - CONFIG_RTC_HCTOSYS_DEVICE) == 0) + if (rtc_hctosys_ret == 0 && + strcmp(dev_name(&to_rtc_device(dev)->dev), + CONFIG_RTC_HCTOSYS_DEVICE) == 0) return sprintf(buf, "1\n"); else #endif diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 60f88a7fb13d..14dbc83ded20 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -238,6 +238,12 @@ static inline bool is_leap_year(unsigned int year) return (!(year % 4) && (year % 100)) || !(year % 400); } +#ifdef CONFIG_RTC_HCTOSYS +extern int rtc_hctosys_ret; +#else +#define rtc_hctosys_ret -ENODEV +#endif + #endif /* __KERNEL__ */ #endif /* _LINUX_RTC_H_ */ -- cgit v1.2.3 From 53bddb4e9f3f53df02a783751984ddeade71b085 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 10 Mar 2010 15:20:38 -0800 Subject: nommu: fix build breakage Commit 34e55232e59f7b19050267a05ff1226e5cd122a5 ("mm: avoid false sharing of mm_counter") added sync_mm_rss() for syncing loosely accounted rss counters. It's for CONFIG_MMU but sync_mm_rss is called even in NOMMU enviroment (kerne/exit.c, fs/exec.c). Above commit doesn't handle it well. This patch changes SPLIT_RSS_COUNTING depends on SPLIT_PTLOCKS && CONFIG_MMU And for avoid unnecessary function calls, sync_mm_rss changed to be inlined noop function in header file. 
Reported-by: David Howells Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Mike Frysinger Signed-off-by: Michal Simek Signed-off-by: David Howells Cc: Greg Ungerer Cc: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 6 ++++++ include/linux/mm_types.h | 2 +- mm/memory.c | 3 --- 3 files changed, 7 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 3899395a03de..7f693b272c4a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -971,7 +971,13 @@ static inline void setmax_mm_hiwater_rss(unsigned long *maxrss, *maxrss = hiwater_rss; } +#if defined(SPLIT_RSS_COUNTING) void sync_mm_rss(struct task_struct *task, struct mm_struct *mm); +#else +static inline void sync_mm_rss(struct task_struct *task, struct mm_struct *mm) +{ +} +#endif /* * A callback you can register to apply pressure to ageable caches. diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 048b46270aa5..b8bb9a6a1f37 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -203,7 +203,7 @@ enum { NR_MM_COUNTERS }; -#if USE_SPLIT_PTLOCKS +#if USE_SPLIT_PTLOCKS && defined(CONFIG_MMU) #define SPLIT_RSS_COUNTING struct mm_rss_stat { atomic_long_t count[NR_MM_COUNTERS]; diff --git a/mm/memory.c b/mm/memory.c index d1153e37e9ba..3d9130bd95d0 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -190,9 +190,6 @@ static void check_sync_rss_stat(struct task_struct *task) { } -void sync_mm_rss(struct task_struct *task, struct mm_struct *mm) -{ -} #endif /* -- cgit v1.2.3 From 718a38211bf4375c0a1efad3afbc5dbaef5d33f9 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Wed, 10 Mar 2010 15:20:43 -0800 Subject: mm: introduce dump_page() and print symbolic flag names - introduce dump_page() to print the page info for debugging some error condition. - convert three mm users: bad_page(), print_bad_pte() and memory offline failure. 
- print an extra field: the symbolic names of page->flags Example dump_page() output: [ 157.521694] page:ffffea0000a7cba8 count:2 mapcount:1 mapping:ffff88001c901791 index:0x147 [ 157.525570] page flags: 0x100000000100068(uptodate|lru|active|swapbacked) Signed-off-by: Wu Fengguang Cc: Ingo Molnar Cc: Alex Chiang Cc: Rik van Riel Cc: Andi Kleen Cc: Mel Gorman Cc: Christoph Lameter Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 ++ mm/memory.c | 8 ++---- mm/memory_hotplug.c | 6 ++-- mm/page_alloc.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 4 files changed, 86 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 7f693b272c4a..e70f21beb4b4 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1465,5 +1465,7 @@ extern void shake_page(struct page *p, int access); extern atomic_long_t mce_bad_pages; extern int soft_offline_page(struct page *page, int flags); +extern void dump_page(struct page *page); + #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/mm/memory.c b/mm/memory.c index 3d9130bd95d0..5b7f2002e54b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -509,12 +509,8 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr, "BUG: Bad page map in process %s pte:%08llx pmd:%08llx\n", current->comm, (long long)pte_val(pte), (long long)pmd_val(*pmd)); - if (page) { - printk(KERN_ALERT - "page:%p flags:%p count:%d mapcount:%d mapping:%p index:%lx\n", - page, (void *)page->flags, page_count(page), - page_mapcount(page), page->mapping, page->index); - } + if (page) + dump_page(page); printk(KERN_ALERT "addr:%p vm_flags:%08lx anon_vma:%p mapping:%p index:%lx\n", (void *)addr, vma->vm_flags, vma->anon_vma, mapping, index); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 78e34e63c7b8..be211a582930 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -688,9 +688,9 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) if (page_count(page)) not_managed++; #ifdef CONFIG_DEBUG_VM - printk(KERN_INFO "removing from LRU failed" - " %lx/%d/%lx\n", - pfn, page_count(page), page->flags); + printk(KERN_ALERT "removing pfn %lx from LRU failed\n", + pfn); + dump_page(page); #endif } } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 78ce90dd671f..d03c946d5566 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include @@ -288,10 +289,7 @@ static void bad_page(struct page *page) printk(KERN_ALERT "BUG: Bad page state in process %s pfn:%05lx\n", current->comm, page_to_pfn(page)); - printk(KERN_ALERT - "page:%p flags:%p count:%d mapcount:%d mapping:%p index:%lx\n", - page, (void *)page->flags, page_count(page), - page_mapcount(page), page->mapping, page->index); + dump_page(page); dump_stack(); out: @@ -5183,3 +5181,80 @@ bool is_free_buddy_page(struct page *page) return order < MAX_ORDER; } #endif + +static struct trace_print_flags pageflag_names[] = { + {1UL << PG_locked, "locked" }, + {1UL << PG_error, "error" }, + {1UL << PG_referenced, "referenced" }, + {1UL << PG_uptodate, "uptodate" }, + {1UL << PG_dirty, "dirty" }, + {1UL << PG_lru, "lru" }, + {1UL << PG_active, "active" }, + {1UL << PG_slab, "slab" }, + {1UL << PG_owner_priv_1, "owner_priv_1" }, + {1UL << PG_arch_1, "arch_1" }, + {1UL << PG_reserved, "reserved" }, + {1UL << PG_private, "private" }, + {1UL << PG_private_2, "private_2" }, + {1UL << PG_writeback, "writeback" }, +#ifdef 
CONFIG_PAGEFLAGS_EXTENDED + {1UL << PG_head, "head" }, + {1UL << PG_tail, "tail" }, +#else + {1UL << PG_compound, "compound" }, +#endif + {1UL << PG_swapcache, "swapcache" }, + {1UL << PG_mappedtodisk, "mappedtodisk" }, + {1UL << PG_reclaim, "reclaim" }, + {1UL << PG_buddy, "buddy" }, + {1UL << PG_swapbacked, "swapbacked" }, + {1UL << PG_unevictable, "unevictable" }, +#ifdef CONFIG_MMU + {1UL << PG_mlocked, "mlocked" }, +#endif +#ifdef CONFIG_ARCH_USES_PG_UNCACHED + {1UL << PG_uncached, "uncached" }, +#endif +#ifdef CONFIG_MEMORY_FAILURE + {1UL << PG_hwpoison, "hwpoison" }, +#endif + {-1UL, NULL }, +}; + +static void dump_page_flags(unsigned long flags) +{ + const char *delim = ""; + unsigned long mask; + int i; + + printk(KERN_ALERT "page flags: %#lx(", flags); + + /* remove zone id */ + flags &= (1UL << NR_PAGEFLAGS) - 1; + + for (i = 0; pageflag_names[i].name && flags; i++) { + + mask = pageflag_names[i].mask; + if ((flags & mask) != mask) + continue; + + flags &= ~mask; + printk("%s%s", delim, pageflag_names[i].name); + delim = "|"; + } + + /* check for left over flags */ + if (flags) + printk("%s%#lx", delim, flags); + + printk(")\n"); +} + +void dump_page(struct page *page) +{ + printk(KERN_ALERT + "page:%p count:%d mapcount:%d mapping:%p index:%#lx\n", + page, page_count(page), page_mapcount(page), + page->mapping, page->index); + dump_page_flags(page->flags); +} -- cgit v1.2.3 From 5d0e52830e9ae09b872567f4aca3dfb5b5918079 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 10 Mar 2010 15:21:13 -0800 Subject: Add generic sys_old_select() Add a generic implementation of the old select() syscall, which expects its argument in a memory block and switch all architectures over to use it. Signed-off-by: Christoph Hellwig Cc: Ralf Baechle Cc: Benjamin Herrenschmidt Cc: Paul Mundt Cc: Jeff Dike Cc: Hirokazu Takata Cc: Thomas Gleixner Cc: Ingo Molnar Reviewed-by: H. 
Peter Anvin Cc: Al Viro Cc: Arnd Bergmann Cc: Heiko Carstens Cc: Martin Schwidefsky Cc: "Luck, Tony" Cc: James Morris Acked-by: Andreas Schwab Acked-by: Russell King Acked-by: Greg Ungerer Acked-by: David Howells Cc: Andreas Schwab Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/include/asm/unistd.h | 1 + arch/arm/kernel/calls.S | 2 +- arch/arm/kernel/sys_arm.c | 21 --------------------- arch/h8300/include/asm/unistd.h | 1 + arch/h8300/kernel/sys_h8300.c | 16 ---------------- arch/h8300/kernel/syscalls.S | 2 +- arch/m68k/include/asm/unistd.h | 1 + arch/m68k/kernel/entry.S | 2 +- arch/m68k/kernel/sys_m68k.c | 16 ---------------- arch/m68knommu/kernel/sys_m68k.c | 16 ---------------- arch/m68knommu/kernel/syscalltable.S | 2 +- arch/mn10300/include/asm/unistd.h | 1 + arch/mn10300/kernel/entry.S | 2 +- arch/mn10300/kernel/sys_mn10300.c | 18 ------------------ arch/s390/kernel/entry.h | 1 - arch/um/sys-i386/syscalls.c | 18 ------------------ arch/x86/ia32/ia32entry.S | 2 +- arch/x86/ia32/sys_ia32.c | 18 ------------------ arch/x86/include/asm/sys_ia32.h | 2 -- arch/x86/include/asm/syscalls.h | 2 -- arch/x86/include/asm/unistd_32.h | 1 + arch/x86/kernel/sys_i386_32.c | 17 ----------------- arch/x86/kernel/syscall_table_32.S | 2 +- fs/compat.c | 18 ++++++++++++++++++ fs/select.c | 17 +++++++++++++++++ include/linux/compat.h | 3 +++ include/linux/syscalls.h | 2 ++ 27 files changed, 52 insertions(+), 152 deletions(-) (limited to 'include') diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index cf9cdaa2d4d4..e18500d305ba 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -443,6 +443,7 @@ #define __ARCH_WANT_SYS_SIGPROCMASK #define __ARCH_WANT_SYS_RT_SIGACTION #define __ARCH_WANT_SYS_RT_SIGSUSPEND +#define __ARCH_WANT_SYS_OLD_SELECT #if !defined(CONFIG_AEABI) || defined(CONFIG_OABI_COMPAT) #define __ARCH_WANT_SYS_TIME diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S index 9314a2d681f1..7671e9a75449 100644 --- a/arch/arm/kernel/calls.S +++ b/arch/arm/kernel/calls.S @@ -91,7 +91,7 @@ CALL(sys_settimeofday) /* 80 */ CALL(sys_getgroups16) CALL(sys_setgroups16) - CALL(OBSOLETE(old_select)) /* used by libc4 */ + CALL(OBSOLETE(sys_old_select)) /* used by libc4 */ CALL(sys_symlink) CALL(sys_ni_syscall) /* was sys_lstat */ /* 85 */ CALL(sys_readlink) diff --git a/arch/arm/kernel/sys_arm.c b/arch/arm/kernel/sys_arm.c index ae4027bd01bd..e59cddedcbba 100644 --- a/arch/arm/kernel/sys_arm.c +++ b/arch/arm/kernel/sys_arm.c @@ -54,27 +54,6 @@ out: return error; } -/* - * Perform the select(nd, in, out, ex, tv) and mmap() system - * calls. - */ - -struct sel_arg_struct { - unsigned long n; - fd_set __user *inp, *outp, *exp; - struct timeval __user *tvp; -}; - -asmlinkage int old_select(struct sel_arg_struct __user *arg) -{ - struct sel_arg_struct a; - - if (copy_from_user(&a, arg, sizeof(a))) - return -EFAULT; - /* sys_select() does the appropriate kernel locking */ - return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp); -} - #if !defined(CONFIG_AEABI) || defined(CONFIG_OABI_COMPAT) /* * sys_ipc() is the de-multiplexer for the SysV IPC calls.. 
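For reference, a minimal user-space sketch of the memory-block calling convention that the generic sys_old_select() services. This is an illustration, not part of the patch: it assumes an x86-32 build, where syscall 82 is the struct-based select entry, and the old_select_call() helper and old_sel_arg struct are hypothetical names that simply mirror the kernel's sel_arg_struct layout.

#include <sys/select.h>
#include <unistd.h>

struct old_sel_arg {		/* must mirror the kernel's sel_arg_struct */
	unsigned long n;
	fd_set *inp, *outp, *exp;
	struct timeval *tvp;
};

static int old_select_call(int n, fd_set *in, fd_set *out, fd_set *ex,
			   struct timeval *tv)
{
	struct old_sel_arg a = { n, in, out, ex, tv };

	/* the kernel pulls the whole block in with one copy_from_user() */
	return syscall(82 /* assumed: __NR_select on i386, the old entry */, &a);
}

The single-pointer form exists only because these early ABIs could not pass more than four syscall parameters, so libc4 packed the arguments into a memory block and the kernel copies them back out before calling sys_select().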
diff --git a/arch/h8300/include/asm/unistd.h b/arch/h8300/include/asm/unistd.h index 99f3c3561ecb..3bea0b3eb24f 100644 --- a/arch/h8300/include/asm/unistd.h +++ b/arch/h8300/include/asm/unistd.h @@ -348,6 +348,7 @@ #define __ARCH_WANT_SYS_LLSEEK #define __ARCH_WANT_SYS_NICE #define __ARCH_WANT_SYS_OLD_GETRLIMIT +#define __ARCH_WANT_SYS_OLD_SELECT #define __ARCH_WANT_SYS_OLDUMOUNT #define __ARCH_WANT_SYS_SIGPENDING #define __ARCH_WANT_SYS_SIGPROCMASK diff --git a/arch/h8300/kernel/sys_h8300.c b/arch/h8300/kernel/sys_h8300.c index b5969db0ca10..e9a3ecf90c9c 100644 --- a/arch/h8300/kernel/sys_h8300.c +++ b/arch/h8300/kernel/sys_h8300.c @@ -60,22 +60,6 @@ out: return error; } -struct sel_arg_struct { - unsigned long n; - fd_set *inp, *outp, *exp; - struct timeval *tvp; -}; - -asmlinkage int old_select(struct sel_arg_struct *arg) -{ - struct sel_arg_struct a; - - if (copy_from_user(&a, arg, sizeof(a))) - return -EFAULT; - /* sys_select() does the appropriate kernel locking */ - return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp); -} - /* * sys_ipc() is the de-multiplexer for the SysV IPC calls.. * diff --git a/arch/h8300/kernel/syscalls.S b/arch/h8300/kernel/syscalls.S index 2d69881eda6a..fe5ae20e60c5 100644 --- a/arch/h8300/kernel/syscalls.S +++ b/arch/h8300/kernel/syscalls.S @@ -96,7 +96,7 @@ SYMBOL_NAME_LABEL(sys_call_table) .long SYMBOL_NAME(sys_settimeofday) .long SYMBOL_NAME(sys_getgroups16) /* 80 */ .long SYMBOL_NAME(sys_setgroups16) - .long SYMBOL_NAME(old_select) + .long SYMBOL_NAME(sys_old_select) .long SYMBOL_NAME(sys_symlink) .long SYMBOL_NAME(sys_lstat) .long SYMBOL_NAME(sys_readlink) /* 85 */ diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h index d72a71dabecb..1582c2db1c86 100644 --- a/arch/m68k/include/asm/unistd.h +++ b/arch/m68k/include/asm/unistd.h @@ -363,6 +363,7 @@ #define __ARCH_WANT_SYS_LLSEEK #define __ARCH_WANT_SYS_NICE #define __ARCH_WANT_SYS_OLD_GETRLIMIT +#define __ARCH_WANT_SYS_OLD_SELECT #define __ARCH_WANT_SYS_OLDUMOUNT #define __ARCH_WANT_SYS_SIGPENDING #define __ARCH_WANT_SYS_SIGPROCMASK diff --git a/arch/m68k/kernel/entry.S b/arch/m68k/kernel/entry.S index e136b8cbe9b9..09b1f09be3a6 100644 --- a/arch/m68k/kernel/entry.S +++ b/arch/m68k/kernel/entry.S @@ -510,7 +510,7 @@ sys_call_table: .long sys_settimeofday .long sys_getgroups16 /* 80 */ .long sys_setgroups16 - .long old_select + .long sys_old_select .long sys_symlink .long sys_lstat .long sys_readlink /* 85 */ diff --git a/arch/m68k/kernel/sys_m68k.c b/arch/m68k/kernel/sys_m68k.c index e3ad2d671973..03b58dd86c7a 100644 --- a/arch/m68k/kernel/sys_m68k.c +++ b/arch/m68k/kernel/sys_m68k.c @@ -80,22 +80,6 @@ out: return error; } -struct sel_arg_struct { - unsigned long n; - fd_set __user *inp, *outp, *exp; - struct timeval __user *tvp; -}; - -asmlinkage int old_select(struct sel_arg_struct __user *arg) -{ - struct sel_arg_struct a; - - if (copy_from_user(&a, arg, sizeof(a))) - return -EFAULT; - /* sys_select() does the appropriate kernel locking */ - return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp); -} - /* * sys_ipc() is the de-multiplexer for the SysV IPC calls.. 
* diff --git a/arch/m68knommu/kernel/sys_m68k.c b/arch/m68knommu/kernel/sys_m68k.c index 923dd4aab875..e0d3f13e77a8 100644 --- a/arch/m68knommu/kernel/sys_m68k.c +++ b/arch/m68knommu/kernel/sys_m68k.c @@ -61,22 +61,6 @@ out: return error; } -struct sel_arg_struct { - unsigned long n; - fd_set *inp, *outp, *exp; - struct timeval *tvp; -}; - -asmlinkage int old_select(struct sel_arg_struct *arg) -{ - struct sel_arg_struct a; - - if (copy_from_user(&a, arg, sizeof(a))) - return -EFAULT; - /* sys_select() does the appropriate kernel locking */ - return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp); -} - /* * sys_ipc() is the de-multiplexer for the SysV IPC calls.. * diff --git a/arch/m68knommu/kernel/syscalltable.S b/arch/m68knommu/kernel/syscalltable.S index 56dd01ded148..405738351700 100644 --- a/arch/m68knommu/kernel/syscalltable.S +++ b/arch/m68knommu/kernel/syscalltable.S @@ -100,7 +100,7 @@ ENTRY(sys_call_table) .long sys_settimeofday .long sys_getgroups16 /* 80 */ .long sys_setgroups16 - .long old_select + .long sys_old_select .long sys_symlink .long sys_lstat .long sys_readlink /* 85 */ diff --git a/arch/mn10300/include/asm/unistd.h b/arch/mn10300/include/asm/unistd.h index c05acb95c2a9..d13a56e99bad 100644 --- a/arch/mn10300/include/asm/unistd.h +++ b/arch/mn10300/include/asm/unistd.h @@ -375,6 +375,7 @@ #define __ARCH_WANT_SYS_LLSEEK #define __ARCH_WANT_SYS_NICE #define __ARCH_WANT_SYS_OLD_GETRLIMIT +#define __ARCH_WANT_SYS_OLD_SELECT #define __ARCH_WANT_SYS_OLDUMOUNT #define __ARCH_WANT_SYS_SIGPENDING #define __ARCH_WANT_SYS_SIGPROCMASK diff --git a/arch/mn10300/kernel/entry.S b/arch/mn10300/kernel/entry.S index 88e3e1c3cc21..d9ed5a15c547 100644 --- a/arch/mn10300/kernel/entry.S +++ b/arch/mn10300/kernel/entry.S @@ -468,7 +468,7 @@ ENTRY(sys_call_table) .long sys_settimeofday .long sys_getgroups16 /* 80 */ .long sys_setgroups16 - .long old_select + .long sys_old_select .long sys_symlink .long sys_lstat .long sys_readlink /* 85 */ diff --git a/arch/mn10300/kernel/sys_mn10300.c b/arch/mn10300/kernel/sys_mn10300.c index 17cc6ce04e84..bef69d6daf15 100644 --- a/arch/mn10300/kernel/sys_mn10300.c +++ b/arch/mn10300/kernel/sys_mn10300.c @@ -32,24 +32,6 @@ asmlinkage long old_mmap(unsigned long addr, unsigned long len, return sys_mmap_pgoff(addr, len, prot, flags, fd, offset >> PAGE_SHIFT); } -struct sel_arg_struct { - unsigned long n; - fd_set *inp; - fd_set *outp; - fd_set *exp; - struct timeval *tvp; -}; - -asmlinkage int old_select(struct sel_arg_struct __user *arg) -{ - struct sel_arg_struct a; - - if (copy_from_user(&a, arg, sizeof(a))) - return -EFAULT; - /* sys_select() does the appropriate kernel locking */ - return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp); -} - /* * sys_ipc() is the de-multiplexer for the SysV IPC calls.. 
* diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index e1e5e767ab56..9905a0cacf93 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -28,7 +28,6 @@ struct new_utsname; struct mmap_arg_struct; struct fadvise64_64_args; struct old_sigaction; -struct sel_arg_struct; long sys_mmap2(struct mmap_arg_struct __user *arg); long sys_s390_old_mmap(struct mmap_arg_struct __user *arg); diff --git a/arch/um/sys-i386/syscalls.c b/arch/um/sys-i386/syscalls.c index 857ca0b3bdef..0e49d2a20c17 100644 --- a/arch/um/sys-i386/syscalls.c +++ b/arch/um/sys-i386/syscalls.c @@ -44,24 +44,6 @@ long old_mmap_i386(struct mmap_arg_struct __user *arg) return err; } -struct sel_arg_struct { - unsigned long n; - fd_set __user *inp; - fd_set __user *outp; - fd_set __user *exp; - struct timeval __user *tvp; -}; - -long old_select(struct sel_arg_struct __user *arg) -{ - struct sel_arg_struct a; - - if (copy_from_user(&a, arg, sizeof(a))) - return -EFAULT; - /* sys_select() does the appropriate kernel locking */ - return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp); -} - /* * The prototype on i386 is: * diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 53147ad85b96..34f821802c23 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -586,7 +586,7 @@ ia32_sys_call_table: .quad compat_sys_settimeofday .quad sys_getgroups16 /* 80 */ .quad sys_setgroups16 - .quad sys32_old_select + .quad compat_sys_old_select .quad sys_symlink .quad sys_lstat .quad sys_readlink /* 85 */ diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 422572c77923..cb80816e7a16 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -332,24 +332,6 @@ asmlinkage long sys32_alarm(unsigned int seconds) return alarm_setitimer(seconds); } -struct sel_arg_struct { - unsigned int n; - unsigned int inp; - unsigned int outp; - unsigned int exp; - unsigned int tvp; -}; - -asmlinkage long sys32_old_select(struct sel_arg_struct __user *arg) -{ - struct sel_arg_struct a; - - if (copy_from_user(&a, arg, sizeof(a))) - return -EFAULT; - return compat_sys_select(a.n, compat_ptr(a.inp), compat_ptr(a.outp), - compat_ptr(a.exp), compat_ptr(a.tvp)); -} - asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr, int options) { diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index d5f69045c100..b26fc750e416 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -40,8 +40,6 @@ asmlinkage long sys32_rt_sigprocmask(int, compat_sigset_t __user *, compat_sigset_t __user *, unsigned int); asmlinkage long sys32_alarm(unsigned int); -struct sel_arg_struct; -asmlinkage long sys32_old_select(struct sel_arg_struct __user *); asmlinkage long sys32_waitpid(compat_pid_t, unsigned int *, int); asmlinkage long sys32_sysfs(int, u32, u32); diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 8868b9420b0e..8406d06c118d 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -52,12 +52,10 @@ unsigned long sys_sigreturn(struct pt_regs *); /* kernel/sys_i386_32.c */ struct mmap_arg_struct; -struct sel_arg_struct; struct oldold_utsname; struct old_utsname; asmlinkage int old_mmap(struct mmap_arg_struct __user *); -asmlinkage int old_select(struct sel_arg_struct __user *); asmlinkage int sys_ipc(uint, int, int, int, void __user *, long); asmlinkage int sys_uname(struct old_utsname __user *); asmlinkage int sys_olduname(struct oldold_utsname __user *); diff 
--git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index 3baf379fa840..4eb2667b54ae 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h @@ -366,6 +366,7 @@ #define __ARCH_WANT_SYS_LLSEEK #define __ARCH_WANT_SYS_NICE #define __ARCH_WANT_SYS_OLD_GETRLIMIT +#define __ARCH_WANT_SYS_OLD_SELECT #define __ARCH_WANT_SYS_OLDUMOUNT #define __ARCH_WANT_SYS_SIGPENDING #define __ARCH_WANT_SYS_SIGPROCMASK diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c index dee1ff7cba58..345dbd19a2b3 100644 --- a/arch/x86/kernel/sys_i386_32.c +++ b/arch/x86/kernel/sys_i386_32.c @@ -58,23 +58,6 @@ out: return err; } - -struct sel_arg_struct { - unsigned long n; - fd_set __user *inp, *outp, *exp; - struct timeval __user *tvp; -}; - -asmlinkage int old_select(struct sel_arg_struct __user *arg) -{ - struct sel_arg_struct a; - - if (copy_from_user(&a, arg, sizeof(a))) - return -EFAULT; - /* sys_select() does the appropriate kernel locking */ - return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp); -} - /* * sys_ipc() is the de-multiplexer for the SysV IPC calls.. * diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index 15228b5d3eb7..4d10abacecdb 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -81,7 +81,7 @@ ENTRY(sys_call_table) .long sys_settimeofday .long sys_getgroups16 /* 80 */ .long sys_setgroups16 - .long old_select + .long sys_old_select .long sys_symlink .long sys_lstat .long sys_readlink /* 85 */ diff --git a/fs/compat.c b/fs/compat.c index 00d90c2e66f0..030602d453b7 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1795,6 +1795,24 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp, return ret; } +struct compat_sel_arg_struct { + compat_ulong_t n; + compat_uptr_t inp; + compat_uptr_t outp; + compat_uptr_t exp; + compat_uptr_t tvp; +}; + +asmlinkage long compat_sys_old_select(struct compat_sel_arg_struct __user *arg) +{ + struct compat_sel_arg_struct a; + + if (copy_from_user(&a, arg, sizeof(a))) + return -EFAULT; + return compat_sys_select(a.n, compat_ptr(a.inp), compat_ptr(a.outp), + compat_ptr(a.exp), compat_ptr(a.tvp)); +} + #ifdef HAVE_SET_RESTORE_SIGMASK static long do_compat_pselect(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp, compat_ulong_t __user *exp, diff --git a/fs/select.c b/fs/select.c index 73715e90030f..500a669f7790 100644 --- a/fs/select.c +++ b/fs/select.c @@ -691,6 +691,23 @@ SYSCALL_DEFINE6(pselect6, int, n, fd_set __user *, inp, fd_set __user *, outp, } #endif /* HAVE_SET_RESTORE_SIGMASK */ +#ifdef __ARCH_WANT_SYS_OLD_SELECT +struct sel_arg_struct { + unsigned long n; + fd_set __user *inp, *outp, *exp; + struct timeval __user *tvp; +}; + +SYSCALL_DEFINE1(old_select, struct sel_arg_struct __user *, arg) +{ + struct sel_arg_struct a; + + if (copy_from_user(&a, arg, sizeof(a))) + return -EFAULT; + return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp); +} +#endif + struct poll_list { struct poll_list *next; int len; diff --git a/include/linux/compat.h b/include/linux/compat.h index ef68119a4fd2..717c691ecd8e 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -23,6 +23,7 @@ typedef __compat_uid32_t compat_uid_t; typedef __compat_gid32_t compat_gid_t; +struct compat_sel_arg_struct; struct rusage; struct compat_itimerspec { @@ -249,6 +250,8 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp, compat_ulong_t __user *exp, struct compat_timeval 
__user *tvp); +asmlinkage long compat_sys_old_select(struct compat_sel_arg_struct __user *arg); + asmlinkage long compat_sys_wait4(compat_pid_t pid, compat_uint_t __user *stat_addr, int options, struct compat_rusage __user *ru); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 8126f239edf0..85a9f21fe11a 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -34,6 +34,7 @@ struct pollfd; struct rlimit; struct rusage; struct sched_param; +struct sel_arg_struct; struct semaphore; struct sembuf; struct shmid_ds; @@ -638,6 +639,7 @@ asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds, long timeout); asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timeval __user *tvp); +asmlinkage long sys_old_select(struct sel_arg_struct __user *arg); asmlinkage long sys_epoll_create(int size); asmlinkage long sys_epoll_create1(int flags); asmlinkage long sys_epoll_ctl(int epfd, int op, int fd, -- cgit v1.2.3 From a4679373cf4ee0e7792dc56205365732b725c2c1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 10 Mar 2010 15:21:15 -0800 Subject: Add generic sys_old_mmap() Add a generic implementation of the old mmap() syscall, which expects its argument in a memory block and switch all architectures over to use it. Signed-off-by: Christoph Hellwig Cc: Ralf Baechle Cc: Benjamin Herrenschmidt Cc: Paul Mundt Cc: Jeff Dike Cc: Hirokazu Takata Cc: Thomas Gleixner Cc: Ingo Molnar Reviewed-by: H. Peter Anvin Cc: Al Viro Cc: Arnd Bergmann Cc: Heiko Carstens Cc: Martin Schwidefsky Cc: "Luck, Tony" Cc: James Morris Cc: Andreas Schwab Acked-by: Jesper Nilsson Acked-by: Russell King Acked-by: Greg Ungerer Acked-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/include/asm/unistd.h | 1 + arch/arm/kernel/calls.S | 2 +- arch/arm/kernel/sys_arm.c | 26 ----------------------- arch/cris/arch-v10/kernel/entry.S | 2 +- arch/cris/arch-v32/kernel/entry.S | 2 +- arch/cris/include/asm/unistd.h | 1 + arch/cris/kernel/sys_cris.c | 18 ---------------- arch/h8300/include/asm/unistd.h | 1 + arch/h8300/kernel/sys_h8300.c | 34 ------------------------------- arch/h8300/kernel/syscalls.S | 2 +- arch/m68k/include/asm/unistd.h | 1 + arch/m68k/kernel/entry.S | 2 +- arch/m68k/kernel/sys_m68k.c | 34 ------------------------------- arch/m68knommu/kernel/sys_m68k.c | 34 ------------------------------- arch/m68knommu/kernel/syscalltable.S | 2 +- arch/s390/include/asm/unistd.h | 1 + arch/s390/kernel/entry.h | 5 ++--- arch/s390/kernel/sys_s390.c | 30 ++++++--------------------- arch/s390/kernel/syscalls.S | 2 +- arch/um/sys-i386/shared/sysdep/syscalls.h | 2 -- arch/um/sys-i386/sys_call_table.S | 2 +- arch/um/sys-i386/syscalls.c | 33 ------------------------------ arch/x86/ia32/sys_ia32.c | 6 +++--- arch/x86/include/asm/sys_ia32.h | 4 ++-- arch/x86/include/asm/syscalls.h | 2 -- arch/x86/include/asm/unistd_32.h | 1 + arch/x86/kernel/sys_i386_32.c | 34 ------------------------------- arch/x86/kernel/syscall_table_32.S | 2 +- include/linux/syscalls.h | 3 +++ mm/mmap.c | 24 ++++++++++++++++++++++ mm/nommu.c | 24 ++++++++++++++++++++++ 31 files changed, 79 insertions(+), 258 deletions(-) (limited to 'include') diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index e18500d305ba..e6eeb2d29953 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -443,6 +443,7 @@ #define __ARCH_WANT_SYS_SIGPROCMASK #define __ARCH_WANT_SYS_RT_SIGACTION #define 
__ARCH_WANT_SYS_RT_SIGSUSPEND +#define __ARCH_WANT_SYS_OLD_MMAP #define __ARCH_WANT_SYS_OLD_SELECT #if !defined(CONFIG_AEABI) || defined(CONFIG_OABI_COMPAT) diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S index 7671e9a75449..37ae301cc47c 100644 --- a/arch/arm/kernel/calls.S +++ b/arch/arm/kernel/calls.S @@ -99,7 +99,7 @@ CALL(sys_swapon) CALL(sys_reboot) CALL(OBSOLETE(sys_old_readdir)) /* used by libc4 */ -/* 90 */ CALL(OBSOLETE(old_mmap)) /* used by libc4 */ +/* 90 */ CALL(OBSOLETE(sys_old_mmap)) /* used by libc4 */ CALL(sys_munmap) CALL(sys_truncate) CALL(sys_ftruncate) diff --git a/arch/arm/kernel/sys_arm.c b/arch/arm/kernel/sys_arm.c index e59cddedcbba..a2e0e6f2ea7f 100644 --- a/arch/arm/kernel/sys_arm.c +++ b/arch/arm/kernel/sys_arm.c @@ -28,32 +28,6 @@ #include #include -struct mmap_arg_struct { - unsigned long addr; - unsigned long len; - unsigned long prot; - unsigned long flags; - unsigned long fd; - unsigned long offset; -}; - -asmlinkage int old_mmap(struct mmap_arg_struct __user *arg) -{ - int error = -EFAULT; - struct mmap_arg_struct a; - - if (copy_from_user(&a, arg, sizeof(a))) - goto out; - - error = -EINVAL; - if (a.offset & ~PAGE_MASK) - goto out; - - error = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT); -out: - return error; -} - #if !defined(CONFIG_AEABI) || defined(CONFIG_OABI_COMPAT) /* * sys_ipc() is the de-multiplexer for the SysV IPC calls.. diff --git a/arch/cris/arch-v10/kernel/entry.S b/arch/cris/arch-v10/kernel/entry.S index c52bef39e250..0d6420d087fd 100644 --- a/arch/cris/arch-v10/kernel/entry.S +++ b/arch/cris/arch-v10/kernel/entry.S @@ -692,7 +692,7 @@ sys_call_table: .long sys_swapon .long sys_reboot .long sys_old_readdir - .long old_mmap /* 90 */ + .long sys_old_mmap /* 90 */ .long sys_munmap .long sys_truncate .long sys_ftruncate diff --git a/arch/cris/arch-v32/kernel/entry.S b/arch/cris/arch-v32/kernel/entry.S index 435b9671bd4b..1f39861eac8c 100644 --- a/arch/cris/arch-v32/kernel/entry.S +++ b/arch/cris/arch-v32/kernel/entry.S @@ -615,7 +615,7 @@ sys_call_table: .long sys_swapon .long sys_reboot .long sys_old_readdir - .long old_mmap /* 90 */ + .long sys_old_mmap /* 90 */ .long sys_munmap .long sys_truncate .long sys_ftruncate diff --git a/arch/cris/include/asm/unistd.h b/arch/cris/include/asm/unistd.h index c17079388bb9..8cffd22623fd 100644 --- a/arch/cris/include/asm/unistd.h +++ b/arch/cris/include/asm/unistd.h @@ -364,6 +364,7 @@ #define __ARCH_WANT_SYS_LLSEEK #define __ARCH_WANT_SYS_NICE #define __ARCH_WANT_SYS_OLD_GETRLIMIT +#define __ARCH_WANT_SYS_OLD_MMAP #define __ARCH_WANT_SYS_OLDUMOUNT #define __ARCH_WANT_SYS_SIGPENDING #define __ARCH_WANT_SYS_SIGPROCMASK diff --git a/arch/cris/kernel/sys_cris.c b/arch/cris/kernel/sys_cris.c index c2bbb1ac98a9..22f9d6cd947f 100644 --- a/arch/cris/kernel/sys_cris.c +++ b/arch/cris/kernel/sys_cris.c @@ -26,24 +26,6 @@ #include #include -asmlinkage unsigned long old_mmap(unsigned long __user *args) -{ - unsigned long buffer[6]; - int err = -EFAULT; - - if (copy_from_user(&buffer, args, sizeof(buffer))) - goto out; - - err = -EINVAL; - if (buffer[5] & ~PAGE_MASK) /* verify that offset is on page boundary */ - goto out; - - err = sys_mmap_pgoff(buffer[0], buffer[1], buffer[2], buffer[3], - buffer[4], buffer[5] >> PAGE_SHIFT); -out: - return err; -} - asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff) diff --git a/arch/h8300/include/asm/unistd.h 
b/arch/h8300/include/asm/unistd.h index 3bea0b3eb24f..54dab4726954 100644 --- a/arch/h8300/include/asm/unistd.h +++ b/arch/h8300/include/asm/unistd.h @@ -348,6 +348,7 @@ #define __ARCH_WANT_SYS_LLSEEK #define __ARCH_WANT_SYS_NICE #define __ARCH_WANT_SYS_OLD_GETRLIMIT +#define __ARCH_WANT_SYS_OLD_MMAP #define __ARCH_WANT_SYS_OLD_SELECT #define __ARCH_WANT_SYS_OLDUMOUNT #define __ARCH_WANT_SYS_SIGPENDING diff --git a/arch/h8300/kernel/sys_h8300.c b/arch/h8300/kernel/sys_h8300.c index e9a3ecf90c9c..1f13fd6e5309 100644 --- a/arch/h8300/kernel/sys_h8300.c +++ b/arch/h8300/kernel/sys_h8300.c @@ -26,40 +26,6 @@ #include #include -/* - * Perform the select(nd, in, out, ex, tv) and mmap() system - * calls. Linux/m68k cloned Linux/i386, which didn't use to be able to - * handle more than 4 system call parameters, so these system calls - * used a memory block for parameter passing.. - */ - -struct mmap_arg_struct { - unsigned long addr; - unsigned long len; - unsigned long prot; - unsigned long flags; - unsigned long fd; - unsigned long offset; -}; - -asmlinkage int old_mmap(struct mmap_arg_struct *arg) -{ - struct mmap_arg_struct a; - int error = -EFAULT; - - if (copy_from_user(&a, arg, sizeof(a))) - goto out; - - error = -EINVAL; - if (a.offset & ~PAGE_MASK) - goto out; - - error = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, - a.offset >> PAGE_SHIFT); -out: - return error; -} - /* * sys_ipc() is the de-multiplexer for the SysV IPC calls.. * diff --git a/arch/h8300/kernel/syscalls.S b/arch/h8300/kernel/syscalls.S index fe5ae20e60c5..faefaff7d43d 100644 --- a/arch/h8300/kernel/syscalls.S +++ b/arch/h8300/kernel/syscalls.S @@ -104,7 +104,7 @@ SYMBOL_NAME_LABEL(sys_call_table) .long SYMBOL_NAME(sys_swapon) .long SYMBOL_NAME(sys_reboot) .long SYMBOL_NAME(sys_old_readdir) - .long SYMBOL_NAME(old_mmap) /* 90 */ + .long SYMBOL_NAME(sys_old_mmap) /* 90 */ .long SYMBOL_NAME(sys_munmap) .long SYMBOL_NAME(sys_truncate) .long SYMBOL_NAME(sys_ftruncate) diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h index 1582c2db1c86..d801154310ea 100644 --- a/arch/m68k/include/asm/unistd.h +++ b/arch/m68k/include/asm/unistd.h @@ -363,6 +363,7 @@ #define __ARCH_WANT_SYS_LLSEEK #define __ARCH_WANT_SYS_NICE #define __ARCH_WANT_SYS_OLD_GETRLIMIT +#define __ARCH_WANT_SYS_OLD_MMAP #define __ARCH_WANT_SYS_OLD_SELECT #define __ARCH_WANT_SYS_OLDUMOUNT #define __ARCH_WANT_SYS_SIGPENDING diff --git a/arch/m68k/kernel/entry.S b/arch/m68k/kernel/entry.S index 09b1f09be3a6..2391bdff0996 100644 --- a/arch/m68k/kernel/entry.S +++ b/arch/m68k/kernel/entry.S @@ -518,7 +518,7 @@ sys_call_table: .long sys_swapon .long sys_reboot .long sys_old_readdir - .long old_mmap /* 90 */ + .long sys_old_mmap /* 90 */ .long sys_munmap .long sys_truncate .long sys_ftruncate diff --git a/arch/m68k/kernel/sys_m68k.c b/arch/m68k/kernel/sys_m68k.c index 03b58dd86c7a..7b309e7b6cef 100644 --- a/arch/m68k/kernel/sys_m68k.c +++ b/arch/m68k/kernel/sys_m68k.c @@ -46,40 +46,6 @@ asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, return sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff); } -/* - * Perform the select(nd, in, out, ex, tv) and mmap() system - * calls. Linux/m68k cloned Linux/i386, which didn't use to be able to - * handle more than 4 system call parameters, so these system calls - * used a memory block for parameter passing.. 
- */ - -struct mmap_arg_struct { - unsigned long addr; - unsigned long len; - unsigned long prot; - unsigned long flags; - unsigned long fd; - unsigned long offset; -}; - -asmlinkage int old_mmap(struct mmap_arg_struct __user *arg) -{ - struct mmap_arg_struct a; - int error = -EFAULT; - - if (copy_from_user(&a, arg, sizeof(a))) - goto out; - - error = -EINVAL; - if (a.offset & ~PAGE_MASK) - goto out; - - error = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, - a.offset >> PAGE_SHIFT); -out: - return error; -} - /* * sys_ipc() is the de-multiplexer for the SysV IPC calls.. * diff --git a/arch/m68knommu/kernel/sys_m68k.c b/arch/m68knommu/kernel/sys_m68k.c index e0d3f13e77a8..3e371cc9fd91 100644 --- a/arch/m68knommu/kernel/sys_m68k.c +++ b/arch/m68knommu/kernel/sys_m68k.c @@ -27,40 +27,6 @@ #include #include -/* - * Perform the select(nd, in, out, ex, tv) and mmap() system - * calls. Linux/m68k cloned Linux/i386, which didn't use to be able to - * handle more than 4 system call parameters, so these system calls - * used a memory block for parameter passing.. - */ - -struct mmap_arg_struct { - unsigned long addr; - unsigned long len; - unsigned long prot; - unsigned long flags; - unsigned long fd; - unsigned long offset; -}; - -asmlinkage int old_mmap(struct mmap_arg_struct *arg) -{ - struct mmap_arg_struct a; - int error = -EFAULT; - - if (copy_from_user(&a, arg, sizeof(a))) - goto out; - - error = -EINVAL; - if (a.offset & ~PAGE_MASK) - goto out; - - error = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, - a.offset >> PAGE_SHIFT); -out: - return error; -} - /* * sys_ipc() is the de-multiplexer for the SysV IPC calls.. * diff --git a/arch/m68knommu/kernel/syscalltable.S b/arch/m68knommu/kernel/syscalltable.S index 405738351700..b30b3eb197a5 100644 --- a/arch/m68knommu/kernel/syscalltable.S +++ b/arch/m68knommu/kernel/syscalltable.S @@ -108,7 +108,7 @@ ENTRY(sys_call_table) .long sys_ni_syscall /* sys_swapon */ .long sys_reboot .long sys_old_readdir - .long old_mmap /* 90 */ + .long sys_old_mmap /* 90 */ .long sys_munmap .long sys_truncate .long sys_ftruncate diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h index 6e9f049fa823..5f0075150a65 100644 --- a/arch/s390/include/asm/unistd.h +++ b/arch/s390/include/asm/unistd.h @@ -392,6 +392,7 @@ #define __ARCH_WANT_SYS_LLSEEK #define __ARCH_WANT_SYS_NICE #define __ARCH_WANT_SYS_OLD_GETRLIMIT +#define __ARCH_WANT_SYS_OLD_MMAP #define __ARCH_WANT_SYS_OLDUMOUNT #define __ARCH_WANT_SYS_SIGPENDING #define __ARCH_WANT_SYS_SIGPROCMASK diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 9905a0cacf93..5de54d2af0b2 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -25,12 +25,11 @@ void __init startup_init(void); void die(const char * str, struct pt_regs * regs, long err); struct new_utsname; -struct mmap_arg_struct; +struct s390_mmap_arg_struct; struct fadvise64_64_args; struct old_sigaction; -long sys_mmap2(struct mmap_arg_struct __user *arg); -long sys_s390_old_mmap(struct mmap_arg_struct __user *arg); +long sys_mmap2(struct s390_mmap_arg_struct __user *arg); long sys_ipc(uint call, int first, unsigned long second, unsigned long third, void __user *ptr); long sys_s390_newuname(struct new_utsname __user *name); diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c index 86a74c9c9e63..b2563509b5a9 100644 --- a/arch/s390/kernel/sys_s390.c +++ b/arch/s390/kernel/sys_s390.c @@ -33,13 +33,12 @@ #include "entry.h" /* - * Perform the select(nd, in, out, ex, tv) and 
mmap() system - * calls. Linux for S/390 isn't able to handle more than 5 - * system call parameters, so these system calls used a memory - * block for parameter passing.. + * Perform the mmap() system call. Linux for S/390 isn't able to handle more + * than 5 system call parameters, so this system call uses a memory block + * for parameter passing. */ -struct mmap_arg_struct { +struct s390_mmap_arg_struct { unsigned long addr; unsigned long len; unsigned long prot; @@ -48,9 +47,9 @@ struct mmap_arg_struct { unsigned long offset; }; -SYSCALL_DEFINE1(mmap2, struct mmap_arg_struct __user *, arg) +SYSCALL_DEFINE1(mmap2, struct s390_mmap_arg_struct __user *, arg) { - struct mmap_arg_struct a; + struct s390_mmap_arg_struct a; int error = -EFAULT; if (copy_from_user(&a, arg, sizeof(a))) @@ -60,23 +59,6 @@ out: return error; } -SYSCALL_DEFINE1(s390_old_mmap, struct mmap_arg_struct __user *, arg) -{ - struct mmap_arg_struct a; - long error = -EFAULT; - - if (copy_from_user(&a, arg, sizeof(a))) - goto out; - - error = -EINVAL; - if (a.offset & ~PAGE_MASK) - goto out; - - error = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT); -out: - return error; -} - /* * sys_ipc() is the de-multiplexer for the SysV IPC calls.. * diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 30eca070d426..2a24766567af 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -98,7 +98,7 @@ SYSCALL(sys_uselib,sys_uselib,sys32_uselib_wrapper) SYSCALL(sys_swapon,sys_swapon,sys32_swapon_wrapper) SYSCALL(sys_reboot,sys_reboot,sys32_reboot_wrapper) SYSCALL(sys_ni_syscall,sys_ni_syscall,old32_readdir_wrapper) /* old readdir syscall */ -SYSCALL(sys_s390_old_mmap,sys_s390_old_mmap,old32_mmap_wrapper) /* 90 */ +SYSCALL(sys_old_mmap,sys_old_mmap,old32_mmap_wrapper) /* 90 */ SYSCALL(sys_munmap,sys_munmap,sys32_munmap_wrapper) SYSCALL(sys_truncate,sys_truncate,sys32_truncate_wrapper) SYSCALL(sys_ftruncate,sys_ftruncate,sys32_ftruncate_wrapper) diff --git a/arch/um/sys-i386/shared/sysdep/syscalls.h b/arch/um/sys-i386/shared/sysdep/syscalls.h index e7787679e317..05cb796aecb5 100644 --- a/arch/um/sys-i386/shared/sysdep/syscalls.h +++ b/arch/um/sys-i386/shared/sysdep/syscalls.h @@ -13,8 +13,6 @@ typedef long syscall_handler_t(struct pt_regs); */ extern syscall_handler_t sys_rt_sigaction; -extern syscall_handler_t old_mmap_i386; - extern syscall_handler_t *sys_call_table[]; #define EXECUTE_SYSCALL(syscall, regs) \ diff --git a/arch/um/sys-i386/sys_call_table.S b/arch/um/sys-i386/sys_call_table.S index c6260dd6ebb9..de274071455d 100644 --- a/arch/um/sys-i386/sys_call_table.S +++ b/arch/um/sys-i386/sys_call_table.S @@ -7,7 +7,7 @@ #define sys_vm86old sys_ni_syscall #define sys_vm86 sys_ni_syscall -#define old_mmap old_mmap_i386 +#define old_mmap sys_old_mmap #define ptregs_fork sys_fork #define ptregs_execve sys_execve diff --git a/arch/um/sys-i386/syscalls.c b/arch/um/sys-i386/syscalls.c index 0e49d2a20c17..d0aa8f125ee6 100644 --- a/arch/um/sys-i386/syscalls.c +++ b/arch/um/sys-i386/syscalls.c @@ -11,39 +11,6 @@ #include "asm/uaccess.h" #include "asm/unistd.h" -/* - * Perform the select(nd, in, out, ex, tv) and mmap() system - * calls. Linux/i386 didn't use to be able to handle more than - * 4 system call parameters, so these system calls used a memory - * block for parameter passing.. 
- */ - -struct mmap_arg_struct { - unsigned long addr; - unsigned long len; - unsigned long prot; - unsigned long flags; - unsigned long fd; - unsigned long offset; -}; - -extern int old_mmap(unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long offset); - -long old_mmap_i386(struct mmap_arg_struct __user *arg) -{ - struct mmap_arg_struct a; - int err = -EFAULT; - - if (copy_from_user(&a, arg, sizeof(a))) - goto out; - - err = old_mmap(a.addr, a.len, a.prot, a.flags, a.fd, a.offset); - out: - return err; -} - /* * The prototype on i386 is: * diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index cb80816e7a16..56c99f46e289 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -143,7 +143,7 @@ asmlinkage long sys32_fstatat(unsigned int dfd, char __user *filename, * block for parameter passing.. */ -struct mmap_arg_struct { +struct mmap_arg_struct32 { unsigned int addr; unsigned int len; unsigned int prot; @@ -152,9 +152,9 @@ struct mmap_arg_struct { unsigned int offset; }; -asmlinkage long sys32_mmap(struct mmap_arg_struct __user *arg) +asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *arg) { - struct mmap_arg_struct a; + struct mmap_arg_struct32 a; if (copy_from_user(&a, arg, sizeof(a))) return -EFAULT; diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index b26fc750e416..7d348d803669 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -26,8 +26,8 @@ asmlinkage long sys32_lstat64(char __user *, struct stat64 __user *); asmlinkage long sys32_fstat64(unsigned int, struct stat64 __user *); asmlinkage long sys32_fstatat(unsigned int, char __user *, struct stat64 __user *, int); -struct mmap_arg_struct; -asmlinkage long sys32_mmap(struct mmap_arg_struct __user *); +struct mmap_arg_struct32; +asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *); asmlinkage long sys32_mprotect(unsigned long, size_t, unsigned long); struct sigaction32; diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 8406d06c118d..86ab6a0623fd 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -51,11 +51,9 @@ asmlinkage int sys_sigaction(int, const struct old_sigaction __user *, unsigned long sys_sigreturn(struct pt_regs *); /* kernel/sys_i386_32.c */ -struct mmap_arg_struct; struct oldold_utsname; struct old_utsname; -asmlinkage int old_mmap(struct mmap_arg_struct __user *); asmlinkage int sys_ipc(uint, int, int, int, void __user *, long); asmlinkage int sys_uname(struct old_utsname __user *); asmlinkage int sys_olduname(struct oldold_utsname __user *); diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index 4eb2667b54ae..daa65d9aae95 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h @@ -366,6 +366,7 @@ #define __ARCH_WANT_SYS_LLSEEK #define __ARCH_WANT_SYS_NICE #define __ARCH_WANT_SYS_OLD_GETRLIMIT +#define __ARCH_WANT_SYS_OLD_MMAP #define __ARCH_WANT_SYS_OLD_SELECT #define __ARCH_WANT_SYS_OLDUMOUNT #define __ARCH_WANT_SYS_SIGPENDING diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c index 345dbd19a2b3..7955e90c8341 100644 --- a/arch/x86/kernel/sys_i386_32.c +++ b/arch/x86/kernel/sys_i386_32.c @@ -24,40 +24,6 @@ #include -/* - * Perform the select(nd, in, out, ex, tv) and mmap() system - * calls. 
Linux/i386 didn't use to be able to handle more than - * 4 system call parameters, so these system calls used a memory - * block for parameter passing.. - */ - -struct mmap_arg_struct { - unsigned long addr; - unsigned long len; - unsigned long prot; - unsigned long flags; - unsigned long fd; - unsigned long offset; -}; - -asmlinkage int old_mmap(struct mmap_arg_struct __user *arg) -{ - struct mmap_arg_struct a; - int err = -EFAULT; - - if (copy_from_user(&a, arg, sizeof(a))) - goto out; - - err = -EINVAL; - if (a.offset & ~PAGE_MASK) - goto out; - - err = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, - a.fd, a.offset >> PAGE_SHIFT); -out: - return err; -} - /* * sys_ipc() is the de-multiplexer for the SysV IPC calls.. * diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index 4d10abacecdb..8b3729341216 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -89,7 +89,7 @@ ENTRY(sys_call_table) .long sys_swapon .long sys_reboot .long sys_old_readdir - .long old_mmap /* 90 */ + .long sys_old_mmap /* 90 */ .long sys_munmap .long sys_truncate .long sys_ftruncate diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 85a9f21fe11a..b60907e3b0d5 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -23,6 +23,7 @@ struct kexec_segment; struct linux_dirent; struct linux_dirent64; struct list_head; +struct mmap_arg_struct; struct msgbuf; struct msghdr; struct mmsghdr; @@ -838,4 +839,6 @@ asmlinkage long sys_perf_event_open( asmlinkage long sys_mmap_pgoff(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff); +asmlinkage long sys_old_mmap(struct mmap_arg_struct __user *arg); + #endif diff --git a/mm/mmap.c b/mm/mmap.c index f1b4448626bf..75557c639ad4 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1088,6 +1088,30 @@ out: return retval; } +#ifdef __ARCH_WANT_SYS_OLD_MMAP +struct mmap_arg_struct { + unsigned long addr; + unsigned long len; + unsigned long prot; + unsigned long flags; + unsigned long fd; + unsigned long offset; +}; + +SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg) +{ + struct mmap_arg_struct a; + + if (copy_from_user(&a, arg, sizeof(a))) + return -EFAULT; + if (a.offset & ~PAGE_MASK) + return -EINVAL; + + return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, + a.offset >> PAGE_SHIFT); +} +#endif /* __ARCH_WANT_SYS_OLD_MMAP */ + /* * Some shared mappigns will want the pages marked read-only * to track write events. If so, we'll downgrade vm_page_prot diff --git a/mm/nommu.c b/mm/nommu.c index b9b5cceb1b68..605ace8982a8 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1428,6 +1428,30 @@ out: return retval; } +#ifdef __ARCH_WANT_SYS_OLD_MMAP +struct mmap_arg_struct { + unsigned long addr; + unsigned long len; + unsigned long prot; + unsigned long flags; + unsigned long fd; + unsigned long offset; +}; + +SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg) +{ + struct mmap_arg_struct a; + + if (copy_from_user(&a, arg, sizeof(a))) + return -EFAULT; + if (a.offset & ~PAGE_MASK) + return -EINVAL; + + return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, + a.offset >> PAGE_SHIFT); +} +#endif /* __ARCH_WANT_SYS_OLD_MMAP */ + /* * split a vma into two pieces at address 'addr', a new vma is allocated either * for the first part or the tail. 
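For reference, the matching user-space side of the old mmap() convention, again only a sketch and not part of the patch: it assumes an x86-32 build where syscall 90 is the struct-based mmap entry, and the old_mmap_call() helper and old_mmap_arg struct are hypothetical names mirroring the kernel's mmap_arg_struct.

#include <unistd.h>

struct old_mmap_arg {		/* must mirror the kernel's mmap_arg_struct */
	unsigned long addr, len, prot, flags, fd, offset;
};

static void *old_mmap_call(void *addr, unsigned long len, unsigned long prot,
			   unsigned long flags, unsigned long fd,
			   unsigned long offset)
{
	struct old_mmap_arg a = {
		(unsigned long)addr, len, prot, flags, fd, offset
	};

	/* offset must be page aligned or sys_old_mmap() returns -EINVAL */
	return (void *)syscall(90 /* assumed: __NR_mmap on i386, the old entry */, &a);
}

On error the libc syscall() wrapper returns -1 and sets errno, so a failure comes back as (void *)-1, just like the regular mmap() wrapper's MAP_FAILED.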
-- cgit v1.2.3 From baed7fc9b580bd3fb8252ff1d9b36eaf1f86b670 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 10 Mar 2010 15:21:18 -0800 Subject: Add generic sys_ipc wrapper Add a generic implementation of the ipc demultiplexer syscall. Except for s390 and sparc64 all implementations of the sys_ipc are nearly identical. There are slight differences in the types of the parameters, where mips and powerpc as the only 64-bit architectures with sys_ipc use unsigned long for the "third" argument as it gets casted to a pointer later, while it traditionally is an "int" like most other paramters. frv goes even further and uses unsigned long for all parameters execept for "ptr" which is a pointer type everywhere. The change from int to unsigned long for "third" and back to "int" for the others on frv should be fine due to the in-register calling conventions for syscalls (we already had a similar issue with the generic sys_ptrace), but I'd prefer to have the arch maintainers looks over this in details. Except for that h8300, m68k and m68knommu lack an impplementation of the semtimedop sub call which this patch adds, and various architectures have gets used - at least on i386 it seems superflous as the compat code on x86-64 and ia64 doesn't even bother to implement it. [akpm@linux-foundation.org: add sys_ipc to sys_ni.c] Signed-off-by: Christoph Hellwig Cc: Ralf Baechle Cc: Benjamin Herrenschmidt Cc: Paul Mundt Cc: Jeff Dike Cc: Hirokazu Takata Cc: Thomas Gleixner Cc: Ingo Molnar Reviewed-by: H. Peter Anvin Cc: Al Viro Cc: Arnd Bergmann Cc: Heiko Carstens Cc: Martin Schwidefsky Cc: "Luck, Tony" Cc: James Morris Cc: Andreas Schwab Acked-by: Jesper Nilsson Acked-by: Russell King Acked-by: David Howells Acked-by: Kyle McMartin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/include/asm/unistd.h | 1 + arch/arm/kernel/sys_arm.c | 82 -------------------------- arch/arm/kernel/sys_oabi-compat.c | 3 - arch/cris/include/asm/unistd.h | 1 + arch/cris/kernel/sys_cris.c | 78 ------------------------- arch/frv/include/asm/unistd.h | 1 + arch/frv/kernel/sys_frv.c | 89 ---------------------------- arch/h8300/include/asm/unistd.h | 1 + arch/h8300/kernel/sys_h8300.c | 88 ---------------------------- arch/m32r/include/asm/unistd.h | 1 + arch/m32r/kernel/sys_m32r.c | 81 -------------------------- arch/m68k/include/asm/unistd.h | 1 + arch/m68k/kernel/sys_m68k.c | 81 -------------------------- arch/m68knommu/kernel/sys_m68k.c | 86 --------------------------- arch/mips/include/asm/unistd.h | 1 + arch/mips/kernel/syscall.c | 88 ---------------------------- arch/mn10300/include/asm/unistd.h | 1 + arch/mn10300/kernel/sys_mn10300.c | 88 ---------------------------- arch/powerpc/include/asm/syscalls.h | 2 - arch/powerpc/include/asm/unistd.h | 1 + arch/powerpc/kernel/syscalls.c | 94 ------------------------------ arch/s390/kernel/entry.h | 2 +- arch/s390/kernel/sys_s390.c | 2 +- arch/s390/kernel/syscalls.S | 2 +- arch/sh/include/asm/syscalls.h | 2 - arch/sh/include/asm/unistd_32.h | 1 + arch/sh/include/asm/unistd_64.h | 1 + arch/sh/kernel/sys_sh.c | 104 --------------------------------- arch/sparc/include/asm/unistd.h | 4 +- arch/sparc/kernel/sys_sparc_32.c | 113 ------------------------------------ arch/sparc/kernel/sys_sparc_64.c | 2 +- arch/sparc/kernel/systbls.h | 2 +- arch/sparc/kernel/systbls_64.S | 2 +- arch/um/sys-i386/syscalls.c | 86 --------------------------- arch/x86/include/asm/syscalls.h | 1 - arch/x86/include/asm/unistd_32.h | 1 + arch/x86/kernel/sys_i386_32.c | 85 
--------------------------- include/linux/syscalls.h | 2 + ipc/Makefile | 2 +- ipc/syscall.c | 99 +++++++++++++++++++++++++++++++ kernel/sys_ni.c | 1 + 41 files changed, 124 insertions(+), 1259 deletions(-) create mode 100644 ipc/syscall.c (limited to 'include') diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index e6eeb2d29953..dd2bf53000fe 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -448,6 +448,7 @@ #if !defined(CONFIG_AEABI) || defined(CONFIG_OABI_COMPAT) #define __ARCH_WANT_SYS_TIME +#define __ARCH_WANT_SYS_IPC #define __ARCH_WANT_SYS_OLDUMOUNT #define __ARCH_WANT_SYS_ALARM #define __ARCH_WANT_SYS_UTIME diff --git a/arch/arm/kernel/sys_arm.c b/arch/arm/kernel/sys_arm.c index a2e0e6f2ea7f..4350f75e578c 100644 --- a/arch/arm/kernel/sys_arm.c +++ b/arch/arm/kernel/sys_arm.c @@ -28,88 +28,6 @@ #include #include -#if !defined(CONFIG_AEABI) || defined(CONFIG_OABI_COMPAT) -/* - * sys_ipc() is the de-multiplexer for the SysV IPC calls.. - * - * This is really horribly ugly. - */ -asmlinkage int sys_ipc(uint call, int first, int second, int third, - void __user *ptr, long fifth) -{ - int version, ret; - - version = call >> 16; /* hack for backward compatibility */ - call &= 0xffff; - - switch (call) { - case SEMOP: - return sys_semtimedop (first, (struct sembuf __user *)ptr, second, NULL); - case SEMTIMEDOP: - return sys_semtimedop(first, (struct sembuf __user *)ptr, second, - (const struct timespec __user *)fifth); - - case SEMGET: - return sys_semget (first, second, third); - case SEMCTL: { - union semun fourth; - if (!ptr) - return -EINVAL; - if (get_user(fourth.__pad, (void __user * __user *) ptr)) - return -EFAULT; - return sys_semctl (first, second, third, fourth); - } - - case MSGSND: - return sys_msgsnd(first, (struct msgbuf __user *) ptr, - second, third); - case MSGRCV: - switch (version) { - case 0: { - struct ipc_kludge tmp; - if (!ptr) - return -EINVAL; - if (copy_from_user(&tmp,(struct ipc_kludge __user *)ptr, - sizeof (tmp))) - return -EFAULT; - return sys_msgrcv (first, tmp.msgp, second, - tmp.msgtyp, third); - } - default: - return sys_msgrcv (first, - (struct msgbuf __user *) ptr, - second, fifth, third); - } - case MSGGET: - return sys_msgget ((key_t) first, second); - case MSGCTL: - return sys_msgctl(first, second, (struct msqid_ds __user *)ptr); - - case SHMAT: - switch (version) { - default: { - ulong raddr; - ret = do_shmat(first, (char __user *)ptr, second, &raddr); - if (ret) - return ret; - return put_user(raddr, (ulong __user *)third); - } - case 1: /* Of course, we don't support iBCS2! */ - return -EINVAL; - } - case SHMDT: - return sys_shmdt ((char __user *)ptr); - case SHMGET: - return sys_shmget (first, second, third); - case SHMCTL: - return sys_shmctl (first, second, - (struct shmid_ds __user *) ptr); - default: - return -ENOSYS; - } -} -#endif - /* Fork a new task - this creates a new program thread. 
* This is called indirectly via a small wrapper */ diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c index d59a0cd537f0..33ff678e32f2 100644 --- a/arch/arm/kernel/sys_oabi-compat.c +++ b/arch/arm/kernel/sys_oabi-compat.c @@ -346,9 +346,6 @@ asmlinkage long sys_oabi_semop(int semid, struct oabi_sembuf __user *tsops, return sys_oabi_semtimedop(semid, tsops, nsops, NULL); } -extern asmlinkage int sys_ipc(uint call, int first, int second, int third, - void __user *ptr, long fifth); - asmlinkage int sys_oabi_ipc(uint call, int first, int second, int third, void __user *ptr, long fifth) { diff --git a/arch/cris/include/asm/unistd.h b/arch/cris/include/asm/unistd.h index 8cffd22623fd..f6fad83b3a8c 100644 --- a/arch/cris/include/asm/unistd.h +++ b/arch/cris/include/asm/unistd.h @@ -352,6 +352,7 @@ #define __ARCH_WANT_STAT64 #define __ARCH_WANT_SYS_ALARM #define __ARCH_WANT_SYS_GETHOSTNAME +#define __ARCH_WANT_SYS_IPC #define __ARCH_WANT_SYS_PAUSE #define __ARCH_WANT_SYS_SGETMASK #define __ARCH_WANT_SYS_SIGNAL diff --git a/arch/cris/kernel/sys_cris.c b/arch/cris/kernel/sys_cris.c index 22f9d6cd947f..7aa036ec78ff 100644 --- a/arch/cris/kernel/sys_cris.c +++ b/arch/cris/kernel/sys_cris.c @@ -33,81 +33,3 @@ sys_mmap2(unsigned long addr, unsigned long len, unsigned long prot, /* bug(?): 8Kb pages here */ return sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff); } - -/* - * sys_ipc() is the de-multiplexer for the SysV IPC calls.. - * - * This is really horribly ugly. (same as arch/i386) - */ - -asmlinkage int sys_ipc (uint call, int first, int second, - int third, void __user *ptr, long fifth) -{ - int version, ret; - - version = call >> 16; /* hack for backward compatibility */ - call &= 0xffff; - - switch (call) { - case SEMOP: - return sys_semtimedop (first, (struct sembuf __user *)ptr, second, NULL); - case SEMTIMEDOP: - return sys_semtimedop(first, (struct sembuf __user *)ptr, second, - (const struct timespec __user *)fifth); - - case SEMGET: - return sys_semget (first, second, third); - case SEMCTL: { - union semun fourth; - if (!ptr) - return -EINVAL; - if (get_user(fourth.__pad, (void * __user *) ptr)) - return -EFAULT; - return sys_semctl (first, second, third, fourth); - } - - case MSGSND: - return sys_msgsnd (first, (struct msgbuf __user *) ptr, - second, third); - case MSGRCV: - switch (version) { - case 0: { - struct ipc_kludge tmp; - if (!ptr) - return -EINVAL; - - if (copy_from_user(&tmp, - (struct ipc_kludge __user *) ptr, - sizeof (tmp))) - return -EFAULT; - return sys_msgrcv (first, tmp.msgp, second, - tmp.msgtyp, third); - } - default: - return sys_msgrcv (first, - (struct msgbuf __user *) ptr, - second, fifth, third); - } - case MSGGET: - return sys_msgget ((key_t) first, second); - case MSGCTL: - return sys_msgctl (first, second, (struct msqid_ds __user *) ptr); - - case SHMAT: { - ulong raddr; - ret = do_shmat (first, (char __user *) ptr, second, &raddr); - if (ret) - return ret; - return put_user (raddr, (ulong __user *) third); - } - case SHMDT: - return sys_shmdt ((char __user *)ptr); - case SHMGET: - return sys_shmget (first, second, third); - case SHMCTL: - return sys_shmctl (first, second, - (struct shmid_ds __user *) ptr); - default: - return -ENOSYS; - } -} diff --git a/arch/frv/include/asm/unistd.h b/arch/frv/include/asm/unistd.h index be6ef0f5cd42..b28da499e22a 100644 --- a/arch/frv/include/asm/unistd.h +++ b/arch/frv/include/asm/unistd.h @@ -354,6 +354,7 @@ #define __ARCH_WANT_STAT64 #define __ARCH_WANT_SYS_ALARM /* #define 
__ARCH_WANT_SYS_GETHOSTNAME */ +#define __ARCH_WANT_SYS_IPC #define __ARCH_WANT_SYS_PAUSE /* #define __ARCH_WANT_SYS_SGETMASK */ /* #define __ARCH_WANT_SYS_SIGNAL */ diff --git a/arch/frv/kernel/sys_frv.c b/arch/frv/kernel/sys_frv.c index 1d3d4c9e2521..9c4980825bbb 100644 --- a/arch/frv/kernel/sys_frv.c +++ b/arch/frv/kernel/sys_frv.c @@ -42,92 +42,3 @@ asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, return sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff >> (PAGE_SHIFT - 12)); } - -/* - * sys_ipc() is the de-multiplexer for the SysV IPC calls.. - * - * This is really horribly ugly. - */ -asmlinkage long sys_ipc(unsigned long call, - unsigned long first, - unsigned long second, - unsigned long third, - void __user *ptr, - unsigned long fifth) -{ - int version, ret; - - version = call >> 16; /* hack for backward compatibility */ - call &= 0xffff; - - switch (call) { - case SEMOP: - return sys_semtimedop(first, (struct sembuf __user *)ptr, second, NULL); - case SEMTIMEDOP: - return sys_semtimedop(first, (struct sembuf __user *)ptr, second, - (const struct timespec __user *)fifth); - - case SEMGET: - return sys_semget (first, second, third); - case SEMCTL: { - union semun fourth; - if (!ptr) - return -EINVAL; - if (get_user(fourth.__pad, (void * __user *) ptr)) - return -EFAULT; - return sys_semctl (first, second, third, fourth); - } - - case MSGSND: - return sys_msgsnd (first, (struct msgbuf __user *) ptr, - second, third); - case MSGRCV: - switch (version) { - case 0: { - struct ipc_kludge tmp; - if (!ptr) - return -EINVAL; - - if (copy_from_user(&tmp, - (struct ipc_kludge __user *) ptr, - sizeof (tmp))) - return -EFAULT; - return sys_msgrcv (first, tmp.msgp, second, - tmp.msgtyp, third); - } - default: - return sys_msgrcv (first, - (struct msgbuf __user *) ptr, - second, fifth, third); - } - case MSGGET: - return sys_msgget ((key_t) first, second); - case MSGCTL: - return sys_msgctl (first, second, (struct msqid_ds __user *) ptr); - - case SHMAT: - switch (version) { - default: { - ulong raddr; - ret = do_shmat (first, (char __user *) ptr, second, &raddr); - if (ret) - return ret; - return put_user (raddr, (ulong __user *) third); - } - case 1: /* iBCS2 emulator entry point */ - if (!segment_eq(get_fs(), get_ds())) - return -EINVAL; - /* The "(ulong *) third" is valid _only_ because of the kernel segment thing */ - return do_shmat (first, (char __user *) ptr, second, (ulong *) third); - } - case SHMDT: - return sys_shmdt ((char __user *)ptr); - case SHMGET: - return sys_shmget (first, second, third); - case SHMCTL: - return sys_shmctl (first, second, - (struct shmid_ds __user *) ptr); - default: - return -ENOSYS; - } -} diff --git a/arch/h8300/include/asm/unistd.h b/arch/h8300/include/asm/unistd.h index 54dab4726954..50f2c5a36591 100644 --- a/arch/h8300/include/asm/unistd.h +++ b/arch/h8300/include/asm/unistd.h @@ -336,6 +336,7 @@ #define __ARCH_WANT_STAT64 #define __ARCH_WANT_SYS_ALARM #define __ARCH_WANT_SYS_GETHOSTNAME +#define __ARCH_WANT_SYS_IPC #define __ARCH_WANT_SYS_PAUSE #define __ARCH_WANT_SYS_SGETMASK #define __ARCH_WANT_SYS_SIGNAL diff --git a/arch/h8300/kernel/sys_h8300.c b/arch/h8300/kernel/sys_h8300.c index 1f13fd6e5309..f9b3f44da69f 100644 --- a/arch/h8300/kernel/sys_h8300.c +++ b/arch/h8300/kernel/sys_h8300.c @@ -26,94 +26,6 @@ #include #include -/* - * sys_ipc() is the de-multiplexer for the SysV IPC calls.. - * - * This is really horribly ugly. 
- */ -asmlinkage int sys_ipc (uint call, int first, int second, - int third, void *ptr, long fifth) -{ - int version, ret; - - version = call >> 16; /* hack for backward compatibility */ - call &= 0xffff; - - if (call <= SEMCTL) - switch (call) { - case SEMOP: - return sys_semop (first, (struct sembuf *)ptr, second); - case SEMGET: - return sys_semget (first, second, third); - case SEMCTL: { - union semun fourth; - if (!ptr) - return -EINVAL; - if (get_user(fourth.__pad, (void **) ptr)) - return -EFAULT; - return sys_semctl (first, second, third, fourth); - } - default: - return -EINVAL; - } - if (call <= MSGCTL) - switch (call) { - case MSGSND: - return sys_msgsnd (first, (struct msgbuf *) ptr, - second, third); - case MSGRCV: - switch (version) { - case 0: { - struct ipc_kludge tmp; - if (!ptr) - return -EINVAL; - if (copy_from_user (&tmp, - (struct ipc_kludge *)ptr, - sizeof (tmp))) - return -EFAULT; - return sys_msgrcv (first, tmp.msgp, second, - tmp.msgtyp, third); - } - default: - return sys_msgrcv (first, - (struct msgbuf *) ptr, - second, fifth, third); - } - case MSGGET: - return sys_msgget ((key_t) first, second); - case MSGCTL: - return sys_msgctl (first, second, - (struct msqid_ds *) ptr); - default: - return -EINVAL; - } - if (call <= SHMCTL) - switch (call) { - case SHMAT: - switch (version) { - default: { - ulong raddr; - ret = do_shmat (first, (char *) ptr, - second, &raddr); - if (ret) - return ret; - return put_user (raddr, (ulong *) third); - } - } - case SHMDT: - return sys_shmdt ((char *)ptr); - case SHMGET: - return sys_shmget (first, second, third); - case SHMCTL: - return sys_shmctl (first, second, - (struct shmid_ds *) ptr); - default: - return -EINVAL; - } - - return -EINVAL; -} - /* sys_cacheflush -- no support. */ asmlinkage int sys_cacheflush (unsigned long addr, int scope, int cache, unsigned long len) diff --git a/arch/m32r/include/asm/unistd.h b/arch/m32r/include/asm/unistd.h index cf701c933249..76125777483c 100644 --- a/arch/m32r/include/asm/unistd.h +++ b/arch/m32r/include/asm/unistd.h @@ -339,6 +339,7 @@ #define __ARCH_WANT_STAT64 #define __ARCH_WANT_SYS_ALARM #define __ARCH_WANT_SYS_GETHOSTNAME +#define __ARCH_WANT_SYS_IPC #define __ARCH_WANT_SYS_PAUSE #define __ARCH_WANT_SYS_TIME #define __ARCH_WANT_SYS_UTIME diff --git a/arch/m32r/kernel/sys_m32r.c b/arch/m32r/kernel/sys_m32r.c index d3c865c5a6ba..cf2e7279ce9b 100644 --- a/arch/m32r/kernel/sys_m32r.c +++ b/arch/m32r/kernel/sys_m32r.c @@ -76,87 +76,6 @@ asmlinkage int sys_tas(int __user *addr) return oldval; } -/* - * sys_ipc() is the de-multiplexer for the SysV IPC calls.. - * - * This is really horribly ugly. 
- */ -asmlinkage int sys_ipc(uint call, int first, int second, - int third, void __user *ptr, long fifth) -{ - int version, ret; - - version = call >> 16; /* hack for backward compatibility */ - call &= 0xffff; - - switch (call) { - case SEMOP: - return sys_semtimedop(first, (struct sembuf __user *)ptr, - second, NULL); - case SEMTIMEDOP: - return sys_semtimedop(first, (struct sembuf __user *)ptr, - second, (const struct timespec __user *)fifth); - case SEMGET: - return sys_semget (first, second, third); - case SEMCTL: { - union semun fourth; - if (!ptr) - return -EINVAL; - if (get_user(fourth.__pad, (void __user * __user *) ptr)) - return -EFAULT; - return sys_semctl (first, second, third, fourth); - } - - case MSGSND: - return sys_msgsnd (first, (struct msgbuf __user *) ptr, - second, third); - case MSGRCV: - switch (version) { - case 0: { - struct ipc_kludge tmp; - if (!ptr) - return -EINVAL; - - if (copy_from_user(&tmp, - (struct ipc_kludge __user *) ptr, - sizeof (tmp))) - return -EFAULT; - return sys_msgrcv (first, tmp.msgp, second, - tmp.msgtyp, third); - } - default: - return sys_msgrcv (first, - (struct msgbuf __user *) ptr, - second, fifth, third); - } - case MSGGET: - return sys_msgget ((key_t) first, second); - case MSGCTL: - return sys_msgctl (first, second, - (struct msqid_ds __user *) ptr); - case SHMAT: { - ulong raddr; - - if (!access_ok(VERIFY_WRITE, (ulong __user *) third, - sizeof(ulong))) - return -EFAULT; - ret = do_shmat (first, (char __user *) ptr, second, &raddr); - if (ret) - return ret; - return put_user (raddr, (ulong __user *) third); - } - case SHMDT: - return sys_shmdt ((char __user *)ptr); - case SHMGET: - return sys_shmget (first, second, third); - case SHMCTL: - return sys_shmctl (first, second, - (struct shmid_ds __user *) ptr); - default: - return -ENOSYS; - } -} - asmlinkage int sys_uname(struct old_utsname __user * name) { int err; diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h index d801154310ea..60b15d0aa072 100644 --- a/arch/m68k/include/asm/unistd.h +++ b/arch/m68k/include/asm/unistd.h @@ -351,6 +351,7 @@ #define __ARCH_WANT_STAT64 #define __ARCH_WANT_SYS_ALARM #define __ARCH_WANT_SYS_GETHOSTNAME +#define __ARCH_WANT_SYS_IPC #define __ARCH_WANT_SYS_PAUSE #define __ARCH_WANT_SYS_SGETMASK #define __ARCH_WANT_SYS_SIGNAL diff --git a/arch/m68k/kernel/sys_m68k.c b/arch/m68k/kernel/sys_m68k.c index 7b309e7b6cef..77896692eb0a 100644 --- a/arch/m68k/kernel/sys_m68k.c +++ b/arch/m68k/kernel/sys_m68k.c @@ -46,87 +46,6 @@ asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, return sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff); } -/* - * sys_ipc() is the de-multiplexer for the SysV IPC calls.. - * - * This is really horribly ugly. 
- */ -asmlinkage int sys_ipc (uint call, int first, int second, - int third, void __user *ptr, long fifth) -{ - int version, ret; - - version = call >> 16; /* hack for backward compatibility */ - call &= 0xffff; - - if (call <= SEMCTL) - switch (call) { - case SEMOP: - return sys_semop (first, ptr, second); - case SEMGET: - return sys_semget (first, second, third); - case SEMCTL: { - union semun fourth; - if (!ptr) - return -EINVAL; - if (get_user(fourth.__pad, (void __user *__user *) ptr)) - return -EFAULT; - return sys_semctl (first, second, third, fourth); - } - default: - return -ENOSYS; - } - if (call <= MSGCTL) - switch (call) { - case MSGSND: - return sys_msgsnd (first, ptr, second, third); - case MSGRCV: - switch (version) { - case 0: { - struct ipc_kludge tmp; - if (!ptr) - return -EINVAL; - if (copy_from_user (&tmp, ptr, sizeof (tmp))) - return -EFAULT; - return sys_msgrcv (first, tmp.msgp, second, - tmp.msgtyp, third); - } - default: - return sys_msgrcv (first, ptr, - second, fifth, third); - } - case MSGGET: - return sys_msgget ((key_t) first, second); - case MSGCTL: - return sys_msgctl (first, second, ptr); - default: - return -ENOSYS; - } - if (call <= SHMCTL) - switch (call) { - case SHMAT: - switch (version) { - default: { - ulong raddr; - ret = do_shmat (first, ptr, second, &raddr); - if (ret) - return ret; - return put_user (raddr, (ulong __user *) third); - } - } - case SHMDT: - return sys_shmdt (ptr); - case SHMGET: - return sys_shmget (first, second, third); - case SHMCTL: - return sys_shmctl (first, second, ptr); - default: - return -ENOSYS; - } - - return -EINVAL; -} - /* Convert virtual (user) address VADDR to physical address PADDR */ #define virt_to_phys_040(vaddr) \ ({ \ diff --git a/arch/m68knommu/kernel/sys_m68k.c b/arch/m68knommu/kernel/sys_m68k.c index 3e371cc9fd91..d65e9c4c930c 100644 --- a/arch/m68knommu/kernel/sys_m68k.c +++ b/arch/m68knommu/kernel/sys_m68k.c @@ -27,92 +27,6 @@ #include #include -/* - * sys_ipc() is the de-multiplexer for the SysV IPC calls.. - * - * This is really horribly ugly. 
- */ -asmlinkage int sys_ipc (uint call, int first, int second, - int third, void *ptr, long fifth) -{ - int version, ret; - - version = call >> 16; /* hack for backward compatibility */ - call &= 0xffff; - - if (call <= SEMCTL) - switch (call) { - case SEMOP: - return sys_semop (first, (struct sembuf *)ptr, second); - case SEMGET: - return sys_semget (first, second, third); - case SEMCTL: { - union semun fourth; - if (!ptr) - return -EINVAL; - if (get_user(fourth.__pad, (void **) ptr)) - return -EFAULT; - return sys_semctl (first, second, third, fourth); - } - default: - return -EINVAL; - } - if (call <= MSGCTL) - switch (call) { - case MSGSND: - return sys_msgsnd (first, (struct msgbuf *) ptr, - second, third); - case MSGRCV: - switch (version) { - case 0: { - struct ipc_kludge tmp; - if (!ptr) - return -EINVAL; - if (copy_from_user (&tmp, - (struct ipc_kludge *)ptr, - sizeof (tmp))) - return -EFAULT; - return sys_msgrcv (first, tmp.msgp, second, - tmp.msgtyp, third); - } - default: - return sys_msgrcv (first, - (struct msgbuf *) ptr, - second, fifth, third); - } - case MSGGET: - return sys_msgget ((key_t) first, second); - case MSGCTL: - return sys_msgctl (first, second, - (struct msqid_ds *) ptr); - default: - return -EINVAL; - } - if (call <= SHMCTL) - switch (call) { - case SHMAT: - switch (version) { - default: { - ulong raddr; - ret = do_shmat (first, ptr, second, &raddr); - if (ret) - return ret; - return put_user (raddr, (ulong __user *) third); - } - } - case SHMDT: - return sys_shmdt (ptr); - case SHMGET: - return sys_shmget (first, second, third); - case SHMCTL: - return sys_shmctl (first, second, ptr); - default: - return -ENOSYS; - } - - return -EINVAL; -} - /* sys_cacheflush -- flush (part of) the processor cache. */ asmlinkage int sys_cacheflush (unsigned long addr, int scope, int cache, unsigned long len) diff --git a/arch/mips/include/asm/unistd.h b/arch/mips/include/asm/unistd.h index 65c679ecbe6b..97fe472095f2 100644 --- a/arch/mips/include/asm/unistd.h +++ b/arch/mips/include/asm/unistd.h @@ -1004,6 +1004,7 @@ #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_SYS_ALARM #define __ARCH_WANT_SYS_GETHOSTNAME +#define __ARCH_WANT_SYS_IPC #define __ARCH_WANT_SYS_PAUSE #define __ARCH_WANT_SYS_SGETMASK #define __ARCH_WANT_SYS_UTIME diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index 3f7f466190b4..257bf0141775 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -406,94 +406,6 @@ _sys_sysmips(nabi_no_regargs struct pt_regs regs) return -EINVAL; } -/* - * sys_ipc() is the de-multiplexer for the SysV IPC calls.. - * - * This is really horribly ugly. 
- */ -SYSCALL_DEFINE6(ipc, unsigned int, call, int, first, int, second, - unsigned long, third, void __user *, ptr, long, fifth) -{ - int version, ret; - - version = call >> 16; /* hack for backward compatibility */ - call &= 0xffff; - - switch (call) { - case SEMOP: - return sys_semtimedop(first, (struct sembuf __user *)ptr, - second, NULL); - case SEMTIMEDOP: - return sys_semtimedop(first, (struct sembuf __user *)ptr, - second, - (const struct timespec __user *)fifth); - case SEMGET: - return sys_semget(first, second, third); - case SEMCTL: { - union semun fourth; - if (!ptr) - return -EINVAL; - if (get_user(fourth.__pad, (void __user *__user *) ptr)) - return -EFAULT; - return sys_semctl(first, second, third, fourth); - } - - case MSGSND: - return sys_msgsnd(first, (struct msgbuf __user *) ptr, - second, third); - case MSGRCV: - switch (version) { - case 0: { - struct ipc_kludge tmp; - if (!ptr) - return -EINVAL; - - if (copy_from_user(&tmp, - (struct ipc_kludge __user *) ptr, - sizeof(tmp))) - return -EFAULT; - return sys_msgrcv(first, tmp.msgp, second, - tmp.msgtyp, third); - } - default: - return sys_msgrcv(first, - (struct msgbuf __user *) ptr, - second, fifth, third); - } - case MSGGET: - return sys_msgget((key_t) first, second); - case MSGCTL: - return sys_msgctl(first, second, - (struct msqid_ds __user *) ptr); - - case SHMAT: - switch (version) { - default: { - unsigned long raddr; - ret = do_shmat(first, (char __user *) ptr, second, - &raddr); - if (ret) - return ret; - return put_user(raddr, (unsigned long __user *) third); - } - case 1: /* iBCS2 emulator entry point */ - if (!segment_eq(get_fs(), get_ds())) - return -EINVAL; - return do_shmat(first, (char __user *) ptr, second, - (unsigned long *) third); - } - case SHMDT: - return sys_shmdt((char __user *)ptr); - case SHMGET: - return sys_shmget(first, second, third); - case SHMCTL: - return sys_shmctl(first, second, - (struct shmid_ds __user *) ptr); - default: - return -ENOSYS; - } -} - /* * No implemented yet ... */ diff --git a/arch/mn10300/include/asm/unistd.h b/arch/mn10300/include/asm/unistd.h index d13a56e99bad..9d056f515929 100644 --- a/arch/mn10300/include/asm/unistd.h +++ b/arch/mn10300/include/asm/unistd.h @@ -363,6 +363,7 @@ #define __ARCH_WANT_STAT64 #define __ARCH_WANT_SYS_ALARM #define __ARCH_WANT_SYS_GETHOSTNAME +#define __ARCH_WANT_SYS_IPC #define __ARCH_WANT_SYS_PAUSE #define __ARCH_WANT_SYS_SGETMASK #define __ARCH_WANT_SYS_SIGNAL diff --git a/arch/mn10300/kernel/sys_mn10300.c b/arch/mn10300/kernel/sys_mn10300.c index bef69d6daf15..815f1355fad4 100644 --- a/arch/mn10300/kernel/sys_mn10300.c +++ b/arch/mn10300/kernel/sys_mn10300.c @@ -31,91 +31,3 @@ asmlinkage long old_mmap(unsigned long addr, unsigned long len, return -EINVAL; return sys_mmap_pgoff(addr, len, prot, flags, fd, offset >> PAGE_SHIFT); } - -/* - * sys_ipc() is the de-multiplexer for the SysV IPC calls.. - * - * This is really horribly ugly. 
- */ -asmlinkage long sys_ipc(uint call, int first, int second, - int third, void __user *ptr, long fifth) -{ - int version, ret; - - version = call >> 16; /* hack for backward compatibility */ - call &= 0xffff; - - switch (call) { - case SEMOP: - return sys_semtimedop(first, (struct sembuf __user *)ptr, - second, NULL); - case SEMTIMEDOP: - return sys_semtimedop(first, (struct sembuf __user *)ptr, - second, - (const struct timespec __user *)fifth); - case SEMGET: - return sys_semget(first, second, third); - case SEMCTL: { - union semun fourth; - if (!ptr) - return -EINVAL; - if (get_user(fourth.__pad, (void __user * __user *) ptr)) - return -EFAULT; - return sys_semctl(first, second, third, fourth); - } - - case MSGSND: - return sys_msgsnd(first, (struct msgbuf __user *) ptr, - second, third); - case MSGRCV: - switch (version) { - case 0: { - struct ipc_kludge tmp; - if (!ptr) - return -EINVAL; - - if (copy_from_user(&tmp, - (struct ipc_kludge __user *) ptr, - sizeof(tmp))) - return -EFAULT; - return sys_msgrcv(first, tmp.msgp, second, - tmp.msgtyp, third); - } - default: - return sys_msgrcv(first, - (struct msgbuf __user *) ptr, - second, fifth, third); - } - case MSGGET: - return sys_msgget((key_t) first, second); - case MSGCTL: - return sys_msgctl(first, second, - (struct msqid_ds __user *) ptr); - - case SHMAT: - switch (version) { - default: { - ulong raddr; - ret = do_shmat(first, (char __user *) ptr, second, - &raddr); - if (ret) - return ret; - return put_user(raddr, (ulong *) third); - } - case 1: /* iBCS2 emulator entry point */ - if (!segment_eq(get_fs(), get_ds())) - return -EINVAL; - return do_shmat(first, (char __user *) ptr, second, - (ulong *) third); - } - case SHMDT: - return sys_shmdt((char __user *)ptr); - case SHMGET: - return sys_shmget(first, second, third); - case SHMCTL: - return sys_shmctl(first, second, - (struct shmid_ds __user *) ptr); - default: - return -EINVAL; - } -} diff --git a/arch/powerpc/include/asm/syscalls.h b/arch/powerpc/include/asm/syscalls.h index eb8eb400c664..23bb74e7f946 100644 --- a/arch/powerpc/include/asm/syscalls.h +++ b/arch/powerpc/include/asm/syscalls.h @@ -35,8 +35,6 @@ asmlinkage long sys_pipe2(int __user *fildes, int flags); asmlinkage long sys_rt_sigaction(int sig, const struct sigaction __user *act, struct sigaction __user *oact, size_t sigsetsize); -asmlinkage int sys_ipc(uint call, int first, unsigned long second, - long third, void __user *ptr, long fifth); asmlinkage long ppc64_personality(unsigned long personality); asmlinkage int ppc_rtas(struct rtas_args __user *uargs); asmlinkage time_t sys64_time(time_t __user * tloc); diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index f6ca76176766..c13821fe8741 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -364,6 +364,7 @@ #define __ARCH_WANT_STAT64 #define __ARCH_WANT_SYS_ALARM #define __ARCH_WANT_SYS_GETHOSTNAME +#define __ARCH_WANT_SYS_IPC #define __ARCH_WANT_SYS_PAUSE #define __ARCH_WANT_SYS_SGETMASK #define __ARCH_WANT_SYS_SIGNAL diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index 3370e62e43d4..5251221e7a5a 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -42,100 +42,6 @@ #include #include -/* - * sys_ipc() is the de-multiplexer for the SysV IPC calls.. - * - * This is really horribly ugly. 
- */ -int sys_ipc(uint call, int first, unsigned long second, long third, - void __user *ptr, long fifth) -{ - int version, ret; - - version = call >> 16; /* hack for backward compatibility */ - call &= 0xffff; - - ret = -ENOSYS; - switch (call) { - case SEMOP: - ret = sys_semtimedop(first, (struct sembuf __user *)ptr, - (unsigned)second, NULL); - break; - case SEMTIMEDOP: - ret = sys_semtimedop(first, (struct sembuf __user *)ptr, - (unsigned)second, - (const struct timespec __user *) fifth); - break; - case SEMGET: - ret = sys_semget (first, (int)second, third); - break; - case SEMCTL: { - union semun fourth; - - ret = -EINVAL; - if (!ptr) - break; - if ((ret = get_user(fourth.__pad, (void __user * __user *)ptr))) - break; - ret = sys_semctl(first, (int)second, third, fourth); - break; - } - case MSGSND: - ret = sys_msgsnd(first, (struct msgbuf __user *)ptr, - (size_t)second, third); - break; - case MSGRCV: - switch (version) { - case 0: { - struct ipc_kludge tmp; - - ret = -EINVAL; - if (!ptr) - break; - if ((ret = copy_from_user(&tmp, - (struct ipc_kludge __user *) ptr, - sizeof (tmp)) ? -EFAULT : 0)) - break; - ret = sys_msgrcv(first, tmp.msgp, (size_t) second, - tmp.msgtyp, third); - break; - } - default: - ret = sys_msgrcv (first, (struct msgbuf __user *) ptr, - (size_t)second, fifth, third); - break; - } - break; - case MSGGET: - ret = sys_msgget((key_t)first, (int)second); - break; - case MSGCTL: - ret = sys_msgctl(first, (int)second, - (struct msqid_ds __user *)ptr); - break; - case SHMAT: { - ulong raddr; - ret = do_shmat(first, (char __user *)ptr, (int)second, &raddr); - if (ret) - break; - ret = put_user(raddr, (ulong __user *) third); - break; - } - case SHMDT: - ret = sys_shmdt((char __user *)ptr); - break; - case SHMGET: - ret = sys_shmget(first, (size_t)second, third); - break; - case SHMCTL: - ret = sys_shmctl(first, (int)second, - (struct shmid_ds __user *)ptr); - break; - } - - return ret; -} - static inline unsigned long do_mmap2(unsigned long addr, size_t len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long off, int shift) diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 5de54d2af0b2..15fd68b196c0 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -30,7 +30,7 @@ struct fadvise64_64_args; struct old_sigaction; long sys_mmap2(struct s390_mmap_arg_struct __user *arg); -long sys_ipc(uint call, int first, unsigned long second, +long sys_s390_ipc(uint call, int first, unsigned long second, unsigned long third, void __user *ptr); long sys_s390_newuname(struct new_utsname __user *name); long sys_s390_personality(unsigned long personality); diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c index b2563509b5a9..b8b78092ab7c 100644 --- a/arch/s390/kernel/sys_s390.c +++ b/arch/s390/kernel/sys_s390.c @@ -64,7 +64,7 @@ out: * * This is really horribly ugly. 
*/ -SYSCALL_DEFINE5(ipc, uint, call, int, first, unsigned long, second, +SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, unsigned long, second, unsigned long, third, void __user *, ptr) { struct ipc_kludge tmp; diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 2a24766567af..990ac8b321c8 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -125,7 +125,7 @@ NI_SYSCALL /* vm86old for i386 */ SYSCALL(sys_wait4,sys_wait4,compat_sys_wait4_wrapper) SYSCALL(sys_swapoff,sys_swapoff,sys32_swapoff_wrapper) /* 115 */ SYSCALL(sys_sysinfo,sys_sysinfo,compat_sys_sysinfo_wrapper) -SYSCALL(sys_ipc,sys_ipc,sys32_ipc_wrapper) +SYSCALL(sys_s390_ipc,sys_s390_ipc,sys32_ipc_wrapper) SYSCALL(sys_fsync,sys_fsync,sys32_fsync_wrapper) SYSCALL(sys_sigreturn,sys_sigreturn,sys32_sigreturn) SYSCALL(sys_clone,sys_clone,sys_clone_wrapper) /* 120 */ diff --git a/arch/sh/include/asm/syscalls.h b/arch/sh/include/asm/syscalls.h index c1e2b8deb837..c1ce2862f7be 100644 --- a/arch/sh/include/asm/syscalls.h +++ b/arch/sh/include/asm/syscalls.h @@ -11,8 +11,6 @@ asmlinkage int old_mmap(unsigned long addr, unsigned long len, asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff); -asmlinkage int sys_ipc(uint call, int first, int second, - int third, void __user *ptr, long fifth); asmlinkage int sys_uname(struct old_utsname __user *name); #ifdef CONFIG_SUPERH32 diff --git a/arch/sh/include/asm/unistd_32.h b/arch/sh/include/asm/unistd_32.h index 365744b05269..a48f65e2e429 100644 --- a/arch/sh/include/asm/unistd_32.h +++ b/arch/sh/include/asm/unistd_32.h @@ -358,6 +358,7 @@ #define __ARCH_WANT_STAT64 #define __ARCH_WANT_SYS_ALARM #define __ARCH_WANT_SYS_GETHOSTNAME +#define __ARCH_WANT_SYS_IPC #define __ARCH_WANT_SYS_PAUSE #define __ARCH_WANT_SYS_SGETMASK #define __ARCH_WANT_SYS_SIGNAL diff --git a/arch/sh/include/asm/unistd_64.h b/arch/sh/include/asm/unistd_64.h index 25de158aac3a..7709b2b8f752 100644 --- a/arch/sh/include/asm/unistd_64.h +++ b/arch/sh/include/asm/unistd_64.h @@ -398,6 +398,7 @@ #define __ARCH_WANT_STAT64 #define __ARCH_WANT_SYS_ALARM #define __ARCH_WANT_SYS_GETHOSTNAME +#define __ARCH_WANT_SYS_IPC #define __ARCH_WANT_SYS_PAUSE #define __ARCH_WANT_SYS_SGETMASK #define __ARCH_WANT_SYS_SIGNAL diff --git a/arch/sh/kernel/sys_sh.c b/arch/sh/kernel/sys_sh.c index 71399cde03b5..c18cfaa67fdd 100644 --- a/arch/sh/kernel/sys_sh.c +++ b/arch/sh/kernel/sys_sh.c @@ -53,110 +53,6 @@ asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, return sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff); } -/* - * sys_ipc() is the de-multiplexer for the SysV IPC calls.. - * - * This is really horribly ugly. 
- */ -asmlinkage int sys_ipc(uint call, int first, int second, - int third, void __user *ptr, long fifth) -{ - int version, ret; - - version = call >> 16; /* hack for backward compatibility */ - call &= 0xffff; - - if (call <= SEMTIMEDOP) - switch (call) { - case SEMOP: - return sys_semtimedop(first, - (struct sembuf __user *)ptr, - second, NULL); - case SEMTIMEDOP: - return sys_semtimedop(first, - (struct sembuf __user *)ptr, second, - (const struct timespec __user *)fifth); - case SEMGET: - return sys_semget (first, second, third); - case SEMCTL: { - union semun fourth; - if (!ptr) - return -EINVAL; - if (get_user(fourth.__pad, (void __user * __user *) ptr)) - return -EFAULT; - return sys_semctl (first, second, third, fourth); - } - default: - return -EINVAL; - } - - if (call <= MSGCTL) - switch (call) { - case MSGSND: - return sys_msgsnd (first, (struct msgbuf __user *) ptr, - second, third); - case MSGRCV: - switch (version) { - case 0: - { - struct ipc_kludge tmp; - - if (!ptr) - return -EINVAL; - - if (copy_from_user(&tmp, - (struct ipc_kludge __user *) ptr, - sizeof (tmp))) - return -EFAULT; - - return sys_msgrcv (first, tmp.msgp, second, - tmp.msgtyp, third); - } - default: - return sys_msgrcv (first, - (struct msgbuf __user *) ptr, - second, fifth, third); - } - case MSGGET: - return sys_msgget ((key_t) first, second); - case MSGCTL: - return sys_msgctl (first, second, - (struct msqid_ds __user *) ptr); - default: - return -EINVAL; - } - if (call <= SHMCTL) - switch (call) { - case SHMAT: - switch (version) { - default: { - ulong raddr; - ret = do_shmat (first, (char __user *) ptr, - second, &raddr); - if (ret) - return ret; - return put_user (raddr, (ulong __user *) third); - } - case 1: /* iBCS2 emulator entry point */ - if (!segment_eq(get_fs(), get_ds())) - return -EINVAL; - return do_shmat (first, (char __user *) ptr, - second, (ulong *) third); - } - case SHMDT: - return sys_shmdt ((char __user *)ptr); - case SHMGET: - return sys_shmget (first, second, third); - case SHMCTL: - return sys_shmctl (first, second, - (struct shmid_ds __user *) ptr); - default: - return -EINVAL; - } - - return -EINVAL; -} - /* sys_cacheflush -- flush (part of) the processor cache. */ asmlinkage int sys_cacheflush(unsigned long addr, unsigned long len, int op) { diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h index cb4b9bfd0d87..d0b3b01ac9d4 100644 --- a/arch/sparc/include/asm/unistd.h +++ b/arch/sparc/include/asm/unistd.h @@ -432,7 +432,9 @@ #define __ARCH_WANT_SYS_SIGPENDING #define __ARCH_WANT_SYS_SIGPROCMASK #define __ARCH_WANT_SYS_RT_SIGSUSPEND -#ifndef __32bit_syscall_numbers__ +#ifdef __32bit_syscall_numbers__ +#define __ARCH_WANT_SYS_IPC +#else #define __ARCH_WANT_COMPAT_SYS_TIME #define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND #endif diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c index 3a82e65d8db2..ee995b7dae7e 100644 --- a/arch/sparc/kernel/sys_sparc_32.c +++ b/arch/sparc/kernel/sys_sparc_32.c @@ -98,119 +98,6 @@ out: return error; } -/* - * sys_ipc() is the de-multiplexer for the SysV IPC calls.. - * - * This is really horribly ugly. 
- */ - -asmlinkage int sys_ipc (uint call, int first, int second, int third, void __user *ptr, long fifth) -{ - int version, err; - - version = call >> 16; /* hack for backward compatibility */ - call &= 0xffff; - - if (call <= SEMCTL) - switch (call) { - case SEMOP: - err = sys_semtimedop (first, (struct sembuf __user *)ptr, second, NULL); - goto out; - case SEMTIMEDOP: - err = sys_semtimedop (first, (struct sembuf __user *)ptr, second, (const struct timespec __user *) fifth); - goto out; - case SEMGET: - err = sys_semget (first, second, third); - goto out; - case SEMCTL: { - union semun fourth; - err = -EINVAL; - if (!ptr) - goto out; - err = -EFAULT; - if (get_user(fourth.__pad, - (void __user * __user *)ptr)) - goto out; - err = sys_semctl (first, second, third, fourth); - goto out; - } - default: - err = -ENOSYS; - goto out; - } - if (call <= MSGCTL) - switch (call) { - case MSGSND: - err = sys_msgsnd (first, (struct msgbuf __user *) ptr, - second, third); - goto out; - case MSGRCV: - switch (version) { - case 0: { - struct ipc_kludge tmp; - err = -EINVAL; - if (!ptr) - goto out; - err = -EFAULT; - if (copy_from_user(&tmp, (struct ipc_kludge __user *) ptr, sizeof (tmp))) - goto out; - err = sys_msgrcv (first, tmp.msgp, second, tmp.msgtyp, third); - goto out; - } - case 1: default: - err = sys_msgrcv (first, - (struct msgbuf __user *) ptr, - second, fifth, third); - goto out; - } - case MSGGET: - err = sys_msgget ((key_t) first, second); - goto out; - case MSGCTL: - err = sys_msgctl (first, second, (struct msqid_ds __user *) ptr); - goto out; - default: - err = -ENOSYS; - goto out; - } - if (call <= SHMCTL) - switch (call) { - case SHMAT: - switch (version) { - case 0: default: { - ulong raddr; - err = do_shmat (first, (char __user *) ptr, second, &raddr); - if (err) - goto out; - err = -EFAULT; - if (put_user (raddr, (ulong __user *) third)) - goto out; - err = 0; - goto out; - } - case 1: /* iBCS2 emulator entry point */ - err = -EINVAL; - goto out; - } - case SHMDT: - err = sys_shmdt ((char __user *)ptr); - goto out; - case SHMGET: - err = sys_shmget (first, second, third); - goto out; - case SHMCTL: - err = sys_shmctl (first, second, (struct shmid_ds __user *) ptr); - goto out; - default: - err = -ENOSYS; - goto out; - } - else - err = -ENOSYS; -out: - return err; -} - int sparc_mmap_check(unsigned long addr, unsigned long len) { if (ARCH_SUN4C && diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index cb1bef6f14b7..45410e939628 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -426,7 +426,7 @@ out: * This is really horribly ugly. 
*/ -SYSCALL_DEFINE6(ipc, unsigned int, call, int, first, unsigned long, second, +SYSCALL_DEFINE6(sparc_ipc, unsigned int, call, int, first, unsigned long, second, unsigned long, third, void __user *, ptr, long, fifth) { long err; diff --git a/arch/sparc/kernel/systbls.h b/arch/sparc/kernel/systbls.h index 68312fe8da74..2c331c37e748 100644 --- a/arch/sparc/kernel/systbls.h +++ b/arch/sparc/kernel/systbls.h @@ -10,7 +10,7 @@ struct new_utsname; extern asmlinkage unsigned long sys_getpagesize(void); extern asmlinkage long sparc_pipe(struct pt_regs *regs); -extern asmlinkage long sys_ipc(unsigned int call, int first, +extern asmlinkage long sys_sparc_ipc(unsigned int call, int first, unsigned long second, unsigned long third, void __user *ptr, long fifth); diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index 17614251fb6d..30ca2b1d3a17 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -136,7 +136,7 @@ sys_call_table: /*200*/ .word sys_ssetmask, sys_nis_syscall, sys_newlstat, sys_uselib, sys_nis_syscall .word sys_readahead, sys_socketcall, sys_syslog, sys_lookup_dcookie, sys_fadvise64 /*210*/ .word sys_fadvise64_64, sys_tgkill, sys_waitpid, sys_swapoff, sys_sysinfo - .word sys_ipc, sys_nis_syscall, sys_clone, sys_ioprio_get, sys_adjtimex + .word sys_sparc_ipc, sys_nis_syscall, sys_clone, sys_ioprio_get, sys_adjtimex /*220*/ .word sys_nis_syscall, sys_ni_syscall, sys_delete_module, sys_ni_syscall, sys_getpgid .word sys_bdflush, sys_sysfs, sys_nis_syscall, sys_setfsuid, sys_setfsgid /*230*/ .word sys_select, sys_nis_syscall, sys_splice, sys_stime, sys_statfs64 diff --git a/arch/um/sys-i386/syscalls.c b/arch/um/sys-i386/syscalls.c index d0aa8f125ee6..70ca357393b8 100644 --- a/arch/um/sys-i386/syscalls.c +++ b/arch/um/sys-i386/syscalls.c @@ -34,92 +34,6 @@ long sys_clone(unsigned long clone_flags, unsigned long newsp, return ret; } -/* - * sys_ipc() is the de-multiplexer for the SysV IPC calls.. - * - * This is really horribly ugly. 
- */ -long sys_ipc (uint call, int first, int second, - int third, void __user *ptr, long fifth) -{ - int version, ret; - - version = call >> 16; /* hack for backward compatibility */ - call &= 0xffff; - - switch (call) { - case SEMOP: - return sys_semtimedop(first, (struct sembuf __user *) ptr, - second, NULL); - case SEMTIMEDOP: - return sys_semtimedop(first, (struct sembuf __user *) ptr, - second, - (const struct timespec __user *) fifth); - case SEMGET: - return sys_semget (first, second, third); - case SEMCTL: { - union semun fourth; - if (!ptr) - return -EINVAL; - if (get_user(fourth.__pad, (void __user * __user *) ptr)) - return -EFAULT; - return sys_semctl (first, second, third, fourth); - } - - case MSGSND: - return sys_msgsnd (first, (struct msgbuf *) ptr, - second, third); - case MSGRCV: - switch (version) { - case 0: { - struct ipc_kludge tmp; - if (!ptr) - return -EINVAL; - - if (copy_from_user(&tmp, - (struct ipc_kludge *) ptr, - sizeof (tmp))) - return -EFAULT; - return sys_msgrcv (first, tmp.msgp, second, - tmp.msgtyp, third); - } - default: - panic("msgrcv with version != 0"); - return sys_msgrcv (first, - (struct msgbuf *) ptr, - second, fifth, third); - } - case MSGGET: - return sys_msgget ((key_t) first, second); - case MSGCTL: - return sys_msgctl (first, second, (struct msqid_ds *) ptr); - - case SHMAT: - switch (version) { - default: { - ulong raddr; - ret = do_shmat (first, (char *) ptr, second, &raddr); - if (ret) - return ret; - return put_user (raddr, (ulong *) third); - } - case 1: /* iBCS2 emulator entry point */ - if (!segment_eq(get_fs(), get_ds())) - return -EINVAL; - return do_shmat (first, (char *) ptr, second, (ulong *) third); - } - case SHMDT: - return sys_shmdt ((char *)ptr); - case SHMGET: - return sys_shmget (first, second, third); - case SHMCTL: - return sys_shmctl (first, second, - (struct shmid_ds *) ptr); - default: - return -ENOSYS; - } -} - long sys_sigaction(int sig, const struct old_sigaction __user *act, struct old_sigaction __user *oact) { diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 86ab6a0623fd..50f6a569f0d1 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -54,7 +54,6 @@ unsigned long sys_sigreturn(struct pt_regs *); struct oldold_utsname; struct old_utsname; -asmlinkage int sys_ipc(uint, int, int, int, void __user *, long); asmlinkage int sys_uname(struct old_utsname __user *); asmlinkage int sys_olduname(struct oldold_utsname __user *); diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index daa65d9aae95..45e64a17b86e 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h @@ -354,6 +354,7 @@ #define __ARCH_WANT_STAT64 #define __ARCH_WANT_SYS_ALARM #define __ARCH_WANT_SYS_GETHOSTNAME +#define __ARCH_WANT_SYS_IPC #define __ARCH_WANT_SYS_PAUSE #define __ARCH_WANT_SYS_SGETMASK #define __ARCH_WANT_SYS_SIGNAL diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c index 7955e90c8341..8b5c348fdcf2 100644 --- a/arch/x86/kernel/sys_i386_32.c +++ b/arch/x86/kernel/sys_i386_32.c @@ -24,91 +24,6 @@ #include -/* - * sys_ipc() is the de-multiplexer for the SysV IPC calls.. - * - * This is really horribly ugly. 
- */ -asmlinkage int sys_ipc(uint call, int first, int second, - int third, void __user *ptr, long fifth) -{ - int version, ret; - - version = call >> 16; /* hack for backward compatibility */ - call &= 0xffff; - - switch (call) { - case SEMOP: - return sys_semtimedop(first, (struct sembuf __user *)ptr, second, NULL); - case SEMTIMEDOP: - return sys_semtimedop(first, (struct sembuf __user *)ptr, second, - (const struct timespec __user *)fifth); - - case SEMGET: - return sys_semget(first, second, third); - case SEMCTL: { - union semun fourth; - if (!ptr) - return -EINVAL; - if (get_user(fourth.__pad, (void __user * __user *) ptr)) - return -EFAULT; - return sys_semctl(first, second, third, fourth); - } - - case MSGSND: - return sys_msgsnd(first, (struct msgbuf __user *) ptr, - second, third); - case MSGRCV: - switch (version) { - case 0: { - struct ipc_kludge tmp; - if (!ptr) - return -EINVAL; - - if (copy_from_user(&tmp, - (struct ipc_kludge __user *) ptr, - sizeof(tmp))) - return -EFAULT; - return sys_msgrcv(first, tmp.msgp, second, - tmp.msgtyp, third); - } - default: - return sys_msgrcv(first, - (struct msgbuf __user *) ptr, - second, fifth, third); - } - case MSGGET: - return sys_msgget((key_t) first, second); - case MSGCTL: - return sys_msgctl(first, second, (struct msqid_ds __user *) ptr); - - case SHMAT: - switch (version) { - default: { - ulong raddr; - ret = do_shmat(first, (char __user *) ptr, second, &raddr); - if (ret) - return ret; - return put_user(raddr, (ulong __user *) third); - } - case 1: /* iBCS2 emulator entry point */ - if (!segment_eq(get_fs(), get_ds())) - return -EINVAL; - /* The "(ulong *) third" is valid _only_ because of the kernel segment thing */ - return do_shmat(first, (char __user *) ptr, second, (ulong *) third); - } - case SHMDT: - return sys_shmdt((char __user *)ptr); - case SHMGET: - return sys_shmget(first, second, third); - case SHMCTL: - return sys_shmctl(first, second, - (struct shmid_ds __user *) ptr); - default: - return -ENOSYS; - } -} - /* * Old cruft */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index b60907e3b0d5..fbb61ae70e06 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -684,6 +684,8 @@ asmlinkage long sys_shmat(int shmid, char __user *shmaddr, int shmflg); asmlinkage long sys_shmget(key_t key, size_t size, int flag); asmlinkage long sys_shmdt(char __user *shmaddr); asmlinkage long sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf); +asmlinkage long sys_ipc(unsigned int call, int first, int second, + unsigned long third, void __user *ptr, long fifth); asmlinkage long sys_mq_open(const char __user *name, int oflag, mode_t mode, struct mq_attr __user *attr); asmlinkage long sys_mq_unlink(const char __user *name); diff --git a/ipc/Makefile b/ipc/Makefile index 4e1955ea815d..9075e172e52c 100644 --- a/ipc/Makefile +++ b/ipc/Makefile @@ -3,7 +3,7 @@ # obj-$(CONFIG_SYSVIPC_COMPAT) += compat.o -obj-$(CONFIG_SYSVIPC) += util.o msgutil.o msg.o sem.o shm.o ipcns_notifier.o +obj-$(CONFIG_SYSVIPC) += util.o msgutil.o msg.o sem.o shm.o ipcns_notifier.o syscall.o obj-$(CONFIG_SYSVIPC_SYSCTL) += ipc_sysctl.o obj_mq-$(CONFIG_COMPAT) += compat_mq.o obj-$(CONFIG_POSIX_MQUEUE) += mqueue.o msgutil.o $(obj_mq-y) diff --git a/ipc/syscall.c b/ipc/syscall.c new file mode 100644 index 000000000000..355a3da9ec73 --- /dev/null +++ b/ipc/syscall.c @@ -0,0 +1,99 @@ +/* + * sys_ipc() is the old de-multiplexer for the SysV IPC calls. 
+ * + * This is really horribly ugly, and new architectures should just wire up + * the individual syscalls instead. + */ +#include + +#ifdef __ARCH_WANT_SYS_IPC +#include +#include +#include +#include +#include + +SYSCALL_DEFINE6(ipc, unsigned int, call, int, first, int, second, + unsigned long, third, void __user *, ptr, long, fifth) +{ + int version, ret; + + version = call >> 16; /* hack for backward compatibility */ + call &= 0xffff; + + switch (call) { + case SEMOP: + return sys_semtimedop(first, (struct sembuf __user *)ptr, + second, NULL); + case SEMTIMEDOP: + return sys_semtimedop(first, (struct sembuf __user *)ptr, + second, + (const struct timespec __user *)fifth); + + case SEMGET: + return sys_semget(first, second, third); + case SEMCTL: { + union semun fourth; + if (!ptr) + return -EINVAL; + if (get_user(fourth.__pad, (void __user * __user *) ptr)) + return -EFAULT; + return sys_semctl(first, second, third, fourth); + } + + case MSGSND: + return sys_msgsnd(first, (struct msgbuf __user *) ptr, + second, third); + case MSGRCV: + switch (version) { + case 0: { + struct ipc_kludge tmp; + if (!ptr) + return -EINVAL; + + if (copy_from_user(&tmp, + (struct ipc_kludge __user *) ptr, + sizeof(tmp))) + return -EFAULT; + return sys_msgrcv(first, tmp.msgp, second, + tmp.msgtyp, third); + } + default: + return sys_msgrcv(first, + (struct msgbuf __user *) ptr, + second, fifth, third); + } + case MSGGET: + return sys_msgget((key_t) first, second); + case MSGCTL: + return sys_msgctl(first, second, (struct msqid_ds __user *)ptr); + + case SHMAT: + switch (version) { + default: { + unsigned long raddr; + ret = do_shmat(first, (char __user *)ptr, + second, &raddr); + if (ret) + return ret; + return put_user(raddr, (unsigned long __user *) third); + } + case 1: + /* + * This was the entry point for kernel-originating calls + * from iBCS2 in 2.2 days. + */ + return -EINVAL; + } + case SHMDT: + return sys_shmdt((char __user *)ptr); + case SHMGET: + return sys_shmget(first, second, third); + case SHMCTL: + return sys_shmctl(first, second, + (struct shmid_ds __user *) ptr); + default: + return -ENOSYS; + } +} +#endif diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 695384f12a7d..70f2ea758ffe 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -126,6 +126,7 @@ cond_syscall(sys_setreuid16); cond_syscall(sys_setuid16); cond_syscall(sys_vm86old); cond_syscall(sys_vm86); +cond_syscall(sys_ipc); cond_syscall(compat_sys_ipc); cond_syscall(compat_sys_sysctl); cond_syscall(sys_flock); -- cgit v1.2.3 From 5cacdb4add1b1e50fe75edc50ebbb7bddd9cf5e7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 10 Mar 2010 15:21:21 -0800 Subject: Add generic sys_olduname() Add generic implementations of the old and really old uname system calls. Note that sh only implements sys_olduname but not sys_oldolduname, but I'm not going to bother with another ifdef for that special case. m32r implemented an old uname but never wired it up, so kill it, too. Signed-off-by: Christoph Hellwig Cc: Ralf Baechle Cc: Benjamin Herrenschmidt Cc: Paul Mundt Cc: Jeff Dike Cc: Hirokazu Takata Cc: Thomas Gleixner Cc: Ingo Molnar Cc: H. 
Peter Anvin Cc: Al Viro Cc: Arnd Bergmann Cc: Heiko Carstens Cc: Martin Schwidefsky Cc: "Luck, Tony" Cc: James Morris Cc: Andreas Schwab Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/m32r/kernel/sys_m32r.c | 11 -------- arch/mips/include/asm/unistd.h | 1 + arch/mips/kernel/syscall.c | 42 ----------------------------- arch/powerpc/include/asm/unistd.h | 1 + arch/powerpc/kernel/syscalls.c | 57 --------------------------------------- arch/sh/include/asm/syscalls.h | 3 --- arch/sh/include/asm/unistd_32.h | 1 + arch/sh/include/asm/unistd_64.h | 1 + arch/sh/kernel/sys_sh.c | 11 -------- arch/um/kernel/syscall.c | 45 ------------------------------- arch/x86/ia32/ia32entry.S | 4 +-- arch/x86/ia32/sys_ia32.c | 52 ----------------------------------- arch/x86/include/asm/sys_ia32.h | 5 ---- arch/x86/include/asm/syscalls.h | 7 ----- arch/x86/include/asm/unistd_32.h | 1 + arch/x86/include/asm/unistd_64.h | 1 + arch/x86/kernel/sys_i386_32.c | 49 --------------------------------- include/linux/syscalls.h | 4 +++ kernel/sys.c | 54 +++++++++++++++++++++++++++++++++++++ 19 files changed, 66 insertions(+), 284 deletions(-) (limited to 'include') diff --git a/arch/m32r/kernel/sys_m32r.c b/arch/m32r/kernel/sys_m32r.c index cf2e7279ce9b..0a00f467edfa 100644 --- a/arch/m32r/kernel/sys_m32r.c +++ b/arch/m32r/kernel/sys_m32r.c @@ -76,17 +76,6 @@ asmlinkage int sys_tas(int __user *addr) return oldval; } -asmlinkage int sys_uname(struct old_utsname __user * name) -{ - int err; - if (!name) - return -EFAULT; - down_read(&uts_sem); - err = copy_to_user(name, utsname(), sizeof (*name)); - up_read(&uts_sem); - return err?-EFAULT:0; -} - asmlinkage int sys_cacheflush(void *addr, int bytes, int cache) { /* This should flush more selectively ... */ diff --git a/arch/mips/include/asm/unistd.h b/arch/mips/include/asm/unistd.h index 97fe472095f2..1b5a6648eb86 100644 --- a/arch/mips/include/asm/unistd.h +++ b/arch/mips/include/asm/unistd.h @@ -1014,6 +1014,7 @@ #define __ARCH_WANT_SYS_LLSEEK #define __ARCH_WANT_SYS_NICE #define __ARCH_WANT_SYS_OLD_GETRLIMIT +#define __ARCH_WANT_SYS_OLD_UNAME #define __ARCH_WANT_SYS_OLDUMOUNT #define __ARCH_WANT_SYS_SIGPENDING #define __ARCH_WANT_SYS_SIGPROCMASK diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index 257bf0141775..e96b1c30c7aa 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -215,48 +215,6 @@ out: return error; } -/* - * Compacrapability ... - */ -SYSCALL_DEFINE1(uname, struct old_utsname __user *, name) -{ - if (name && !copy_to_user(name, utsname(), sizeof (*name))) - return 0; - return -EFAULT; -} - -/* - * Compacrapability ... 
- */ -SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name) -{ - int error; - - if (!name) - return -EFAULT; - if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname))) - return -EFAULT; - - error = __copy_to_user(&name->sysname, &utsname()->sysname, - __OLD_UTS_LEN); - error -= __put_user(0, name->sysname + __OLD_UTS_LEN); - error -= __copy_to_user(&name->nodename, &utsname()->nodename, - __OLD_UTS_LEN); - error -= __put_user(0, name->nodename + __OLD_UTS_LEN); - error -= __copy_to_user(&name->release, &utsname()->release, - __OLD_UTS_LEN); - error -= __put_user(0, name->release + __OLD_UTS_LEN); - error -= __copy_to_user(&name->version, &utsname()->version, - __OLD_UTS_LEN); - error -= __put_user(0, name->version + __OLD_UTS_LEN); - error -= __copy_to_user(&name->machine, &utsname()->machine, - __OLD_UTS_LEN); - error = __put_user(0, name->machine + __OLD_UTS_LEN); - error = error ? -EFAULT : 0; - - return error; -} - SYSCALL_DEFINE1(set_thread_area, unsigned long, addr) { struct thread_info *ti = task_thread_info(current); diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index c13821fe8741..f0a10266e7f7 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -377,6 +377,7 @@ #define __ARCH_WANT_SYS_LLSEEK #define __ARCH_WANT_SYS_NICE #define __ARCH_WANT_SYS_OLD_GETRLIMIT +#define __ARCH_WANT_SYS_OLD_UNAME #define __ARCH_WANT_SYS_OLDUMOUNT #define __ARCH_WANT_SYS_SIGPENDING #define __ARCH_WANT_SYS_SIGPROCMASK diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index 69d3c5d50a54..f2496f2faecc 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -116,63 +116,6 @@ long ppc64_personality(unsigned long personality) } #endif -#ifdef CONFIG_PPC64 -#define OVERRIDE_MACHINE (personality(current->personality) == PER_LINUX32) -#else -#define OVERRIDE_MACHINE 0 -#endif - -static inline int override_machine(char __user *mach) -{ - if (OVERRIDE_MACHINE) { - /* change ppc64 to ppc */ - if (__put_user(0, mach+3) || __put_user(0, mach+4)) - return -EFAULT; - } - return 0; -} - -int sys_uname(struct old_utsname __user *name) -{ - int err = 0; - - down_read(&uts_sem); - if (copy_to_user(name, utsname(), sizeof(*name))) - err = -EFAULT; - up_read(&uts_sem); - if (!err) - err = override_machine(name->machine); - return err; -} - -int sys_olduname(struct oldold_utsname __user *name) -{ - int error; - - if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname))) - return -EFAULT; - - down_read(&uts_sem); - error = __copy_to_user(&name->sysname, &utsname()->sysname, - __OLD_UTS_LEN); - error |= __put_user(0, name->sysname + __OLD_UTS_LEN); - error |= __copy_to_user(&name->nodename, &utsname()->nodename, - __OLD_UTS_LEN); - error |= __put_user(0, name->nodename + __OLD_UTS_LEN); - error |= __copy_to_user(&name->release, &utsname()->release, - __OLD_UTS_LEN); - error |= __put_user(0, name->release + __OLD_UTS_LEN); - error |= __copy_to_user(&name->version, &utsname()->version, - __OLD_UTS_LEN); - error |= __put_user(0, name->version + __OLD_UTS_LEN); - error |= __copy_to_user(&name->machine, &utsname()->machine, - __OLD_UTS_LEN); - error |= override_machine(name->machine); - up_read(&uts_sem); - - return error? 
-EFAULT: 0; -} - long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low, u32 len_high, u32 len_low) { diff --git a/arch/sh/include/asm/syscalls.h b/arch/sh/include/asm/syscalls.h index c1ce2862f7be..507725af2e54 100644 --- a/arch/sh/include/asm/syscalls.h +++ b/arch/sh/include/asm/syscalls.h @@ -3,15 +3,12 @@ #ifdef __KERNEL__ -struct old_utsname; - asmlinkage int old_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, int fd, unsigned long off); asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff); -asmlinkage int sys_uname(struct old_utsname __user *name); #ifdef CONFIG_SUPERH32 # include "syscalls_32.h" diff --git a/arch/sh/include/asm/unistd_32.h b/arch/sh/include/asm/unistd_32.h index a48f65e2e429..0e7f0fc8f086 100644 --- a/arch/sh/include/asm/unistd_32.h +++ b/arch/sh/include/asm/unistd_32.h @@ -371,6 +371,7 @@ #define __ARCH_WANT_SYS_LLSEEK #define __ARCH_WANT_SYS_NICE #define __ARCH_WANT_SYS_OLD_GETRLIMIT +#define __ARCH_WANT_SYS_OLD_UNAME #define __ARCH_WANT_SYS_OLDUMOUNT #define __ARCH_WANT_SYS_SIGPENDING #define __ARCH_WANT_SYS_SIGPROCMASK diff --git a/arch/sh/include/asm/unistd_64.h b/arch/sh/include/asm/unistd_64.h index 7709b2b8f752..0580c33a1e04 100644 --- a/arch/sh/include/asm/unistd_64.h +++ b/arch/sh/include/asm/unistd_64.h @@ -411,6 +411,7 @@ #define __ARCH_WANT_SYS_LLSEEK #define __ARCH_WANT_SYS_NICE #define __ARCH_WANT_SYS_OLD_GETRLIMIT +#define __ARCH_WANT_SYS_OLD_UNAME #define __ARCH_WANT_SYS_OLDUMOUNT #define __ARCH_WANT_SYS_SIGPENDING #define __ARCH_WANT_SYS_SIGPROCMASK diff --git a/arch/sh/kernel/sys_sh.c b/arch/sh/kernel/sys_sh.c index c18cfaa67fdd..81f58371613d 100644 --- a/arch/sh/kernel/sys_sh.c +++ b/arch/sh/kernel/sys_sh.c @@ -93,14 +93,3 @@ asmlinkage int sys_cacheflush(unsigned long addr, unsigned long len, int op) up_read(¤t->mm->mmap_sem); return 0; } - -asmlinkage int sys_uname(struct old_utsname __user *name) -{ - int err; - if (!name) - return -EFAULT; - down_read(&uts_sem); - err = copy_to_user(name, utsname(), sizeof(*name)); - up_read(&uts_sem); - return err?-EFAULT:0; -} diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c index cccab850c27e..4393173923f5 100644 --- a/arch/um/kernel/syscall.c +++ b/arch/um/kernel/syscall.c @@ -51,51 +51,6 @@ long old_mmap(unsigned long addr, unsigned long len, return err; } -long sys_uname(struct old_utsname __user * name) -{ - long err; - if (!name) - return -EFAULT; - down_read(&uts_sem); - err = copy_to_user(name, utsname(), sizeof (*name)); - up_read(&uts_sem); - return err?-EFAULT:0; -} - -long sys_olduname(struct oldold_utsname __user * name) -{ - long error; - - if (!name) - return -EFAULT; - if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname))) - return -EFAULT; - - down_read(&uts_sem); - - error = __copy_to_user(&name->sysname, &utsname()->sysname, - __OLD_UTS_LEN); - error |= __put_user(0, name->sysname + __OLD_UTS_LEN); - error |= __copy_to_user(&name->nodename, &utsname()->nodename, - __OLD_UTS_LEN); - error |= __put_user(0, name->nodename + __OLD_UTS_LEN); - error |= __copy_to_user(&name->release, &utsname()->release, - __OLD_UTS_LEN); - error |= __put_user(0, name->release + __OLD_UTS_LEN); - error |= __copy_to_user(&name->version, &utsname()->version, - __OLD_UTS_LEN); - error |= __put_user(0, name->version + __OLD_UTS_LEN); - error |= __copy_to_user(&name->machine, &utsname()->machine, - __OLD_UTS_LEN); - error |= __put_user(0, 
name->machine + __OLD_UTS_LEN); - - up_read(&uts_sem); - - error = error ? -EFAULT : 0; - - return error; -} - int kernel_execve(const char *filename, char *const argv[], char *const envp[]) { mm_segment_t fs; diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 34f821802c23..59b4556a5b92 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -563,7 +563,7 @@ ia32_sys_call_table: .quad quiet_ni_syscall /* old mpx syscall holder */ .quad sys_setpgid .quad quiet_ni_syscall /* old ulimit syscall holder */ - .quad sys32_olduname + .quad sys_olduname .quad sys_umask /* 60 */ .quad sys_chroot .quad compat_sys_ustat @@ -613,7 +613,7 @@ ia32_sys_call_table: .quad compat_sys_newstat .quad compat_sys_newlstat .quad compat_sys_newfstat - .quad sys32_uname + .quad sys_uname .quad stub32_iopl /* 110 */ .quad sys_vhangup .quad quiet_ni_syscall /* old "idle" system call */ diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 56c99f46e289..74c35431b7d8 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -448,58 +448,6 @@ asmlinkage long sys32_sendfile(int out_fd, int in_fd, return ret; } -asmlinkage long sys32_olduname(struct oldold_utsname __user *name) -{ - char *arch = "x86_64"; - int err; - - if (!name) - return -EFAULT; - if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname))) - return -EFAULT; - - down_read(&uts_sem); - - err = __copy_to_user(&name->sysname, &utsname()->sysname, - __OLD_UTS_LEN); - err |= __put_user(0, name->sysname+__OLD_UTS_LEN); - err |= __copy_to_user(&name->nodename, &utsname()->nodename, - __OLD_UTS_LEN); - err |= __put_user(0, name->nodename+__OLD_UTS_LEN); - err |= __copy_to_user(&name->release, &utsname()->release, - __OLD_UTS_LEN); - err |= __put_user(0, name->release+__OLD_UTS_LEN); - err |= __copy_to_user(&name->version, &utsname()->version, - __OLD_UTS_LEN); - err |= __put_user(0, name->version+__OLD_UTS_LEN); - - if (personality(current->personality) == PER_LINUX32) - arch = "i686"; - - err |= __copy_to_user(&name->machine, arch, strlen(arch) + 1); - - up_read(&uts_sem); - - err = err ? -EFAULT : 0; - - return err; -} - -long sys32_uname(struct old_utsname __user *name) -{ - int err; - - if (!name) - return -EFAULT; - down_read(&uts_sem); - err = copy_to_user(name, utsname(), sizeof(*name)); - up_read(&uts_sem); - if (personality(current->personality) == PER_LINUX32) - err |= copy_to_user(&name->machine, "i686", 5); - - return err ? 
-EFAULT : 0; -} - asmlinkage long sys32_execve(char __user *name, compat_uptr_t __user *argv, compat_uptr_t __user *envp, struct pt_regs *regs) { diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index 7d348d803669..3ad421784ae7 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -54,11 +54,6 @@ asmlinkage long sys32_pwrite(unsigned int, char __user *, u32, u32, u32); asmlinkage long sys32_personality(unsigned long); asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32); -struct oldold_utsname; -struct old_utsname; -asmlinkage long sys32_olduname(struct oldold_utsname __user *); -long sys32_uname(struct old_utsname __user *); - asmlinkage long sys32_execve(char __user *, compat_uptr_t __user *, compat_uptr_t __user *, struct pt_regs *); asmlinkage long sys32_clone(unsigned int, unsigned int, struct pt_regs *); diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 47cd606c3537..5c044b43e9a7 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -50,13 +50,6 @@ asmlinkage int sys_sigaction(int, const struct old_sigaction __user *, struct old_sigaction __user *); unsigned long sys_sigreturn(struct pt_regs *); -/* kernel/sys_i386_32.c */ -struct oldold_utsname; -struct old_utsname; - -asmlinkage int sys_uname(struct old_utsname __user *); -asmlinkage int sys_olduname(struct oldold_utsname __user *); - /* kernel/vm86_32.c */ int sys_vm86old(struct vm86_struct __user *, struct pt_regs *); int sys_vm86(unsigned long, unsigned long, struct pt_regs *); diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index 45e64a17b86e..beb9b5f8f8a4 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h @@ -367,6 +367,7 @@ #define __ARCH_WANT_SYS_LLSEEK #define __ARCH_WANT_SYS_NICE #define __ARCH_WANT_SYS_OLD_GETRLIMIT +#define __ARCH_WANT_SYS_OLD_UNAME #define __ARCH_WANT_SYS_OLD_MMAP #define __ARCH_WANT_SYS_OLD_SELECT #define __ARCH_WANT_SYS_OLDUMOUNT diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index 83e2d6dc5038..ff4307b0e81e 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h @@ -680,6 +680,7 @@ __SYSCALL(__NR_recvmmsg, sys_recvmmsg) #define __ARCH_WANT_SYS_LLSEEK #define __ARCH_WANT_SYS_NICE #define __ARCH_WANT_SYS_OLD_GETRLIMIT +#define __ARCH_WANT_SYS_OLD_UNAME #define __ARCH_WANT_SYS_OLDUMOUNT #define __ARCH_WANT_SYS_SIGPENDING #define __ARCH_WANT_SYS_SIGPROCMASK diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c index 8b5c348fdcf2..196552bb412c 100644 --- a/arch/x86/kernel/sys_i386_32.c +++ b/arch/x86/kernel/sys_i386_32.c @@ -24,55 +24,6 @@ #include -/* - * Old cruft - */ -asmlinkage int sys_uname(struct old_utsname __user *name) -{ - int err; - if (!name) - return -EFAULT; - down_read(&uts_sem); - err = copy_to_user(name, utsname(), sizeof(*name)); - up_read(&uts_sem); - return err? 
-EFAULT:0; -} - -asmlinkage int sys_olduname(struct oldold_utsname __user *name) -{ - int error; - - if (!name) - return -EFAULT; - if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname))) - return -EFAULT; - - down_read(&uts_sem); - - error = __copy_to_user(&name->sysname, &utsname()->sysname, - __OLD_UTS_LEN); - error |= __put_user(0, name->sysname + __OLD_UTS_LEN); - error |= __copy_to_user(&name->nodename, &utsname()->nodename, - __OLD_UTS_LEN); - error |= __put_user(0, name->nodename + __OLD_UTS_LEN); - error |= __copy_to_user(&name->release, &utsname()->release, - __OLD_UTS_LEN); - error |= __put_user(0, name->release + __OLD_UTS_LEN); - error |= __copy_to_user(&name->version, &utsname()->version, - __OLD_UTS_LEN); - error |= __put_user(0, name->version + __OLD_UTS_LEN); - error |= __copy_to_user(&name->machine, &utsname()->machine, - __OLD_UTS_LEN); - error |= __put_user(0, name->machine + __OLD_UTS_LEN); - - up_read(&uts_sem); - - error = error ? -EFAULT : 0; - - return error; -} - - /* * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index fbb61ae70e06..44f2ad0e8825 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -31,6 +31,8 @@ struct msqid_ds; struct new_utsname; struct nfsctl_arg; struct __old_kernel_stat; +struct oldold_utsname; +struct old_utsname; struct pollfd; struct rlimit; struct rusage; @@ -655,6 +657,8 @@ asmlinkage long sys_gethostname(char __user *name, int len); asmlinkage long sys_sethostname(char __user *name, int len); asmlinkage long sys_setdomainname(char __user *name, int len); asmlinkage long sys_newuname(struct new_utsname __user *name); +asmlinkage long sys_uname(struct old_utsname __user *); +asmlinkage long sys_olduname(struct oldold_utsname __user *); asmlinkage long sys_getrlimit(unsigned int resource, struct rlimit __user *rlim); diff --git a/kernel/sys.c b/kernel/sys.c index e483eb5530e4..8298878f4f71 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1138,6 +1138,60 @@ SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name) return errno; } +#ifdef __ARCH_WANT_SYS_OLD_UNAME +/* + * Old cruft + */ +SYSCALL_DEFINE1(uname, struct old_utsname __user *, name) +{ + int error = 0; + + if (!name) + return -EFAULT; + + down_read(&uts_sem); + if (copy_to_user(name, utsname(), sizeof(*name))) + error = -EFAULT; + up_read(&uts_sem); + + if (!error && override_architecture(name)) + error = -EFAULT; + return error; +} + +SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name) +{ + int error; + + if (!name) + return -EFAULT; + if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname))) + return -EFAULT; + + down_read(&uts_sem); + error = __copy_to_user(&name->sysname, &utsname()->sysname, + __OLD_UTS_LEN); + error |= __put_user(0, name->sysname + __OLD_UTS_LEN); + error |= __copy_to_user(&name->nodename, &utsname()->nodename, + __OLD_UTS_LEN); + error |= __put_user(0, name->nodename + __OLD_UTS_LEN); + error |= __copy_to_user(&name->release, &utsname()->release, + __OLD_UTS_LEN); + error |= __put_user(0, name->release + __OLD_UTS_LEN); + error |= __copy_to_user(&name->version, &utsname()->version, + __OLD_UTS_LEN); + error |= __put_user(0, name->version + __OLD_UTS_LEN); + error |= __copy_to_user(&name->machine, &utsname()->machine, + __OLD_UTS_LEN); + error |= __put_user(0, name->machine + __OLD_UTS_LEN); + up_read(&uts_sem); + + if (!error && override_architecture(name)) + error = -EFAULT; + return 
error ? -EFAULT : 0; +} +#endif + SYSCALL_DEFINE2(sethostname, char __user *, name, int, len) { int errno; -- cgit v1.2.3 From 6e3e37a5a7c91045497553bcbd6a5faea98b21b3 Mon Sep 17 00:00:00 2001 From: André Goddard Rosa Date: Wed, 10 Mar 2010 15:21:22 -0800 Subject: coredump: plug a memory leak situation on dump_seek() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After having started writing the coredump, if filesystem reports an error anytime while writing part of the core file, we would leak a memory page when bailing out. Signed-off-by: André Goddard Rosa Acked-by: KOSAKI Motohiro Cc: Alexander Viro Cc: Roland McGrath Cc: WANG Cong Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/coredump.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/coredump.h b/include/linux/coredump.h index b3c91d7cede4..8ba66a9d9022 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -16,6 +16,8 @@ static inline int dump_write(struct file *file, const void *addr, int nr) static inline int dump_seek(struct file *file, loff_t off) { + int ret = 1; + if (file->f_op->llseek && file->f_op->llseek != no_llseek) { if (file->f_op->llseek(file, off, SEEK_CUR) < 0) return 0; @@ -29,13 +31,15 @@ static inline int dump_seek(struct file *file, loff_t off) if (n > PAGE_SIZE) n = PAGE_SIZE; - if (!dump_write(file, buf, n)) - return 0; + if (!dump_write(file, buf, n)) { + ret = 0; + break; + } off -= n; } free_page((unsigned long)buf); } - return 1; + return ret; } #endif /* _LINUX_COREDUMP_H */ -- cgit v1.2.3 From c1c341a060da1bd66a1982198b1a99765b07b8a2 Mon Sep 17 00:00:00 2001 From: Jaya Kumar Date: Wed, 10 Mar 2010 15:21:24 -0800 Subject: broadsheetfb: add multiple panel type support Update broadsheetfb to add support for multiple panel types. The 3.7" and 6" are known to work but the 9.7" is untested due to lack of hardware. Signed-off-by: Jaya Kumar Cc: Geert Uytterhoeven Cc: Krzysztof Helt Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mach-pxa/am300epd.c | 2 +- drivers/video/broadsheetfb.c | 127 +++++++++++++++++++++++++++++++++++++------ include/video/broadsheetfb.h | 1 + 3 files changed, 112 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/arch/arm/mach-pxa/am300epd.c b/arch/arm/mach-pxa/am300epd.c index 4bd10a17332e..993d75e66390 100644 --- a/arch/arm/mach-pxa/am300epd.c +++ b/arch/arm/mach-pxa/am300epd.c @@ -288,7 +288,7 @@ int __init am300_init(void) } module_param(panel_type, uint, 0); -MODULE_PARM_DESC(panel_type, "Select the panel type: 6, 8, 97"); +MODULE_PARM_DESC(panel_type, "Select the panel type: 37, 6, 97"); MODULE_DESCRIPTION("board driver for am300 epd kit"); MODULE_AUTHOR("Jaya Kumar"); diff --git a/drivers/video/broadsheetfb.c b/drivers/video/broadsheetfb.c index df9ccb901d86..8f011062dec5 100644 --- a/drivers/video/broadsheetfb.c +++ b/drivers/video/broadsheetfb.c @@ -33,7 +33,60 @@ #include