From bd8fbdee6562ee526f3c2582a3b373ef195015dd Mon Sep 17 00:00:00 2001
From: Rui Sousa <rui.p.m.sousa@gmail.com>
Date: Wed, 3 Sep 2008 17:53:07 +0200
Subject: lockdep: fix compilation when CONFIG_TRACE_IRQFLAGS_SUPPORT is not
 set

This patch fixes compilation if CONFIG_TRACE_IRQFLAGS_SUPPORT is ever
disabled (which is currently not allowed by Kconfig). Alternatively
we could just remove the option altogether and the associated code
paths. Since the compilation error has been in the tree for at
least two years and no one noticed it, I guess we don't really have
the need for CONFIG_TRACE_IRQFLAGS_SUPPORT=n.

Boot tested on x86 UP.

Signed-off-by: Rui Sousa <rui.p.m.sousa@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/irqflags.h | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 74bde13224c9..f2993512b3b5 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -52,10 +52,10 @@
 # define start_critical_timings() do { } while (0)
 #endif
 
-#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
-
 #include <asm/irqflags.h>
 
+#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
+
 #define local_irq_enable() \
 	do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0)
 #define local_irq_disable() \
@@ -84,21 +84,20 @@
  * The local_irq_*() APIs are equal to the raw_local_irq*()
  * if !TRACE_IRQFLAGS.
  */
-# define raw_local_irq_disable()	local_irq_disable()
-# define raw_local_irq_enable()		local_irq_enable()
-# define raw_local_irq_save(flags)			\
+#define local_irq_disable()		raw_local_irq_disable()
+#define local_irq_enable()		raw_local_irq_enable()
+#define local_irq_save(flags)				\
 	do {						\
 		typecheck(unsigned long, flags);	\
-		local_irq_save(flags);			\
+		raw_local_irq_save(flags);		\
 	} while (0)
-# define raw_local_irq_restore(flags)			\
+# define local_irq_restore(flags)			\
 	do {						\
 		typecheck(unsigned long, flags);	\
-		local_irq_restore(flags);		\
+		raw_local_irq_restore(flags);		\
 	} while (0)
 #endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */
 
-#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
 #define safe_halt()						\
 	do {							\
 		trace_hardirqs_on();				\
@@ -124,6 +123,5 @@
 	typecheck(unsigned long, flags);	\
 	raw_irqs_disabled_flags(flags);		\
 })
-#endif		/* CONFIG_X86 */
 
 #endif
-- 
cgit v1.2.3


From ab7476cf76e560f0efda2a631a70aabe93009025 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Fri, 15 Aug 2008 15:29:38 -0700
Subject: debug: add notifier chain debugging, v2

- unbreak ia64 (and powerpc) where function pointers dont
  point at code but at data (reported by Tony Luck)

[ mingo@elte.hu: various cleanups ]

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/kernel.h |  3 +++
 kernel/extable.c       | 16 ++++++++++++++++
 kernel/notifier.c      | 10 +---------
 lib/vsprintf.c         |  2 +-
 4 files changed, 21 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 2651f805ba6d..4e1366b552ae 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -187,6 +187,9 @@ extern unsigned long long memparse(char *ptr, char **retptr);
 extern int core_kernel_text(unsigned long addr);
 extern int __kernel_text_address(unsigned long addr);
 extern int kernel_text_address(unsigned long addr);
+extern int func_ptr_is_kernel_text(void *ptr);
+extern void *dereference_function_descriptor(void *ptr);
+
 struct pid;
 extern struct pid *session_of_pgrp(struct pid *pgrp);
 
diff --git a/kernel/extable.c b/kernel/extable.c
index a26cb2e17023..adf0cc9c02d6 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -66,3 +66,19 @@ int kernel_text_address(unsigned long addr)
 		return 1;
 	return module_text_address(addr) != NULL;
 }
+
+/*
+ * On some architectures (PPC64, IA64) function pointers
+ * are actually only tokens to some data that then holds the
+ * real function address. As a result, to find if a function
+ * pointer is part of the kernel text, we need to do some
+ * special dereferencing first.
+ */
+int func_ptr_is_kernel_text(void *ptr)
+{
+	unsigned long addr;
+	addr = (unsigned long) dereference_function_descriptor(ptr);
+	if (core_kernel_text(addr))
+		return 1;
+	return module_text_address(addr) != NULL;
+}
diff --git a/kernel/notifier.c b/kernel/notifier.c
index 143fdd77dbf7..0f39e398ef60 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -21,10 +21,6 @@ BLOCKING_NOTIFIER_HEAD(reboot_notifier_list);
 static int notifier_chain_register(struct notifier_block **nl,
 		struct notifier_block *n)
 {
-	if (!kernel_text_address((unsigned long)n->notifier_call)) {
-		WARN(1, "Invalid notifier registered!");
-		return 0;
-	}
 	while ((*nl) != NULL) {
 		if (n->priority > (*nl)->priority)
 			break;
@@ -38,10 +34,6 @@ static int notifier_chain_register(struct notifier_block **nl,
 static int notifier_chain_cond_register(struct notifier_block **nl,
 		struct notifier_block *n)
 {
-	if (!kernel_text_address((unsigned long)n->notifier_call)) {
-		WARN(1, "Invalid notifier registered!");
-		return 0;
-	}
 	while ((*nl) != NULL) {
 		if ((*nl) == n)
 			return 0;
@@ -92,7 +84,7 @@ static int __kprobes notifier_call_chain(struct notifier_block **nl,
 		next_nb = rcu_dereference(nb->next);
 
 #ifdef CONFIG_DEBUG_NOTIFIERS
-		if (!kernel_text_address((unsigned long)nb->notifier_call)) {
+		if (unlikely(!func_ptr_is_kernel_text(nb->notifier_call))) {
 			WARN(1, "Invalid notifier called!");
 			nb = next_nb;
 			continue;
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index d8d1d1142248..f5e5ffb9942f 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -513,7 +513,7 @@ static char *string(char *buf, char *end, char *s, int field_width, int precisio
 	return buf;
 }
 
-static inline void *dereference_function_descriptor(void *ptr)
+void *dereference_function_descriptor(void *ptr)
 {
 #if defined(CONFIG_IA64) || defined(CONFIG_PPC64)
 	void *p;
-- 
cgit v1.2.3


From 76b189e91845eab3a9d52bb97f971d312d25652d Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 10 Sep 2008 09:57:35 +0200
Subject: lockdep: add might_lock() / might_lock_read()

useful to establish a lock dependency in case the actual dependency is
rare or hard to trigger.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/lockdep.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 331e5f1c2d8e..0aa657aa8a1e 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -480,4 +480,22 @@ static inline void print_irqtrace_events(struct task_struct *curr)
 # define lock_map_release(l)			do { } while (0)
 #endif
 
+#ifdef CONFIG_PROVE_LOCKING
+# define might_lock(lock) 						\
+do {									\
+	typecheck(struct lockdep_map *, &(lock)->dep_map);		\
+	lock_acquire(&(lock)->dep_map, 0, 0, 0, 2, NULL, _THIS_IP_);	\
+	lock_release(&(lock)->dep_map, 0, _THIS_IP_);			\
+} while (0)
+# define might_lock_read(lock) 						\
+do {									\
+	typecheck(struct lockdep_map *, &(lock)->dep_map);		\
+	lock_acquire(&(lock)->dep_map, 0, 0, 1, 2, NULL, _THIS_IP_);	\
+	lock_release(&(lock)->dep_map, 0, _THIS_IP_);			\
+} while (0)
+#else
+# define might_lock(lock) do { } while (0)
+# define might_lock_read(lock) do { } while (0)
+#endif
+
 #endif /* __LINUX_LOCKDEP_H */
-- 
cgit v1.2.3


From 3ee1afa308f2a38e5d1e2ad3752ad7abcf480da1 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Wed, 10 Sep 2008 13:37:17 +0200
Subject: x86: some lock annotations for user copy paths, v2

 - introduce might_fault()
 - handle the atomic user copy paths correctly

[ mingo@elte.hu: move might_sleep() outside of in_atomic(). ]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/lib/usercopy_32.c   | 12 +++---------
 arch/x86/lib/usercopy_64.c   |  8 ++------
 include/asm-x86/uaccess.h    | 18 ++++--------------
 include/asm-x86/uaccess_32.h | 12 +++---------
 include/asm-x86/uaccess_64.h | 12 +++---------
 include/linux/kernel.h       |  9 +++++++++
 mm/memory.c                  | 15 +++++++++++++++
 7 files changed, 39 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 8eedde2a9cac..7393152a252e 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -32,9 +32,7 @@ static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned lon
 #define __do_strncpy_from_user(dst, src, count, res)			   \
 do {									   \
 	int __d0, __d1, __d2;						   \
-	might_sleep();							   \
-	if (current->mm)						   \
-		might_lock_read(&current->mm->mmap_sem);		   \
+	might_fault();							   \
 	__asm__ __volatile__(						   \
 		"	testl %1,%1\n"					   \
 		"	jz 2f\n"					   \
@@ -121,9 +119,7 @@ EXPORT_SYMBOL(strncpy_from_user);
 #define __do_clear_user(addr,size)					\
 do {									\
 	int __d0;							\
-	might_sleep();							\
-	if (current->mm)						\
-		might_lock_read(&current->mm->mmap_sem);		\
+	might_fault();							\
 	__asm__ __volatile__(						\
 		"0:	rep; stosl\n"					\
 		"	movl %2,%0\n"					\
@@ -193,9 +189,7 @@ long strnlen_user(const char __user *s, long n)
 	unsigned long mask = -__addr_ok(s);
 	unsigned long res, tmp;
 
-	might_sleep();
-	if (current->mm)
-		might_lock_read(&current->mm->mmap_sem);
+	might_fault();
 
 	__asm__ __volatile__(
 		"	testl %0, %0\n"
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 847d12945998..64d6c84e6353 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -15,9 +15,7 @@
 #define __do_strncpy_from_user(dst,src,count,res)			   \
 do {									   \
 	long __d0, __d1, __d2;						   \
-	might_sleep();							   \
-	if (current->mm)						   \
-		might_lock_read(&current->mm->mmap_sem);		   \
+	might_fault();							   \
 	__asm__ __volatile__(						   \
 		"	testq %1,%1\n"					   \
 		"	jz 2f\n"					   \
@@ -66,9 +64,7 @@ EXPORT_SYMBOL(strncpy_from_user);
 unsigned long __clear_user(void __user *addr, unsigned long size)
 {
 	long __d0;
-	might_sleep();
-	if (current->mm)
-		might_lock_read(&current->mm->mmap_sem);
+	might_fault();
 	/* no memory constraint because it doesn't change any memory gcc knows
 	   about */
 	asm volatile(
diff --git a/include/asm-x86/uaccess.h b/include/asm-x86/uaccess.h
index ad29752a1713..39f8420c75d9 100644
--- a/include/asm-x86/uaccess.h
+++ b/include/asm-x86/uaccess.h
@@ -8,8 +8,6 @@
 #include <linux/thread_info.h>
 #include <linux/prefetch.h>
 #include <linux/string.h>
-#include <linux/lockdep.h>
-#include <linux/sched.h>
 #include <asm/asm.h>
 #include <asm/page.h>
 
@@ -159,9 +157,7 @@ extern int __get_user_bad(void);
 	int __ret_gu;							\
 	unsigned long __val_gu;						\
 	__chk_user_ptr(ptr);						\
-	might_sleep();							\
-	if (current->mm)						\
-		might_lock_read(&current->mm->mmap_sem);		\
+	might_fault();							\
 	switch (sizeof(*(ptr))) {					\
 	case 1:								\
 		__get_user_x(1, __ret_gu, __val_gu, ptr);		\
@@ -246,9 +242,7 @@ extern void __put_user_8(void);
 	int __ret_pu;						\
 	__typeof__(*(ptr)) __pu_val;				\
 	__chk_user_ptr(ptr);					\
-	might_sleep();						\
-	if (current->mm)					\
-		might_lock_read(&current->mm->mmap_sem);	\
+	might_fault();						\
 	__pu_val = x;						\
 	switch (sizeof(*(ptr))) {				\
 	case 1:							\
@@ -273,9 +267,7 @@ extern void __put_user_8(void);
 #define __put_user_size(x, ptr, size, retval, errret)			\
 do {									\
 	retval = 0;							\
-	might_sleep();							\
-	if (current->mm)						\
-		might_lock_read(&current->mm->mmap_sem);		\
+	might_fault();							\
 	__chk_user_ptr(ptr);						\
 	switch (size) {							\
 	case 1:								\
@@ -328,9 +320,7 @@ do {									\
 #define __get_user_size(x, ptr, size, retval, errret)			\
 do {									\
 	retval = 0;							\
-	might_sleep();							\
-	if (current->mm)						\
-		might_lock_read(&current->mm->mmap_sem);		\
+	might_fault();							\
 	__chk_user_ptr(ptr);						\
 	switch (size) {							\
 	case 1:								\
diff --git a/include/asm-x86/uaccess_32.h b/include/asm-x86/uaccess_32.h
index d725e2d703f7..d10e842ec3ee 100644
--- a/include/asm-x86/uaccess_32.h
+++ b/include/asm-x86/uaccess_32.h
@@ -82,9 +82,7 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
 static __always_inline unsigned long __must_check
 __copy_to_user(void __user *to, const void *from, unsigned long n)
 {
-	might_sleep();
-	if (current->mm)
-		might_lock_read(&current->mm->mmap_sem);
+	might_fault();
 	return __copy_to_user_inatomic(to, from, n);
 }
 
@@ -139,9 +137,7 @@ __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
 static __always_inline unsigned long
 __copy_from_user(void *to, const void __user *from, unsigned long n)
 {
-	might_sleep();
-	if (current->mm)
-		might_lock_read(&current->mm->mmap_sem);
+	might_fault();
 	if (__builtin_constant_p(n)) {
 		unsigned long ret;
 
@@ -163,9 +159,7 @@ __copy_from_user(void *to, const void __user *from, unsigned long n)
 static __always_inline unsigned long __copy_from_user_nocache(void *to,
 				const void __user *from, unsigned long n)
 {
-	might_sleep();
-	if (current->mm)
-		might_lock_read(&current->mm->mmap_sem);
+	might_fault();
 	if (__builtin_constant_p(n)) {
 		unsigned long ret;
 
diff --git a/include/asm-x86/uaccess_64.h b/include/asm-x86/uaccess_64.h
index 40a7205fe576..13fd56fbc3ab 100644
--- a/include/asm-x86/uaccess_64.h
+++ b/include/asm-x86/uaccess_64.h
@@ -29,9 +29,7 @@ int __copy_from_user(void *dst, const void __user *src, unsigned size)
 {
 	int ret = 0;
 
-	might_sleep();
-	if (current->mm)
-		might_lock_read(&current->mm->mmap_sem);
+	might_fault();
 	if (!__builtin_constant_p(size))
 		return copy_user_generic(dst, (__force void *)src, size);
 	switch (size) {
@@ -75,9 +73,7 @@ int __copy_to_user(void __user *dst, const void *src, unsigned size)
 {
 	int ret = 0;
 
-	might_sleep();
-	if (current->mm)
-		might_lock_read(&current->mm->mmap_sem);
+	might_fault();
 	if (!__builtin_constant_p(size))
 		return copy_user_generic((__force void *)dst, src, size);
 	switch (size) {
@@ -121,9 +117,7 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size)
 {
 	int ret = 0;
 
-	might_sleep();
-	if (current->mm)
-		might_lock_read(&current->mm->mmap_sem);
+	might_fault();
 	if (!__builtin_constant_p(size))
 		return copy_user_generic((__force void *)dst,
 					 (__force void *)src, size);
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 2651f805ba6d..e580ec095765 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -140,6 +140,15 @@ extern int _cond_resched(void);
 		(__x < 0) ? -__x : __x;		\
 	})
 
+#ifdef CONFIG_PROVE_LOCKING
+void might_fault(void);
+#else
+static inline void might_fault(void)
+{
+	might_sleep();
+}
+#endif
+
 extern struct atomic_notifier_head panic_notifier_list;
 extern long (*panic_blink)(long time);
 NORET_TYPE void panic(const char * fmt, ...)
diff --git a/mm/memory.c b/mm/memory.c
index 1002f473f497..b8fdf4e5e65b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3016,3 +3016,18 @@ void print_vma_addr(char *prefix, unsigned long ip)
 	}
 	up_read(&current->mm->mmap_sem);
 }
+
+#ifdef CONFIG_PROVE_LOCKING
+void might_fault(void)
+{
+	might_sleep();
+	/*
+	 * it would be nicer only to annotate paths which are not under
+	 * pagefault_disable, however that requires a larger audit and
+	 * providing helpers like get_user_atomic.
+	 */
+	if (!in_atomic() && current->mm)
+		might_lock_read(&current->mm->mmap_sem);
+}
+EXPORT_SYMBOL(might_fault);
+#endif
-- 
cgit v1.2.3


From 1d18ef489509314506328b9e464dd47c24c1d68f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 11 Sep 2008 20:53:21 +0200
Subject: x86: some lock annotations for user copy paths, v3

- add annotation back to clear_user()
- change probe_kernel_address() to _inatomic*() method

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/lib/usercopy_32.c | 1 +
 include/asm-x86/uaccess.h  | 2 --
 include/linux/uaccess.h    | 2 +-
 3 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 7393152a252e..fab5faba1d3e 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -148,6 +148,7 @@ do {									\
 unsigned long
 clear_user(void __user *to, unsigned long n)
 {
+	might_fault();
 	if (access_ok(VERIFY_WRITE, to, n))
 		__do_clear_user(to, n);
 	return n;
diff --git a/include/asm-x86/uaccess.h b/include/asm-x86/uaccess.h
index 39f8420c75d9..dc8edb5c4659 100644
--- a/include/asm-x86/uaccess.h
+++ b/include/asm-x86/uaccess.h
@@ -267,7 +267,6 @@ extern void __put_user_8(void);
 #define __put_user_size(x, ptr, size, retval, errret)			\
 do {									\
 	retval = 0;							\
-	might_fault();							\
 	__chk_user_ptr(ptr);						\
 	switch (size) {							\
 	case 1:								\
@@ -320,7 +319,6 @@ do {									\
 #define __get_user_size(x, ptr, size, retval, errret)			\
 do {									\
 	retval = 0;							\
-	might_fault();							\
 	__chk_user_ptr(ptr);						\
 	switch (size) {							\
 	case 1:								\
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index fec6decfb983..2062293e57e6 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -78,7 +78,7 @@ static inline unsigned long __copy_from_user_nocache(void *to,
 							\
 		set_fs(KERNEL_DS);			\
 		pagefault_disable();			\
-		ret = __get_user(retval, (__force typeof(retval) __user *)(addr));		\
+		ret = __copy_from_user_inatomic((__force typeof(retval) __user *)(addr), &(retval), sizeof(retval));		\
 		pagefault_enable();			\
 		set_fs(old_fs);				\
 		ret;					\
-- 
cgit v1.2.3


From 53b9d87f41a3d8838210ad7cdef02d814817ce85 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Thu, 11 Sep 2008 17:02:58 -0700
Subject: lock debug: sit tight when we are already in a panic

in:

  > http://bugzilla.kernel.org/show_bug.cgi?id=11543

The panic code called the kexec code which called mutex_trylock() which
called spin_lock_mutex() which then stupidly went and blurted a load of
debug stuff because of in_interrupt().

Keep the lock debug code from escallating an already crappy situation.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/debug_locks.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h
index 4aaa4afb1cb9..096476f1fb35 100644
--- a/include/linux/debug_locks.h
+++ b/include/linux/debug_locks.h
@@ -17,7 +17,7 @@ extern int debug_locks_off(void);
 ({									\
 	int __ret = 0;							\
 									\
-	if (unlikely(c)) {						\
+	if (!oops_in_progress && unlikely(c)) {				\
 		if (debug_locks_off() && !debug_locks_silent)		\
 			WARN_ON(1);					\
 		__ret = 1;						\
-- 
cgit v1.2.3


From 30742d5c2277c325fb0e9d2d817d55a19995fe8f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 14 Sep 2008 14:43:39 +0200
Subject: Revert "lockdep: fix compilation when CONFIG_TRACE_IRQFLAGS_SUPPORT
 is not set"

This reverts commit bd8fbdee6562ee526f3c2582a3b373ef195015dd.

This broke the powerpc build - more fixes are needed before we can
undo this revert.
---
 include/linux/irqflags.h | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index f2993512b3b5..74bde13224c9 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -52,10 +52,10 @@
 # define start_critical_timings() do { } while (0)
 #endif
 
-#include <asm/irqflags.h>
-
 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
 
+#include <asm/irqflags.h>
+
 #define local_irq_enable() \
 	do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0)
 #define local_irq_disable() \
@@ -84,20 +84,21 @@
  * The local_irq_*() APIs are equal to the raw_local_irq*()
  * if !TRACE_IRQFLAGS.
  */
-#define local_irq_disable()		raw_local_irq_disable()
-#define local_irq_enable()		raw_local_irq_enable()
-#define local_irq_save(flags)				\
+# define raw_local_irq_disable()	local_irq_disable()
+# define raw_local_irq_enable()		local_irq_enable()
+# define raw_local_irq_save(flags)			\
 	do {						\
 		typecheck(unsigned long, flags);	\
-		raw_local_irq_save(flags);		\
+		local_irq_save(flags);			\
 	} while (0)
-# define local_irq_restore(flags)			\
+# define raw_local_irq_restore(flags)			\
 	do {						\
 		typecheck(unsigned long, flags);	\
-		raw_local_irq_restore(flags);		\
+		local_irq_restore(flags);		\
 	} while (0)
 #endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */
 
+#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
 #define safe_halt()						\
 	do {							\
 		trace_hardirqs_on();				\
@@ -123,5 +124,6 @@
 	typecheck(unsigned long, flags);	\
 	raw_irqs_disabled_flags(flags);		\
 })
+#endif		/* CONFIG_X86 */
 
 #endif
-- 
cgit v1.2.3


From fb71e45338453698bd7460f7e8f171ea0304d218 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Mon, 15 Sep 2008 18:04:26 -0700
Subject: uaccess: fix parameters inversion for __copy_from_user_inatomic()

The following patch changes to use __copy_from_user_inatomic(),
but the passing parameters incorrect:

    x86: some lock annotations for user copy paths, v3

This fixes the netfilter crash reported by Steven Noonan.

Reported-by: Steven Noonan <steven@uplinklabs.net>
Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Tested-by: Steven Noonan <steven@uplinklabs.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/uaccess.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 2062293e57e6..6b58367d145e 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -78,7 +78,7 @@ static inline unsigned long __copy_from_user_nocache(void *to,
 							\
 		set_fs(KERNEL_DS);			\
 		pagefault_disable();			\
-		ret = __copy_from_user_inatomic((__force typeof(retval) __user *)(addr), &(retval), sizeof(retval));		\
+		ret = __copy_from_user_inatomic(&(retval), (__force typeof(retval) __user *)(addr), sizeof(retval));		\
 		pagefault_enable();			\
 		set_fs(old_fs);				\
 		ret;					\
-- 
cgit v1.2.3


From 38d47c1b7075bd7ec3881141bb3629da58f88dab Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 26 Sep 2008 19:32:20 +0200
Subject: futex: rely on get_user_pages() for shared futexes

On the way of getting rid of the mmap_sem requirement for shared futexes,
start by relying on get_user_pages().

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/futex.h |   2 +
 kernel/futex.c        | 162 +++++++++++++++++++++++++-------------------------
 2 files changed, 82 insertions(+), 82 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/futex.h b/include/linux/futex.h
index 586ab56a3ec3..8f627b9ae2b1 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -164,6 +164,8 @@ union futex_key {
 	} both;
 };
 
+#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } }
+
 #ifdef CONFIG_FUTEX
 extern void exit_robust_list(struct task_struct *curr);
 extern void exit_pi_state_list(struct task_struct *curr);
diff --git a/kernel/futex.c b/kernel/futex.c
index 7d1136e97c14..a4c39fa0a7a3 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -161,6 +161,45 @@ static inline int match_futex(union futex_key *key1, union futex_key *key2)
 		&& key1->both.offset == key2->both.offset);
 }
 
+/*
+ * Take a reference to the resource addressed by a key.
+ * Can be called while holding spinlocks.
+ *
+ */
+static void get_futex_key_refs(union futex_key *key)
+{
+	if (!key->both.ptr)
+		return;
+
+	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
+	case FUT_OFF_INODE:
+		atomic_inc(&key->shared.inode->i_count);
+		break;
+	case FUT_OFF_MMSHARED:
+		atomic_inc(&key->private.mm->mm_count);
+		break;
+	}
+}
+
+/*
+ * Drop a reference to the resource addressed by a key.
+ * The hash bucket spinlock must not be held.
+ */
+static void drop_futex_key_refs(union futex_key *key)
+{
+	if (!key->both.ptr)
+		return;
+
+	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
+	case FUT_OFF_INODE:
+		iput(key->shared.inode);
+		break;
+	case FUT_OFF_MMSHARED:
+		mmdrop(key->private.mm);
+		break;
+	}
+}
+
 /**
  * get_futex_key - Get parameters which are the keys for a futex.
  * @uaddr: virtual address of the futex
@@ -184,7 +223,6 @@ static int get_futex_key(u32 __user *uaddr, struct rw_semaphore *fshared,
 {
 	unsigned long address = (unsigned long)uaddr;
 	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma;
 	struct page *page;
 	int err;
 
@@ -210,98 +248,47 @@ static int get_futex_key(u32 __user *uaddr, struct rw_semaphore *fshared,
 		key->private.address = address;
 		return 0;
 	}
-	/*
-	 * The futex is hashed differently depending on whether
-	 * it's in a shared or private mapping.  So check vma first.
-	 */
-	vma = find_extend_vma(mm, address);
-	if (unlikely(!vma))
-		return -EFAULT;
 
-	/*
-	 * Permissions.
-	 */
-	if (unlikely((vma->vm_flags & (VM_IO|VM_READ)) != VM_READ))
-		return (vma->vm_flags & VM_IO) ? -EPERM : -EACCES;
+again:
+	err = get_user_pages(current, mm, address, 1, 0, 0, &page, NULL);
+	if (err < 0)
+		return err;
+
+	lock_page(page);
+	if (!page->mapping) {
+		unlock_page(page);
+		put_page(page);
+		goto again;
+	}
 
 	/*
 	 * Private mappings are handled in a simple way.
 	 *
 	 * NOTE: When userspace waits on a MAP_SHARED mapping, even if
 	 * it's a read-only handle, it's expected that futexes attach to
-	 * the object not the particular process.  Therefore we use
-	 * VM_MAYSHARE here, not VM_SHARED which is restricted to shared
-	 * mappings of _writable_ handles.
+	 * the object not the particular process.
 	 */
-	if (likely(!(vma->vm_flags & VM_MAYSHARE))) {
-		key->both.offset |= FUT_OFF_MMSHARED; /* reference taken on mm */
+	if (PageAnon(page)) {
+		key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
 		key->private.mm = mm;
 		key->private.address = address;
-		return 0;
-	}
-
-	/*
-	 * Linear file mappings are also simple.
-	 */
-	key->shared.inode = vma->vm_file->f_path.dentry->d_inode;
-	key->both.offset |= FUT_OFF_INODE; /* inode-based key. */
-	if (likely(!(vma->vm_flags & VM_NONLINEAR))) {
-		key->shared.pgoff = (((address - vma->vm_start) >> PAGE_SHIFT)
-				     + vma->vm_pgoff);
-		return 0;
+	} else {
+		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
+		key->shared.inode = page->mapping->host;
+		key->shared.pgoff = page->index;
 	}
 
-	/*
-	 * We could walk the page table to read the non-linear
-	 * pte, and get the page index without fetching the page
-	 * from swap.  But that's a lot of code to duplicate here
-	 * for a rare case, so we simply fetch the page.
-	 */
-	err = get_user_pages(current, mm, address, 1, 0, 0, &page, NULL);
-	if (err >= 0) {
-		key->shared.pgoff =
-			page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-		put_page(page);
-		return 0;
-	}
-	return err;
-}
+	get_futex_key_refs(key);
 
-/*
- * Take a reference to the resource addressed by a key.
- * Can be called while holding spinlocks.
- *
- */
-static void get_futex_key_refs(union futex_key *key)
-{
-	if (key->both.ptr == NULL)
-		return;
-	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
-		case FUT_OFF_INODE:
-			atomic_inc(&key->shared.inode->i_count);
-			break;
-		case FUT_OFF_MMSHARED:
-			atomic_inc(&key->private.mm->mm_count);
-			break;
-	}
+	unlock_page(page);
+	put_page(page);
+	return 0;
 }
 
-/*
- * Drop a reference to the resource addressed by a key.
- * The hash bucket spinlock must not be held.
- */
-static void drop_futex_key_refs(union futex_key *key)
+static inline
+void put_futex_key(struct rw_semaphore *fshared, union futex_key *key)
 {
-	if (!key->both.ptr)
-		return;
-	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
-		case FUT_OFF_INODE:
-			iput(key->shared.inode);
-			break;
-		case FUT_OFF_MMSHARED:
-			mmdrop(key->private.mm);
-			break;
-	}
+	drop_futex_key_refs(key);
 }
 
 static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval)
@@ -385,6 +372,7 @@ static int refill_pi_state_cache(void)
 	/* pi_mutex gets initialized later */
 	pi_state->owner = NULL;
 	atomic_set(&pi_state->refcount, 1);
+	pi_state->key = FUTEX_KEY_INIT;
 
 	current->pi_state_cache = pi_state;
 
@@ -462,7 +450,7 @@ void exit_pi_state_list(struct task_struct *curr)
 	struct list_head *next, *head = &curr->pi_state_list;
 	struct futex_pi_state *pi_state;
 	struct futex_hash_bucket *hb;
-	union futex_key key;
+	union futex_key key = FUTEX_KEY_INIT;
 
 	if (!futex_cmpxchg_enabled)
 		return;
@@ -725,7 +713,7 @@ static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared,
 	struct futex_hash_bucket *hb;
 	struct futex_q *this, *next;
 	struct plist_head *head;
-	union futex_key key;
+	union futex_key key = FUTEX_KEY_INIT;
 	int ret;
 
 	if (!bitset)
@@ -760,6 +748,7 @@ static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared,
 
 	spin_unlock(&hb->lock);
 out:
+	put_futex_key(fshared, &key);
 	futex_unlock_mm(fshared);
 	return ret;
 }
@@ -773,7 +762,7 @@ futex_wake_op(u32 __user *uaddr1, struct rw_semaphore *fshared,
 	      u32 __user *uaddr2,
 	      int nr_wake, int nr_wake2, int op)
 {
-	union futex_key key1, key2;
+	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
 	struct futex_hash_bucket *hb1, *hb2;
 	struct plist_head *head;
 	struct futex_q *this, *next;
@@ -873,6 +862,8 @@ retry:
 	if (hb1 != hb2)
 		spin_unlock(&hb2->lock);
 out:
+	put_futex_key(fshared, &key2);
+	put_futex_key(fshared, &key1);
 	futex_unlock_mm(fshared);
 
 	return ret;
@@ -886,7 +877,7 @@ static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared,
 			 u32 __user *uaddr2,
 			 int nr_wake, int nr_requeue, u32 *cmpval)
 {
-	union futex_key key1, key2;
+	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
 	struct futex_hash_bucket *hb1, *hb2;
 	struct plist_head *head1;
 	struct futex_q *this, *next;
@@ -974,6 +965,8 @@ out_unlock:
 		drop_futex_key_refs(&key1);
 
 out:
+	put_futex_key(fshared, &key2);
+	put_futex_key(fshared, &key1);
 	futex_unlock_mm(fshared);
 	return ret;
 }
@@ -1220,6 +1213,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
  retry:
 	futex_lock_mm(fshared);
 
+	q.key = FUTEX_KEY_INIT;
 	ret = get_futex_key(uaddr, fshared, &q.key);
 	if (unlikely(ret != 0))
 		goto out_release_sem;
@@ -1360,6 +1354,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
 	queue_unlock(&q, hb);
 
  out_release_sem:
+	put_futex_key(fshared, &q.key);
 	futex_unlock_mm(fshared);
 	return ret;
 }
@@ -1411,6 +1406,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
  retry:
 	futex_lock_mm(fshared);
 
+	q.key = FUTEX_KEY_INIT;
 	ret = get_futex_key(uaddr, fshared, &q.key);
 	if (unlikely(ret != 0))
 		goto out_release_sem;
@@ -1625,6 +1621,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 	queue_unlock(&q, hb);
 
  out_release_sem:
+	put_futex_key(fshared, &q.key);
 	futex_unlock_mm(fshared);
 	if (to)
 		destroy_hrtimer_on_stack(&to->timer);
@@ -1671,7 +1668,7 @@ static int futex_unlock_pi(u32 __user *uaddr, struct rw_semaphore *fshared)
 	struct futex_q *this, *next;
 	u32 uval;
 	struct plist_head *head;
-	union futex_key key;
+	union futex_key key = FUTEX_KEY_INIT;
 	int ret, attempt = 0;
 
 retry:
@@ -1744,6 +1741,7 @@ retry_unlocked:
 out_unlock:
 	spin_unlock(&hb->lock);
 out:
+	put_futex_key(fshared, &key);
 	futex_unlock_mm(fshared);
 
 	return ret;
-- 
cgit v1.2.3


From c7e78cff6b7518212247fb20b1dc6411540dc9af Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 16 Oct 2008 23:17:09 +0200
Subject: lockstat: contend with points

We currently only provide points that have to wait on contention, also
lists the points we have to wait for.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/lockstat.txt | 50 +++++++++++++++++++++++++++++-----------------
 include/linux/lockdep.h    | 13 ++++++++----
 kernel/lockdep.c           | 33 +++++++++++++++++++-----------
 kernel/lockdep_proc.c      | 21 +++++++++++++++++--
 kernel/mutex.c             |  2 +-
 5 files changed, 82 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/lockstat.txt b/Documentation/lockstat.txt
index 02f36f5c64fe..9cb9138f7a79 100644
--- a/Documentation/lockstat.txt
+++ b/Documentation/lockstat.txt
@@ -71,34 +71,48 @@ Look at the current lock statistics:
 
 # less /proc/lock_stat
 
-01 lock_stat version 0.2
+01 lock_stat version 0.3
 02 -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 03                               class name    con-bounces    contentions   waittime-min   waittime-max waittime-total    acq-bounces   acquisitions   holdtime-min   holdtime-max holdtime-total
 04 -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 05
-06               &inode->i_data.tree_lock-W:            15          21657           0.18     1093295.30 11547131054.85             58          10415           0.16          87.51        6387.60
-07               &inode->i_data.tree_lock-R:             0              0           0.00           0.00           0.00          23302         231198           0.25           8.45       98023.38
-08               --------------------------
-09                 &inode->i_data.tree_lock              0          [<ffffffff8027c08f>] add_to_page_cache+0x5f/0x190
-10
-11 ...............................................................................................................................................................................................
-12
-13                              dcache_lock:          1037           1161           0.38          45.32         774.51           6611         243371           0.15         306.48       77387.24
-14                              -----------
-15                              dcache_lock            180          [<ffffffff802c0d7e>] sys_getcwd+0x11e/0x230
-16                              dcache_lock            165          [<ffffffff802c002a>] d_alloc+0x15a/0x210
-17                              dcache_lock             33          [<ffffffff8035818d>] _atomic_dec_and_lock+0x4d/0x70
-18                              dcache_lock              1          [<ffffffff802beef8>] shrink_dcache_parent+0x18/0x130
+06                          &mm->mmap_sem-W:           233            538 18446744073708       22924.27      607243.51           1342          45806           1.71        8595.89     1180582.34
+07                          &mm->mmap_sem-R:           205            587 18446744073708       28403.36      731975.00           1940         412426           0.58      187825.45     6307502.88
+08                          ---------------
+09                            &mm->mmap_sem            487          [<ffffffff8053491f>] do_page_fault+0x466/0x928
+10                            &mm->mmap_sem            179          [<ffffffff802a6200>] sys_mprotect+0xcd/0x21d
+11                            &mm->mmap_sem            279          [<ffffffff80210a57>] sys_mmap+0x75/0xce
+12                            &mm->mmap_sem             76          [<ffffffff802a490b>] sys_munmap+0x32/0x59
+13                          ---------------
+14                            &mm->mmap_sem            270          [<ffffffff80210a57>] sys_mmap+0x75/0xce
+15                            &mm->mmap_sem            431          [<ffffffff8053491f>] do_page_fault+0x466/0x928
+16                            &mm->mmap_sem            138          [<ffffffff802a490b>] sys_munmap+0x32/0x59
+17                            &mm->mmap_sem            145          [<ffffffff802a6200>] sys_mprotect+0xcd/0x21d
+18
+19 ...............................................................................................................................................................................................
+20
+21                              dcache_lock:           621            623           0.52         118.26        1053.02           6745          91930           0.29         316.29      118423.41
+22                              -----------
+23                              dcache_lock            179          [<ffffffff80378274>] _atomic_dec_and_lock+0x34/0x54
+24                              dcache_lock            113          [<ffffffff802cc17b>] d_alloc+0x19a/0x1eb
+25                              dcache_lock             99          [<ffffffff802ca0dc>] d_rehash+0x1b/0x44
+26                              dcache_lock            104          [<ffffffff802cbca0>] d_instantiate+0x36/0x8a
+27                              -----------
+28                              dcache_lock            192          [<ffffffff80378274>] _atomic_dec_and_lock+0x34/0x54
+29                              dcache_lock             98          [<ffffffff802ca0dc>] d_rehash+0x1b/0x44
+30                              dcache_lock             72          [<ffffffff802cc17b>] d_alloc+0x19a/0x1eb
+31                              dcache_lock            112          [<ffffffff802cbca0>] d_instantiate+0x36/0x8a
 
 This excerpt shows the first two lock class statistics. Line 01 shows the
 output version - each time the format changes this will be updated. Line 02-04
-show the header with column descriptions. Lines 05-10 and 13-18 show the actual
+show the header with column descriptions. Lines 05-18 and 20-31 show the actual
 statistics. These statistics come in two parts; the actual stats separated by a
-short separator (line 08, 14) from the contention points.
+short separator (line 08, 13) from the contention points.
 
-The first lock (05-10) is a read/write lock, and shows two lines above the
+The first lock (05-18) is a read/write lock, and shows two lines above the
 short separator. The contention points don't match the column descriptors,
-they have two: contentions and [<IP>] symbol.
+they have two: contentions and [<IP>] symbol. The second set of contention
+points are the points we're contending with.
 
 The integer part of the time values is in us.
 
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 0aa657aa8a1e..fc9f8e88123b 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -73,6 +73,8 @@ struct lock_class_key {
 	struct lockdep_subclass_key	subkeys[MAX_LOCKDEP_SUBCLASSES];
 };
 
+#define LOCKSTAT_POINTS		4
+
 /*
  * The lock-class itself:
  */
@@ -119,7 +121,8 @@ struct lock_class {
 	int				name_version;
 
 #ifdef CONFIG_LOCK_STAT
-	unsigned long			contention_point[4];
+	unsigned long			contention_point[LOCKSTAT_POINTS];
+	unsigned long			contending_point[LOCKSTAT_POINTS];
 #endif
 };
 
@@ -144,6 +147,7 @@ enum bounce_type {
 
 struct lock_class_stats {
 	unsigned long			contention_point[4];
+	unsigned long			contending_point[4];
 	struct lock_time		read_waittime;
 	struct lock_time		write_waittime;
 	struct lock_time		read_holdtime;
@@ -165,6 +169,7 @@ struct lockdep_map {
 	const char			*name;
 #ifdef CONFIG_LOCK_STAT
 	int				cpu;
+	unsigned long			ip;
 #endif
 };
 
@@ -355,7 +360,7 @@ struct lock_class_key { };
 #ifdef CONFIG_LOCK_STAT
 
 extern void lock_contended(struct lockdep_map *lock, unsigned long ip);
-extern void lock_acquired(struct lockdep_map *lock);
+extern void lock_acquired(struct lockdep_map *lock, unsigned long ip);
 
 #define LOCK_CONTENDED(_lock, try, lock)			\
 do {								\
@@ -363,13 +368,13 @@ do {								\
 		lock_contended(&(_lock)->dep_map, _RET_IP_);	\
 		lock(_lock);					\
 	}							\
-	lock_acquired(&(_lock)->dep_map);			\
+	lock_acquired(&(_lock)->dep_map, _RET_IP_);			\
 } while (0)
 
 #else /* CONFIG_LOCK_STAT */
 
 #define lock_contended(lockdep_map, ip) do {} while (0)
-#define lock_acquired(lockdep_map) do {} while (0)
+#define lock_acquired(lockdep_map, ip) do {} while (0)
 
 #define LOCK_CONTENDED(_lock, try, lock) \
 	lock(_lock)
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index dbda475b13bd..234a9dccb4be 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -136,16 +136,16 @@ static inline struct lock_class *hlock_class(struct held_lock *hlock)
 #ifdef CONFIG_LOCK_STAT
 static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], lock_stats);
 
-static int lock_contention_point(struct lock_class *class, unsigned long ip)
+static int lock_point(unsigned long points[], unsigned long ip)
 {
 	int i;
 
-	for (i = 0; i < ARRAY_SIZE(class->contention_point); i++) {
-		if (class->contention_point[i] == 0) {
-			class->contention_point[i] = ip;
+	for (i = 0; i < LOCKSTAT_POINTS; i++) {
+		if (points[i] == 0) {
+			points[i] = ip;
 			break;
 		}
-		if (class->contention_point[i] == ip)
+		if (points[i] == ip)
 			break;
 	}
 
@@ -185,6 +185,9 @@ struct lock_class_stats lock_stats(struct lock_class *class)
 		for (i = 0; i < ARRAY_SIZE(stats.contention_point); i++)
 			stats.contention_point[i] += pcs->contention_point[i];
 
+		for (i = 0; i < ARRAY_SIZE(stats.contending_point); i++)
+			stats.contending_point[i] += pcs->contending_point[i];
+
 		lock_time_add(&pcs->read_waittime, &stats.read_waittime);
 		lock_time_add(&pcs->write_waittime, &stats.write_waittime);
 
@@ -209,6 +212,7 @@ void clear_lock_stats(struct lock_class *class)
 		memset(cpu_stats, 0, sizeof(struct lock_class_stats));
 	}
 	memset(class->contention_point, 0, sizeof(class->contention_point));
+	memset(class->contending_point, 0, sizeof(class->contending_point));
 }
 
 static struct lock_class_stats *get_lock_stats(struct lock_class *class)
@@ -3001,7 +3005,7 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip)
 	struct held_lock *hlock, *prev_hlock;
 	struct lock_class_stats *stats;
 	unsigned int depth;
-	int i, point;
+	int i, contention_point, contending_point;
 
 	depth = curr->lockdep_depth;
 	if (DEBUG_LOCKS_WARN_ON(!depth))
@@ -3025,18 +3029,22 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip)
 found_it:
 	hlock->waittime_stamp = sched_clock();
 
-	point = lock_contention_point(hlock_class(hlock), ip);
+	contention_point = lock_point(hlock_class(hlock)->contention_point, ip);
+	contending_point = lock_point(hlock_class(hlock)->contending_point,
+				      lock->ip);
 
 	stats = get_lock_stats(hlock_class(hlock));
-	if (point < ARRAY_SIZE(stats->contention_point))
-		stats->contention_point[point]++;
+	if (contention_point < LOCKSTAT_POINTS)
+		stats->contention_point[contention_point]++;
+	if (contending_point < LOCKSTAT_POINTS)
+		stats->contending_point[contending_point]++;
 	if (lock->cpu != smp_processor_id())
 		stats->bounces[bounce_contended + !!hlock->read]++;
 	put_lock_stats(stats);
 }
 
 static void
-__lock_acquired(struct lockdep_map *lock)
+__lock_acquired(struct lockdep_map *lock, unsigned long ip)
 {
 	struct task_struct *curr = current;
 	struct held_lock *hlock, *prev_hlock;
@@ -3085,6 +3093,7 @@ found_it:
 	put_lock_stats(stats);
 
 	lock->cpu = cpu;
+	lock->ip = ip;
 }
 
 void lock_contended(struct lockdep_map *lock, unsigned long ip)
@@ -3106,7 +3115,7 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip)
 }
 EXPORT_SYMBOL_GPL(lock_contended);
 
-void lock_acquired(struct lockdep_map *lock)
+void lock_acquired(struct lockdep_map *lock, unsigned long ip)
 {
 	unsigned long flags;
 
@@ -3119,7 +3128,7 @@ void lock_acquired(struct lockdep_map *lock)
 	raw_local_irq_save(flags);
 	check_flags(flags);
 	current->lockdep_recursion = 1;
-	__lock_acquired(lock);
+	__lock_acquired(lock, ip);
 	current->lockdep_recursion = 0;
 	raw_local_irq_restore(flags);
 }
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index 8d3a6eba8d5a..13716b813896 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c
@@ -557,7 +557,7 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
 	if (stats->read_holdtime.nr)
 		namelen += 2;
 
-	for (i = 0; i < ARRAY_SIZE(class->contention_point); i++) {
+	for (i = 0; i < LOCKSTAT_POINTS; i++) {
 		char sym[KSYM_SYMBOL_LEN];
 		char ip[32];
 
@@ -574,6 +574,23 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
 				stats->contention_point[i],
 				ip, sym);
 	}
+	for (i = 0; i < LOCKSTAT_POINTS; i++) {
+		char sym[KSYM_SYMBOL_LEN];
+		char ip[32];
+
+		if (class->contending_point[i] == 0)
+			break;
+
+		if (!i)
+			seq_line(m, '-', 40-namelen, namelen);
+
+		sprint_symbol(sym, class->contending_point[i]);
+		snprintf(ip, sizeof(ip), "[<%p>]",
+				(void *)class->contending_point[i]);
+		seq_printf(m, "%40s %14lu %29s %s\n", name,
+				stats->contending_point[i],
+				ip, sym);
+	}
 	if (i) {
 		seq_puts(m, "\n");
 		seq_line(m, '.', 0, 40 + 1 + 10 * (14 + 1));
@@ -583,7 +600,7 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
 
 static void seq_header(struct seq_file *m)
 {
-	seq_printf(m, "lock_stat version 0.2\n");
+	seq_printf(m, "lock_stat version 0.3\n");
 	seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1));
 	seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s "
 			"%14s %14s\n",
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 12c779dc65d4..39a3816b68d9 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -184,7 +184,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 	}
 
 done:
-	lock_acquired(&lock->dep_map);
+	lock_acquired(&lock->dep_map, ip);
 	/* got the lock - rejoice! */
 	mutex_remove_waiter(lock, &waiter, task_thread_info(task));
 	debug_mutex_set_owner(lock, task_thread_info(task));
-- 
cgit v1.2.3


From b67b4b117746aef686e527c3205792db0f2c9e16 Mon Sep 17 00:00:00 2001
From: Dominic Curran <dcurran@ti.com>
Date: Mon, 27 Oct 2008 22:30:53 -0400
Subject: Input: gpio-keys - add flag to allow auto repeat

This patch adds a flag to gpio-key driver to turn on the input subsystems
auto repeat feature if needed.

Signed-off-by: Dominic Curran <dcurran@ti.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/keyboard/gpio_keys.c | 4 ++++
 include/linux/gpio_keys.h          | 1 +
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c
index 05f3f43582c2..ad67d763fdbd 100644
--- a/drivers/input/keyboard/gpio_keys.c
+++ b/drivers/input/keyboard/gpio_keys.c
@@ -98,6 +98,10 @@ static int __devinit gpio_keys_probe(struct platform_device *pdev)
 	input->id.product = 0x0001;
 	input->id.version = 0x0100;
 
+	/* Enable auto repeat feature of Linux input subsystem */
+	if (pdata->rep)
+		__set_bit(EV_REP, input->evbit);
+
 	ddata->input = input;
 
 	for (i = 0; i < pdata->nbuttons; i++) {
diff --git a/include/linux/gpio_keys.h b/include/linux/gpio_keys.h
index ec6ecd74781d..1289fa7623ca 100644
--- a/include/linux/gpio_keys.h
+++ b/include/linux/gpio_keys.h
@@ -15,6 +15,7 @@ struct gpio_keys_button {
 struct gpio_keys_platform_data {
 	struct gpio_keys_button *buttons;
 	int nbuttons;
+	unsigned int rep:1;		/* enable input subsystem auto repeat */
 };
 
 #endif
-- 
cgit v1.2.3


From cae1c11414912bf77a62aebd65ced321f0b9da51 Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@csr.com>
Date: Mon, 27 Oct 2008 15:22:46 +0000
Subject: uwb: reference count reservations

Reference counting the struct uwb_rsv's is safer and easier to get right than
the transferring ownership of the structures from the PAL to reservation
manager.

This fixes an oops in the debug PAL after a reservation timed out.

Signed-off-by: David Vrabel <david.vrabel@csr.com>
---
 drivers/usb/wusbcore/reservation.c |  7 +++---
 drivers/uwb/rsv.c                  | 48 +++++++++++++++++++++++++-------------
 drivers/uwb/uwb-debug.c            | 13 ++++++++---
 include/linux/uwb.h                |  1 +
 4 files changed, 46 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/wusbcore/reservation.c b/drivers/usb/wusbcore/reservation.c
index fc63e77ded2d..7b6525dac2f1 100644
--- a/drivers/usb/wusbcore/reservation.c
+++ b/drivers/usb/wusbcore/reservation.c
@@ -59,7 +59,6 @@ static void wusbhc_rsv_complete_cb(struct uwb_rsv *rsv)
 	case UWB_RSV_STATE_NONE:
 		dev_dbg(dev, "removed reservation\n");
 		wusbhc_bwa_set(wusbhc, 0, NULL);
-		wusbhc->rsv = NULL;
 		break;
 	default:
 		dev_dbg(dev, "unexpected reservation state: %d\n", rsv->state);
@@ -105,11 +104,11 @@ int wusbhc_rsv_establish(struct wusbhc *wusbhc)
 
 
 /**
- * wusbhc_rsv_terminate - terminate any cluster reservation
+ * wusbhc_rsv_terminate - terminate the cluster reservation
  * @wusbhc: the WUSB host whose reservation is to be terminated
  */
 void wusbhc_rsv_terminate(struct wusbhc *wusbhc)
 {
-	if (wusbhc->rsv)
-		uwb_rsv_terminate(wusbhc->rsv);
+	uwb_rsv_terminate(wusbhc->rsv);
+	uwb_rsv_destroy(wusbhc->rsv);
 }
diff --git a/drivers/uwb/rsv.c b/drivers/uwb/rsv.c
index e4facae46e0d..bcc41a4a6606 100644
--- a/drivers/uwb/rsv.c
+++ b/drivers/uwb/rsv.c
@@ -82,6 +82,23 @@ static void uwb_rsv_dump(struct uwb_rsv *rsv)
 	dev_dbg(dev, "rsv %s -> %s: %s\n", owner, target, uwb_rsv_state_str(rsv->state));
 }
 
+static void uwb_rsv_release(struct kref *kref)
+{
+	struct uwb_rsv *rsv = container_of(kref, struct uwb_rsv, kref);
+
+	kfree(rsv);
+}
+
+static void uwb_rsv_get(struct uwb_rsv *rsv)
+{
+	kref_get(&rsv->kref);
+}
+
+static void uwb_rsv_put(struct uwb_rsv *rsv)
+{
+	kref_put(&rsv->kref, uwb_rsv_release);
+}
+
 /*
  * Get a free stream index for a reservation.
  *
@@ -325,6 +342,7 @@ static struct uwb_rsv *uwb_rsv_alloc(struct uwb_rc *rc)
 
 	INIT_LIST_HEAD(&rsv->rc_node);
 	INIT_LIST_HEAD(&rsv->pal_node);
+	kref_init(&rsv->kref);
 	init_timer(&rsv->timer);
 	rsv->timer.function = uwb_rsv_timer;
 	rsv->timer.data     = (unsigned long)rsv;
@@ -334,14 +352,6 @@ static struct uwb_rsv *uwb_rsv_alloc(struct uwb_rc *rc)
 	return rsv;
 }
 
-static void uwb_rsv_free(struct uwb_rsv *rsv)
-{
-	uwb_dev_put(rsv->owner);
-	if (rsv->target.type == UWB_RSV_TARGET_DEV)
-		uwb_dev_put(rsv->target.dev);
-	kfree(rsv);
-}
-
 /**
  * uwb_rsv_create - allocate and initialize a UWB reservation structure
  * @rc: the radio controller
@@ -375,23 +385,23 @@ void uwb_rsv_remove(struct uwb_rsv *rsv)
 	if (rsv->state != UWB_RSV_STATE_NONE)
 		uwb_rsv_set_state(rsv, UWB_RSV_STATE_NONE);
 	del_timer_sync(&rsv->timer);
-	list_del(&rsv->rc_node);
-	uwb_rsv_free(rsv);
+	uwb_dev_put(rsv->owner);
+	if (rsv->target.type == UWB_RSV_TARGET_DEV)
+		uwb_dev_put(rsv->target.dev);
+
+	list_del_init(&rsv->rc_node);
+	uwb_rsv_put(rsv);
 }
 
 /**
  * uwb_rsv_destroy - free a UWB reservation structure
  * @rsv: the reservation to free
  *
- * The reservation will be terminated if it is pending or established.
+ * The reservation must already be terminated.
  */
 void uwb_rsv_destroy(struct uwb_rsv *rsv)
 {
-	struct uwb_rc *rc = rsv->rc;
-
-	mutex_lock(&rc->rsvs_mutex);
-	uwb_rsv_remove(rsv);
-	mutex_unlock(&rc->rsvs_mutex);
+	uwb_rsv_put(rsv);
 }
 EXPORT_SYMBOL_GPL(uwb_rsv_destroy);
 
@@ -423,6 +433,7 @@ int uwb_rsv_establish(struct uwb_rsv *rsv)
 		goto out;
 	}
 
+	uwb_rsv_get(rsv);
 	list_add_tail(&rsv->rc_node, &rc->reservations);
 	rsv->owner = &rc->uwb_dev;
 	uwb_dev_get(rsv->owner);
@@ -478,9 +489,14 @@ EXPORT_SYMBOL_GPL(uwb_rsv_terminate);
  *
  * Reservation requests from peers are denied unless a PAL accepts it
  * by calling this function.
+ *
+ * The PAL call uwb_rsv_destroy() for all accepted reservations before
+ * calling uwb_pal_unregister().
  */
 void uwb_rsv_accept(struct uwb_rsv *rsv, uwb_rsv_cb_f cb, void *pal_priv)
 {
+	uwb_rsv_get(rsv);
+
 	rsv->callback = cb;
 	rsv->pal_priv = pal_priv;
 	rsv->state    = UWB_RSV_STATE_T_ACCEPTED;
diff --git a/drivers/uwb/uwb-debug.c b/drivers/uwb/uwb-debug.c
index 6d232c35d07d..6db641e45313 100644
--- a/drivers/uwb/uwb-debug.c
+++ b/drivers/uwb/uwb-debug.c
@@ -104,6 +104,11 @@ static void uwb_dbg_rsv_cb(struct uwb_rsv *rsv)
 
 	dev_dbg(dev, "debug: rsv %s -> %s: %s\n",
 		owner, target, uwb_rsv_state_str(rsv->state));
+
+	if (rsv->state == UWB_RSV_STATE_NONE) {
+		list_del(&rsv->pal_node);
+		uwb_rsv_destroy(rsv);
+	}
 }
 
 static int cmd_rsv_establish(struct uwb_rc *rc,
@@ -153,11 +158,11 @@ static int cmd_rsv_terminate(struct uwb_rc *rc,
 			found = rsv;
 			break;
 		}
+		i++;
 	}
 	if (!found)
 		return -EINVAL;
 
-	list_del(&found->pal_node);
 	uwb_rsv_terminate(found);
 
 	return 0;
@@ -287,8 +292,10 @@ static void uwb_dbg_new_rsv(struct uwb_rsv *rsv)
 {
 	struct uwb_rc *rc = rsv->rc;
 
-	if (rc->dbg->accept)
+	if (rc->dbg->accept) {
+		list_add_tail(&rsv->pal_node, &rc->dbg->rsvs);
 		uwb_rsv_accept(rsv, uwb_dbg_rsv_cb, NULL);
+	}
 }
 
 /**
@@ -336,7 +343,7 @@ void uwb_dbg_del_rc(struct uwb_rc *rc)
 		return;
 
 	list_for_each_entry_safe(rsv, t, &rc->dbg->rsvs, pal_node) {
-		uwb_rsv_destroy(rsv);
+		uwb_rsv_terminate(rsv);
 	}
 
 	uwb_pal_unregister(rc, &rc->dbg->pal);
diff --git a/include/linux/uwb.h b/include/linux/uwb.h
index f9ccbd9a2ced..010ee708304d 100644
--- a/include/linux/uwb.h
+++ b/include/linux/uwb.h
@@ -201,6 +201,7 @@ struct uwb_rsv {
 	struct uwb_rc *rc;
 	struct list_head rc_node;
 	struct list_head pal_node;
+	struct kref kref;
 
 	struct uwb_dev *owner;
 	struct uwb_rsv_target target;
-- 
cgit v1.2.3


From 4d2bea4ca0adb4cebfbf89d34869c74081c42577 Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@csr.com>
Date: Mon, 27 Oct 2008 15:42:31 +0000
Subject: wusb: do a proper channel stop

When stopping the WUSB channel the host should send Channel Stop IEs giving
the WUSB Channel Time of the last MMC.  Both WHCI and HWA hosts provide a
channel stop command for this.

Signed-off-by: David Vrabel <david.vrabel@csr.com>
---
 drivers/usb/host/hwa-hc.c       | 102 +++++++++++++++++++---------------------
 drivers/usb/host/whci/whcd.h    |   2 +-
 drivers/usb/host/whci/whci-hc.h |   2 +
 drivers/usb/host/whci/wusb.c    |  15 ++++--
 drivers/usb/wusbcore/mmc.c      |   8 +---
 drivers/usb/wusbcore/wusbhc.h   |  24 ++++++----
 include/linux/usb/wusb-wa.h     |   1 +
 7 files changed, 80 insertions(+), 74 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/host/hwa-hc.c b/drivers/usb/host/hwa-hc.c
index 64be4d88df11..0e18989e1658 100644
--- a/drivers/usb/host/hwa-hc.c
+++ b/drivers/usb/host/hwa-hc.c
@@ -171,11 +171,6 @@ static int hwahc_op_start(struct usb_hcd *usb_hcd)
 	if (result < 0)
 		goto error_set_cluster_id;
 
-	result = wa_nep_arm(&hwahc->wa, GFP_KERNEL);
-	if (result < 0) {
-		dev_err(dev, "cannot listen to notifications: %d\n", result);
-		goto error_stop;
-	}
 	usb_hcd->uses_new_polling = 1;
 	usb_hcd->poll_rh = 1;
 	usb_hcd->state = HC_STATE_RUNNING;
@@ -185,8 +180,6 @@ out:
 	d_fnend(4, dev, "(hwahc %p) = %d\n", hwahc, result);
 	return result;
 
-error_stop:
-	__wa_stop(&hwahc->wa);
 error_set_cluster_id:
 	wusb_cluster_id_put(wusbhc->cluster_id);
 error_cluster_id_get:
@@ -194,39 +187,6 @@ error_cluster_id_get:
 
 }
 
-/*
- * FIXME: break this function up
- */
-static int __hwahc_op_wusbhc_start(struct wusbhc *wusbhc)
-{
-	int result;
-	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
-	struct device *dev = &hwahc->wa.usb_iface->dev;
-
-	/* Set up a Host Info WUSB Information Element */
-	d_fnstart(4, dev, "(hwahc %p)\n", hwahc);
-	result = -ENOSPC;
-
-	result = __wa_set_feature(&hwahc->wa, WA_ENABLE);
-	if (result < 0) {
-		dev_err(dev, "error commanding HC to start: %d\n", result);
-		goto error_stop;
-	}
-	result = __wa_wait_status(&hwahc->wa, WA_ENABLE, WA_ENABLE);
-	if (result < 0) {
-		dev_err(dev, "error waiting for HC to start: %d\n", result);
-		goto error_stop;
-	}
-	result = 0;
-out:
-	d_fnend(4, dev, "(hwahc %p) = %d\n", hwahc, result);
-	return result;
-
-error_stop:
-	result = __wa_clear_feature(&hwahc->wa, WA_ENABLE);
-	goto out;
-}
-
 static int hwahc_op_suspend(struct usb_hcd *usb_hcd, pm_message_t msg)
 {
 	struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd);
@@ -246,18 +206,6 @@ static int hwahc_op_resume(struct usb_hcd *usb_hcd)
 	return -ENOSYS;
 }
 
-static void __hwahc_op_wusbhc_stop(struct wusbhc *wusbhc)
-{
-	int result;
-	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
-	struct device *dev = &hwahc->wa.usb_iface->dev;
-
-	d_fnstart(4, dev, "(hwahc %p)\n", hwahc);
-	/* Nothing for now */
-	d_fnend(4, dev, "(hwahc %p) = %d\n", hwahc, result);
-	return;
-}
-
 /*
  * No need to abort pipes, as when this is called, all the children
  * has been disconnected and that has done it [through
@@ -275,8 +223,6 @@ static void hwahc_op_stop(struct usb_hcd *usb_hcd)
 	d_fnstart(4, dev, "(hwahc %p)\n", hwahc);
 	mutex_lock(&wusbhc->mutex);
 	wusbhc_stop(wusbhc);
-	wa_nep_disarm(&hwahc->wa);
-	result = __wa_stop(&hwahc->wa);
 	wusb_cluster_id_put(wusbhc->cluster_id);
 	mutex_unlock(&wusbhc->mutex);
 	d_fnend(4, dev, "(hwahc %p) = %d\n", hwahc, result);
@@ -325,6 +271,54 @@ static void hwahc_op_endpoint_disable(struct usb_hcd *usb_hcd,
 	rpipe_ep_disable(&hwahc->wa, ep);
 }
 
+static int __hwahc_op_wusbhc_start(struct wusbhc *wusbhc)
+{
+	int result;
+	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
+	struct device *dev = &hwahc->wa.usb_iface->dev;
+
+	result = __wa_set_feature(&hwahc->wa, WA_ENABLE);
+	if (result < 0) {
+		dev_err(dev, "error commanding HC to start: %d\n", result);
+		goto error_stop;
+	}
+	result = __wa_wait_status(&hwahc->wa, WA_ENABLE, WA_ENABLE);
+	if (result < 0) {
+		dev_err(dev, "error waiting for HC to start: %d\n", result);
+		goto error_stop;
+	}
+	result = wa_nep_arm(&hwahc->wa, GFP_KERNEL);
+	if (result < 0) {
+		dev_err(dev, "cannot listen to notifications: %d\n", result);
+		goto error_stop;
+	}
+	return result;
+
+error_stop:
+	__wa_clear_feature(&hwahc->wa, WA_ENABLE);
+	return result;
+}
+
+static void __hwahc_op_wusbhc_stop(struct wusbhc *wusbhc, int delay)
+{
+	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
+	struct wahc *wa = &hwahc->wa;
+	u8 iface_no = wa->usb_iface->cur_altsetting->desc.bInterfaceNumber;
+	int ret;
+
+	ret = usb_control_msg(wa->usb_dev, usb_sndctrlpipe(wa->usb_dev, 0),
+			      WUSB_REQ_CHAN_STOP,
+			      USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
+			      delay * 1000,
+			      iface_no,
+			      NULL, 0, 1000 /* FIXME: arbitrary */);
+	if (ret == 0)
+		msleep(delay);
+
+	wa_nep_disarm(&hwahc->wa);
+	__wa_stop(&hwahc->wa);
+}
+
 /*
  * Set the UWB MAS allocation for the WUSB cluster
  *
diff --git a/drivers/usb/host/whci/whcd.h b/drivers/usb/host/whci/whcd.h
index 1d2a53bd39fd..1bbb8cb6bf80 100644
--- a/drivers/usb/host/whci/whcd.h
+++ b/drivers/usb/host/whci/whcd.h
@@ -136,7 +136,7 @@ int whc_do_gencmd(struct whc *whc, u32 cmd, u32 params, void *addr, size_t len);
 
 /* wusb.c */
 int whc_wusbhc_start(struct wusbhc *wusbhc);
-void whc_wusbhc_stop(struct wusbhc *wusbhc);
+void whc_wusbhc_stop(struct wusbhc *wusbhc, int delay);
 int whc_mmcie_add(struct wusbhc *wusbhc, u8 interval, u8 repeat_cnt,
 		  u8 handle, struct wuie_hdr *wuie);
 int whc_mmcie_rm(struct wusbhc *wusbhc, u8 handle);
diff --git a/drivers/usb/host/whci/whci-hc.h b/drivers/usb/host/whci/whci-hc.h
index bff1eb7a35cf..51df7e313b38 100644
--- a/drivers/usb/host/whci/whci-hc.h
+++ b/drivers/usb/host/whci/whci-hc.h
@@ -410,6 +410,8 @@ struct dn_buf_entry {
 #  define WUSBDNTSCTRL_SLOTS(s)    ((s) << 0)
 
 #define WUSBTIME             0x68
+#  define WUSBTIME_CHANNEL_TIME_MASK 0x00ffffff
+
 #define WUSBBPST             0x6c
 #define WUSBDIBUPDATED       0x70
 
diff --git a/drivers/usb/host/whci/wusb.c b/drivers/usb/host/whci/wusb.c
index 66e4ddcd961d..2befd475def4 100644
--- a/drivers/usb/host/whci/wusb.c
+++ b/drivers/usb/host/whci/wusb.c
@@ -64,8 +64,9 @@ static int whc_update_di(struct whc *whc, int idx)
 }
 
 /*
- * WHCI starts and stops MMCs based on there being a valid GTK so
- * these need only start/stop the asynchronous and periodic schedules.
+ * WHCI starts MMCs based on there being a valid GTK so these need
+ * only start/stop the asynchronous and periodic schedules and send a
+ * channel stop command.
  */
 
 int whc_wusbhc_start(struct wusbhc *wusbhc)
@@ -78,12 +79,20 @@ int whc_wusbhc_start(struct wusbhc *wusbhc)
 	return 0;
 }
 
-void whc_wusbhc_stop(struct wusbhc *wusbhc)
+void whc_wusbhc_stop(struct wusbhc *wusbhc, int delay)
 {
 	struct whc *whc = wusbhc_to_whc(wusbhc);
+	u32 stop_time, now_time;
+	int ret;
 
 	pzl_stop(whc);
 	asl_stop(whc);
+
+	now_time = le_readl(whc->base + WUSBTIME) & WUSBTIME_CHANNEL_TIME_MASK;
+	stop_time = (now_time + ((delay * 8) << 7)) & 0x00ffffff;
+	ret = whc_do_gencmd(whc, WUSBGENCMDSTS_CHAN_STOP, stop_time, NULL, 0);
+	if (ret == 0)
+		msleep(delay);
 }
 
 int whc_mmcie_add(struct wusbhc *wusbhc, u8 interval, u8 repeat_cnt,
diff --git a/drivers/usb/wusbcore/mmc.c b/drivers/usb/wusbcore/mmc.c
index cfa77a01cebd..af2aee0fdffa 100644
--- a/drivers/usb/wusbcore/mmc.c
+++ b/drivers/usb/wusbcore/mmc.c
@@ -250,18 +250,14 @@ error_alloc:
  * wusbhc_stop - stop transmitting MMCs
  * @wusbhc: the HC to stop
  *
- * Send a Host Disconnect IE, wait, remove all the MMCs (stop sending MMCs).
- *
- * If we can't allocate a Host Stop IE, screw it, we don't notify the
- * devices we are disconnecting...
+ * Stops the WUSB channel and removes the cluster reservation.
  */
 void wusbhc_stop(struct wusbhc *wusbhc)
 {
 	if (wusbhc->active) {
 		wusbhc->active = 0;
-		wusbhc->stop(wusbhc);
+		wusbhc->stop(wusbhc, WUSB_CHANNEL_STOP_DELAY_MS);
 		wusbhc_sec_stop(wusbhc);
-		__wusbhc_host_disconnect_ie(wusbhc);
 		wusbhc_devconnect_stop(wusbhc);
 		wusbhc_rsv_terminate(wusbhc);
 	}
diff --git a/drivers/usb/wusbcore/wusbhc.h b/drivers/usb/wusbcore/wusbhc.h
index d0c132434f1b..b9bdf5a5f11b 100644
--- a/drivers/usb/wusbcore/wusbhc.h
+++ b/drivers/usb/wusbcore/wusbhc.h
@@ -64,6 +64,13 @@
 #include <linux/uwb.h>
 #include <linux/usb/wusb.h>
 
+/*
+ * Time from a WUSB channel stop request to the last transmitted MMC.
+ *
+ * This needs to be > 4.096 ms in case no MMCs can be transmitted in
+ * zone 0.
+ */
+#define WUSB_CHANNEL_STOP_DELAY_MS 8
 
 /**
  * Wireless USB device
@@ -198,21 +205,18 @@ struct wusb_port {
  * @mmcies_max	   Max number of Information Elements this HC can send
  *                 in its MMC. Read-only.
  *
+ * @start          Start the WUSB channel.
+ *
+ * @stop           Stop the WUSB channel after the specified number of
+ *                 milliseconds.  Channel Stop IEs should be transmitted
+ *                 as required by [WUSB] 4.16.2.1.
+ *
  * @mmcie_add	   HC specific operation (WHCI or HWA) for adding an
  *                 MMCIE.
  *
  * @mmcie_rm	   HC specific operation (WHCI or HWA) for removing an
  *                 MMCIE.
  *
- * @enc_types	   Array which describes the encryptions methods
- *                 supported by the host as described in WUSB1.0 --
- *                 one entry per supported method. As of WUSB1.0 there
- *                 is only four methods, we make space for eight just in
- *                 case they decide to add some more (and pray they do
- *                 it in sequential order). if 'enc_types[enc_method]
- *                 != 0', then it is supported by the host. enc_method
- *                 is USB_ENC_TYPE*.
- *
  * @set_ptk:       Set the PTK and enable encryption for a device. Or, if
  *                 the supplied key is NULL, disable encryption for that
  *                 device.
@@ -269,7 +273,7 @@ struct wusbhc {
 	u8 mmcies_max;
 	/* FIXME: make wusbhc_ops? */
 	int (*start)(struct wusbhc *wusbhc);
-	void (*stop)(struct wusbhc *wusbhc);
+	void (*stop)(struct wusbhc *wusbhc, int delay);
 	int (*mmcie_add)(struct wusbhc *wusbhc, u8 interval, u8 repeat_cnt,
 			 u8 handle, struct wuie_hdr *wuie);
 	int (*mmcie_rm)(struct wusbhc *wusbhc, u8 handle);
diff --git a/include/linux/usb/wusb-wa.h b/include/linux/usb/wusb-wa.h
index a102561e7026..fb7c359bdfba 100644
--- a/include/linux/usb/wusb-wa.h
+++ b/include/linux/usb/wusb-wa.h
@@ -51,6 +51,7 @@ enum {
 	WUSB_REQ_GET_TIME       = 25,
 	WUSB_REQ_SET_STREAM_IDX = 26,
 	WUSB_REQ_SET_WUSB_MAS   = 27,
+	WUSB_REQ_CHAN_STOP      = 28,
 };
 
 
-- 
cgit v1.2.3


From 1cde7f68ced8d10a20dd2370e9d1d22ab3c1ea5c Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@csr.com>
Date: Mon, 27 Oct 2008 16:48:09 +0000
Subject: uwb: order IEs by element ID

ECMA-368 requires that IEs in a beacon must be sorted by element ID.  Most
hardware uses the ordering in the Set IE URC command so get the ordering
right on the host.

Also refactor the IE management code:
  - use uwb_ie_next() instead of uwb_ie_for_each().
  - remove unnecessary functions.
  - API is now only uwb_rc_ie_add() and uwb_rc_ie_rm().

Signed-off-by: David Vrabel <david.vrabel@csr.com>
---
 drivers/uwb/beacon.c           |  24 +--
 drivers/uwb/ie.c               | 463 +++++++++++++----------------------------
 drivers/uwb/lc-rc.c            |  25 ---
 drivers/uwb/uwb-internal.h     |  16 +-
 drivers/uwb/wlp/wlp-internal.h |   4 -
 include/linux/uwb.h            |  18 +-
 6 files changed, 172 insertions(+), 378 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/uwb/beacon.c b/drivers/uwb/beacon.c
index 46b18eec5026..ad823987cede 100644
--- a/drivers/uwb/beacon.c
+++ b/drivers/uwb/beacon.c
@@ -349,22 +349,22 @@ ssize_t uwb_bce_print_IEs(struct uwb_dev *uwb_dev, struct uwb_beca_e *bce,
 	ssize_t result = 0;
 	struct uwb_rc_evt_beacon *be;
 	struct uwb_beacon_frame *bf;
-	struct uwb_buf_ctx ctx = {
-		.buf = buf,
-		.bytes = 0,
-		.size = size
-	};
+	int ies_len;
+	struct uwb_ie_hdr *ies;
 
 	mutex_lock(&bce->mutex);
+
 	be = bce->be;
-	if (be == NULL)
-		goto out;
-	bf = (void *) be->BeaconInfo;
-	uwb_ie_for_each(uwb_dev, uwb_ie_dump_hex, &ctx,
-			bf->IEData, be->wBeaconInfoLength - sizeof(*bf));
-	result = ctx.bytes;
-out:
+	if (be) {
+		bf = (struct uwb_beacon_frame *)bce->be->BeaconInfo;
+		ies_len = be->wBeaconInfoLength - sizeof(struct uwb_beacon_frame);
+		ies = (struct uwb_ie_hdr *)bf->IEData;
+
+		result = uwb_ie_dump_hex(ies, ies_len, buf, size);
+	}
+
 	mutex_unlock(&bce->mutex);
+
 	return result;
 }
 
diff --git a/drivers/uwb/ie.c b/drivers/uwb/ie.c
index cf6f3d152b9d..ab976686175b 100644
--- a/drivers/uwb/ie.c
+++ b/drivers/uwb/ie.c
@@ -25,8 +25,6 @@
  */
 
 #include "uwb-internal.h"
-#define D_LOCAL 0
-#include <linux/uwb/debug.h>
 
 /**
  * uwb_ie_next - get the next IE in a buffer
@@ -60,6 +58,42 @@ struct uwb_ie_hdr *uwb_ie_next(void **ptr, size_t *len)
 }
 EXPORT_SYMBOL_GPL(uwb_ie_next);
 
+/**
+ * uwb_ie_dump_hex - print IEs to a character buffer
+ * @ies: the IEs to print.
+ * @len: length of all the IEs.
+ * @buf: the destination buffer.
+ * @size: size of @buf.
+ *
+ * Returns the number of characters written.
+ */
+int uwb_ie_dump_hex(const struct uwb_ie_hdr *ies, size_t len,
+		    char *buf, size_t size)
+{
+	void *ptr;
+	const struct uwb_ie_hdr *ie;
+	int r = 0;
+	u8 *d;
+
+	ptr = (void *)ies;
+	for (;;) {
+		ie = uwb_ie_next(&ptr, &len);
+		if (!ie)
+			break;
+
+		r += scnprintf(buf + r, size - r, "%02x %02x",
+			       (unsigned)ie->element_id,
+			       (unsigned)ie->length);
+		d = (uint8_t *)ie + sizeof(struct uwb_ie_hdr);
+		while (d != ptr && r < size)
+			r += scnprintf(buf + r, size - r, " %02x", (unsigned)*d++);
+		if (r < size)
+			buf[r++] = '\n';
+	};
+
+	return r;
+}
+
 /**
  * Get the IEs that a radio controller is sending in its beacon
  *
@@ -70,6 +104,7 @@ EXPORT_SYMBOL_GPL(uwb_ie_next);
  * anything. Once done with the iedata buffer, call
  * uwb_rc_ie_release(iedata). Don't call kfree on it.
  */
+static
 ssize_t uwb_rc_get_ie(struct uwb_rc *uwb_rc, struct uwb_rc_evt_get_ie **pget_ie)
 {
 	ssize_t result;
@@ -78,148 +113,35 @@ ssize_t uwb_rc_get_ie(struct uwb_rc *uwb_rc, struct uwb_rc_evt_get_ie **pget_ie)
 	struct uwb_rceb *reply = NULL;
 	struct uwb_rc_evt_get_ie *get_ie;
 
-	d_fnstart(3, dev, "(%p, %p)\n", uwb_rc, pget_ie);
-	result = -ENOMEM;
 	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
 	if (cmd == NULL)
-		goto error_kzalloc;
+		return -ENOMEM;
+
 	cmd->bCommandType = UWB_RC_CET_GENERAL;
 	cmd->wCommand = cpu_to_le16(UWB_RC_CMD_GET_IE);
 	result = uwb_rc_vcmd(uwb_rc, "GET_IE", cmd, sizeof(*cmd),
 			     UWB_RC_CET_GENERAL, UWB_RC_CMD_GET_IE,
 			     &reply);
+	kfree(cmd);
 	if (result < 0)
-		goto error_cmd;
+		return result;
+
 	get_ie = container_of(reply, struct uwb_rc_evt_get_ie, rceb);
 	if (result < sizeof(*get_ie)) {
 		dev_err(dev, "not enough data returned for decoding GET IE "
 			"(%zu bytes received vs %zu needed)\n",
 			result, sizeof(*get_ie));
-		result = -EINVAL;
+		return -EINVAL;
 	} else if (result < sizeof(*get_ie) + le16_to_cpu(get_ie->wIELength)) {
 		dev_err(dev, "not enough data returned for decoding GET IE "
 			"payload (%zu bytes received vs %zu needed)\n", result,
 			sizeof(*get_ie) + le16_to_cpu(get_ie->wIELength));
-		result = -EINVAL;
-	} else
-		*pget_ie = get_ie;
-error_cmd:
-	kfree(cmd);
-error_kzalloc:
-	d_fnend(3, dev, "(%p, %p) = %d\n", uwb_rc, pget_ie, (int)result);
-	return result;
-}
-EXPORT_SYMBOL_GPL(uwb_rc_get_ie);
-
-
-/*
- * Given a pointer to an IE, print it in ASCII/hex followed by a new line
- *
- * @ie_hdr: pointer to the IE header. Length is in there, and it is
- *          guaranteed that the ie_hdr->length bytes following it are
- *          safely accesible.
- *
- * @_data: context data passed from uwb_ie_for_each(), an struct output_ctx
- */
-int uwb_ie_dump_hex(struct uwb_dev *uwb_dev, const struct uwb_ie_hdr *ie_hdr,
-		    size_t offset, void *_ctx)
-{
-	struct uwb_buf_ctx *ctx = _ctx;
-	const u8 *pl = (void *)(ie_hdr + 1);
-	u8 pl_itr;
-
-	ctx->bytes += scnprintf(ctx->buf + ctx->bytes, ctx->size - ctx->bytes,
-				"%02x %02x ", (unsigned) ie_hdr->element_id,
-				(unsigned) ie_hdr->length);
-	pl_itr = 0;
-	while (pl_itr < ie_hdr->length && ctx->bytes < ctx->size)
-		ctx->bytes += scnprintf(ctx->buf + ctx->bytes,
-					ctx->size - ctx->bytes,
-					"%02x ", (unsigned) pl[pl_itr++]);
-	if (ctx->bytes < ctx->size)
-		ctx->buf[ctx->bytes++] = '\n';
-	return 0;
-}
-EXPORT_SYMBOL_GPL(uwb_ie_dump_hex);
-
-
-/**
- * Verify that a pointer in a buffer points to valid IE
- *
- * @start: pointer to start of buffer in which IE appears
- * @itr:   pointer to IE inside buffer that will be verified
- * @top:   pointer to end of buffer
- *
- * @returns: 0 if IE is valid, <0 otherwise
- *
- * Verification involves checking that the buffer can contain a
- * header and the amount of data reported in the IE header can be found in
- * the buffer.
- */
-static
-int uwb_rc_ie_verify(struct uwb_dev *uwb_dev, const void *start,
-		     const void *itr, const void *top)
-{
-	struct device *dev = &uwb_dev->dev;
-	const struct uwb_ie_hdr *ie_hdr;
-
-	if (top - itr < sizeof(*ie_hdr)) {
-		dev_err(dev, "Bad IE: no data to decode header "
-			"(%zu bytes left vs %zu needed) at offset %zu\n",
-			top - itr, sizeof(*ie_hdr), itr - start);
-		return -EINVAL;
-	}
-	ie_hdr = itr;
-	itr += sizeof(*ie_hdr);
-	if (top - itr < ie_hdr->length) {
-		dev_err(dev, "Bad IE: not enough data for payload "
-			"(%zu bytes left vs %zu needed) at offset %zu\n",
-			top - itr, (size_t)ie_hdr->length,
-			(void *)ie_hdr - start);
 		return -EINVAL;
 	}
-	return 0;
-}
 
-
-/**
- * Walk a buffer filled with consecutive IE's a buffer
- *
- * @uwb_dev: UWB device this IEs belong to (for err messages mainly)
- *
- * @fn: function to call with each IE; if it returns 0, we keep
- *      traversing the buffer. If it returns !0, we'll stop and return
- *      that value.
- *
- * @data: pointer passed to @fn
- *
- * @buf: buffer where the consecutive IEs are located
- *
- * @size: size of @buf
- *
- * Each IE is checked for basic correctness (there is space left for
- * the header and the payload). If that test is failed, we stop
- * processing. For every good IE, @fn is called.
- */
-ssize_t uwb_ie_for_each(struct uwb_dev *uwb_dev, uwb_ie_f fn, void *data,
-			const void *buf, size_t size)
-{
-	ssize_t result = 0;
-	const struct uwb_ie_hdr *ie_hdr;
-	const void *itr = buf, *top = itr + size;
-
-	while (itr < top) {
-		if (uwb_rc_ie_verify(uwb_dev, buf, itr, top) != 0)
-			break;
-		ie_hdr = itr;
-		itr += sizeof(*ie_hdr) + ie_hdr->length;
-		result = fn(uwb_dev, ie_hdr, itr - buf, data);
-		if (result != 0)
-			break;
-	}
+	*pget_ie = get_ie;
 	return result;
 }
-EXPORT_SYMBOL_GPL(uwb_ie_for_each);
 
 
 /**
@@ -256,70 +178,6 @@ error_cmd:
 	return result;
 }
 
-/**
- * Determine by IE id if IE is host settable
- * WUSB 1.0 [8.6.2.8 Table 8.85]
- *
- * EXCEPTION:
- * All but UWB_IE_WLP appears in Table 8.85 from WUSB 1.0. Setting this IE
- * is required for the WLP substack to perform association with its WSS so
- * we hope that the WUSB spec will be changed to reflect this.
- */
-static
-int uwb_rc_ie_is_host_settable(enum uwb_ie element_id)
-{
-	if (element_id == UWB_PCA_AVAILABILITY ||
-	    element_id == UWB_BP_SWITCH_IE ||
-	    element_id == UWB_MAC_CAPABILITIES_IE ||
-	    element_id == UWB_PHY_CAPABILITIES_IE ||
-	    element_id == UWB_APP_SPEC_PROBE_IE ||
-	    element_id == UWB_IDENTIFICATION_IE ||
-	    element_id == UWB_MASTER_KEY_ID_IE ||
-	    element_id == UWB_IE_WLP ||
-	    element_id == UWB_APP_SPEC_IE)
-		return 1;
-	return 0;
-}
-
-
-/**
- * Extract Host Settable IEs from IE
- *
- * @ie_data: pointer to buffer containing all IEs
- * @size:    size of buffer
- *
- * @returns: length of buffer that only includes host settable IEs
- *
- * Given a buffer of IEs we move all Host Settable IEs to front of buffer
- * by overwriting the IEs that are not Host Settable.
- * Buffer length is adjusted accordingly.
- */
-static
-ssize_t uwb_rc_parse_host_settable_ie(struct uwb_dev *uwb_dev,
-				      void *ie_data, size_t size)
-{
-	size_t new_len = size;
-	struct uwb_ie_hdr *ie_hdr;
-	size_t ie_length;
-	void *itr = ie_data, *top = itr + size;
-
-	while (itr < top) {
-		if (uwb_rc_ie_verify(uwb_dev, ie_data, itr, top) != 0)
-			break;
-		ie_hdr = itr;
-		ie_length = sizeof(*ie_hdr) + ie_hdr->length;
-		if (uwb_rc_ie_is_host_settable(ie_hdr->element_id)) {
-			itr += ie_length;
-		} else {
-			memmove(itr, itr + ie_length, top - (itr + ie_length));
-			new_len -= ie_length;
-			top -= ie_length;
-		}
-	}
-	return new_len;
-}
-
-
 /* Cleanup the whole IE management subsystem */
 void uwb_rc_ie_init(struct uwb_rc *uwb_rc)
 {
@@ -328,49 +186,34 @@ void uwb_rc_ie_init(struct uwb_rc *uwb_rc)
 
 
 /**
- * Set up cache for host settable IEs currently being transmitted
+ * uwb_rc_ie_setup - setup a radio controller's IE manager
+ * @uwb_rc: the radio controller.
  *
- * First we just call GET-IE to get the current IEs being transmitted
- * (or we workaround and pretend we did) and (because the format is
- * the same) reuse that as the IE cache (with the command prefix, as
- * explained in 'struct uwb_rc').
+ * The current set of IEs are obtained from the hardware with a GET-IE
+ * command (since the radio controller is not yet beaconing this will
+ * be just the hardware's MAC and PHY Capability IEs).
  *
- * @returns: size of cache created
+ * Returns 0 on success; -ve on an error.
  */
-ssize_t uwb_rc_ie_setup(struct uwb_rc *uwb_rc)
+int uwb_rc_ie_setup(struct uwb_rc *uwb_rc)
 {
-	struct device *dev = &uwb_rc->uwb_dev.dev;
-	ssize_t result;
-	size_t capacity;
-	struct uwb_rc_evt_get_ie *ie_info;
+	struct uwb_rc_evt_get_ie *ie_info = NULL;
+	int capacity;
+
+	capacity = uwb_rc_get_ie(uwb_rc, &ie_info);
+	if (capacity < 0)
+		return capacity;
 
-	d_fnstart(3, dev, "(%p)\n", uwb_rc);
 	mutex_lock(&uwb_rc->ies_mutex);
-	result = uwb_rc_get_ie(uwb_rc, &ie_info);
-	if (result < 0)
-		goto error_get_ie;
-	capacity = result;
-	d_printf(5, dev, "Got IEs %zu bytes (%zu long at %p)\n", result,
-		 (size_t)le16_to_cpu(ie_info->wIELength), ie_info);
-
-	/* Remove IEs that host should not set. */
-	result = uwb_rc_parse_host_settable_ie(&uwb_rc->uwb_dev,
-			ie_info->IEData, le16_to_cpu(ie_info->wIELength));
-	if (result < 0)
-		goto error_parse;
-	d_printf(5, dev, "purged non-settable IEs to %zu bytes\n", result);
-	uwb_rc->ies = (void *) ie_info;
+
+	uwb_rc->ies = (struct uwb_rc_cmd_set_ie *)ie_info;
 	uwb_rc->ies->rccb.bCommandType = UWB_RC_CET_GENERAL;
 	uwb_rc->ies->rccb.wCommand = cpu_to_le16(UWB_RC_CMD_SET_IE);
 	uwb_rc->ies_capacity = capacity;
-	d_printf(5, dev, "IE cache at %p %zu bytes, %zu capacity\n",
-		 ie_info, result, capacity);
-	result = 0;
-error_parse:
-error_get_ie:
+
 	mutex_unlock(&uwb_rc->ies_mutex);
-	d_fnend(3, dev, "(%p) = %zu\n", uwb_rc, result);
-	return result;
+
+	return 0;
 }
 
 
@@ -383,26 +226,47 @@ void uwb_rc_ie_release(struct uwb_rc *uwb_rc)
 }
 
 
-static
-int __acc_size(struct uwb_dev *uwb_dev, const struct uwb_ie_hdr *ie_hdr,
-	       size_t offset, void *_ctx)
+static int uwb_rc_ie_add_one(struct uwb_rc *rc, const struct uwb_ie_hdr *new_ie)
 {
-	size_t *acc_size = _ctx;
-	*acc_size += sizeof(*ie_hdr) + ie_hdr->length;
-	d_printf(6, &uwb_dev->dev, "new acc size %zu\n", *acc_size);
+	struct uwb_rc_cmd_set_ie *new_ies;
+	void *ptr, *prev_ie;
+	struct uwb_ie_hdr *ie;
+	size_t length, new_ie_len, new_capacity, size, prev_size;
+
+	length = le16_to_cpu(rc->ies->wIELength);
+	new_ie_len = sizeof(struct uwb_ie_hdr) + new_ie->length;
+	new_capacity = sizeof(struct uwb_rc_cmd_set_ie) + length + new_ie_len;
+
+	if (new_capacity > rc->ies_capacity) {
+		new_ies = krealloc(rc->ies, new_capacity, GFP_KERNEL);
+		if (!new_ies)
+			return -ENOMEM;
+		rc->ies = new_ies;
+	}
+
+	ptr = rc->ies->IEData;
+	size = length;
+	for (;;) {
+		prev_ie = ptr;
+		prev_size = size;
+		ie = uwb_ie_next(&ptr, &size);
+		if (!ie || ie->element_id > new_ie->element_id)
+			break;
+	}
+
+	memmove(prev_ie + new_ie_len, prev_ie, prev_size);
+	memcpy(prev_ie, new_ie, new_ie_len);
+	rc->ies->wIELength = cpu_to_le16(length + new_ie_len);
+
 	return 0;
 }
 
-
 /**
- * Add a new IE to IEs currently being transmitted by device
- *
+ * uwb_rc_ie_add - add new IEs to the radio controller's beacon
+ * @uwb_rc: the radio controller.
  * @ies: the buffer containing the new IE or IEs to be added to
- *       the device's beacon. The buffer will be verified for
- *       consistence (meaning the headers should be right) and
- *       consistent with the buffer size.
- * @size: size of @ies (in bytes, total buffer size)
- * @returns: 0 if ok, <0 errno code on error
+ *       the device's beacon.
+ * @size: length of all the IEs.
  *
  * According to WHCI 0.95 [4.13.6] the driver will only receive the RCEB
  * after the device sent the first beacon that includes the IEs specified
@@ -411,66 +275,40 @@ int __acc_size(struct uwb_dev *uwb_dev, const struct uwb_ie_hdr *ie_hdr,
  * we start beaconing.
  *
  * Setting an IE on the device will overwrite all current IEs in device. So
- * we take the current IEs being transmitted by the device, append the
+ * we take the current IEs being transmitted by the device, insert the
  * new one, and call SET IE with all the IEs needed.
  *
- * The local IE cache will only be updated with the new IE if SET IE
- * completed successfully.
+ * Returns 0 on success; or -ENOMEM.
  */
 int uwb_rc_ie_add(struct uwb_rc *uwb_rc,
 		  const struct uwb_ie_hdr *ies, size_t size)
 {
 	int result = 0;
-	struct device *dev = &uwb_rc->uwb_dev.dev;
-	struct uwb_rc_cmd_set_ie *new_ies;
-	size_t ies_size, total_size, acc_size = 0;
-
-	if (uwb_rc->ies == NULL)
-		return -ESHUTDOWN;
-	uwb_ie_for_each(&uwb_rc->uwb_dev, __acc_size, &acc_size, ies, size);
-	if (acc_size != size) {
-		dev_err(dev, "BUG: bad IEs, misconstructed headers "
-			"[%zu bytes reported vs %zu calculated]\n",
-			size, acc_size);
-		WARN_ON(1);
-		return -EINVAL;
-	}
+	void *ptr;
+	const struct uwb_ie_hdr *ie;
+
 	mutex_lock(&uwb_rc->ies_mutex);
-	ies_size = le16_to_cpu(uwb_rc->ies->wIELength);
-	total_size = sizeof(*uwb_rc->ies) + ies_size;
-	if (total_size + size > uwb_rc->ies_capacity) {
-		d_printf(4, dev, "Reallocating IE cache from %p capacity %zu "
-			 "to capacity %zu\n", uwb_rc->ies, uwb_rc->ies_capacity,
-			 total_size + size);
-		new_ies = kzalloc(total_size + size, GFP_KERNEL);
-		if (new_ies == NULL) {
-			dev_err(dev, "No memory for adding new IE\n");
-			result = -ENOMEM;
-			goto error_alloc;
-		}
-		memcpy(new_ies, uwb_rc->ies, total_size);
-		uwb_rc->ies_capacity = total_size + size;
-		kfree(uwb_rc->ies);
-		uwb_rc->ies = new_ies;
-		d_printf(4, dev, "New IE cache at %p capacity %zu\n",
-			 uwb_rc->ies, uwb_rc->ies_capacity);
+
+	ptr = (void *)ies;
+	for (;;) {
+		ie = uwb_ie_next(&ptr, &size);
+		if (!ie)
+			break;
+
+		result = uwb_rc_ie_add_one(uwb_rc, ie);
+		if (result < 0)
+			break;
 	}
-	memcpy((void *)uwb_rc->ies + total_size, ies, size);
-	uwb_rc->ies->wIELength = cpu_to_le16(ies_size + size);
-	if (uwb_rc->beaconing != -1) {
-		result = uwb_rc_set_ie(uwb_rc, uwb_rc->ies);
-		if (result < 0) {
-			dev_err(dev, "Cannot set new IE on device: %d\n",
-				result);
-			uwb_rc->ies->wIELength = cpu_to_le16(ies_size);
+	if (result >= 0) {
+		if (size == 0) {
+			if (uwb_rc->beaconing != -1)
+				result = uwb_rc_set_ie(uwb_rc, uwb_rc->ies);
 		} else
-			result = 0;
+			result = -EINVAL;
 	}
-	d_printf(4, dev, "IEs now occupy %hu bytes of %zu capacity at %p\n",
-		 le16_to_cpu(uwb_rc->ies->wIELength), uwb_rc->ies_capacity,
-		 uwb_rc->ies);
-error_alloc:
+
 	mutex_unlock(&uwb_rc->ies_mutex);
+
 	return result;
 }
 EXPORT_SYMBOL_GPL(uwb_rc_ie_add);
@@ -489,53 +327,52 @@ EXPORT_SYMBOL_GPL(uwb_rc_ie_add);
  * beacon. We don't reallocate, we just mark the size smaller.
  */
 static
-int uwb_rc_ie_cache_rm(struct uwb_rc *uwb_rc, enum uwb_ie to_remove)
+void uwb_rc_ie_cache_rm(struct uwb_rc *uwb_rc, enum uwb_ie to_remove)
 {
-	struct uwb_ie_hdr *ie_hdr;
-	size_t new_len = le16_to_cpu(uwb_rc->ies->wIELength);
-	void *itr = uwb_rc->ies->IEData;
-	void *top = itr + new_len;
-
-	while (itr < top) {
-		ie_hdr = itr;
-		if (ie_hdr->element_id != to_remove) {
-			itr += sizeof(*ie_hdr) + ie_hdr->length;
-		} else {
-			int ie_length;
-			ie_length = sizeof(*ie_hdr) + ie_hdr->length;
-			if (top - itr != ie_length)
-				memmove(itr, itr + ie_length, top - itr + ie_length);
-			top -= ie_length;
-			new_len -= ie_length;
+	struct uwb_ie_hdr *ie;
+	size_t len = le16_to_cpu(uwb_rc->ies->wIELength);
+	void *ptr;
+	size_t size;
+
+	ptr = uwb_rc->ies->IEData;
+	size = len;
+	for (;;) {
+		ie = uwb_ie_next(&ptr, &size);
+		if (!ie)
+			break;
+		if (ie->element_id == to_remove) {
+			len -= sizeof(struct uwb_ie_hdr) + ie->length;
+			memmove(ie, ptr, size);
+			ptr = ie;
 		}
 	}
-	uwb_rc->ies->wIELength = cpu_to_le16(new_len);
-	return 0;
+	uwb_rc->ies->wIELength = cpu_to_le16(len);
 }
 
 
 /**
- * Remove an IE currently being transmitted by device
+ * uwb_rc_ie_rm - remove an IE from the radio controller's beacon
+ * @uwb_rc: the radio controller.
+ * @element_id: the element ID of the IE to remove.
  *
- * @element_id: id of IE to be removed from device's beacon
+ * Only IEs previously added with uwb_rc_ie_add() may be removed.
+ *
+ * Returns 0 on success; or -ve the SET-IE command to the radio
+ * controller failed.
  */
 int uwb_rc_ie_rm(struct uwb_rc *uwb_rc, enum uwb_ie element_id)
 {
-	struct device *dev = &uwb_rc->uwb_dev.dev;
-	int result;
+	int result = 0;
 
-	if (uwb_rc->ies == NULL)
-		return -ESHUTDOWN;
 	mutex_lock(&uwb_rc->ies_mutex);
-	result = uwb_rc_ie_cache_rm(uwb_rc, element_id);
-	if (result < 0)
-		dev_err(dev, "Cannot remove IE from cache.\n");
-	if (uwb_rc->beaconing != -1) {
+
+	uwb_rc_ie_cache_rm(uwb_rc, element_id);
+
+	if (uwb_rc->beaconing != -1)
 		result = uwb_rc_set_ie(uwb_rc, uwb_rc->ies);
-		if (result < 0)
-			dev_err(dev, "Cannot set new IE on device.\n");
-	}
+
 	mutex_unlock(&uwb_rc->ies_mutex);
+
 	return result;
 }
 EXPORT_SYMBOL_GPL(uwb_rc_ie_rm);
diff --git a/drivers/uwb/lc-rc.c b/drivers/uwb/lc-rc.c
index ee5772f00d42..1129e8767b58 100644
--- a/drivers/uwb/lc-rc.c
+++ b/drivers/uwb/lc-rc.c
@@ -468,28 +468,3 @@ void uwb_rc_put(struct uwb_rc *rc)
 	__uwb_rc_put(rc);
 }
 EXPORT_SYMBOL_GPL(uwb_rc_put);
-
-/*
- *
- *
- */
-ssize_t uwb_rc_print_IEs(struct uwb_rc *uwb_rc, char *buf, size_t size)
-{
-	ssize_t result;
-	struct uwb_rc_evt_get_ie *ie_info;
-	struct uwb_buf_ctx ctx;
-
-	result = uwb_rc_get_ie(uwb_rc, &ie_info);
-	if (result < 0)
-		goto error_get_ie;
-	ctx.buf = buf;
-	ctx.size = size;
-	ctx.bytes = 0;
-	uwb_ie_for_each(&uwb_rc->uwb_dev, uwb_ie_dump_hex, &ctx,
-			ie_info->IEData, result - sizeof(*ie_info));
-	result = ctx.bytes;
-	kfree(ie_info);
-error_get_ie:
-	return result;
-}
-
diff --git a/drivers/uwb/uwb-internal.h b/drivers/uwb/uwb-internal.h
index 2ad307d12961..983ebc4dd8d5 100644
--- a/drivers/uwb/uwb-internal.h
+++ b/drivers/uwb/uwb-internal.h
@@ -66,14 +66,14 @@ extern int uwb_rc_scan(struct uwb_rc *rc,
 		       unsigned channel, enum uwb_scan_type type,
 		       unsigned bpst_offset);
 extern int uwb_rc_send_all_drp_ie(struct uwb_rc *rc);
-extern ssize_t uwb_rc_print_IEs(struct uwb_rc *rc, char *, size_t);
-extern void uwb_rc_ie_init(struct uwb_rc *);
-extern void uwb_rc_ie_init(struct uwb_rc *);
-extern ssize_t uwb_rc_ie_setup(struct uwb_rc *);
-extern void uwb_rc_ie_release(struct uwb_rc *);
-extern int uwb_rc_ie_add(struct uwb_rc *,
-			 const struct uwb_ie_hdr *, size_t);
-extern int uwb_rc_ie_rm(struct uwb_rc *, enum uwb_ie);
+
+void uwb_rc_ie_init(struct uwb_rc *);
+int uwb_rc_ie_setup(struct uwb_rc *);
+void uwb_rc_ie_release(struct uwb_rc *);
+int uwb_ie_dump_hex(const struct uwb_ie_hdr *ies, size_t len,
+		    char *buf, size_t size);
+int uwb_rc_set_ie(struct uwb_rc *, struct uwb_rc_cmd_set_ie *);
+
 
 extern const char *uwb_rc_strerror(unsigned code);
 
diff --git a/drivers/uwb/wlp/wlp-internal.h b/drivers/uwb/wlp/wlp-internal.h
index 1c94fabfb1a7..3e8d5de7c5b9 100644
--- a/drivers/uwb/wlp/wlp-internal.h
+++ b/drivers/uwb/wlp/wlp-internal.h
@@ -42,10 +42,6 @@ enum wlp_wss_connect {
 extern struct kobj_type wss_ktype;
 extern struct attribute_group wss_attr_group;
 
-extern int uwb_rc_ie_add(struct uwb_rc *, const struct uwb_ie_hdr *, size_t);
-extern int uwb_rc_ie_rm(struct uwb_rc *, enum uwb_ie);
-
-
 /* This should be changed to a dynamic array where entries are sorted
  * by eth_addr and search is done in a binary form
  *
diff --git a/include/linux/uwb.h b/include/linux/uwb.h
index 010ee708304d..6d93f54b8879 100644
--- a/include/linux/uwb.h
+++ b/include/linux/uwb.h
@@ -444,7 +444,6 @@ ssize_t uwb_rc_vcmd(struct uwb_rc *rc, const char *cmd_name,
 		    struct uwb_rccb *cmd, size_t cmd_size,
 		    u8 expected_type, u16 expected_event,
 		    struct uwb_rceb **preply);
-ssize_t uwb_rc_get_ie(struct uwb_rc *, struct uwb_rc_evt_get_ie **);
 int uwb_bg_joined(struct uwb_rc *rc);
 
 size_t __uwb_addr_print(char *, size_t, const unsigned char *, int);
@@ -653,22 +652,9 @@ static inline int edc_inc(struct edc *err_hist, u16 max_err, u16 timeframe)
 
 /* Information Element handling */
 
-/* For representing the state of writing to a buffer when iterating */
-struct uwb_buf_ctx {
-	char *buf;
-	size_t bytes, size;
-};
-
-typedef int (*uwb_ie_f)(struct uwb_dev *, const struct uwb_ie_hdr *,
-			size_t, void *);
 struct uwb_ie_hdr *uwb_ie_next(void **ptr, size_t *len);
-ssize_t uwb_ie_for_each(struct uwb_dev *uwb_dev, uwb_ie_f fn, void *data,
-			const void *buf, size_t size);
-int uwb_ie_dump_hex(struct uwb_dev *, const struct uwb_ie_hdr *,
-		    size_t, void *);
-int uwb_rc_set_ie(struct uwb_rc *, struct uwb_rc_cmd_set_ie *);
-struct uwb_ie_hdr *uwb_ie_next(void **ptr, size_t *len);
-
+int uwb_rc_ie_add(struct uwb_rc *uwb_rc, const struct uwb_ie_hdr *ies, size_t size);
+int uwb_rc_ie_rm(struct uwb_rc *uwb_rc, enum uwb_ie element_id);
 
 /*
  * Transmission statistics
-- 
cgit v1.2.3


From d98d38f2014ab79f28c126ff175d034891f7aefc Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Wed, 29 Oct 2008 14:24:09 -0700
Subject: mutex: improve header comment to be actually informative about the
 API

Impact: improve documentation

It's nice to say that mutex_trylock follows the spin_trylock convention.
It's a lot nicer if the comment also says which that is...  make it so.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mutex.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index bc6da10ceee0..7a0e5c4f8072 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -144,6 +144,8 @@ extern int __must_check mutex_lock_killable(struct mutex *lock);
 /*
  * NOTE: mutex_trylock() follows the spin_trylock() convention,
  *       not the down_trylock() convention!
+ *
+ * Returns 1 if the mutex has been acquired successfully, and 0 on contention.
  */
 extern int mutex_trylock(struct mutex *lock);
 extern void mutex_unlock(struct mutex *lock);
-- 
cgit v1.2.3


From ae9eba0e2744f1aa15cdc97cd39277a84723ae23 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Thu, 30 Oct 2008 20:06:16 +0100
Subject: uwb: struct device - replace bus_id with dev_name(), dev_set_name()

Cc: David Vrabel <david.vrabel@csr.com>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-Off-By: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: David Vrabel <david.vrabel@csr.com>
---
 drivers/uwb/umc-dev.c     | 3 +--
 drivers/uwb/whci.c        | 2 +-
 include/linux/uwb/debug.h | 2 +-
 3 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/uwb/umc-dev.c b/drivers/uwb/umc-dev.c
index aa44e1c1a102..53207e14cd8f 100644
--- a/drivers/uwb/umc-dev.c
+++ b/drivers/uwb/umc-dev.c
@@ -31,8 +31,7 @@ struct umc_dev *umc_device_create(struct device *parent, int n)
 
 	umc = kzalloc(sizeof(struct umc_dev), GFP_KERNEL);
 	if (umc) {
-		snprintf(umc->dev.bus_id, sizeof(umc->dev.bus_id), "%s-%d",
-			 parent->bus_id, n);
+		dev_set_name(&umc->dev, "%s-%d", dev_name(parent), n);
 		umc->dev.parent  = parent;
 		umc->dev.bus     = &umc_bus_type;
 		umc->dev.release = umc_device_release;
diff --git a/drivers/uwb/whci.c b/drivers/uwb/whci.c
index 3df2388f908f..e626467f95e3 100644
--- a/drivers/uwb/whci.c
+++ b/drivers/uwb/whci.c
@@ -111,7 +111,7 @@ static int whci_add_cap(struct whci_card *card, int n)
 		+ UWBCAPDATA_TO_OFFSET(capdata);
 	umc->resource.end    = umc->resource.start
 		+ (n == 0 ? 0x20 : UWBCAPDATA_TO_SIZE(capdata)) - 1;
-	umc->resource.name   = umc->dev.bus_id;
+	umc->resource.name   = dev_name(&umc->dev);
 	umc->resource.flags  = card->pci->resource[bar].flags;
 	umc->resource.parent = &card->pci->resource[bar];
 	umc->irq             = card->pci->irq;
diff --git a/include/linux/uwb/debug.h b/include/linux/uwb/debug.h
index a86a73fe303f..67a240527145 100644
--- a/include/linux/uwb/debug.h
+++ b/include/linux/uwb/debug.h
@@ -60,7 +60,7 @@ do {									\
 				snprintf(__head, sizeof(__head),	\
 					 "%s %s: ",			\
 					 dev_driver_string(__dev),	\
-					 __dev->bus_id);		\
+					 dev_name(__dev));		\
 		}							\
 		printk(KERN_ERR "%s%s" _tag ": " f, __head,		\
 			__func__, ## a);				\
-- 
cgit v1.2.3


From 29cbda77a67cf263d636feea65d3bbc9c7de2e24 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 3 Nov 2008 09:16:39 -0800
Subject: rcu: increase RCU stall-check timeouts

Impact: increase timeout of debug check feature

Increase RCU stall period timeouts to reduce the likelyhood of
false positives.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/rcuclassic.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h
index 5f89b62e6983..301dda829e37 100644
--- a/include/linux/rcuclassic.h
+++ b/include/linux/rcuclassic.h
@@ -41,7 +41,7 @@
 #include <linux/seqlock.h>
 
 #ifdef CONFIG_RCU_CPU_STALL_DETECTOR
-#define RCU_SECONDS_TILL_STALL_CHECK	( 3 * HZ) /* for rcp->jiffies_stall */
+#define RCU_SECONDS_TILL_STALL_CHECK	(10 * HZ) /* for rcp->jiffies_stall */
 #define RCU_SECONDS_TILL_STALL_RECHECK	(30 * HZ) /* for rcp->jiffies_stall */
 #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
 
-- 
cgit v1.2.3


From c5995bd2819dc577d0b32b26be0836d16c977e24 Mon Sep 17 00:00:00 2001
From: Stefano Panella <stefano.panella@csr.com>
Date: Tue, 4 Nov 2008 14:06:31 +0000
Subject: uwb: infrastructure for handling Relinquish Request IEs

The structures and event handler needed to handle Relinish Request IEs
received from neighbors.  Nothing is done with these IEs yet.

Signed-off-by: Stefano Panella <stefano.panella@csr.com>
Signed-off-by: David Vrabel <david.vrabel@csr.com>
---
 drivers/uwb/Makefile       |  1 +
 drivers/uwb/ie-rcv.c       | 55 ++++++++++++++++++++++++++++++++++++++++++++++
 drivers/uwb/uwb-internal.h |  1 +
 drivers/uwb/uwbd.c         |  4 ++++
 include/linux/uwb/spec.h   | 28 +++++++++++++++++++++++
 5 files changed, 89 insertions(+)
 create mode 100644 drivers/uwb/ie-rcv.c

(limited to 'include/linux')

diff --git a/drivers/uwb/Makefile b/drivers/uwb/Makefile
index 257e6908304c..2b99c3e61671 100644
--- a/drivers/uwb/Makefile
+++ b/drivers/uwb/Makefile
@@ -13,6 +13,7 @@ uwb-objs :=		\
 	drp-ie.o	\
 	est.o		\
 	ie.o		\
+	ie-rcv.o	\
 	lc-dev.o	\
 	lc-rc.o		\
 	neh.o		\
diff --git a/drivers/uwb/ie-rcv.c b/drivers/uwb/ie-rcv.c
new file mode 100644
index 000000000000..917e6d78a798
--- /dev/null
+++ b/drivers/uwb/ie-rcv.c
@@ -0,0 +1,55 @@
+/*
+ * Ultra Wide Band
+ * IE Received notification handling.
+ *
+ * Copyright (C) 2008 Cambridge Silicon Radio Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/bitmap.h>
+#include "uwb-internal.h"
+
+/*
+ * Process an incoming IE Received notification.
+ */
+int uwbd_evt_handle_rc_ie_rcv(struct uwb_event *evt)
+{
+	int result = -EINVAL;
+	struct device *dev = &evt->rc->uwb_dev.dev;
+	struct uwb_rc_evt_ie_rcv *iercv;
+	size_t iesize;
+
+	/* Is there enough data to decode it? */
+	if (evt->notif.size < sizeof(*iercv)) {
+		dev_err(dev, "IE Received notification: Not enough data to "
+			"decode (%zu vs %zu bytes needed)\n",
+			evt->notif.size, sizeof(*iercv));
+		goto error;
+	}
+	iercv = container_of(evt->notif.rceb, struct uwb_rc_evt_ie_rcv, rceb);
+	iesize = le16_to_cpu(iercv->wIELength);
+
+	dev_dbg(dev, "IE received, element ID=%d\n", iercv->IEData[0]);
+
+	if (iercv->IEData[0] == UWB_RELINQUISH_REQUEST_IE) {
+		dev_warn(dev, "unhandled Relinquish Request IE\n");
+	}
+
+	return 0;
+error:
+	return result;
+}
diff --git a/drivers/uwb/uwb-internal.h b/drivers/uwb/uwb-internal.h
index 983ebc4dd8d5..031e8a885681 100644
--- a/drivers/uwb/uwb-internal.h
+++ b/drivers/uwb/uwb-internal.h
@@ -167,6 +167,7 @@ extern void uwbd_event_queue(struct uwb_event *);
 void uwbd_flush(struct uwb_rc *rc);
 
 /* UWB event handlers */
+extern int uwbd_evt_handle_rc_ie_rcv(struct uwb_event *);
 extern int uwbd_evt_handle_rc_beacon(struct uwb_event *);
 extern int uwbd_evt_handle_rc_beacon_size(struct uwb_event *);
 extern int uwbd_evt_handle_rc_bpoie_change(struct uwb_event *);
diff --git a/drivers/uwb/uwbd.c b/drivers/uwb/uwbd.c
index 78908416e42c..f75113571f4a 100644
--- a/drivers/uwb/uwbd.c
+++ b/drivers/uwb/uwbd.c
@@ -104,6 +104,10 @@ struct uwbd_event {
 /** Table of handlers for and properties of the UWBD Radio Control Events */
 static
 struct uwbd_event uwbd_events[] = {
+	[UWB_RC_EVT_IE_RCV] = {
+		.handler = uwbd_evt_handle_rc_ie_rcv,
+		.name = "IE_RECEIVED"
+	},
 	[UWB_RC_EVT_BEACON] = {
 		.handler = uwbd_evt_handle_rc_beacon,
 		.name = "BEACON_RECEIVED"
diff --git a/include/linux/uwb/spec.h b/include/linux/uwb/spec.h
index 198c15f8e251..a30436ea53aa 100644
--- a/include/linux/uwb/spec.h
+++ b/include/linux/uwb/spec.h
@@ -200,6 +200,12 @@ enum uwb_drp_reason {
 	UWB_DRP_REASON_MODIFIED,
 };
 
+/** Relinquish Request Reason Codes ([ECMA-368] table 113) */
+enum uwb_relinquish_req_reason {
+	UWB_RELINQUISH_REQ_REASON_NON_SPECIFIC = 0,
+	UWB_RELINQUISH_REQ_REASON_OVER_ALLOCATION,
+};
+
 /**
  *  DRP Notification Reason Codes (WHCI 0.95 [3.1.4.9])
  */
@@ -252,6 +258,7 @@ enum uwb_ie {
 	UWB_APP_SPEC_PROBE_IE = 15,
 	UWB_IDENTIFICATION_IE = 19,
 	UWB_MASTER_KEY_ID_IE = 20,
+	UWB_RELINQUISH_REQUEST_IE = 21,
 	UWB_IE_WLP = 250, /* WiMedia Logical Link Control Protocol WLP 0.99 */
 	UWB_APP_SPEC_IE = 255,
 };
@@ -365,6 +372,27 @@ struct uwb_ie_drp_avail {
 	DECLARE_BITMAP(bmp, UWB_NUM_MAS);
 } __attribute__((packed));
 
+/* Relinqish Request IE ([ECMA-368] section 16.8.19). */
+struct uwb_relinquish_request_ie {
+        struct uwb_ie_hdr       hdr;
+        __le16                  relinquish_req_control;
+        struct uwb_dev_addr     dev_addr;
+        struct uwb_drp_alloc    allocs[];
+} __attribute__((packed));
+
+static inline int uwb_ie_relinquish_req_reason_code(struct uwb_relinquish_request_ie *ie)
+{
+	return (le16_to_cpu(ie->relinquish_req_control) >> 0) & 0xf;
+}
+
+static inline void uwb_ie_relinquish_req_set_reason_code(struct uwb_relinquish_request_ie *ie,
+							 int reason_code)
+{
+	u16 ctrl = le16_to_cpu(ie->relinquish_req_control);
+	ctrl = (ctrl & ~(0xf << 0)) | (reason_code << 0);
+	ie->relinquish_req_control = cpu_to_le16(ctrl);
+}
+
 /**
  * The Vendor ID is set to an OUI that indicates the vendor of the device.
  * ECMA-368 [16.8.10]
-- 
cgit v1.2.3


From 6d5a681dfb583b2f1eefe7cd5505419ca2d4d6c8 Mon Sep 17 00:00:00 2001
From: Stefano Panella <stefano.panella@csr.com>
Date: Tue, 4 Nov 2008 14:24:57 +0000
Subject: uwb: add commands to add/remove IEs to the debug interface

Add the commands UWB_DBG_CMD_IE_ADD and UWB_DBG_CMD_IE_RM to the debug
interface and make them call uwb_rc_ie_add() and uwb_rc_ie_rm().

Signed-off-by: Stefano Panella <stefano.panella@csr.com>
Signed-off-by: David Vrabel <david.vrabel@csr.com>
---
 drivers/uwb/uwb-debug.c       | 20 +++++++++++++++++++-
 include/linux/uwb/debug-cmd.h |  9 +++++++++
 2 files changed, 28 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/uwb/uwb-debug.c b/drivers/uwb/uwb-debug.c
index 6db641e45313..88e6ac713817 100644
--- a/drivers/uwb/uwb-debug.c
+++ b/drivers/uwb/uwb-debug.c
@@ -168,6 +168,18 @@ static int cmd_rsv_terminate(struct uwb_rc *rc,
 	return 0;
 }
 
+static int cmd_ie_add(struct uwb_rc *rc, struct uwb_dbg_cmd_ie *ie_to_add)
+{
+	return uwb_rc_ie_add(rc,
+			     (const struct uwb_ie_hdr *) ie_to_add->data,
+			     ie_to_add->len);
+}
+
+static int cmd_ie_rm(struct uwb_rc *rc, struct uwb_dbg_cmd_ie *ie_to_rm)
+{
+	return uwb_rc_ie_rm(rc, ie_to_rm->data[0]);
+}
+
 static int command_open(struct inode *inode, struct file *file)
 {
 	file->private_data = inode->i_private;
@@ -195,6 +207,12 @@ static ssize_t command_write(struct file *file, const char __user *buf,
 	case UWB_DBG_CMD_RSV_TERMINATE:
 		ret = cmd_rsv_terminate(rc, &cmd.rsv_terminate);
 		break;
+	case UWB_DBG_CMD_IE_ADD:
+		ret = cmd_ie_add(rc, &cmd.ie_add);
+		break;
+	case UWB_DBG_CMD_IE_RM:
+		ret = cmd_ie_rm(rc, &cmd.ie_rm);
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -332,7 +350,7 @@ void uwb_dbg_add_rc(struct uwb_rc *rc)
 }
 
 /**
- * uwb_dbg_add_rc - remove a radio controller's debug interface
+ * uwb_dbg_del_rc - remove a radio controller's debug interface
  * @rc: the radio controller
  */
 void uwb_dbg_del_rc(struct uwb_rc *rc)
diff --git a/include/linux/uwb/debug-cmd.h b/include/linux/uwb/debug-cmd.h
index 1141f41bab5c..6a16566f0221 100644
--- a/include/linux/uwb/debug-cmd.h
+++ b/include/linux/uwb/debug-cmd.h
@@ -32,6 +32,8 @@
 enum uwb_dbg_cmd_type {
 	UWB_DBG_CMD_RSV_ESTABLISH = 1,
 	UWB_DBG_CMD_RSV_TERMINATE = 2,
+	UWB_DBG_CMD_IE_ADD = 3,
+	UWB_DBG_CMD_IE_RM = 4,
 };
 
 struct uwb_dbg_cmd_rsv_establish {
@@ -46,11 +48,18 @@ struct uwb_dbg_cmd_rsv_terminate {
 	int index;
 };
 
+struct uwb_dbg_cmd_ie {
+	__u8 data[128];
+	int len;
+};
+
 struct uwb_dbg_cmd {
 	__u32 type;
 	union {
 		struct uwb_dbg_cmd_rsv_establish rsv_establish;
 		struct uwb_dbg_cmd_rsv_terminate rsv_terminate;
+		struct uwb_dbg_cmd_ie ie_add;
+		struct uwb_dbg_cmd_ie ie_rm;
 	};
 };
 
-- 
cgit v1.2.3


From fec1a5932f16c0eb1b3f5ca2e18d81d860924088 Mon Sep 17 00:00:00 2001
From: Stefano Panella <stefano.panella@csr.com>
Date: Tue, 4 Nov 2008 15:39:08 +0000
Subject: uwb: per-radio controller event thread and beacon cache

Use an event thread per-radio controller so processing events from one
radio controller doesn't delay another.

A radio controller shouldn't have information on devices seen by a
different radio controller (they may be on different channels) so make the
beacon cache per-radio controller.

Signed-off-by: Stefano Panella <stefano.panella@csr.com>
Signed-off-by: David Vrabel <david.vrabel@csr.com>
---
 drivers/uwb/beacon.c       | 61 ++++++++++++++---------------
 drivers/uwb/driver.c       |  2 -
 drivers/uwb/lc-rc.c        | 21 ++++++----
 drivers/uwb/uwb-internal.h | 20 ++--------
 drivers/uwb/uwbd.c         | 98 +++++++++++++++-------------------------------
 include/linux/uwb.h        | 20 ++++++++++
 6 files changed, 100 insertions(+), 122 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/uwb/beacon.c b/drivers/uwb/beacon.c
index ad823987cede..d9f2a8acc593 100644
--- a/drivers/uwb/beacon.c
+++ b/drivers/uwb/beacon.c
@@ -168,12 +168,6 @@ out_up:
  * FIXME: use something faster for search than a list
  */
 
-struct uwb_beca uwb_beca = {
-	.list = LIST_HEAD_INIT(uwb_beca.list),
-	.mutex = __MUTEX_INITIALIZER(uwb_beca.mutex)
-};
-
-
 void uwb_bce_kfree(struct kref *_bce)
 {
 	struct uwb_beca_e *bce = container_of(_bce, struct uwb_beca_e, refcnt);
@@ -185,10 +179,11 @@ void uwb_bce_kfree(struct kref *_bce)
 
 /* Find a beacon by dev addr in the cache */
 static
-struct uwb_beca_e *__uwb_beca_find_bydev(const struct uwb_dev_addr *dev_addr)
+struct uwb_beca_e *__uwb_beca_find_bydev(struct uwb_rc *rc,
+					 const struct uwb_dev_addr *dev_addr)
 {
 	struct uwb_beca_e *bce, *next;
-	list_for_each_entry_safe(bce, next, &uwb_beca.list, node) {
+	list_for_each_entry_safe(bce, next, &rc->uwb_beca.list, node) {
 		d_printf(6, NULL, "looking for addr %02x:%02x in %02x:%02x\n",
 			 dev_addr->data[0], dev_addr->data[1],
 			 bce->dev_addr.data[0], bce->dev_addr.data[1]);
@@ -202,10 +197,11 @@ out:
 
 /* Find a beacon by dev addr in the cache */
 static
-struct uwb_beca_e *__uwb_beca_find_bymac(const struct uwb_mac_addr *mac_addr)
+struct uwb_beca_e *__uwb_beca_find_bymac(struct uwb_rc *rc, 
+					 const struct uwb_mac_addr *mac_addr)
 {
 	struct uwb_beca_e *bce, *next;
-	list_for_each_entry_safe(bce, next, &uwb_beca.list, node) {
+	list_for_each_entry_safe(bce, next, &rc->uwb_beca.list, node) {
 		if (!memcmp(bce->mac_addr, mac_addr->data,
 			    sizeof(struct uwb_mac_addr)))
 			goto out;
@@ -229,11 +225,11 @@ struct uwb_dev *uwb_dev_get_by_devaddr(struct uwb_rc *rc,
 	struct uwb_dev *found = NULL;
 	struct uwb_beca_e *bce;
 
-	mutex_lock(&uwb_beca.mutex);
-	bce = __uwb_beca_find_bydev(devaddr);
+	mutex_lock(&rc->uwb_beca.mutex);
+	bce = __uwb_beca_find_bydev(rc, devaddr);
 	if (bce)
 		found = uwb_dev_try_get(rc, bce->uwb_dev);
-	mutex_unlock(&uwb_beca.mutex);
+	mutex_unlock(&rc->uwb_beca.mutex);
 
 	return found;
 }
@@ -249,11 +245,11 @@ struct uwb_dev *uwb_dev_get_by_macaddr(struct uwb_rc *rc,
 	struct uwb_dev *found = NULL;
 	struct uwb_beca_e *bce;
 
-	mutex_lock(&uwb_beca.mutex);
-	bce = __uwb_beca_find_bymac(macaddr);
+	mutex_lock(&rc->uwb_beca.mutex);
+	bce = __uwb_beca_find_bymac(rc, macaddr);
 	if (bce)
 		found = uwb_dev_try_get(rc, bce->uwb_dev);
-	mutex_unlock(&uwb_beca.mutex);
+	mutex_unlock(&rc->uwb_beca.mutex);
 
 	return found;
 }
@@ -274,7 +270,9 @@ static void uwb_beca_e_init(struct uwb_beca_e *bce)
  * @bf:         Beacon frame (part of b, really)
  * @ts_jiffies: Timestamp (in jiffies) when the beacon was received
  */
-struct uwb_beca_e *__uwb_beca_add(struct uwb_rc_evt_beacon *be,
+static
+struct uwb_beca_e *__uwb_beca_add(struct uwb_rc *rc,
+				  struct uwb_rc_evt_beacon *be,
 				  struct uwb_beacon_frame *bf,
 				  unsigned long ts_jiffies)
 {
@@ -286,7 +284,7 @@ struct uwb_beca_e *__uwb_beca_add(struct uwb_rc_evt_beacon *be,
 	uwb_beca_e_init(bce);
 	bce->ts_jiffies = ts_jiffies;
 	bce->uwb_dev = NULL;
-	list_add(&bce->node, &uwb_beca.list);
+	list_add(&bce->node, &rc->uwb_beca.list);
 	return bce;
 }
 
@@ -295,13 +293,13 @@ struct uwb_beca_e *__uwb_beca_add(struct uwb_rc_evt_beacon *be,
  *
  * Remove associated devicest too.
  */
-void uwb_beca_purge(void)
+void uwb_beca_purge(struct uwb_rc *rc)
 {
 	struct uwb_beca_e *bce, *next;
 	unsigned long expires;
 
-	mutex_lock(&uwb_beca.mutex);
-	list_for_each_entry_safe(bce, next, &uwb_beca.list, node) {
+	mutex_lock(&rc->uwb_beca.mutex);
+	list_for_each_entry_safe(bce, next, &rc->uwb_beca.list, node) {
 		expires = bce->ts_jiffies + msecs_to_jiffies(beacon_timeout_ms);
 		if (time_after(jiffies, expires)) {
 			uwbd_dev_offair(bce);
@@ -309,19 +307,20 @@ void uwb_beca_purge(void)
 			uwb_bce_put(bce);
 		}
 	}
-	mutex_unlock(&uwb_beca.mutex);
+	mutex_unlock(&rc->uwb_beca.mutex);
 }
 
 /* Clean up the whole beacon cache. Called on shutdown */
-void uwb_beca_release(void)
+void uwb_beca_release(struct uwb_rc *rc)
 {
 	struct uwb_beca_e *bce, *next;
-	mutex_lock(&uwb_beca.mutex);
-	list_for_each_entry_safe(bce, next, &uwb_beca.list, node) {
+
+	mutex_lock(&rc->uwb_beca.mutex);
+	list_for_each_entry_safe(bce, next, &rc->uwb_beca.list, node) {
 		list_del(&bce->node);
 		uwb_bce_put(bce);
 	}
-	mutex_unlock(&uwb_beca.mutex);
+	mutex_unlock(&rc->uwb_beca.mutex);
 }
 
 static void uwb_beacon_print(struct uwb_rc *rc, struct uwb_rc_evt_beacon *be,
@@ -437,18 +436,18 @@ int uwbd_evt_handle_rc_beacon(struct uwb_event *evt)
 	if (uwb_mac_addr_bcast(&bf->Device_Identifier))
 		return 0;
 
-	mutex_lock(&uwb_beca.mutex);
-	bce = __uwb_beca_find_bymac(&bf->Device_Identifier);
+	mutex_lock(&rc->uwb_beca.mutex);
+	bce = __uwb_beca_find_bymac(rc, &bf->Device_Identifier);
 	if (bce == NULL) {
 		/* Not in there, a new device is pinging */
 		uwb_beacon_print(evt->rc, be, bf);
-		bce = __uwb_beca_add(be, bf, evt->ts_jiffies);
+		bce = __uwb_beca_add(rc, be, bf, evt->ts_jiffies);
 		if (bce == NULL) {
-			mutex_unlock(&uwb_beca.mutex);
+			mutex_unlock(&rc->uwb_beca.mutex);
 			return -ENOMEM;
 		}
 	}
-	mutex_unlock(&uwb_beca.mutex);
+	mutex_unlock(&rc->uwb_beca.mutex);
 
 	mutex_lock(&bce->mutex);
 	/* purge old beacon data */
diff --git a/drivers/uwb/driver.c b/drivers/uwb/driver.c
index 521cdeb84971..f57c26580de2 100644
--- a/drivers/uwb/driver.c
+++ b/drivers/uwb/driver.c
@@ -118,7 +118,6 @@ static int __init uwb_subsys_init(void)
 	result = class_register(&uwb_rc_class);
 	if (result < 0)
 		goto error_uwb_rc_class_register;
-	uwbd_start();
 	uwb_dbg_init();
 	return 0;
 
@@ -132,7 +131,6 @@ module_init(uwb_subsys_init);
 static void __exit uwb_subsys_exit(void)
 {
 	uwb_dbg_exit();
-	uwbd_stop();
 	class_unregister(&uwb_rc_class);
 	uwb_est_destroy();
 	return;
diff --git a/drivers/uwb/lc-rc.c b/drivers/uwb/lc-rc.c
index 1129e8767b58..38e3d57ec8f7 100644
--- a/drivers/uwb/lc-rc.c
+++ b/drivers/uwb/lc-rc.c
@@ -36,8 +36,6 @@
 #include <linux/etherdevice.h>
 #include <linux/usb.h>
 
-#define D_LOCAL 1
-#include <linux/uwb/debug.h>
 #include "uwb-internal.h"
 
 static int uwb_rc_index_match(struct device *dev, void *data)
@@ -83,7 +81,6 @@ static void uwb_rc_sys_release(struct device *dev)
 
 	uwb_rc_neh_destroy(rc);
 	uwb_rc_ie_release(rc);
-	d_printf(1, dev, "freed uwb_rc %p\n", rc);
 	kfree(rc);
 }
 
@@ -100,6 +97,8 @@ void uwb_rc_init(struct uwb_rc *rc)
 	rc->scan_type = UWB_SCAN_DISABLED;
 	INIT_LIST_HEAD(&rc->notifs_chain.list);
 	mutex_init(&rc->notifs_chain.mutex);
+	INIT_LIST_HEAD(&rc->uwb_beca.list);
+	mutex_init(&rc->uwb_beca.mutex);
 	uwb_drp_avail_init(rc);
 	uwb_rc_ie_init(rc);
 	uwb_rsv_init(rc);
@@ -250,6 +249,12 @@ int uwb_rc_add(struct uwb_rc *rc, struct device *parent_dev, void *priv)
 
 	rc->priv = priv;
 
+	init_waitqueue_head(&rc->uwbd.wq);
+	INIT_LIST_HEAD(&rc->uwbd.event_list);
+	spin_lock_init(&rc->uwbd.event_list_lock);
+
+	uwbd_start(rc);
+
 	result = rc->start(rc);
 	if (result < 0)
 		goto error_rc_start;
@@ -284,7 +289,7 @@ error_sys_add:
 error_dev_add:
 error_rc_setup:
 	rc->stop(rc);
-	uwbd_flush(rc);
+	uwbd_stop(rc);
 error_rc_start:
 	return result;
 }
@@ -315,16 +320,18 @@ void uwb_rc_rm(struct uwb_rc *rc)
 	uwb_rc_reset(rc);
 
 	rc->stop(rc);
-	uwbd_flush(rc);
+
+	uwbd_stop(rc);
 
 	uwb_dev_lock(&rc->uwb_dev);
 	rc->priv = NULL;
 	rc->cmd = NULL;
 	uwb_dev_unlock(&rc->uwb_dev);
-	mutex_lock(&uwb_beca.mutex);
+	mutex_lock(&rc->uwb_beca.mutex);
 	uwb_dev_for_each(rc, uwb_dev_offair_helper, NULL);
 	__uwb_rc_sys_rm(rc);
-	mutex_unlock(&uwb_beca.mutex);
+	mutex_unlock(&rc->uwb_beca.mutex);
+ 	uwb_beca_release(rc);
 	uwb_dev_rm(&rc->uwb_dev);
 }
 EXPORT_SYMBOL_GPL(uwb_rc_rm);
diff --git a/drivers/uwb/uwb-internal.h b/drivers/uwb/uwb-internal.h
index 031e8a885681..4c2449679978 100644
--- a/drivers/uwb/uwb-internal.h
+++ b/drivers/uwb/uwb-internal.h
@@ -160,8 +160,8 @@ struct uwb_event {
 	};
 };
 
-extern void uwbd_start(void);
-extern void uwbd_stop(void);
+extern void uwbd_start(struct uwb_rc *rc);
+extern void uwbd_stop(struct uwb_rc *rc);
 extern struct uwb_event *uwb_event_alloc(size_t, gfp_t gfp_mask);
 extern void uwbd_event_queue(struct uwb_event *);
 void uwbd_flush(struct uwb_rc *rc);
@@ -194,15 +194,6 @@ int uwbd_evt_handle_rc_dev_addr_conflict(struct uwb_event *evt);
 
 extern unsigned long beacon_timeout_ms;
 
-/** Beacon cache list */
-struct uwb_beca {
-	struct list_head list;
-	size_t entries;
-	struct mutex mutex;
-};
-
-extern struct uwb_beca uwb_beca;
-
 /**
  * Beacon cache entry
  *
@@ -229,9 +220,6 @@ struct uwb_beca_e {
 struct uwb_beacon_frame;
 extern ssize_t uwb_bce_print_IEs(struct uwb_dev *, struct uwb_beca_e *,
 				 char *, size_t);
-extern struct uwb_beca_e *__uwb_beca_add(struct uwb_rc_evt_beacon *,
-					 struct uwb_beacon_frame *,
-					 unsigned long);
 
 extern void uwb_bce_kfree(struct kref *_bce);
 static inline void uwb_bce_get(struct uwb_beca_e *bce)
@@ -242,8 +230,8 @@ static inline void uwb_bce_put(struct uwb_beca_e *bce)
 {
 	kref_put(&bce->refcnt, uwb_bce_kfree);
 }
-extern void uwb_beca_purge(void);
-extern void uwb_beca_release(void);
+extern void uwb_beca_purge(struct uwb_rc *rc);
+extern void uwb_beca_release(struct uwb_rc *rc);
 
 struct uwb_dev *uwb_dev_get_by_devaddr(struct uwb_rc *rc,
 				       const struct uwb_dev_addr *devaddr);
diff --git a/drivers/uwb/uwbd.c b/drivers/uwb/uwbd.c
index f75113571f4a..ec42ce92dbce 100644
--- a/drivers/uwb/uwbd.c
+++ b/drivers/uwb/uwbd.c
@@ -170,8 +170,6 @@ static const struct uwbd_event uwbd_message_handlers[] = {
 	},
 };
 
-static DEFINE_MUTEX(uwbd_event_mutex);
-
 /**
  * Handle an URC event passed to the UWB Daemon
  *
@@ -235,19 +233,10 @@ static void uwbd_event_handle_message(struct uwb_event *evt)
 		return;
 	}
 
-	/* If this is a reset event we need to drop the
-	 * uwbd_event_mutex or it deadlocks when the reset handler
-	 * attempts to flush the uwbd events. */
-	if (evt->message == UWB_EVT_MSG_RESET)
-		mutex_unlock(&uwbd_event_mutex);
-
 	result = uwbd_message_handlers[evt->message].handler(evt);
 	if (result < 0)
 		dev_err(&rc->uwb_dev.dev, "UWBD: '%s' message failed: %d\n",
 			uwbd_message_handlers[evt->message].name, result);
-
-	if (evt->message == UWB_EVT_MSG_RESET)
-		mutex_lock(&uwbd_event_mutex);
 }
 
 static void uwbd_event_handle(struct uwb_event *evt)
@@ -275,20 +264,6 @@ static void uwbd_event_handle(struct uwb_event *evt)
 
 	__uwb_rc_put(rc);	/* for the __uwb_rc_get() in uwb_rc_notif_cb() */
 }
-/* The UWB Daemon */
-
-
-/** Daemon's PID: used to decide if we can queue or not */
-static int uwbd_pid;
-/** Daemon's task struct for managing the kthread */
-static struct task_struct *uwbd_task;
-/** Daemon's waitqueue for waiting for new events */
-static DECLARE_WAIT_QUEUE_HEAD(uwbd_wq);
-/** Daemon's list of events; we queue/dequeue here */
-static struct list_head uwbd_event_list = LIST_HEAD_INIT(uwbd_event_list);
-/** Daemon's list lock to protect concurent access */
-static DEFINE_SPINLOCK(uwbd_event_list_lock);
-
 
 /**
  * UWB Daemon
@@ -302,65 +277,58 @@ static DEFINE_SPINLOCK(uwbd_event_list_lock);
  * FIXME: should change so we don't have a 1HZ timer all the time, but
  *        only if there are devices.
  */
-static int uwbd(void *unused)
+static int uwbd(void *param)
 {
+	struct uwb_rc *rc = param;
 	unsigned long flags;
-	struct list_head list = LIST_HEAD_INIT(list);
-	struct uwb_event *evt, *nxt;
+	struct uwb_event *evt;
 	int should_stop = 0;
+
 	while (1) {
 		wait_event_interruptible_timeout(
-			uwbd_wq,
-			!list_empty(&uwbd_event_list)
+			rc->uwbd.wq,
+			!list_empty(&rc->uwbd.event_list)
 			  || (should_stop = kthread_should_stop()),
 			HZ);
 		if (should_stop)
 			break;
 		try_to_freeze();
 
-		mutex_lock(&uwbd_event_mutex);
-		spin_lock_irqsave(&uwbd_event_list_lock, flags);
-		list_splice_init(&uwbd_event_list, &list);
-		spin_unlock_irqrestore(&uwbd_event_list_lock, flags);
-		list_for_each_entry_safe(evt, nxt, &list, list_node) {
+		spin_lock_irqsave(&rc->uwbd.event_list_lock, flags);
+		if (!list_empty(&rc->uwbd.event_list)) {
+			evt = list_first_entry(&rc->uwbd.event_list, struct uwb_event, list_node);
 			list_del(&evt->list_node);
+		} else
+			evt = NULL;
+		spin_unlock_irqrestore(&rc->uwbd.event_list_lock, flags);
+
+		if (evt) {
 			uwbd_event_handle(evt);
 			kfree(evt);
 		}
-		mutex_unlock(&uwbd_event_mutex);
 
-		uwb_beca_purge();	/* Purge devices that left */
+		uwb_beca_purge(rc);	/* Purge devices that left */
 	}
 	return 0;
 }
 
 
 /** Start the UWB daemon */
-void uwbd_start(void)
+void uwbd_start(struct uwb_rc *rc)
 {
-	uwbd_task = kthread_run(uwbd, NULL, "uwbd");
-	if (uwbd_task == NULL)
+	rc->uwbd.task = kthread_run(uwbd, rc, "uwbd");
+	if (rc->uwbd.task == NULL)
 		printk(KERN_ERR "UWB: Cannot start management daemon; "
 		       "UWB won't work\n");
 	else
-		uwbd_pid = uwbd_task->pid;
+		rc->uwbd.pid = rc->uwbd.task->pid;
 }
 
 /* Stop the UWB daemon and free any unprocessed events */
-void uwbd_stop(void)
+void uwbd_stop(struct uwb_rc *rc)
 {
-	unsigned long flags;
-	struct uwb_event *evt, *nxt;
-	kthread_stop(uwbd_task);
-	spin_lock_irqsave(&uwbd_event_list_lock, flags);
-	uwbd_pid = 0;
-	list_for_each_entry_safe(evt, nxt, &uwbd_event_list, list_node) {
-		if (evt->type == UWB_EVT_TYPE_NOTIF)
-			kfree(evt->notif.rceb);
-		kfree(evt);
-	}
-	spin_unlock_irqrestore(&uwbd_event_list_lock, flags);
-	uwb_beca_release();
+	kthread_stop(rc->uwbd.task);
+	uwbd_flush(rc);
 }
 
 /*
@@ -377,18 +345,20 @@ void uwbd_stop(void)
  */
 void uwbd_event_queue(struct uwb_event *evt)
 {
+	struct uwb_rc *rc = evt->rc;
 	unsigned long flags;
-	spin_lock_irqsave(&uwbd_event_list_lock, flags);
-	if (uwbd_pid != 0) {
-		list_add(&evt->list_node, &uwbd_event_list);
-		wake_up_all(&uwbd_wq);
+
+	spin_lock_irqsave(&rc->uwbd.event_list_lock, flags);
+	if (rc->uwbd.pid != 0) {
+		list_add(&evt->list_node, &rc->uwbd.event_list);
+		wake_up_all(&rc->uwbd.wq);
 	} else {
 		__uwb_rc_put(evt->rc);
 		if (evt->type == UWB_EVT_TYPE_NOTIF)
 			kfree(evt->notif.rceb);
 		kfree(evt);
 	}
-	spin_unlock_irqrestore(&uwbd_event_list_lock, flags);
+	spin_unlock_irqrestore(&rc->uwbd.event_list_lock, flags);
 	return;
 }
 
@@ -396,10 +366,8 @@ void uwbd_flush(struct uwb_rc *rc)
 {
 	struct uwb_event *evt, *nxt;
 
-	mutex_lock(&uwbd_event_mutex);
-
-	spin_lock_irq(&uwbd_event_list_lock);
-	list_for_each_entry_safe(evt, nxt, &uwbd_event_list, list_node) {
+	spin_lock_irq(&rc->uwbd.event_list_lock);
+	list_for_each_entry_safe(evt, nxt, &rc->uwbd.event_list, list_node) {
 		if (evt->rc == rc) {
 			__uwb_rc_put(rc);
 			list_del(&evt->list_node);
@@ -408,7 +376,5 @@ void uwbd_flush(struct uwb_rc *rc)
 			kfree(evt);
 		}
 	}
-	spin_unlock_irq(&uwbd_event_list_lock);
-
-	mutex_unlock(&uwbd_event_mutex);
+	spin_unlock_irq(&rc->uwbd.event_list_lock);
 }
diff --git a/include/linux/uwb.h b/include/linux/uwb.h
index 6d93f54b8879..881f0c5b6d28 100644
--- a/include/linux/uwb.h
+++ b/include/linux/uwb.h
@@ -30,6 +30,7 @@
 #include <linux/device.h>
 #include <linux/mutex.h>
 #include <linux/timer.h>
+#include <linux/wait.h>
 #include <linux/workqueue.h>
 #include <linux/uwb/spec.h>
 
@@ -86,6 +87,22 @@ struct uwb_notifs_chain {
 	struct mutex mutex;
 };
 
+/* Beacon cache list */
+struct uwb_beca {
+	struct list_head list;
+	size_t entries;
+	struct mutex mutex;
+};
+
+/* Event handling thread. */
+struct uwbd {
+	int pid;
+	struct task_struct *task;
+	wait_queue_head_t wq;
+	struct list_head event_list;
+	spinlock_t event_list_lock;
+};
+
 /**
  * struct uwb_mas_bm - a bitmap of all MAS in a superframe
  * @bm: a bitmap of length #UWB_NUM_MAS
@@ -342,6 +359,9 @@ struct uwb_rc {
 	enum uwb_scan_type scan_type:3;
 	unsigned ready:1;
 	struct uwb_notifs_chain notifs_chain;
+	struct uwb_beca uwb_beca;
+
+	struct uwbd uwbd;
 
 	struct uwb_drp_avail drp_avail;
 	struct list_head reservations;
-- 
cgit v1.2.3


From 307ba6dd73254fe7d2ce27db64ffd90e1bb3c6c0 Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@csr.com>
Date: Fri, 7 Nov 2008 17:37:33 +0000
Subject: uwb: don't unbind the radio controller driver when resetting

Use pre_reset and post_reset methods to avoid unbinding the radio
controller driver after a uwb_rc_reset_all() call.  This avoids a
deadlock in uwb_rc_rm() when waiting for the uwb event thread to stop.

Signed-off-by: David Vrabel <david.vrabel@csr.com>
---
 drivers/uwb/hwa-rc.c       | 22 +++++++++++++++-
 drivers/uwb/reset.c        | 49 ++++++++++++++++++++++++++++++------
 drivers/uwb/rsv.c          | 29 +++++++++++++++-------
 drivers/uwb/umc-bus.c      | 62 ++++++++++++++++++++++++++++++++--------------
 drivers/uwb/uwb-internal.h |  1 +
 drivers/uwb/whc-rc.c       | 30 ++++++++++++++++++----
 include/linux/uwb.h        |  2 ++
 include/linux/uwb/umc.h    |  2 ++
 8 files changed, 155 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/uwb/hwa-rc.c b/drivers/uwb/hwa-rc.c
index 18009c99577d..158e98d08af9 100644
--- a/drivers/uwb/hwa-rc.c
+++ b/drivers/uwb/hwa-rc.c
@@ -881,6 +881,24 @@ static void hwarc_disconnect(struct usb_interface *iface)
 	uwb_rc_put(uwb_rc);	/* when creating the device, refcount = 1 */
 }
 
+static int hwarc_pre_reset(struct usb_interface *iface)
+{
+	struct hwarc *hwarc = usb_get_intfdata(iface);
+	struct uwb_rc *uwb_rc = hwarc->uwb_rc;
+
+	uwb_rc_pre_reset(uwb_rc);
+	return 0;
+}
+
+static int hwarc_post_reset(struct usb_interface *iface)
+{
+	struct hwarc *hwarc = usb_get_intfdata(iface);
+	struct uwb_rc *uwb_rc = hwarc->uwb_rc;
+
+	uwb_rc_post_reset(uwb_rc);
+	return 0;
+}
+
 /** USB device ID's that we handle */
 static struct usb_device_id hwarc_id_table[] = {
 	/* D-Link DUB-1210 */
@@ -897,9 +915,11 @@ MODULE_DEVICE_TABLE(usb, hwarc_id_table);
 
 static struct usb_driver hwarc_driver = {
 	.name =		"hwa-rc",
+	.id_table =	hwarc_id_table,
 	.probe =	hwarc_probe,
 	.disconnect =	hwarc_disconnect,
-	.id_table =	hwarc_id_table,
+	.pre_reset =    hwarc_pre_reset,
+	.post_reset =   hwarc_post_reset,
 };
 
 static int __init hwarc_driver_init(void)
diff --git a/drivers/uwb/reset.c b/drivers/uwb/reset.c
index 8de856fa7958..e39b32099af3 100644
--- a/drivers/uwb/reset.c
+++ b/drivers/uwb/reset.c
@@ -323,17 +323,16 @@ int uwbd_msg_handle_reset(struct uwb_event *evt)
 	struct uwb_rc *rc = evt->rc;
 	int ret;
 
-	/* Need to prevent the RC hardware module going away while in
-	   the rc->reset() call. */
-	if (!try_module_get(rc->owner))
-		return 0;
-
 	dev_info(&rc->uwb_dev.dev, "resetting radio controller\n");
 	ret = rc->reset(rc);
-	if (ret)
+	if (ret) {
 		dev_err(&rc->uwb_dev.dev, "failed to reset hardware: %d\n", ret);
-
-	module_put(rc->owner);
+		goto error;
+	}
+	return 0;
+error:
+	/* Nothing can be done except try the reset again. */
+	uwb_rc_reset_all(rc);
 	return ret;
 }
 
@@ -360,3 +359,37 @@ void uwb_rc_reset_all(struct uwb_rc *rc)
 	uwbd_event_queue(evt);
 }
 EXPORT_SYMBOL_GPL(uwb_rc_reset_all);
+
+void uwb_rc_pre_reset(struct uwb_rc *rc)
+{
+	rc->stop(rc);
+	uwbd_flush(rc);
+
+	mutex_lock(&rc->uwb_dev.mutex);
+	rc->beaconing = -1;
+	rc->scanning = -1;
+	mutex_unlock(&rc->uwb_dev.mutex);
+
+	uwb_rsv_remove_all(rc);
+}
+EXPORT_SYMBOL_GPL(uwb_rc_pre_reset);
+
+void uwb_rc_post_reset(struct uwb_rc *rc)
+{
+	int ret;
+
+	ret = rc->start(rc);
+	if (ret)
+		goto error;
+	ret = uwb_rc_mac_addr_set(rc, &rc->uwb_dev.mac_addr);
+	if (ret)
+		goto error;
+	ret = uwb_rc_dev_addr_set(rc, &rc->uwb_dev.dev_addr);
+	if (ret)
+		goto error;
+	return;
+error:
+	/* Nothing can be done except try the reset again. */
+	uwb_rc_reset_all(rc);
+}
+EXPORT_SYMBOL_GPL(uwb_rc_post_reset);
diff --git a/drivers/uwb/rsv.c b/drivers/uwb/rsv.c
index ce0094657d3d..3d76efe26acc 100644
--- a/drivers/uwb/rsv.c
+++ b/drivers/uwb/rsv.c
@@ -659,6 +659,25 @@ static void uwb_rsv_timer(unsigned long arg)
 	uwb_rsv_sched_update(rsv->rc);
 }
 
+/**
+ * uwb_rsv_remove_all - remove all reservations
+ * @rc: the radio controller
+ *
+ * A DRP IE update is not done.
+ */
+void uwb_rsv_remove_all(struct uwb_rc *rc)
+{
+	struct uwb_rsv *rsv, *t;
+
+	mutex_lock(&rc->rsvs_mutex);
+	list_for_each_entry_safe(rsv, t, &rc->reservations, rc_node) {
+		uwb_rsv_remove(rsv);
+	}
+	mutex_unlock(&rc->rsvs_mutex);
+
+	cancel_work_sync(&rc->rsv_update_work);
+}
+
 void uwb_rsv_init(struct uwb_rc *rc)
 {
 	INIT_LIST_HEAD(&rc->reservations);
@@ -682,14 +701,6 @@ int uwb_rsv_setup(struct uwb_rc *rc)
 
 void uwb_rsv_cleanup(struct uwb_rc *rc)
 {
-	struct uwb_rsv *rsv, *t;
-
-	mutex_lock(&rc->rsvs_mutex);
-	list_for_each_entry_safe(rsv, t, &rc->reservations, rc_node) {
-		uwb_rsv_remove(rsv);
-	}
-	mutex_unlock(&rc->rsvs_mutex);
-
-	cancel_work_sync(&rc->rsv_update_work);
+	uwb_rsv_remove_all(rc);
 	destroy_workqueue(rc->rsv_workq);
 }
diff --git a/drivers/uwb/umc-bus.c b/drivers/uwb/umc-bus.c
index 2d8d62d9f53e..5ad36164c13b 100644
--- a/drivers/uwb/umc-bus.c
+++ b/drivers/uwb/umc-bus.c
@@ -11,23 +11,48 @@
 #include <linux/uwb/umc.h>
 #include <linux/pci.h>
 
-static int umc_bus_unbind_helper(struct device *dev, void *data)
+static int umc_bus_pre_reset_helper(struct device *dev, void *data)
 {
-	struct device *parent = data;
+	int ret = 0;
 
-	if (dev->parent == parent && dev->driver)
-		device_release_driver(dev);
-	return 0;
+	if (dev->driver) {
+		struct umc_dev *umc = to_umc_dev(dev);
+		struct umc_driver *umc_drv = to_umc_driver(dev->driver);
+
+		if (umc_drv->pre_reset)
+			ret = umc_drv->pre_reset(umc);
+		else
+			device_release_driver(dev);
+	}
+	return ret;
+}
+
+static int umc_bus_post_reset_helper(struct device *dev, void *data)
+{
+	int ret = 0;
+
+	if (dev->driver) {
+		struct umc_dev *umc = to_umc_dev(dev);
+		struct umc_driver *umc_drv = to_umc_driver(dev->driver);
+
+		if (umc_drv->post_reset)
+			ret = umc_drv->post_reset(umc);
+	} else
+		ret = device_attach(dev);
+
+	return ret;
 }
 
 /**
  * umc_controller_reset - reset the whole UMC controller
  * @umc: the UMC device for the radio controller.
  *
- * Drivers will be unbound from all UMC devices belonging to the
- * controller and then the radio controller will be rebound.  The
- * radio controller is expected to do a full hardware reset when it is
- * probed.
+ * Drivers or all capabilities of the controller will have their
+ * pre_reset methods called or be unbound from their device.  Then all
+ * post_reset methods will be called or the drivers will be rebound.
+ *
+ * Radio controllers must provide pre_reset and post_reset methods and
+ * reset the hardware in their start method.
  *
  * If this is called while a probe() or remove() is in progress it
  * will return -EAGAIN and not perform the reset.
@@ -35,14 +60,13 @@ static int umc_bus_unbind_helper(struct device *dev, void *data)
 int umc_controller_reset(struct umc_dev *umc)
 {
 	struct device *parent = umc->dev.parent;
-	int ret;
+	int ret = 0;
 
-	if (down_trylock(&parent->sem))
+	if(down_trylock(&parent->sem))
 		return -EAGAIN;
-	bus_for_each_dev(&umc_bus_type, NULL, parent, umc_bus_unbind_helper);
-	ret = device_attach(&umc->dev);
-	if (ret == 1)
-		ret = 0;
+	ret = device_for_each_child(parent, parent, umc_bus_pre_reset_helper);
+	if (ret >= 0)
+		device_for_each_child(parent, parent, umc_bus_post_reset_helper);
 	up(&parent->sem);
 
 	return ret;
@@ -75,10 +99,10 @@ static int umc_bus_rescan_helper(struct device *dev, void *data)
 	if (!dev->driver)
 		ret = device_attach(dev);
 
-	return ret < 0 ? ret : 0;
+	return ret;
 }
 
-static void umc_bus_rescan(void)
+static void umc_bus_rescan(struct device *parent)
 {
 	int err;
 
@@ -86,7 +110,7 @@ static void umc_bus_rescan(void)
 	 * We can't use bus_rescan_devices() here as it deadlocks when
 	 * it tries to retake the dev->parent semaphore.
 	 */
-	err = bus_for_each_dev(&umc_bus_type, NULL, NULL, umc_bus_rescan_helper);
+	err = device_for_each_child(parent, NULL, umc_bus_rescan_helper);
 	if (err < 0)
 		printk(KERN_WARNING "%s: rescan of bus failed: %d\n",
 		       KBUILD_MODNAME, err);
@@ -120,7 +144,7 @@ static int umc_device_probe(struct device *dev)
 	if (err)
 		put_device(dev);
 	else
-		umc_bus_rescan();
+		umc_bus_rescan(dev->parent);
 
 	return err;
 }
diff --git a/drivers/uwb/uwb-internal.h b/drivers/uwb/uwb-internal.h
index 4c2449679978..af95541dabcd 100644
--- a/drivers/uwb/uwb-internal.h
+++ b/drivers/uwb/uwb-internal.h
@@ -248,6 +248,7 @@ extern struct device_attribute dev_attr_scan;
 void uwb_rsv_init(struct uwb_rc *rc);
 int uwb_rsv_setup(struct uwb_rc *rc);
 void uwb_rsv_cleanup(struct uwb_rc *rc);
+void uwb_rsv_remove_all(struct uwb_rc *rc);
 
 void uwb_rsv_set_state(struct uwb_rsv *rsv, enum uwb_rsv_state new_state);
 void uwb_rsv_remove(struct uwb_rsv *rsv);
diff --git a/drivers/uwb/whc-rc.c b/drivers/uwb/whc-rc.c
index 6c454eadd307..e0d66938ccd8 100644
--- a/drivers/uwb/whc-rc.c
+++ b/drivers/uwb/whc-rc.c
@@ -394,7 +394,7 @@ void whcrc_stop_rc(struct uwb_rc *rc)
 
 	le_writel(0, whcrc->rc_base + URCCMD);
 	whci_wait_for(&umc_dev->dev, whcrc->rc_base + URCSTS,
-		      URCSTS_HALTED, 0, 40, "URCSTS.HALTED");
+		      URCSTS_HALTED, URCSTS_HALTED, 100, "URCSTS.HALTED");
 }
 
 static void whcrc_init(struct whcrc *whcrc)
@@ -488,6 +488,24 @@ static void whcrc_remove(struct umc_dev *umc_dev)
 	d_printf(1, &umc_dev->dev, "freed whcrc %p\n", whcrc);
 }
 
+static int whcrc_pre_reset(struct umc_dev *umc)
+{
+	struct whcrc *whcrc = umc_get_drvdata(umc);
+	struct uwb_rc *uwb_rc = whcrc->uwb_rc;
+
+	uwb_rc_pre_reset(uwb_rc);
+	return 0;
+}
+
+static int whcrc_post_reset(struct umc_dev *umc)
+{
+	struct whcrc *whcrc = umc_get_drvdata(umc);
+	struct uwb_rc *uwb_rc = whcrc->uwb_rc;
+
+	uwb_rc_post_reset(uwb_rc);
+	return 0;
+}
+
 /* PCI device ID's that we handle [so it gets loaded] */
 static struct pci_device_id whcrc_id_table[] = {
 	{ PCI_DEVICE_CLASS(PCI_CLASS_WIRELESS_WHCI, ~0) },
@@ -496,10 +514,12 @@ static struct pci_device_id whcrc_id_table[] = {
 MODULE_DEVICE_TABLE(pci, whcrc_id_table);
 
 static struct umc_driver whcrc_driver = {
-	.name   = "whc-rc",
-	.cap_id = UMC_CAP_ID_WHCI_RC,
-	.probe  = whcrc_probe,
-	.remove = whcrc_remove,
+	.name       = "whc-rc",
+	.cap_id     = UMC_CAP_ID_WHCI_RC,
+	.probe      = whcrc_probe,
+	.remove     = whcrc_remove,
+	.pre_reset  = whcrc_pre_reset,
+	.post_reset = whcrc_post_reset,
 };
 
 static int __init whcrc_driver_init(void)
diff --git a/include/linux/uwb.h b/include/linux/uwb.h
index 881f0c5b6d28..c4854848999d 100644
--- a/include/linux/uwb.h
+++ b/include/linux/uwb.h
@@ -540,6 +540,8 @@ void uwb_rc_rm(struct uwb_rc *);
 void uwb_rc_neh_grok(struct uwb_rc *, void *, size_t);
 void uwb_rc_neh_error(struct uwb_rc *, int);
 void uwb_rc_reset_all(struct uwb_rc *rc);
+void uwb_rc_pre_reset(struct uwb_rc *rc);
+void uwb_rc_post_reset(struct uwb_rc *rc);
 
 /**
  * uwb_rsv_is_owner - is the owner of this reservation the RC?
diff --git a/include/linux/uwb/umc.h b/include/linux/uwb/umc.h
index 36a39e34f8d7..4b4fc0f43855 100644
--- a/include/linux/uwb/umc.h
+++ b/include/linux/uwb/umc.h
@@ -89,6 +89,8 @@ struct umc_driver {
 	void (*remove)(struct umc_dev *);
 	int  (*suspend)(struct umc_dev *, pm_message_t state);
 	int  (*resume)(struct umc_dev *);
+	int  (*pre_reset)(struct umc_dev *);
+	int  (*post_reset)(struct umc_dev *);
 
 	struct device_driver driver;
 };
-- 
cgit v1.2.3


From e25cf3db560e803292946ef23a30c69e341ce56f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 17 Oct 2008 15:55:07 +0200
Subject: lockdep: include/linux/lockdep.h - fix warning in
 net/bluetooth/af_bluetooth.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

fix this warning:

  net/bluetooth/af_bluetooth.c:60: warning: ‘bt_key_strings’ defined but not used
  net/bluetooth/af_bluetooth.c:71: warning: ‘bt_slock_key_strings’ defined but not used

this is a lockdep macro problem in the !LOCKDEP case.

We cannot convert it to an inline because the macro works on multiple types,
but we can mark the parameter used.

[ also clean up a misaligned tab in sock_lock_init_class_and_name() ]

[ also remove #ifdefs from around af_family_clock_key strings - which
  were certainly added to get rid of the ugly build warnings. ]

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/lockdep.h | 5 +++--
 include/net/sock.h      | 2 +-
 net/core/sock.c         | 2 --
 3 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index fc9f8e88123b..8956daf64abd 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -336,10 +336,11 @@ static inline void lockdep_on(void)
 # define lock_set_subclass(l, s, i)		do { } while (0)
 # define lockdep_init()				do { } while (0)
 # define lockdep_info()				do { } while (0)
-# define lockdep_init_map(lock, name, key, sub)	do { (void)(key); } while (0)
+# define lockdep_init_map(lock, name, key, sub) \
+		do { (void)(name); (void)(key); } while (0)
 # define lockdep_set_class(lock, key)		do { (void)(key); } while (0)
 # define lockdep_set_class_and_name(lock, key, name) \
-		do { (void)(key); } while (0)
+		do { (void)(key); (void)(name); } while (0)
 #define lockdep_set_class_and_subclass(lock, key, sub) \
 		do { (void)(key); } while (0)
 #define lockdep_set_subclass(lock, sub)		do { } while (0)
diff --git a/include/net/sock.h b/include/net/sock.h
index c04f9e18ea22..2f47107f6d0f 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -815,7 +815,7 @@ static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
  */
 #define sock_lock_init_class_and_name(sk, sname, skey, name, key) 	\
 do {									\
-	sk->sk_lock.owned = 0;					\
+	sk->sk_lock.owned = 0;						\
 	init_waitqueue_head(&sk->sk_lock.wq);				\
 	spin_lock_init(&(sk)->sk_lock.slock);				\
 	debug_check_no_locks_freed((void *)&(sk)->sk_lock,		\
diff --git a/net/core/sock.c b/net/core/sock.c
index 5e2a3132a8c9..341e39456952 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -136,7 +136,6 @@
 static struct lock_class_key af_family_keys[AF_MAX];
 static struct lock_class_key af_family_slock_keys[AF_MAX];
 
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
 /*
  * Make lock validator output more readable. (we pre-construct these
  * strings build-time, so that runtime initialization of socket
@@ -187,7 +186,6 @@ static const char *af_family_clock_key_strings[AF_MAX+1] = {
   "clock-AF_RXRPC" , "clock-AF_ISDN"     , "clock-AF_PHONET"   ,
   "clock-AF_MAX"
 };
-#endif
 
 /*
  * sk_callback_lock locking rules are per-address-family,
-- 
cgit v1.2.3


From e17be2b2a95b43fe0d5878adf330701bb7a77115 Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@csr.com>
Date: Mon, 17 Nov 2008 15:24:14 +0000
Subject: uwb: add pal parameter to new reservation callback

The pal parameter allows PALs to retrieve their PAL-specific data
structure.

Signed-off-by: David Vrabel <david.vrabel@csr.com>
---
 drivers/uwb/rsv.c       |  2 +-
 drivers/uwb/uwb-debug.c | 10 +++++-----
 include/linux/uwb.h     |  2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/uwb/rsv.c b/drivers/uwb/rsv.c
index 3d76efe26acc..935d5b536db7 100644
--- a/drivers/uwb/rsv.c
+++ b/drivers/uwb/rsv.c
@@ -558,7 +558,7 @@ static struct uwb_rsv *uwb_rsv_new_target(struct uwb_rc *rc,
 	spin_lock(&rc->pal_lock);
 	list_for_each_entry(pal, &rc->pals, node) {
 		if (pal->new_rsv)
-			pal->new_rsv(rsv);
+			pal->new_rsv(pal, rsv);
 		if (rsv->state == UWB_RSV_STATE_T_ACCEPTED)
 			break;
 	}
diff --git a/drivers/uwb/uwb-debug.c b/drivers/uwb/uwb-debug.c
index 88e6ac713817..217ebaac128d 100644
--- a/drivers/uwb/uwb-debug.c
+++ b/drivers/uwb/uwb-debug.c
@@ -306,13 +306,13 @@ static struct file_operations drp_avail_fops = {
 	.owner   = THIS_MODULE,
 };
 
-static void uwb_dbg_new_rsv(struct uwb_rsv *rsv)
+static void uwb_dbg_new_rsv(struct uwb_pal *pal, struct uwb_rsv *rsv)
 {
-	struct uwb_rc *rc = rsv->rc;
+	struct uwb_dbg *dbg = container_of(pal, struct uwb_dbg, pal);
 
-	if (rc->dbg->accept) {
-		list_add_tail(&rsv->pal_node, &rc->dbg->rsvs);
-		uwb_rsv_accept(rsv, uwb_dbg_rsv_cb, NULL);
+	if (dbg->accept) {
+		list_add_tail(&rsv->pal_node, &dbg->rsvs);
+		uwb_rsv_accept(rsv, uwb_dbg_rsv_cb, dbg);
 	}
 }
 
diff --git a/include/linux/uwb.h b/include/linux/uwb.h
index c4854848999d..effd97998fd1 100644
--- a/include/linux/uwb.h
+++ b/include/linux/uwb.h
@@ -405,7 +405,7 @@ struct uwb_pal {
 	struct list_head node;
 	const char *name;
 	struct device *device;
-	void (*new_rsv)(struct uwb_rsv *rsv);
+	void (*new_rsv)(struct uwb_pal *pal, struct uwb_rsv *rsv);
 };
 
 void uwb_pal_init(struct uwb_pal *pal);
-- 
cgit v1.2.3


From 6fae35f9cea92793a98b2d9ab21235e5ae035581 Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@csr.com>
Date: Mon, 17 Nov 2008 15:53:42 +0000
Subject: uwb: add basic radio manager

The UWB radio manager coordinates the use of the radio between the
PALs that may be using it.  PALs request use of the radio with
uwb_radio_start() and the radio manager will start beaconing if its
not already doing so.  When the last PAL has called uwb_radio_stop()
beaconing will be stopped.

In the future, the radio manager will have a more sophisticated channel
selection algorithm, probably following the Channel Selection Policy
from the WiMedia Alliance when it is finalized.  For now, channel 9
(BG1, TFC1) is selected.

The user may override the channel selected by the radio manager and may
force the radio to stop beaconing.

The WUSB Host Controller PAL makes use of this and there are two new
debug PAL commands that can be used for testing.

Signed-off-by: David Vrabel <david.vrabel@csr.com>
---
 Documentation/ABI/testing/sysfs-class-uwb_rc |  14 +-
 Documentation/usb/wusb-cbaf                  |   9 --
 drivers/usb/host/hwa-hc.c                    |   1 -
 drivers/usb/host/whci/hcd.c                  |   2 -
 drivers/usb/wusbcore/devconnect.c            |   5 +-
 drivers/usb/wusbcore/mmc.c                   |  75 ++--------
 drivers/usb/wusbcore/pal.c                   |  16 ++-
 drivers/usb/wusbcore/wusbhc.h                |   8 +-
 drivers/uwb/Makefile                         |   1 +
 drivers/uwb/beacon.c                         |  26 ++--
 drivers/uwb/drp.c                            |  24 +---
 drivers/uwb/lc-rc.c                          |  11 +-
 drivers/uwb/pal.c                            |  20 +--
 drivers/uwb/radio.c                          | 202 +++++++++++++++++++++++++++
 drivers/uwb/reset.c                          |   6 +-
 drivers/uwb/rsv.c                            |   4 +-
 drivers/uwb/uwb-debug.c                      |  26 +++-
 drivers/uwb/uwb-internal.h                   |   5 +
 drivers/uwb/wlp/wlp-lc.c                     |   5 +-
 include/linux/uwb.h                          |  23 ++-
 include/linux/uwb/debug-cmd.h                |   2 +
 21 files changed, 323 insertions(+), 162 deletions(-)
 create mode 100644 drivers/uwb/radio.c

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-class-uwb_rc b/Documentation/ABI/testing/sysfs-class-uwb_rc
index a0d18dbeb7a9..6a5fd072849d 100644
--- a/Documentation/ABI/testing/sysfs-class-uwb_rc
+++ b/Documentation/ABI/testing/sysfs-class-uwb_rc
@@ -32,14 +32,16 @@ Contact:        linux-usb@vger.kernel.org
 Description:
                 Write:
 
-                <channel> [<bpst offset>]
+                <channel>
 
-                to start beaconing on a specific channel, or stop
-                beaconing if <channel> is -1.  Valid channels depends
-                on the radio controller's supported band groups.
+                to force a specific channel to be used when beaconing,
+                or, if <channel> is -1, to prohibit beaconing.  If
+                <channel> is 0, then the default channel selection
+                algorithm will be used.  Valid channels depends on the
+                radio controller's supported band groups.
 
-                <bpst offset> may be used to try and join a specific
-                beacon group if more than one was found during a scan.
+                Reading returns the currently active channel, or -1 if
+                the radio controller is not beaconing.
 
 What:           /sys/class/uwb_rc/uwbN/scan
 Date:           July 2008
diff --git a/Documentation/usb/wusb-cbaf b/Documentation/usb/wusb-cbaf
index 2e78b70f3adc..426ddaaef96f 100644
--- a/Documentation/usb/wusb-cbaf
+++ b/Documentation/usb/wusb-cbaf
@@ -80,12 +80,6 @@ case $1 in
     start)
         for dev in ${2:-$hdevs}
           do
-          uwb_rc=$(readlink -f $dev/uwb_rc)
-          if cat $uwb_rc/beacon | grep -q -- "-1"
-              then
-              echo 13 0 > $uwb_rc/beacon
-              echo I: started beaconing on ch 13 on $(basename $uwb_rc) >&2
-          fi
           echo $host_CHID > $dev/wusb_chid
           echo I: started host $(basename $dev) >&2
         done
@@ -95,9 +89,6 @@ case $1 in
           do
           echo 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 > $dev/wusb_chid
           echo I: stopped host $(basename $dev) >&2
-          uwb_rc=$(readlink -f $dev/uwb_rc)
-          echo -1 | cat > $uwb_rc/beacon
-          echo I: stopped beaconing on $(basename $uwb_rc) >&2
         done
         ;;
     set-chid)
diff --git a/drivers/usb/host/hwa-hc.c b/drivers/usb/host/hwa-hc.c
index 2827353e97e1..2a4d36fa70b0 100644
--- a/drivers/usb/host/hwa-hc.c
+++ b/drivers/usb/host/hwa-hc.c
@@ -221,7 +221,6 @@ static void hwahc_op_stop(struct usb_hcd *usb_hcd)
 
 	d_fnstart(4, dev, "(hwahc %p)\n", hwahc);
 	mutex_lock(&wusbhc->mutex);
-	wusbhc_stop(wusbhc);
 	wusb_cluster_id_put(wusbhc->cluster_id);
 	mutex_unlock(&wusbhc->mutex);
 	d_fnend(4, dev, "(hwahc %p) = %d\n", hwahc, result);
diff --git a/drivers/usb/host/whci/hcd.c b/drivers/usb/host/whci/hcd.c
index de1e07271b81..f599f89d3be1 100644
--- a/drivers/usb/host/whci/hcd.c
+++ b/drivers/usb/host/whci/hcd.c
@@ -91,8 +91,6 @@ static void whc_stop(struct usb_hcd *usb_hcd)
 
 	mutex_lock(&wusbhc->mutex);
 
-	wusbhc_stop(wusbhc);
-
 	/* stop HC */
 	le_writel(0, whc->base + WUSBINTR);
 	whc_write_wusbcmd(whc, WUSBCMD_RUN, 0);
diff --git a/drivers/usb/wusbcore/devconnect.c b/drivers/usb/wusbcore/devconnect.c
index c01c7a80744c..08a1ec903867 100644
--- a/drivers/usb/wusbcore/devconnect.c
+++ b/drivers/usb/wusbcore/devconnect.c
@@ -1124,8 +1124,7 @@ void wusbhc_devconnect_destroy(struct wusbhc *wusbhc)
  * FIXME: This also enables the keep alives but this is not necessary
  * until there are connected and authenticated devices.
  */
-int wusbhc_devconnect_start(struct wusbhc *wusbhc,
-			    const struct wusb_ckhdid *chid)
+int wusbhc_devconnect_start(struct wusbhc *wusbhc)
 {
 	struct device *dev = wusbhc->dev;
 	struct wuie_host_info *hi;
@@ -1138,7 +1137,7 @@ int wusbhc_devconnect_start(struct wusbhc *wusbhc,
 	hi->hdr.bLength       = sizeof(*hi);
 	hi->hdr.bIEIdentifier = WUIE_ID_HOST_INFO;
 	hi->attributes        = cpu_to_le16((wusbhc->rsv->stream << 3) | WUIE_HI_CAP_ALL);
-	hi->CHID              = *chid;
+	hi->CHID              = wusbhc->chid;
 	result = wusbhc_mmcie_set(wusbhc, 0, 0, &hi->hdr);
 	if (result < 0) {
 		dev_err(dev, "Cannot add Host Info MMCIE: %d\n", result);
diff --git a/drivers/usb/wusbcore/mmc.c b/drivers/usb/wusbcore/mmc.c
index af2aee0fdffa..5463ecebafdf 100644
--- a/drivers/usb/wusbcore/mmc.c
+++ b/drivers/usb/wusbcore/mmc.c
@@ -162,12 +162,11 @@ EXPORT_SYMBOL_GPL(wusbhc_mmcie_rm);
 /*
  * wusbhc_start - start transmitting MMCs and accepting connections
  * @wusbhc: the HC to start
- * @chid: the CHID to use for this host
  *
  * Establishes a cluster reservation, enables device connections, and
  * starts MMCs with appropriate DNTS parameters.
  */
-int wusbhc_start(struct wusbhc *wusbhc, const struct wusb_ckhdid *chid)
+int wusbhc_start(struct wusbhc *wusbhc)
 {
 	int result;
 	struct device *dev = wusbhc->dev;
@@ -181,7 +180,7 @@ int wusbhc_start(struct wusbhc *wusbhc, const struct wusb_ckhdid *chid)
 		goto error_rsv_establish;
 	}
 
-	result = wusbhc_devconnect_start(wusbhc, chid);
+	result = wusbhc_devconnect_start(wusbhc);
 	if (result < 0) {
 		dev_err(dev, "error enabling device connections: %d\n", result);
 		goto error_devconnect_start;
@@ -218,34 +217,6 @@ error_rsv_establish:
 	return result;
 }
 
-/*
- * Disconnect all from the WUSB Channel
- *
- * Send a Host Disconnect IE in the MMC, wait, don't send it any more
- */
-static int __wusbhc_host_disconnect_ie(struct wusbhc *wusbhc)
-{
-	int result = -ENOMEM;
-	struct wuie_host_disconnect *host_disconnect_ie;
-	might_sleep();
-	host_disconnect_ie = kmalloc(sizeof(*host_disconnect_ie), GFP_KERNEL);
-	if (host_disconnect_ie == NULL)
-		goto error_alloc;
-	host_disconnect_ie->hdr.bLength       = sizeof(*host_disconnect_ie);
-	host_disconnect_ie->hdr.bIEIdentifier = WUIE_ID_HOST_DISCONNECT;
-	result = wusbhc_mmcie_set(wusbhc, 0, 0, &host_disconnect_ie->hdr);
-	if (result < 0)
-		goto error_mmcie_set;
-
-	/* WUSB1.0[8.5.3.1 & 7.5.2] */
-	msleep(100);
-	wusbhc_mmcie_rm(wusbhc, &host_disconnect_ie->hdr);
-error_mmcie_set:
-	kfree(host_disconnect_ie);
-error_alloc:
-	return result;
-}
-
 /*
  * wusbhc_stop - stop transmitting MMCs
  * @wusbhc: the HC to stop
@@ -264,29 +235,6 @@ void wusbhc_stop(struct wusbhc *wusbhc)
 }
 EXPORT_SYMBOL_GPL(wusbhc_stop);
 
-/*
- * Change the CHID in a WUSB Channel
- *
- * If it is just a new CHID, send a Host Disconnect IE and then change
- * the CHID IE.
- */
-static int __wusbhc_chid_change(struct wusbhc *wusbhc,
-				const struct wusb_ckhdid *chid)
-{
-	int result = -ENOSYS;
-	struct device *dev = wusbhc->dev;
-	dev_err(dev, "%s() not implemented yet\n", __func__);
-	return result;
-
-	BUG_ON(wusbhc->wuie_host_info == NULL);
-	__wusbhc_host_disconnect_ie(wusbhc);
-	wusbhc->wuie_host_info->CHID = *chid;
-	result = wusbhc_mmcie_set(wusbhc, 0, 0, &wusbhc->wuie_host_info->hdr);
-	if (result < 0)
-		dev_err(dev, "Can't update Host Info WUSB IE: %d\n", result);
-	return result;
-}
-
 /*
  * Set/reset/update a new CHID
  *
@@ -302,16 +250,19 @@ int wusbhc_chid_set(struct wusbhc *wusbhc, const struct wusb_ckhdid *chid)
 		chid = NULL;
 
 	mutex_lock(&wusbhc->mutex);
-	if (wusbhc->active) {
-		if (chid)
-			result = __wusbhc_chid_change(wusbhc, chid);
-		else
-			wusbhc_stop(wusbhc);
-	} else {
-		if (chid)
-			wusbhc_start(wusbhc, chid);
+	if (chid) {
+		if (wusbhc->active) {
+			mutex_unlock(&wusbhc->mutex);
+			return -EBUSY;
+		}
+		wusbhc->chid = *chid;
 	}
 	mutex_unlock(&wusbhc->mutex);
+
+	if (chid)
+		result = uwb_radio_start(&wusbhc->pal);
+	else
+		uwb_radio_stop(&wusbhc->pal);
 	return result;
 }
 EXPORT_SYMBOL_GPL(wusbhc_chid_set);
diff --git a/drivers/usb/wusbcore/pal.c b/drivers/usb/wusbcore/pal.c
index 7cc51e9905cf..d0b172c5ecc7 100644
--- a/drivers/usb/wusbcore/pal.c
+++ b/drivers/usb/wusbcore/pal.c
@@ -18,6 +18,16 @@
  */
 #include "wusbhc.h"
 
+static void wusbhc_channel_changed(struct uwb_pal *pal, int channel)
+{
+	struct wusbhc *wusbhc = container_of(pal, struct wusbhc, pal);
+
+	if (channel < 0)
+		wusbhc_stop(wusbhc);
+	else
+		wusbhc_start(wusbhc);
+}
+
 /**
  * wusbhc_pal_register - register the WUSB HC as a UWB PAL
  * @wusbhc: the WUSB HC
@@ -28,8 +38,10 @@ int wusbhc_pal_register(struct wusbhc *wusbhc)
 
 	wusbhc->pal.name   = "wusbhc";
 	wusbhc->pal.device = wusbhc->usb_hcd.self.controller;
+	wusbhc->pal.rc     = wusbhc->uwb_rc;
+	wusbhc->pal.channel_changed = wusbhc_channel_changed;
 
-	return uwb_pal_register(wusbhc->uwb_rc, &wusbhc->pal);
+	return uwb_pal_register(&wusbhc->pal);
 }
 
 /**
@@ -38,5 +50,5 @@ int wusbhc_pal_register(struct wusbhc *wusbhc)
  */
 void wusbhc_pal_unregister(struct wusbhc *wusbhc)
 {
-	uwb_pal_unregister(wusbhc->uwb_rc, &wusbhc->pal);
+	uwb_pal_unregister(&wusbhc->pal);
 }
diff --git a/drivers/usb/wusbcore/wusbhc.h b/drivers/usb/wusbcore/wusbhc.h
index 8fef934ad2f3..797c2453a35b 100644
--- a/drivers/usb/wusbcore/wusbhc.h
+++ b/drivers/usb/wusbcore/wusbhc.h
@@ -252,7 +252,8 @@ struct wusbhc {
 	struct uwb_pal pal;
 
 	unsigned trust_timeout;			/* in jiffies */
-	struct wuie_host_info *wuie_host_info;	/* Includes CHID */
+	struct wusb_ckhdid chid;
+	struct wuie_host_info *wuie_host_info;
 
 	struct mutex mutex;			/* locks everything else */
 	u16 cluster_id;				/* Wireless USB Cluster ID */
@@ -376,15 +377,14 @@ static inline void wusbhc_put(struct wusbhc *wusbhc)
 	usb_put_hcd(&wusbhc->usb_hcd);
 }
 
-int wusbhc_start(struct wusbhc *wusbhc, const struct wusb_ckhdid *chid);
+int wusbhc_start(struct wusbhc *wusbhc);
 void wusbhc_stop(struct wusbhc *wusbhc);
 extern int wusbhc_chid_set(struct wusbhc *, const struct wusb_ckhdid *);
 
 /* Device connect handling */
 extern int wusbhc_devconnect_create(struct wusbhc *);
 extern void wusbhc_devconnect_destroy(struct wusbhc *);
-extern int wusbhc_devconnect_start(struct wusbhc *wusbhc,
-				   const struct wusb_ckhdid *chid);
+extern int wusbhc_devconnect_start(struct wusbhc *wusbhc);
 extern void wusbhc_devconnect_stop(struct wusbhc *wusbhc);
 extern void wusbhc_handle_dn(struct wusbhc *, u8 srcaddr,
 			     struct wusb_dn_hdr *dn_hdr, size_t size);
diff --git a/drivers/uwb/Makefile b/drivers/uwb/Makefile
index 2b99c3e61671..ce21a95da04a 100644
--- a/drivers/uwb/Makefile
+++ b/drivers/uwb/Makefile
@@ -18,6 +18,7 @@ uwb-objs :=		\
 	lc-rc.o		\
 	neh.o		\
 	pal.o		\
+	radio.o		\
 	reset.o		\
 	rsv.o		\
 	scan.o		\
diff --git a/drivers/uwb/beacon.c b/drivers/uwb/beacon.c
index d9f2a8acc593..247956098afa 100644
--- a/drivers/uwb/beacon.c
+++ b/drivers/uwb/beacon.c
@@ -119,7 +119,6 @@ int uwb_rc_beacon(struct uwb_rc *rc, int channel, unsigned bpst_offset)
 	int result;
 	struct device *dev = &rc->uwb_dev.dev;
 
-	mutex_lock(&rc->uwb_dev.mutex);
 	if (channel < 0)
 		channel = -1;
 	if (channel == -1)
@@ -128,7 +127,7 @@ int uwb_rc_beacon(struct uwb_rc *rc, int channel, unsigned bpst_offset)
 		/* channel >= 0...dah */
 		result = uwb_rc_start_beacon(rc, bpst_offset, channel);
 		if (result < 0)
-			goto out_up;
+			return result;
 		if (le16_to_cpu(rc->ies->wIELength) > 0) {
 			result = uwb_rc_set_ie(rc, rc->ies);
 			if (result < 0) {
@@ -137,19 +136,14 @@ int uwb_rc_beacon(struct uwb_rc *rc, int channel, unsigned bpst_offset)
 				result = uwb_rc_stop_beacon(rc);
 				channel = -1;
 				bpst_offset = 0;
-			} else
-				result = 0;
+			}
 		}
 	}
 
-	if (result < 0)
-		goto out_up;
-	rc->beaconing = channel;
-
-	uwb_notify(rc, NULL, uwb_bg_joined(rc) ? UWB_NOTIF_BG_JOIN : UWB_NOTIF_BG_LEAVE);
-
-out_up:
-	mutex_unlock(&rc->uwb_dev.mutex);
+	if (result >= 0) {
+		rc->beaconing = channel;
+		uwb_notify(rc, NULL, uwb_bg_joined(rc) ? UWB_NOTIF_BG_JOIN : UWB_NOTIF_BG_LEAVE);
+	}
 	return result;
 }
 
@@ -618,9 +612,6 @@ static ssize_t uwb_rc_beacon_show(struct device *dev,
 
 /*
  * Start beaconing on the specified channel, or stop beaconing.
- *
- * The BPST offset of when to start searching for a beacon group to
- * join may be specified.
  */
 static ssize_t uwb_rc_beacon_store(struct device *dev,
 				   struct device_attribute *attr,
@@ -629,12 +620,11 @@ static ssize_t uwb_rc_beacon_store(struct device *dev,
 	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
 	struct uwb_rc *rc = uwb_dev->rc;
 	int channel;
-	unsigned bpst_offset = 0;
 	ssize_t result = -EINVAL;
 
-	result = sscanf(buf, "%d %u\n", &channel, &bpst_offset);
+	result = sscanf(buf, "%d", &channel);
 	if (result >= 1)
-		result = uwb_rc_beacon(rc, channel, bpst_offset);
+		result = uwb_radio_force_channel(rc, channel);
 
 	return result < 0 ? result : size;
 }
diff --git a/drivers/uwb/drp.c b/drivers/uwb/drp.c
index c0b1e5e2bd08..fe328146adb7 100644
--- a/drivers/uwb/drp.c
+++ b/drivers/uwb/drp.c
@@ -37,14 +37,13 @@
  *
  * A DRP Availability IE is appended.
  *
- * rc->uwb_dev.mutex is held
+ * rc->rsvs_mutex is held
  *
  * FIXME We currently ignore the returned value indicating the remaining space
  * in beacon. This could be used to deny reservation requests earlier if
  * determined that they would cause the beacon space to be exceeded.
  */
-static
-int uwb_rc_gen_send_drp_ie(struct uwb_rc *rc)
+int uwb_rc_send_all_drp_ie(struct uwb_rc *rc)
 {
 	int result;
 	struct device *dev = &rc->uwb_dev.dev;
@@ -102,25 +101,6 @@ error_cmd:
 	kfree(cmd);
 error:
 	return result;
-
-}
-/**
- * Send all DRP IEs associated with this host
- *
- * @returns:    >= 0 number of bytes still available in the beacon
- *              < 0 errno code on error.
- *
- * As per the protocol we obtain the host controller device lock to access
- * bandwidth structures.
- */
-int uwb_rc_send_all_drp_ie(struct uwb_rc *rc)
-{
-	int result;
-
-	mutex_lock(&rc->uwb_dev.mutex);
-	result = uwb_rc_gen_send_drp_ie(rc);
-	mutex_unlock(&rc->uwb_dev.mutex);
-	return result;
 }
 
 void uwb_drp_handle_timeout(struct uwb_rsv *rsv)
diff --git a/drivers/uwb/lc-rc.c b/drivers/uwb/lc-rc.c
index f00633d334dd..9cf21e6bb624 100644
--- a/drivers/uwb/lc-rc.c
+++ b/drivers/uwb/lc-rc.c
@@ -189,9 +189,9 @@ static int uwb_rc_setup(struct uwb_rc *rc)
 	int result;
 	struct device *dev = &rc->uwb_dev.dev;
 
-	result = uwb_rc_reset(rc);
+	result = uwb_radio_setup(rc);
 	if (result < 0) {
-		dev_err(dev, "cannot reset UWB radio: %d\n", result);
+		dev_err(dev, "cannot setup UWB radio: %d\n", result);
 		goto error;
 	}
 	result = uwb_rc_mac_addr_setup(rc);
@@ -311,12 +311,7 @@ void uwb_rc_rm(struct uwb_rc *rc)
 
 	uwb_dbg_del_rc(rc);
 	uwb_rsv_remove_all(rc);
-	uwb_rc_ie_rm(rc, UWB_IDENTIFICATION_IE);
-	if (rc->beaconing >= 0)
-		uwb_rc_beacon(rc, -1, 0);
-	if (rc->scan_type != UWB_SCAN_DISABLED)
-		uwb_rc_scan(rc, rc->scanning, UWB_SCAN_DISABLED, 0);
-	uwb_rc_reset(rc);
+	uwb_radio_shutdown(rc);
 
 	rc->stop(rc);
 
diff --git a/drivers/uwb/pal.c b/drivers/uwb/pal.c
index 1afb38eacb9a..605765124f5b 100644
--- a/drivers/uwb/pal.c
+++ b/drivers/uwb/pal.c
@@ -32,13 +32,13 @@ EXPORT_SYMBOL_GPL(uwb_pal_init);
 
 /**
  * uwb_pal_register - register a UWB PAL
- * @rc: the radio controller the PAL will be using
  * @pal: the PAL
  *
  * The PAL must be initialized with uwb_pal_init().
  */
-int uwb_pal_register(struct uwb_rc *rc, struct uwb_pal *pal)
+int uwb_pal_register(struct uwb_pal *pal)
 {
+	struct uwb_rc *rc = pal->rc;
 	int ret;
 
 	if (pal->device) {
@@ -54,9 +54,9 @@ int uwb_pal_register(struct uwb_rc *rc, struct uwb_pal *pal)
 		}
 	}
 
-	spin_lock(&rc->pal_lock);
+	mutex_lock(&rc->uwb_dev.mutex);
 	list_add(&pal->node, &rc->pals);
-	spin_unlock(&rc->pal_lock);
+	mutex_unlock(&rc->uwb_dev.mutex);
 
 	return 0;
 }
@@ -64,14 +64,17 @@ EXPORT_SYMBOL_GPL(uwb_pal_register);
 
 /**
  * uwb_pal_register - unregister a UWB PAL
- * @rc: the radio controller the PAL was using
  * @pal: the PAL
  */
-void uwb_pal_unregister(struct uwb_rc *rc, struct uwb_pal *pal)
+void uwb_pal_unregister(struct uwb_pal *pal)
 {
-	spin_lock(&rc->pal_lock);
+	struct uwb_rc *rc = pal->rc;
+
+	uwb_radio_stop(pal);
+
+	mutex_lock(&rc->uwb_dev.mutex);
 	list_del(&pal->node);
-	spin_unlock(&rc->pal_lock);
+	mutex_unlock(&rc->uwb_dev.mutex);
 
 	if (pal->device) {
 		sysfs_remove_link(&rc->uwb_dev.dev.kobj, pal->name);
@@ -86,6 +89,5 @@ EXPORT_SYMBOL_GPL(uwb_pal_unregister);
  */
 void uwb_rc_pal_init(struct uwb_rc *rc)
 {
-	spin_lock_init(&rc->pal_lock);
 	INIT_LIST_HEAD(&rc->pals);
 }
diff --git a/drivers/uwb/radio.c b/drivers/uwb/radio.c
new file mode 100644
index 000000000000..f0d55495f5e9
--- /dev/null
+++ b/drivers/uwb/radio.c
@@ -0,0 +1,202 @@
+/*
+ * UWB radio (channel) management.
+ *
+ * Copyright (C) 2008 Cambridge Silicon Radio Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/kernel.h>
+#include <linux/uwb.h>
+
+#include "uwb-internal.h"
+
+
+static int uwb_radio_select_channel(struct uwb_rc *rc)
+{
+	/*
+	 * Default to channel 9 (BG1, TFC1) unless the user has
+	 * selected a specific channel or there are no active PALs.
+	 */
+	if (rc->active_pals == 0)
+		return -1;
+	if (rc->beaconing_forced)
+		return rc->beaconing_forced;
+	return 9;
+}
+
+
+/*
+ * Notify all active PALs that the channel has changed.
+ */
+static void uwb_radio_channel_changed(struct uwb_rc *rc, int channel)
+{
+	struct uwb_pal *pal;
+
+	list_for_each_entry(pal, &rc->pals, node) {
+		if (pal->channel && channel != pal->channel) {
+			pal->channel = channel;
+			if (pal->channel_changed)
+				pal->channel_changed(pal, pal->channel);
+		}
+	}
+}
+
+/*
+ * Change to a new channel and notify any active PALs of the new
+ * channel.
+ *
+ * When stopping the radio, PALs need to be notified first so they can
+ * terminate any active reservations.
+ */
+static int uwb_radio_change_channel(struct uwb_rc *rc, int channel)
+{
+	int ret = 0;
+
+	if (channel == -1)
+		uwb_radio_channel_changed(rc, channel);
+
+	if (channel != rc->beaconing) {
+		if (rc->beaconing != -1 && channel != -1) {
+			/*
+			 * FIXME: should signal the channel change
+			 * with a Channel Change IE.
+			 */
+			ret = uwb_radio_change_channel(rc, -1);
+			if (ret < 0)
+				return ret;
+		}
+		ret = uwb_rc_beacon(rc, channel, 0);
+	}
+
+	if (channel != -1)
+		uwb_radio_channel_changed(rc, rc->beaconing);
+
+	return ret;
+}
+
+/**
+ * uwb_radio_start - request that the radio be started
+ * @pal: the PAL making the request.
+ *
+ * If the radio is not already active, aa suitable channel is selected
+ * and beacons are started.
+ */
+int uwb_radio_start(struct uwb_pal *pal)
+{
+	struct uwb_rc *rc = pal->rc;
+	int ret = 0;
+
+	mutex_lock(&rc->uwb_dev.mutex);
+
+	if (!pal->channel) {
+		pal->channel = -1;
+		rc->active_pals++;
+		ret = uwb_radio_change_channel(rc, uwb_radio_select_channel(rc));
+	}
+
+	mutex_unlock(&rc->uwb_dev.mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(uwb_radio_start);
+
+/**
+ * uwb_radio_stop - request tha the radio be stopped.
+ * @pal: the PAL making the request.
+ *
+ * Stops the radio if no other PAL is making use of it.
+ */
+void uwb_radio_stop(struct uwb_pal *pal)
+{
+	struct uwb_rc *rc = pal->rc;
+
+	mutex_lock(&rc->uwb_dev.mutex);
+
+	if (pal->channel) {
+		rc->active_pals--;
+		uwb_radio_change_channel(rc, uwb_radio_select_channel(rc));
+		pal->channel = 0;
+	}
+
+	mutex_unlock(&rc->uwb_dev.mutex);
+}
+EXPORT_SYMBOL_GPL(uwb_radio_stop);
+
+/*
+ * uwb_radio_force_channel - force a specific channel to be used
+ * @rc: the radio controller.
+ * @channel: the channel to use; -1 to force the radio to stop; 0 to
+ *   use the default channel selection algorithm.
+ */
+int uwb_radio_force_channel(struct uwb_rc *rc, int channel)
+{
+	int ret = 0;
+
+	mutex_lock(&rc->uwb_dev.mutex);
+
+	rc->beaconing_forced = channel;
+	ret = uwb_radio_change_channel(rc, uwb_radio_select_channel(rc));
+
+	mutex_unlock(&rc->uwb_dev.mutex);
+	return ret;
+}
+
+/*
+ * uwb_radio_setup - setup the radio manager
+ * @rc: the radio controller.
+ *
+ * The radio controller is reset to ensure it's in a known state
+ * before it's used.
+ */
+int uwb_radio_setup(struct uwb_rc *rc)
+{
+	return uwb_rc_reset(rc);
+}
+
+/*
+ * uwb_radio_reset_state - reset any radio manager state
+ * @rc: the radio controller.
+ *
+ * All internal radio manager state is reset to values corresponding
+ * to a reset radio controller.
+ */
+void uwb_radio_reset_state(struct uwb_rc *rc)
+{
+	struct uwb_pal *pal;
+
+	mutex_lock(&rc->uwb_dev.mutex);
+
+	list_for_each_entry(pal, &rc->pals, node) {
+		if (pal->channel) {
+			pal->channel = -1;
+			if (pal->channel_changed)
+				pal->channel_changed(pal, -1);
+		}
+	}
+
+	rc->beaconing = -1;
+	rc->scanning = -1;
+
+	mutex_unlock(&rc->uwb_dev.mutex);
+}
+
+/*
+ * uwb_radio_shutdown - shutdown the radio manager
+ * @rc: the radio controller.
+ *
+ * The radio controller is reset.
+ */
+void uwb_radio_shutdown(struct uwb_rc *rc)
+{
+	uwb_radio_reset_state(rc);
+	uwb_rc_reset(rc);
+}
diff --git a/drivers/uwb/reset.c b/drivers/uwb/reset.c
index e39b32099af3..ce8283cc8098 100644
--- a/drivers/uwb/reset.c
+++ b/drivers/uwb/reset.c
@@ -365,11 +365,7 @@ void uwb_rc_pre_reset(struct uwb_rc *rc)
 	rc->stop(rc);
 	uwbd_flush(rc);
 
-	mutex_lock(&rc->uwb_dev.mutex);
-	rc->beaconing = -1;
-	rc->scanning = -1;
-	mutex_unlock(&rc->uwb_dev.mutex);
-
+	uwb_radio_reset_state(rc);
 	uwb_rsv_remove_all(rc);
 }
 EXPORT_SYMBOL_GPL(uwb_rc_pre_reset);
diff --git a/drivers/uwb/rsv.c b/drivers/uwb/rsv.c
index 935d5b536db7..1cd84f927540 100644
--- a/drivers/uwb/rsv.c
+++ b/drivers/uwb/rsv.c
@@ -555,14 +555,14 @@ static struct uwb_rsv *uwb_rsv_new_target(struct uwb_rc *rc,
 	 * deny the request.
 	 */
 	rsv->state = UWB_RSV_STATE_T_DENIED;
-	spin_lock(&rc->pal_lock);
+	mutex_lock(&rc->uwb_dev.mutex);
 	list_for_each_entry(pal, &rc->pals, node) {
 		if (pal->new_rsv)
 			pal->new_rsv(pal, rsv);
 		if (rsv->state == UWB_RSV_STATE_T_ACCEPTED)
 			break;
 	}
-	spin_unlock(&rc->pal_lock);
+	mutex_unlock(&rc->uwb_dev.mutex);
 
 	list_add_tail(&rsv->rc_node, &rc->reservations);
 	state = rsv->state;
diff --git a/drivers/uwb/uwb-debug.c b/drivers/uwb/uwb-debug.c
index 217ebaac128d..0e58071a232d 100644
--- a/drivers/uwb/uwb-debug.c
+++ b/drivers/uwb/uwb-debug.c
@@ -192,7 +192,7 @@ static ssize_t command_write(struct file *file, const char __user *buf,
 {
 	struct uwb_rc *rc = file->private_data;
 	struct uwb_dbg_cmd cmd;
-	int ret;
+	int ret = 0;
 
 	if (len != sizeof(struct uwb_dbg_cmd))
 		return -EINVAL;
@@ -213,6 +213,12 @@ static ssize_t command_write(struct file *file, const char __user *buf,
 	case UWB_DBG_CMD_IE_RM:
 		ret = cmd_ie_rm(rc, &cmd.ie_rm);
 		break;
+	case UWB_DBG_CMD_RADIO_START:
+		ret = uwb_radio_start(&rc->dbg->pal);
+		break;
+	case UWB_DBG_CMD_RADIO_STOP:
+		uwb_radio_stop(&rc->dbg->pal);
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -306,6 +312,17 @@ static struct file_operations drp_avail_fops = {
 	.owner   = THIS_MODULE,
 };
 
+static void uwb_dbg_channel_changed(struct uwb_pal *pal, int channel)
+{
+	struct uwb_dbg *dbg = container_of(pal, struct uwb_dbg, pal);
+	struct device *dev = &pal->rc->uwb_dev.dev;
+
+	if (channel > 0)
+		dev_info(dev, "debug: channel %d started\n", channel);
+	else
+		dev_info(dev, "debug: channel stopped\n");
+}
+
 static void uwb_dbg_new_rsv(struct uwb_pal *pal, struct uwb_rsv *rsv)
 {
 	struct uwb_dbg *dbg = container_of(pal, struct uwb_dbg, pal);
@@ -329,8 +346,11 @@ void uwb_dbg_add_rc(struct uwb_rc *rc)
 	INIT_LIST_HEAD(&rc->dbg->rsvs);
 
 	uwb_pal_init(&rc->dbg->pal);
+	rc->dbg->pal.rc = rc;
+	rc->dbg->pal.channel_changed = uwb_dbg_channel_changed;
 	rc->dbg->pal.new_rsv = uwb_dbg_new_rsv;
-	uwb_pal_register(rc, &rc->dbg->pal);
+	uwb_pal_register(&rc->dbg->pal);
+
 	if (root_dir) {
 		rc->dbg->root_d = debugfs_create_dir(dev_name(&rc->uwb_dev.dev),
 						     root_dir);
@@ -364,7 +384,7 @@ void uwb_dbg_del_rc(struct uwb_rc *rc)
 		uwb_rsv_terminate(rsv);
 	}
 
-	uwb_pal_unregister(rc, &rc->dbg->pal);
+	uwb_pal_unregister(&rc->dbg->pal);
 
 	if (root_dir) {
 		debugfs_remove(rc->dbg->drp_avail_f);
diff --git a/drivers/uwb/uwb-internal.h b/drivers/uwb/uwb-internal.h
index af95541dabcd..9c0cdb4ded0c 100644
--- a/drivers/uwb/uwb-internal.h
+++ b/drivers/uwb/uwb-internal.h
@@ -238,6 +238,11 @@ struct uwb_dev *uwb_dev_get_by_devaddr(struct uwb_rc *rc,
 struct uwb_dev *uwb_dev_get_by_macaddr(struct uwb_rc *rc,
 				       const struct uwb_mac_addr *macaddr);
 
+int uwb_radio_setup(struct uwb_rc *rc);
+void uwb_radio_reset_state(struct uwb_rc *rc);
+void uwb_radio_shutdown(struct uwb_rc *rc);
+int uwb_radio_force_channel(struct uwb_rc *rc, int channel);
+
 /* -- UWB Sysfs representation */
 extern struct class uwb_rc_class;
 extern struct device_attribute dev_attr_mac_address;
diff --git a/drivers/uwb/wlp/wlp-lc.c b/drivers/uwb/wlp/wlp-lc.c
index 0799402e73fb..7e5eb49b03b8 100644
--- a/drivers/uwb/wlp/wlp-lc.c
+++ b/drivers/uwb/wlp/wlp-lc.c
@@ -543,7 +543,8 @@ int wlp_setup(struct wlp *wlp, struct uwb_rc *rc)
 	uwb_notifs_register(rc, &wlp->uwb_notifs_handler);
 
 	uwb_pal_init(&wlp->pal);
-	result = uwb_pal_register(rc, &wlp->pal);
+	wlp->pal.rc = rc;
+	result = uwb_pal_register(&wlp->pal);
 	if (result < 0)
 		uwb_notifs_deregister(wlp->rc, &wlp->uwb_notifs_handler);
 
@@ -557,7 +558,7 @@ void wlp_remove(struct wlp *wlp)
 	struct device *dev = &wlp->rc->uwb_dev.dev;
 	d_fnstart(6, dev, "wlp %p\n", wlp);
 	wlp_neighbors_release(wlp);
-	uwb_pal_unregister(wlp->rc, &wlp->pal);
+	uwb_pal_unregister(&wlp->pal);
 	uwb_notifs_deregister(wlp->rc, &wlp->uwb_notifs_handler);
 	wlp_eda_release(&wlp->eda);
 	mutex_lock(&wlp->mutex);
diff --git a/include/linux/uwb.h b/include/linux/uwb.h
index effd97998fd1..7d3ebf046f9a 100644
--- a/include/linux/uwb.h
+++ b/include/linux/uwb.h
@@ -355,6 +355,7 @@ struct uwb_rc {
 	u8 ctx_roll;
 
 	int beaconing;			/* Beaconing state [channel number] */
+	int beaconing_forced;
 	int scanning;
 	enum uwb_scan_type scan_type:3;
 	unsigned ready:1;
@@ -373,8 +374,8 @@ struct uwb_rc {
 	struct uwb_rc_cmd_set_ie *ies;
 	size_t ies_capacity;
 
-	spinlock_t pal_lock;
 	struct list_head pals;
+	int active_pals;
 
 	struct uwb_dbg *dbg;
 };
@@ -382,11 +383,17 @@ struct uwb_rc {
 
 /**
  * struct uwb_pal - a UWB PAL
- * @name:    descriptive name for this PAL (wushc, wlp, etc.).
+ * @name:    descriptive name for this PAL (wusbhc, wlp, etc.).
  * @device:  a device for the PAL.  Used to link the PAL and the radio
  *           controller in sysfs.
+ * @rc:      the radio controller the PAL uses.
+ * @channel_changed: called when the channel used by the radio changes.
+ *           A channel of -1 means the channel has been stopped.
  * @new_rsv: called when a peer requests a reservation (may be NULL if
  *           the PAL cannot accept reservation requests).
+ * @channel: channel being used by the PAL; 0 if the PAL isn't using
+ *           the radio; -1 if the PAL wishes to use the radio but
+ *           cannot.
  *
  * A Protocol Adaptation Layer (PAL) is a user of the WiMedia UWB
  * radio platform (e.g., WUSB, WLP or Bluetooth UWB AMP).
@@ -405,12 +412,20 @@ struct uwb_pal {
 	struct list_head node;
 	const char *name;
 	struct device *device;
+	struct uwb_rc *rc;
+
+	void (*channel_changed)(struct uwb_pal *pal, int channel);
 	void (*new_rsv)(struct uwb_pal *pal, struct uwb_rsv *rsv);
+
+	int channel;
 };
 
 void uwb_pal_init(struct uwb_pal *pal);
-int uwb_pal_register(struct uwb_rc *rc, struct uwb_pal *pal);
-void uwb_pal_unregister(struct uwb_rc *rc, struct uwb_pal *pal);
+int uwb_pal_register(struct uwb_pal *pal);
+void uwb_pal_unregister(struct uwb_pal *pal);
+
+int uwb_radio_start(struct uwb_pal *pal);
+void uwb_radio_stop(struct uwb_pal *pal);
 
 /*
  * General public API
diff --git a/include/linux/uwb/debug-cmd.h b/include/linux/uwb/debug-cmd.h
index 6a16566f0221..07efbe17db53 100644
--- a/include/linux/uwb/debug-cmd.h
+++ b/include/linux/uwb/debug-cmd.h
@@ -34,6 +34,8 @@ enum uwb_dbg_cmd_type {
 	UWB_DBG_CMD_RSV_TERMINATE = 2,
 	UWB_DBG_CMD_IE_ADD = 3,
 	UWB_DBG_CMD_IE_RM = 4,
+	UWB_DBG_CMD_RADIO_START = 5,
+	UWB_DBG_CMD_RADIO_STOP = 6,
 };
 
 struct uwb_dbg_cmd_rsv_establish {
-- 
cgit v1.2.3


From e8e1594c8126b1b773988fa2e3bfec76cff88336 Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@csr.com>
Date: Mon, 17 Nov 2008 16:16:51 +0000
Subject: wlp: start/stop radio on network interface up/down

Signed-off-by: David Vrabel <david.vrabel@csr.com>
---
 drivers/uwb/i1480/i1480u-wlp/lc.c     |  2 +-
 drivers/uwb/i1480/i1480u-wlp/netdev.c | 51 ++++++++---------------------------
 drivers/uwb/uwb-debug.c               |  3 ---
 drivers/uwb/wlp/wlp-lc.c              | 14 +++++++++-
 include/linux/wlp.h                   |  3 ++-
 5 files changed, 27 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/uwb/i1480/i1480u-wlp/lc.c b/drivers/uwb/i1480/i1480u-wlp/lc.c
index 384306c11bbe..488b2e30a0a8 100644
--- a/drivers/uwb/i1480/i1480u-wlp/lc.c
+++ b/drivers/uwb/i1480/i1480u-wlp/lc.c
@@ -206,7 +206,7 @@ int i1480u_add(struct i1480u *i1480u, struct usb_interface *iface)
 	wlp->fill_device_info = i1480u_fill_device_info;
 	wlp->stop_queue = i1480u_stop_queue;
 	wlp->start_queue = i1480u_start_queue;
-	result = wlp_setup(wlp, rc);
+	result = wlp_setup(wlp, rc, net_dev);
 	if (result < 0) {
 		dev_err(&iface->dev, "Cannot setup WLP\n");
 		goto error_wlp_setup;
diff --git a/drivers/uwb/i1480/i1480u-wlp/netdev.c b/drivers/uwb/i1480/i1480u-wlp/netdev.c
index 8802ac43d872..2eafb973cd05 100644
--- a/drivers/uwb/i1480/i1480u-wlp/netdev.c
+++ b/drivers/uwb/i1480/i1480u-wlp/netdev.c
@@ -207,6 +207,11 @@ int i1480u_open(struct net_device *net_dev)
 	result = i1480u_rx_setup(i1480u);		/* Alloc RX stuff */
 	if (result < 0)
 		goto error_rx_setup;
+
+	result = uwb_radio_start(&wlp->pal);
+	if (result < 0)
+		goto error_radio_start;
+
 	netif_wake_queue(net_dev);
 #ifdef i1480u_FLOW_CONTROL
 	result = usb_submit_urb(i1480u->notif_urb, GFP_KERNEL);;
@@ -215,25 +220,20 @@ int i1480u_open(struct net_device *net_dev)
 		goto error_notif_urb_submit;
 	}
 #endif
-	i1480u->uwb_notifs_handler.cb = i1480u_uwb_notifs_cb;
-	i1480u->uwb_notifs_handler.data = i1480u;
-	if (uwb_bg_joined(rc))
-		netif_carrier_on(net_dev);
-	else
-		netif_carrier_off(net_dev);
-	uwb_notifs_register(rc, &i1480u->uwb_notifs_handler);
 	/* Interface is up with an address, now we can create WSS */
 	result = wlp_wss_setup(net_dev, &wlp->wss);
 	if (result < 0) {
 		dev_err(dev, "Can't create WSS: %d. \n", result);
-		goto error_notif_deregister;
+		goto error_wss_setup;
 	}
 	return 0;
-error_notif_deregister:
-	uwb_notifs_deregister(rc, &i1480u->uwb_notifs_handler);
+error_wss_setup:
 #ifdef i1480u_FLOW_CONTROL
+	usb_kill_urb(i1480u->notif_urb);
 error_notif_urb_submit:
 #endif
+	uwb_radio_stop(&wlp->pal);
+error_radio_start:
 	netif_stop_queue(net_dev);
 	i1480u_rx_release(i1480u);
 error_rx_setup:
@@ -248,16 +248,15 @@ int i1480u_stop(struct net_device *net_dev)
 {
 	struct i1480u *i1480u = netdev_priv(net_dev);
 	struct wlp *wlp = &i1480u->wlp;
-	struct uwb_rc *rc = wlp->rc;
 
 	BUG_ON(wlp->rc == NULL);
 	wlp_wss_remove(&wlp->wss);
-	uwb_notifs_deregister(rc, &i1480u->uwb_notifs_handler);
 	netif_carrier_off(net_dev);
 #ifdef i1480u_FLOW_CONTROL
 	usb_kill_urb(i1480u->notif_urb);
 #endif
 	netif_stop_queue(net_dev);
+	uwb_radio_stop(&wlp->pal);
 	i1480u_rx_release(i1480u);
 	i1480u_tx_release(i1480u);
 	return 0;
@@ -303,34 +302,6 @@ int i1480u_change_mtu(struct net_device *net_dev, int mtu)
 	return 0;
 }
 
-
-/**
- * Callback function to handle events from UWB
- * When we see other devices we know the carrier is ok,
- * if we are the only device in the beacon group we set the carrier
- * state to off.
- * */
-void i1480u_uwb_notifs_cb(void *data, struct uwb_dev *uwb_dev,
-			  enum uwb_notifs event)
-{
-	struct i1480u *i1480u = data;
-	struct net_device *net_dev = i1480u->net_dev;
-	struct device *dev = &i1480u->usb_iface->dev;
-	switch (event) {
-	case UWB_NOTIF_BG_JOIN:
-		netif_carrier_on(net_dev);
-		dev_info(dev, "Link is up\n");
-		break;
-	case UWB_NOTIF_BG_LEAVE:
-		netif_carrier_off(net_dev);
-		dev_info(dev, "Link is down\n");
-		break;
-	default:
-		dev_err(dev, "don't know how to handle event %d from uwb\n",
-				event);
-	}
-}
-
 /**
  * Stop the network queue
  *
diff --git a/drivers/uwb/uwb-debug.c b/drivers/uwb/uwb-debug.c
index 0e58071a232d..e02fb83469d5 100644
--- a/drivers/uwb/uwb-debug.c
+++ b/drivers/uwb/uwb-debug.c
@@ -33,8 +33,6 @@
 #include <linux/seq_file.h>
 
 #include <linux/uwb/debug-cmd.h>
-#define D_LOCAL 0
-#include <linux/uwb/debug.h>
 
 #include "uwb-internal.h"
 
@@ -314,7 +312,6 @@ static struct file_operations drp_avail_fops = {
 
 static void uwb_dbg_channel_changed(struct uwb_pal *pal, int channel)
 {
-	struct uwb_dbg *dbg = container_of(pal, struct uwb_dbg, pal);
 	struct device *dev = &pal->rc->uwb_dev.dev;
 
 	if (channel > 0)
diff --git a/drivers/uwb/wlp/wlp-lc.c b/drivers/uwb/wlp/wlp-lc.c
index 7e5eb49b03b8..e531093c4162 100644
--- a/drivers/uwb/wlp/wlp-lc.c
+++ b/drivers/uwb/wlp/wlp-lc.c
@@ -526,7 +526,17 @@ void wlp_uwb_notifs_cb(void *_wlp, struct uwb_dev *uwb_dev,
 	}
 }
 
-int wlp_setup(struct wlp *wlp, struct uwb_rc *rc)
+static void wlp_channel_changed(struct uwb_pal *pal, int channel)
+{
+	struct wlp *wlp = container_of(pal, struct wlp, pal);
+
+	if (channel < 0)
+		netif_carrier_off(wlp->ndev);
+	else
+		netif_carrier_on(wlp->ndev);
+}
+
+int wlp_setup(struct wlp *wlp, struct uwb_rc *rc, struct net_device *ndev)
 {
 	struct device *dev = &rc->uwb_dev.dev;
 	int result;
@@ -537,6 +547,7 @@ int wlp_setup(struct wlp *wlp, struct uwb_rc *rc)
 	BUG_ON(wlp->stop_queue == NULL);
 	BUG_ON(wlp->start_queue == NULL);
 	wlp->rc = rc;
+	wlp->ndev = ndev;
 	wlp_eda_init(&wlp->eda);/* Set up address cache */
 	wlp->uwb_notifs_handler.cb = wlp_uwb_notifs_cb;
 	wlp->uwb_notifs_handler.data = wlp;
@@ -544,6 +555,7 @@ int wlp_setup(struct wlp *wlp, struct uwb_rc *rc)
 
 	uwb_pal_init(&wlp->pal);
 	wlp->pal.rc = rc;
+	wlp->pal.channel_changed = wlp_channel_changed;
 	result = uwb_pal_register(&wlp->pal);
 	if (result < 0)
 		uwb_notifs_deregister(wlp->rc, &wlp->uwb_notifs_handler);
diff --git a/include/linux/wlp.h b/include/linux/wlp.h
index 033545e145c7..ac95ce6606ac 100644
--- a/include/linux/wlp.h
+++ b/include/linux/wlp.h
@@ -646,6 +646,7 @@ struct wlp_wss {
 struct wlp {
 	struct mutex mutex;
 	struct uwb_rc *rc;		/* UWB radio controller */
+	struct net_device *ndev;
 	struct uwb_pal pal;
 	struct wlp_eda eda;
 	struct wlp_uuid uuid;
@@ -675,7 +676,7 @@ struct wlp_wss_attribute {
 static struct wlp_wss_attribute wss_attr_##_name = __ATTR(_name, _mode,	\
 							  _show, _store)
 
-extern int wlp_setup(struct wlp *, struct uwb_rc *);
+extern int wlp_setup(struct wlp *, struct uwb_rc *, struct net_device *ndev);
 extern void wlp_remove(struct wlp *);
 extern ssize_t wlp_neighborhood_show(struct wlp *, char *);
 extern int wlp_wss_setup(struct net_device *, struct wlp_wss *);
-- 
cgit v1.2.3


From 0996e6382482ce9014787693d3884e9468153a5c Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@csr.com>
Date: Mon, 17 Nov 2008 16:23:22 +0000
Subject: uwb: remove unused beacon group join/leave events

The UWB_NOTIF_BG_JOIN/UWB_NOTIF_BG_LEAVE events have been
superceeded by the channel_changed callback in struct uwb_pal.

Signed-off-by: David Vrabel <david.vrabel@csr.com>
---
 drivers/uwb/beacon.c | 17 +----------------
 include/linux/uwb.h  |  7 +++----
 2 files changed, 4 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/uwb/beacon.c b/drivers/uwb/beacon.c
index 247956098afa..d9c60cb94993 100644
--- a/drivers/uwb/beacon.c
+++ b/drivers/uwb/beacon.c
@@ -140,10 +140,8 @@ int uwb_rc_beacon(struct uwb_rc *rc, int channel, unsigned bpst_offset)
 		}
 	}
 
-	if (result >= 0) {
+	if (result >= 0)
 		rc->beaconing = channel;
-		uwb_notify(rc, NULL, uwb_bg_joined(rc) ? UWB_NOTIF_BG_JOIN : UWB_NOTIF_BG_LEAVE);
-	}
 	return result;
 }
 
@@ -581,19 +579,6 @@ error:
 	return result;
 }
 
-/**
- * uwb_bg_joined - is the RC in a beacon group?
- * @rc: the radio controller
- *
- * Returns true if the radio controller is in a beacon group (even if
- * it's the sole member).
- */
-int uwb_bg_joined(struct uwb_rc *rc)
-{
-	return rc->beaconing != -1;
-}
-EXPORT_SYMBOL_GPL(uwb_bg_joined);
-
 /*
  * Print beaconing state.
  */
diff --git a/include/linux/uwb.h b/include/linux/uwb.h
index 7d3ebf046f9a..1719709d60ca 100644
--- a/include/linux/uwb.h
+++ b/include/linux/uwb.h
@@ -479,7 +479,6 @@ ssize_t uwb_rc_vcmd(struct uwb_rc *rc, const char *cmd_name,
 		    struct uwb_rccb *cmd, size_t cmd_size,
 		    u8 expected_type, u16 expected_event,
 		    struct uwb_rceb **preply);
-int uwb_bg_joined(struct uwb_rc *rc);
 
 size_t __uwb_addr_print(char *, size_t, const unsigned char *, int);
 
@@ -568,7 +567,9 @@ static inline bool uwb_rsv_is_owner(struct uwb_rsv *rsv)
 }
 
 /**
- * Events generated by UWB that can be passed to any listeners
+ * enum uwb_notifs - UWB events that can be passed to any listeners
+ * @UWB_NOTIF_ONAIR: a new neighbour has joined the beacon group.
+ * @UWB_NOTIF_OFFAIR: a neighbour has left the beacon group.
  *
  * Higher layers can register callback functions with the radio
  * controller using uwb_notifs_register(). The radio controller
@@ -576,8 +577,6 @@ static inline bool uwb_rsv_is_owner(struct uwb_rsv *rsv)
  * nodes when an event occurs.
  */
 enum uwb_notifs {
-	UWB_NOTIF_BG_JOIN = 0,	/* radio controller joined a beacon group */
-	UWB_NOTIF_BG_LEAVE = 1,	/* radio controller left a beacon group */
 	UWB_NOTIF_ONAIR,
 	UWB_NOTIF_OFFAIR,
 };
-- 
cgit v1.2.3


From e262a7ba31f0cd4dae225e6d2e9037e5ac6108e8 Mon Sep 17 00:00:00 2001
From: Richard Kennedy <richard@rsk.demon.co.uk>
Date: Sun, 23 Nov 2008 14:34:43 +0000
Subject: irq.h: remove padding from irq_desc on 64bits

Impact: reduce struct irq_desc size

struct irq_desc: reorder to remove padding on 64bits

shrinks irq_desc to 128 bytes which saves data space & cache lines

On a generic x86_64/SMP build this reduces the reported data size by
64k.

Signed-off-by: Richard Kennedy <richard@rsk.demon.co.uk>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/irq.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index d058c57be02d..838a977885e1 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -142,8 +142,8 @@ struct irq_chip {
  * @depth:		disable-depth, for nested irq_disable() calls
  * @wake_depth:		enable depth, for multiple set_irq_wake() callers
  * @irq_count:		stats field to detect stalled irqs
- * @irqs_unhandled:	stats field for spurious unhandled interrupts
  * @last_unhandled:	aging timer for unhandled count
+ * @irqs_unhandled:	stats field for spurious unhandled interrupts
  * @lock:		locking for SMP
  * @affinity:		IRQ affinity on SMP
  * @cpu:		cpu index useful for balancing
@@ -165,8 +165,8 @@ struct irq_desc {
 	unsigned int		depth;		/* nested irq disables */
 	unsigned int		wake_depth;	/* nested wake enables */
 	unsigned int		irq_count;	/* For detecting broken IRQs */
-	unsigned int		irqs_unhandled;
 	unsigned long		last_unhandled;	/* Aging timer for unhandled count */
+	unsigned int		irqs_unhandled;
 	spinlock_t		lock;
 #ifdef CONFIG_SMP
 	cpumask_t		affinity;
-- 
cgit v1.2.3


From 3eb1aa43ef5cb871ba3fb2f08633675eca374d2e Mon Sep 17 00:00:00 2001
From: Jaya Kumar <jayakumar.lkml@gmail.com>
Date: Wed, 19 Nov 2008 16:58:50 -0500
Subject: Input: add support for Wacom W8001 penabled serial touchscreen

The Wacom W8001 sensor is a sensor device (uses electromagnetic
resonance) and it is interfaced via its serial microcontroller
to the host.

Signed-off-by: Jaya Kumar <jayakumar.lkml@gmail.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/touchscreen/Kconfig       |  13 ++
 drivers/input/touchscreen/Makefile      |   1 +
 drivers/input/touchscreen/wacom_w8001.c | 325 ++++++++++++++++++++++++++++++++
 include/linux/serio.h                   |   1 +
 4 files changed, 340 insertions(+)
 create mode 100644 drivers/input/touchscreen/wacom_w8001.c

(limited to 'include/linux')

diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig
index 3d1ab8fa9acc..20eb52ed176d 100644
--- a/drivers/input/touchscreen/Kconfig
+++ b/drivers/input/touchscreen/Kconfig
@@ -95,6 +95,19 @@ config TOUCHSCREEN_ELO
 	  To compile this driver as a module, choose M here: the
 	  module will be called elo.
 
+config TOUCHSCREEN_WACOM_W8001
+	tristate "Wacom W8001 penabled serial touchscreen"
+	select SERIO
+	help
+	  Say Y here if you have an Wacom W8001 penabled serial touchscreen
+	  connected to your system.
+
+	  If unsure, say N.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called wacom_w8001.
+
+
 config TOUCHSCREEN_MTOUCH
 	tristate "MicroTouch serial touchscreens"
 	select SERIO
diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile
index 15cf29079489..3dc84d3846c1 100644
--- a/drivers/input/touchscreen/Makefile
+++ b/drivers/input/touchscreen/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_TOUCHSCREEN_TOUCHIT213)	+= touchit213.o
 obj-$(CONFIG_TOUCHSCREEN_TOUCHRIGHT)	+= touchright.o
 obj-$(CONFIG_TOUCHSCREEN_TOUCHWIN)	+= touchwin.o
 obj-$(CONFIG_TOUCHSCREEN_UCB1400)	+= ucb1400_ts.o
+obj-$(CONFIG_TOUCHSCREEN_WACOM_W8001)	+= wacom_w8001.o
 obj-$(CONFIG_TOUCHSCREEN_WM97XX)	+= wm97xx-ts.o
 wm97xx-ts-$(CONFIG_TOUCHSCREEN_WM9705)	+= wm9705.o
 wm97xx-ts-$(CONFIG_TOUCHSCREEN_WM9712)	+= wm9712.o
diff --git a/drivers/input/touchscreen/wacom_w8001.c b/drivers/input/touchscreen/wacom_w8001.c
new file mode 100644
index 000000000000..2f33a0167644
--- /dev/null
+++ b/drivers/input/touchscreen/wacom_w8001.c
@@ -0,0 +1,325 @@
+/*
+ * Wacom W8001 penabled serial touchscreen driver
+ *
+ * Copyright (c) 2008 Jaya Kumar
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of this archive for
+ * more details.
+ *
+ * Layout based on Elo serial touchscreen driver by Vojtech Pavlik
+ */
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/input.h>
+#include <linux/serio.h>
+#include <linux/init.h>
+#include <linux/ctype.h>
+
+#define DRIVER_DESC	"Wacom W8001 serial touchscreen driver"
+
+MODULE_AUTHOR("Jaya Kumar <jayakumar.lkml@gmail.com>");
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL");
+
+/*
+ * Definitions & global arrays.
+ */
+
+#define W8001_MAX_LENGTH	11
+#define W8001_PACKET_LEN	11
+#define W8001_LEAD_MASK 0x80
+#define W8001_LEAD_BYTE 0x80
+#define W8001_TAB_MASK 0x40
+#define W8001_TAB_BYTE 0x40
+
+#define W8001_QUERY_PACKET 0x20
+
+struct w8001_coord {
+	u8 rdy;
+	u8 tsw;
+	u8 f1;
+	u8 f2;
+	u16 x;
+	u16 y;
+	u16 pen_pressure;
+	u8 tilt_x;
+	u8 tilt_y;
+};
+
+/*
+ * Per-touchscreen data.
+ */
+
+struct w8001 {
+	struct input_dev *dev;
+	struct serio *serio;
+	struct mutex cmd_mutex;
+	struct completion cmd_done;
+	int id;
+	int idx;
+	unsigned char expected_packet;
+	unsigned char data[W8001_MAX_LENGTH];
+	unsigned char response[W8001_PACKET_LEN];
+	char phys[32];
+};
+
+static int parse_data(u8 *data, struct w8001_coord *coord)
+{
+	coord->rdy = data[0] & 0x20;
+	coord->tsw = data[0] & 0x01;
+	coord->f1 = data[0] & 0x02;
+	coord->f2 = data[0] & 0x04;
+
+	coord->x = (data[1] & 0x7F) << 9;
+	coord->x |= (data[2] & 0x7F) << 2;
+	coord->x |= (data[6] & 0x60) >> 5;
+
+	coord->y = (data[3] & 0x7F) << 9;
+	coord->y |= (data[4] & 0x7F) << 2;
+	coord->y |= (data[6] & 0x18) >> 3;
+
+	coord->pen_pressure = data[5] & 0x7F;
+	coord->pen_pressure |= (data[6] & 0x07) << 7 ;
+
+	coord->tilt_x = data[7] & 0x7F;
+	coord->tilt_y = data[8] & 0x7F;
+
+	return 0;
+}
+
+static void w8001_process_data(struct w8001 *w8001, unsigned char data)
+{
+	struct input_dev *dev = w8001->dev;
+	u8 tmp;
+	struct w8001_coord coord;
+
+	w8001->data[w8001->idx] = data;
+	switch (w8001->idx++) {
+	case 0:
+		if ((data & W8001_LEAD_MASK) != W8001_LEAD_BYTE) {
+			pr_debug("w8001: unsynchronized data: 0x%02x\n", data);
+			w8001->idx = 0;
+		}
+		break;
+	case 8:
+		tmp = w8001->data[0] & W8001_TAB_MASK;
+		if (unlikely(tmp == W8001_TAB_BYTE))
+			break;
+		w8001->idx = 0;
+		memset(&coord, 0, sizeof(coord));
+		parse_data(w8001->data, &coord);
+		input_report_abs(dev, ABS_X, coord.x);
+		input_report_abs(dev, ABS_Y, coord.y);
+		input_report_abs(dev, ABS_PRESSURE, coord.pen_pressure);
+		input_report_key(dev, BTN_TOUCH, coord.tsw);
+		input_sync(dev);
+		break;
+	case 10:
+		w8001->idx = 0;
+		memcpy(w8001->response, &w8001->data, W8001_PACKET_LEN);
+		w8001->expected_packet = W8001_QUERY_PACKET;
+		complete(&w8001->cmd_done);
+		break;
+	}
+}
+
+
+static irqreturn_t w8001_interrupt(struct serio *serio,
+		unsigned char data, unsigned int flags)
+{
+	struct w8001 *w8001 = serio_get_drvdata(serio);
+
+	w8001_process_data(w8001, data);
+
+	return IRQ_HANDLED;
+}
+
+static int w8001_async_command(struct w8001 *w8001, unsigned char *packet,
+					int len)
+{
+	int rc = -1;
+	int i;
+
+	mutex_lock(&w8001->cmd_mutex);
+
+	for (i = 0; i < len; i++) {
+		if (serio_write(w8001->serio, packet[i]))
+			goto out;
+	}
+	rc = 0;
+
+out:
+	mutex_unlock(&w8001->cmd_mutex);
+	return rc;
+}
+
+static int w8001_command(struct w8001 *w8001, unsigned char *packet, int len)
+{
+	int rc = -1;
+	int i;
+
+	mutex_lock(&w8001->cmd_mutex);
+
+	serio_pause_rx(w8001->serio);
+	init_completion(&w8001->cmd_done);
+	serio_continue_rx(w8001->serio);
+
+	for (i = 0; i < len; i++) {
+		if (serio_write(w8001->serio, packet[i]))
+			goto out;
+	}
+
+	wait_for_completion_timeout(&w8001->cmd_done, HZ);
+
+	if (w8001->expected_packet == W8001_QUERY_PACKET) {
+		/* We are back in reporting mode, the query was ACKed */
+		memcpy(packet, w8001->response, W8001_PACKET_LEN);
+		rc = 0;
+	}
+
+out:
+	mutex_unlock(&w8001->cmd_mutex);
+	return rc;
+}
+
+static int w8001_setup(struct w8001 *w8001)
+{
+	struct w8001_coord coord;
+	struct input_dev *dev = w8001->dev;
+	unsigned char start[1] = { '1' };
+	unsigned char query[11] = { '*' };
+
+	if (w8001_command(w8001, query, 1))
+		return -1;
+
+	memset(&coord, 0, sizeof(coord));
+	parse_data(query, &coord);
+
+	input_set_abs_params(dev, ABS_X, 0, coord.x, 0, 0);
+	input_set_abs_params(dev, ABS_Y, 0, coord.y, 0, 0);
+	input_set_abs_params(dev, ABS_PRESSURE, 0, coord.pen_pressure, 0, 0);
+	input_set_abs_params(dev, ABS_TILT_X, 0, coord.tilt_x, 0, 0);
+	input_set_abs_params(dev, ABS_TILT_Y, 0, coord.tilt_y, 0, 0);
+
+	if (w8001_async_command(w8001, start, 1))
+		return -1;
+
+	return 0;
+}
+
+/*
+ * w8001_disconnect() is the opposite of w8001_connect()
+ */
+
+static void w8001_disconnect(struct serio *serio)
+{
+	struct w8001 *w8001 = serio_get_drvdata(serio);
+
+	input_get_device(w8001->dev);
+	input_unregister_device(w8001->dev);
+	serio_close(serio);
+	serio_set_drvdata(serio, NULL);
+	input_put_device(w8001->dev);
+	kfree(w8001);
+}
+
+/*
+ * w8001_connect() is the routine that is called when someone adds a
+ * new serio device that supports the w8001 protocol and registers it as
+ * an input device.
+ */
+
+static int w8001_connect(struct serio *serio, struct serio_driver *drv)
+{
+	struct w8001 *w8001;
+	struct input_dev *input_dev;
+	int err;
+
+	w8001 = kzalloc(sizeof(struct w8001), GFP_KERNEL);
+	input_dev = input_allocate_device();
+	if (!w8001 || !input_dev) {
+		err = -ENOMEM;
+		goto fail1;
+	}
+
+	w8001->serio = serio;
+	w8001->id = serio->id.id;
+	w8001->dev = input_dev;
+	mutex_init(&w8001->cmd_mutex);
+	init_completion(&w8001->cmd_done);
+	snprintf(w8001->phys, sizeof(w8001->phys), "%s/input0", serio->phys);
+
+	input_dev->name = "Wacom W8001 Penabled Serial TouchScreen";
+	input_dev->phys = w8001->phys;
+	input_dev->id.bustype = BUS_RS232;
+	input_dev->id.vendor = SERIO_W8001;
+	input_dev->id.product = w8001->id;
+	input_dev->id.version = 0x0100;
+	input_dev->dev.parent = &serio->dev;
+
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+	input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
+
+	serio_set_drvdata(serio, w8001);
+	err = serio_open(serio, drv);
+	if (err)
+		goto fail2;
+
+	if (w8001_setup(w8001))
+		goto fail3;
+
+	err = input_register_device(w8001->dev);
+	if (err)
+		goto fail3;
+
+	return 0;
+
+fail3:
+	serio_close(serio);
+fail2:
+	serio_set_drvdata(serio, NULL);
+fail1:
+	input_free_device(input_dev);
+	kfree(w8001);
+	return err;
+}
+
+static struct serio_device_id w8001_serio_ids[] = {
+	{
+		.type	= SERIO_RS232,
+		.proto	= SERIO_W8001,
+		.id	= SERIO_ANY,
+		.extra	= SERIO_ANY,
+	},
+	{ 0 }
+};
+
+MODULE_DEVICE_TABLE(serio, w8001_serio_ids);
+
+static struct serio_driver w8001_drv = {
+	.driver		= {
+		.name	= "w8001",
+	},
+	.description	= DRIVER_DESC,
+	.id_table	= w8001_serio_ids,
+	.interrupt	= w8001_interrupt,
+	.connect	= w8001_connect,
+	.disconnect	= w8001_disconnect,
+};
+
+static int __init w8001_init(void)
+{
+	return serio_register_driver(&w8001_drv);
+}
+
+static void __exit w8001_exit(void)
+{
+	serio_unregister_driver(&w8001_drv);
+}
+
+module_init(w8001_init);
+module_exit(w8001_exit);
diff --git a/include/linux/serio.h b/include/linux/serio.h
index 25641d9e0ea8..1bcb357a01a1 100644
--- a/include/linux/serio.h
+++ b/include/linux/serio.h
@@ -213,5 +213,6 @@ static inline void serio_unpin_driver(struct serio *serio)
 #define SERIO_ZHENHUA	0x36
 #define SERIO_INEXIO	0x37
 #define SERIO_TOUCHIT213	0x37
+#define SERIO_W8001	0x39
 
 #endif
-- 
cgit v1.2.3


From a2d781fc8d9b16113dd9440107d73c0f21d7cbef Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Wed, 19 Nov 2008 17:02:24 -0500
Subject: Input: libps2 - handle 0xfc responses from devices

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/serio/libps2.c | 20 ++++++++++++++++----
 include/linux/libps2.h       |  2 ++
 2 files changed, 18 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/serio/libps2.c b/drivers/input/serio/libps2.c
index 2b304c22c200..67248c31e19a 100644
--- a/drivers/input/serio/libps2.c
+++ b/drivers/input/serio/libps2.c
@@ -262,9 +262,17 @@ int ps2_handle_ack(struct ps2dev *ps2dev, unsigned char data)
 			break;
 
 		case PS2_RET_NAK:
-			ps2dev->nak = 1;
+			ps2dev->flags |= PS2_FLAG_NAK;
+			ps2dev->nak = PS2_RET_NAK;
 			break;
 
+		case PS2_RET_ERR:
+			if (ps2dev->flags & PS2_FLAG_NAK) {
+				ps2dev->flags &= ~PS2_FLAG_NAK;
+				ps2dev->nak = PS2_RET_ERR;
+				break;
+			}
+
 		/*
 		 * Workaround for mice which don't ACK the Get ID command.
 		 * These are valid mouse IDs that we recognize.
@@ -282,8 +290,11 @@ int ps2_handle_ack(struct ps2dev *ps2dev, unsigned char data)
 	}
 
 
-	if (!ps2dev->nak && ps2dev->cmdcnt)
-		ps2dev->flags |= PS2_FLAG_CMD | PS2_FLAG_CMD1;
+	if (!ps2dev->nak) {
+		ps2dev->flags &= ~PS2_FLAG_NAK;
+		if (ps2dev->cmdcnt)
+			ps2dev->flags |= PS2_FLAG_CMD | PS2_FLAG_CMD1;
+	}
 
 	ps2dev->flags &= ~PS2_FLAG_ACK;
 	wake_up(&ps2dev->wait);
@@ -329,6 +340,7 @@ void ps2_cmd_aborted(struct ps2dev *ps2dev)
 	if (ps2dev->flags & (PS2_FLAG_ACK | PS2_FLAG_CMD))
 		wake_up(&ps2dev->wait);
 
-	ps2dev->flags = 0;
+	/* reset all flags except last nack */
+	ps2dev->flags &= PS2_FLAG_NAK;
 }
 EXPORT_SYMBOL(ps2_cmd_aborted);
diff --git a/include/linux/libps2.h b/include/linux/libps2.h
index afc413369101..b94534b7e266 100644
--- a/include/linux/libps2.h
+++ b/include/linux/libps2.h
@@ -18,11 +18,13 @@
 #define PS2_RET_ID		0x00
 #define PS2_RET_ACK		0xfa
 #define PS2_RET_NAK		0xfe
+#define PS2_RET_ERR		0xfc
 
 #define PS2_FLAG_ACK		1	/* Waiting for ACK/NAK */
 #define PS2_FLAG_CMD		2	/* Waiting for command to finish */
 #define PS2_FLAG_CMD1		4	/* Waiting for the first byte of command response */
 #define PS2_FLAG_WAITID		8	/* Command execiting is GET ID */
+#define PS2_FLAG_NAK		16	/* Last transmission was NAKed */
 
 struct ps2dev {
 	struct serio *serio;
-- 
cgit v1.2.3


From 758b2cdc6f6a22c702bd8f2344382fb1270b2161 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 25 Nov 2008 02:35:04 +1030
Subject: sched: wrap sched_group and sched_domain cpumask accesses.

Impact: trivial wrap of member accesses

This eases the transition in the next patch.

We also get rid of a temporary cpumask in find_idlest_cpu() thanks to
for_each_cpu_and, and sched_balance_self() due to getting weight before
setting sd to NULL.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h |  10 +++++
 kernel/sched.c        | 114 ++++++++++++++++++++++++--------------------------
 kernel/sched_fair.c   |  10 ++---
 kernel/sched_rt.c     |   3 +-
 kernel/sched_stats.h  |   3 +-
 5 files changed, 73 insertions(+), 67 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4ce5c603c51a..2b95aa9f779b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -786,6 +786,11 @@ struct sched_group {
 	u32 reciprocal_cpu_power;
 };
 
+static inline struct cpumask *sched_group_cpus(struct sched_group *sg)
+{
+	return &sg->cpumask;
+}
+
 enum sched_domain_level {
 	SD_LV_NONE = 0,
 	SD_LV_SIBLING,
@@ -866,6 +871,11 @@ struct sched_domain {
 #endif
 };
 
+static inline struct cpumask *sched_domain_span(struct sched_domain *sd)
+{
+	return &sd->span;
+}
+
 extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
 				    struct sched_domain_attr *dattr_new);
 extern int arch_reinit_sched_domains(void);
diff --git a/kernel/sched.c b/kernel/sched.c
index a2de33d05340..575f38acf4da 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1501,7 +1501,7 @@ static int tg_shares_up(struct task_group *tg, void *data)
 	struct sched_domain *sd = data;
 	int i;
 
-	for_each_cpu_mask(i, sd->span) {
+	for_each_cpu(i, sched_domain_span(sd)) {
 		/*
 		 * If there are currently no tasks on the cpu pretend there
 		 * is one of average load so that when a new task gets to
@@ -1522,7 +1522,7 @@ static int tg_shares_up(struct task_group *tg, void *data)
 	if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE))
 		shares = tg->shares;
 
-	for_each_cpu_mask(i, sd->span)
+	for_each_cpu(i, sched_domain_span(sd))
 		update_group_shares_cpu(tg, i, shares, rq_weight);
 
 	return 0;
@@ -2053,15 +2053,17 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
 		int i;
 
 		/* Skip over this group if it has no CPUs allowed */
-		if (!cpus_intersects(group->cpumask, p->cpus_allowed))
+		if (!cpumask_intersects(sched_group_cpus(group),
+					&p->cpus_allowed))
 			continue;
 
-		local_group = cpu_isset(this_cpu, group->cpumask);
+		local_group = cpumask_test_cpu(this_cpu,
+					       sched_group_cpus(group));
 
 		/* Tally up the load of all CPUs in the group */
 		avg_load = 0;
 
-		for_each_cpu(i, &group->cpumask) {
+		for_each_cpu(i, sched_group_cpus(group)) {
 			/* Bias balancing toward cpus of our domain */
 			if (local_group)
 				load = source_load(i, load_idx);
@@ -2093,17 +2095,14 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
  * find_idlest_cpu - find the idlest cpu among the cpus in group.
  */
 static int
-find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu,
-		cpumask_t *tmp)
+find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
 {
 	unsigned long load, min_load = ULONG_MAX;
 	int idlest = -1;
 	int i;
 
 	/* Traverse only the allowed CPUs */
-	cpus_and(*tmp, group->cpumask, p->cpus_allowed);
-
-	for_each_cpu(i, tmp) {
+	for_each_cpu_and(i, sched_group_cpus(group), &p->cpus_allowed) {
 		load = weighted_cpuload(i);
 
 		if (load < min_load || (load == min_load && i == this_cpu)) {
@@ -2145,7 +2144,6 @@ static int sched_balance_self(int cpu, int flag)
 		update_shares(sd);
 
 	while (sd) {
-		cpumask_t span, tmpmask;
 		struct sched_group *group;
 		int new_cpu, weight;
 
@@ -2154,14 +2152,13 @@ static int sched_balance_self(int cpu, int flag)
 			continue;
 		}
 
-		span = sd->span;
 		group = find_idlest_group(sd, t, cpu);
 		if (!group) {
 			sd = sd->child;
 			continue;
 		}
 
-		new_cpu = find_idlest_cpu(group, t, cpu, &tmpmask);
+		new_cpu = find_idlest_cpu(group, t, cpu);
 		if (new_cpu == -1 || new_cpu == cpu) {
 			/* Now try balancing at a lower domain level of cpu */
 			sd = sd->child;
@@ -2170,10 +2167,10 @@ static int sched_balance_self(int cpu, int flag)
 
 		/* Now try balancing at a lower domain level of new_cpu */
 		cpu = new_cpu;
+		weight = cpumask_weight(sched_domain_span(sd));
 		sd = NULL;
-		weight = cpus_weight(span);
 		for_each_domain(cpu, tmp) {
-			if (weight <= cpus_weight(tmp->span))
+			if (weight <= cpumask_weight(sched_domain_span(tmp)))
 				break;
 			if (tmp->flags & flag)
 				sd = tmp;
@@ -2218,7 +2215,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
 		cpu = task_cpu(p);
 
 		for_each_domain(this_cpu, sd) {
-			if (cpu_isset(cpu, sd->span)) {
+			if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
 				update_shares(sd);
 				break;
 			}
@@ -2266,7 +2263,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
 	else {
 		struct sched_domain *sd;
 		for_each_domain(this_cpu, sd) {
-			if (cpu_isset(cpu, sd->span)) {
+			if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
 				schedstat_inc(sd, ttwu_wake_remote);
 				break;
 			}
@@ -3109,10 +3106,11 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 		unsigned long sum_avg_load_per_task;
 		unsigned long avg_load_per_task;
 
-		local_group = cpu_isset(this_cpu, group->cpumask);
+		local_group = cpumask_test_cpu(this_cpu,
+					       sched_group_cpus(group));
 
 		if (local_group)
-			balance_cpu = first_cpu(group->cpumask);
+			balance_cpu = cpumask_first(sched_group_cpus(group));
 
 		/* Tally up the load of all CPUs in the group */
 		sum_weighted_load = sum_nr_running = avg_load = 0;
@@ -3121,13 +3119,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 		max_cpu_load = 0;
 		min_cpu_load = ~0UL;
 
-		for_each_cpu(i, &group->cpumask) {
-			struct rq *rq;
-
-			if (!cpu_isset(i, *cpus))
-				continue;
-
-			rq = cpu_rq(i);
+		for_each_cpu_and(i, sched_group_cpus(group), cpus) {
+			struct rq *rq = cpu_rq(i);
 
 			if (*sd_idle && rq->nr_running)
 				*sd_idle = 0;
@@ -3238,8 +3231,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 		 */
 		if ((sum_nr_running < min_nr_running) ||
 		    (sum_nr_running == min_nr_running &&
-		     first_cpu(group->cpumask) <
-		     first_cpu(group_min->cpumask))) {
+		     cpumask_first(sched_group_cpus(group)) <
+		     cpumask_first(sched_group_cpus(group_min)))) {
 			group_min = group;
 			min_nr_running = sum_nr_running;
 			min_load_per_task = sum_weighted_load /
@@ -3254,8 +3247,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 		if (sum_nr_running <= group_capacity - 1) {
 			if (sum_nr_running > leader_nr_running ||
 			    (sum_nr_running == leader_nr_running &&
-			     first_cpu(group->cpumask) >
-			      first_cpu(group_leader->cpumask))) {
+			     cpumask_first(sched_group_cpus(group)) >
+			     cpumask_first(sched_group_cpus(group_leader)))) {
 				group_leader = group;
 				leader_nr_running = sum_nr_running;
 			}
@@ -3400,7 +3393,7 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
 	unsigned long max_load = 0;
 	int i;
 
-	for_each_cpu(i, &group->cpumask) {
+	for_each_cpu(i, sched_group_cpus(group)) {
 		unsigned long wl;
 
 		if (!cpu_isset(i, *cpus))
@@ -3746,7 +3739,7 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
 	/* Search for an sd spanning us and the target CPU. */
 	for_each_domain(target_cpu, sd) {
 		if ((sd->flags & SD_LOAD_BALANCE) &&
-		    cpu_isset(busiest_cpu, sd->span))
+		    cpumask_test_cpu(busiest_cpu, sched_domain_span(sd)))
 				break;
 	}
 
@@ -6618,7 +6611,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 	struct sched_group *group = sd->groups;
 	char str[256];
 
-	cpulist_scnprintf(str, sizeof(str), sd->span);
+	cpulist_scnprintf(str, sizeof(str), *sched_domain_span(sd));
 	cpus_clear(*groupmask);
 
 	printk(KERN_DEBUG "%*s domain %d: ", level, "", level);
@@ -6633,11 +6626,11 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 
 	printk(KERN_CONT "span %s level %s\n", str, sd->name);
 
-	if (!cpu_isset(cpu, sd->span)) {
+	if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) {
 		printk(KERN_ERR "ERROR: domain->span does not contain "
 				"CPU%d\n", cpu);
 	}
-	if (!cpu_isset(cpu, group->cpumask)) {
+	if (!cpumask_test_cpu(cpu, sched_group_cpus(group))) {
 		printk(KERN_ERR "ERROR: domain->groups does not contain"
 				" CPU%d\n", cpu);
 	}
@@ -6657,31 +6650,32 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 			break;
 		}
 
-		if (!cpus_weight(group->cpumask)) {
+		if (!cpumask_weight(sched_group_cpus(group))) {
 			printk(KERN_CONT "\n");
 			printk(KERN_ERR "ERROR: empty group\n");
 			break;
 		}
 
-		if (cpus_intersects(*groupmask, group->cpumask)) {
+		if (cpumask_intersects(groupmask, sched_group_cpus(group))) {
 			printk(KERN_CONT "\n");
 			printk(KERN_ERR "ERROR: repeated CPUs\n");
 			break;
 		}
 
-		cpus_or(*groupmask, *groupmask, group->cpumask);
+		cpumask_or(groupmask, groupmask, sched_group_cpus(group));
 
-		cpulist_scnprintf(str, sizeof(str), group->cpumask);
+		cpulist_scnprintf(str, sizeof(str), *sched_group_cpus(group));
 		printk(KERN_CONT " %s", str);
 
 		group = group->next;
 	} while (group != sd->groups);
 	printk(KERN_CONT "\n");
 
-	if (!cpus_equal(sd->span, *groupmask))
+	if (!cpumask_equal(sched_domain_span(sd), groupmask))
 		printk(KERN_ERR "ERROR: groups don't span domain->span\n");
 
-	if (sd->parent && !cpus_subset(*groupmask, sd->parent->span))
+	if (sd->parent &&
+	    !cpumask_subset(groupmask, sched_domain_span(sd->parent)))
 		printk(KERN_ERR "ERROR: parent span is not a superset "
 			"of domain->span\n");
 	return 0;
@@ -6721,7 +6715,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
 
 static int sd_degenerate(struct sched_domain *sd)
 {
-	if (cpus_weight(sd->span) == 1)
+	if (cpumask_weight(sched_domain_span(sd)) == 1)
 		return 1;
 
 	/* Following flags need at least 2 groups */
@@ -6752,7 +6746,7 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
 	if (sd_degenerate(parent))
 		return 1;
 
-	if (!cpus_equal(sd->span, parent->span))
+	if (!cpumask_equal(sched_domain_span(sd), sched_domain_span(parent)))
 		return 0;
 
 	/* Does parent contain flags not in child? */
@@ -6913,10 +6907,10 @@ init_sched_build_groups(const cpumask_t *span, const cpumask_t *cpu_map,
 		int group = group_fn(i, cpu_map, &sg, tmpmask);
 		int j;
 
-		if (cpu_isset(i, *covered))
+		if (cpumask_test_cpu(i, covered))
 			continue;
 
-		cpus_clear(sg->cpumask);
+		cpumask_clear(sched_group_cpus(sg));
 		sg->__cpu_power = 0;
 
 		for_each_cpu(j, span) {
@@ -6924,7 +6918,7 @@ init_sched_build_groups(const cpumask_t *span, const cpumask_t *cpu_map,
 				continue;
 
 			cpu_set(j, *covered);
-			cpu_set(j, sg->cpumask);
+			cpumask_set_cpu(j, sched_group_cpus(sg));
 		}
 		if (!first)
 			first = sg;
@@ -7119,11 +7113,11 @@ static void init_numa_sched_groups_power(struct sched_group *group_head)
 	if (!sg)
 		return;
 	do {
-		for_each_cpu(j, &sg->cpumask) {
+		for_each_cpu(j, sched_group_cpus(sg)) {
 			struct sched_domain *sd;
 
 			sd = &per_cpu(phys_domains, j);
-			if (j != first_cpu(sd->groups->cpumask)) {
+			if (j != cpumask_first(sched_group_cpus(sd->groups))) {
 				/*
 				 * Only add "power" once for each
 				 * physical package.
@@ -7200,7 +7194,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
 
 	WARN_ON(!sd || !sd->groups);
 
-	if (cpu != first_cpu(sd->groups->cpumask))
+	if (cpu != cpumask_first(sched_group_cpus(sd->groups)))
 		return;
 
 	child = sd->child;
@@ -7372,7 +7366,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 			sd = &per_cpu(allnodes_domains, i);
 			SD_INIT(sd, ALLNODES);
 			set_domain_attribute(sd, attr);
-			sd->span = *cpu_map;
+			cpumask_copy(sched_domain_span(sd), cpu_map);
 			cpu_to_allnodes_group(i, cpu_map, &sd->groups, tmpmask);
 			p = sd;
 			sd_allnodes = 1;
@@ -7382,18 +7376,19 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 		sd = &per_cpu(node_domains, i);
 		SD_INIT(sd, NODE);
 		set_domain_attribute(sd, attr);
-		sched_domain_node_span(cpu_to_node(i), &sd->span);
+		sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd));
 		sd->parent = p;
 		if (p)
 			p->child = sd;
-		cpus_and(sd->span, sd->span, *cpu_map);
+		cpumask_and(sched_domain_span(sd),
+			    sched_domain_span(sd), cpu_map);
 #endif
 
 		p = sd;
 		sd = &per_cpu(phys_domains, i);
 		SD_INIT(sd, CPU);
 		set_domain_attribute(sd, attr);
-		sd->span = *nodemask;
+		cpumask_copy(sched_domain_span(sd), nodemask);
 		sd->parent = p;
 		if (p)
 			p->child = sd;
@@ -7404,8 +7399,9 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 		sd = &per_cpu(core_domains, i);
 		SD_INIT(sd, MC);
 		set_domain_attribute(sd, attr);
-		sd->span = cpu_coregroup_map(i);
-		cpus_and(sd->span, sd->span, *cpu_map);
+		*sched_domain_span(sd) = cpu_coregroup_map(i);
+		cpumask_and(sched_domain_span(sd),
+			    sched_domain_span(sd), cpu_map);
 		sd->parent = p;
 		p->child = sd;
 		cpu_to_core_group(i, cpu_map, &sd->groups, tmpmask);
@@ -7416,8 +7412,8 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 		sd = &per_cpu(cpu_domains, i);
 		SD_INIT(sd, SIBLING);
 		set_domain_attribute(sd, attr);
-		sd->span = per_cpu(cpu_sibling_map, i);
-		cpus_and(sd->span, sd->span, *cpu_map);
+		cpumask_and(sched_domain_span(sd),
+			    &per_cpu(cpu_sibling_map, i), cpu_map);
 		sd->parent = p;
 		p->child = sd;
 		cpu_to_cpu_group(i, cpu_map, &sd->groups, tmpmask);
@@ -7503,7 +7499,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 			sd->groups = sg;
 		}
 		sg->__cpu_power = 0;
-		sg->cpumask = *nodemask;
+		cpumask_copy(sched_group_cpus(sg), nodemask);
 		sg->next = sg;
 		cpus_or(*covered, *covered, *nodemask);
 		prev = sg;
@@ -7530,7 +7526,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 				goto error;
 			}
 			sg->__cpu_power = 0;
-			sg->cpumask = *tmpmask;
+			cpumask_copy(sched_group_cpus(sg), tmpmask);
 			sg->next = prev->next;
 			cpus_or(*covered, *covered, *tmpmask);
 			prev->next = sg;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 98345e45b059..bba00402ed90 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1024,7 +1024,6 @@ static void yield_task_fair(struct rq *rq)
 #if defined(ARCH_HAS_SCHED_WAKE_IDLE)
 static int wake_idle(int cpu, struct task_struct *p)
 {
-	cpumask_t tmp;
 	struct sched_domain *sd;
 	int i;
 
@@ -1044,10 +1043,9 @@ static int wake_idle(int cpu, struct task_struct *p)
 		if ((sd->flags & SD_WAKE_IDLE)
 		    || ((sd->flags & SD_WAKE_IDLE_FAR)
 			&& !task_hot(p, task_rq(p)->clock, sd))) {
-			cpus_and(tmp, sd->span, p->cpus_allowed);
-			cpus_and(tmp, tmp, cpu_active_map);
-			for_each_cpu_mask_nr(i, tmp) {
-				if (idle_cpu(i)) {
+			for_each_cpu_and(i, sched_domain_span(sd),
+					 &p->cpus_allowed) {
+				if (cpu_active(i) && idle_cpu(i)) {
 					if (i != task_cpu(p)) {
 						schedstat_inc(p,
 						       se.nr_wakeups_idle);
@@ -1240,7 +1238,7 @@ static int select_task_rq_fair(struct task_struct *p, int sync)
 	 * this_cpu and prev_cpu are present in:
 	 */
 	for_each_domain(this_cpu, sd) {
-		if (cpu_isset(prev_cpu, sd->span)) {
+		if (cpumask_test_cpu(prev_cpu, sched_domain_span(sd))) {
 			this_sd = sd;
 			break;
 		}
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 2bdd44423599..4cd813abc23a 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1017,7 +1017,8 @@ static int find_lowest_rq(struct task_struct *task)
 			cpumask_t domain_mask;
 			int       best_cpu;
 
-			cpus_and(domain_mask, sd->span, *lowest_mask);
+			cpumask_and(&domain_mask, sched_domain_span(sd),
+				    lowest_mask);
 
 			best_cpu = pick_optimal_cpu(this_cpu,
 						    &domain_mask);
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index 7dbf72a2b02c..ce340835d055 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -42,7 +42,8 @@ static int show_schedstat(struct seq_file *seq, void *v)
 		for_each_domain(cpu, sd) {
 			enum cpu_idle_type itype;
 
-			cpumask_scnprintf(mask_str, mask_len, sd->span);
+			cpumask_scnprintf(mask_str, mask_len,
+					  *sched_domain_span(sd));
 			seq_printf(seq, "domain%d %s", dcount++, mask_str);
 			for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES;
 					itype++) {
-- 
cgit v1.2.3


From 6c99e9ad47d9c082bd096f42fb49e397b05d58a8 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 25 Nov 2008 02:35:04 +1030
Subject: sched: convert struct sched_group/sched_domain cpumask_ts to variable
 bitmaps

Impact: (future) size reduction for large NR_CPUS.

We move the 'cpumask' member of sched_group to the end, so when we
kmalloc it we can do a minimal allocation: saves space for small
nr_cpu_ids but big CONFIG_NR_CPUS.  Similar trick for 'span' in
sched_domain.

This isn't quite as good as converting to a cpumask_var_t, as some
sched_groups are actually static, but it's safer: we don't have to
figure out where to call alloc_cpumask_var/free_cpumask_var.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 11 +++++----
 kernel/sched.c        | 65 ++++++++++++++++++++++++++++++++-------------------
 2 files changed, 48 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2b95aa9f779b..c5be6c6bc741 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -771,7 +771,6 @@ enum cpu_idle_type {
 
 struct sched_group {
 	struct sched_group *next;	/* Must be a circular list */
-	cpumask_t cpumask;
 
 	/*
 	 * CPU power of this group, SCHED_LOAD_SCALE being max power for a
@@ -784,11 +783,13 @@ struct sched_group {
 	 * (see include/linux/reciprocal_div.h)
 	 */
 	u32 reciprocal_cpu_power;
+
+	unsigned long cpumask[];
 };
 
 static inline struct cpumask *sched_group_cpus(struct sched_group *sg)
 {
-	return &sg->cpumask;
+	return to_cpumask(sg->cpumask);
 }
 
 enum sched_domain_level {
@@ -814,7 +815,6 @@ struct sched_domain {
 	struct sched_domain *parent;	/* top domain must be null terminated */
 	struct sched_domain *child;	/* bottom domain must be null terminated */
 	struct sched_group *groups;	/* the balancing groups of the domain */
-	cpumask_t span;			/* span of all CPUs in this domain */
 	unsigned long min_interval;	/* Minimum balance interval ms */
 	unsigned long max_interval;	/* Maximum balance interval ms */
 	unsigned int busy_factor;	/* less balancing by factor if busy */
@@ -869,11 +869,14 @@ struct sched_domain {
 #ifdef CONFIG_SCHED_DEBUG
 	char *name;
 #endif
+
+	/* span of all CPUs in this domain */
+	unsigned long span[];
 };
 
 static inline struct cpumask *sched_domain_span(struct sched_domain *sd)
 {
-	return &sd->span;
+	return to_cpumask(sd->span);
 }
 
 extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
diff --git a/kernel/sched.c b/kernel/sched.c
index 575f38acf4da..6b9606a6cabf 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -7005,19 +7005,34 @@ static void sched_domain_node_span(int node, cpumask_t *span)
 
 int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
 
+/*
+ * The cpus mask in sched_group and sched_domain hangs off the end.
+ * FIXME: use cpumask_var_t or dynamic percpu alloc to avoid wasting space
+ * for nr_cpu_ids < CONFIG_NR_CPUS.
+ */
+struct static_sched_group {
+	struct sched_group sg;
+	DECLARE_BITMAP(cpus, CONFIG_NR_CPUS);
+};
+
+struct static_sched_domain {
+	struct sched_domain sd;
+	DECLARE_BITMAP(span, CONFIG_NR_CPUS);
+};
+
 /*
  * SMT sched-domains:
  */
 #ifdef CONFIG_SCHED_SMT
-static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
-static DEFINE_PER_CPU(struct sched_group, sched_group_cpus);
+static DEFINE_PER_CPU(struct static_sched_domain, cpu_domains);
+static DEFINE_PER_CPU(struct static_sched_group, sched_group_cpus);
 
 static int
 cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg,
 		 cpumask_t *unused)
 {
 	if (sg)
-		*sg = &per_cpu(sched_group_cpus, cpu);
+		*sg = &per_cpu(sched_group_cpus, cpu).sg;
 	return cpu;
 }
 #endif /* CONFIG_SCHED_SMT */
@@ -7026,8 +7041,8 @@ cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg,
  * multi-core sched-domains:
  */
 #ifdef CONFIG_SCHED_MC
-static DEFINE_PER_CPU(struct sched_domain, core_domains);
-static DEFINE_PER_CPU(struct sched_group, sched_group_core);
+static DEFINE_PER_CPU(struct static_sched_domain, core_domains);
+static DEFINE_PER_CPU(struct static_sched_group, sched_group_core);
 #endif /* CONFIG_SCHED_MC */
 
 #if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT)
@@ -7041,7 +7056,7 @@ cpu_to_core_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg,
 	cpus_and(*mask, *mask, *cpu_map);
 	group = first_cpu(*mask);
 	if (sg)
-		*sg = &per_cpu(sched_group_core, group);
+		*sg = &per_cpu(sched_group_core, group).sg;
 	return group;
 }
 #elif defined(CONFIG_SCHED_MC)
@@ -7050,13 +7065,13 @@ cpu_to_core_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg,
 		  cpumask_t *unused)
 {
 	if (sg)
-		*sg = &per_cpu(sched_group_core, cpu);
+		*sg = &per_cpu(sched_group_core, cpu).sg;
 	return cpu;
 }
 #endif
 
-static DEFINE_PER_CPU(struct sched_domain, phys_domains);
-static DEFINE_PER_CPU(struct sched_group, sched_group_phys);
+static DEFINE_PER_CPU(struct static_sched_domain, phys_domains);
+static DEFINE_PER_CPU(struct static_sched_group, sched_group_phys);
 
 static int
 cpu_to_phys_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg,
@@ -7075,7 +7090,7 @@ cpu_to_phys_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg,
 	group = cpu;
 #endif
 	if (sg)
-		*sg = &per_cpu(sched_group_phys, group);
+		*sg = &per_cpu(sched_group_phys, group).sg;
 	return group;
 }
 
@@ -7089,7 +7104,7 @@ static DEFINE_PER_CPU(struct sched_domain, node_domains);
 static struct sched_group ***sched_group_nodes_bycpu;
 
 static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
-static DEFINE_PER_CPU(struct sched_group, sched_group_allnodes);
+static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes);
 
 static int cpu_to_allnodes_group(int cpu, const cpumask_t *cpu_map,
 				 struct sched_group **sg, cpumask_t *nodemask)
@@ -7101,7 +7116,7 @@ static int cpu_to_allnodes_group(int cpu, const cpumask_t *cpu_map,
 	group = first_cpu(*nodemask);
 
 	if (sg)
-		*sg = &per_cpu(sched_group_allnodes, group);
+		*sg = &per_cpu(sched_group_allnodes, group).sg;
 	return group;
 }
 
@@ -7116,7 +7131,7 @@ static void init_numa_sched_groups_power(struct sched_group *group_head)
 		for_each_cpu(j, sched_group_cpus(sg)) {
 			struct sched_domain *sd;
 
-			sd = &per_cpu(phys_domains, j);
+			sd = &per_cpu(phys_domains, j).sd;
 			if (j != cpumask_first(sched_group_cpus(sd->groups))) {
 				/*
 				 * Only add "power" once for each
@@ -7385,7 +7400,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 #endif
 
 		p = sd;
-		sd = &per_cpu(phys_domains, i);
+		sd = &per_cpu(phys_domains, i).sd;
 		SD_INIT(sd, CPU);
 		set_domain_attribute(sd, attr);
 		cpumask_copy(sched_domain_span(sd), nodemask);
@@ -7396,7 +7411,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 
 #ifdef CONFIG_SCHED_MC
 		p = sd;
-		sd = &per_cpu(core_domains, i);
+		sd = &per_cpu(core_domains, i).sd;
 		SD_INIT(sd, MC);
 		set_domain_attribute(sd, attr);
 		*sched_domain_span(sd) = cpu_coregroup_map(i);
@@ -7409,7 +7424,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 
 #ifdef CONFIG_SCHED_SMT
 		p = sd;
-		sd = &per_cpu(cpu_domains, i);
+		sd = &per_cpu(cpu_domains, i).sd;
 		SD_INIT(sd, SIBLING);
 		set_domain_attribute(sd, attr);
 		cpumask_and(sched_domain_span(sd),
@@ -7485,7 +7500,8 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 		sched_domain_node_span(i, domainspan);
 		cpus_and(*domainspan, *domainspan, *cpu_map);
 
-		sg = kmalloc_node(sizeof(struct sched_group), GFP_KERNEL, i);
+		sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(),
+				  GFP_KERNEL, i);
 		if (!sg) {
 			printk(KERN_WARNING "Can not alloc domain group for "
 				"node %d\n", i);
@@ -7518,7 +7534,8 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 			if (cpus_empty(*tmpmask))
 				continue;
 
-			sg = kmalloc_node(sizeof(struct sched_group),
+			sg = kmalloc_node(sizeof(struct sched_group) +
+					  cpumask_size(),
 					  GFP_KERNEL, i);
 			if (!sg) {
 				printk(KERN_WARNING
@@ -7538,21 +7555,21 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 	/* Calculate CPU power for physical packages and nodes */
 #ifdef CONFIG_SCHED_SMT
 	for_each_cpu(i, cpu_map) {
-		struct sched_domain *sd = &per_cpu(cpu_domains, i);
+		struct sched_domain *sd = &per_cpu(cpu_domains, i).sd;
 
 		init_sched_groups_power(i, sd);
 	}
 #endif
 #ifdef CONFIG_SCHED_MC
 	for_each_cpu(i, cpu_map) {
-		struct sched_domain *sd = &per_cpu(core_domains, i);
+		struct sched_domain *sd = &per_cpu(core_domains, i).sd;
 
 		init_sched_groups_power(i, sd);
 	}
 #endif
 
 	for_each_cpu(i, cpu_map) {
-		struct sched_domain *sd = &per_cpu(phys_domains, i);
+		struct sched_domain *sd = &per_cpu(phys_domains, i).sd;
 
 		init_sched_groups_power(i, sd);
 	}
@@ -7574,11 +7591,11 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 	for_each_cpu(i, cpu_map) {
 		struct sched_domain *sd;
 #ifdef CONFIG_SCHED_SMT
-		sd = &per_cpu(cpu_domains, i);
+		sd = &per_cpu(cpu_domains, i).sd;
 #elif defined(CONFIG_SCHED_MC)
-		sd = &per_cpu(core_domains, i);
+		sd = &per_cpu(core_domains, i).sd;
 #else
-		sd = &per_cpu(phys_domains, i);
+		sd = &per_cpu(phys_domains, i).sd;
 #endif
 		cpu_attach_domain(sd, rd, i);
 	}
-- 
cgit v1.2.3


From 6a7b3dc3440f7b5a9b67594af01ed562cdeb41e4 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 25 Nov 2008 02:35:04 +1030
Subject: sched: convert nohz_cpu_mask to cpumask_var_t.

Impact: (future) size reduction for large NR_CPUS.

Dynamically allocating cpumasks (when CONFIG_CPUMASK_OFFSTACK) saves
space for small nr_cpu_ids but big CONFIG_NR_CPUS.  cpumask_var_t
is just a struct cpumask for !CONFIG_CPUMASK_OFFSTACK.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h    |  2 +-
 kernel/rcuclassic.c      |  2 +-
 kernel/sched.c           |  7 +++++--
 kernel/time/tick-sched.c | 10 +++++-----
 4 files changed, 12 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index c5be6c6bc741..1e33e2cb7f8c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -249,7 +249,7 @@ extern void init_idle_bootup_task(struct task_struct *idle);
 extern int runqueue_is_locked(void);
 extern void task_rq_unlock_wait(struct task_struct *p);
 
-extern cpumask_t nohz_cpu_mask;
+extern cpumask_var_t nohz_cpu_mask;
 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
 extern int select_nohz_load_balancer(int cpu);
 #else
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index e503a002f330..c03ca3e61919 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -393,7 +393,7 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp)
 		 * unnecessarily.
 		 */
 		smp_mb();
-		cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask);
+		cpumask_andnot(&rcp->cpumask, cpu_online_mask, nohz_cpu_mask);
 
 		rcp->signaled = 0;
 	}
diff --git a/kernel/sched.c b/kernel/sched.c
index 6b9606a6cabf..2723d7a4a42d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5870,9 +5870,9 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
  * indicates which cpus entered this state. This is used
  * in the rcu update to wait only for active cpus. For system
  * which do not switch off the HZ timer nohz_cpu_mask should
- * always be CPU_MASK_NONE.
+ * always be CPU_BITS_NONE.
  */
-cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
+cpumask_var_t nohz_cpu_mask;
 
 /*
  * Increase the granularity value when there are more CPUs,
@@ -8274,6 +8274,9 @@ void __init sched_init(void)
 	 */
 	current->sched_class = &fair_sched_class;
 
+	/* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */
+	alloc_bootmem_cpumask_var(&nohz_cpu_mask);
+
 	scheduler_running = 1;
 }
 
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 342fc9ccab46..70f872c71f4e 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -144,7 +144,7 @@ void tick_nohz_update_jiffies(void)
 	if (!ts->tick_stopped)
 		return;
 
-	cpu_clear(cpu, nohz_cpu_mask);
+	cpumask_clear_cpu(cpu, nohz_cpu_mask);
 	now = ktime_get();
 	ts->idle_waketime = now;
 
@@ -283,7 +283,7 @@ void tick_nohz_stop_sched_tick(int inidle)
 	if ((long)delta_jiffies >= 1) {
 
 		if (delta_jiffies > 1)
-			cpu_set(cpu, nohz_cpu_mask);
+			cpumask_set_cpu(cpu, nohz_cpu_mask);
 		/*
 		 * nohz_stop_sched_tick can be called several times before
 		 * the nohz_restart_sched_tick is called. This happens when
@@ -296,7 +296,7 @@ void tick_nohz_stop_sched_tick(int inidle)
 				/*
 				 * sched tick not stopped!
 				 */
-				cpu_clear(cpu, nohz_cpu_mask);
+				cpumask_clear_cpu(cpu, nohz_cpu_mask);
 				goto out;
 			}
 
@@ -354,7 +354,7 @@ void tick_nohz_stop_sched_tick(int inidle)
 		 * softirq.
 		 */
 		tick_do_update_jiffies64(ktime_get());
-		cpu_clear(cpu, nohz_cpu_mask);
+		cpumask_clear_cpu(cpu, nohz_cpu_mask);
 	}
 	raise_softirq_irqoff(TIMER_SOFTIRQ);
 out:
@@ -432,7 +432,7 @@ void tick_nohz_restart_sched_tick(void)
 	select_nohz_load_balancer(0);
 	now = ktime_get();
 	tick_do_update_jiffies64(now);
-	cpu_clear(cpu, nohz_cpu_mask);
+	cpumask_clear_cpu(cpu, nohz_cpu_mask);
 
 	/*
 	 * We stopped the tick in idle. Update process times would miss the
-- 
cgit v1.2.3


From 96f874e26428ab5d2db681c100210c254775e154 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 25 Nov 2008 02:35:14 +1030
Subject: sched: convert remaining old-style cpumask operators

Impact: Trivial API conversion

  NR_CPUS -> nr_cpu_ids
  cpumask_t -> struct cpumask
  sizeof(cpumask_t) -> cpumask_size()
  cpumask_a = cpumask_b -> cpumask_copy(&cpumask_a, &cpumask_b)

  cpu_set() -> cpumask_set_cpu()
  first_cpu() -> cpumask_first()
  cpumask_of_cpu() -> cpumask_of()
  cpus_* -> cpumask_*

There are some FIXMEs where we all archs to complete infrastructure
(patches have been sent):

  cpu_coregroup_map -> cpu_coregroup_mask
  node_to_cpumask* -> cpumask_of_node

There is also one FIXME where we pass an array of cpumasks to
partition_sched_domains(): this implies knowing the definition of
'struct cpumask' and the size of a cpumask.  This will be fixed in a
future patch.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h |  16 ++--
 kernel/sched.c        | 212 +++++++++++++++++++++++++++-----------------------
 kernel/sched_fair.c   |   4 +-
 kernel/sched_rt.c     |  18 ++---
 4 files changed, 132 insertions(+), 118 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1e33e2cb7f8c..4b7b0187374c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -879,7 +879,7 @@ static inline struct cpumask *sched_domain_span(struct sched_domain *sd)
 	return to_cpumask(sd->span);
 }
 
-extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
+extern void partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
 				    struct sched_domain_attr *dattr_new);
 extern int arch_reinit_sched_domains(void);
 
@@ -888,7 +888,7 @@ extern int arch_reinit_sched_domains(void);
 struct sched_domain_attr;
 
 static inline void
-partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
+partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
 			struct sched_domain_attr *dattr_new)
 {
 }
@@ -970,7 +970,7 @@ struct sched_class {
 	void (*task_wake_up) (struct rq *this_rq, struct task_struct *task);
 
 	void (*set_cpus_allowed)(struct task_struct *p,
-				 const cpumask_t *newmask);
+				 const struct cpumask *newmask);
 
 	void (*rq_online)(struct rq *rq);
 	void (*rq_offline)(struct rq *rq);
@@ -1612,12 +1612,12 @@ extern cputime_t task_gtime(struct task_struct *p);
 
 #ifdef CONFIG_SMP
 extern int set_cpus_allowed_ptr(struct task_struct *p,
-				const cpumask_t *new_mask);
+				const struct cpumask *new_mask);
 #else
 static inline int set_cpus_allowed_ptr(struct task_struct *p,
-				       const cpumask_t *new_mask)
+				       const struct cpumask *new_mask)
 {
-	if (!cpu_isset(0, *new_mask))
+	if (!cpumask_test_cpu(0, new_mask))
 		return -EINVAL;
 	return 0;
 }
@@ -2230,8 +2230,8 @@ __trace_special(void *__tr, void *__data,
 }
 #endif
 
-extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask);
-extern long sched_getaffinity(pid_t pid, cpumask_t *mask);
+extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
+extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
 
 extern int sched_mc_power_savings, sched_smt_power_savings;
 
diff --git a/kernel/sched.c b/kernel/sched.c
index f2be61870030..eba6a156d334 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2829,7 +2829,7 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu)
 	struct rq *rq;
 
 	rq = task_rq_lock(p, &flags);
-	if (!cpu_isset(dest_cpu, p->cpus_allowed)
+	if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)
 	    || unlikely(!cpu_active(dest_cpu)))
 		goto out;
 
@@ -2895,7 +2895,7 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
 	 * 2) cannot be migrated to this CPU due to cpus_allowed, or
 	 * 3) are cache-hot on their current CPU.
 	 */
-	if (!cpu_isset(this_cpu, p->cpus_allowed)) {
+	if (!cpumask_test_cpu(this_cpu, &p->cpus_allowed)) {
 		schedstat_inc(p, se.nr_failed_migrations_affine);
 		return 0;
 	}
@@ -3070,7 +3070,7 @@ static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
 static struct sched_group *
 find_busiest_group(struct sched_domain *sd, int this_cpu,
 		   unsigned long *imbalance, enum cpu_idle_type idle,
-		   int *sd_idle, const cpumask_t *cpus, int *balance)
+		   int *sd_idle, const struct cpumask *cpus, int *balance)
 {
 	struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups;
 	unsigned long max_load, avg_load, total_load, this_load, total_pwr;
@@ -3387,7 +3387,7 @@ ret:
  */
 static struct rq *
 find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
-		   unsigned long imbalance, const cpumask_t *cpus)
+		   unsigned long imbalance, const struct cpumask *cpus)
 {
 	struct rq *busiest = NULL, *rq;
 	unsigned long max_load = 0;
@@ -3396,7 +3396,7 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
 	for_each_cpu(i, sched_group_cpus(group)) {
 		unsigned long wl;
 
-		if (!cpu_isset(i, *cpus))
+		if (!cpumask_test_cpu(i, cpus))
 			continue;
 
 		rq = cpu_rq(i);
@@ -3426,7 +3426,7 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
  */
 static int load_balance(int this_cpu, struct rq *this_rq,
 			struct sched_domain *sd, enum cpu_idle_type idle,
-			int *balance, cpumask_t *cpus)
+			int *balance, struct cpumask *cpus)
 {
 	int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
 	struct sched_group *group;
@@ -3434,7 +3434,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 	struct rq *busiest;
 	unsigned long flags;
 
-	cpus_setall(*cpus);
+	cpumask_setall(cpus);
 
 	/*
 	 * When power savings policy is enabled for the parent domain, idle
@@ -3494,8 +3494,8 @@ redo:
 
 		/* All tasks on this runqueue were pinned by CPU affinity */
 		if (unlikely(all_pinned)) {
-			cpu_clear(cpu_of(busiest), *cpus);
-			if (!cpus_empty(*cpus))
+			cpumask_clear_cpu(cpu_of(busiest), cpus);
+			if (!cpumask_empty(cpus))
 				goto redo;
 			goto out_balanced;
 		}
@@ -3512,7 +3512,8 @@ redo:
 			/* don't kick the migration_thread, if the curr
 			 * task on busiest cpu can't be moved to this_cpu
 			 */
-			if (!cpu_isset(this_cpu, busiest->curr->cpus_allowed)) {
+			if (!cpumask_test_cpu(this_cpu,
+					      &busiest->curr->cpus_allowed)) {
 				spin_unlock_irqrestore(&busiest->lock, flags);
 				all_pinned = 1;
 				goto out_one_pinned;
@@ -3587,7 +3588,7 @@ out:
  */
 static int
 load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd,
-			cpumask_t *cpus)
+			struct cpumask *cpus)
 {
 	struct sched_group *group;
 	struct rq *busiest = NULL;
@@ -3596,7 +3597,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd,
 	int sd_idle = 0;
 	int all_pinned = 0;
 
-	cpus_setall(*cpus);
+	cpumask_setall(cpus);
 
 	/*
 	 * When power savings policy is enabled for the parent domain, idle
@@ -3640,8 +3641,8 @@ redo:
 		double_unlock_balance(this_rq, busiest);
 
 		if (unlikely(all_pinned)) {
-			cpu_clear(cpu_of(busiest), *cpus);
-			if (!cpus_empty(*cpus))
+			cpumask_clear_cpu(cpu_of(busiest), cpus);
+			if (!cpumask_empty(cpus))
 				goto redo;
 		}
 	}
@@ -5376,7 +5377,7 @@ out_unlock:
 	return retval;
 }
 
-long sched_setaffinity(pid_t pid, const cpumask_t *in_mask)
+long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
 {
 	cpumask_var_t cpus_allowed, new_mask;
 	struct task_struct *p;
@@ -5445,13 +5446,13 @@ out_put_task:
 }
 
 static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len,
-			     cpumask_t *new_mask)
+			     struct cpumask *new_mask)
 {
-	if (len < sizeof(cpumask_t)) {
-		memset(new_mask, 0, sizeof(cpumask_t));
-	} else if (len > sizeof(cpumask_t)) {
-		len = sizeof(cpumask_t);
-	}
+	if (len < cpumask_size())
+		cpumask_clear(new_mask);
+	else if (len > cpumask_size())
+		len = cpumask_size();
+
 	return copy_from_user(new_mask, user_mask_ptr, len) ? -EFAULT : 0;
 }
 
@@ -5477,7 +5478,7 @@ asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len,
 	return retval;
 }
 
-long sched_getaffinity(pid_t pid, cpumask_t *mask)
+long sched_getaffinity(pid_t pid, struct cpumask *mask)
 {
 	struct task_struct *p;
 	int retval;
@@ -5494,7 +5495,7 @@ long sched_getaffinity(pid_t pid, cpumask_t *mask)
 	if (retval)
 		goto out_unlock;
 
-	cpus_and(*mask, p->cpus_allowed, cpu_online_map);
+	cpumask_and(mask, &p->cpus_allowed, cpu_online_mask);
 
 out_unlock:
 	read_unlock(&tasklist_lock);
@@ -5872,7 +5873,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
 	idle->se.exec_start = sched_clock();
 
 	idle->prio = idle->normal_prio = MAX_PRIO;
-	idle->cpus_allowed = cpumask_of_cpu(cpu);
+	cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu));
 	__set_task_cpu(idle, cpu);
 
 	rq->curr = rq->idle = idle;
@@ -5956,7 +5957,7 @@ static inline void sched_init_granularity(void)
  * task must not exit() & deallocate itself prematurely. The
  * call is not atomic; no spinlocks may be held.
  */
-int set_cpus_allowed_ptr(struct task_struct *p, const cpumask_t *new_mask)
+int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
 {
 	struct migration_req req;
 	unsigned long flags;
@@ -5964,13 +5965,13 @@ int set_cpus_allowed_ptr(struct task_struct *p, const cpumask_t *new_mask)
 	int ret = 0;
 
 	rq = task_rq_lock(p, &flags);
-	if (!cpus_intersects(*new_mask, cpu_online_map)) {
+	if (!cpumask_intersects(new_mask, cpu_online_mask)) {
 		ret = -EINVAL;
 		goto out;
 	}
 
 	if (unlikely((p->flags & PF_THREAD_BOUND) && p != current &&
-		     !cpus_equal(p->cpus_allowed, *new_mask))) {
+		     !cpumask_equal(&p->cpus_allowed, new_mask))) {
 		ret = -EINVAL;
 		goto out;
 	}
@@ -5978,12 +5979,12 @@ int set_cpus_allowed_ptr(struct task_struct *p, const cpumask_t *new_mask)
 	if (p->sched_class->set_cpus_allowed)
 		p->sched_class->set_cpus_allowed(p, new_mask);
 	else {
-		p->cpus_allowed = *new_mask;
-		p->rt.nr_cpus_allowed = cpus_weight(*new_mask);
+		cpumask_copy(&p->cpus_allowed, new_mask);
+		p->rt.nr_cpus_allowed = cpumask_weight(new_mask);
 	}
 
 	/* Can the task run on the task's current CPU? If so, we're done */
-	if (cpu_isset(task_cpu(p), *new_mask))
+	if (cpumask_test_cpu(task_cpu(p), new_mask))
 		goto out;
 
 	if (migrate_task(p, cpumask_any_and(cpu_online_mask, new_mask), &req)) {
@@ -6028,7 +6029,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
 	if (task_cpu(p) != src_cpu)
 		goto done;
 	/* Affinity changed (again). */
-	if (!cpu_isset(dest_cpu, p->cpus_allowed))
+	if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
 		goto fail;
 
 	on_rq = p->se.on_rq;
@@ -6629,13 +6630,13 @@ early_initcall(migration_init);
 #ifdef CONFIG_SCHED_DEBUG
 
 static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
-				  cpumask_t *groupmask)
+				  struct cpumask *groupmask)
 {
 	struct sched_group *group = sd->groups;
 	char str[256];
 
 	cpulist_scnprintf(str, sizeof(str), *sched_domain_span(sd));
-	cpus_clear(*groupmask);
+	cpumask_clear(groupmask);
 
 	printk(KERN_DEBUG "%*s domain %d: ", level, "", level);
 
@@ -6936,24 +6937,25 @@ __setup("isolcpus=", isolated_cpu_setup);
 /*
  * init_sched_build_groups takes the cpumask we wish to span, and a pointer
  * to a function which identifies what group(along with sched group) a CPU
- * belongs to. The return value of group_fn must be a >= 0 and < NR_CPUS
- * (due to the fact that we keep track of groups covered with a cpumask_t).
+ * belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids
+ * (due to the fact that we keep track of groups covered with a struct cpumask).
  *
  * init_sched_build_groups will build a circular linked list of the groups
  * covered by the given span, and will set each group's ->cpumask correctly,
  * and ->cpu_power to 0.
  */
 static void
-init_sched_build_groups(const cpumask_t *span, const cpumask_t *cpu_map,
-			int (*group_fn)(int cpu, const cpumask_t *cpu_map,
+init_sched_build_groups(const struct cpumask *span,
+			const struct cpumask *cpu_map,
+			int (*group_fn)(int cpu, const struct cpumask *cpu_map,
 					struct sched_group **sg,
-					cpumask_t *tmpmask),
-			cpumask_t *covered, cpumask_t *tmpmask)
+					struct cpumask *tmpmask),
+			struct cpumask *covered, struct cpumask *tmpmask)
 {
 	struct sched_group *first = NULL, *last = NULL;
 	int i;
 
-	cpus_clear(*covered);
+	cpumask_clear(covered);
 
 	for_each_cpu(i, span) {
 		struct sched_group *sg;
@@ -6970,7 +6972,7 @@ init_sched_build_groups(const cpumask_t *span, const cpumask_t *cpu_map,
 			if (group_fn(j, cpu_map, NULL, tmpmask) != group)
 				continue;
 
-			cpu_set(j, *covered);
+			cpumask_set_cpu(j, covered);
 			cpumask_set_cpu(j, sched_group_cpus(sg));
 		}
 		if (!first)
@@ -7035,9 +7037,10 @@ static int find_next_best_node(int node, nodemask_t *used_nodes)
  * should be one that prevents unnecessary balancing, but also spreads tasks
  * out optimally.
  */
-static void sched_domain_node_span(int node, cpumask_t *span)
+static void sched_domain_node_span(int node, struct cpumask *span)
 {
 	nodemask_t used_nodes;
+	/* FIXME: use cpumask_of_node() */
 	node_to_cpumask_ptr(nodemask, node);
 	int i;
 
@@ -7081,8 +7084,8 @@ static DEFINE_PER_CPU(struct static_sched_domain, cpu_domains);
 static DEFINE_PER_CPU(struct static_sched_group, sched_group_cpus);
 
 static int
-cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg,
-		 cpumask_t *unused)
+cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map,
+		 struct sched_group **sg, struct cpumask *unused)
 {
 	if (sg)
 		*sg = &per_cpu(sched_group_cpus, cpu).sg;
@@ -7100,22 +7103,21 @@ static DEFINE_PER_CPU(struct static_sched_group, sched_group_core);
 
 #if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT)
 static int
-cpu_to_core_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg,
-		  cpumask_t *mask)
+cpu_to_core_group(int cpu, const struct cpumask *cpu_map,
+		  struct sched_group **sg, struct cpumask *mask)
 {
 	int group;
 
-	*mask = per_cpu(cpu_sibling_map, cpu);
-	cpus_and(*mask, *mask, *cpu_map);
-	group = first_cpu(*mask);
+	cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map);
+	group = cpumask_first(mask);
 	if (sg)
 		*sg = &per_cpu(sched_group_core, group).sg;
 	return group;
 }
 #elif defined(CONFIG_SCHED_MC)
 static int
-cpu_to_core_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg,
-		  cpumask_t *unused)
+cpu_to_core_group(int cpu, const struct cpumask *cpu_map,
+		  struct sched_group **sg, struct cpumask *unused)
 {
 	if (sg)
 		*sg = &per_cpu(sched_group_core, cpu).sg;
@@ -7127,18 +7129,18 @@ static DEFINE_PER_CPU(struct static_sched_domain, phys_domains);
 static DEFINE_PER_CPU(struct static_sched_group, sched_group_phys);
 
 static int
-cpu_to_phys_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg,
-		  cpumask_t *mask)
+cpu_to_phys_group(int cpu, const struct cpumask *cpu_map,
+		  struct sched_group **sg, struct cpumask *mask)
 {
 	int group;
 #ifdef CONFIG_SCHED_MC
+	/* FIXME: Use cpu_coregroup_mask. */
 	*mask = cpu_coregroup_map(cpu);
 	cpus_and(*mask, *mask, *cpu_map);
-	group = first_cpu(*mask);
+	group = cpumask_first(mask);
 #elif defined(CONFIG_SCHED_SMT)
-	*mask = per_cpu(cpu_sibling_map, cpu);
-	cpus_and(*mask, *mask, *cpu_map);
-	group = first_cpu(*mask);
+	cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map);
+	group = cpumask_first(mask);
 #else
 	group = cpu;
 #endif
@@ -7159,14 +7161,16 @@ static struct sched_group ***sched_group_nodes_bycpu;
 static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
 static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes);
 
-static int cpu_to_allnodes_group(int cpu, const cpumask_t *cpu_map,
-				 struct sched_group **sg, cpumask_t *nodemask)
+static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map,
+				 struct sched_group **sg,
+				 struct cpumask *nodemask)
 {
 	int group;
+	/* FIXME: use cpumask_of_node */
 	node_to_cpumask_ptr(pnodemask, cpu_to_node(cpu));
 
-	cpus_and(*nodemask, *pnodemask, *cpu_map);
-	group = first_cpu(*nodemask);
+	cpumask_and(nodemask, pnodemask, cpu_map);
+	group = cpumask_first(nodemask);
 
 	if (sg)
 		*sg = &per_cpu(sched_group_allnodes, group).sg;
@@ -7202,7 +7206,8 @@ static void init_numa_sched_groups_power(struct sched_group *group_head)
 
 #ifdef CONFIG_NUMA
 /* Free memory allocated for various sched_group structures */
-static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask)
+static void free_sched_groups(const struct cpumask *cpu_map,
+			      struct cpumask *nodemask)
 {
 	int cpu, i;
 
@@ -7215,10 +7220,11 @@ static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask)
 
 		for (i = 0; i < nr_node_ids; i++) {
 			struct sched_group *oldsg, *sg = sched_group_nodes[i];
+			/* FIXME: Use cpumask_of_node */
 			node_to_cpumask_ptr(pnodemask, i);
 
 			cpus_and(*nodemask, *pnodemask, *cpu_map);
-			if (cpus_empty(*nodemask))
+			if (cpumask_empty(nodemask))
 				continue;
 
 			if (sg == NULL)
@@ -7236,7 +7242,8 @@ next_sg:
 	}
 }
 #else /* !CONFIG_NUMA */
-static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask)
+static void free_sched_groups(const struct cpumask *cpu_map,
+			      struct cpumask *nodemask)
 {
 }
 #endif /* CONFIG_NUMA */
@@ -7366,7 +7373,7 @@ static void set_domain_attribute(struct sched_domain *sd,
  * Build sched domains for a given set of cpus and attach the sched domains
  * to the individual cpus
  */
-static int __build_sched_domains(const cpumask_t *cpu_map,
+static int __build_sched_domains(const struct cpumask *cpu_map,
 				 struct sched_domain_attr *attr)
 {
 	int i, err = -ENOMEM;
@@ -7416,7 +7423,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 	}
 
 #ifdef CONFIG_NUMA
-	sched_group_nodes_bycpu[first_cpu(*cpu_map)] = sched_group_nodes;
+	sched_group_nodes_bycpu[cpumask_first(cpu_map)] = sched_group_nodes;
 #endif
 
 	/*
@@ -7425,12 +7432,13 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 	for_each_cpu(i, cpu_map) {
 		struct sched_domain *sd = NULL, *p;
 
+		/* FIXME: use cpumask_of_node */
 		*nodemask = node_to_cpumask(cpu_to_node(i));
 		cpus_and(*nodemask, *nodemask, *cpu_map);
 
 #ifdef CONFIG_NUMA
-		if (cpus_weight(*cpu_map) >
-				SD_NODES_PER_DOMAIN*cpus_weight(*nodemask)) {
+		if (cpumask_weight(cpu_map) >
+				SD_NODES_PER_DOMAIN*cpumask_weight(nodemask)) {
 			sd = &per_cpu(allnodes_domains, i);
 			SD_INIT(sd, ALLNODES);
 			set_domain_attribute(sd, attr);
@@ -7491,9 +7499,9 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 #ifdef CONFIG_SCHED_SMT
 	/* Set up CPU (sibling) groups */
 	for_each_cpu(i, cpu_map) {
-		*this_sibling_map = per_cpu(cpu_sibling_map, i);
-		cpus_and(*this_sibling_map, *this_sibling_map, *cpu_map);
-		if (i != first_cpu(*this_sibling_map))
+		cpumask_and(this_sibling_map,
+			    &per_cpu(cpu_sibling_map, i), cpu_map);
+		if (i != cpumask_first(this_sibling_map))
 			continue;
 
 		init_sched_build_groups(this_sibling_map, cpu_map,
@@ -7505,9 +7513,10 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 #ifdef CONFIG_SCHED_MC
 	/* Set up multi-core groups */
 	for_each_cpu(i, cpu_map) {
+		/* FIXME: Use cpu_coregroup_mask */
 		*this_core_map = cpu_coregroup_map(i);
 		cpus_and(*this_core_map, *this_core_map, *cpu_map);
-		if (i != first_cpu(*this_core_map))
+		if (i != cpumask_first(this_core_map))
 			continue;
 
 		init_sched_build_groups(this_core_map, cpu_map,
@@ -7518,9 +7527,10 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 
 	/* Set up physical groups */
 	for (i = 0; i < nr_node_ids; i++) {
+		/* FIXME: Use cpumask_of_node */
 		*nodemask = node_to_cpumask(i);
 		cpus_and(*nodemask, *nodemask, *cpu_map);
-		if (cpus_empty(*nodemask))
+		if (cpumask_empty(nodemask))
 			continue;
 
 		init_sched_build_groups(nodemask, cpu_map,
@@ -7541,17 +7551,18 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 		struct sched_group *sg, *prev;
 		int j;
 
+		/* FIXME: Use cpumask_of_node */
 		*nodemask = node_to_cpumask(i);
-		cpus_clear(*covered);
+		cpumask_clear(covered);
 
 		cpus_and(*nodemask, *nodemask, *cpu_map);
-		if (cpus_empty(*nodemask)) {
+		if (cpumask_empty(nodemask)) {
 			sched_group_nodes[i] = NULL;
 			continue;
 		}
 
 		sched_domain_node_span(i, domainspan);
-		cpus_and(*domainspan, *domainspan, *cpu_map);
+		cpumask_and(domainspan, domainspan, cpu_map);
 
 		sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(),
 				  GFP_KERNEL, i);
@@ -7570,21 +7581,22 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 		sg->__cpu_power = 0;
 		cpumask_copy(sched_group_cpus(sg), nodemask);
 		sg->next = sg;
-		cpus_or(*covered, *covered, *nodemask);
+		cpumask_or(covered, covered, nodemask);
 		prev = sg;
 
 		for (j = 0; j < nr_node_ids; j++) {
 			int n = (i + j) % nr_node_ids;
+			/* FIXME: Use cpumask_of_node */
 			node_to_cpumask_ptr(pnodemask, n);
 
-			cpus_complement(*notcovered, *covered);
-			cpus_and(*tmpmask, *notcovered, *cpu_map);
-			cpus_and(*tmpmask, *tmpmask, *domainspan);
-			if (cpus_empty(*tmpmask))
+			cpumask_complement(notcovered, covered);
+			cpumask_and(tmpmask, notcovered, cpu_map);
+			cpumask_and(tmpmask, tmpmask, domainspan);
+			if (cpumask_empty(tmpmask))
 				break;
 
-			cpus_and(*tmpmask, *tmpmask, *pnodemask);
-			if (cpus_empty(*tmpmask))
+			cpumask_and(tmpmask, tmpmask, pnodemask);
+			if (cpumask_empty(tmpmask))
 				continue;
 
 			sg = kmalloc_node(sizeof(struct sched_group) +
@@ -7598,7 +7610,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 			sg->__cpu_power = 0;
 			cpumask_copy(sched_group_cpus(sg), tmpmask);
 			sg->next = prev->next;
-			cpus_or(*covered, *covered, *tmpmask);
+			cpumask_or(covered, covered, tmpmask);
 			prev->next = sg;
 			prev = sg;
 		}
@@ -7634,7 +7646,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 	if (sd_allnodes) {
 		struct sched_group *sg;
 
-		cpu_to_allnodes_group(first_cpu(*cpu_map), cpu_map, &sg,
+		cpu_to_allnodes_group(cpumask_first(cpu_map), cpu_map, &sg,
 								tmpmask);
 		init_numa_sched_groups_power(sg);
 	}
@@ -7690,12 +7702,12 @@ error:
 #endif
 }
 
-static int build_sched_domains(const cpumask_t *cpu_map)
+static int build_sched_domains(const struct cpumask *cpu_map)
 {
 	return __build_sched_domains(cpu_map, NULL);
 }
 
-static cpumask_t *doms_cur;	/* current sched domains */
+static struct cpumask *doms_cur;	/* current sched domains */
 static int ndoms_cur;		/* number of sched domains in 'doms_cur' */
 static struct sched_domain_attr *dattr_cur;
 				/* attribues of custom domains in 'doms_cur' */
@@ -7716,13 +7728,13 @@ void __attribute__((weak)) arch_update_cpu_topology(void)
  * For now this just excludes isolated cpus, but could be used to
  * exclude other special cases in the future.
  */
-static int arch_init_sched_domains(const cpumask_t *cpu_map)
+static int arch_init_sched_domains(const struct cpumask *cpu_map)
 {
 	int err;
 
 	arch_update_cpu_topology();
 	ndoms_cur = 1;
-	doms_cur = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
+	doms_cur = kmalloc(cpumask_size(), GFP_KERNEL);
 	if (!doms_cur)
 		doms_cur = fallback_doms;
 	cpumask_andnot(doms_cur, cpu_map, cpu_isolated_map);
@@ -7733,8 +7745,8 @@ static int arch_init_sched_domains(const cpumask_t *cpu_map)
 	return err;
 }
 
-static void arch_destroy_sched_domains(const cpumask_t *cpu_map,
-				       cpumask_t *tmpmask)
+static void arch_destroy_sched_domains(const struct cpumask *cpu_map,
+				       struct cpumask *tmpmask)
 {
 	free_sched_groups(cpu_map, tmpmask);
 }
@@ -7743,15 +7755,16 @@ static void arch_destroy_sched_domains(const cpumask_t *cpu_map,
  * Detach sched domains from a group of cpus specified in cpu_map
  * These cpus will now be attached to the NULL domain
  */
-static void detach_destroy_domains(const cpumask_t *cpu_map)
+static void detach_destroy_domains(const struct cpumask *cpu_map)
 {
-	cpumask_t tmpmask;
+	/* Save because hotplug lock held. */
+	static DECLARE_BITMAP(tmpmask, CONFIG_NR_CPUS);
 	int i;
 
 	for_each_cpu(i, cpu_map)
 		cpu_attach_domain(NULL, &def_root_domain, i);
 	synchronize_sched();
-	arch_destroy_sched_domains(cpu_map, &tmpmask);
+	arch_destroy_sched_domains(cpu_map, to_cpumask(tmpmask));
 }
 
 /* handle null as "default" */
@@ -7776,7 +7789,7 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
  * doms_new[] to the current sched domain partitioning, doms_cur[].
  * It destroys each deleted domain and builds each new domain.
  *
- * 'doms_new' is an array of cpumask_t's of length 'ndoms_new'.
+ * 'doms_new' is an array of cpumask's of length 'ndoms_new'.
  * The masks don't intersect (don't overlap.) We should setup one
  * sched domain for each mask. CPUs not in any of the cpumasks will
  * not be load balanced. If the same cpumask appears both in the
@@ -7790,13 +7803,14 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
  * the single partition 'fallback_doms', it also forces the domains
  * to be rebuilt.
  *
- * If doms_new == NULL it will be replaced with cpu_online_map.
+ * If doms_new == NULL it will be replaced with cpu_online_mask.
  * ndoms_new == 0 is a special case for destroying existing domains,
  * and it will not create the default domain.
  *
  * Call with hotplug lock held
  */
-void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
+/* FIXME: Change to struct cpumask *doms_new[] */
+void partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
 			     struct sched_domain_attr *dattr_new)
 {
 	int i, j, n;
@@ -7811,7 +7825,7 @@ void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
 	/* Destroy deleted domains */
 	for (i = 0; i < ndoms_cur; i++) {
 		for (j = 0; j < n; j++) {
-			if (cpus_equal(doms_cur[i], doms_new[j])
+			if (cpumask_equal(&doms_cur[i], &doms_new[j])
 			    && dattrs_equal(dattr_cur, i, dattr_new, j))
 				goto match1;
 		}
@@ -7831,7 +7845,7 @@ match1:
 	/* Build new domains */
 	for (i = 0; i < ndoms_new; i++) {
 		for (j = 0; j < ndoms_cur; j++) {
-			if (cpus_equal(doms_new[i], doms_cur[j])
+			if (cpumask_equal(&doms_new[i], &doms_cur[j])
 			    && dattrs_equal(dattr_new, i, dattr_cur, j))
 				goto match2;
 		}
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index bba00402ed90..08ffffd4a410 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1017,7 +1017,7 @@ static void yield_task_fair(struct rq *rq)
  * search starts with cpus closest then further out as needed,
  * so we always favor a closer, idle cpu.
  * Domains may include CPUs that are not usable for migration,
- * hence we need to mask them out (cpu_active_map)
+ * hence we need to mask them out (cpu_active_mask)
  *
  * Returns the CPU we should wake onto.
  */
@@ -1244,7 +1244,7 @@ static int select_task_rq_fair(struct task_struct *p, int sync)
 		}
 	}
 
-	if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed)))
+	if (unlikely(!cpumask_test_cpu(this_cpu, &p->cpus_allowed)))
 		goto out;
 
 	/*
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 1f0e99d1a8ce..fb3964579a8a 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -923,7 +923,7 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
 static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
 {
 	if (!task_running(rq, p) &&
-	    (cpu < 0 || cpu_isset(cpu, p->cpus_allowed)) &&
+	    (cpu < 0 || cpumask_test_cpu(cpu, &p->cpus_allowed)) &&
 	    (p->rt.nr_cpus_allowed > 1))
 		return 1;
 	return 0;
@@ -982,7 +982,7 @@ static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask)
 static int find_lowest_rq(struct task_struct *task)
 {
 	struct sched_domain *sd;
-	cpumask_t *lowest_mask = __get_cpu_var(local_cpu_mask);
+	struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask);
 	int this_cpu = smp_processor_id();
 	int cpu      = task_cpu(task);
 
@@ -997,7 +997,7 @@ static int find_lowest_rq(struct task_struct *task)
 	 * I guess we might want to change cpupri_find() to ignore those
 	 * in the first place.
 	 */
-	cpus_and(*lowest_mask, *lowest_mask, cpu_active_map);
+	cpumask_and(lowest_mask, lowest_mask, cpu_active_mask);
 
 	/*
 	 * At this point we have built a mask of cpus representing the
@@ -1007,7 +1007,7 @@ static int find_lowest_rq(struct task_struct *task)
 	 * We prioritize the last cpu that the task executed on since
 	 * it is most likely cache-hot in that location.
 	 */
-	if (cpu_isset(cpu, *lowest_mask))
+	if (cpumask_test_cpu(cpu, lowest_mask))
 		return cpu;
 
 	/*
@@ -1064,8 +1064,8 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
 			 * Also make sure that it wasn't scheduled on its rq.
 			 */
 			if (unlikely(task_rq(task) != rq ||
-				     !cpu_isset(lowest_rq->cpu,
-						task->cpus_allowed) ||
+				     !cpumask_test_cpu(lowest_rq->cpu,
+						       &task->cpus_allowed) ||
 				     task_running(rq, task) ||
 				     !task->se.on_rq)) {
 
@@ -1315,9 +1315,9 @@ move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
 }
 
 static void set_cpus_allowed_rt(struct task_struct *p,
-				const cpumask_t *new_mask)
+				const struct cpumask *new_mask)
 {
-	int weight = cpus_weight(*new_mask);
+	int weight = cpumask_weight(new_mask);
 
 	BUG_ON(!rt_task(p));
 
@@ -1338,7 +1338,7 @@ static void set_cpus_allowed_rt(struct task_struct *p,
 		update_rt_migration(rq);
 	}
 
-	p->cpus_allowed    = *new_mask;
+	cpumask_copy(&p->cpus_allowed, new_mask);
 	p->rt.nr_cpus_allowed = weight;
 }
 
-- 
cgit v1.2.3


From 1acdac104668a0834cfa267de9946fac7764d486 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 20 Nov 2008 10:02:53 -0800
Subject: futex: make clock selectable for FUTEX_WAIT_BITSET

FUTEX_WAIT_BITSET could be used instead of FUTEX_WAIT by setting the
bit set to FUTEX_BITSET_MATCH_ANY, but FUTEX_WAIT uses CLOCK_REALTIME
while FUTEX_WAIT_BITSET uses CLOCK_MONOTONIC.

Add a flag to select CLOCK_REALTIME for FUTEX_WAIT_BITSET so glibc can
replace the FUTEX_WAIT logic which needs to do gettimeofday() calls
before and after the syscall to convert the absolute timeout to a
relative timeout for FUTEX_WAIT.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Ulrich Drepper <drepper@redhat.com>
---
 include/linux/futex.h |  3 ++-
 kernel/futex.c        | 24 +++++++++++++++++-------
 2 files changed, 19 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/futex.h b/include/linux/futex.h
index 8f627b9ae2b1..3bf5bb5a34f9 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -25,7 +25,8 @@ union ktime;
 #define FUTEX_WAKE_BITSET	10
 
 #define FUTEX_PRIVATE_FLAG	128
-#define FUTEX_CMD_MASK		~FUTEX_PRIVATE_FLAG
+#define FUTEX_CLOCK_REALTIME	256
+#define FUTEX_CMD_MASK		~(FUTEX_PRIVATE_FLAG | FUTEX_CLOCK_REALTIME)
 
 #define FUTEX_WAIT_PRIVATE	(FUTEX_WAIT | FUTEX_PRIVATE_FLAG)
 #define FUTEX_WAKE_PRIVATE	(FUTEX_WAKE | FUTEX_PRIVATE_FLAG)
diff --git a/kernel/futex.c b/kernel/futex.c
index e10c5c8786a6..ba0d3b83c091 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1142,12 +1142,13 @@ handle_fault:
  * In case we must use restart_block to restart a futex_wait,
  * we encode in the 'flags' shared capability
  */
-#define FLAGS_SHARED  1
+#define FLAGS_SHARED		0x01
+#define FLAGS_CLOCKRT		0x02
 
 static long futex_wait_restart(struct restart_block *restart);
 
 static int futex_wait(u32 __user *uaddr, int fshared,
-		      u32 val, ktime_t *abs_time, u32 bitset)
+		      u32 val, ktime_t *abs_time, u32 bitset, int clockrt)
 {
 	struct task_struct *curr = current;
 	DECLARE_WAITQUEUE(wait, curr);
@@ -1233,8 +1234,10 @@ static int futex_wait(u32 __user *uaddr, int fshared,
 			slack = current->timer_slack_ns;
 			if (rt_task(current))
 				slack = 0;
-			hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC,
-						HRTIMER_MODE_ABS);
+			hrtimer_init_on_stack(&t.timer,
+					      clockrt ? CLOCK_REALTIME :
+					      CLOCK_MONOTONIC,
+					      HRTIMER_MODE_ABS);
 			hrtimer_init_sleeper(&t, current);
 			hrtimer_set_expires_range_ns(&t.timer, *abs_time, slack);
 
@@ -1289,6 +1292,8 @@ static int futex_wait(u32 __user *uaddr, int fshared,
 
 		if (fshared)
 			restart->futex.flags |= FLAGS_SHARED;
+		if (clockrt)
+			restart->futex.flags |= FLAGS_CLOCKRT;
 		return -ERESTART_RESTARTBLOCK;
 	}
 
@@ -1312,7 +1317,8 @@ static long futex_wait_restart(struct restart_block *restart)
 	if (restart->futex.flags & FLAGS_SHARED)
 		fshared = 1;
 	return (long)futex_wait(uaddr, fshared, restart->futex.val, &t,
-				restart->futex.bitset);
+				restart->futex.bitset,
+				restart->futex.flags & FLAGS_CLOCKRT);
 }
 
 
@@ -1905,18 +1911,22 @@ void exit_robust_list(struct task_struct *curr)
 long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
 		u32 __user *uaddr2, u32 val2, u32 val3)
 {
-	int ret = -ENOSYS;
+	int clockrt, ret = -ENOSYS;
 	int cmd = op & FUTEX_CMD_MASK;
 	int fshared = 0;
 
 	if (!(op & FUTEX_PRIVATE_FLAG))
 		fshared = 1;
 
+	clockrt = op & FUTEX_CLOCK_REALTIME;
+	if (clockrt && cmd != FUTEX_WAIT_BITSET)
+		return -ENOSYS;
+
 	switch (cmd) {
 	case FUTEX_WAIT:
 		val3 = FUTEX_BITSET_MATCH_ANY;
 	case FUTEX_WAIT_BITSET:
-		ret = futex_wait(uaddr, fshared, val, timeout, val3);
+		ret = futex_wait(uaddr, fshared, val, timeout, val3, clockrt);
 		break;
 	case FUTEX_WAKE:
 		val3 = FUTEX_BITSET_MATCH_ANY;
-- 
cgit v1.2.3


From ca109491f612aab5c8152207631c0444f63da97f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 25 Nov 2008 12:43:51 +0100
Subject: hrtimer: removing all ur callback modes

Impact: cleanup, move all hrtimer processing into hardirq context

This is an attempt at removing some of the hrtimer complexity by
reducing the number of callback modes to 1.

This means that all hrtimer callback functions will be ran from HARD-irq
context.

I went through all the 30 odd hrtimer callback functions in the kernel
and saw only one that I'm not quite sure of, which is the one in
net/can/bcm.c - hence I'm CC-ing the folks responsible for that code.

Furthermore, the hrtimer core now calls callbacks directly with IRQs
disabled in case you try to enqueue an expired timer. If this timer is a
periodic timer (which should use hrtimer_forward() to advance its time)
then it might be possible to end up in an inf. recursive loop due to the
fact that hrtimer_forward() doesn't round up to the next timer
granularity, and therefore keeps on calling the callback - obviously
this needs a fix.

Aside from that, this seems to compile and actually boot on my dual core
test box - although I'm sure there are some bugs in, me not hitting any
makes me certain :-)

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 drivers/input/touchscreen/ads7846.c |   4 +-
 include/linux/hrtimer.h             |  34 +----
 include/linux/interrupt.h           |   3 -
 kernel/hrtimer.c                    | 280 ++++--------------------------------
 kernel/sched.c                      |   2 -
 kernel/time/ntp.c                   |   4 +-
 kernel/time/tick-sched.c            |   1 -
 kernel/trace/trace_sysprof.c        |   1 -
 sound/drivers/pcsp/pcsp.c           |   1 -
 9 files changed, 37 insertions(+), 293 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c
index b9b7fc6ff1eb..e1ece89fe922 100644
--- a/drivers/input/touchscreen/ads7846.c
+++ b/drivers/input/touchscreen/ads7846.c
@@ -697,7 +697,7 @@ static enum hrtimer_restart ads7846_timer(struct hrtimer *handle)
 	struct ads7846	*ts = container_of(handle, struct ads7846, timer);
 	int		status = 0;
 
-	spin_lock_irq(&ts->lock);
+	spin_lock(&ts->lock);
 
 	if (unlikely(!get_pendown_state(ts) ||
 		     device_suspended(&ts->spi->dev))) {
@@ -728,7 +728,7 @@ static enum hrtimer_restart ads7846_timer(struct hrtimer *handle)
 			dev_err(&ts->spi->dev, "spi_async --> %d\n", status);
 	}
 
-	spin_unlock_irq(&ts->lock);
+	spin_unlock(&ts->lock);
 	return HRTIMER_NORESTART;
 }
 
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 3eba43878dcb..bd37078c2d7d 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -42,26 +42,6 @@ enum hrtimer_restart {
 	HRTIMER_RESTART,	/* Timer must be restarted */
 };
 
-/*
- * hrtimer callback modes:
- *
- *	HRTIMER_CB_SOFTIRQ:		Callback must run in softirq context
- *	HRTIMER_CB_IRQSAFE_PERCPU:	Callback must run in hardirq context
- *					Special mode for tick emulation and
- *					scheduler timer. Such timers are per
- *					cpu and not allowed to be migrated on
- *					cpu unplug.
- *	HRTIMER_CB_IRQSAFE_UNLOCKED:	Callback should run in hardirq context
- *					with timer->base lock unlocked
- *					used for timers which call wakeup to
- *					avoid lock order problems with rq->lock
- */
-enum hrtimer_cb_mode {
-	HRTIMER_CB_SOFTIRQ,
-	HRTIMER_CB_IRQSAFE_PERCPU,
-	HRTIMER_CB_IRQSAFE_UNLOCKED,
-};
-
 /*
  * Values to track state of the timer
  *
@@ -70,7 +50,6 @@ enum hrtimer_cb_mode {
  * 0x00		inactive
  * 0x01		enqueued into rbtree
  * 0x02		callback function running
- * 0x04		callback pending (high resolution mode)
  *
  * Special cases:
  * 0x03		callback function running and enqueued
@@ -92,8 +71,7 @@ enum hrtimer_cb_mode {
 #define HRTIMER_STATE_INACTIVE	0x00
 #define HRTIMER_STATE_ENQUEUED	0x01
 #define HRTIMER_STATE_CALLBACK	0x02
-#define HRTIMER_STATE_PENDING	0x04
-#define HRTIMER_STATE_MIGRATE	0x08
+#define HRTIMER_STATE_MIGRATE	0x04
 
 /**
  * struct hrtimer - the basic hrtimer structure
@@ -109,8 +87,6 @@ enum hrtimer_cb_mode {
  * @function:	timer expiry callback function
  * @base:	pointer to the timer base (per cpu and per clock)
  * @state:	state information (See bit values above)
- * @cb_mode:	high resolution timer feature to select the callback execution
- *		 mode
  * @cb_entry:	list head to enqueue an expired timer into the callback list
  * @start_site:	timer statistics field to store the site where the timer
  *		was started
@@ -129,7 +105,6 @@ struct hrtimer {
 	struct hrtimer_clock_base	*base;
 	unsigned long			state;
 	struct list_head		cb_entry;
-	enum hrtimer_cb_mode		cb_mode;
 #ifdef CONFIG_TIMER_STATS
 	int				start_pid;
 	void				*start_site;
@@ -188,15 +163,11 @@ struct hrtimer_clock_base {
  * @check_clocks:	Indictator, when set evaluate time source and clock
  *			event devices whether high resolution mode can be
  *			activated.
- * @cb_pending:		Expired timers are moved from the rbtree to this
- *			list in the timer interrupt. The list is processed
- *			in the softirq.
  * @nr_events:		Total number of timer interrupt events
  */
 struct hrtimer_cpu_base {
 	spinlock_t			lock;
 	struct hrtimer_clock_base	clock_base[HRTIMER_MAX_CLOCK_BASES];
-	struct list_head		cb_pending;
 #ifdef CONFIG_HIGH_RES_TIMERS
 	ktime_t				expires_next;
 	int				hres_active;
@@ -404,8 +375,7 @@ static inline int hrtimer_active(const struct hrtimer *timer)
  */
 static inline int hrtimer_is_queued(struct hrtimer *timer)
 {
-	return timer->state &
-		(HRTIMER_STATE_ENQUEUED | HRTIMER_STATE_PENDING);
+	return timer->state & HRTIMER_STATE_ENQUEUED;
 }
 
 /*
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index f58a0cf8929a..d6210a97a8ca 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -251,9 +251,6 @@ enum
 	BLOCK_SOFTIRQ,
 	TASKLET_SOFTIRQ,
 	SCHED_SOFTIRQ,
-#ifdef CONFIG_HIGH_RES_TIMERS
-	HRTIMER_SOFTIRQ,
-#endif
 	RCU_SOFTIRQ, 	/* Preferable RCU should always be the last softirq */
 
 	NR_SOFTIRQS
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 47e63349d1b2..efd6f41e1c16 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -442,22 +442,6 @@ static inline void debug_hrtimer_activate(struct hrtimer *timer) { }
 static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { }
 #endif
 
-/*
- * Check, whether the timer is on the callback pending list
- */
-static inline int hrtimer_cb_pending(const struct hrtimer *timer)
-{
-	return timer->state & HRTIMER_STATE_PENDING;
-}
-
-/*
- * Remove a timer from the callback pending list
- */
-static inline void hrtimer_remove_cb_pending(struct hrtimer *timer)
-{
-	list_del_init(&timer->cb_entry);
-}
-
 /* High resolution timer related functions */
 #ifdef CONFIG_HIGH_RES_TIMERS
 
@@ -651,6 +635,8 @@ static inline void hrtimer_init_timer_hres(struct hrtimer *timer)
 {
 }
 
+static void __run_hrtimer(struct hrtimer *timer);
+
 /*
  * When High resolution timers are active, try to reprogram. Note, that in case
  * the state has HRTIMER_STATE_CALLBACK set, no reprogramming and no expiry
@@ -661,31 +647,14 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
 					    struct hrtimer_clock_base *base)
 {
 	if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) {
-
-		/* Timer is expired, act upon the callback mode */
-		switch(timer->cb_mode) {
-		case HRTIMER_CB_IRQSAFE_PERCPU:
-		case HRTIMER_CB_IRQSAFE_UNLOCKED:
-			/*
-			 * This is solely for the sched tick emulation with
-			 * dynamic tick support to ensure that we do not
-			 * restart the tick right on the edge and end up with
-			 * the tick timer in the softirq ! The calling site
-			 * takes care of this. Also used for hrtimer sleeper !
-			 */
-			debug_hrtimer_deactivate(timer);
-			return 1;
-		case HRTIMER_CB_SOFTIRQ:
-			/*
-			 * Move everything else into the softirq pending list !
-			 */
-			list_add_tail(&timer->cb_entry,
-				      &base->cpu_base->cb_pending);
-			timer->state = HRTIMER_STATE_PENDING;
-			return 1;
-		default:
-			BUG();
-		}
+		/*
+		 * XXX: recursion check?
+		 * hrtimer_forward() should round up with timer granularity
+		 * so that we never get into inf recursion here,
+		 * it doesn't do that though
+		 */
+		__run_hrtimer(timer);
+		return 1;
 	}
 	return 0;
 }
@@ -724,11 +693,6 @@ static int hrtimer_switch_to_hres(void)
 	return 1;
 }
 
-static inline void hrtimer_raise_softirq(void)
-{
-	raise_softirq(HRTIMER_SOFTIRQ);
-}
-
 #else
 
 static inline int hrtimer_hres_active(void) { return 0; }
@@ -747,7 +711,6 @@ static inline int hrtimer_reprogram(struct hrtimer *timer,
 {
 	return 0;
 }
-static inline void hrtimer_raise_softirq(void) { }
 
 #endif /* CONFIG_HIGH_RES_TIMERS */
 
@@ -890,10 +853,7 @@ static void __remove_hrtimer(struct hrtimer *timer,
 			     struct hrtimer_clock_base *base,
 			     unsigned long newstate, int reprogram)
 {
-	/* High res. callback list. NOP for !HIGHRES */
-	if (hrtimer_cb_pending(timer))
-		hrtimer_remove_cb_pending(timer);
-	else {
+	if (timer->state & HRTIMER_STATE_ENQUEUED) {
 		/*
 		 * Remove the timer from the rbtree and replace the
 		 * first entry pointer if necessary.
@@ -953,7 +913,7 @@ hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long delta_n
 {
 	struct hrtimer_clock_base *base, *new_base;
 	unsigned long flags;
-	int ret, raise;
+	int ret;
 
 	base = lock_hrtimer_base(timer, &flags);
 
@@ -988,26 +948,8 @@ hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long delta_n
 	enqueue_hrtimer(timer, new_base,
 			new_base->cpu_base == &__get_cpu_var(hrtimer_bases));
 
-	/*
-	 * The timer may be expired and moved to the cb_pending
-	 * list. We can not raise the softirq with base lock held due
-	 * to a possible deadlock with runqueue lock.
-	 */
-	raise = timer->state == HRTIMER_STATE_PENDING;
-
-	/*
-	 * We use preempt_disable to prevent this task from migrating after
-	 * setting up the softirq and raising it. Otherwise, if me migrate
-	 * we will raise the softirq on the wrong CPU.
-	 */
-	preempt_disable();
-
 	unlock_hrtimer_base(timer, &flags);
 
-	if (raise)
-		hrtimer_raise_softirq();
-	preempt_enable();
-
 	return ret;
 }
 EXPORT_SYMBOL_GPL(hrtimer_start_range_ns);
@@ -1192,75 +1134,6 @@ int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp)
 }
 EXPORT_SYMBOL_GPL(hrtimer_get_res);
 
-static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base)
-{
-	spin_lock_irq(&cpu_base->lock);
-
-	while (!list_empty(&cpu_base->cb_pending)) {
-		enum hrtimer_restart (*fn)(struct hrtimer *);
-		struct hrtimer *timer;
-		int restart;
-		int emulate_hardirq_ctx = 0;
-
-		timer = list_entry(cpu_base->cb_pending.next,
-				   struct hrtimer, cb_entry);
-
-		debug_hrtimer_deactivate(timer);
-		timer_stats_account_hrtimer(timer);
-
-		fn = timer->function;
-		/*
-		 * A timer might have been added to the cb_pending list
-		 * when it was migrated during a cpu-offline operation.
-		 * Emulate hardirq context for such timers.
-		 */
-		if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU ||
-		    timer->cb_mode == HRTIMER_CB_IRQSAFE_UNLOCKED)
-			emulate_hardirq_ctx = 1;
-
-		__remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0);
-		spin_unlock_irq(&cpu_base->lock);
-
-		if (unlikely(emulate_hardirq_ctx)) {
-			local_irq_disable();
-			restart = fn(timer);
-			local_irq_enable();
-		} else
-			restart = fn(timer);
-
-		spin_lock_irq(&cpu_base->lock);
-
-		timer->state &= ~HRTIMER_STATE_CALLBACK;
-		if (restart == HRTIMER_RESTART) {
-			BUG_ON(hrtimer_active(timer));
-			/*
-			 * Enqueue the timer, allow reprogramming of the event
-			 * device
-			 */
-			enqueue_hrtimer(timer, timer->base, 1);
-		} else if (hrtimer_active(timer)) {
-			/*
-			 * If the timer was rearmed on another CPU, reprogram
-			 * the event device.
-			 */
-			struct hrtimer_clock_base *base = timer->base;
-
-			if (base->first == &timer->node &&
-			    hrtimer_reprogram(timer, base)) {
-				/*
-				 * Timer is expired. Thus move it from tree to
-				 * pending list again.
-				 */
-				__remove_hrtimer(timer, base,
-						 HRTIMER_STATE_PENDING, 0);
-				list_add_tail(&timer->cb_entry,
-					      &base->cpu_base->cb_pending);
-			}
-		}
-	}
-	spin_unlock_irq(&cpu_base->lock);
-}
-
 static void __run_hrtimer(struct hrtimer *timer)
 {
 	struct hrtimer_clock_base *base = timer->base;
@@ -1268,25 +1141,21 @@ static void __run_hrtimer(struct hrtimer *timer)
 	enum hrtimer_restart (*fn)(struct hrtimer *);
 	int restart;
 
+	WARN_ON(!irqs_disabled());
+
 	debug_hrtimer_deactivate(timer);
 	__remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
 	timer_stats_account_hrtimer(timer);
-
 	fn = timer->function;
-	if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU ||
-	    timer->cb_mode == HRTIMER_CB_IRQSAFE_UNLOCKED) {
-		/*
-		 * Used for scheduler timers, avoid lock inversion with
-		 * rq->lock and tasklist_lock.
-		 *
-		 * These timers are required to deal with enqueue expiry
-		 * themselves and are not allowed to migrate.
-		 */
-		spin_unlock(&cpu_base->lock);
-		restart = fn(timer);
-		spin_lock(&cpu_base->lock);
-	} else
-		restart = fn(timer);
+
+	/*
+	 * Because we run timers from hardirq context, there is no chance
+	 * they get migrated to another cpu, therefore its safe to unlock
+	 * the timer base.
+	 */
+	spin_unlock(&cpu_base->lock);
+	restart = fn(timer);
+	spin_lock(&cpu_base->lock);
 
 	/*
 	 * Note: We clear the CALLBACK bit after enqueue_hrtimer to avoid
@@ -1311,7 +1180,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 	struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
 	struct hrtimer_clock_base *base;
 	ktime_t expires_next, now;
-	int i, raise = 0;
+	int i;
 
 	BUG_ON(!cpu_base->hres_active);
 	cpu_base->nr_events++;
@@ -1360,16 +1229,6 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 				break;
 			}
 
-			/* Move softirq callbacks to the pending list */
-			if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) {
-				__remove_hrtimer(timer, base,
-						 HRTIMER_STATE_PENDING, 0);
-				list_add_tail(&timer->cb_entry,
-					      &base->cpu_base->cb_pending);
-				raise = 1;
-				continue;
-			}
-
 			__run_hrtimer(timer);
 		}
 		spin_unlock(&cpu_base->lock);
@@ -1383,10 +1242,6 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 		if (tick_program_event(expires_next, 0))
 			goto retry;
 	}
-
-	/* Raise softirq ? */
-	if (raise)
-		raise_softirq(HRTIMER_SOFTIRQ);
 }
 
 /**
@@ -1413,11 +1268,6 @@ void hrtimer_peek_ahead_timers(void)
 	local_irq_restore(flags);
 }
 
-static void run_hrtimer_softirq(struct softirq_action *h)
-{
-	run_hrtimer_pending(&__get_cpu_var(hrtimer_bases));
-}
-
 #endif	/* CONFIG_HIGH_RES_TIMERS */
 
 /*
@@ -1429,8 +1279,6 @@ static void run_hrtimer_softirq(struct softirq_action *h)
  */
 void hrtimer_run_pending(void)
 {
-	struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
-
 	if (hrtimer_hres_active())
 		return;
 
@@ -1444,8 +1292,6 @@ void hrtimer_run_pending(void)
 	 */
 	if (tick_check_oneshot_change(!hrtimer_is_hres_enabled()))
 		hrtimer_switch_to_hres();
-
-	run_hrtimer_pending(cpu_base);
 }
 
 /*
@@ -1482,14 +1328,6 @@ void hrtimer_run_queues(void)
 					hrtimer_get_expires_tv64(timer))
 				break;
 
-			if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) {
-				__remove_hrtimer(timer, base,
-					HRTIMER_STATE_PENDING, 0);
-				list_add_tail(&timer->cb_entry,
-					&base->cpu_base->cb_pending);
-				continue;
-			}
-
 			__run_hrtimer(timer);
 		}
 		spin_unlock(&cpu_base->lock);
@@ -1516,9 +1354,6 @@ void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
 {
 	sl->timer.function = hrtimer_wakeup;
 	sl->task = task;
-#ifdef CONFIG_HIGH_RES_TIMERS
-	sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED;
-#endif
 }
 
 static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode)
@@ -1655,36 +1490,22 @@ static void __cpuinit init_hrtimers_cpu(int cpu)
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++)
 		cpu_base->clock_base[i].cpu_base = cpu_base;
 
-	INIT_LIST_HEAD(&cpu_base->cb_pending);
 	hrtimer_init_hres(cpu_base);
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
 
-static int migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
-				struct hrtimer_clock_base *new_base, int dcpu)
+static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
+				 struct hrtimer_clock_base *new_base, int dcpu)
 {
 	struct hrtimer *timer;
 	struct rb_node *node;
-	int raise = 0;
 
 	while ((node = rb_first(&old_base->active))) {
 		timer = rb_entry(node, struct hrtimer, node);
 		BUG_ON(hrtimer_callback_running(timer));
 		debug_hrtimer_deactivate(timer);
 
-		/*
-		 * Should not happen. Per CPU timers should be
-		 * canceled _before_ the migration code is called
-		 */
-		if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU) {
-			__remove_hrtimer(timer, old_base,
-					 HRTIMER_STATE_INACTIVE, 0);
-			WARN(1, "hrtimer (%p %p)active but cpu %d dead\n",
-			     timer, timer->function, dcpu);
-			continue;
-		}
-
 		/*
 		 * Mark it as STATE_MIGRATE not INACTIVE otherwise the
 		 * timer could be seen as !active and just vanish away
@@ -1708,48 +1529,19 @@ static int migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
 		 * otherwise we end up with a stale timer.
 		 */
 		if (timer->state == HRTIMER_STATE_MIGRATE) {
-			timer->state = HRTIMER_STATE_PENDING;
-			list_add_tail(&timer->cb_entry,
-				      &new_base->cpu_base->cb_pending);
-			raise = 1;
+			/* XXX: running on offline cpu */
+			__run_hrtimer(timer);
 		}
 #endif
 		/* Clear the migration state bit */
 		timer->state &= ~HRTIMER_STATE_MIGRATE;
 	}
-	return raise;
 }
 
-#ifdef CONFIG_HIGH_RES_TIMERS
-static int migrate_hrtimer_pending(struct hrtimer_cpu_base *old_base,
-				   struct hrtimer_cpu_base *new_base)
-{
-	struct hrtimer *timer;
-	int raise = 0;
-
-	while (!list_empty(&old_base->cb_pending)) {
-		timer = list_entry(old_base->cb_pending.next,
-				   struct hrtimer, cb_entry);
-
-		__remove_hrtimer(timer, timer->base, HRTIMER_STATE_PENDING, 0);
-		timer->base = &new_base->clock_base[timer->base->index];
-		list_add_tail(&timer->cb_entry, &new_base->cb_pending);
-		raise = 1;
-	}
-	return raise;
-}
-#else
-static int migrate_hrtimer_pending(struct hrtimer_cpu_base *old_base,
-				   struct hrtimer_cpu_base *new_base)
-{
-	return 0;
-}
-#endif
-
 static void migrate_hrtimers(int cpu)
 {
 	struct hrtimer_cpu_base *old_base, *new_base;
-	int i, raise = 0;
+	int i;
 
 	BUG_ON(cpu_online(cpu));
 	old_base = &per_cpu(hrtimer_bases, cpu);
@@ -1764,20 +1556,13 @@ static void migrate_hrtimers(int cpu)
 	spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
 
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
-		if (migrate_hrtimer_list(&old_base->clock_base[i],
-					 &new_base->clock_base[i], cpu))
-			raise = 1;
+		migrate_hrtimer_list(&old_base->clock_base[i],
+				     &new_base->clock_base[i], cpu);
 	}
 
-	if (migrate_hrtimer_pending(old_base, new_base))
-		raise = 1;
-
 	spin_unlock(&old_base->lock);
 	spin_unlock_irq(&new_base->lock);
 	put_cpu_var(hrtimer_bases);
-
-	if (raise)
-		hrtimer_raise_softirq();
 }
 #endif /* CONFIG_HOTPLUG_CPU */
 
@@ -1817,9 +1602,6 @@ void __init hrtimers_init(void)
 	hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
 			  (void *)(long)smp_processor_id());
 	register_cpu_notifier(&hrtimers_nb);
-#ifdef CONFIG_HIGH_RES_TIMERS
-	open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq);
-#endif
 }
 
 /**
diff --git a/kernel/sched.c b/kernel/sched.c
index 9b1e79371c20..5ac5e9536168 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -203,7 +203,6 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
 	hrtimer_init(&rt_b->rt_period_timer,
 			CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	rt_b->rt_period_timer.function = sched_rt_period_timer;
-	rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED;
 }
 
 static inline int rt_bandwidth_enabled(void)
@@ -1139,7 +1138,6 @@ static void init_rq_hrtick(struct rq *rq)
 
 	hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	rq->hrtick_timer.function = hrtick;
-	rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
 }
 #else	/* CONFIG_SCHED_HRTICK */
 static inline void hrtick_clear(struct rq *rq)
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 8ff15e5d486b..f5f793d92415 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -131,7 +131,7 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
 {
 	enum hrtimer_restart res = HRTIMER_NORESTART;
 
-	write_seqlock_irq(&xtime_lock);
+	write_seqlock(&xtime_lock);
 
 	switch (time_state) {
 	case TIME_OK:
@@ -164,7 +164,7 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
 	}
 	update_vsyscall(&xtime, clock);
 
-	write_sequnlock_irq(&xtime_lock);
+	write_sequnlock(&xtime_lock);
 
 	return res;
 }
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 342fc9ccab46..502a81e2639b 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -681,7 +681,6 @@ void tick_setup_sched_timer(void)
 	 */
 	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
 	ts->sched_timer.function = tick_sched_timer;
-	ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
 
 	/* Get the next period (per cpu) */
 	hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index 9587d3bcba55..ae542e2e38d5 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -202,7 +202,6 @@ static void start_stack_timer(int cpu)
 
 	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	hrtimer->function = stack_trace_timer_fn;
-	hrtimer->cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
 
 	hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL);
 }
diff --git a/sound/drivers/pcsp/pcsp.c b/sound/drivers/pcsp/pcsp.c
index 1899cf0685bc..8e52b2a8a13a 100644
--- a/sound/drivers/pcsp/pcsp.c
+++ b/sound/drivers/pcsp/pcsp.c
@@ -96,7 +96,6 @@ static int __devinit snd_card_pcsp_probe(int devnum, struct device *dev)
 		return -EINVAL;
 
 	hrtimer_init(&pcsp_chip.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-	pcsp_chip.timer.cb_mode = HRTIMER_CB_SOFTIRQ;
 	pcsp_chip.timer.function = pcsp_do_timer;
 
 	card = snd_card_new(index, id, THIS_MODULE, 0);
-- 
cgit v1.2.3


From 193da6092764ab693da7170c5badbf60d7758c1d Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 26 Nov 2008 12:03:54 +0100
Subject: fuse: move FUSE_MINOR to miscdevice.h

Move FUSE_MINOR to miscdevice.h.  While at it, de-uglify the file.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 include/linux/fuse.h       |  3 ---
 include/linux/miscdevice.h | 42 +++++++++++++++++++++---------------------
 2 files changed, 21 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index 350fe9767bbc..7caa473306e4 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -40,9 +40,6 @@
 /** The major number of the fuse character device */
 #define FUSE_MAJOR MISC_MAJOR
 
-/** The minor number of the fuse character device */
-#define FUSE_MINOR 229
-
 /* Make sure all structures are padded to 64bit boundary, so 32bit
    userspace works under 64bit kernels */
 
diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h
index 26433ec520b3..a820f816a49e 100644
--- a/include/linux/miscdevice.h
+++ b/include/linux/miscdevice.h
@@ -3,33 +3,33 @@
 #include <linux/module.h>
 #include <linux/major.h>
 
-#define PSMOUSE_MINOR  1
-#define MS_BUSMOUSE_MINOR 2
-#define ATIXL_BUSMOUSE_MINOR 3
-/*#define AMIGAMOUSE_MINOR 4	FIXME OBSOLETE */
-#define ATARIMOUSE_MINOR 5
-#define SUN_MOUSE_MINOR 6
-#define APOLLO_MOUSE_MINOR 7
-#define PC110PAD_MINOR 9
-/*#define ADB_MOUSE_MINOR 10	FIXME OBSOLETE */
+#define PSMOUSE_MINOR		1
+#define MS_BUSMOUSE_MINOR	2
+#define ATIXL_BUSMOUSE_MINOR	3
+/*#define AMIGAMOUSE_MINOR	4	FIXME OBSOLETE */
+#define ATARIMOUSE_MINOR	5
+#define SUN_MOUSE_MINOR		6
+#define APOLLO_MOUSE_MINOR	7
+#define PC110PAD_MINOR		9
+/*#define ADB_MOUSE_MINOR	10	FIXME OBSOLETE */
 #define WATCHDOG_MINOR		130	/* Watchdog timer     */
 #define TEMP_MINOR		131	/* Temperature Sensor */
-#define RTC_MINOR 135
+#define RTC_MINOR		135
 #define EFI_RTC_MINOR		136	/* EFI Time services */
-#define SUN_OPENPROM_MINOR 139
+#define SUN_OPENPROM_MINOR	139
 #define DMAPI_MINOR		140	/* DMAPI */
-#define NVRAM_MINOR 144
-#define SGI_MMTIMER        153
+#define NVRAM_MINOR		144
+#define SGI_MMTIMER		153
 #define STORE_QUEUE_MINOR	155
-#define I2O_MINOR 166
+#define I2O_MINOR		166
 #define MICROCODE_MINOR		184
-#define MWAVE_MINOR	219		/* ACP/Mwave Modem */
-#define MPT_MINOR	220
-#define MISC_DYNAMIC_MINOR 255
-
-#define TUN_MINOR	     200
-#define	HPET_MINOR	     228
-#define KVM_MINOR            232
+#define TUN_MINOR		200
+#define MWAVE_MINOR		219	/* ACP/Mwave Modem */
+#define MPT_MINOR		220
+#define HPET_MINOR		228
+#define FUSE_MINOR		229
+#define KVM_MINOR		232
+#define MISC_DYNAMIC_MINOR	255
 
 struct device;
 
-- 
cgit v1.2.3


From 59efec7b903987dcb60b9ebc85c7acd4443a11a1 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 26 Nov 2008 12:03:55 +0100
Subject: fuse: implement ioctl support

Generic ioctl support is tricky to implement because only the ioctl
implementation itself knows which memory regions need to be read
and/or written.  To support this, fuse client can request retry of
ioctl specifying memory regions to read and write.  Deep copying
(nested pointers) can be implemented by retrying multiple times
resolving one depth of dereference at a time.

For security and cleanliness considerations, ioctl implementation has
restricted mode where the kernel determines data transfer directions
and sizes using the _IOC_*() macros on the ioctl command.  In this
mode, retry is not allowed.

For all FUSE servers, restricted mode is enforced.  Unrestricted ioctl
will be used by CUSE.

Plese read the comment on top of fs/fuse/file.c::fuse_file_do_ioctl()
for more information.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/fuse/file.c       | 280 +++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/fuse.h |  32 ++++++
 2 files changed, 312 insertions(+)

(limited to 'include/linux')

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 617269803913..baed06ea7622 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1469,6 +1469,282 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin)
 	return retval;
 }
 
+static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov,
+			unsigned int nr_segs, size_t bytes, bool to_user)
+{
+	struct iov_iter ii;
+	int page_idx = 0;
+
+	if (!bytes)
+		return 0;
+
+	iov_iter_init(&ii, iov, nr_segs, bytes, 0);
+
+	while (iov_iter_count(&ii)) {
+		struct page *page = pages[page_idx++];
+		size_t todo = min_t(size_t, PAGE_SIZE, iov_iter_count(&ii));
+		void *kaddr, *map;
+
+		kaddr = map = kmap(page);
+
+		while (todo) {
+			char __user *uaddr = ii.iov->iov_base + ii.iov_offset;
+			size_t iov_len = ii.iov->iov_len - ii.iov_offset;
+			size_t copy = min(todo, iov_len);
+			size_t left;
+
+			if (!to_user)
+				left = copy_from_user(kaddr, uaddr, copy);
+			else
+				left = copy_to_user(uaddr, kaddr, copy);
+
+			if (unlikely(left))
+				return -EFAULT;
+
+			iov_iter_advance(&ii, copy);
+			todo -= copy;
+			kaddr += copy;
+		}
+
+		kunmap(map);
+	}
+
+	return 0;
+}
+
+/*
+ * For ioctls, there is no generic way to determine how much memory
+ * needs to be read and/or written.  Furthermore, ioctls are allowed
+ * to dereference the passed pointer, so the parameter requires deep
+ * copying but FUSE has no idea whatsoever about what to copy in or
+ * out.
+ *
+ * This is solved by allowing FUSE server to retry ioctl with
+ * necessary in/out iovecs.  Let's assume the ioctl implementation
+ * needs to read in the following structure.
+ *
+ * struct a {
+ *	char	*buf;
+ *	size_t	buflen;
+ * }
+ *
+ * On the first callout to FUSE server, inarg->in_size and
+ * inarg->out_size will be NULL; then, the server completes the ioctl
+ * with FUSE_IOCTL_RETRY set in out->flags, out->in_iovs set to 1 and
+ * the actual iov array to
+ *
+ * { { .iov_base = inarg.arg,	.iov_len = sizeof(struct a) } }
+ *
+ * which tells FUSE to copy in the requested area and retry the ioctl.
+ * On the second round, the server has access to the structure and
+ * from that it can tell what to look for next, so on the invocation,
+ * it sets FUSE_IOCTL_RETRY, out->in_iovs to 2 and iov array to
+ *
+ * { { .iov_base = inarg.arg,	.iov_len = sizeof(struct a)	},
+ *   { .iov_base = a.buf,	.iov_len = a.buflen		} }
+ *
+ * FUSE will copy both struct a and the pointed buffer from the
+ * process doing the ioctl and retry ioctl with both struct a and the
+ * buffer.
+ *
+ * This time, FUSE server has everything it needs and completes ioctl
+ * without FUSE_IOCTL_RETRY which finishes the ioctl call.
+ *
+ * Copying data out works the same way.
+ *
+ * Note that if FUSE_IOCTL_UNRESTRICTED is clear, the kernel
+ * automatically initializes in and out iovs by decoding @cmd with
+ * _IOC_* macros and the server is not allowed to request RETRY.  This
+ * limits ioctl data transfers to well-formed ioctls and is the forced
+ * behavior for all FUSE servers.
+ */
+static long fuse_file_do_ioctl(struct file *file, unsigned int cmd,
+			       unsigned long arg, unsigned int flags)
+{
+	struct inode *inode = file->f_dentry->d_inode;
+	struct fuse_file *ff = file->private_data;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_ioctl_in inarg = {
+		.fh = ff->fh,
+		.cmd = cmd,
+		.arg = arg,
+		.flags = flags
+	};
+	struct fuse_ioctl_out outarg;
+	struct fuse_req *req = NULL;
+	struct page **pages = NULL;
+	struct page *iov_page = NULL;
+	struct iovec *in_iov = NULL, *out_iov = NULL;
+	unsigned int in_iovs = 0, out_iovs = 0, num_pages = 0, max_pages;
+	size_t in_size, out_size, transferred;
+	int err;
+
+	/* assume all the iovs returned by client always fits in a page */
+	BUILD_BUG_ON(sizeof(struct iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE);
+
+	if (!fuse_allow_task(fc, current))
+		return -EACCES;
+
+	err = -EIO;
+	if (is_bad_inode(inode))
+		goto out;
+
+	err = -ENOMEM;
+	pages = kzalloc(sizeof(pages[0]) * FUSE_MAX_PAGES_PER_REQ, GFP_KERNEL);
+	iov_page = alloc_page(GFP_KERNEL);
+	if (!pages || !iov_page)
+		goto out;
+
+	/*
+	 * If restricted, initialize IO parameters as encoded in @cmd.
+	 * RETRY from server is not allowed.
+	 */
+	if (!(flags & FUSE_IOCTL_UNRESTRICTED)) {
+		struct iovec *iov = page_address(iov_page);
+
+		iov->iov_base = (void *)arg;
+		iov->iov_len = _IOC_SIZE(cmd);
+
+		if (_IOC_DIR(cmd) & _IOC_WRITE) {
+			in_iov = iov;
+			in_iovs = 1;
+		}
+
+		if (_IOC_DIR(cmd) & _IOC_READ) {
+			out_iov = iov;
+			out_iovs = 1;
+		}
+	}
+
+ retry:
+	inarg.in_size = in_size = iov_length(in_iov, in_iovs);
+	inarg.out_size = out_size = iov_length(out_iov, out_iovs);
+
+	/*
+	 * Out data can be used either for actual out data or iovs,
+	 * make sure there always is at least one page.
+	 */
+	out_size = max_t(size_t, out_size, PAGE_SIZE);
+	max_pages = DIV_ROUND_UP(max(in_size, out_size), PAGE_SIZE);
+
+	/* make sure there are enough buffer pages and init request with them */
+	err = -ENOMEM;
+	if (max_pages > FUSE_MAX_PAGES_PER_REQ)
+		goto out;
+	while (num_pages < max_pages) {
+		pages[num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
+		if (!pages[num_pages])
+			goto out;
+		num_pages++;
+	}
+
+	req = fuse_get_req(fc);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
+		req = NULL;
+		goto out;
+	}
+	memcpy(req->pages, pages, sizeof(req->pages[0]) * num_pages);
+	req->num_pages = num_pages;
+
+	/* okay, let's send it to the client */
+	req->in.h.opcode = FUSE_IOCTL;
+	req->in.h.nodeid = get_node_id(inode);
+	req->in.numargs = 1;
+	req->in.args[0].size = sizeof(inarg);
+	req->in.args[0].value = &inarg;
+	if (in_size) {
+		req->in.numargs++;
+		req->in.args[1].size = in_size;
+		req->in.argpages = 1;
+
+		err = fuse_ioctl_copy_user(pages, in_iov, in_iovs, in_size,
+					   false);
+		if (err)
+			goto out;
+	}
+
+	req->out.numargs = 2;
+	req->out.args[0].size = sizeof(outarg);
+	req->out.args[0].value = &outarg;
+	req->out.args[1].size = out_size;
+	req->out.argpages = 1;
+	req->out.argvar = 1;
+
+	request_send(fc, req);
+	err = req->out.h.error;
+	transferred = req->out.args[1].size;
+	fuse_put_request(fc, req);
+	req = NULL;
+	if (err)
+		goto out;
+
+	/* did it ask for retry? */
+	if (outarg.flags & FUSE_IOCTL_RETRY) {
+		char *vaddr;
+
+		/* no retry if in restricted mode */
+		err = -EIO;
+		if (!(flags & FUSE_IOCTL_UNRESTRICTED))
+			goto out;
+
+		in_iovs = outarg.in_iovs;
+		out_iovs = outarg.out_iovs;
+
+		/*
+		 * Make sure things are in boundary, separate checks
+		 * are to protect against overflow.
+		 */
+		err = -ENOMEM;
+		if (in_iovs > FUSE_IOCTL_MAX_IOV ||
+		    out_iovs > FUSE_IOCTL_MAX_IOV ||
+		    in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV)
+			goto out;
+
+		err = -EIO;
+		if ((in_iovs + out_iovs) * sizeof(struct iovec) != transferred)
+			goto out;
+
+		/* okay, copy in iovs and retry */
+		vaddr = kmap_atomic(pages[0], KM_USER0);
+		memcpy(page_address(iov_page), vaddr, transferred);
+		kunmap_atomic(vaddr, KM_USER0);
+
+		in_iov = page_address(iov_page);
+		out_iov = in_iov + in_iovs;
+
+		goto retry;
+	}
+
+	err = -EIO;
+	if (transferred > inarg.out_size)
+		goto out;
+
+	err = fuse_ioctl_copy_user(pages, out_iov, out_iovs, transferred, true);
+ out:
+	if (req)
+		fuse_put_request(fc, req);
+	if (iov_page)
+		__free_page(iov_page);
+	while (num_pages)
+		__free_page(pages[--num_pages]);
+	kfree(pages);
+
+	return err ? err : outarg.result;
+}
+
+static long fuse_file_ioctl(struct file *file, unsigned int cmd,
+			    unsigned long arg)
+{
+	return fuse_file_do_ioctl(file, cmd, arg, 0);
+}
+
+static long fuse_file_compat_ioctl(struct file *file, unsigned int cmd,
+				   unsigned long arg)
+{
+	return fuse_file_do_ioctl(file, cmd, arg, FUSE_IOCTL_COMPAT);
+}
+
 static const struct file_operations fuse_file_operations = {
 	.llseek		= fuse_file_llseek,
 	.read		= do_sync_read,
@@ -1483,6 +1759,8 @@ static const struct file_operations fuse_file_operations = {
 	.lock		= fuse_file_lock,
 	.flock		= fuse_file_flock,
 	.splice_read	= generic_file_splice_read,
+	.unlocked_ioctl	= fuse_file_ioctl,
+	.compat_ioctl	= fuse_file_compat_ioctl,
 };
 
 static const struct file_operations fuse_direct_io_file_operations = {
@@ -1495,6 +1773,8 @@ static const struct file_operations fuse_direct_io_file_operations = {
 	.fsync		= fuse_fsync,
 	.lock		= fuse_file_lock,
 	.flock		= fuse_file_flock,
+	.unlocked_ioctl	= fuse_file_ioctl,
+	.compat_ioctl	= fuse_file_compat_ioctl,
 	/* no mmap and splice_read */
 };
 
diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index 7caa473306e4..608e300ae883 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -148,6 +148,21 @@ struct fuse_file_lock {
  */
 #define FUSE_READ_LOCKOWNER	(1 << 1)
 
+/**
+ * Ioctl flags
+ *
+ * FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine
+ * FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed
+ * FUSE_IOCTL_RETRY: retry with new iovecs
+ *
+ * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs
+ */
+#define FUSE_IOCTL_COMPAT	(1 << 0)
+#define FUSE_IOCTL_UNRESTRICTED	(1 << 1)
+#define FUSE_IOCTL_RETRY	(1 << 2)
+
+#define FUSE_IOCTL_MAX_IOV	256
+
 enum fuse_opcode {
 	FUSE_LOOKUP	   = 1,
 	FUSE_FORGET	   = 2,  /* no reply */
@@ -185,6 +200,7 @@ enum fuse_opcode {
 	FUSE_INTERRUPT     = 36,
 	FUSE_BMAP          = 37,
 	FUSE_DESTROY       = 38,
+	FUSE_IOCTL         = 39,
 };
 
 /* The read buffer is required to be at least 8k, but may be much larger */
@@ -385,6 +401,22 @@ struct fuse_bmap_out {
 	__u64	block;
 };
 
+struct fuse_ioctl_in {
+	__u64	fh;
+	__u32	flags;
+	__u32	cmd;
+	__u64	arg;
+	__u32	in_size;
+	__u32	out_size;
+};
+
+struct fuse_ioctl_out {
+	__s32	result;
+	__u32	flags;
+	__u32	in_iovs;
+	__u32	out_iovs;
+};
+
 struct fuse_in_header {
 	__u32	len;
 	__u32	opcode;
-- 
cgit v1.2.3


From 8599396b5062bf6bd2a0b433503849e2322df1c2 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 26 Nov 2008 12:03:55 +0100
Subject: fuse: implement unsolicited notification

Clients always used to write only in response to read requests.  To
implement poll efficiently, clients should be able to issue
unsolicited notifications.  This patch implements basic notification
support.

Zero fuse_out_header.unique is now accepted and considered unsolicited
notification and the error field contains notification code.  This
patch doesn't implement any actual notification.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/fuse/dev.c        | 27 +++++++++++++++++++++++++--
 include/linux/fuse.h |  4 ++++
 2 files changed, 29 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 225388f54ae7..ffd670bb8c8c 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -816,6 +816,15 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
 	return err;
 }
 
+static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
+		       unsigned int size, struct fuse_copy_state *cs)
+{
+	switch (code) {
+	default:
+		return -EINVAL;
+	}
+}
+
 /* Look up request on processing list by unique ID */
 static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
 {
@@ -879,9 +888,23 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
 	err = fuse_copy_one(&cs, &oh, sizeof(oh));
 	if (err)
 		goto err_finish;
+
+	err = -EINVAL;
+	if (oh.len != nbytes)
+		goto err_finish;
+
+	/*
+	 * Zero oh.unique indicates unsolicited notification message
+	 * and error contains notification code.
+	 */
+	if (!oh.unique) {
+		err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), &cs);
+		fuse_copy_finish(&cs);
+		return err ? err : nbytes;
+	}
+
 	err = -EINVAL;
-	if (!oh.unique || oh.error <= -1000 || oh.error > 0 ||
-	    oh.len != nbytes)
+	if (oh.error <= -1000 || oh.error > 0)
 		goto err_finish;
 
 	spin_lock(&fc->lock);
diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index 608e300ae883..abde9949e2c0 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -203,6 +203,10 @@ enum fuse_opcode {
 	FUSE_IOCTL         = 39,
 };
 
+enum fuse_notify_code {
+	FUSE_NOTIFY_CODE_MAX,
+};
+
 /* The read buffer is required to be at least 8k, but may be much larger */
 #define FUSE_MIN_READ_BUFFER 8192
 
-- 
cgit v1.2.3


From 95668a69a4bb862063c4d28a746e55107dee7b98 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 26 Nov 2008 12:03:55 +0100
Subject: fuse: implement poll support

Implement poll support.  Polled files are indexed using kh in a RB
tree rooted at fuse_conn->polled_files.

Client should send FUSE_NOTIFY_POLL notification once after processing
FUSE_POLL which has FUSE_POLL_SCHEDULE_NOTIFY set.  Sending
notification unconditionally after the latest poll or everytime file
content might have changed is inefficient but won't cause malfunction.

fuse_file_poll() can sleep and requires patches from the following
thread which allows f_op->poll() to sleep.

  http://thread.gmane.org/gmane.linux.kernel/726176

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/fuse/dev.c        |  19 ++++++++
 fs/fuse/file.c       | 132 +++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/fuse/fuse_i.h     |  20 ++++++++
 fs/fuse/inode.c      |   1 +
 include/linux/fuse.h |  25 ++++++++++
 5 files changed, 197 insertions(+)

(limited to 'include/linux')

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index ffd670bb8c8c..6176e444c76e 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -816,10 +816,29 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
 	return err;
 }
 
+static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size,
+			    struct fuse_copy_state *cs)
+{
+	struct fuse_notify_poll_wakeup_out outarg;
+	int err;
+
+	if (size != sizeof(outarg))
+		return -EINVAL;
+
+	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
+	if (err)
+		return err;
+
+	return fuse_notify_poll_wakeup(fc, &outarg);
+}
+
 static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
 		       unsigned int size, struct fuse_copy_state *cs)
 {
 	switch (code) {
+	case FUSE_NOTIFY_POLL:
+		return fuse_notify_poll(fc, size, cs);
+
 	default:
 		return -EINVAL;
 	}
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index a28ced678d38..b3a944e4bb9c 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -62,6 +62,8 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
 			ff->kh = ++fc->khctr;
 			spin_unlock(&fc->lock);
 		}
+		RB_CLEAR_NODE(&ff->polled_node);
+		init_waitqueue_head(&ff->poll_wait);
 	}
 	return ff;
 }
@@ -170,7 +172,11 @@ int fuse_release_common(struct inode *inode, struct file *file, int isdir)
 
 		spin_lock(&fc->lock);
 		list_del(&ff->write_entry);
+		if (!RB_EMPTY_NODE(&ff->polled_node))
+			rb_erase(&ff->polled_node, &fc->polled_files);
 		spin_unlock(&fc->lock);
+
+		wake_up_interruptible_sync(&ff->poll_wait);
 		/*
 		 * Normally this will send the RELEASE request,
 		 * however if some asynchronous READ or WRITE requests
@@ -1749,6 +1755,130 @@ static long fuse_file_compat_ioctl(struct file *file, unsigned int cmd,
 	return fuse_file_do_ioctl(file, cmd, arg, FUSE_IOCTL_COMPAT);
 }
 
+/*
+ * All files which have been polled are linked to RB tree
+ * fuse_conn->polled_files which is indexed by kh.  Walk the tree and
+ * find the matching one.
+ */
+static struct rb_node **fuse_find_polled_node(struct fuse_conn *fc, u64 kh,
+					      struct rb_node **parent_out)
+{
+	struct rb_node **link = &fc->polled_files.rb_node;
+	struct rb_node *last = NULL;
+
+	while (*link) {
+		struct fuse_file *ff;
+
+		last = *link;
+		ff = rb_entry(last, struct fuse_file, polled_node);
+
+		if (kh < ff->kh)
+			link = &last->rb_left;
+		else if (kh > ff->kh)
+			link = &last->rb_right;
+		else
+			return link;
+	}
+
+	if (parent_out)
+		*parent_out = last;
+	return link;
+}
+
+/*
+ * The file is about to be polled.  Make sure it's on the polled_files
+ * RB tree.  Note that files once added to the polled_files tree are
+ * not removed before the file is released.  This is because a file
+ * polled once is likely to be polled again.
+ */
+static void fuse_register_polled_file(struct fuse_conn *fc,
+				      struct fuse_file *ff)
+{
+	spin_lock(&fc->lock);
+	if (RB_EMPTY_NODE(&ff->polled_node)) {
+		struct rb_node **link, *parent;
+
+		link = fuse_find_polled_node(fc, ff->kh, &parent);
+		BUG_ON(*link);
+		rb_link_node(&ff->polled_node, parent, link);
+		rb_insert_color(&ff->polled_node, &fc->polled_files);
+	}
+	spin_unlock(&fc->lock);
+}
+
+static unsigned fuse_file_poll(struct file *file, poll_table *wait)
+{
+	struct inode *inode = file->f_dentry->d_inode;
+	struct fuse_file *ff = file->private_data;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_poll_in inarg = { .fh = ff->fh, .kh = ff->kh };
+	struct fuse_poll_out outarg;
+	struct fuse_req *req;
+	int err;
+
+	if (fc->no_poll)
+		return DEFAULT_POLLMASK;
+
+	poll_wait(file, &ff->poll_wait, wait);
+
+	/*
+	 * Ask for notification iff there's someone waiting for it.
+	 * The client may ignore the flag and always notify.
+	 */
+	if (waitqueue_active(&ff->poll_wait)) {
+		inarg.flags |= FUSE_POLL_SCHEDULE_NOTIFY;
+		fuse_register_polled_file(fc, ff);
+	}
+
+	req = fuse_get_req(fc);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	req->in.h.opcode = FUSE_POLL;
+	req->in.h.nodeid = get_node_id(inode);
+	req->in.numargs = 1;
+	req->in.args[0].size = sizeof(inarg);
+	req->in.args[0].value = &inarg;
+	req->out.numargs = 1;
+	req->out.args[0].size = sizeof(outarg);
+	req->out.args[0].value = &outarg;
+	request_send(fc, req);
+	err = req->out.h.error;
+	fuse_put_request(fc, req);
+
+	if (!err)
+		return outarg.revents;
+	if (err == -ENOSYS) {
+		fc->no_poll = 1;
+		return DEFAULT_POLLMASK;
+	}
+	return POLLERR;
+}
+
+/*
+ * This is called from fuse_handle_notify() on FUSE_NOTIFY_POLL and
+ * wakes up the poll waiters.
+ */
+int fuse_notify_poll_wakeup(struct fuse_conn *fc,
+			    struct fuse_notify_poll_wakeup_out *outarg)
+{
+	u64 kh = outarg->kh;
+	struct rb_node **link;
+
+	spin_lock(&fc->lock);
+
+	link = fuse_find_polled_node(fc, kh, NULL);
+	if (*link) {
+		struct fuse_file *ff;
+
+		ff = rb_entry(*link, struct fuse_file, polled_node);
+		wake_up_interruptible_sync(&ff->poll_wait);
+	}
+
+	spin_unlock(&fc->lock);
+	return 0;
+}
+
 static const struct file_operations fuse_file_operations = {
 	.llseek		= fuse_file_llseek,
 	.read		= do_sync_read,
@@ -1765,6 +1895,7 @@ static const struct file_operations fuse_file_operations = {
 	.splice_read	= generic_file_splice_read,
 	.unlocked_ioctl	= fuse_file_ioctl,
 	.compat_ioctl	= fuse_file_compat_ioctl,
+	.poll		= fuse_file_poll,
 };
 
 static const struct file_operations fuse_direct_io_file_operations = {
@@ -1779,6 +1910,7 @@ static const struct file_operations fuse_direct_io_file_operations = {
 	.flock		= fuse_file_flock,
 	.unlocked_ioctl	= fuse_file_ioctl,
 	.compat_ioctl	= fuse_file_compat_ioctl,
+	.poll		= fuse_file_poll,
 	/* no mmap and splice_read */
 };
 
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 86f013303828..986fbd4c1ff5 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -19,6 +19,8 @@
 #include <linux/backing-dev.h>
 #include <linux/mutex.h>
 #include <linux/rwsem.h>
+#include <linux/rbtree.h>
+#include <linux/poll.h>
 
 /** Max number of pages that can be used in a single read request */
 #define FUSE_MAX_PAGES_PER_REQ 32
@@ -111,6 +113,12 @@ struct fuse_file {
 
 	/** Entry on inode's write_files list */
 	struct list_head write_entry;
+
+	/** RB node to be linked on fuse_conn->polled_files */
+	struct rb_node polled_node;
+
+	/** Wait queue head for poll */
+	wait_queue_head_t poll_wait;
 };
 
 /** One input argument of a request */
@@ -328,6 +336,9 @@ struct fuse_conn {
 	/** The next unique kernel file handle */
 	u64 khctr;
 
+	/** rbtree of fuse_files waiting for poll events indexed by ph */
+	struct rb_root polled_files;
+
 	/** Number of requests currently in the background */
 	unsigned num_background;
 
@@ -416,6 +427,9 @@ struct fuse_conn {
 	/** Is bmap not implemented by fs? */
 	unsigned no_bmap:1;
 
+	/** Is poll not implemented by fs? */
+	unsigned no_poll:1;
+
 	/** Do multi-page cached writes */
 	unsigned big_writes:1;
 
@@ -524,6 +538,12 @@ int fuse_release_common(struct inode *inode, struct file *file, int isdir);
 int fuse_fsync_common(struct file *file, struct dentry *de, int datasync,
 		      int isdir);
 
+/**
+ * Notify poll wakeup
+ */
+int fuse_notify_poll_wakeup(struct fuse_conn *fc,
+			    struct fuse_notify_poll_wakeup_out *outarg);
+
 /**
  * Initialize file operations on a regular file
  */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 0e15bc221d23..ba7256128084 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -486,6 +486,7 @@ static struct fuse_conn *new_conn(struct super_block *sb)
 		/* fuse does it's own writeback accounting */
 		fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
 		fc->khctr = 0;
+		fc->polled_files = RB_ROOT;
 		fc->dev = sb->s_dev;
 		err = bdi_init(&fc->bdi);
 		if (err)
diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index abde9949e2c0..5650cf033e73 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -163,6 +163,13 @@ struct fuse_file_lock {
 
 #define FUSE_IOCTL_MAX_IOV	256
 
+/**
+ * Poll flags
+ *
+ * FUSE_POLL_SCHEDULE_NOTIFY: request poll notify
+ */
+#define FUSE_POLL_SCHEDULE_NOTIFY (1 << 0)
+
 enum fuse_opcode {
 	FUSE_LOOKUP	   = 1,
 	FUSE_FORGET	   = 2,  /* no reply */
@@ -201,9 +208,11 @@ enum fuse_opcode {
 	FUSE_BMAP          = 37,
 	FUSE_DESTROY       = 38,
 	FUSE_IOCTL         = 39,
+	FUSE_POLL          = 40,
 };
 
 enum fuse_notify_code {
+	FUSE_NOTIFY_POLL   = 1,
 	FUSE_NOTIFY_CODE_MAX,
 };
 
@@ -421,6 +430,22 @@ struct fuse_ioctl_out {
 	__u32	out_iovs;
 };
 
+struct fuse_poll_in {
+	__u64	fh;
+	__u64	kh;
+	__u32	flags;
+	__u32   padding;
+};
+
+struct fuse_poll_out {
+	__u32	revents;
+	__u32	padding;
+};
+
+struct fuse_notify_poll_wakeup_out {
+	__u64	kh;
+};
+
 struct fuse_in_header {
 	__u32	len;
 	__u32	opcode;
-- 
cgit v1.2.3


From dcc7461eef7341e84e2f7274f904ce01a43b2506 Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@csr.com>
Date: Wed, 26 Nov 2008 13:36:59 +0000
Subject: wusb: add debug files for ASL, PZL and DI to the whci-hcd driver

Add asl, pzl and di debugfs files to uwb/uwbN/wusbhc for WHCI host
controller.  These dump the current ASL, PZL and DI buffer.

Signed-off-by: David Vrabel <david.vrabel@csr.com>
---
 drivers/usb/host/whci/Kbuild  |   1 +
 drivers/usb/host/whci/asl.c   |  25 ------
 drivers/usb/host/whci/debug.c | 189 ++++++++++++++++++++++++++++++++++++++++++
 drivers/usb/host/whci/hcd.c   |   3 +
 drivers/usb/host/whci/pzl.c   |  28 -------
 drivers/usb/host/whci/qset.c  |  40 ---------
 drivers/usb/host/whci/whcd.h  |   9 +-
 drivers/usb/host/whci/wusb.c  |  27 ------
 drivers/uwb/pal.c             |   5 ++
 drivers/uwb/uwb-debug.c       |  13 +++
 drivers/uwb/uwb-internal.h    |   3 +-
 include/linux/uwb.h           |   3 +
 12 files changed, 223 insertions(+), 123 deletions(-)
 create mode 100644 drivers/usb/host/whci/debug.c

(limited to 'include/linux')

diff --git a/drivers/usb/host/whci/Kbuild b/drivers/usb/host/whci/Kbuild
index 26a3871ea0f9..11e5040b8337 100644
--- a/drivers/usb/host/whci/Kbuild
+++ b/drivers/usb/host/whci/Kbuild
@@ -2,6 +2,7 @@ obj-$(CONFIG_USB_WHCI_HCD) += whci-hcd.o
 
 whci-hcd-y := \
 	asl.o	\
+	debug.o \
 	hcd.o 	\
 	hw.o	\
 	init.o	\
diff --git a/drivers/usb/host/whci/asl.c b/drivers/usb/host/whci/asl.c
index ba99a7a3f81a..577c0d29849d 100644
--- a/drivers/usb/host/whci/asl.c
+++ b/drivers/usb/host/whci/asl.c
@@ -19,32 +19,11 @@
 #include <linux/dma-mapping.h>
 #include <linux/uwb/umc.h>
 #include <linux/usb.h>
-#define D_LOCAL 0
-#include <linux/uwb/debug.h>
 
 #include "../../wusbcore/wusbhc.h"
 
 #include "whcd.h"
 
-#if D_LOCAL >= 4
-static void dump_asl(struct whc *whc, const char *tag)
-{
-	struct device *dev = &whc->umc->dev;
-	struct whc_qset *qset;
-
-	d_printf(4, dev, "ASL %s\n", tag);
-
-	list_for_each_entry(qset, &whc->async_list, list_node) {
-		dump_qset(qset, dev);
-	}
-}
-#else
-static inline void dump_asl(struct whc *whc, const char *tag)
-{
-}
-#endif
-
-
 static void qset_get_next_prev(struct whc *whc, struct whc_qset *qset,
 			       struct whc_qset **next, struct whc_qset **prev)
 {
@@ -217,8 +196,6 @@ void scan_async_work(struct work_struct *work)
 
 	spin_lock_irq(&whc->lock);
 
-	dump_asl(whc, "before processing");
-
 	/*
 	 * Transerve the software list backwards so new qsets can be
 	 * safely inserted into the ASL without making it non-circular.
@@ -232,8 +209,6 @@ void scan_async_work(struct work_struct *work)
 		update |= process_qset(whc, qset);
 	}
 
-	dump_asl(whc, "after processing");
-
 	spin_unlock_irq(&whc->lock);
 
 	if (update) {
diff --git a/drivers/usb/host/whci/debug.c b/drivers/usb/host/whci/debug.c
new file mode 100644
index 000000000000..cf2d45946c57
--- /dev/null
+++ b/drivers/usb/host/whci/debug.c
@@ -0,0 +1,189 @@
+/*
+ * Wireless Host Controller (WHC) debug.
+ *
+ * Copyright (C) 2008 Cambridge Silicon Radio Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/kernel.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include "../../wusbcore/wusbhc.h"
+
+#include "whcd.h"
+
+struct whc_dbg {
+	struct dentry *di_f;
+	struct dentry *asl_f;
+	struct dentry *pzl_f;
+};
+
+void qset_print(struct seq_file *s, struct whc_qset *qset)
+{
+	struct whc_std *std;
+	struct urb *urb = NULL;
+	int i;
+
+	seq_printf(s, "qset %08x\n", (u32)qset->qset_dma);
+	seq_printf(s, "  -> %08x\n", (u32)qset->qh.link);
+	seq_printf(s, "  info: %08x %08x %08x\n",
+		qset->qh.info1, qset->qh.info2,  qset->qh.info3);
+	seq_printf(s, "  sts: %04x errs: %d\n", qset->qh.status, qset->qh.err_count);
+	seq_printf(s, "  TD: sts: %08x opts: %08x\n",
+		qset->qh.overlay.qtd.status, qset->qh.overlay.qtd.options);
+
+	for (i = 0; i < WHCI_QSET_TD_MAX; i++) {
+		seq_printf(s, "  %c%c TD[%d]: sts: %08x opts: %08x ptr: %08x\n",
+			i == qset->td_start ? 'S' : ' ',
+			i == qset->td_end ? 'E' : ' ',
+			i, qset->qtd[i].status, qset->qtd[i].options,
+			(u32)qset->qtd[i].page_list_ptr);
+	}
+	seq_printf(s, "  ntds: %d\n", qset->ntds);
+	list_for_each_entry(std, &qset->stds, list_node) {
+		if (urb != std->urb) {
+			urb = std->urb;
+			seq_printf(s, "  urb %p transferred: %d bytes\n", urb,
+				urb->actual_length);
+		}
+		if (std->qtd)
+			seq_printf(s, "    sTD[%td]: %zu bytes @ %08x\n",
+				std->qtd - &qset->qtd[0],
+				std->len, std->num_pointers ?
+				(u32)(std->pl_virt[0].buf_ptr) : (u32)std->dma_addr);
+		else
+			seq_printf(s, "    sTD[-]: %zd bytes @ %08x\n",
+				std->len, std->num_pointers ?
+				(u32)(std->pl_virt[0].buf_ptr) : (u32)std->dma_addr);
+	}
+}
+
+static int di_print(struct seq_file *s, void *p)
+{
+	struct whc *whc = s->private;
+	char buf[72];
+	int d;
+
+	for (d = 0; d < whc->n_devices; d++) {
+		struct di_buf_entry *di = &whc->di_buf[d];
+
+		bitmap_scnprintf(buf, sizeof(buf),
+				 (unsigned long *)di->availability_info, UWB_NUM_MAS);
+
+		seq_printf(s, "DI[%d]\n", d);
+		seq_printf(s, "  availability: %s\n", buf);
+		seq_printf(s, "  %c%c key idx: %d dev addr: %d\n",
+			   (di->addr_sec_info & WHC_DI_SECURE) ? 'S' : ' ',
+			   (di->addr_sec_info & WHC_DI_DISABLE) ? 'D' : ' ',
+			   (di->addr_sec_info & WHC_DI_KEY_IDX_MASK) >> 8,
+			   (di->addr_sec_info & WHC_DI_DEV_ADDR_MASK));
+	}
+	return 0;
+}
+
+static int asl_print(struct seq_file *s, void *p)
+{
+	struct whc *whc = s->private;
+	struct whc_qset *qset;
+
+	list_for_each_entry(qset, &whc->async_list, list_node) {
+		qset_print(s, qset);
+	}
+
+	return 0;
+}
+
+static int pzl_print(struct seq_file *s, void *p)
+{
+	struct whc *whc = s->private;
+	struct whc_qset *qset;
+	int period;
+
+	for (period = 0; period < 5; period++) {
+		seq_printf(s, "Period %d\n", period);
+		list_for_each_entry(qset, &whc->periodic_list[period], list_node) {
+			qset_print(s, qset);
+		}
+	}
+	return 0;
+}
+
+static int di_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, di_print, inode->i_private);
+}
+
+static int asl_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, asl_print, inode->i_private);
+}
+
+static int pzl_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, pzl_print, inode->i_private);
+}
+
+static struct file_operations di_fops = {
+	.open    = di_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = single_release,
+	.owner   = THIS_MODULE,
+};
+
+static struct file_operations asl_fops = {
+	.open    = asl_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = single_release,
+	.owner   = THIS_MODULE,
+};
+
+static struct file_operations pzl_fops = {
+	.open    = pzl_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = single_release,
+	.owner   = THIS_MODULE,
+};
+
+void whc_dbg_init(struct whc *whc)
+{
+	if (whc->wusbhc.pal.debugfs_dir == NULL)
+		return;
+
+	whc->dbg = kzalloc(sizeof(struct whc_dbg), GFP_KERNEL);
+	if (whc->dbg == NULL)
+		return;
+
+	whc->dbg->di_f = debugfs_create_file("di", 0444,
+					      whc->wusbhc.pal.debugfs_dir, whc,
+					      &di_fops);
+	whc->dbg->asl_f = debugfs_create_file("asl", 0444,
+					      whc->wusbhc.pal.debugfs_dir, whc,
+					      &asl_fops);
+	whc->dbg->pzl_f = debugfs_create_file("pzl", 0444,
+					      whc->wusbhc.pal.debugfs_dir, whc,
+					      &pzl_fops);
+}
+
+void whc_dbg_clean_up(struct whc *whc)
+{
+	if (whc->dbg) {
+		debugfs_remove(whc->dbg->pzl_f);
+		debugfs_remove(whc->dbg->asl_f);
+		debugfs_remove(whc->dbg->di_f);
+		kfree(whc->dbg);
+	}
+}
diff --git a/drivers/usb/host/whci/hcd.c b/drivers/usb/host/whci/hcd.c
index f599f89d3be1..1569afd6245b 100644
--- a/drivers/usb/host/whci/hcd.c
+++ b/drivers/usb/host/whci/hcd.c
@@ -273,6 +273,8 @@ static int whc_probe(struct umc_dev *umc)
 		goto error_wusbhc_b_create;
 	}
 
+	whc_dbg_init(whc);
+
 	return 0;
 
 error_wusbhc_b_create:
@@ -296,6 +298,7 @@ static void whc_remove(struct umc_dev *umc)
 	struct whc *whc = wusbhc_to_whc(wusbhc);
 
 	if (usb_hcd) {
+		whc_dbg_clean_up(whc);
 		wusbhc_b_destroy(wusbhc);
 		usb_remove_hcd(usb_hcd);
 		wusbhc_destroy(wusbhc);
diff --git a/drivers/usb/host/whci/pzl.c b/drivers/usb/host/whci/pzl.c
index 34d3a0aeab2b..2ae5abf69a6a 100644
--- a/drivers/usb/host/whci/pzl.c
+++ b/drivers/usb/host/whci/pzl.c
@@ -19,35 +19,11 @@
 #include <linux/dma-mapping.h>
 #include <linux/uwb/umc.h>
 #include <linux/usb.h>
-#define D_LOCAL 0
-#include <linux/uwb/debug.h>
 
 #include "../../wusbcore/wusbhc.h"
 
 #include "whcd.h"
 
-#if D_LOCAL >= 4
-static void dump_pzl(struct whc *whc, const char *tag)
-{
-	struct device *dev = &whc->umc->dev;
-	struct whc_qset *qset;
-	int period = 0;
-
-	d_printf(4, dev, "PZL %s\n", tag);
-
-	for (period = 0; period < 5; period++) {
-		d_printf(4, dev, "Period %d\n", period);
-		list_for_each_entry(qset, &whc->periodic_list[period], list_node) {
-			dump_qset(qset, dev);
-		}
-	}
-}
-#else
-static inline void dump_pzl(struct whc *whc, const char *tag)
-{
-}
-#endif
-
 static void update_pzl_pointers(struct whc *whc, int period, u64 addr)
 {
 	switch (period) {
@@ -250,8 +226,6 @@ void scan_periodic_work(struct work_struct *work)
 
 	spin_lock_irq(&whc->lock);
 
-	dump_pzl(whc, "before processing");
-
 	for (period = 4; period >= 0; period--) {
 		list_for_each_entry_safe(qset, t, &whc->periodic_list[period], list_node) {
 			if (!qset->in_hw_list)
@@ -263,8 +237,6 @@ void scan_periodic_work(struct work_struct *work)
 	if (update & (WHC_UPDATE_ADDED | WHC_UPDATE_REMOVED))
 		update_pzl_hw_view(whc);
 
-	dump_pzl(whc, "after processing");
-
 	spin_unlock_irq(&whc->lock);
 
 	if (update) {
diff --git a/drivers/usb/host/whci/qset.c b/drivers/usb/host/whci/qset.c
index 0420037d2e18..7be74314ee12 100644
--- a/drivers/usb/host/whci/qset.c
+++ b/drivers/usb/host/whci/qset.c
@@ -24,46 +24,6 @@
 
 #include "whcd.h"
 
-void dump_qset(struct whc_qset *qset, struct device *dev)
-{
-	struct whc_std *std;
-	struct urb *urb = NULL;
-	int i;
-
-	dev_dbg(dev, "qset %08x\n", (u32)qset->qset_dma);
-	dev_dbg(dev, "  -> %08x\n", (u32)qset->qh.link);
-	dev_dbg(dev, "  info: %08x %08x %08x\n",
-		qset->qh.info1, qset->qh.info2,  qset->qh.info3);
-	dev_dbg(dev, "  sts: %04x errs: %d\n", qset->qh.status, qset->qh.err_count);
-	dev_dbg(dev, "  TD: sts: %08x opts: %08x\n",
-		qset->qh.overlay.qtd.status, qset->qh.overlay.qtd.options);
-
-	for (i = 0; i < WHCI_QSET_TD_MAX; i++) {
-		dev_dbg(dev, "  %c%c TD[%d]: sts: %08x opts: %08x ptr: %08x\n",
-			i == qset->td_start ? 'S' : ' ',
-			i == qset->td_end ? 'E' : ' ',
-			i, qset->qtd[i].status, qset->qtd[i].options,
-			(u32)qset->qtd[i].page_list_ptr);
-	}
-	dev_dbg(dev, "  ntds: %d\n", qset->ntds);
-	list_for_each_entry(std, &qset->stds, list_node) {
-		if (urb != std->urb) {
-			urb = std->urb;
-			dev_dbg(dev, "  urb %p transferred: %d bytes\n", urb,
-				urb->actual_length);
-		}
-		if (std->qtd)
-			dev_dbg(dev, "    sTD[%td]: %zu bytes @ %08x\n",
-				std->qtd - &qset->qtd[0],
-				std->len, std->num_pointers ?
-				(u32)(std->pl_virt[0].buf_ptr) : (u32)std->dma_addr);
-		else
-			dev_dbg(dev, "    sTD[-]: %zd bytes @ %08x\n",
-				std->len, std->num_pointers ?
-				(u32)(std->pl_virt[0].buf_ptr) : (u32)std->dma_addr);
-	}
-}
-
 struct whc_qset *qset_alloc(struct whc *whc, gfp_t mem_flags)
 {
 	struct whc_qset *qset;
diff --git a/drivers/usb/host/whci/whcd.h b/drivers/usb/host/whci/whcd.h
index 1bbb8cb6bf80..0f3540f04f53 100644
--- a/drivers/usb/host/whci/whcd.h
+++ b/drivers/usb/host/whci/whcd.h
@@ -21,6 +21,7 @@
 #define __WHCD_H
 
 #include <linux/uwb/whci.h>
+#include <linux/uwb/umc.h>
 #include <linux/workqueue.h>
 
 #include "whci-hc.h"
@@ -28,6 +29,7 @@
 /* Generic command timeout. */
 #define WHC_GENCMD_TIMEOUT_MS 100
 
+struct whc_dbg;
 
 struct whc {
 	struct wusbhc wusbhc;
@@ -69,6 +71,8 @@ struct whc {
 	struct list_head periodic_removed_list;
 	wait_queue_head_t periodic_list_wq;
 	struct work_struct periodic_work;
+
+	struct whc_dbg *dbg;
 };
 
 #define wusbhc_to_whc(w) (container_of((w), struct whc, wusbhc))
@@ -190,8 +194,11 @@ void process_inactive_qtd(struct whc *whc, struct whc_qset *qset,
 				 struct whc_qtd *qtd);
 enum whc_update qset_add_qtds(struct whc *whc, struct whc_qset *qset);
 void qset_remove_complete(struct whc *whc, struct whc_qset *qset);
-void dump_qset(struct whc_qset *qset, struct device *dev);
 void pzl_update(struct whc *whc, uint32_t wusbcmd);
 void asl_update(struct whc *whc, uint32_t wusbcmd);
 
+/* debug.c */
+void whc_dbg_init(struct whc *whc);
+void whc_dbg_clean_up(struct whc *whc);
+
 #endif /* #ifndef __WHCD_H */
diff --git a/drivers/usb/host/whci/wusb.c b/drivers/usb/host/whci/wusb.c
index 540021a0971e..f24efdebad17 100644
--- a/drivers/usb/host/whci/wusb.c
+++ b/drivers/usb/host/whci/wusb.c
@@ -18,43 +18,16 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/uwb/umc.h>
-#define D_LOCAL 1
-#include <linux/uwb/debug.h>
 
 #include "../../wusbcore/wusbhc.h"
 
 #include "whcd.h"
 
-#if D_LOCAL >= 1
-static void dump_di(struct whc *whc, int idx)
-{
-	struct di_buf_entry *di = &whc->di_buf[idx];
-	struct device *dev = &whc->umc->dev;
-	char buf[128];
-
-	bitmap_scnprintf(buf, sizeof(buf), (unsigned long *)di->availability_info, UWB_NUM_MAS);
-
-	d_printf(1, dev, "DI[%d]\n", idx);
-	d_printf(1, dev, "  availability: %s\n", buf);
-	d_printf(1, dev, "  %c%c key idx: %d dev addr: %d\n",
-		 (di->addr_sec_info & WHC_DI_SECURE) ? 'S' : ' ',
-		 (di->addr_sec_info & WHC_DI_DISABLE) ? 'D' : ' ',
-		 (di->addr_sec_info & WHC_DI_KEY_IDX_MASK) >> 8,
-		 (di->addr_sec_info & WHC_DI_DEV_ADDR_MASK));
-}
-#else
-static inline void dump_di(struct whc *whc, int idx)
-{
-}
-#endif
-
 static int whc_update_di(struct whc *whc, int idx)
 {
 	int offset = idx / 32;
 	u32 bit = 1 << (idx % 32);
 
-	dump_di(whc, idx);
-
 	le_writel(bit, whc->base + WUSBDIBUPDATED + offset);
 
 	return whci_wait_for(&whc->umc->dev,
diff --git a/drivers/uwb/pal.c b/drivers/uwb/pal.c
index 605765124f5b..99a19c199095 100644
--- a/drivers/uwb/pal.c
+++ b/drivers/uwb/pal.c
@@ -16,6 +16,7 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 #include <linux/kernel.h>
+#include <linux/debugfs.h>
 #include <linux/uwb.h>
 
 #include "uwb-internal.h"
@@ -54,6 +55,8 @@ int uwb_pal_register(struct uwb_pal *pal)
 		}
 	}
 
+	pal->debugfs_dir = uwb_dbg_create_pal_dir(pal);
+
 	mutex_lock(&rc->uwb_dev.mutex);
 	list_add(&pal->node, &rc->pals);
 	mutex_unlock(&rc->uwb_dev.mutex);
@@ -76,6 +79,8 @@ void uwb_pal_unregister(struct uwb_pal *pal)
 	list_del(&pal->node);
 	mutex_unlock(&rc->uwb_dev.mutex);
 
+	debugfs_remove(pal->debugfs_dir);
+
 	if (pal->device) {
 		sysfs_remove_link(&rc->uwb_dev.dev.kobj, pal->name);
 		sysfs_remove_link(&pal->device->kobj, "uwb_rc");
diff --git a/drivers/uwb/uwb-debug.c b/drivers/uwb/uwb-debug.c
index ec1b7a403ff3..a6debb9baf38 100644
--- a/drivers/uwb/uwb-debug.c
+++ b/drivers/uwb/uwb-debug.c
@@ -407,3 +407,16 @@ void uwb_dbg_exit(void)
 {
 	debugfs_remove(root_dir);
 }
+
+/**
+ * uwb_dbg_create_pal_dir - create a debugfs directory for a PAL
+ * @pal: The PAL.
+ */
+struct dentry *uwb_dbg_create_pal_dir(struct uwb_pal *pal)
+{
+	struct uwb_rc *rc = pal->rc;
+
+	if (root_dir && rc->dbg && rc->dbg->root_d && pal->name)
+		return debugfs_create_dir(pal->name, rc->dbg->root_d);
+	return NULL;
+}
diff --git a/drivers/uwb/uwb-internal.h b/drivers/uwb/uwb-internal.h
index 9c0cdb4ded0c..f0f21f406bf0 100644
--- a/drivers/uwb/uwb-internal.h
+++ b/drivers/uwb/uwb-internal.h
@@ -284,8 +284,7 @@ void uwb_dbg_init(void);
 void uwb_dbg_exit(void);
 void uwb_dbg_add_rc(struct uwb_rc *rc);
 void uwb_dbg_del_rc(struct uwb_rc *rc);
-
-/* Workarounds for version specific stuff */
+struct dentry *uwb_dbg_create_pal_dir(struct uwb_pal *pal);
 
 static inline void uwb_dev_lock(struct uwb_dev *uwb_dev)
 {
diff --git a/include/linux/uwb.h b/include/linux/uwb.h
index 1719709d60ca..d7ed5201ade6 100644
--- a/include/linux/uwb.h
+++ b/include/linux/uwb.h
@@ -394,6 +394,8 @@ struct uwb_rc {
  * @channel: channel being used by the PAL; 0 if the PAL isn't using
  *           the radio; -1 if the PAL wishes to use the radio but
  *           cannot.
+ * @debugfs_dir: a debugfs directory which the PAL can use for its own
+ *           debugfs files.
  *
  * A Protocol Adaptation Layer (PAL) is a user of the WiMedia UWB
  * radio platform (e.g., WUSB, WLP or Bluetooth UWB AMP).
@@ -418,6 +420,7 @@ struct uwb_pal {
 	void (*new_rsv)(struct uwb_pal *pal, struct uwb_rsv *rsv);
 
 	int channel;
+	struct dentry *debugfs_dir;
 };
 
 void uwb_pal_init(struct uwb_pal *pal);
-- 
cgit v1.2.3


From ce71e27c6fdc43c29f36d307b9100bde70c947fc Mon Sep 17 00:00:00 2001
From: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
Date: Tue, 19 Aug 2008 20:43:25 +0300
Subject: SLUB: Replace __builtin_return_address(0) with _RET_IP_.

This patch replaces __builtin_return_address(0) with _RET_IP_, since a
previous patch moved _RET_IP_ and _THIS_IP_ to include/linux/kernel.h and
they're widely available now. This makes for shorter and easier to read
code.

[penberg@cs.helsinki.fi: remove _RET_IP_ casts to void pointer]
Signed-off-by: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
---
 include/linux/slab.h |  8 ++++----
 mm/slab.c            |  8 ++++----
 mm/slub.c            | 48 ++++++++++++++++++++++++------------------------
 3 files changed, 32 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 000da12b5cf0..c97ed28559ec 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -253,9 +253,9 @@ static inline void *kmem_cache_alloc_node(struct kmem_cache *cachep,
  * request comes from.
  */
 #if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB)
-extern void *__kmalloc_track_caller(size_t, gfp_t, void*);
+extern void *__kmalloc_track_caller(size_t, gfp_t, unsigned long);
 #define kmalloc_track_caller(size, flags) \
-	__kmalloc_track_caller(size, flags, __builtin_return_address(0))
+	__kmalloc_track_caller(size, flags, _RET_IP_)
 #else
 #define kmalloc_track_caller(size, flags) \
 	__kmalloc(size, flags)
@@ -271,10 +271,10 @@ extern void *__kmalloc_track_caller(size_t, gfp_t, void*);
  * allocation request comes from.
  */
 #if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB)
-extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, void *);
+extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, unsigned long);
 #define kmalloc_node_track_caller(size, flags, node) \
 	__kmalloc_node_track_caller(size, flags, node, \
-			__builtin_return_address(0))
+			_RET_IP_)
 #else
 #define kmalloc_node_track_caller(size, flags, node) \
 	__kmalloc_node(size, flags, node)
diff --git a/mm/slab.c b/mm/slab.c
index 09187517f9dc..a14787799014 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3686,9 +3686,9 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
 EXPORT_SYMBOL(__kmalloc_node);
 
 void *__kmalloc_node_track_caller(size_t size, gfp_t flags,
-		int node, void *caller)
+		int node, unsigned long caller)
 {
-	return __do_kmalloc_node(size, flags, node, caller);
+	return __do_kmalloc_node(size, flags, node, (void *)caller);
 }
 EXPORT_SYMBOL(__kmalloc_node_track_caller);
 #else
@@ -3730,9 +3730,9 @@ void *__kmalloc(size_t size, gfp_t flags)
 }
 EXPORT_SYMBOL(__kmalloc);
 
-void *__kmalloc_track_caller(size_t size, gfp_t flags, void *caller)
+void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller)
 {
-	return __do_kmalloc(size, flags, caller);
+	return __do_kmalloc(size, flags, (void *)caller);
 }
 EXPORT_SYMBOL(__kmalloc_track_caller);
 
diff --git a/mm/slub.c b/mm/slub.c
index 7ec2888bffc1..68ab260583d0 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -182,7 +182,7 @@ static LIST_HEAD(slab_caches);
  * Tracking user of a slab.
  */
 struct track {
-	void *addr;		/* Called from address */
+	unsigned long addr;	/* Called from address */
 	int cpu;		/* Was running on cpu */
 	int pid;		/* Pid context */
 	unsigned long when;	/* When did the operation occur */
@@ -371,7 +371,7 @@ static struct track *get_track(struct kmem_cache *s, void *object,
 }
 
 static void set_track(struct kmem_cache *s, void *object,
-				enum track_item alloc, void *addr)
+			enum track_item alloc, unsigned long addr)
 {
 	struct track *p;
 
@@ -395,8 +395,8 @@ static void init_tracking(struct kmem_cache *s, void *object)
 	if (!(s->flags & SLAB_STORE_USER))
 		return;
 
-	set_track(s, object, TRACK_FREE, NULL);
-	set_track(s, object, TRACK_ALLOC, NULL);
+	set_track(s, object, TRACK_FREE, 0UL);
+	set_track(s, object, TRACK_ALLOC, 0UL);
 }
 
 static void print_track(const char *s, struct track *t)
@@ -405,7 +405,7 @@ static void print_track(const char *s, struct track *t)
 		return;
 
 	printk(KERN_ERR "INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
-		s, t->addr, jiffies - t->when, t->cpu, t->pid);
+		s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid);
 }
 
 static void print_tracking(struct kmem_cache *s, void *object)
@@ -870,7 +870,7 @@ static void setup_object_debug(struct kmem_cache *s, struct page *page,
 }
 
 static int alloc_debug_processing(struct kmem_cache *s, struct page *page,
-						void *object, void *addr)
+					void *object, unsigned long addr)
 {
 	if (!check_slab(s, page))
 		goto bad;
@@ -910,7 +910,7 @@ bad:
 }
 
 static int free_debug_processing(struct kmem_cache *s, struct page *page,
-						void *object, void *addr)
+					void *object, unsigned long addr)
 {
 	if (!check_slab(s, page))
 		goto fail;
@@ -1033,10 +1033,10 @@ static inline void setup_object_debug(struct kmem_cache *s,
 			struct page *page, void *object) {}
 
 static inline int alloc_debug_processing(struct kmem_cache *s,
-	struct page *page, void *object, void *addr) { return 0; }
+	struct page *page, void *object, unsigned long addr) { return 0; }
 
 static inline int free_debug_processing(struct kmem_cache *s,
-	struct page *page, void *object, void *addr) { return 0; }
+	struct page *page, void *object, unsigned long addr) { return 0; }
 
 static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
 			{ return 1; }
@@ -1503,8 +1503,8 @@ static inline int node_match(struct kmem_cache_cpu *c, int node)
  * we need to allocate a new slab. This is the slowest path since it involves
  * a call to the page allocator and the setup of a new slab.
  */
-static void *__slab_alloc(struct kmem_cache *s,
-		gfp_t gfpflags, int node, void *addr, struct kmem_cache_cpu *c)
+static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
+			  unsigned long addr, struct kmem_cache_cpu *c)
 {
 	void **object;
 	struct page *new;
@@ -1588,7 +1588,7 @@ debug:
  * Otherwise we can simply pick the next object from the lockless free list.
  */
 static __always_inline void *slab_alloc(struct kmem_cache *s,
-		gfp_t gfpflags, int node, void *addr)
+		gfp_t gfpflags, int node, unsigned long addr)
 {
 	void **object;
 	struct kmem_cache_cpu *c;
@@ -1617,14 +1617,14 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 
 void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
 {
-	return slab_alloc(s, gfpflags, -1, __builtin_return_address(0));
+	return slab_alloc(s, gfpflags, -1, _RET_IP_);
 }
 EXPORT_SYMBOL(kmem_cache_alloc);
 
 #ifdef CONFIG_NUMA
 void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
 {
-	return slab_alloc(s, gfpflags, node, __builtin_return_address(0));
+	return slab_alloc(s, gfpflags, node, _RET_IP_);
 }
 EXPORT_SYMBOL(kmem_cache_alloc_node);
 #endif
@@ -1638,7 +1638,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_node);
  * handling required then we can return immediately.
  */
 static void __slab_free(struct kmem_cache *s, struct page *page,
-				void *x, void *addr, unsigned int offset)
+			void *x, unsigned long addr, unsigned int offset)
 {
 	void *prior;
 	void **object = (void *)x;
@@ -1708,7 +1708,7 @@ debug:
  * with all sorts of special processing.
  */
 static __always_inline void slab_free(struct kmem_cache *s,
-			struct page *page, void *x, void *addr)
+			struct page *page, void *x, unsigned long addr)
 {
 	void **object = (void *)x;
 	struct kmem_cache_cpu *c;
@@ -1735,7 +1735,7 @@ void kmem_cache_free(struct kmem_cache *s, void *x)
 
 	page = virt_to_head_page(x);
 
-	slab_free(s, page, x, __builtin_return_address(0));
+	slab_free(s, page, x, _RET_IP_);
 }
 EXPORT_SYMBOL(kmem_cache_free);
 
@@ -2663,7 +2663,7 @@ void *__kmalloc(size_t size, gfp_t flags)
 	if (unlikely(ZERO_OR_NULL_PTR(s)))
 		return s;
 
-	return slab_alloc(s, flags, -1, __builtin_return_address(0));
+	return slab_alloc(s, flags, -1, _RET_IP_);
 }
 EXPORT_SYMBOL(__kmalloc);
 
@@ -2691,7 +2691,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
 	if (unlikely(ZERO_OR_NULL_PTR(s)))
 		return s;
 
-	return slab_alloc(s, flags, node, __builtin_return_address(0));
+	return slab_alloc(s, flags, node, _RET_IP_);
 }
 EXPORT_SYMBOL(__kmalloc_node);
 #endif
@@ -2748,7 +2748,7 @@ void kfree(const void *x)
 		put_page(page);
 		return;
 	}
-	slab_free(page->slab, page, object, __builtin_return_address(0));
+	slab_free(page->slab, page, object, _RET_IP_);
 }
 EXPORT_SYMBOL(kfree);
 
@@ -3204,7 +3204,7 @@ static struct notifier_block __cpuinitdata slab_notifier = {
 
 #endif
 
-void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller)
+void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
 {
 	struct kmem_cache *s;
 
@@ -3220,7 +3220,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller)
 }
 
 void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
-					int node, void *caller)
+					int node, unsigned long caller)
 {
 	struct kmem_cache *s;
 
@@ -3431,7 +3431,7 @@ static void resiliency_test(void) {};
 
 struct location {
 	unsigned long count;
-	void *addr;
+	unsigned long addr;
 	long long sum_time;
 	long min_time;
 	long max_time;
@@ -3479,7 +3479,7 @@ static int add_location(struct loc_track *t, struct kmem_cache *s,
 {
 	long start, end, pos;
 	struct location *l;
-	void *caddr;
+	unsigned long caddr;
 	unsigned long age = jiffies - track->when;
 
 	start = -1;
-- 
cgit v1.2.3


From 8b752e3ef6e3f5cde87afc649dd51d92b1e549c1 Mon Sep 17 00:00:00 2001
From: Liming Wang <liming.wang@windriver.com>
Date: Fri, 28 Nov 2008 09:52:40 +0800
Subject: softirq: remove useless function __local_bh_enable

Impact: remove unused code

__local_bh_enable has been replaced with _local_bh_enable.
As comments says "it always nests inside local_bh_enable() sections"
has not been valid now. Also there is no reason to use __local_bh_enable
anywhere, so we can remove this useless function.

Signed-off-by: Liming Wang <liming.wang@windriver.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/bottom_half.h |  1 -
 kernel/softirq.c            | 14 --------------
 2 files changed, 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bottom_half.h b/include/linux/bottom_half.h
index 777dbf695d44..27b1bcffe408 100644
--- a/include/linux/bottom_half.h
+++ b/include/linux/bottom_half.h
@@ -2,7 +2,6 @@
 #define _LINUX_BH_H
 
 extern void local_bh_disable(void);
-extern void __local_bh_enable(void);
 extern void _local_bh_enable(void);
 extern void local_bh_enable(void);
 extern void local_bh_enable_ip(unsigned long ip);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index e7c69a720d69..8d9934b4162a 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -102,20 +102,6 @@ void local_bh_disable(void)
 
 EXPORT_SYMBOL(local_bh_disable);
 
-void __local_bh_enable(void)
-{
-	WARN_ON_ONCE(in_irq());
-
-	/*
-	 * softirqs should never be enabled by __local_bh_enable(),
-	 * it always nests inside local_bh_enable() sections:
-	 */
-	WARN_ON_ONCE(softirq_count() == SOFTIRQ_OFFSET);
-
-	sub_preempt_count(SOFTIRQ_OFFSET);
-}
-EXPORT_SYMBOL_GPL(__local_bh_enable);
-
 /*
  * Special-case - softirqs can safely be enabled in
  * cond_resched_softirq(), or by __do_softirq(),
-- 
cgit v1.2.3


From 1f55ed06cf0c361b293b32e5947d35d173eff2aa Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Mon, 1 Dec 2008 19:14:02 +0100
Subject: fuse: update interface version

Change interface version to 7.11 after adding the IOCTL and POLL
messages.

Also clean up the <linux/fuse.h> header a bit:
  - update copyright date to 2008
  - fix checkpatch warning:
      WARNING: Use #include <linux/types.h> instead of <asm/types.h>
  - remove FUSE_MAJOR define, which is not being used any more

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 include/linux/fuse.h | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index 5650cf033e73..162e5defe683 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -1,6 +1,6 @@
 /*
     FUSE: Filesystem in Userspace
-    Copyright (C) 2001-2006  Miklos Szeredi <miklos@szeredi.hu>
+    Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
 
     This program can be distributed under the terms of the GNU GPL.
     See the file COPYING.
@@ -20,26 +20,27 @@
  *
  * 7.10
  *  - add nonseekable open flag
+ *
+ * 7.11
+ *  - add IOCTL message
+ *  - add unsolicited notification support
+ *  - add POLL message and NOTIFY_POLL notification
  */
 
 #ifndef _LINUX_FUSE_H
 #define _LINUX_FUSE_H
 
-#include <asm/types.h>
-#include <linux/major.h>
+#include <linux/types.h>
 
 /** Version number of this interface */
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 10
+#define FUSE_KERNEL_MINOR_VERSION 11
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
 
-/** The major number of the fuse character device */
-#define FUSE_MAJOR MISC_MAJOR
-
 /* Make sure all structures are padded to 64bit boundary, so 32bit
    userspace works under 64bit kernels */
 
-- 
cgit v1.2.3


From 00ef9f7348dfd2fc223ec42aceb30836e86b367f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 4 Dec 2008 09:00:17 +0100
Subject: lockdep: change a held lock's class

Impact: introduce new lockdep API

Allow to change a held lock's class. Basically the same as the existing
code to change a subclass therefore reuse all that.

The XFS code will be able to use this to annotate their inode locking.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/lockdep.h | 12 ++++++++++--
 kernel/lockdep.c        | 24 +++++++++---------------
 2 files changed, 19 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 8956daf64abd..37a0361f4685 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -314,8 +314,15 @@ extern void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 extern void lock_release(struct lockdep_map *lock, int nested,
 			 unsigned long ip);
 
-extern void lock_set_subclass(struct lockdep_map *lock, unsigned int subclass,
-			      unsigned long ip);
+extern void lock_set_class(struct lockdep_map *lock, const char *name,
+			   struct lock_class_key *key, unsigned int subclass,
+			   unsigned long ip);
+
+static inline void lock_set_subclass(struct lockdep_map *lock,
+		unsigned int subclass, unsigned long ip)
+{
+	lock_set_class(lock, lock->name, lock->key, subclass, ip);
+}
 
 # define INIT_LOCKDEP				.lockdep_recursion = 0,
 
@@ -333,6 +340,7 @@ static inline void lockdep_on(void)
 
 # define lock_acquire(l, s, t, r, c, n, i)	do { } while (0)
 # define lock_release(l, n, i)			do { } while (0)
+# define lock_set_class(l, n, k, s, i)		do { } while (0)
 # define lock_set_subclass(l, s, i)		do { } while (0)
 # define lockdep_init()				do { } while (0)
 # define lockdep_info()				do { } while (0)
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 90f3fb64dbce..4fa6eeb4e8a7 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -291,14 +291,12 @@ void lockdep_off(void)
 {
 	current->lockdep_recursion++;
 }
-
 EXPORT_SYMBOL(lockdep_off);
 
 void lockdep_on(void)
 {
 	current->lockdep_recursion--;
 }
-
 EXPORT_SYMBOL(lockdep_on);
 
 /*
@@ -2513,7 +2511,6 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name,
 	if (subclass)
 		register_lock_class(lock, subclass, 1);
 }
-
 EXPORT_SYMBOL_GPL(lockdep_init_map);
 
 /*
@@ -2694,8 +2691,9 @@ static int check_unlock(struct task_struct *curr, struct lockdep_map *lock,
 }
 
 static int
-__lock_set_subclass(struct lockdep_map *lock,
-		    unsigned int subclass, unsigned long ip)
+__lock_set_class(struct lockdep_map *lock, const char *name,
+		 struct lock_class_key *key, unsigned int subclass,
+		 unsigned long ip)
 {
 	struct task_struct *curr = current;
 	struct held_lock *hlock, *prev_hlock;
@@ -2722,6 +2720,7 @@ __lock_set_subclass(struct lockdep_map *lock,
 	return print_unlock_inbalance_bug(curr, lock, ip);
 
 found_it:
+	lockdep_init_map(lock, name, key, 0);
 	class = register_lock_class(lock, subclass, 0);
 	hlock->class_idx = class - lock_classes + 1;
 
@@ -2906,9 +2905,9 @@ static void check_flags(unsigned long flags)
 #endif
 }
 
-void
-lock_set_subclass(struct lockdep_map *lock,
-		  unsigned int subclass, unsigned long ip)
+void lock_set_class(struct lockdep_map *lock, const char *name,
+		    struct lock_class_key *key, unsigned int subclass,
+		    unsigned long ip)
 {
 	unsigned long flags;
 
@@ -2918,13 +2917,12 @@ lock_set_subclass(struct lockdep_map *lock,
 	raw_local_irq_save(flags);
 	current->lockdep_recursion = 1;
 	check_flags(flags);
-	if (__lock_set_subclass(lock, subclass, ip))
+	if (__lock_set_class(lock, name, key, subclass, ip))
 		check_chain_key(current);
 	current->lockdep_recursion = 0;
 	raw_local_irq_restore(flags);
 }
-
-EXPORT_SYMBOL_GPL(lock_set_subclass);
+EXPORT_SYMBOL_GPL(lock_set_class);
 
 /*
  * We are not always called with irqs disabled - do that here,
@@ -2948,7 +2946,6 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 	current->lockdep_recursion = 0;
 	raw_local_irq_restore(flags);
 }
-
 EXPORT_SYMBOL_GPL(lock_acquire);
 
 void lock_release(struct lockdep_map *lock, int nested,
@@ -2966,7 +2963,6 @@ void lock_release(struct lockdep_map *lock, int nested,
 	current->lockdep_recursion = 0;
 	raw_local_irq_restore(flags);
 }
-
 EXPORT_SYMBOL_GPL(lock_release);
 
 #ifdef CONFIG_LOCK_STAT
@@ -3451,7 +3447,6 @@ retry:
 	if (unlock)
 		read_unlock(&tasklist_lock);
 }
-
 EXPORT_SYMBOL_GPL(debug_show_all_locks);
 
 /*
@@ -3472,7 +3467,6 @@ void debug_show_held_locks(struct task_struct *task)
 {
 		__debug_show_held_locks(task);
 }
-
 EXPORT_SYMBOL_GPL(debug_show_held_locks);
 
 void lockdep_sys_exit(void)
-- 
cgit v1.2.3


From c9bb6003dd096ad190e1594a7d835ae1c39fae8f Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 29 Oct 2008 23:46:09 -0700
Subject: of: Fix comment, sparc no longer uses of_device objects on special
 busses.

It only uses of_platform_bus_type.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/of_platform.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h
index a8efcfeea732..3d327b67d7e2 100644
--- a/include/linux/of_platform.h
+++ b/include/linux/of_platform.h
@@ -26,8 +26,7 @@ extern struct bus_type of_platform_bus_type;
 
 /*
  * An of_platform_driver driver is attached to a basic of_device on
- * the "platform bus" (of_platform_bus_type) (or ISA, EBUS and SBUS
- * busses on sparc).
+ * the "platform bus" (of_platform_bus_type).
  */
 struct of_platform_driver
 {
-- 
cgit v1.2.3


From e088e4c9cdb618675874becb91b2fd581ee707e6 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg@redhat.com>
Date: Tue, 25 Nov 2008 13:29:47 -0500
Subject: [CPUFREQ] Disable sysfs ui for p4-clockmod.

p4-clockmod has a long history of abuse.   It pretends to be a CPU
frequency scaling driver, even though it doesn't actually change
the CPU frequency, but instead just modulates the frequency with
wait-states.
The biggest misconception is that when running at the lower 'frequency'
p4-clockmod is saving power.  This isn't the case, as workloads running
slower take longer to complete, preventing the CPU from entering deep C states.

However p4-clockmod does have a purpose.  It can prevent overheating.
Having it hooked up to the cpufreq interfaces is the wrong way to achieve
cooling however. It should instead be hooked up to ACPI.

This diff introduces a means for a cpufreq driver to register with the
cpufreq core, but not present a sysfs interface.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Dave Jones <davej@redhat.com>
---
 arch/x86/kernel/cpu/cpufreq/p4-clockmod.c |  1 +
 drivers/cpufreq/cpufreq.c                 | 51 ++++++++++++++++++++-----------
 include/linux/cpufreq.h                   |  1 +
 3 files changed, 35 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index ba3a94a997c3..0c43b2240517 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -276,6 +276,7 @@ static struct cpufreq_driver p4clockmod_driver = {
 	.name		= "p4-clockmod",
 	.owner		= THIS_MODULE,
 	.attr		= p4clockmod_attr,
+	.hide_interface	= 1,
 };
 
 
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 31d6f535a79d..9044b911d8ae 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -754,6 +754,11 @@ static struct kobj_type ktype_cpufreq = {
 	.release	= cpufreq_sysfs_release,
 };
 
+static struct kobj_type ktype_empty_cpufreq = {
+	.sysfs_ops	= &sysfs_ops,
+	.release	= cpufreq_sysfs_release,
+};
+
 
 /**
  * cpufreq_add_dev - add a CPU device
@@ -876,26 +881,36 @@ static int cpufreq_add_dev(struct sys_device *sys_dev)
 	memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
 
 	/* prepare interface data */
-	ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, &sys_dev->kobj,
-				   "cpufreq");
-	if (ret)
-		goto err_out_driver_exit;
-
-	/* set up files for this cpu device */
-	drv_attr = cpufreq_driver->attr;
-	while ((drv_attr) && (*drv_attr)) {
-		ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr));
+	if (!cpufreq_driver->hide_interface) {
+		ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq,
+					   &sys_dev->kobj, "cpufreq");
 		if (ret)
 			goto err_out_driver_exit;
-		drv_attr++;
-	}
-	if (cpufreq_driver->get) {
-		ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr);
-		if (ret)
-			goto err_out_driver_exit;
-	}
-	if (cpufreq_driver->target) {
-		ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
+
+		/* set up files for this cpu device */
+		drv_attr = cpufreq_driver->attr;
+		while ((drv_attr) && (*drv_attr)) {
+			ret = sysfs_create_file(&policy->kobj,
+						&((*drv_attr)->attr));
+			if (ret)
+				goto err_out_driver_exit;
+			drv_attr++;
+		}
+		if (cpufreq_driver->get) {
+			ret = sysfs_create_file(&policy->kobj,
+						&cpuinfo_cur_freq.attr);
+			if (ret)
+				goto err_out_driver_exit;
+		}
+		if (cpufreq_driver->target) {
+			ret = sysfs_create_file(&policy->kobj,
+						&scaling_cur_freq.attr);
+			if (ret)
+				goto err_out_driver_exit;
+		}
+	} else {
+		ret = kobject_init_and_add(&policy->kobj, &ktype_empty_cpufreq,
+					   &sys_dev->kobj, "cpufreq");
 		if (ret)
 			goto err_out_driver_exit;
 	}
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 1ee608fd7b77..484b3abf61bb 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -234,6 +234,7 @@ struct cpufreq_driver {
 	int	(*suspend)	(struct cpufreq_policy *policy, pm_message_t pmsg);
 	int	(*resume)	(struct cpufreq_policy *policy);
 	struct freq_attr	**attr;
+	bool			hide_interface;
 };
 
 /* flags */
-- 
cgit v1.2.3


From 0b8f1efad30bd58f89961b82dfe68b9edf8fd2ac Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Fri, 5 Dec 2008 18:58:31 -0800
Subject: sparse irq_desc[] array: core kernel and x86 changes

Impact: new feature

Problem on distro kernels: irq_desc[NR_IRQS] takes megabytes of RAM with
NR_CPUS set to large values. The goal is to be able to scale up to much
larger NR_IRQS value without impacting the (important) common case.

To solve this, we generalize irq_desc[NR_IRQS] to an (optional) array of
irq_desc pointers.

When CONFIG_SPARSE_IRQ=y is used, we use kzalloc_node to get irq_desc,
this also makes the IRQ descriptors NUMA-local (to the site that calls
request_irq()).

This gets rid of the irq_cfg[] static array on x86 as well: irq_cfg now
uses desc->chip_data for x86 to store irq_cfg.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig                   |  10 ++
 arch/x86/include/asm/irq_vectors.h |   9 ++
 arch/x86/kernel/io_apic.c          | 301 +++++++++++++++++++++++--------------
 arch/x86/kernel/irq.c              |   3 +
 arch/x86/kernel/irq_32.c           |   2 +
 arch/x86/kernel/irq_64.c           |   2 +
 arch/x86/kernel/irqinit_32.c       |   1 -
 arch/x86/kernel/irqinit_64.c       |   1 -
 drivers/char/random.c              |  22 +--
 drivers/pci/intr_remapping.c       |  76 +++++++++-
 drivers/xen/events.c               |  12 +-
 fs/proc/stat.c                     |  17 ++-
 include/linux/interrupt.h          |   2 +
 include/linux/irq.h                |  54 ++++++-
 include/linux/irqnr.h              |  14 +-
 include/linux/kernel_stat.h        |  14 +-
 include/linux/random.h             |  51 +++++++
 init/main.c                        |  11 ++
 kernel/irq/autoprobe.c             |  15 ++
 kernel/irq/chip.c                  |   3 +-
 kernel/irq/handle.c                | 181 +++++++++++++++++++++-
 kernel/irq/proc.c                  |   6 +-
 kernel/irq/spurious.c              |   5 +
 23 files changed, 649 insertions(+), 163 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ac22bb7719f7..48ac688de3cd 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -238,6 +238,16 @@ config X86_HAS_BOOT_CPU_ID
 	def_bool y
 	depends on X86_VOYAGER
 
+config SPARSE_IRQ
+	bool "Support sparse irq numbering"
+	depends on PCI_MSI || HT_IRQ
+	default y
+	help
+	  This enables support for sparse irq, esp for msi/msi-x. You may need
+	  if you have lots of cards supports msi-x installed.
+
+	  If you don't know what to do here, say Y.
+
 config X86_FIND_SMP_CONFIG
 	def_bool y
 	depends on X86_MPPARSE || X86_VOYAGER
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 0005adb0f941..bb6b69a6b125 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -102,11 +102,20 @@
 #define invalid_vm86_irq(irq)	((irq) < 3 || (irq) > 15)
 
 #if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER)
+
+#ifndef CONFIG_SPARSE_IRQ
 # if NR_CPUS < MAX_IO_APICS
 #  define NR_IRQS (NR_VECTORS + (32 * NR_CPUS))
 # else
 #  define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS))
 # endif
+#else
+# if (8 * NR_CPUS) > (32 * MAX_IO_APICS)
+#  define NR_IRQS (NR_VECTORS + (8 * NR_CPUS))
+# else
+#  define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS))
+# endif
+#endif
 
 #elif defined(CONFIG_X86_VOYAGER)
 
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 9043251210fb..9de17f5c1125 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -108,8 +108,33 @@ static int __init parse_noapic(char *str)
 early_param("noapic", parse_noapic);
 
 struct irq_pin_list;
+
+/*
+ * This is performance-critical, we want to do it O(1)
+ *
+ * the indexing order of this array favors 1:1 mappings
+ * between pins and IRQs.
+ */
+
+struct irq_pin_list {
+	int apic, pin;
+	struct irq_pin_list *next;
+};
+
+static struct irq_pin_list *get_one_free_irq_2_pin(int cpu)
+{
+	struct irq_pin_list *pin;
+	int node;
+
+	node = cpu_to_node(cpu);
+
+	pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node);
+	printk(KERN_DEBUG "  alloc irq_2_pin on cpu %d node %d\n", cpu, node);
+
+	return pin;
+}
+
 struct irq_cfg {
-	unsigned int irq;
 	struct irq_pin_list *irq_2_pin;
 	cpumask_t domain;
 	cpumask_t old_domain;
@@ -119,83 +144,93 @@ struct irq_cfg {
 };
 
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
+#ifdef CONFIG_SPARSE_IRQ
+static struct irq_cfg irq_cfgx[] = {
+#else
 static struct irq_cfg irq_cfgx[NR_IRQS] = {
-	[0]  = { .irq =  0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
-	[1]  = { .irq =  1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
-	[2]  = { .irq =  2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
-	[3]  = { .irq =  3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
-	[4]  = { .irq =  4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
-	[5]  = { .irq =  5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
-	[6]  = { .irq =  6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
-	[7]  = { .irq =  7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
-	[8]  = { .irq =  8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
-	[9]  = { .irq =  9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
-	[10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
-	[11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
-	[12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
-	[13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
-	[14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
-	[15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
+#endif
+	[0]  = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
+	[1]  = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
+	[2]  = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
+	[3]  = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
+	[4]  = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
+	[5]  = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
+	[6]  = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
+	[7]  = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
+	[8]  = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
+	[9]  = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
+	[10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
+	[11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
+	[12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
+	[13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
+	[14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
+	[15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
 };
 
-#define for_each_irq_cfg(irq, cfg)		\
-	for (irq = 0, cfg = irq_cfgx; irq < nr_irqs; irq++, cfg++)
-
-static struct irq_cfg *irq_cfg(unsigned int irq)
+void __init arch_early_irq_init(void)
 {
-	return irq < nr_irqs ? irq_cfgx + irq : NULL;
-}
+	struct irq_cfg *cfg;
+	struct irq_desc *desc;
+	int count;
+	int i;
 
-static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
-{
-	return irq_cfg(irq);
-}
+	cfg = irq_cfgx;
+	count = ARRAY_SIZE(irq_cfgx);
 
-/*
- * Rough estimation of how many shared IRQs there are, can be changed
- * anytime.
- */
-#define MAX_PLUS_SHARED_IRQS NR_IRQS
-#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
+	for (i = 0; i < count; i++) {
+		desc = irq_to_desc(i);
+		desc->chip_data = &cfg[i];
+	}
+}
 
-/*
- * This is performance-critical, we want to do it O(1)
- *
- * the indexing order of this array favors 1:1 mappings
- * between pins and IRQs.
- */
+#ifdef CONFIG_SPARSE_IRQ
+static struct irq_cfg *irq_cfg(unsigned int irq)
+{
+	struct irq_cfg *cfg = NULL;
+	struct irq_desc *desc;
 
-struct irq_pin_list {
-	int apic, pin;
-	struct irq_pin_list *next;
-};
+	desc = irq_to_desc(irq);
+	if (desc)
+		cfg = desc->chip_data;
 
-static struct irq_pin_list irq_2_pin_head[PIN_MAP_SIZE];
-static struct irq_pin_list *irq_2_pin_ptr;
+	return cfg;
+}
 
-static void __init irq_2_pin_init(void)
+static struct irq_cfg *get_one_free_irq_cfg(int cpu)
 {
-	struct irq_pin_list *pin = irq_2_pin_head;
-	int i;
+	struct irq_cfg *cfg;
+	int node;
+
+	node = cpu_to_node(cpu);
 
-	for (i = 1; i < PIN_MAP_SIZE; i++)
-		pin[i-1].next = &pin[i];
+	cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
+	printk(KERN_DEBUG "  alloc irq_cfg on cpu %d node %d\n", cpu, node);
 
-	irq_2_pin_ptr = &pin[0];
+	return cfg;
 }
 
-static struct irq_pin_list *get_one_free_irq_2_pin(void)
+void arch_init_chip_data(struct irq_desc *desc, int cpu)
 {
-	struct irq_pin_list *pin = irq_2_pin_ptr;
+	struct irq_cfg *cfg;
 
-	if (!pin)
-		panic("can not get more irq_2_pin\n");
+	cfg = desc->chip_data;
+	if (!cfg) {
+		desc->chip_data = get_one_free_irq_cfg(cpu);
+		if (!desc->chip_data) {
+			printk(KERN_ERR "can not alloc irq_cfg\n");
+			BUG_ON(1);
+		}
+	}
+}
 
-	irq_2_pin_ptr = pin->next;
-	pin->next = NULL;
-	return pin;
+#else
+static struct irq_cfg *irq_cfg(unsigned int irq)
+{
+	return irq < nr_irqs ? irq_cfgx + irq : NULL;
 }
 
+#endif
+
 struct io_apic {
 	unsigned int index;
 	unsigned int unused[3];
@@ -397,16 +432,19 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
  * shared ISA-space IRQs, so we have to support them. We are super
  * fast in the common case, and fast for shared ISA-space IRQs.
  */
-static void add_pin_to_irq(unsigned int irq, int apic, int pin)
+static void add_pin_to_irq_cpu(unsigned int irq, int cpu, int apic, int pin)
 {
-	struct irq_cfg *cfg;
 	struct irq_pin_list *entry;
+	struct irq_cfg *cfg = irq_cfg(irq);
 
-	/* first time to refer irq_cfg, so with new */
-	cfg = irq_cfg_alloc(irq);
 	entry = cfg->irq_2_pin;
 	if (!entry) {
-		entry = get_one_free_irq_2_pin();
+		entry = get_one_free_irq_2_pin(cpu);
+		if (!entry) {
+			printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n",
+					apic, pin);
+			return;
+		}
 		cfg->irq_2_pin = entry;
 		entry->apic = apic;
 		entry->pin = pin;
@@ -421,7 +459,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 		entry = entry->next;
 	}
 
-	entry->next = get_one_free_irq_2_pin();
+	entry->next = get_one_free_irq_2_pin(cpu);
 	entry = entry->next;
 	entry->apic = apic;
 	entry->pin = pin;
@@ -430,7 +468,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 /*
  * Reroute an IRQ to a different pin.
  */
-static void __init replace_pin_at_irq(unsigned int irq,
+static void __init replace_pin_at_irq(unsigned int irq, int cpu,
 				      int oldapic, int oldpin,
 				      int newapic, int newpin)
 {
@@ -451,7 +489,7 @@ static void __init replace_pin_at_irq(unsigned int irq,
 
 	/* why? call replace before add? */
 	if (!replaced)
-		add_pin_to_irq(irq, newapic, newpin);
+		add_pin_to_irq_cpu(irq, cpu, newapic, newpin);
 }
 
 static inline void io_apic_modify_irq(unsigned int irq,
@@ -1162,9 +1200,13 @@ void __setup_vector_irq(int cpu)
 	/* This function must be called with vector_lock held */
 	int irq, vector;
 	struct irq_cfg *cfg;
+	struct irq_desc *desc;
 
 	/* Mark the inuse vectors */
-	for_each_irq_cfg(irq, cfg) {
+	for_each_irq_desc(irq, desc) {
+		if (!desc)
+			continue;
+		cfg = desc->chip_data;
 		if (!cpu_isset(cpu, cfg->domain))
 			continue;
 		vector = cfg->vector;
@@ -1356,6 +1398,8 @@ static void __init setup_IO_APIC_irqs(void)
 {
 	int apic, pin, idx, irq;
 	int notcon = 0;
+	struct irq_desc *desc;
+	int cpu = boot_cpu_id;
 
 	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
 
@@ -1387,7 +1431,12 @@ static void __init setup_IO_APIC_irqs(void)
 			if (multi_timer_check(apic, irq))
 				continue;
 #endif
-			add_pin_to_irq(irq, apic, pin);
+			desc = irq_to_desc_alloc_cpu(irq, cpu);
+			if (!desc) {
+				printk(KERN_INFO "can not get irq_desc for %d\n", irq);
+				continue;
+			}
+			add_pin_to_irq_cpu(irq, cpu, apic, pin);
 
 			setup_IO_APIC_irq(apic, pin, irq,
 					irq_trigger(idx), irq_polarity(idx));
@@ -1448,6 +1497,7 @@ __apicdebuginit(void) print_IO_APIC(void)
 	union IO_APIC_reg_03 reg_03;
 	unsigned long flags;
 	struct irq_cfg *cfg;
+	struct irq_desc *desc;
 	unsigned int irq;
 
 	if (apic_verbosity == APIC_QUIET)
@@ -1537,8 +1587,13 @@ __apicdebuginit(void) print_IO_APIC(void)
 	}
 	}
 	printk(KERN_DEBUG "IRQ to pin mappings:\n");
-	for_each_irq_cfg(irq, cfg) {
-		struct irq_pin_list *entry = cfg->irq_2_pin;
+	for_each_irq_desc(irq, desc) {
+		struct irq_pin_list *entry;
+
+		if (!desc)
+			continue;
+		cfg = desc->chip_data;
+		entry = cfg->irq_2_pin;
 		if (!entry)
 			continue;
 		printk(KERN_DEBUG "IRQ%d ", irq);
@@ -2022,6 +2077,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
 {
 	int was_pending = 0;
 	unsigned long flags;
+	struct irq_cfg *cfg;
 
 	spin_lock_irqsave(&ioapic_lock, flags);
 	if (irq < 16) {
@@ -2029,6 +2085,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
 		if (i8259A_irq_pending(irq))
 			was_pending = 1;
 	}
+	cfg = irq_cfg(irq);
 	__unmask_IO_APIC_irq(irq);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 
@@ -2178,6 +2235,9 @@ static void ir_irq_migration(struct work_struct *work)
 	struct irq_desc *desc;
 
 	for_each_irq_desc(irq, desc) {
+		if (!desc)
+			continue;
+
 		if (desc->status & IRQ_MOVE_PENDING) {
 			unsigned long flags;
 
@@ -2229,6 +2289,9 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
 		struct irq_cfg *cfg;
 		irq = __get_cpu_var(vector_irq)[vector];
 
+		if (irq == -1)
+			continue;
+
 		desc = irq_to_desc(irq);
 		if (!desc)
 			continue;
@@ -2430,8 +2493,12 @@ static inline void init_IO_APIC_traps(void)
 	 * Also, we've got to be careful not to trash gate
 	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
 	 */
-	for_each_irq_cfg(irq, cfg) {
-		if (IO_APIC_IRQ(irq) && !cfg->vector) {
+	for_each_irq_desc(irq, desc) {
+		if (!desc)
+			continue;
+
+		cfg = desc->chip_data;
+		if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
 			/*
 			 * Hmm.. We don't have an entry for this,
 			 * so default to an old-fashioned 8259
@@ -2439,11 +2506,9 @@ static inline void init_IO_APIC_traps(void)
 			 */
 			if (irq < 16)
 				make_8259A_irq(irq);
-			else {
-				desc = irq_to_desc(irq);
+			else
 				/* Strange. Oh, well.. */
 				desc->chip = &no_irq_chip;
-			}
 		}
 	}
 }
@@ -2654,7 +2719,7 @@ static inline void __init check_timer(void)
 		 * Ok, does IRQ0 through the IOAPIC work?
 		 */
 		if (no_pin1) {
-			add_pin_to_irq(0, apic1, pin1);
+			add_pin_to_irq_cpu(0, boot_cpu_id, apic1, pin1);
 			setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
 		}
 		unmask_IO_APIC_irq(0);
@@ -2683,7 +2748,7 @@ static inline void __init check_timer(void)
 		/*
 		 * legacy devices should be connected to IO APIC #0
 		 */
-		replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
+		replace_pin_at_irq(0, boot_cpu_id, apic1, pin1, apic2, pin2);
 		setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
 		unmask_IO_APIC_irq(0);
 		enable_8259A_irq(0);
@@ -2902,21 +2967,25 @@ unsigned int create_irq_nr(unsigned int irq_want)
 	unsigned int irq;
 	unsigned int new;
 	unsigned long flags;
-	struct irq_cfg *cfg_new;
-
-	irq_want = nr_irqs - 1;
+	struct irq_cfg *cfg_new = NULL;
+	int cpu = boot_cpu_id;
+	struct irq_desc *desc_new = NULL;
 
 	irq = 0;
 	spin_lock_irqsave(&vector_lock, flags);
 	for (new = irq_want; new > 0; new--) {
 		if (platform_legacy_irq(new))
 			continue;
-		cfg_new = irq_cfg(new);
-		if (cfg_new && cfg_new->vector != 0)
+
+		desc_new = irq_to_desc_alloc_cpu(new, cpu);
+		if (!desc_new) {
+			printk(KERN_INFO "can not get irq_desc for %d\n", new);
+			continue;
+		}
+		cfg_new = desc_new->chip_data;
+
+		if (cfg_new->vector != 0)
 			continue;
-		/* check if need to create one */
-		if (!cfg_new)
-			cfg_new = irq_cfg_alloc(new);
 		if (__assign_irq_vector(new, TARGET_CPUS) == 0)
 			irq = new;
 		break;
@@ -2925,6 +2994,9 @@ unsigned int create_irq_nr(unsigned int irq_want)
 
 	if (irq > 0) {
 		dynamic_irq_init(irq);
+		/* restore it, in case dynamic_irq_init clear it */
+		if (desc_new)
+			desc_new->chip_data = cfg_new;
 	}
 	return irq;
 }
@@ -2944,8 +3016,16 @@ int create_irq(void)
 void destroy_irq(unsigned int irq)
 {
 	unsigned long flags;
+	struct irq_cfg *cfg;
+	struct irq_desc *desc;
 
+	/* store it, in case dynamic_irq_cleanup clear it */
+	desc = irq_to_desc(irq);
+	cfg = desc->chip_data;
 	dynamic_irq_cleanup(irq);
+	/* connect back irq_cfg */
+	if (desc)
+		desc->chip_data = cfg;
 
 #ifdef CONFIG_INTR_REMAP
 	free_irte(irq);
@@ -3195,26 +3275,13 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
 	return 0;
 }
 
-static unsigned int build_irq_for_pci_dev(struct pci_dev *dev)
-{
-	unsigned int irq;
-
-	irq = dev->bus->number;
-	irq <<= 8;
-	irq |= dev->devfn;
-	irq <<= 12;
-
-	return irq;
-}
-
-int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
+int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc)
 {
 	unsigned int irq;
 	int ret;
 	unsigned int irq_want;
 
-	irq_want = build_irq_for_pci_dev(dev) + 0x100;
-
+	irq_want = nr_irqs - 1;
 	irq = create_irq_nr(irq_want);
 	if (irq == 0)
 		return -1;
@@ -3228,7 +3295,7 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
 		goto error;
 no_ir:
 #endif
-	ret = setup_msi_irq(dev, desc, irq);
+	ret = setup_msi_irq(dev, msidesc, irq);
 	if (ret < 0) {
 		destroy_irq(irq);
 		return ret;
@@ -3246,7 +3313,7 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 {
 	unsigned int irq;
 	int ret, sub_handle;
-	struct msi_desc *desc;
+	struct msi_desc *msidesc;
 	unsigned int irq_want;
 
 #ifdef CONFIG_INTR_REMAP
@@ -3254,10 +3321,11 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 	int index = 0;
 #endif
 
-	irq_want = build_irq_for_pci_dev(dev) + 0x100;
+	irq_want = nr_irqs - 1;
 	sub_handle = 0;
-	list_for_each_entry(desc, &dev->msi_list, list) {
-		irq = create_irq_nr(irq_want--);
+	list_for_each_entry(msidesc, &dev->msi_list, list) {
+		irq = create_irq_nr(irq_want);
+		irq_want--;
 		if (irq == 0)
 			return -1;
 #ifdef CONFIG_INTR_REMAP
@@ -3289,7 +3357,7 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 		}
 no_ir:
 #endif
-		ret = setup_msi_irq(dev, desc, irq);
+		ret = setup_msi_irq(dev, msidesc, irq);
 		if (ret < 0)
 			goto error;
 		sub_handle++;
@@ -3707,17 +3775,29 @@ int __init io_apic_get_version(int ioapic)
 
 int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
 {
+	struct irq_desc *desc;
+	struct irq_cfg *cfg;
+	int cpu = boot_cpu_id;
+
 	if (!IO_APIC_IRQ(irq)) {
 		apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
 			ioapic);
 		return -EINVAL;
 	}
 
+	desc = irq_to_desc_alloc_cpu(irq, cpu);
+	if (!desc) {
+		printk(KERN_INFO "can not get irq_desc %d\n", irq);
+		return 0;
+	}
+
 	/*
 	 * IRQs < 16 are already in the irq_2_pin[] map
 	 */
-	if (irq >= 16)
-		add_pin_to_irq(irq, ioapic, pin);
+	if (irq >= 16) {
+		cfg = desc->chip_data;
+		add_pin_to_irq_cpu(irq, cpu, ioapic, pin);
+	}
 
 	setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity);
 
@@ -3773,7 +3853,8 @@ void __init setup_ioapic_dest(void)
 			 * when you have too many devices, because at that time only boot
 			 * cpu is online.
 			 */
-			cfg = irq_cfg(irq);
+			desc = irq_to_desc(irq);
+			cfg = desc->chip_data;
 			if (!cfg->vector) {
 				setup_IO_APIC_irq(ioapic, pin, irq,
 						  irq_trigger(irq_entry),
@@ -3785,7 +3866,6 @@ void __init setup_ioapic_dest(void)
 			/*
 			 * Honour affinities which have been set in early boot
 			 */
-			desc = irq_to_desc(irq);
 			if (desc->status &
 			    (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
 				mask = desc->affinity;
@@ -3846,7 +3926,6 @@ void __init ioapic_init_mappings(void)
 	struct resource *ioapic_res;
 	int i;
 
-	irq_2_pin_init();
 	ioapic_res = ioapic_setup_resources();
 	for (i = 0; i < nr_ioapics; i++) {
 		if (smp_found_config) {
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index d1d4dc52f649..3f1d9d18df67 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -118,6 +118,9 @@ int show_interrupts(struct seq_file *p, void *v)
 	}
 
 	desc = irq_to_desc(i);
+	if (!desc)
+		return 0;
+
 	spin_lock_irqsave(&desc->lock, flags);
 #ifndef CONFIG_SMP
 	any_count = kstat_irqs(i);
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index a51382672de0..119fc9c8ff7f 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -242,6 +242,8 @@ void fixup_irqs(cpumask_t map)
 	for_each_irq_desc(irq, desc) {
 		cpumask_t mask;
 
+		if (!desc)
+			continue;
 		if (irq == 2)
 			continue;
 
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 60eb84eb77a0..900009c70591 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -94,6 +94,8 @@ void fixup_irqs(cpumask_t map)
 		int break_affinity = 0;
 		int set_affinity = 1;
 
+		if (!desc)
+			continue;
 		if (irq == 2)
 			continue;
 
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 845aa9803e80..5a5651b7f9e6 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -69,7 +69,6 @@ void __init init_ISA_irqs (void)
 	 * 16 old-style INTA-cycle interrupts:
 	 */
 	for (i = 0; i < 16; i++) {
-		/* first time call this irq_desc */
 		struct irq_desc *desc = irq_to_desc(i);
 
 		desc->status = IRQ_DISABLED;
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index ff0235391285..cd9f42d028d9 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -143,7 +143,6 @@ void __init init_ISA_irqs(void)
 	init_8259A(0);
 
 	for (i = 0; i < 16; i++) {
-		/* first time call this irq_desc */
 		struct irq_desc *desc = irq_to_desc(i);
 
 		desc->status = IRQ_DISABLED;
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 675076f5fca8..d26891bfcd41 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -558,23 +558,9 @@ struct timer_rand_state {
 	unsigned dont_count_entropy:1;
 };
 
-static struct timer_rand_state *irq_timer_state[NR_IRQS];
-
-static struct timer_rand_state *get_timer_rand_state(unsigned int irq)
-{
-	if (irq >= nr_irqs)
-		return NULL;
-
-	return irq_timer_state[irq];
-}
-
-static void set_timer_rand_state(unsigned int irq, struct timer_rand_state *state)
-{
-	if (irq >= nr_irqs)
-		return;
-
-	irq_timer_state[irq] = state;
-}
+#ifndef CONFIG_SPARSE_IRQ
+struct timer_rand_state *irq_timer_state[NR_IRQS];
+#endif
 
 static struct timer_rand_state input_timer_state;
 
@@ -933,8 +919,10 @@ void rand_initialize_irq(int irq)
 {
 	struct timer_rand_state *state;
 
+#ifndef CONFIG_SPARSE_IRQ
 	if (irq >= nr_irqs)
 		return;
+#endif
 
 	state = get_timer_rand_state(irq);
 
diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
index 2de5a3238c94..c9958ec5e25e 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/pci/intr_remapping.c
@@ -19,17 +19,75 @@ struct irq_2_iommu {
 	u8  irte_mask;
 };
 
-static struct irq_2_iommu irq_2_iommuX[NR_IRQS];
+#ifdef CONFIG_SPARSE_IRQ
+static struct irq_2_iommu *get_one_free_irq_2_iommu(int cpu)
+{
+	struct irq_2_iommu *iommu;
+	int node;
+
+	node = cpu_to_node(cpu);
+
+	iommu = kzalloc_node(sizeof(*iommu), GFP_ATOMIC, node);
+	printk(KERN_DEBUG "alloc irq_2_iommu on cpu %d node %d\n", cpu, node);
+
+	return iommu;
+}
 
 static struct irq_2_iommu *irq_2_iommu(unsigned int irq)
 {
-	return (irq < nr_irqs) ? irq_2_iommuX + irq : NULL;
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+
+	if (WARN_ON_ONCE(!desc))
+		return NULL;
+
+	return desc->irq_2_iommu;
+}
+
+static struct irq_2_iommu *irq_2_iommu_alloc_cpu(unsigned int irq, int cpu)
+{
+	struct irq_desc *desc;
+	struct irq_2_iommu *irq_iommu;
+
+	/*
+	 * alloc irq desc if not allocated already.
+	 */
+	desc = irq_to_desc_alloc_cpu(irq, cpu);
+	if (!desc) {
+		printk(KERN_INFO "can not get irq_desc for %d\n", irq);
+		return NULL;
+	}
+
+	irq_iommu = desc->irq_2_iommu;
+
+	if (!irq_iommu)
+		desc->irq_2_iommu = get_one_free_irq_2_iommu(cpu);
+
+	return desc->irq_2_iommu;
 }
 
+static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq)
+{
+	return irq_2_iommu_alloc_cpu(irq, boot_cpu_id);
+}
+
+#else /* !CONFIG_SPARSE_IRQ */
+
+static struct irq_2_iommu irq_2_iommuX[NR_IRQS];
+
+static struct irq_2_iommu *irq_2_iommu(unsigned int irq)
+{
+	if (irq < nr_irqs)
+		return &irq_2_iommuX[irq];
+
+	return NULL;
+}
 static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq)
 {
 	return irq_2_iommu(irq);
 }
+#endif
 
 static DEFINE_SPINLOCK(irq_2_ir_lock);
 
@@ -86,9 +144,11 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
 	if (!count)
 		return -1;
 
+#ifndef CONFIG_SPARSE_IRQ
 	/* protect irq_2_iommu_alloc later */
 	if (irq >= nr_irqs)
 		return -1;
+#endif
 
 	/*
 	 * start the IRTE search from index 0.
@@ -130,6 +190,12 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
 		table->base[i].present = 1;
 
 	irq_iommu = irq_2_iommu_alloc(irq);
+	if (!irq_iommu) {
+		spin_unlock(&irq_2_ir_lock);
+		printk(KERN_ERR "can't allocate irq_2_iommu\n");
+		return -1;
+	}
+
 	irq_iommu->iommu = iommu;
 	irq_iommu->irte_index =  index;
 	irq_iommu->sub_handle = 0;
@@ -177,6 +243,12 @@ int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
 
 	irq_iommu = irq_2_iommu_alloc(irq);
 
+	if (!irq_iommu) {
+		spin_unlock(&irq_2_ir_lock);
+		printk(KERN_ERR "can't allocate irq_2_iommu\n");
+		return -1;
+	}
+
 	irq_iommu->iommu = iommu;
 	irq_iommu->irte_index = index;
 	irq_iommu->sub_handle = subhandle;
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 1e3b934a4cf7..2924faa7f6c4 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -141,8 +141,12 @@ static void init_evtchn_cpu_bindings(void)
 	int i;
 
 	/* By default all event channels notify CPU#0. */
-	for_each_irq_desc(i, desc)
+	for_each_irq_desc(i, desc) {
+		if (!desc)
+			continue;
+
 		desc->affinity = cpumask_of_cpu(0);
+	}
 #endif
 
 	memset(cpu_evtchn, 0, sizeof(cpu_evtchn));
@@ -231,7 +235,7 @@ static int find_unbound_irq(void)
 	int irq;
 
 	/* Only allocate from dynirq range */
-	for_each_irq_nr(irq)
+	for (irq = 0; irq < nr_irqs; irq++)
 		if (irq_bindcount[irq] == 0)
 			break;
 
@@ -792,7 +796,7 @@ void xen_irq_resume(void)
 		mask_evtchn(evtchn);
 
 	/* No IRQ <-> event-channel mappings. */
-	for_each_irq_nr(irq)
+	for (irq = 0; irq < nr_irqs; irq++)
 		irq_info[irq].evtchn = 0; /* zap event-channel binding */
 
 	for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
@@ -824,7 +828,7 @@ void __init xen_init_IRQ(void)
 		mask_evtchn(i);
 
 	/* Dynamic IRQ space is currently unbound. Zero the refcnts. */
-	for_each_irq_nr(i)
+	for (i = 0; i < nr_irqs; i++)
 		irq_bindcount[i] = 0;
 
 	irq_ctx_init(smp_processor_id());
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 81904f07679d..a13431ab7c65 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -27,6 +27,7 @@ static int show_stat(struct seq_file *p, void *v)
 	u64 sum = 0;
 	struct timespec boottime;
 	unsigned int per_irq_sum;
+	struct irq_desc *desc;
 
 	user = nice = system = idle = iowait =
 		irq = softirq = steal = cputime64_zero;
@@ -44,10 +45,11 @@ static int show_stat(struct seq_file *p, void *v)
 		softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq);
 		steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal);
 		guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest);
-
-		for_each_irq_nr(j)
+		for_each_irq_desc(j, desc) {
+			if (!desc)
+				continue;
 			sum += kstat_irqs_cpu(j, i);
-
+		}
 		sum += arch_irq_stat_cpu(i);
 	}
 	sum += arch_irq_stat();
@@ -90,11 +92,14 @@ static int show_stat(struct seq_file *p, void *v)
 	seq_printf(p, "intr %llu", (unsigned long long)sum);
 
 	/* sum again ? it could be updated? */
-	for_each_irq_nr(j) {
+	for (j = 0; j < NR_IRQS; j++) {
+		desc = irq_to_desc(j);
 		per_irq_sum = 0;
 
-		for_each_possible_cpu(i)
-			per_irq_sum += kstat_irqs_cpu(j, i);
+		if (desc) {
+			for_each_possible_cpu(i)
+				per_irq_sum += kstat_irqs_cpu(j, i);
+		}
 
 		seq_printf(p, " %u", per_irq_sum);
 	}
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index f58a0cf8929a..79e915e7e8a5 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -18,6 +18,8 @@
 #include <asm/ptrace.h>
 #include <asm/system.h>
 
+extern int nr_irqs;
+
 /*
  * These correspond to the IORESOURCE_IRQ_* defines in
  * linux/ioport.h to select the interrupt line behaviour.  When
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 3dddfa703ebd..63b00439d4d2 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -129,6 +129,8 @@ struct irq_chip {
 	const char	*typename;
 };
 
+struct timer_rand_state;
+struct irq_2_iommu;
 /**
  * struct irq_desc - interrupt descriptor
  * @irq:		interrupt number for this descriptor
@@ -154,6 +156,13 @@ struct irq_chip {
  */
 struct irq_desc {
 	unsigned int		irq;
+#ifdef CONFIG_SPARSE_IRQ
+	struct timer_rand_state *timer_rand_state;
+	unsigned int            *kstat_irqs;
+# ifdef CONFIG_INTR_REMAP
+	struct irq_2_iommu      *irq_2_iommu;
+# endif
+#endif
 	irq_flow_handler_t	handle_irq;
 	struct irq_chip		*chip;
 	struct msi_desc		*msi_desc;
@@ -181,14 +190,52 @@ struct irq_desc {
 	const char		*name;
 } ____cacheline_internodealigned_in_smp;
 
+extern void early_irq_init(void);
+extern void arch_early_irq_init(void);
+extern void arch_init_chip_data(struct irq_desc *desc, int cpu);
+extern void arch_init_copy_chip_data(struct irq_desc *old_desc,
+					struct irq_desc *desc, int cpu);
+extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc);
+
+#ifndef CONFIG_SPARSE_IRQ
 
 extern struct irq_desc irq_desc[NR_IRQS];
 
 static inline struct irq_desc *irq_to_desc(unsigned int irq)
 {
-	return (irq < nr_irqs) ? irq_desc + irq : NULL;
+	return (irq < NR_IRQS) ? irq_desc + irq : NULL;
+}
+static inline struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
+{
+	return irq_to_desc(irq);
 }
 
+#ifdef CONFIG_GENERIC_HARDIRQS
+# define for_each_irq_desc(irq, desc)		\
+	for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc++)
+# define for_each_irq_desc_reverse(irq, desc)                          \
+	for (irq = nr_irqs - 1, desc = irq_desc + (nr_irqs - 1);        \
+	    irq >= 0; irq--, desc--)
+#endif
+
+#else
+
+extern struct irq_desc *irq_to_desc(unsigned int irq);
+extern struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu);
+extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int cpu);
+
+# define for_each_irq_desc(irq, desc)		\
+	for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs; irq++, desc = irq_to_desc(irq))
+# define for_each_irq_desc_reverse(irq, desc)                          \
+	for (irq = nr_irqs - 1, desc = irq_to_desc(irq); irq >= 0; irq--, desc = irq_to_desc(irq))
+
+#define kstat_irqs_this_cpu(DESC) \
+	((DESC)->kstat_irqs[smp_processor_id()])
+#define kstat_incr_irqs_this_cpu(irqno, DESC) \
+	((DESC)->kstat_irqs[smp_processor_id()]++)
+
+#endif
+
 /*
  * Migration helpers for obsolete names, they will go away:
  */
@@ -380,6 +427,11 @@ extern int set_irq_msi(unsigned int irq, struct msi_desc *entry);
 #define get_irq_data(irq)	(irq_to_desc(irq)->handler_data)
 #define get_irq_msi(irq)	(irq_to_desc(irq)->msi_desc)
 
+#define get_irq_desc_chip(desc)		((desc)->chip)
+#define get_irq_desc_chip_data(desc)	((desc)->chip_data)
+#define get_irq_desc_data(desc)		((desc)->handler_data)
+#define get_irq_desc_msi(desc)		((desc)->msi_desc)
+
 #endif /* CONFIG_GENERIC_HARDIRQS */
 
 #endif /* !CONFIG_S390 */
diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h
index 452c280c8115..7a299e989f8b 100644
--- a/include/linux/irqnr.h
+++ b/include/linux/irqnr.h
@@ -7,18 +7,10 @@
 
 # define for_each_irq_desc(irq, desc)		\
 	for (irq = 0; irq < nr_irqs; irq++)
-#else
-extern int nr_irqs;
 
-# define for_each_irq_desc(irq, desc)		\
-	for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc++)
-
-# define for_each_irq_desc_reverse(irq, desc)				\
-	for (irq = nr_irqs - 1, desc = irq_desc + (nr_irqs - 1);	\
-	     irq >= 0; irq--, desc--)
+static inline early_sparse_irq_init(void)
+{
+}
 #endif
 
-#define for_each_irq_nr(irq)			\
-	for (irq = 0; irq < nr_irqs; irq++)
-
 #endif
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 4a145caeee07..4ee4b3d2316f 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -28,7 +28,9 @@ struct cpu_usage_stat {
 
 struct kernel_stat {
 	struct cpu_usage_stat	cpustat;
-	unsigned int irqs[NR_IRQS];
+#ifndef CONFIG_SPARSE_IRQ
+       unsigned int irqs[NR_IRQS];
+#endif
 };
 
 DECLARE_PER_CPU(struct kernel_stat, kstat);
@@ -39,6 +41,10 @@ DECLARE_PER_CPU(struct kernel_stat, kstat);
 
 extern unsigned long long nr_context_switches(void);
 
+#ifndef CONFIG_SPARSE_IRQ
+#define kstat_irqs_this_cpu(irq) \
+	(kstat_this_cpu.irqs[irq])
+
 struct irq_desc;
 
 static inline void kstat_incr_irqs_this_cpu(unsigned int irq,
@@ -46,11 +52,17 @@ static inline void kstat_incr_irqs_this_cpu(unsigned int irq,
 {
 	kstat_this_cpu.irqs[irq]++;
 }
+#endif
+
 
+#ifndef CONFIG_SPARSE_IRQ
 static inline unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
 {
        return kstat_cpu(cpu).irqs[irq];
 }
+#else
+extern unsigned int kstat_irqs_cpu(unsigned int irq, int cpu);
+#endif
 
 /*
  * Number of interrupts per specific IRQ source, since bootup
diff --git a/include/linux/random.h b/include/linux/random.h
index 36f125c0c603..ad9daa2374d5 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -44,6 +44,57 @@ struct rand_pool_info {
 
 extern void rand_initialize_irq(int irq);
 
+struct timer_rand_state;
+#ifndef CONFIG_SPARSE_IRQ
+
+extern struct timer_rand_state *irq_timer_state[];
+
+extern int nr_irqs;
+static inline struct timer_rand_state *get_timer_rand_state(unsigned int irq)
+{
+	if (irq >= nr_irqs)
+		return NULL;
+
+	return irq_timer_state[irq];
+}
+
+static inline void set_timer_rand_state(unsigned int irq, struct timer_rand_state *state)
+{
+	if (irq >= nr_irqs)
+		return;
+
+	irq_timer_state[irq] = state;
+}
+
+#else
+
+#include <linux/irq.h>
+static inline struct timer_rand_state *get_timer_rand_state(unsigned int irq)
+{
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+
+	if (!desc)
+		return NULL;
+
+	return desc->timer_rand_state;
+}
+
+static inline void set_timer_rand_state(unsigned int irq, struct timer_rand_state *state)
+{
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+
+	if (!desc)
+		return;
+
+	desc->timer_rand_state = state;
+}
+#endif
+
+
 extern void add_input_randomness(unsigned int type, unsigned int code,
 				 unsigned int value);
 extern void add_interrupt_randomness(int irq);
diff --git a/init/main.c b/init/main.c
index 7e117a231af1..c1f999a3cf31 100644
--- a/init/main.c
+++ b/init/main.c
@@ -539,6 +539,15 @@ void __init __weak thread_info_cache_init(void)
 {
 }
 
+void __init __weak arch_early_irq_init(void)
+{
+}
+
+void __init __weak early_irq_init(void)
+{
+	arch_early_irq_init();
+}
+
 asmlinkage void __init start_kernel(void)
 {
 	char * command_line;
@@ -603,6 +612,8 @@ asmlinkage void __init start_kernel(void)
 	sort_main_extable();
 	trap_init();
 	rcu_init();
+	/* init some links before init_ISA_irqs() */
+	early_irq_init();
 	init_IRQ();
 	pidhash_init();
 	init_timers();
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index cc0f7321b8ce..650ce4102a63 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -40,6 +40,9 @@ unsigned long probe_irq_on(void)
 	 * flush such a longstanding irq before considering it as spurious.
 	 */
 	for_each_irq_desc_reverse(i, desc) {
+		if (!desc)
+			continue;
+
 		spin_lock_irq(&desc->lock);
 		if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
 			/*
@@ -68,6 +71,9 @@ unsigned long probe_irq_on(void)
 	 * happened in the previous stage, it may have masked itself)
 	 */
 	for_each_irq_desc_reverse(i, desc) {
+		if (!desc)
+			continue;
+
 		spin_lock_irq(&desc->lock);
 		if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
 			desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
@@ -86,6 +92,9 @@ unsigned long probe_irq_on(void)
 	 * Now filter out any obviously spurious interrupts
 	 */
 	for_each_irq_desc(i, desc) {
+		if (!desc)
+			continue;
+
 		spin_lock_irq(&desc->lock);
 		status = desc->status;
 
@@ -124,6 +133,9 @@ unsigned int probe_irq_mask(unsigned long val)
 	int i;
 
 	for_each_irq_desc(i, desc) {
+		if (!desc)
+			continue;
+
 		spin_lock_irq(&desc->lock);
 		status = desc->status;
 
@@ -166,6 +178,9 @@ int probe_irq_off(unsigned long val)
 	unsigned int status;
 
 	for_each_irq_desc(i, desc) {
+		if (!desc)
+			continue;
+
 		spin_lock_irq(&desc->lock);
 		status = desc->status;
 
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 10b5092e9bfe..8e4fce4a1b1f 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -24,9 +24,10 @@
  */
 void dynamic_irq_init(unsigned int irq)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
+	struct irq_desc *desc;
 	unsigned long flags;
 
+	desc = irq_to_desc(irq);
 	if (!desc) {
 		WARN(1, KERN_ERR "Trying to initialize invalid IRQ%d\n", irq);
 		return;
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index c815b42d0f5b..96ca203eb51b 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -15,9 +15,16 @@
 #include <linux/random.h>
 #include <linux/interrupt.h>
 #include <linux/kernel_stat.h>
+#include <linux/rculist.h>
+#include <linux/hash.h>
 
 #include "internals.h"
 
+/*
+ * lockdep: we want to handle all irq_desc locks as a single lock-class:
+ */
+static struct lock_class_key irq_desc_lock_class;
+
 /**
  * handle_bad_irq - handle spurious and unhandled irqs
  * @irq:       the interrupt number
@@ -49,6 +56,155 @@ void handle_bad_irq(unsigned int irq, struct irq_desc *desc)
 int nr_irqs = NR_IRQS;
 EXPORT_SYMBOL_GPL(nr_irqs);
 
+void __init __attribute__((weak)) arch_early_irq_init(void)
+{
+}
+
+#ifdef CONFIG_SPARSE_IRQ
+static struct irq_desc irq_desc_init = {
+	.irq	    = -1,
+	.status	    = IRQ_DISABLED,
+	.chip	    = &no_irq_chip,
+	.handle_irq = handle_bad_irq,
+	.depth      = 1,
+	.lock       = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock),
+#ifdef CONFIG_SMP
+	.affinity   = CPU_MASK_ALL
+#endif
+};
+
+static void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr)
+{
+	unsigned long bytes;
+	char *ptr;
+	int node;
+
+	/* Compute how many bytes we need per irq and allocate them */
+	bytes = nr * sizeof(unsigned int);
+
+	node = cpu_to_node(cpu);
+	ptr = kzalloc_node(bytes, GFP_ATOMIC, node);
+	printk(KERN_DEBUG "  alloc kstat_irqs on cpu %d node %d\n", cpu, node);
+
+	if (ptr)
+		desc->kstat_irqs = (unsigned int *)ptr;
+}
+
+void __attribute__((weak)) arch_init_chip_data(struct irq_desc *desc, int cpu)
+{
+}
+
+static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu)
+{
+	memcpy(desc, &irq_desc_init, sizeof(struct irq_desc));
+	desc->irq = irq;
+#ifdef CONFIG_SMP
+	desc->cpu = cpu;
+#endif
+	lockdep_set_class(&desc->lock, &irq_desc_lock_class);
+	init_kstat_irqs(desc, cpu, nr_cpu_ids);
+	if (!desc->kstat_irqs) {
+		printk(KERN_ERR "can not alloc kstat_irqs\n");
+		BUG_ON(1);
+	}
+	arch_init_chip_data(desc, cpu);
+}
+
+/*
+ * Protect the sparse_irqs:
+ */
+static DEFINE_SPINLOCK(sparse_irq_lock);
+
+struct irq_desc *irq_desc_ptrs[NR_IRQS] __read_mostly;
+
+static struct irq_desc irq_desc_legacy[16] __cacheline_aligned_in_smp = {
+	[0 ... 15] = {
+		.irq	    = -1,
+		.status	    = IRQ_DISABLED,
+		.chip	    = &no_irq_chip,
+		.handle_irq = handle_bad_irq,
+		.depth	    = 1,
+		.lock	    = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock),
+#ifdef CONFIG_SMP
+		.affinity   = CPU_MASK_ALL
+#endif
+	}
+};
+
+/* FIXME: use bootmem alloc ...*/
+static unsigned int kstat_irqs_legacy[16][NR_CPUS];
+
+void __init early_irq_init(void)
+{
+	struct irq_desc *desc;
+	int legacy_count;
+	int i;
+
+	desc = irq_desc_legacy;
+	legacy_count = ARRAY_SIZE(irq_desc_legacy);
+
+	for (i = 0; i < legacy_count; i++) {
+		desc[i].irq = i;
+		desc[i].kstat_irqs = kstat_irqs_legacy[i];
+
+		irq_desc_ptrs[i] = desc + i;
+	}
+
+	for (i = legacy_count; i < NR_IRQS; i++)
+		irq_desc_ptrs[i] = NULL;
+
+	arch_early_irq_init();
+}
+
+struct irq_desc *irq_to_desc(unsigned int irq)
+{
+	return (irq < NR_IRQS) ? irq_desc_ptrs[irq] : NULL;
+}
+
+struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
+{
+	struct irq_desc *desc;
+	unsigned long flags;
+	int node;
+
+	if (irq >= NR_IRQS) {
+		printk(KERN_WARNING "irq >= NR_IRQS in irq_to_desc_alloc: %d %d\n",
+				irq, NR_IRQS);
+		WARN_ON(1);
+		return NULL;
+	}
+
+	desc = irq_desc_ptrs[irq];
+	if (desc)
+		return desc;
+
+	spin_lock_irqsave(&sparse_irq_lock, flags);
+
+	/* We have to check it to avoid races with another CPU */
+	desc = irq_desc_ptrs[irq];
+	if (desc)
+		goto out_unlock;
+
+	node = cpu_to_node(cpu);
+	desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
+	printk(KERN_DEBUG "  alloc irq_desc for %d on cpu %d node %d\n",
+		 irq, cpu, node);
+	if (!desc) {
+		printk(KERN_ERR "can not alloc irq_desc\n");
+		BUG_ON(1);
+	}
+	init_one_irq_desc(irq, desc, cpu);
+
+	irq_desc_ptrs[irq] = desc;
+
+out_unlock:
+	spin_unlock_irqrestore(&sparse_irq_lock, flags);
+
+	return desc;
+}
+
+#else
+
 struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
 	[0 ... NR_IRQS-1] = {
 		.status = IRQ_DISABLED,
@@ -62,6 +218,8 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
 	}
 };
 
+#endif
+
 /*
  * What should we do if we get a hw irq event on an illegal vector?
  * Each architecture has to answer this themself.
@@ -261,17 +419,28 @@ out:
 
 
 #ifdef CONFIG_TRACE_IRQFLAGS
-/*
- * lockdep: we want to handle all irq_desc locks as a single lock-class:
- */
-static struct lock_class_key irq_desc_lock_class;
-
 void early_init_irq_lock_class(void)
 {
+#ifndef CONFIG_SPARSE_IRQ
 	struct irq_desc *desc;
 	int i;
 
-	for_each_irq_desc(i, desc)
+	for_each_irq_desc(i, desc) {
+		if (!desc)
+			continue;
+
 		lockdep_set_class(&desc->lock, &irq_desc_lock_class);
+	}
+#endif
+}
+#endif
+
+#ifdef CONFIG_SPARSE_IRQ
+unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+	return desc->kstat_irqs[cpu];
 }
 #endif
+EXPORT_SYMBOL(kstat_irqs_cpu);
+
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index d257e7d6a8a4..f6b3440f05bc 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -243,7 +243,11 @@ void init_irq_proc(void)
 	/*
 	 * Create entries for all existing IRQs.
 	 */
-	for_each_irq_desc(irq, desc)
+	for_each_irq_desc(irq, desc) {
+		if (!desc)
+			continue;
+
 		register_irq_proc(irq, desc);
+	}
 }
 
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index dd364c11e56e..3738107531fd 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -91,6 +91,9 @@ static int misrouted_irq(int irq)
 	int i, ok = 0;
 
 	for_each_irq_desc(i, desc) {
+		if (!desc)
+			continue;
+
 		if (!i)
 			 continue;
 
@@ -112,6 +115,8 @@ static void poll_spurious_irqs(unsigned long dummy)
 	for_each_irq_desc(i, desc) {
 		unsigned int status;
 
+		if (!desc)
+			continue;
 		if (!i)
 			 continue;
 
-- 
cgit v1.2.3


From 3145e941fcfe2548fa2270afb1a05bab3a6bc418 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Fri, 5 Dec 2008 18:58:34 -0800
Subject: x86, MSI: pass irq_cfg and irq_desc

Impact: simplify code

Pass irq_desc and cfg around, instead of raw IRQ numbers - this way
we dont have to look it up again and again.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 318 ++++++++++++++++++++++++++--------------------
 drivers/pci/msi.c         |  55 +++++---
 include/linux/msi.h       |   3 +
 3 files changed, 222 insertions(+), 154 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 0dcde74abd1d..a1a2e070f31a 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -231,6 +231,10 @@ static struct irq_cfg *irq_cfg(unsigned int irq)
 
 #endif
 
+static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask)
+{
+}
+
 struct io_apic {
 	unsigned int index;
 	unsigned int unused[3];
@@ -272,11 +276,10 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned
 	writel(value, &io_apic->data);
 }
 
-static bool io_apic_level_ack_pending(unsigned int irq)
+static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
 {
 	struct irq_pin_list *entry;
 	unsigned long flags;
-	struct irq_cfg *cfg = irq_cfg(irq);
 
 	spin_lock_irqsave(&ioapic_lock, flags);
 	entry = cfg->irq_2_pin;
@@ -358,13 +361,12 @@ static void ioapic_mask_entry(int apic, int pin)
 }
 
 #ifdef CONFIG_SMP
-static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
+static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
 {
 	int apic, pin;
-	struct irq_cfg *cfg;
 	struct irq_pin_list *entry;
+	u8 vector = cfg->vector;
 
-	cfg = irq_cfg(irq);
 	entry = cfg->irq_2_pin;
 	for (;;) {
 		unsigned int reg;
@@ -394,24 +396,27 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
 	}
 }
 
-static int assign_irq_vector(int irq, cpumask_t mask);
+static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask);
 
-static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+static void set_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask)
 {
 	struct irq_cfg *cfg;
 	unsigned long flags;
 	unsigned int dest;
 	cpumask_t tmp;
-	struct irq_desc *desc;
+	unsigned int irq;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
 		return;
 
-	cfg = irq_cfg(irq);
-	if (assign_irq_vector(irq, mask))
+	irq = desc->irq;
+	cfg = desc->chip_data;
+	if (assign_irq_vector(irq, cfg, mask))
 		return;
 
+	set_extra_move_desc(desc, mask);
+
 	cpus_and(tmp, cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 	/*
@@ -419,12 +424,20 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 	 */
 	dest = SET_APIC_LOGICAL_ID(dest);
 
-	desc = irq_to_desc(irq);
 	spin_lock_irqsave(&ioapic_lock, flags);
-	__target_IO_APIC_irq(irq, dest, cfg->vector);
+	__target_IO_APIC_irq(irq, dest, cfg);
 	desc->affinity = mask;
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
+
+static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+{
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+
+	set_ioapic_affinity_irq_desc(desc, mask);
+}
 #endif /* CONFIG_SMP */
 
 /*
@@ -432,10 +445,9 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
  * shared ISA-space IRQs, so we have to support them. We are super
  * fast in the common case, and fast for shared ISA-space IRQs.
  */
-static void add_pin_to_irq_cpu(unsigned int irq, int cpu, int apic, int pin)
+static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
 {
 	struct irq_pin_list *entry;
-	struct irq_cfg *cfg = irq_cfg(irq);
 
 	entry = cfg->irq_2_pin;
 	if (!entry) {
@@ -468,11 +480,10 @@ static void add_pin_to_irq_cpu(unsigned int irq, int cpu, int apic, int pin)
 /*
  * Reroute an IRQ to a different pin.
  */
-static void __init replace_pin_at_irq(unsigned int irq, int cpu,
+static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu,
 				      int oldapic, int oldpin,
 				      int newapic, int newpin)
 {
-	struct irq_cfg *cfg = irq_cfg(irq);
 	struct irq_pin_list *entry = cfg->irq_2_pin;
 	int replaced = 0;
 
@@ -489,18 +500,16 @@ static void __init replace_pin_at_irq(unsigned int irq, int cpu,
 
 	/* why? call replace before add? */
 	if (!replaced)
-		add_pin_to_irq_cpu(irq, cpu, newapic, newpin);
+		add_pin_to_irq_cpu(cfg, cpu, newapic, newpin);
 }
 
-static inline void io_apic_modify_irq(unsigned int irq,
+static inline void io_apic_modify_irq(struct irq_cfg *cfg,
 				int mask_and, int mask_or,
 				void (*final)(struct irq_pin_list *entry))
 {
 	int pin;
-	struct irq_cfg *cfg;
 	struct irq_pin_list *entry;
 
-	cfg = irq_cfg(irq);
 	for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) {
 		unsigned int reg;
 		pin = entry->pin;
@@ -513,9 +522,9 @@ static inline void io_apic_modify_irq(unsigned int irq,
 	}
 }
 
-static void __unmask_IO_APIC_irq(unsigned int irq)
+static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
 {
-	io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 0, NULL);
+	io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
 }
 
 #ifdef CONFIG_X86_64
@@ -530,47 +539,64 @@ void io_apic_sync(struct irq_pin_list *entry)
 	readl(&io_apic->data);
 }
 
-static void __mask_IO_APIC_irq(unsigned int irq)
+static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
 {
-	io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
+	io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
 }
 #else /* CONFIG_X86_32 */
-static void __mask_IO_APIC_irq(unsigned int irq)
+static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
 {
-	io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, NULL);
+	io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL);
 }
 
-static void __mask_and_edge_IO_APIC_irq(unsigned int irq)
+static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg)
 {
-	io_apic_modify_irq(irq, ~IO_APIC_REDIR_LEVEL_TRIGGER,
+	io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER,
 			IO_APIC_REDIR_MASKED, NULL);
 }
 
-static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
+static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg)
 {
-	io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED,
+	io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED,
 			IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
 }
 #endif /* CONFIG_X86_32 */
 
-static void mask_IO_APIC_irq (unsigned int irq)
+static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
 {
+	struct irq_cfg *cfg = desc->chip_data;
 	unsigned long flags;
 
+	BUG_ON(!cfg);
+
 	spin_lock_irqsave(&ioapic_lock, flags);
-	__mask_IO_APIC_irq(irq);
+	__mask_IO_APIC_irq(cfg);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
-static void unmask_IO_APIC_irq (unsigned int irq)
+static void unmask_IO_APIC_irq_desc(struct irq_desc *desc)
 {
+	struct irq_cfg *cfg = desc->chip_data;
 	unsigned long flags;
 
 	spin_lock_irqsave(&ioapic_lock, flags);
-	__unmask_IO_APIC_irq(irq);
+	__unmask_IO_APIC_irq(cfg);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
+static void mask_IO_APIC_irq(unsigned int irq)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	mask_IO_APIC_irq_desc(desc);
+}
+static void unmask_IO_APIC_irq(unsigned int irq)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	unmask_IO_APIC_irq_desc(desc);
+}
+
 static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
 {
 	struct IO_APIC_route_entry entry;
@@ -1072,7 +1098,7 @@ void unlock_vector_lock(void)
 	spin_unlock(&vector_lock);
 }
 
-static int __assign_irq_vector(int irq, cpumask_t mask)
+static int __assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask)
 {
 	/*
 	 * NOTE! The local APIC isn't very good at handling
@@ -1088,16 +1114,13 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
 	static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
 	unsigned int old_vector;
 	int cpu;
-	struct irq_cfg *cfg;
 
-	cfg = irq_cfg(irq);
+	if ((cfg->move_in_progress) || cfg->move_cleanup_count)
+		return -EBUSY;
 
 	/* Only try and allocate irqs on cpus that are present */
 	cpus_and(mask, mask, cpu_online_map);
 
-	if ((cfg->move_in_progress) || cfg->move_cleanup_count)
-		return -EBUSY;
-
 	old_vector = cfg->vector;
 	if (old_vector) {
 		cpumask_t tmp;
@@ -1151,24 +1174,22 @@ next:
 	return -ENOSPC;
 }
 
-static int assign_irq_vector(int irq, cpumask_t mask)
+static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask)
 {
 	int err;
 	unsigned long flags;
 
 	spin_lock_irqsave(&vector_lock, flags);
-	err = __assign_irq_vector(irq, mask);
+	err = __assign_irq_vector(irq, cfg, mask);
 	spin_unlock_irqrestore(&vector_lock, flags);
 	return err;
 }
 
-static void __clear_irq_vector(int irq)
+static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
 {
-	struct irq_cfg *cfg;
 	cpumask_t mask;
 	int cpu, vector;
 
-	cfg = irq_cfg(irq);
 	BUG_ON(!cfg->vector);
 
 	vector = cfg->vector;
@@ -1257,11 +1278,8 @@ static inline int IO_APIC_irq_trigger(int irq)
 }
 #endif
 
-static void ioapic_register_intr(int irq, unsigned long trigger)
+static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long trigger)
 {
-	struct irq_desc *desc;
-
-	desc = irq_to_desc(irq);
 
 	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
 	    trigger == IOAPIC_LEVEL)
@@ -1353,7 +1371,7 @@ static int setup_ioapic_entry(int apic, int irq,
 	return 0;
 }
 
-static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
+static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_desc *desc,
 			      int trigger, int polarity)
 {
 	struct irq_cfg *cfg;
@@ -1363,10 +1381,10 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
 	if (!IO_APIC_IRQ(irq))
 		return;
 
-	cfg = irq_cfg(irq);
+	cfg = desc->chip_data;
 
 	mask = TARGET_CPUS;
-	if (assign_irq_vector(irq, mask))
+	if (assign_irq_vector(irq, cfg, mask))
 		return;
 
 	cpus_and(mask, cfg->domain, mask);
@@ -1383,11 +1401,11 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
 			       cfg->vector)) {
 		printk("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
 		       mp_ioapics[apic].mp_apicid, pin);
-		__clear_irq_vector(irq);
+		__clear_irq_vector(irq, cfg);
 		return;
 	}
 
-	ioapic_register_intr(irq, trigger);
+	ioapic_register_intr(irq, desc, trigger);
 	if (irq < NR_IRQS_LEGACY)
 		disable_8259A_irq(irq);
 
@@ -1399,6 +1417,7 @@ static void __init setup_IO_APIC_irqs(void)
 	int apic, pin, idx, irq;
 	int notcon = 0;
 	struct irq_desc *desc;
+	struct irq_cfg *cfg;
 	int cpu = boot_cpu_id;
 
 	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
@@ -1436,9 +1455,10 @@ static void __init setup_IO_APIC_irqs(void)
 				printk(KERN_INFO "can not get irq_desc for %d\n", irq);
 				continue;
 			}
-			add_pin_to_irq_cpu(irq, cpu, apic, pin);
+			cfg = desc->chip_data;
+			add_pin_to_irq_cpu(cfg, cpu, apic, pin);
 
-			setup_IO_APIC_irq(apic, pin, irq,
+			setup_IO_APIC_irq(apic, pin, irq, desc,
 					irq_trigger(idx), irq_polarity(idx));
 		}
 	}
@@ -2086,7 +2106,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
 			was_pending = 1;
 	}
 	cfg = irq_cfg(irq);
-	__unmask_IO_APIC_irq(irq);
+	__unmask_IO_APIC_irq(cfg);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 
 	return was_pending;
@@ -2149,35 +2169,37 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
  * as simple as edge triggered migration and we can do the irq migration
  * with a simple atomic update to IO-APIC RTE.
  */
-static void migrate_ioapic_irq(int irq, cpumask_t mask)
+static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask)
 {
 	struct irq_cfg *cfg;
-	struct irq_desc *desc;
 	cpumask_t tmp, cleanup_mask;
 	struct irte irte;
 	int modify_ioapic_rte;
 	unsigned int dest;
 	unsigned long flags;
+	unsigned int irq;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
 		return;
 
+	irq = desc->irq;
 	if (get_irte(irq, &irte))
 		return;
 
-	if (assign_irq_vector(irq, mask))
+	cfg = desc->chip_data;
+	if (assign_irq_vector(irq, cfg, mask))
 		return;
 
-	cfg = irq_cfg(irq);
+	set_extra_move_desc(desc, mask);
+
 	cpus_and(tmp, cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
-	desc = irq_to_desc(irq);
 	modify_ioapic_rte = desc->status & IRQ_LEVEL;
 	if (modify_ioapic_rte) {
 		spin_lock_irqsave(&ioapic_lock, flags);
-		__target_IO_APIC_irq(irq, dest, cfg->vector);
+		__target_IO_APIC_irq(irq, dest, cfg);
 		spin_unlock_irqrestore(&ioapic_lock, flags);
 	}
 
@@ -2199,14 +2221,14 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask)
 	desc->affinity = mask;
 }
 
-static int migrate_irq_remapped_level(int irq)
+static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
 {
 	int ret = -1;
-	struct irq_desc *desc = irq_to_desc(irq);
+	struct irq_cfg *cfg = desc->chip_data;
 
-	mask_IO_APIC_irq(irq);
+	mask_IO_APIC_irq_desc(desc);
 
-	if (io_apic_level_ack_pending(irq)) {
+	if (io_apic_level_ack_pending(cfg)) {
 		/*
 		 * Interrupt in progress. Migrating irq now will change the
 		 * vector information in the IO-APIC RTE and that will confuse
@@ -2218,14 +2240,15 @@ static int migrate_irq_remapped_level(int irq)
 	}
 
 	/* everthing is clear. we have right of way */
-	migrate_ioapic_irq(irq, desc->pending_mask);
+	migrate_ioapic_irq_desc(desc, desc->pending_mask);
 
 	ret = 0;
 	desc->status &= ~IRQ_MOVE_PENDING;
 	cpus_clear(desc->pending_mask);
 
 unmask:
-	unmask_IO_APIC_irq(irq);
+	unmask_IO_APIC_irq_desc(desc);
+
 	return ret;
 }
 
@@ -2258,18 +2281,22 @@ static void ir_irq_migration(struct work_struct *work)
 /*
  * Migrates the IRQ destination in the process context.
  */
-static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
-
 	if (desc->status & IRQ_LEVEL) {
 		desc->status |= IRQ_MOVE_PENDING;
 		desc->pending_mask = mask;
-		migrate_irq_remapped_level(irq);
+		migrate_irq_remapped_level_desc(desc);
 		return;
 	}
 
-	migrate_ioapic_irq(irq, mask);
+	migrate_ioapic_irq_desc(desc, mask);
+}
+static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	set_ir_ioapic_affinity_irq_desc(desc, mask);
 }
 #endif
 
@@ -2313,9 +2340,10 @@ unlock:
 	irq_exit();
 }
 
-static void irq_complete_move(unsigned int irq)
+static void irq_complete_move(struct irq_desc **descp)
 {
-	struct irq_cfg *cfg = irq_cfg(irq);
+	struct irq_desc *desc = *descp;
+	struct irq_cfg *cfg = desc->chip_data;
 	unsigned vector, me;
 
 	if (likely(!cfg->move_in_progress))
@@ -2333,8 +2361,9 @@ static void irq_complete_move(unsigned int irq)
 	}
 }
 #else
-static inline void irq_complete_move(unsigned int irq) {}
+static inline void irq_complete_move(struct irq_desc **descp) {}
 #endif
+
 #ifdef CONFIG_INTR_REMAP
 static void ack_x2apic_level(unsigned int irq)
 {
@@ -2345,11 +2374,14 @@ static void ack_x2apic_edge(unsigned int irq)
 {
 	ack_x2APIC_irq();
 }
+
 #endif
 
 static void ack_apic_edge(unsigned int irq)
 {
-	irq_complete_move(irq);
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	irq_complete_move(&desc);
 	move_native_irq(irq);
 	ack_APIC_irq();
 }
@@ -2358,18 +2390,21 @@ atomic_t irq_mis_count;
 
 static void ack_apic_level(unsigned int irq)
 {
+	struct irq_desc *desc = irq_to_desc(irq);
+
 #ifdef CONFIG_X86_32
 	unsigned long v;
 	int i;
 #endif
+	struct irq_cfg *cfg;
 	int do_unmask_irq = 0;
 
-	irq_complete_move(irq);
+	irq_complete_move(&desc);
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	/* If we are moving the irq we need to mask it */
-	if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
+	if (unlikely(desc->status & IRQ_MOVE_PENDING)) {
 		do_unmask_irq = 1;
-		mask_IO_APIC_irq(irq);
+		mask_IO_APIC_irq_desc(desc);
 	}
 #endif
 
@@ -2393,7 +2428,8 @@ static void ack_apic_level(unsigned int irq)
 	* operation to prevent an edge-triggered interrupt escaping meanwhile.
 	* The idea is from Manfred Spraul.  --macro
 	*/
-	i = irq_cfg(irq)->vector;
+	cfg = desc->chip_data;
+	i = cfg->vector;
 
 	v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
 #endif
@@ -2432,17 +2468,18 @@ static void ack_apic_level(unsigned int irq)
 		 * accurate and is causing problems then it is a hardware bug
 		 * and you can go talk to the chipset vendor about it.
 		 */
-		if (!io_apic_level_ack_pending(irq))
+		cfg = desc->chip_data;
+		if (!io_apic_level_ack_pending(cfg))
 			move_masked_irq(irq);
-		unmask_IO_APIC_irq(irq);
+		unmask_IO_APIC_irq_desc(desc);
 	}
 
 #ifdef CONFIG_X86_32
 	if (!(v & (1 << (i & 0x1f)))) {
 		atomic_inc(&irq_mis_count);
 		spin_lock(&ioapic_lock);
-		__mask_and_edge_IO_APIC_irq(irq);
-		__unmask_and_level_IO_APIC_irq(irq);
+		__mask_and_edge_IO_APIC_irq(cfg);
+		__unmask_and_level_IO_APIC_irq(cfg);
 		spin_unlock(&ioapic_lock);
 	}
 #endif
@@ -2533,7 +2570,7 @@ static void unmask_lapic_irq(unsigned int irq)
 	apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
 }
 
-static void ack_lapic_irq (unsigned int irq)
+static void ack_lapic_irq(unsigned int irq)
 {
 	ack_APIC_irq();
 }
@@ -2545,11 +2582,8 @@ static struct irq_chip lapic_chip __read_mostly = {
 	.ack		= ack_lapic_irq,
 };
 
-static void lapic_register_intr(int irq)
+static void lapic_register_intr(int irq, struct irq_desc *desc)
 {
-	struct irq_desc *desc;
-
-	desc = irq_to_desc(irq);
 	desc->status &= ~IRQ_LEVEL;
 	set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
 				      "edge");
@@ -2653,7 +2687,9 @@ int timer_through_8259 __initdata;
  */
 static inline void __init check_timer(void)
 {
-	struct irq_cfg *cfg = irq_cfg(0);
+	struct irq_desc *desc = irq_to_desc(0);
+	struct irq_cfg *cfg = desc->chip_data;
+	int cpu = boot_cpu_id;
 	int apic1, pin1, apic2, pin2;
 	unsigned long flags;
 	unsigned int ver;
@@ -2668,7 +2704,7 @@ static inline void __init check_timer(void)
 	 * get/set the timer IRQ vector:
 	 */
 	disable_8259A_irq(0);
-	assign_irq_vector(0, TARGET_CPUS);
+	assign_irq_vector(0, cfg, TARGET_CPUS);
 
 	/*
 	 * As IRQ0 is to be enabled in the 8259A, the virtual
@@ -2719,10 +2755,10 @@ static inline void __init check_timer(void)
 		 * Ok, does IRQ0 through the IOAPIC work?
 		 */
 		if (no_pin1) {
-			add_pin_to_irq_cpu(0, boot_cpu_id, apic1, pin1);
+			add_pin_to_irq_cpu(cfg, cpu, apic1, pin1);
 			setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
 		}
-		unmask_IO_APIC_irq(0);
+		unmask_IO_APIC_irq_desc(desc);
 		if (timer_irq_works()) {
 			if (nmi_watchdog == NMI_IO_APIC) {
 				setup_nmi();
@@ -2748,9 +2784,9 @@ static inline void __init check_timer(void)
 		/*
 		 * legacy devices should be connected to IO APIC #0
 		 */
-		replace_pin_at_irq(0, boot_cpu_id, apic1, pin1, apic2, pin2);
+		replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2);
 		setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
-		unmask_IO_APIC_irq(0);
+		unmask_IO_APIC_irq_desc(desc);
 		enable_8259A_irq(0);
 		if (timer_irq_works()) {
 			apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
@@ -2782,7 +2818,7 @@ static inline void __init check_timer(void)
 	apic_printk(APIC_QUIET, KERN_INFO
 		    "...trying to set up timer as Virtual Wire IRQ...\n");
 
-	lapic_register_intr(0);
+	lapic_register_intr(0, desc);
 	apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector);	/* Fixed mode */
 	enable_8259A_irq(0);
 
@@ -2986,7 +3022,7 @@ unsigned int create_irq_nr(unsigned int irq_want)
 
 		if (cfg_new->vector != 0)
 			continue;
-		if (__assign_irq_vector(new, TARGET_CPUS) == 0)
+		if (__assign_irq_vector(new, cfg_new, TARGET_CPUS) == 0)
 			irq = new;
 		break;
 	}
@@ -3034,7 +3070,7 @@ void destroy_irq(unsigned int irq)
 	free_irte(irq);
 #endif
 	spin_lock_irqsave(&vector_lock, flags);
-	__clear_irq_vector(irq);
+	__clear_irq_vector(irq, cfg);
 	spin_unlock_irqrestore(&vector_lock, flags);
 }
 
@@ -3049,12 +3085,12 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
 	unsigned dest;
 	cpumask_t tmp;
 
+	cfg = irq_cfg(irq);
 	tmp = TARGET_CPUS;
-	err = assign_irq_vector(irq, tmp);
+	err = assign_irq_vector(irq, cfg, tmp);
 	if (err)
 		return err;
 
-	cfg = irq_cfg(irq);
 	cpus_and(tmp, cfg->domain, tmp);
 	dest = cpu_mask_to_apicid(tmp);
 
@@ -3112,35 +3148,35 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
 #ifdef CONFIG_SMP
 static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 {
+	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
 	struct msi_msg msg;
 	unsigned int dest;
 	cpumask_t tmp;
-	struct irq_desc *desc;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
 		return;
 
-	if (assign_irq_vector(irq, mask))
+	cfg = desc->chip_data;
+	if (assign_irq_vector(irq, cfg, mask))
 		return;
 
-	cfg = irq_cfg(irq);
+	set_extra_move_desc(desc, mask);
+
 	cpus_and(tmp, cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
-	read_msi_msg(irq, &msg);
+	read_msi_msg_desc(desc, &msg);
 
 	msg.data &= ~MSI_DATA_VECTOR_MASK;
 	msg.data |= MSI_DATA_VECTOR(cfg->vector);
 	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
-	write_msi_msg(irq, &msg);
-	desc = irq_to_desc(irq);
+	write_msi_msg_desc(desc, &msg);
 	desc->affinity = mask;
 }
-
 #ifdef CONFIG_INTR_REMAP
 /*
  * Migrate the MSI irq to another cpumask. This migration is
@@ -3148,11 +3184,11 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
  */
 static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 {
+	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
 	unsigned int dest;
 	cpumask_t tmp, cleanup_mask;
 	struct irte irte;
-	struct irq_desc *desc;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
@@ -3161,10 +3197,12 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 	if (get_irte(irq, &irte))
 		return;
 
-	if (assign_irq_vector(irq, mask))
+	cfg = desc->chip_data;
+	if (assign_irq_vector(irq, cfg, mask))
 		return;
 
-	cfg = irq_cfg(irq);
+	set_extra_move_desc(desc, mask);
+
 	cpus_and(tmp, cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
@@ -3188,9 +3226,9 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 		cfg->move_in_progress = 0;
 	}
 
-	desc = irq_to_desc(irq);
 	desc->affinity = mask;
 }
+
 #endif
 #endif /* CONFIG_SMP */
 
@@ -3249,7 +3287,7 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
 }
 #endif
 
-static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
+static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
 {
 	int ret;
 	struct msi_msg msg;
@@ -3258,7 +3296,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
 	if (ret < 0)
 		return ret;
 
-	set_irq_msi(irq, desc);
+	set_irq_msi(irq, msidesc);
 	write_msi_msg(irq, &msg);
 
 #ifdef CONFIG_INTR_REMAP
@@ -3381,20 +3419,22 @@ void arch_teardown_msi_irq(unsigned int irq)
 #ifdef CONFIG_SMP
 static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
 {
+	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
 	struct msi_msg msg;
 	unsigned int dest;
 	cpumask_t tmp;
-	struct irq_desc *desc;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
 		return;
 
-	if (assign_irq_vector(irq, mask))
+	cfg = desc->chip_data;
+	if (assign_irq_vector(irq, cfg, mask))
 		return;
 
-	cfg = irq_cfg(irq);
+	set_extra_move_desc(desc, mask);
+
 	cpus_and(tmp, cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
@@ -3406,9 +3446,9 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
 	dmar_msi_write(irq, &msg);
-	desc = irq_to_desc(irq);
 	desc->affinity = mask;
 }
+
 #endif /* CONFIG_SMP */
 
 struct irq_chip dmar_msi_type = {
@@ -3442,8 +3482,8 @@ int arch_setup_dmar_msi(unsigned int irq)
 #ifdef CONFIG_SMP
 static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
 {
+	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
-	struct irq_desc *desc;
 	struct msi_msg msg;
 	unsigned int dest;
 	cpumask_t tmp;
@@ -3452,10 +3492,12 @@ static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
 	if (cpus_empty(tmp))
 		return;
 
-	if (assign_irq_vector(irq, mask))
+	cfg = desc->chip_data;
+	if (assign_irq_vector(irq, cfg, mask))
 		return;
 
-	cfg = irq_cfg(irq);
+	set_extra_move_desc(desc, mask);
+
 	cpus_and(tmp, cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
@@ -3467,9 +3509,9 @@ static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
 	hpet_msi_write(irq, &msg);
-	desc = irq_to_desc(irq);
 	desc->affinity = mask;
 }
+
 #endif /* CONFIG_SMP */
 
 struct irq_chip hpet_msi_type = {
@@ -3524,26 +3566,28 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
 
 static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
 {
+	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
 	unsigned int dest;
 	cpumask_t tmp;
-	struct irq_desc *desc;
 
 	cpus_and(tmp, mask, cpu_online_map);
 	if (cpus_empty(tmp))
 		return;
 
-	if (assign_irq_vector(irq, mask))
+	cfg = desc->chip_data;
+	if (assign_irq_vector(irq, cfg, mask))
 		return;
 
-	cfg = irq_cfg(irq);
+	set_extra_move_desc(desc, mask);
+
 	cpus_and(tmp, cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
 	target_ht_irq(irq, dest, cfg->vector);
-	desc = irq_to_desc(irq);
 	desc->affinity = mask;
 }
+
 #endif
 
 static struct irq_chip ht_irq_chip = {
@@ -3563,13 +3607,13 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 	int err;
 	cpumask_t tmp;
 
+	cfg = irq_cfg(irq);
 	tmp = TARGET_CPUS;
-	err = assign_irq_vector(irq, tmp);
+	err = assign_irq_vector(irq, cfg, tmp);
 	if (!err) {
 		struct ht_irq_msg msg;
 		unsigned dest;
 
-		cfg = irq_cfg(irq);
 		cpus_and(tmp, cfg->domain, tmp);
 		dest = cpu_mask_to_apicid(tmp);
 
@@ -3615,7 +3659,9 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
 	unsigned long flags;
 	int err;
 
-	err = assign_irq_vector(irq, *eligible_cpu);
+	cfg = irq_cfg(irq);
+
+	err = assign_irq_vector(irq, cfg, *eligible_cpu);
 	if (err != 0)
 		return err;
 
@@ -3624,8 +3670,6 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
 				      irq_name);
 	spin_unlock_irqrestore(&vector_lock, flags);
 
-	cfg = irq_cfg(irq);
-
 	mmr_value = 0;
 	entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
 	BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
@@ -3806,10 +3850,10 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int p
 	 */
 	if (irq >= NR_IRQS_LEGACY) {
 		cfg = desc->chip_data;
-		add_pin_to_irq_cpu(irq, cpu, ioapic, pin);
+		add_pin_to_irq_cpu(cfg, cpu, ioapic, pin);
 	}
 
-	setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity);
+	setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity);
 
 	return 0;
 }
@@ -3866,7 +3910,7 @@ void __init setup_ioapic_dest(void)
 			desc = irq_to_desc(irq);
 			cfg = desc->chip_data;
 			if (!cfg->vector) {
-				setup_IO_APIC_irq(ioapic, pin, irq,
+				setup_IO_APIC_irq(ioapic, pin, irq, desc,
 						  irq_trigger(irq_entry),
 						  irq_polarity(irq_entry));
 				continue;
@@ -3884,10 +3928,10 @@ void __init setup_ioapic_dest(void)
 
 #ifdef CONFIG_INTR_REMAP
 			if (intr_remapping_enabled)
-				set_ir_ioapic_affinity_irq(irq, mask);
+				set_ir_ioapic_affinity_irq_desc(desc, mask);
 			else
 #endif
-				set_ioapic_affinity_irq(irq, mask);
+				set_ioapic_affinity_irq_desc(desc, mask);
 		}
 
 	}
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 74801f7df9c9..11a51f8ed3b3 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -103,11 +103,11 @@ static void msix_set_enable(struct pci_dev *dev, int enable)
 	}
 }
 
-static void msix_flush_writes(unsigned int irq)
+static void msix_flush_writes(struct irq_desc *desc)
 {
 	struct msi_desc *entry;
 
-	entry = get_irq_msi(irq);
+	entry = get_irq_desc_msi(desc);
 	BUG_ON(!entry || !entry->dev);
 	switch (entry->msi_attrib.type) {
 	case PCI_CAP_ID_MSI:
@@ -135,11 +135,11 @@ static void msix_flush_writes(unsigned int irq)
  * Returns 1 if it succeeded in masking the interrupt and 0 if the device
  * doesn't support MSI masking.
  */
-static int msi_set_mask_bits(unsigned int irq, u32 mask, u32 flag)
+static int msi_set_mask_bits(struct irq_desc *desc, u32 mask, u32 flag)
 {
 	struct msi_desc *entry;
 
-	entry = get_irq_msi(irq);
+	entry = get_irq_desc_msi(desc);
 	BUG_ON(!entry || !entry->dev);
 	switch (entry->msi_attrib.type) {
 	case PCI_CAP_ID_MSI:
@@ -172,9 +172,9 @@ static int msi_set_mask_bits(unsigned int irq, u32 mask, u32 flag)
 	return 1;
 }
 
-void read_msi_msg(unsigned int irq, struct msi_msg *msg)
+void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
 {
-	struct msi_desc *entry = get_irq_msi(irq);
+	struct msi_desc *entry = get_irq_desc_msi(desc);
 	switch(entry->msi_attrib.type) {
 	case PCI_CAP_ID_MSI:
 	{
@@ -211,9 +211,16 @@ void read_msi_msg(unsigned int irq, struct msi_msg *msg)
 	}
 }
 
-void write_msi_msg(unsigned int irq, struct msi_msg *msg)
+void read_msi_msg(unsigned int irq, struct msi_msg *msg)
 {
-	struct msi_desc *entry = get_irq_msi(irq);
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	read_msi_msg_desc(desc, msg);
+}
+
+void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
+{
+	struct msi_desc *entry = get_irq_desc_msi(desc);
 	switch (entry->msi_attrib.type) {
 	case PCI_CAP_ID_MSI:
 	{
@@ -252,21 +259,31 @@ void write_msi_msg(unsigned int irq, struct msi_msg *msg)
 	entry->msg = *msg;
 }
 
+void write_msi_msg(unsigned int irq, struct msi_msg *msg)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	write_msi_msg_desc(desc, msg);
+}
+
 void mask_msi_irq(unsigned int irq)
 {
-	msi_set_mask_bits(irq, 1, 1);
-	msix_flush_writes(irq);
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	msi_set_mask_bits(desc, 1, 1);
+	msix_flush_writes(desc);
 }
 
 void unmask_msi_irq(unsigned int irq)
 {
-	msi_set_mask_bits(irq, 1, 0);
-	msix_flush_writes(irq);
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	msi_set_mask_bits(desc, 1, 0);
+	msix_flush_writes(desc);
 }
 
 static int msi_free_irqs(struct pci_dev* dev);
 
-
 static struct msi_desc* alloc_msi_entry(void)
 {
 	struct msi_desc *entry;
@@ -303,9 +320,11 @@ static void __pci_restore_msi_state(struct pci_dev *dev)
 	pci_intx_for_msi(dev, 0);
 	msi_set_enable(dev, 0);
 	write_msi_msg(dev->irq, &entry->msg);
-	if (entry->msi_attrib.maskbit)
-		msi_set_mask_bits(dev->irq, entry->msi_attrib.maskbits_mask,
+	if (entry->msi_attrib.maskbit) {
+		struct irq_desc *desc = irq_to_desc(dev->irq);
+		msi_set_mask_bits(desc, entry->msi_attrib.maskbits_mask,
 				  entry->msi_attrib.masked);
+	}
 
 	pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control);
 	control &= ~PCI_MSI_FLAGS_QSIZE;
@@ -327,8 +346,9 @@ static void __pci_restore_msix_state(struct pci_dev *dev)
 	msix_set_enable(dev, 0);
 
 	list_for_each_entry(entry, &dev->msi_list, list) {
+		struct irq_desc *desc = irq_to_desc(entry->irq);
 		write_msi_msg(entry->irq, &entry->msg);
-		msi_set_mask_bits(entry->irq, 1, entry->msi_attrib.masked);
+		msi_set_mask_bits(desc, 1, entry->msi_attrib.masked);
 	}
 
 	BUG_ON(list_empty(&dev->msi_list));
@@ -596,7 +616,8 @@ void pci_msi_shutdown(struct pci_dev* dev)
 	/* Return the the pci reset with msi irqs unmasked */
 	if (entry->msi_attrib.maskbit) {
 		u32 mask = entry->msi_attrib.maskbits_mask;
-		msi_set_mask_bits(dev->irq, mask, ~mask);
+		struct irq_desc *desc = irq_to_desc(dev->irq);
+		msi_set_mask_bits(desc, mask, ~mask);
 	}
 	if (!entry->dev || entry->msi_attrib.type != PCI_CAP_ID_MSI)
 		return;
diff --git a/include/linux/msi.h b/include/linux/msi.h
index 8f2939227207..d2b8a1e8ca11 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -10,8 +10,11 @@ struct msi_msg {
 };
 
 /* Helper functions */
+struct irq_desc;
 extern void mask_msi_irq(unsigned int irq);
 extern void unmask_msi_irq(unsigned int irq);
+extern void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg);
+extern void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg);
 extern void read_msi_msg(unsigned int irq, struct msi_msg *msg);
 extern void write_msi_msg(unsigned int irq, struct msi_msg *msg);
 
-- 
cgit v1.2.3


From 240d367b4e6c6e3c5075e034db14dba60a6f5fa7 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Mon, 8 Dec 2008 14:06:17 -0800
Subject: sparseirq: fix Alpha build failure

Impact: build fix on Alpha

-tip testing found this build failure on the Alpha defconfig:

/home/mingo/tip/fs/proc/stat.c: In function 'show_stat':
/home/mingo/tip/fs/proc/stat.c:48: error: implicit declaration of function 'for_each_irq_desc'
/home/mingo/tip/fs/proc/stat.c:48: error: expected ';' before '{' token

can not use irq_desc() in stat.c on older architectures.

Signed-off-by: Yinghai Lu <yinghai@kernel.orgg>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 fs/proc/stat.c        | 20 +++++++++++++-------
 include/linux/irq.h   |  9 ---------
 include/linux/irqnr.h | 19 ++++++++++++++++---
 3 files changed, 29 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index a13431ab7c65..3cb9492801c0 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -45,9 +45,12 @@ static int show_stat(struct seq_file *p, void *v)
 		softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq);
 		steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal);
 		guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest);
-		for_each_irq_desc(j, desc) {
+		for_each_irq_nr(j) {
+#ifdef CONFIG_SPARSE_IRQ
+			desc = irq_to_desc(j);
 			if (!desc)
 				continue;
+#endif
 			sum += kstat_irqs_cpu(j, i);
 		}
 		sum += arch_irq_stat_cpu(i);
@@ -92,14 +95,17 @@ static int show_stat(struct seq_file *p, void *v)
 	seq_printf(p, "intr %llu", (unsigned long long)sum);
 
 	/* sum again ? it could be updated? */
-	for (j = 0; j < NR_IRQS; j++) {
-		desc = irq_to_desc(j);
+	for_each_irq_nr(j) {
 		per_irq_sum = 0;
-
-		if (desc) {
-			for_each_possible_cpu(i)
-				per_irq_sum += kstat_irqs_cpu(j, i);
+#ifdef CONFIG_SPARSE_IRQ
+		desc = irq_to_desc(j);
+		if (!desc) {
+			seq_printf(p, " %u", per_irq_sum);
+			continue;
 		}
+#endif
+		for_each_possible_cpu(i)
+			per_irq_sum += kstat_irqs_cpu(j, i);
 
 		seq_printf(p, " %u", per_irq_sum);
 	}
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 63b00439d4d2..b5749db3e5a1 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -198,7 +198,6 @@ extern void arch_init_copy_chip_data(struct irq_desc *old_desc,
 extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc);
 
 #ifndef CONFIG_SPARSE_IRQ
-
 extern struct irq_desc irq_desc[NR_IRQS];
 
 static inline struct irq_desc *irq_to_desc(unsigned int irq)
@@ -210,14 +209,6 @@ static inline struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
 	return irq_to_desc(irq);
 }
 
-#ifdef CONFIG_GENERIC_HARDIRQS
-# define for_each_irq_desc(irq, desc)		\
-	for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc++)
-# define for_each_irq_desc_reverse(irq, desc)                          \
-	for (irq = nr_irqs - 1, desc = irq_desc + (nr_irqs - 1);        \
-	    irq >= 0; irq--, desc--)
-#endif
-
 #else
 
 extern struct irq_desc *irq_to_desc(unsigned int irq);
diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h
index 7a299e989f8b..13754f813589 100644
--- a/include/linux/irqnr.h
+++ b/include/linux/irqnr.h
@@ -8,9 +8,22 @@
 # define for_each_irq_desc(irq, desc)		\
 	for (irq = 0; irq < nr_irqs; irq++)
 
-static inline early_sparse_irq_init(void)
-{
-}
+# define for_each_irq_desc_reverse(irq, desc)                          \
+	for (irq = nr_irqs - 1; irq >= 0; irq--)
+#else
+#ifndef CONFIG_SPARSE_IRQ
+
+struct irq_desc;
+extern int nr_irqs;
+# define for_each_irq_desc(irq, desc)		\
+	for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc++)
+# define for_each_irq_desc_reverse(irq, desc)                          \
+	for (irq = nr_irqs - 1, desc = irq_desc + (nr_irqs - 1);        \
+	    irq >= 0; irq--, desc--)
 #endif
+#endif
+
+#define for_each_irq_nr(irq)                   \
+       for (irq = 0; irq < nr_irqs; irq++)
 
 #endif
-- 
cgit v1.2.3


From 58494487581cb143a0d763e3056a894d5009d60a Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 26 Nov 2008 12:02:53 +0100
Subject: oprofile: update comment for oprofile_add_sample()

The cpu argument is no longer part of the parameter list.

Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 include/linux/oprofile.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h
index 5231861f357d..1ce9fe572e51 100644
--- a/include/linux/oprofile.h
+++ b/include/linux/oprofile.h
@@ -86,8 +86,7 @@ int oprofile_arch_init(struct oprofile_operations * ops);
 void oprofile_arch_exit(void);
 
 /**
- * Add a sample. This may be called from any context. Pass
- * smp_processor_id() as cpu.
+ * Add a sample. This may be called from any context.
  */
 void oprofile_add_sample(struct pt_regs * const regs, unsigned long event);
 
-- 
cgit v1.2.3


From e09373f22e76cc048ca5fe10a9ff9012f5d64309 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 26 Nov 2008 14:04:19 +0100
Subject: ring_buffer: add remaining cpu functions to ring_buffer.h

These functions are not yet in ring_buffer.h though they seems to be
part of the API.

Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 include/linux/ring_buffer.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index e097c2e6b6dc..de9d8c12e5ec 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -116,6 +116,8 @@ void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu);
 
 unsigned long ring_buffer_entries(struct ring_buffer *buffer);
 unsigned long ring_buffer_overruns(struct ring_buffer *buffer);
+unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu);
+unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu);
 
 u64 ring_buffer_time_stamp(int cpu);
 void ring_buffer_normalize_time_stamp(int cpu, u64 *ts);
-- 
cgit v1.2.3


From 0ebb26e7a4e2c5337502e98b2221e037fda911b9 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 12 Dec 2008 11:26:39 +0100
Subject: sparse irqs: handle !GENIRQ platforms

Impact: build fix

fix:

 In file included from /home/mingo/tip/arch/m68k/amiga/amiints.c:39:
 /home/mingo/tip/include/linux/interrupt.h:21: error: expected identifier or '('
 /home/mingo/tip/arch/m68k/amiga/amiints.c: In function 'amiga_init_IRQ':

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/interrupt.h |  4 ++--
 include/linux/irqnr.h     | 11 ++++++++++-
 include/linux/random.h    |  2 +-
 3 files changed, 13 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 79e915e7e8a5..777f89e00b4a 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -14,12 +14,12 @@
 #include <linux/irqflags.h>
 #include <linux/smp.h>
 #include <linux/percpu.h>
+#include <linux/irqnr.h>
+
 #include <asm/atomic.h>
 #include <asm/ptrace.h>
 #include <asm/system.h>
 
-extern int nr_irqs;
-
 /*
  * These correspond to the IORESOURCE_IRQ_* defines in
  * linux/ioport.h to select the interrupt line behaviour.  When
diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h
index 13754f813589..95d2b74641f5 100644
--- a/include/linux/irqnr.h
+++ b/include/linux/irqnr.h
@@ -1,6 +1,11 @@
 #ifndef _LINUX_IRQNR_H
 #define _LINUX_IRQNR_H
 
+/*
+ * Generic irq_desc iterators:
+ */
+#ifdef __KERNEL__
+
 #ifndef CONFIG_GENERIC_HARDIRQS
 #include <asm/irq.h>
 # define nr_irqs		NR_IRQS
@@ -11,10 +16,12 @@
 # define for_each_irq_desc_reverse(irq, desc)                          \
 	for (irq = nr_irqs - 1; irq >= 0; irq--)
 #else
+
+extern int nr_irqs;
+
 #ifndef CONFIG_SPARSE_IRQ
 
 struct irq_desc;
-extern int nr_irqs;
 # define for_each_irq_desc(irq, desc)		\
 	for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc++)
 # define for_each_irq_desc_reverse(irq, desc)                          \
@@ -26,4 +33,6 @@ extern int nr_irqs;
 #define for_each_irq_nr(irq)                   \
        for (irq = 0; irq < nr_irqs; irq++)
 
+#endif /* __KERNEL__ */
+
 #endif
diff --git a/include/linux/random.h b/include/linux/random.h
index ad9daa2374d5..adbf3bd3c6b3 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -8,6 +8,7 @@
 #define _LINUX_RANDOM_H
 
 #include <linux/ioctl.h>
+#include <linux/irqnr.h>
 
 /* ioctl()'s for the random number generator */
 
@@ -49,7 +50,6 @@ struct timer_rand_state;
 
 extern struct timer_rand_state *irq_timer_state[];
 
-extern int nr_irqs;
 static inline struct timer_rand_state *get_timer_rand_state(unsigned int irq)
 {
 	if (irq >= nr_irqs)
-- 
cgit v1.2.3


From 30cb367ea2be76bf71dbd275f38d0fd3b6f4142b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 12 Dec 2008 12:19:57 +0100
Subject: sparse irqs: add irqnr.h to the user headers list

Impact: fix build error

/home/mingo/tip/usr/include/linux/random.h:11: included file
'linux/irqnr.h' is not exported

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/Kbuild | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index e531783e5d78..95ac82340c3b 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -313,6 +313,7 @@ unifdef-y += ptrace.h
 unifdef-y += qnx4_fs.h
 unifdef-y += quota.h
 unifdef-y += random.h
+unifdef-y += irqnr.h
 unifdef-y += reboot.h
 unifdef-y += reiserfs_fs.h
 unifdef-y += reiserfs_xattr.h
-- 
cgit v1.2.3


From 5b37717a23b8e40f6cf7ad85a26ddcf41c171e2c Mon Sep 17 00:00:00 2001
From: Stefano Panella <stefano.panella@csr.com>
Date: Fri, 12 Dec 2008 13:00:06 +0000
Subject: uwb: improved MAS allocator and reservation conflict handling

Greatly enhance the MAS allocator:
  - Handle row and column reservations.
  - Permit all the available MAS to be allocated.
  - Follows the WiMedia rules on MAS selection.

Take appropriate action when reservation conflicts are detected.
  - Correctly identify which reservation wins the conflict.
  - Protect alien BP reservations.
  - If an owned reservation loses, resize/move it.
  - Follow the backoff procedure before requesting additional MAS.

When reservations are terminated, move the remaining reservations (if
necessary) so they keep following the MAS allocation rules.

Signed-off-by: Stefano Panella <stefano.panella@csr.com>
Signed-off-by: David Vrabel <david.vrabel@csr.com>
---
 drivers/usb/wusbcore/reservation.c |  13 +-
 drivers/uwb/Makefile               |   1 +
 drivers/uwb/allocator.c            | 386 +++++++++++++++++++++
 drivers/uwb/drp-avail.c            |   4 +-
 drivers/uwb/drp-ie.c               | 160 +++++++--
 drivers/uwb/drp.c                  | 681 ++++++++++++++++++++++++++++---------
 drivers/uwb/rsv.c                  | 482 ++++++++++++++++++++------
 drivers/uwb/uwb-debug.c            |  49 +--
 drivers/uwb/uwb-internal.h         |  80 ++++-
 include/linux/uwb.h                |  47 ++-
 include/linux/uwb/debug-cmd.h      |   2 +-
 include/linux/uwb/spec.h           |  25 ++
 12 files changed, 1603 insertions(+), 327 deletions(-)
 create mode 100644 drivers/uwb/allocator.c

(limited to 'include/linux')

diff --git a/drivers/usb/wusbcore/reservation.c b/drivers/usb/wusbcore/reservation.c
index 7b6525dac2f1..c37e4f83e54a 100644
--- a/drivers/usb/wusbcore/reservation.c
+++ b/drivers/usb/wusbcore/reservation.c
@@ -48,13 +48,15 @@ static void wusbhc_rsv_complete_cb(struct uwb_rsv *rsv)
 {
 	struct wusbhc *wusbhc = rsv->pal_priv;
 	struct device *dev = wusbhc->dev;
+	struct uwb_mas_bm mas;
 	char buf[72];
 
 	switch (rsv->state) {
 	case UWB_RSV_STATE_O_ESTABLISHED:
-		bitmap_scnprintf(buf, sizeof(buf), rsv->mas.bm, UWB_NUM_MAS);
+		uwb_rsv_get_usable_mas(rsv, &mas);
+		bitmap_scnprintf(buf, sizeof(buf), mas.bm, UWB_NUM_MAS);
 		dev_dbg(dev, "established reservation: %s\n", buf);
-		wusbhc_bwa_set(wusbhc, rsv->stream, &rsv->mas);
+		wusbhc_bwa_set(wusbhc, rsv->stream, &mas);
 		break;
 	case UWB_RSV_STATE_NONE:
 		dev_dbg(dev, "removed reservation\n");
@@ -85,13 +87,12 @@ int wusbhc_rsv_establish(struct wusbhc *wusbhc)
 	bcid.data[0] = wusbhc->cluster_id;
 	bcid.data[1] = 0;
 
-	rsv->owner = &rc->uwb_dev;
 	rsv->target.type = UWB_RSV_TARGET_DEVADDR;
 	rsv->target.devaddr = bcid;
 	rsv->type = UWB_DRP_TYPE_PRIVATE;
-	rsv->max_mas = 256;
-	rsv->min_mas = 16;  /* one MAS per zone? */
-	rsv->sparsity = 16; /* at least one MAS in each zone? */
+	rsv->max_mas = 256; /* try to get as much as possible */
+	rsv->min_mas = 15;  /* one MAS per zone */
+	rsv->max_interval = 1; /* max latency is one zone */
 	rsv->is_multicast = true;
 
 	ret = uwb_rsv_establish(rsv);
diff --git a/drivers/uwb/Makefile b/drivers/uwb/Makefile
index ce21a95da04a..2f98d080fe78 100644
--- a/drivers/uwb/Makefile
+++ b/drivers/uwb/Makefile
@@ -6,6 +6,7 @@ obj-$(CONFIG_UWB_I1480U)	+= i1480/
 
 uwb-objs :=		\
 	address.o	\
+	allocator.o	\
 	beacon.o	\
 	driver.o	\
 	drp.o		\
diff --git a/drivers/uwb/allocator.c b/drivers/uwb/allocator.c
new file mode 100644
index 000000000000..c8185e6b0cd5
--- /dev/null
+++ b/drivers/uwb/allocator.c
@@ -0,0 +1,386 @@
+/*
+ * UWB reservation management.
+ *
+ * Copyright (C) 2008 Cambridge Silicon Radio Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/version.h>
+#include <linux/kernel.h>
+#include <linux/uwb.h>
+
+#include "uwb-internal.h"
+
+static void uwb_rsv_fill_column_alloc(struct uwb_rsv_alloc_info *ai)
+{
+	int col, mas, safe_mas, unsafe_mas;
+	unsigned char *bm = ai->bm;
+	struct uwb_rsv_col_info *ci = ai->ci;
+	unsigned char c;
+
+	for (col = ci->csi.start_col; col < UWB_NUM_ZONES; col += ci->csi.interval) {
+    
+		safe_mas   = ci->csi.safe_mas_per_col;
+		unsafe_mas = ci->csi.unsafe_mas_per_col;
+    
+		for (mas = 0; mas < UWB_MAS_PER_ZONE; mas++ ) {
+			if (bm[col * UWB_MAS_PER_ZONE + mas] == 0) {
+	
+				if (safe_mas > 0) {
+					safe_mas--;
+					c = UWB_RSV_MAS_SAFE;
+				} else if (unsafe_mas > 0) {
+					unsafe_mas--;
+					c = UWB_RSV_MAS_UNSAFE;
+				} else {
+					break;
+				}
+				bm[col * UWB_MAS_PER_ZONE + mas] = c;
+			}
+		}
+	}
+}
+
+static void uwb_rsv_fill_row_alloc(struct uwb_rsv_alloc_info *ai)
+{
+	int mas, col, rows;
+	unsigned char *bm = ai->bm;
+	struct uwb_rsv_row_info *ri = &ai->ri;
+	unsigned char c;
+
+	rows = 1;
+	c = UWB_RSV_MAS_SAFE;
+	for (mas = UWB_MAS_PER_ZONE - 1; mas >= 0; mas--) {
+		if (ri->avail[mas] == 1) {
+      
+			if (rows > ri->used_rows) {
+				break;
+			} else if (rows > 7) {
+				c = UWB_RSV_MAS_UNSAFE;
+			}
+
+			for (col = 0; col < UWB_NUM_ZONES; col++) {
+				if (bm[col * UWB_NUM_ZONES + mas] != UWB_RSV_MAS_NOT_AVAIL) {
+					bm[col * UWB_NUM_ZONES + mas] = c;
+					if(c == UWB_RSV_MAS_SAFE)
+						ai->safe_allocated_mases++;
+					else
+						ai->unsafe_allocated_mases++;
+				}
+			}
+			rows++;
+		}
+	}
+	ai->total_allocated_mases = ai->safe_allocated_mases + ai->unsafe_allocated_mases;
+}
+
+/*
+ * Find the best column set for a given availability, interval, num safe mas and
+ * num unsafe mas.
+ *
+ * The different sets are tried in order as shown below, depending on the interval.
+ *
+ * interval = 16
+ *	deep = 0
+ *		set 1 ->  {  8 }
+ *	deep = 1
+ *		set 1 ->  {  4 }
+ *		set 2 ->  { 12 }
+ *	deep = 2
+ *		set 1 ->  {  2 }
+ *		set 2 ->  {  6 }
+ *		set 3 ->  { 10 }
+ *		set 4 ->  { 14 }
+ *	deep = 3
+ *		set 1 ->  {  1 }
+ *		set 2 ->  {  3 }
+ *		set 3 ->  {  5 }
+ *		set 4 ->  {  7 }
+ *		set 5 ->  {  9 }
+ *		set 6 ->  { 11 }
+ *		set 7 ->  { 13 }
+ *		set 8 ->  { 15 }
+ *
+ * interval = 8
+ *	deep = 0
+ *		set 1 ->  {  4  12 }
+ *	deep = 1
+ *		set 1 ->  {  2  10 }
+ *		set 2 ->  {  6  14 }
+ *	deep = 2
+ *		set 1 ->  {  1   9 }
+ *		set 2 ->  {  3  11 }
+ *		set 3 ->  {  5  13 }
+ *		set 4 ->  {  7  15 }
+ *
+ * interval = 4
+ *	deep = 0
+ *		set 1 ->  {  2   6  10  14 }
+ *	deep = 1
+ *		set 1 ->  {  1   5   9  13 }
+ *		set 2 ->  {  3   7  11  15 }
+ *
+ * interval = 2
+ *	deep = 0
+ *		set 1 ->  {  1   3   5   7   9  11  13  15 }
+ */
+static int uwb_rsv_find_best_column_set(struct uwb_rsv_alloc_info *ai, int interval, 
+					int num_safe_mas, int num_unsafe_mas)
+{
+	struct uwb_rsv_col_info *ci = ai->ci;
+	struct uwb_rsv_col_set_info *csi = &ci->csi;
+	struct uwb_rsv_col_set_info tmp_csi;
+	int deep, set, col, start_col_deep, col_start_set;
+	int start_col, max_mas_in_set, lowest_max_mas_in_deep;
+	int n_mas;
+	int found = UWB_RSV_ALLOC_NOT_FOUND; 
+
+	tmp_csi.start_col = 0;
+	start_col_deep = interval;
+	n_mas = num_unsafe_mas + num_safe_mas;
+
+	for (deep = 0; ((interval >> deep) & 0x1) == 0; deep++) {
+		start_col_deep /= 2;
+		col_start_set = 0;
+		lowest_max_mas_in_deep = UWB_MAS_PER_ZONE;
+
+		for (set = 1; set <= (1 << deep); set++) {
+			max_mas_in_set = 0;
+			start_col = start_col_deep + col_start_set;
+			for (col = start_col; col < UWB_NUM_ZONES; col += interval) {
+                
+				if (ci[col].max_avail_safe >= num_safe_mas &&
+				    ci[col].max_avail_unsafe >= n_mas) {
+					if (ci[col].highest_mas[n_mas] > max_mas_in_set)
+						max_mas_in_set = ci[col].highest_mas[n_mas];
+				} else {
+					max_mas_in_set = 0;
+					break;
+				}
+			}
+			if ((lowest_max_mas_in_deep > max_mas_in_set) && max_mas_in_set) {
+				lowest_max_mas_in_deep = max_mas_in_set;
+
+				tmp_csi.start_col = start_col;
+			}
+			col_start_set += (interval >> deep);
+		}
+
+		if (lowest_max_mas_in_deep < 8) {
+			csi->start_col = tmp_csi.start_col;
+			found = UWB_RSV_ALLOC_FOUND;
+			break;
+		} else if ((lowest_max_mas_in_deep > 8) && 
+			   (lowest_max_mas_in_deep != UWB_MAS_PER_ZONE) &&
+			   (found == UWB_RSV_ALLOC_NOT_FOUND)) {
+			csi->start_col = tmp_csi.start_col;
+			found = UWB_RSV_ALLOC_FOUND;
+		}
+	}
+
+	if (found == UWB_RSV_ALLOC_FOUND) {
+		csi->interval = interval;
+		csi->safe_mas_per_col = num_safe_mas;
+		csi->unsafe_mas_per_col = num_unsafe_mas;
+
+		ai->safe_allocated_mases = (UWB_NUM_ZONES / interval) * num_safe_mas;
+		ai->unsafe_allocated_mases = (UWB_NUM_ZONES / interval) * num_unsafe_mas;
+		ai->total_allocated_mases = ai->safe_allocated_mases + ai->unsafe_allocated_mases;
+		ai->interval = interval;		
+	}
+	return found;
+}
+
+static void get_row_descriptors(struct uwb_rsv_alloc_info *ai)
+{
+	unsigned char *bm = ai->bm;
+	struct uwb_rsv_row_info *ri = &ai->ri;
+	int col, mas;
+  
+	ri->free_rows = 16;
+	for (mas = 0; mas < UWB_MAS_PER_ZONE; mas ++) {
+		ri->avail[mas] = 1;
+		for (col = 1; col < UWB_NUM_ZONES; col++) {
+			if (bm[col * UWB_NUM_ZONES + mas] == UWB_RSV_MAS_NOT_AVAIL) {
+				ri->free_rows--;
+				ri->avail[mas]=0;
+				break;
+			}
+		}
+	}
+}
+
+static void uwb_rsv_fill_column_info(unsigned char *bm, int column, struct uwb_rsv_col_info *rci)
+{
+	int mas;
+	int block_count = 0, start_block = 0; 
+	int previous_avail = 0;
+	int available = 0;
+	int safe_mas_in_row[UWB_MAS_PER_ZONE] = {
+		8, 7, 6, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 2, 1,
+	};
+
+	rci->max_avail_safe = 0;
+
+	for (mas = 0; mas < UWB_MAS_PER_ZONE; mas ++) {
+		if (!bm[column * UWB_NUM_ZONES + mas]) {
+			available++;
+			rci->max_avail_unsafe = available;
+
+			rci->highest_mas[available] = mas;
+
+			if (previous_avail) {
+				block_count++;
+				if ((block_count > safe_mas_in_row[start_block]) &&
+				    (!rci->max_avail_safe))
+					rci->max_avail_safe = available - 1;
+			} else {
+				previous_avail = 1;
+				start_block = mas;
+				block_count = 1;
+			}
+		} else {
+			previous_avail = 0;
+		}
+	}
+	if (!rci->max_avail_safe)
+		rci->max_avail_safe = rci->max_avail_unsafe;
+}
+
+static void get_column_descriptors(struct uwb_rsv_alloc_info *ai)
+{
+	unsigned char *bm = ai->bm;
+	struct uwb_rsv_col_info *ci = ai->ci;
+	int col;
+
+	for (col = 1; col < UWB_NUM_ZONES; col++) {
+		uwb_rsv_fill_column_info(bm, col, &ci[col]);
+	}
+}
+
+static int uwb_rsv_find_best_row_alloc(struct uwb_rsv_alloc_info *ai)
+{
+	int n_rows;
+	int max_rows = ai->max_mas / UWB_USABLE_MAS_PER_ROW;
+	int min_rows = ai->min_mas / UWB_USABLE_MAS_PER_ROW;
+	if (ai->min_mas % UWB_USABLE_MAS_PER_ROW)
+		min_rows++;
+	for (n_rows = max_rows; n_rows >= min_rows; n_rows--) {
+		if (n_rows <= ai->ri.free_rows) {
+			ai->ri.used_rows = n_rows;
+			ai->interval = 1; /* row reservation */
+			uwb_rsv_fill_row_alloc(ai);
+			return UWB_RSV_ALLOC_FOUND;
+		}
+	}  
+	return UWB_RSV_ALLOC_NOT_FOUND;
+}
+
+static int uwb_rsv_find_best_col_alloc(struct uwb_rsv_alloc_info *ai, int interval)
+{
+	int n_safe, n_unsafe, n_mas;  
+	int n_column = UWB_NUM_ZONES / interval;
+	int max_per_zone = ai->max_mas / n_column;
+	int min_per_zone = ai->min_mas / n_column;
+
+	if (ai->min_mas % n_column)
+		min_per_zone++;
+
+	if (min_per_zone > UWB_MAS_PER_ZONE) {
+		return UWB_RSV_ALLOC_NOT_FOUND;
+	}
+    
+	if (max_per_zone > UWB_MAS_PER_ZONE) {
+		max_per_zone = UWB_MAS_PER_ZONE;
+	}
+    
+	for (n_mas = max_per_zone; n_mas >= min_per_zone; n_mas--) {
+		if (uwb_rsv_find_best_column_set(ai, interval, 0, n_mas) == UWB_RSV_ALLOC_NOT_FOUND)
+			continue;
+		for (n_safe = n_mas; n_safe >= 0; n_safe--) {
+			n_unsafe = n_mas - n_safe;
+			if (uwb_rsv_find_best_column_set(ai, interval, n_safe, n_unsafe) == UWB_RSV_ALLOC_FOUND) {
+				uwb_rsv_fill_column_alloc(ai);
+				return UWB_RSV_ALLOC_FOUND;
+			}
+		}
+	}
+	return UWB_RSV_ALLOC_NOT_FOUND;
+}
+
+int uwb_rsv_find_best_allocation(struct uwb_rsv *rsv, struct uwb_mas_bm *available, 
+				 struct uwb_mas_bm *result)
+{
+	struct uwb_rsv_alloc_info *ai;
+	int interval;
+	int bit_index;
+
+	ai = kzalloc(sizeof(struct uwb_rsv_alloc_info), GFP_KERNEL);
+	
+	ai->min_mas = rsv->min_mas;
+	ai->max_mas = rsv->max_mas;
+	ai->max_interval = rsv->max_interval;
+
+
+	/* fill the not available vector from the available bm */
+	for (bit_index = 0; bit_index < UWB_NUM_MAS; bit_index++) {
+		if (!test_bit(bit_index, available->bm))
+			ai->bm[bit_index] = UWB_RSV_MAS_NOT_AVAIL;
+	}
+
+	if (ai->max_interval == 1) {
+		get_row_descriptors(ai);
+		if (uwb_rsv_find_best_row_alloc(ai) == UWB_RSV_ALLOC_FOUND)
+			goto alloc_found;
+		else
+			goto alloc_not_found;
+	}
+
+	get_column_descriptors(ai);
+        
+	for (interval = 16; interval >= 2; interval>>=1) {
+		if (interval > ai->max_interval)
+			continue;
+		if (uwb_rsv_find_best_col_alloc(ai, interval) == UWB_RSV_ALLOC_FOUND)
+			goto alloc_found;
+	}
+
+	/* try row reservation if no column is found */
+	get_row_descriptors(ai);
+	if (uwb_rsv_find_best_row_alloc(ai) == UWB_RSV_ALLOC_FOUND)
+		goto alloc_found;
+	else
+		goto alloc_not_found;
+
+  alloc_found:
+	bitmap_zero(result->bm, UWB_NUM_MAS);
+	bitmap_zero(result->unsafe_bm, UWB_NUM_MAS);
+	/* fill the safe and unsafe bitmaps */
+	for (bit_index = 0; bit_index < UWB_NUM_MAS; bit_index++) {
+		if (ai->bm[bit_index] == UWB_RSV_MAS_SAFE)
+			set_bit(bit_index, result->bm);
+		else if (ai->bm[bit_index] == UWB_RSV_MAS_UNSAFE)
+			set_bit(bit_index, result->unsafe_bm);
+	}
+	bitmap_or(result->bm, result->bm, result->unsafe_bm, UWB_NUM_MAS);
+
+	result->safe   = ai->safe_allocated_mases;
+	result->unsafe = ai->unsafe_allocated_mases;
+	
+	kfree(ai);		
+	return UWB_RSV_ALLOC_FOUND;
+  
+  alloc_not_found:
+	kfree(ai);
+	return UWB_RSV_ALLOC_NOT_FOUND;
+}
diff --git a/drivers/uwb/drp-avail.c b/drivers/uwb/drp-avail.c
index 3febd8552808..40a540a5a72e 100644
--- a/drivers/uwb/drp-avail.c
+++ b/drivers/uwb/drp-avail.c
@@ -58,7 +58,7 @@ void uwb_drp_avail_init(struct uwb_rc *rc)
  *
  * avail = global & local & pending
  */
-static void uwb_drp_available(struct uwb_rc *rc, struct uwb_mas_bm *avail)
+void uwb_drp_available(struct uwb_rc *rc, struct uwb_mas_bm *avail)
 {
 	bitmap_and(avail->bm, rc->drp_avail.global, rc->drp_avail.local, UWB_NUM_MAS);
 	bitmap_and(avail->bm, avail->bm, rc->drp_avail.pending, UWB_NUM_MAS);
@@ -105,6 +105,7 @@ void uwb_drp_avail_release(struct uwb_rc *rc, struct uwb_mas_bm *mas)
 	bitmap_or(rc->drp_avail.local, rc->drp_avail.local, mas->bm, UWB_NUM_MAS);
 	bitmap_or(rc->drp_avail.pending, rc->drp_avail.pending, mas->bm, UWB_NUM_MAS);
 	rc->drp_avail.ie_valid = false;
+	uwb_rsv_handle_drp_avail_change(rc);
 }
 
 /**
@@ -280,6 +281,7 @@ int uwbd_evt_handle_rc_drp_avail(struct uwb_event *evt)
 	mutex_lock(&rc->rsvs_mutex);
 	bitmap_copy(rc->drp_avail.global, bmp, UWB_NUM_MAS);
 	rc->drp_avail.ie_valid = false;
+	uwb_rsv_handle_drp_avail_change(rc);
 	mutex_unlock(&rc->rsvs_mutex);
 
 	uwb_rsv_sched_update(rc);
diff --git a/drivers/uwb/drp-ie.c b/drivers/uwb/drp-ie.c
index 75491d47806b..2840d7bf9e67 100644
--- a/drivers/uwb/drp-ie.c
+++ b/drivers/uwb/drp-ie.c
@@ -22,6 +22,96 @@
 
 #include "uwb-internal.h"
 
+
+/*
+ * Return the reason code for a reservations's DRP IE.
+ */
+int uwb_rsv_reason_code(struct uwb_rsv *rsv)
+{
+	static const int reason_codes[] = {
+		[UWB_RSV_STATE_O_INITIATED]          = UWB_DRP_REASON_ACCEPTED,
+		[UWB_RSV_STATE_O_PENDING]            = UWB_DRP_REASON_ACCEPTED,
+		[UWB_RSV_STATE_O_MODIFIED]           = UWB_DRP_REASON_MODIFIED,
+		[UWB_RSV_STATE_O_ESTABLISHED]        = UWB_DRP_REASON_ACCEPTED,
+		[UWB_RSV_STATE_O_TO_BE_MOVED]        = UWB_DRP_REASON_ACCEPTED,
+		[UWB_RSV_STATE_O_MOVE_COMBINING]     = UWB_DRP_REASON_MODIFIED,
+		[UWB_RSV_STATE_O_MOVE_REDUCING]      = UWB_DRP_REASON_MODIFIED,
+		[UWB_RSV_STATE_O_MOVE_EXPANDING]     = UWB_DRP_REASON_ACCEPTED,
+		[UWB_RSV_STATE_T_ACCEPTED]           = UWB_DRP_REASON_ACCEPTED,
+		[UWB_RSV_STATE_T_CONFLICT]           = UWB_DRP_REASON_CONFLICT,
+		[UWB_RSV_STATE_T_PENDING]            = UWB_DRP_REASON_PENDING,
+		[UWB_RSV_STATE_T_DENIED]             = UWB_DRP_REASON_DENIED,
+		[UWB_RSV_STATE_T_RESIZED]            = UWB_DRP_REASON_ACCEPTED,
+		[UWB_RSV_STATE_T_EXPANDING_ACCEPTED] = UWB_DRP_REASON_ACCEPTED,
+		[UWB_RSV_STATE_T_EXPANDING_CONFLICT] = UWB_DRP_REASON_CONFLICT,
+		[UWB_RSV_STATE_T_EXPANDING_PENDING]  = UWB_DRP_REASON_PENDING,
+		[UWB_RSV_STATE_T_EXPANDING_DENIED]   = UWB_DRP_REASON_DENIED,
+	};
+
+	return reason_codes[rsv->state];
+}
+
+/*
+ * Return the reason code for a reservations's companion DRP IE .
+ */
+int uwb_rsv_companion_reason_code(struct uwb_rsv *rsv)
+{
+	static const int companion_reason_codes[] = {
+		[UWB_RSV_STATE_O_MOVE_EXPANDING]     = UWB_DRP_REASON_ACCEPTED,
+		[UWB_RSV_STATE_T_EXPANDING_ACCEPTED] = UWB_DRP_REASON_ACCEPTED,
+		[UWB_RSV_STATE_T_EXPANDING_CONFLICT] = UWB_DRP_REASON_CONFLICT,
+		[UWB_RSV_STATE_T_EXPANDING_PENDING]  = UWB_DRP_REASON_PENDING,
+		[UWB_RSV_STATE_T_EXPANDING_DENIED]   = UWB_DRP_REASON_DENIED,
+	};
+
+	return companion_reason_codes[rsv->state];
+}
+
+/*
+ * Return the status bit for a reservations's DRP IE.
+ */
+int uwb_rsv_status(struct uwb_rsv *rsv)
+{
+	static const int statuses[] = {
+		[UWB_RSV_STATE_O_INITIATED]          = 0,
+		[UWB_RSV_STATE_O_PENDING]            = 0,
+		[UWB_RSV_STATE_O_MODIFIED]           = 1,
+		[UWB_RSV_STATE_O_ESTABLISHED]        = 1,
+		[UWB_RSV_STATE_O_TO_BE_MOVED]        = 0,
+		[UWB_RSV_STATE_O_MOVE_COMBINING]     = 1,
+		[UWB_RSV_STATE_O_MOVE_REDUCING]      = 1,
+		[UWB_RSV_STATE_O_MOVE_EXPANDING]     = 1,
+		[UWB_RSV_STATE_T_ACCEPTED]           = 1,
+		[UWB_RSV_STATE_T_CONFLICT]           = 0,
+		[UWB_RSV_STATE_T_PENDING]            = 0,
+		[UWB_RSV_STATE_T_DENIED]             = 0,
+		[UWB_RSV_STATE_T_RESIZED]            = 1,
+		[UWB_RSV_STATE_T_EXPANDING_ACCEPTED] = 1,
+		[UWB_RSV_STATE_T_EXPANDING_CONFLICT] = 1,
+		[UWB_RSV_STATE_T_EXPANDING_PENDING]  = 1,
+		[UWB_RSV_STATE_T_EXPANDING_DENIED]   = 1,
+
+	};
+
+	return statuses[rsv->state];
+}
+
+/*
+ * Return the status bit for a reservations's companion DRP IE .
+ */
+int uwb_rsv_companion_status(struct uwb_rsv *rsv)
+{
+	static const int companion_statuses[] = {
+		[UWB_RSV_STATE_O_MOVE_EXPANDING]     = 0,
+		[UWB_RSV_STATE_T_EXPANDING_ACCEPTED] = 1,
+		[UWB_RSV_STATE_T_EXPANDING_CONFLICT] = 0,
+		[UWB_RSV_STATE_T_EXPANDING_PENDING]  = 0,
+		[UWB_RSV_STATE_T_EXPANDING_DENIED]   = 0,
+	};
+
+	return companion_statuses[rsv->state];
+}
+
 /*
  * Allocate a DRP IE.
  *
@@ -33,16 +123,12 @@
 static struct uwb_ie_drp *uwb_drp_ie_alloc(void)
 {
 	struct uwb_ie_drp *drp_ie;
-	unsigned tiebreaker;
 
 	drp_ie = kzalloc(sizeof(struct uwb_ie_drp) +
 			UWB_NUM_ZONES * sizeof(struct uwb_drp_alloc),
 			GFP_KERNEL);
 	if (drp_ie) {
 		drp_ie->hdr.element_id = UWB_IE_DRP;
-
-		get_random_bytes(&tiebreaker, sizeof(unsigned));
-		uwb_ie_drp_set_tiebreaker(drp_ie, tiebreaker & 1);
 	}
 	return drp_ie;
 }
@@ -103,43 +189,17 @@ static void uwb_drp_ie_from_bm(struct uwb_ie_drp *drp_ie,
  */
 int uwb_drp_ie_update(struct uwb_rsv *rsv)
 {
-	struct device *dev = &rsv->rc->uwb_dev.dev;
 	struct uwb_ie_drp *drp_ie;
-	int reason_code, status;
+	struct uwb_rsv_move *mv;
+	int unsafe;
 
-	switch (rsv->state) {
-	case UWB_RSV_STATE_NONE:
+	if (rsv->state == UWB_RSV_STATE_NONE) {
 		kfree(rsv->drp_ie);
 		rsv->drp_ie = NULL;
 		return 0;
-	case UWB_RSV_STATE_O_INITIATED:
-		reason_code = UWB_DRP_REASON_ACCEPTED;
-		status = 0;
-		break;
-	case UWB_RSV_STATE_O_PENDING:
-		reason_code = UWB_DRP_REASON_ACCEPTED;
-		status = 0;
-		break;
-	case UWB_RSV_STATE_O_MODIFIED:
-		reason_code = UWB_DRP_REASON_MODIFIED;
-		status = 1;
-		break;
-	case UWB_RSV_STATE_O_ESTABLISHED:
-		reason_code = UWB_DRP_REASON_ACCEPTED;
-		status = 1;
-		break;
-	case UWB_RSV_STATE_T_ACCEPTED:
-		reason_code = UWB_DRP_REASON_ACCEPTED;
-		status = 1;
-		break;
-	case UWB_RSV_STATE_T_DENIED:
-		reason_code = UWB_DRP_REASON_DENIED;
-		status = 0;
-		break;
-	default:
-		dev_dbg(dev, "rsv with unhandled state (%d)\n", rsv->state);
-		return -EINVAL;
 	}
+	
+	unsafe = rsv->mas.unsafe ? 1 : 0;
 
 	if (rsv->drp_ie == NULL) {
 		rsv->drp_ie = uwb_drp_ie_alloc();
@@ -148,9 +208,11 @@ int uwb_drp_ie_update(struct uwb_rsv *rsv)
 	}
 	drp_ie = rsv->drp_ie;
 
+	uwb_ie_drp_set_unsafe(drp_ie,       unsafe);
+	uwb_ie_drp_set_tiebreaker(drp_ie,   rsv->tiebreaker);
 	uwb_ie_drp_set_owner(drp_ie,        uwb_rsv_is_owner(rsv));
-	uwb_ie_drp_set_status(drp_ie,       status);
-	uwb_ie_drp_set_reason_code(drp_ie,  reason_code);
+	uwb_ie_drp_set_status(drp_ie,       uwb_rsv_status(rsv));
+	uwb_ie_drp_set_reason_code(drp_ie,  uwb_rsv_reason_code(rsv));
 	uwb_ie_drp_set_stream_index(drp_ie, rsv->stream);
 	uwb_ie_drp_set_type(drp_ie,         rsv->type);
 
@@ -168,6 +230,27 @@ int uwb_drp_ie_update(struct uwb_rsv *rsv)
 
 	uwb_drp_ie_from_bm(drp_ie, &rsv->mas);
 
+	if (uwb_rsv_has_two_drp_ies(rsv)) {
+		mv = &rsv->mv; 
+		if (mv->companion_drp_ie == NULL) {
+			mv->companion_drp_ie = uwb_drp_ie_alloc();
+			if (mv->companion_drp_ie == NULL)
+				return -ENOMEM;
+		}
+		drp_ie = mv->companion_drp_ie;
+		
+		/* keep all the same configuration of the main drp_ie */
+		memcpy(drp_ie, rsv->drp_ie, sizeof(struct uwb_ie_drp));
+		
+
+		/* FIXME: handle properly the unsafe bit */
+		uwb_ie_drp_set_unsafe(drp_ie,       1);
+		uwb_ie_drp_set_status(drp_ie,       uwb_rsv_companion_status(rsv));
+		uwb_ie_drp_set_reason_code(drp_ie,  uwb_rsv_companion_reason_code(rsv));
+	
+		uwb_drp_ie_from_bm(drp_ie, &mv->companion_mas);
+	}
+
 	rsv->ie_valid = true;
 	return 0;
 }
@@ -218,6 +301,8 @@ void uwb_drp_ie_to_bm(struct uwb_mas_bm *bm, const struct uwb_ie_drp *drp_ie)
 	u8 zone;
 	u16 zone_mask;
 
+	bitmap_zero(bm->bm, UWB_NUM_MAS);
+
 	for (cnt = 0; cnt < numallocs; cnt++) {
 		alloc = &drp_ie->allocs[cnt];
 		zone_bm = le16_to_cpu(alloc->zone_bm);
@@ -229,3 +314,4 @@ void uwb_drp_ie_to_bm(struct uwb_mas_bm *bm, const struct uwb_ie_drp *drp_ie)
 		}
 	}
 }
+
diff --git a/drivers/uwb/drp.c b/drivers/uwb/drp.c
index fe328146adb7..2b4f9406789d 100644
--- a/drivers/uwb/drp.c
+++ b/drivers/uwb/drp.c
@@ -23,6 +23,59 @@
 #include <linux/delay.h>
 #include "uwb-internal.h"
 
+
+/* DRP Conflict Actions ([ECMA-368 2nd Edition] 17.4.6) */
+enum uwb_drp_conflict_action {
+	/* Reservation is mantained, no action needed */
+	UWB_DRP_CONFLICT_MANTAIN = 0,
+	
+	/* the device shall not transmit frames in conflicting MASs in
+	 * the following superframe. If the device is the reservation
+	 * target, it shall also set the Reason Code in its DRP IE to
+	 * Conflict in its beacon in the following superframe.
+	 */
+	UWB_DRP_CONFLICT_ACT1,
+	
+	/* the device shall not set the Reservation Status bit to ONE
+	 * and shall not transmit frames in conflicting MASs. If the
+	 * device is the reservation target, it shall also set the
+	 * Reason Code in its DRP IE to Conflict.
+	 */	
+	UWB_DRP_CONFLICT_ACT2,
+
+	/* the device shall not transmit frames in conflicting MASs in
+	 * the following superframe. It shall remove the conflicting
+	 * MASs from the reservation or set the Reservation Status to
+	 * ZERO in its beacon in the following superframe. If the
+	 * device is the reservation target, it shall also set the
+	 * Reason Code in its DRP IE to Conflict.
+	 */
+	UWB_DRP_CONFLICT_ACT3,
+};
+
+
+static void uwb_rc_set_drp_cmd_done(struct uwb_rc *rc, void *arg,
+				    struct uwb_rceb *reply, ssize_t reply_size)
+{
+	struct uwb_rc_evt_set_drp_ie *r = (struct uwb_rc_evt_set_drp_ie *)reply;
+
+	if (r != NULL) {
+		if (r->bResultCode != UWB_RC_RES_SUCCESS)
+			dev_err(&rc->uwb_dev.dev, "SET-DRP-IE failed: %s (%d)\n",
+				uwb_rc_strerror(r->bResultCode), r->bResultCode);
+	} else
+		dev_err(&rc->uwb_dev.dev, "SET-DRP-IE: timeout\n");
+
+	spin_lock(&rc->rsvs_lock);
+	if (rc->set_drp_ie_pending > 1) {
+		rc->set_drp_ie_pending = 0;
+		uwb_rsv_queue_update(rc);	
+	} else {
+		rc->set_drp_ie_pending = 0;	
+	}
+	spin_unlock(&rc->rsvs_lock);
+}
+
 /**
  * Construct and send the SET DRP IE
  *
@@ -46,18 +99,23 @@
 int uwb_rc_send_all_drp_ie(struct uwb_rc *rc)
 {
 	int result;
-	struct device *dev = &rc->uwb_dev.dev;
 	struct uwb_rc_cmd_set_drp_ie *cmd;
-	struct uwb_rc_evt_set_drp_ie reply;
 	struct uwb_rsv *rsv;
+	struct uwb_rsv_move *mv;
 	int num_bytes = 0;
 	u8 *IEDataptr;
 
 	result = -ENOMEM;
 	/* First traverse all reservations to determine memory needed. */
 	list_for_each_entry(rsv, &rc->reservations, rc_node) {
-		if (rsv->drp_ie != NULL)
+		if (rsv->drp_ie != NULL) {
 			num_bytes += rsv->drp_ie->hdr.length + 2;
+			if (uwb_rsv_has_two_drp_ies(rsv) &&
+				(rsv->mv.companion_drp_ie != NULL)) {
+				mv = &rsv->mv;
+				num_bytes += mv->companion_drp_ie->hdr.length + 2;	
+			}
+		}
 	}
 	num_bytes += sizeof(rc->drp_avail.ie);
 	cmd = kzalloc(sizeof(*cmd) + num_bytes, GFP_KERNEL);
@@ -68,109 +126,322 @@ int uwb_rc_send_all_drp_ie(struct uwb_rc *rc)
 	cmd->wIELength = num_bytes;
 	IEDataptr = (u8 *)&cmd->IEData[0];
 
+	/* FIXME: DRV avail IE is not always needed */
+	/* put DRP avail IE first */
+	memcpy(IEDataptr, &rc->drp_avail.ie, sizeof(rc->drp_avail.ie));
+	IEDataptr += sizeof(struct uwb_ie_drp_avail);
+
 	/* Next traverse all reservations to place IEs in allocated memory. */
 	list_for_each_entry(rsv, &rc->reservations, rc_node) {
 		if (rsv->drp_ie != NULL) {
 			memcpy(IEDataptr, rsv->drp_ie,
 			       rsv->drp_ie->hdr.length + 2);
 			IEDataptr += rsv->drp_ie->hdr.length + 2;
+			
+			if (uwb_rsv_has_two_drp_ies(rsv) &&
+				(rsv->mv.companion_drp_ie != NULL)) {
+				mv = &rsv->mv;
+				memcpy(IEDataptr, mv->companion_drp_ie,
+				       mv->companion_drp_ie->hdr.length + 2);
+				IEDataptr += mv->companion_drp_ie->hdr.length + 2;	
+			}
 		}
 	}
-	memcpy(IEDataptr, &rc->drp_avail.ie, sizeof(rc->drp_avail.ie));
 
-	reply.rceb.bEventType = UWB_RC_CET_GENERAL;
-	reply.rceb.wEvent = UWB_RC_CMD_SET_DRP_IE;
-	result = uwb_rc_cmd(rc, "SET-DRP-IE", &cmd->rccb,
-			sizeof(*cmd) + num_bytes, &reply.rceb,
-			sizeof(reply));
-	if (result < 0)
-		goto error_cmd;
-	result = le16_to_cpu(reply.wRemainingSpace);
-	if (reply.bResultCode != UWB_RC_RES_SUCCESS) {
-		dev_err(&rc->uwb_dev.dev, "SET-DRP-IE: command execution "
-				"failed: %s (%d). RemainingSpace in beacon "
-				"= %d\n", uwb_rc_strerror(reply.bResultCode),
-				reply.bResultCode, result);
-		result = -EIO;
-	} else {
-		dev_dbg(dev, "SET-DRP-IE sent. RemainingSpace in beacon "
-			     "= %d.\n", result);
-		result = 0;
-	}
-error_cmd:
+	result = uwb_rc_cmd_async(rc, "SET-DRP-IE", &cmd->rccb, sizeof(*cmd) + num_bytes,
+				  UWB_RC_CET_GENERAL, UWB_RC_CMD_SET_DRP_IE,
+				  uwb_rc_set_drp_cmd_done, NULL);
+	
+	rc->set_drp_ie_pending = 1;
+
 	kfree(cmd);
 error:
 	return result;
 }
 
-void uwb_drp_handle_timeout(struct uwb_rsv *rsv)
+/*
+ * Evaluate the action to perform using conflict resolution rules
+ *
+ * Return a uwb_drp_conflict_action.
+ */
+static int evaluate_conflict_action(struct uwb_ie_drp *ext_drp_ie, int ext_beacon_slot,
+				    struct uwb_rsv *rsv, int our_status)
 {
-	struct device *dev = &rsv->rc->uwb_dev.dev;
+	int our_tie_breaker = rsv->tiebreaker;
+	int our_type        = rsv->type;
+	int our_beacon_slot = rsv->rc->uwb_dev.beacon_slot;
+
+	int ext_tie_breaker = uwb_ie_drp_tiebreaker(ext_drp_ie);
+	int ext_status      = uwb_ie_drp_status(ext_drp_ie);
+	int ext_type        = uwb_ie_drp_type(ext_drp_ie);
+	
+	
+	/* [ECMA-368 2nd Edition] 17.4.6 */
+	if (ext_type == UWB_DRP_TYPE_PCA && our_type == UWB_DRP_TYPE_PCA) {
+		return UWB_DRP_CONFLICT_MANTAIN;
+	}
+
+	/* [ECMA-368 2nd Edition] 17.4.6-1 */
+	if (our_type == UWB_DRP_TYPE_ALIEN_BP) {
+		return UWB_DRP_CONFLICT_MANTAIN;
+	}
+	
+	/* [ECMA-368 2nd Edition] 17.4.6-2 */
+	if (ext_type == UWB_DRP_TYPE_ALIEN_BP) {
+		/* here we know our_type != UWB_DRP_TYPE_ALIEN_BP */
+		return UWB_DRP_CONFLICT_ACT1;
+	}
+
+	/* [ECMA-368 2nd Edition] 17.4.6-3 */
+	if (our_status == 0 && ext_status == 1) {
+		return UWB_DRP_CONFLICT_ACT2;
+	}
 
-	dev_dbg(dev, "reservation timeout in state %s (%d)\n",
-		uwb_rsv_state_str(rsv->state), rsv->state);
+	/* [ECMA-368 2nd Edition] 17.4.6-4 */
+	if (our_status == 1 && ext_status == 0) {
+		return UWB_DRP_CONFLICT_MANTAIN;
+	}
 
-	switch (rsv->state) {
-	case UWB_RSV_STATE_O_INITIATED:
-		if (rsv->is_multicast) {
-			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_ESTABLISHED);
-			return;
+	/* [ECMA-368 2nd Edition] 17.4.6-5a */
+	if (our_tie_breaker == ext_tie_breaker &&
+	    our_beacon_slot <  ext_beacon_slot) {
+		return UWB_DRP_CONFLICT_MANTAIN;
+	}
+
+	/* [ECMA-368 2nd Edition] 17.4.6-5b */
+	if (our_tie_breaker != ext_tie_breaker &&
+	    our_beacon_slot >  ext_beacon_slot) {
+		return UWB_DRP_CONFLICT_MANTAIN;
+	}
+	
+	if (our_status == 0) {
+		if (our_tie_breaker == ext_tie_breaker) {
+			/* [ECMA-368 2nd Edition] 17.4.6-6a */
+			if (our_beacon_slot > ext_beacon_slot) {
+				return UWB_DRP_CONFLICT_ACT2;
+			}
+		} else  {
+			/* [ECMA-368 2nd Edition] 17.4.6-6b */
+			if (our_beacon_slot < ext_beacon_slot) {
+				return UWB_DRP_CONFLICT_ACT2;
+			}
 		}
-		break;
-	case UWB_RSV_STATE_O_ESTABLISHED:
-		if (rsv->is_multicast)
-			return;
-		break;
-	default:
-		break;
+	} else {
+		if (our_tie_breaker == ext_tie_breaker) {
+			/* [ECMA-368 2nd Edition] 17.4.6-7a */
+			if (our_beacon_slot > ext_beacon_slot) {
+				return UWB_DRP_CONFLICT_ACT3;
+			}
+		} else {
+			/* [ECMA-368 2nd Edition] 17.4.6-7b */
+			if (our_beacon_slot < ext_beacon_slot) {
+				return UWB_DRP_CONFLICT_ACT3;
+			}
+		}
+	}
+	return UWB_DRP_CONFLICT_MANTAIN;
+}
+
+static void handle_conflict_normal(struct uwb_ie_drp *drp_ie, 
+				   int ext_beacon_slot, 
+				   struct uwb_rsv *rsv, 
+				   struct uwb_mas_bm *conflicting_mas)
+{
+	struct uwb_rc *rc = rsv->rc;
+	struct uwb_rsv_move *mv = &rsv->mv;
+	struct uwb_drp_backoff_win *bow = &rc->bow;
+	int action;
+
+	action = evaluate_conflict_action(drp_ie, ext_beacon_slot, rsv, uwb_rsv_status(rsv));
+
+	if (uwb_rsv_is_owner(rsv)) {
+		switch(action) {
+		case UWB_DRP_CONFLICT_ACT2:
+			/* try move */
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_TO_BE_MOVED);
+			if (bow->can_reserve_extra_mases == false)
+				uwb_rsv_backoff_win_increment(rc);
+			
+			break;
+		case UWB_DRP_CONFLICT_ACT3:
+			uwb_rsv_backoff_win_increment(rc);
+			/* drop some mases with reason modified */
+			/* put in the companion the mases to be dropped */
+			bitmap_and(mv->companion_mas.bm, rsv->mas.bm, conflicting_mas->bm, UWB_NUM_MAS);
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MODIFIED);
+		default:
+			break;
+		}
+	} else {
+		switch(action) {
+		case UWB_DRP_CONFLICT_ACT2:
+		case UWB_DRP_CONFLICT_ACT3:
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_CONFLICT);	
+		default:
+			break;
+		}
+
+	}
+	
+}
+
+static void handle_conflict_expanding(struct uwb_ie_drp *drp_ie, int ext_beacon_slot,
+				      struct uwb_rsv *rsv, bool companion_only,
+				      struct uwb_mas_bm *conflicting_mas)
+{
+	struct uwb_rc *rc = rsv->rc;
+	struct uwb_drp_backoff_win *bow = &rc->bow;
+	struct uwb_rsv_move *mv = &rsv->mv;
+	int action;
+	
+	if (companion_only) {
+		/* status of companion is 0 at this point */
+		action = evaluate_conflict_action(drp_ie, ext_beacon_slot, rsv, 0);
+		if (uwb_rsv_is_owner(rsv)) {
+			switch(action) {
+			case UWB_DRP_CONFLICT_ACT2:
+			case UWB_DRP_CONFLICT_ACT3:
+				uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_ESTABLISHED);
+				rsv->needs_release_companion_mas = false;
+				if (bow->can_reserve_extra_mases == false)
+					uwb_rsv_backoff_win_increment(rc);
+				uwb_drp_avail_release(rsv->rc, &rsv->mv.companion_mas);
+			}
+		} else { /* rsv is target */			
+			switch(action) {
+			case UWB_DRP_CONFLICT_ACT2:
+			case UWB_DRP_CONFLICT_ACT3:
+				uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_EXPANDING_CONFLICT);
+                                /* send_drp_avail_ie = true; */
+			}
+		}
+	} else { /* also base part of the reservation is conflicting */		
+		if (uwb_rsv_is_owner(rsv)) {
+			uwb_rsv_backoff_win_increment(rc);
+			/* remove companion part */
+			uwb_drp_avail_release(rsv->rc, &rsv->mv.companion_mas);
+
+			/* drop some mases with reason modified */
+
+			/* put in the companion the mases to be dropped */
+			bitmap_andnot(mv->companion_mas.bm, rsv->mas.bm, conflicting_mas->bm, UWB_NUM_MAS);
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MODIFIED);
+		} else { /* it is a target rsv */
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_CONFLICT);
+                        /* send_drp_avail_ie = true; */
+		}
+	}
+}
+
+static void uwb_drp_handle_conflict_rsv(struct uwb_rc *rc, struct uwb_rsv *rsv,
+					struct uwb_rc_evt_drp *drp_evt, 
+					struct uwb_ie_drp *drp_ie,
+					struct uwb_mas_bm *conflicting_mas)
+{
+	struct uwb_rsv_move *mv;
+
+	/* check if the conflicting reservation has two drp_ies */
+	if (uwb_rsv_has_two_drp_ies(rsv)) {
+		mv = &rsv->mv;
+		if (bitmap_intersects(rsv->mas.bm, conflicting_mas->bm, UWB_NUM_MAS)) {
+			handle_conflict_expanding(drp_ie, drp_evt->beacon_slot_number,
+						  rsv, false, conflicting_mas);
+		} else {
+			if (bitmap_intersects(mv->companion_mas.bm, conflicting_mas->bm, UWB_NUM_MAS)) {
+				handle_conflict_expanding(drp_ie, drp_evt->beacon_slot_number,
+							  rsv, true, conflicting_mas);	
+			}
+		}
+	} else if (bitmap_intersects(rsv->mas.bm, conflicting_mas->bm, UWB_NUM_MAS)) {
+		handle_conflict_normal(drp_ie, drp_evt->beacon_slot_number, rsv, conflicting_mas);
 	}
-	uwb_rsv_remove(rsv);
 }
 
+static void uwb_drp_handle_all_conflict_rsv(struct uwb_rc *rc,
+					    struct uwb_rc_evt_drp *drp_evt, 
+					    struct uwb_ie_drp *drp_ie,
+					    struct uwb_mas_bm *conflicting_mas)
+{
+	struct uwb_rsv *rsv;
+	
+	list_for_each_entry(rsv, &rc->reservations, rc_node) {
+		uwb_drp_handle_conflict_rsv(rc, rsv, drp_evt, drp_ie, conflicting_mas);	
+	}
+}
+	
 /*
  * Based on the DRP IE, transition a target reservation to a new
  * state.
  */
 static void uwb_drp_process_target(struct uwb_rc *rc, struct uwb_rsv *rsv,
-				   struct uwb_ie_drp *drp_ie)
+				   struct uwb_ie_drp *drp_ie, struct uwb_rc_evt_drp *drp_evt)
 {
 	struct device *dev = &rc->uwb_dev.dev;
+	struct uwb_rsv_move *mv = &rsv->mv;
 	int status;
 	enum uwb_drp_reason reason_code;
-
+	struct uwb_mas_bm mas;
+	
 	status = uwb_ie_drp_status(drp_ie);
 	reason_code = uwb_ie_drp_reason_code(drp_ie);
+	uwb_drp_ie_to_bm(&mas, drp_ie);
 
-	if (status) {
-		switch (reason_code) {
-		case UWB_DRP_REASON_ACCEPTED:
-			uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_ACCEPTED);
-			break;
-		case UWB_DRP_REASON_MODIFIED:
-			dev_err(dev, "FIXME: unhandled reason code (%d/%d)\n",
-				reason_code, status);
+	switch (reason_code) {
+	case UWB_DRP_REASON_ACCEPTED:
+
+		if (rsv->state == UWB_RSV_STATE_T_CONFLICT) {
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_CONFLICT);
 			break;
-		default:
-			dev_warn(dev, "ignoring invalid DRP IE state (%d/%d)\n",
-				 reason_code, status);
 		}
-	} else {
-		switch (reason_code) {
-		case UWB_DRP_REASON_ACCEPTED:
-			/* New reservations are handled in uwb_rsv_find(). */
-			break;
-		case UWB_DRP_REASON_DENIED:
-			uwb_rsv_set_state(rsv, UWB_RSV_STATE_NONE);
-			break;
-		case UWB_DRP_REASON_CONFLICT:
-		case UWB_DRP_REASON_MODIFIED:
-			dev_err(dev, "FIXME: unhandled reason code (%d/%d)\n",
-				reason_code, status);
+
+		if (rsv->state == UWB_RSV_STATE_T_EXPANDING_ACCEPTED) {
+			/* drp_ie is companion */
+			if (!bitmap_equal(rsv->mas.bm, mas.bm, UWB_NUM_MAS))
+				/* stroke companion */
+				uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_EXPANDING_ACCEPTED);	
+		} else {
+			if (!bitmap_equal(rsv->mas.bm, mas.bm, UWB_NUM_MAS)) {
+				if (uwb_drp_avail_reserve_pending(rc, &mas) == -EBUSY) {
+					/* FIXME: there is a conflict, find
+					 * the conflicting reservations and
+					 * take a sensible action. Consider
+					 * that in drp_ie there is the
+					 * "neighbour" */
+					uwb_drp_handle_all_conflict_rsv(rc, drp_evt, drp_ie, &mas);
+				} else {
+					/* accept the extra reservation */
+					bitmap_copy(mv->companion_mas.bm, mas.bm, UWB_NUM_MAS);
+					uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_EXPANDING_ACCEPTED);
+				}
+			} else {
+				if (status) {
+					uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_ACCEPTED);
+				}
+			}
+			
+		}
+		break;
+
+	case UWB_DRP_REASON_MODIFIED:
+		/* check to see if we have already modified the reservation */
+		if (bitmap_equal(rsv->mas.bm, mas.bm, UWB_NUM_MAS)) {
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_ACCEPTED);
 			break;
-		default:
-			dev_warn(dev, "ignoring invalid DRP IE state (%d/%d)\n",
-				 reason_code, status);
 		}
+
+		/* find if the owner wants to expand or reduce */
+		if (bitmap_subset(mas.bm, rsv->mas.bm, UWB_NUM_MAS)) {
+			/* owner is reducing */
+			bitmap_andnot(mv->companion_mas.bm, rsv->mas.bm, mas.bm, UWB_NUM_MAS);
+			uwb_drp_avail_release(rsv->rc, &mv->companion_mas);
+		}
+
+		bitmap_copy(rsv->mas.bm, mas.bm, UWB_NUM_MAS);
+		uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_RESIZED);
+		break;
+	default:
+		dev_warn(dev, "ignoring invalid DRP IE state (%d/%d)\n",
+			 reason_code, status);
 	}
 }
 
@@ -179,23 +450,60 @@ static void uwb_drp_process_target(struct uwb_rc *rc, struct uwb_rsv *rsv,
  * state.
  */
 static void uwb_drp_process_owner(struct uwb_rc *rc, struct uwb_rsv *rsv,
-				  struct uwb_ie_drp *drp_ie)
+				  struct uwb_dev *src, struct uwb_ie_drp *drp_ie,
+				  struct uwb_rc_evt_drp *drp_evt)
 {
 	struct device *dev = &rc->uwb_dev.dev;
+	struct uwb_rsv_move *mv = &rsv->mv;
 	int status;
 	enum uwb_drp_reason reason_code;
+	struct uwb_mas_bm mas;
 
 	status = uwb_ie_drp_status(drp_ie);
 	reason_code = uwb_ie_drp_reason_code(drp_ie);
+	uwb_drp_ie_to_bm(&mas, drp_ie);
 
 	if (status) {
 		switch (reason_code) {
 		case UWB_DRP_REASON_ACCEPTED:
-			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_ESTABLISHED);
-			break;
-		case UWB_DRP_REASON_MODIFIED:
-			dev_err(dev, "FIXME: unhandled reason code (%d/%d)\n",
-				reason_code, status);
+			switch (rsv->state) {
+			case UWB_RSV_STATE_O_PENDING:
+			case UWB_RSV_STATE_O_INITIATED:
+			case UWB_RSV_STATE_O_ESTABLISHED:
+				uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_ESTABLISHED);
+				break;
+			case UWB_RSV_STATE_O_MODIFIED:
+				if (bitmap_equal(mas.bm, rsv->mas.bm, UWB_NUM_MAS)) {
+					uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_ESTABLISHED);
+				} else {
+					uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MODIFIED);	
+				}
+				break;
+				
+			case UWB_RSV_STATE_O_MOVE_REDUCING: /* shouldn' t be a problem */
+				if (bitmap_equal(mas.bm, rsv->mas.bm, UWB_NUM_MAS)) {
+					uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_ESTABLISHED);
+				} else {
+					uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_REDUCING);	
+				}
+				break;
+			case UWB_RSV_STATE_O_MOVE_EXPANDING:
+				if (bitmap_equal(mas.bm, mv->companion_mas.bm, UWB_NUM_MAS)) {
+					/* Companion reservation accepted */
+					uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_COMBINING);
+				} else {
+					uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_EXPANDING);
+				}
+				break;
+			case UWB_RSV_STATE_O_MOVE_COMBINING:
+				if (bitmap_equal(mas.bm, rsv->mas.bm, UWB_NUM_MAS))
+					uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_REDUCING);
+				else
+					uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_COMBINING);
+				break;
+			default:
+				break;	
+			}
 			break;
 		default:
 			dev_warn(dev, "ignoring invalid DRP IE state (%d/%d)\n",
@@ -210,9 +518,10 @@ static void uwb_drp_process_owner(struct uwb_rc *rc, struct uwb_rsv *rsv,
 			uwb_rsv_set_state(rsv, UWB_RSV_STATE_NONE);
 			break;
 		case UWB_DRP_REASON_CONFLICT:
-		case UWB_DRP_REASON_MODIFIED:
-			dev_err(dev, "FIXME: unhandled reason code (%d/%d)\n",
-				reason_code, status);
+			/* resolve the conflict */
+			bitmap_complement(mas.bm, src->last_availability_bm,
+					  UWB_NUM_MAS);
+			uwb_drp_handle_conflict_rsv(rc, rsv, drp_evt, drp_ie, &mas);
 			break;
 		default:
 			dev_warn(dev, "ignoring invalid DRP IE state (%d/%d)\n",
@@ -221,12 +530,110 @@ static void uwb_drp_process_owner(struct uwb_rc *rc, struct uwb_rsv *rsv,
 	}
 }
 
+static void uwb_cnflt_alien_stroke_timer(struct uwb_cnflt_alien *cnflt)
+{
+	unsigned timeout_us = UWB_MAX_LOST_BEACONS * UWB_SUPERFRAME_LENGTH_US;
+	mod_timer(&cnflt->timer, jiffies + usecs_to_jiffies(timeout_us));
+}
+
+static void uwb_cnflt_update_work(struct work_struct *work)
+{
+	struct uwb_cnflt_alien *cnflt = container_of(work,
+						     struct uwb_cnflt_alien,
+						     cnflt_update_work);
+	struct uwb_cnflt_alien *c;
+	struct uwb_rc *rc = cnflt->rc;
+	
+	unsigned long delay_us = UWB_MAS_LENGTH_US * UWB_MAS_PER_ZONE;
+	
+	mutex_lock(&rc->rsvs_mutex);
+
+	list_del(&cnflt->rc_node);
+
+	/* update rc global conflicting alien bitmap */
+	bitmap_zero(rc->cnflt_alien_bitmap.bm, UWB_NUM_MAS);
+
+	list_for_each_entry(c, &rc->cnflt_alien_list, rc_node) {
+		bitmap_or(rc->cnflt_alien_bitmap.bm, rc->cnflt_alien_bitmap.bm, c->mas.bm, UWB_NUM_MAS);			
+	}
+	
+	queue_delayed_work(rc->rsv_workq, &rc->rsv_alien_bp_work, usecs_to_jiffies(delay_us));
+
+	kfree(cnflt);
+	mutex_unlock(&rc->rsvs_mutex);
+}
+
+static void uwb_cnflt_timer(unsigned long arg)
+{
+	struct uwb_cnflt_alien *cnflt = (struct uwb_cnflt_alien *)arg;
+
+	queue_work(cnflt->rc->rsv_workq, &cnflt->cnflt_update_work);
+}
+
 /*
- * Process a received DRP IE, it's either for a reservation owned by
- * the RC or targeted at it (or it's for a WUSB cluster reservation).
+ * We have received an DRP_IE of type Alien BP and we need to make
+ * sure we do not transmit in conflicting MASs.
  */
-static void uwb_drp_process(struct uwb_rc *rc, struct uwb_dev *src,
-		     struct uwb_ie_drp *drp_ie)
+static void uwb_drp_handle_alien_drp(struct uwb_rc *rc, struct uwb_ie_drp *drp_ie)
+{
+	struct device *dev = &rc->uwb_dev.dev;
+	struct uwb_mas_bm mas;
+	struct uwb_cnflt_alien *cnflt;
+	char buf[72];
+	unsigned long delay_us = UWB_MAS_LENGTH_US * UWB_MAS_PER_ZONE;
+	
+	uwb_drp_ie_to_bm(&mas, drp_ie);
+	bitmap_scnprintf(buf, sizeof(buf), mas.bm, UWB_NUM_MAS);
+	
+	list_for_each_entry(cnflt, &rc->cnflt_alien_list, rc_node) {
+		if (bitmap_equal(cnflt->mas.bm, mas.bm, UWB_NUM_MAS)) {
+			/* Existing alien BP reservation conflicting
+			 * bitmap, just reset the timer */
+			uwb_cnflt_alien_stroke_timer(cnflt);
+			return;
+		}
+	}
+
+	/* New alien BP reservation conflicting bitmap */
+
+	/* alloc and initialize new uwb_cnflt_alien */
+	cnflt = kzalloc(sizeof(struct uwb_cnflt_alien), GFP_KERNEL);
+	if (!cnflt)
+		dev_err(dev, "failed to alloc uwb_cnflt_alien struct\n");
+	INIT_LIST_HEAD(&cnflt->rc_node);
+	init_timer(&cnflt->timer);
+	cnflt->timer.function = uwb_cnflt_timer;
+	cnflt->timer.data     = (unsigned long)cnflt;
+
+	cnflt->rc = rc;
+	INIT_WORK(&cnflt->cnflt_update_work, uwb_cnflt_update_work);
+	
+	bitmap_copy(cnflt->mas.bm, mas.bm, UWB_NUM_MAS);
+
+	list_add_tail(&cnflt->rc_node, &rc->cnflt_alien_list);
+
+	/* update rc global conflicting alien bitmap */
+	bitmap_or(rc->cnflt_alien_bitmap.bm, rc->cnflt_alien_bitmap.bm, mas.bm, UWB_NUM_MAS);
+
+	queue_delayed_work(rc->rsv_workq, &rc->rsv_alien_bp_work, usecs_to_jiffies(delay_us));
+	
+	/* start the timer */
+	uwb_cnflt_alien_stroke_timer(cnflt);
+}
+
+static void uwb_drp_process_not_involved(struct uwb_rc *rc,
+					 struct uwb_rc_evt_drp *drp_evt, 
+					 struct uwb_ie_drp *drp_ie)
+{
+	struct uwb_mas_bm mas;
+	
+	uwb_drp_ie_to_bm(&mas, drp_ie);
+	uwb_drp_handle_all_conflict_rsv(rc, drp_evt, drp_ie, &mas);
+}
+
+static void uwb_drp_process_involved(struct uwb_rc *rc, struct uwb_dev *src,
+				     struct uwb_rc_evt_drp *drp_evt,
+				     struct uwb_ie_drp *drp_ie)
 {
 	struct uwb_rsv *rsv;
 
@@ -239,7 +646,7 @@ static void uwb_drp_process(struct uwb_rc *rc, struct uwb_dev *src,
 		 */
 		return;
 	}
-
+	
 	/*
 	 * Do nothing with DRP IEs for reservations that have been
 	 * terminated.
@@ -248,13 +655,43 @@ static void uwb_drp_process(struct uwb_rc *rc, struct uwb_dev *src,
 		uwb_rsv_set_state(rsv, UWB_RSV_STATE_NONE);
 		return;
 	}
-
+			
 	if (uwb_ie_drp_owner(drp_ie))
-		uwb_drp_process_target(rc, rsv, drp_ie);
+		uwb_drp_process_target(rc, rsv, drp_ie, drp_evt);
+	else
+		uwb_drp_process_owner(rc, rsv, src, drp_ie, drp_evt);
+	
+}
+
+
+static bool uwb_drp_involves_us(struct uwb_rc *rc, struct uwb_ie_drp *drp_ie)
+{
+	return uwb_dev_addr_cmp(&rc->uwb_dev.dev_addr, &drp_ie->dev_addr) == 0;
+}
+
+/*
+ * Process a received DRP IE.
+ */
+static void uwb_drp_process(struct uwb_rc *rc, struct uwb_rc_evt_drp *drp_evt,
+			    struct uwb_dev *src, struct uwb_ie_drp *drp_ie)
+{
+	if (uwb_ie_drp_type(drp_ie) == UWB_DRP_TYPE_ALIEN_BP)
+		uwb_drp_handle_alien_drp(rc, drp_ie);
+	else if (uwb_drp_involves_us(rc, drp_ie))
+		uwb_drp_process_involved(rc, src, drp_evt, drp_ie);
 	else
-		uwb_drp_process_owner(rc, rsv, drp_ie);
+		uwb_drp_process_not_involved(rc, drp_evt, drp_ie);
 }
 
+/*
+ * Process a received DRP Availability IE
+ */
+static void uwb_drp_availability_process(struct uwb_rc *rc, struct uwb_dev *src,
+					 struct uwb_ie_drp_avail *drp_availability_ie)
+{
+	bitmap_copy(src->last_availability_bm,
+		    drp_availability_ie->bmp, UWB_NUM_MAS);
+}
 
 /*
  * Process all the DRP IEs (both DRP IEs and the DRP Availability IE)
@@ -276,10 +713,10 @@ void uwb_drp_process_all(struct uwb_rc *rc, struct uwb_rc_evt_drp *drp_evt,
 
 		switch (ie_hdr->element_id) {
 		case UWB_IE_DRP_AVAILABILITY:
-			/* FIXME: does something need to be done with this? */
+			uwb_drp_availability_process(rc, src_dev, (struct uwb_ie_drp_avail *)ie_hdr);
 			break;
 		case UWB_IE_DRP:
-			uwb_drp_process(rc, src_dev, (struct uwb_ie_drp *)ie_hdr);
+			uwb_drp_process(rc, drp_evt, src_dev, (struct uwb_ie_drp *)ie_hdr);
 			break;
 		default:
 			dev_warn(dev, "unexpected IE in DRP notification\n");
@@ -292,55 +729,6 @@ void uwb_drp_process_all(struct uwb_rc *rc, struct uwb_rc_evt_drp *drp_evt,
 			 (int)ielen);
 }
 
-
-/*
- * Go through all the DRP IEs and find the ones that conflict with our
- * reservations.
- *
- * FIXME: must resolve the conflict according the the rules in
- * [ECMA-368].
- */
-static
-void uwb_drp_process_conflict_all(struct uwb_rc *rc, struct uwb_rc_evt_drp *drp_evt,
-				  size_t ielen, struct uwb_dev *src_dev)
-{
-	struct device *dev = &rc->uwb_dev.dev;
-	struct uwb_ie_hdr *ie_hdr;
-	struct uwb_ie_drp *drp_ie;
-	void *ptr;
-
-	ptr = drp_evt->ie_data;
-	for (;;) {
-		ie_hdr = uwb_ie_next(&ptr, &ielen);
-		if (!ie_hdr)
-			break;
-
-		drp_ie = container_of(ie_hdr, struct uwb_ie_drp, hdr);
-
-		/* FIXME: check if this DRP IE conflicts. */
-	}
-
-	if (ielen > 0)
-		dev_warn(dev, "%d octets remaining in DRP notification\n",
-			 (int)ielen);
-}
-
-
-/*
- * Terminate all reservations owned by, or targeted at, 'uwb_dev'.
- */
-static void uwb_drp_terminate_all(struct uwb_rc *rc, struct uwb_dev *uwb_dev)
-{
-	struct uwb_rsv *rsv;
-
-	list_for_each_entry(rsv, &rc->reservations, rc_node) {
-		if (rsv->owner == uwb_dev
-		    || (rsv->target.type == UWB_RSV_TARGET_DEV && rsv->target.dev == uwb_dev))
-			uwb_rsv_remove(rsv);
-	}
-}
-
-
 /**
  * uwbd_evt_handle_rc_drp - handle a DRP_IE event
  * @evt: the DRP_IE event from the radio controller
@@ -381,7 +769,6 @@ int uwbd_evt_handle_rc_drp(struct uwb_event *evt)
 	size_t ielength, bytes_left;
 	struct uwb_dev_addr src_addr;
 	struct uwb_dev *src_dev;
-	int reason;
 
 	/* Is there enough data to decode the event (and any IEs in
 	   its payload)? */
@@ -417,22 +804,8 @@ int uwbd_evt_handle_rc_drp(struct uwb_event *evt)
 
 	mutex_lock(&rc->rsvs_mutex);
 
-	reason = uwb_rc_evt_drp_reason(drp_evt);
-
-	switch (reason) {
-	case UWB_DRP_NOTIF_DRP_IE_RCVD:
-		uwb_drp_process_all(rc, drp_evt, ielength, src_dev);
-		break;
-	case UWB_DRP_NOTIF_CONFLICT:
-		uwb_drp_process_conflict_all(rc, drp_evt, ielength, src_dev);
-		break;
-	case UWB_DRP_NOTIF_TERMINATE:
-		uwb_drp_terminate_all(rc, src_dev);
-		break;
-	default:
-		dev_warn(dev, "ignored DRP event with reason code: %d\n", reason);
-		break;
-	}
+	/* We do not distinguish from the reason */
+	uwb_drp_process_all(rc, drp_evt, ielength, src_dev);
 
 	mutex_unlock(&rc->rsvs_mutex);
 
diff --git a/drivers/uwb/rsv.c b/drivers/uwb/rsv.c
index 1cd84f927540..165aec6a8f97 100644
--- a/drivers/uwb/rsv.c
+++ b/drivers/uwb/rsv.c
@@ -17,20 +17,31 @@
  */
 #include <linux/kernel.h>
 #include <linux/uwb.h>
+#include <linux/random.h>
 
 #include "uwb-internal.h"
 
 static void uwb_rsv_timer(unsigned long arg);
 
 static const char *rsv_states[] = {
-	[UWB_RSV_STATE_NONE]          = "none",
-	[UWB_RSV_STATE_O_INITIATED]   = "initiated",
-	[UWB_RSV_STATE_O_PENDING]     = "pending",
-	[UWB_RSV_STATE_O_MODIFIED]    = "modified",
-	[UWB_RSV_STATE_O_ESTABLISHED] = "established",
-	[UWB_RSV_STATE_T_ACCEPTED]    = "accepted",
-	[UWB_RSV_STATE_T_DENIED]      = "denied",
-	[UWB_RSV_STATE_T_PENDING]     = "pending",
+	[UWB_RSV_STATE_NONE]                 = "none            ",
+	[UWB_RSV_STATE_O_INITIATED]          = "o initiated     ",
+	[UWB_RSV_STATE_O_PENDING]            = "o pending       ",
+	[UWB_RSV_STATE_O_MODIFIED]           = "o modified      ",
+	[UWB_RSV_STATE_O_ESTABLISHED]        = "o established   ",
+	[UWB_RSV_STATE_O_TO_BE_MOVED]        = "o to be moved   ",
+	[UWB_RSV_STATE_O_MOVE_EXPANDING]     = "o move expanding",
+	[UWB_RSV_STATE_O_MOVE_COMBINING]     = "o move combining",
+	[UWB_RSV_STATE_O_MOVE_REDUCING]      = "o move reducing ",
+	[UWB_RSV_STATE_T_ACCEPTED]           = "t accepted      ",
+	[UWB_RSV_STATE_T_CONFLICT]           = "t conflict      ",
+	[UWB_RSV_STATE_T_PENDING]            = "t pending       ",
+	[UWB_RSV_STATE_T_DENIED]             = "t denied        ",
+	[UWB_RSV_STATE_T_RESIZED]            = "t resized       ",
+	[UWB_RSV_STATE_T_EXPANDING_ACCEPTED] = "t expanding acc ",
+	[UWB_RSV_STATE_T_EXPANDING_CONFLICT] = "t expanding conf",
+	[UWB_RSV_STATE_T_EXPANDING_PENDING]  = "t expanding pend",
+	[UWB_RSV_STATE_T_EXPANDING_DENIED]   = "t expanding den ",
 };
 
 static const char *rsv_types[] = {
@@ -41,6 +52,31 @@ static const char *rsv_types[] = {
 	[UWB_DRP_TYPE_PCA]      = "pca",
 };
 
+bool uwb_rsv_has_two_drp_ies(struct uwb_rsv *rsv)
+{
+	static const bool has_two_drp_ies[] = {
+		[UWB_RSV_STATE_O_INITIATED]               = false,
+		[UWB_RSV_STATE_O_PENDING]                 = false,
+		[UWB_RSV_STATE_O_MODIFIED]                = false,
+		[UWB_RSV_STATE_O_ESTABLISHED]             = false,
+		[UWB_RSV_STATE_O_TO_BE_MOVED]             = false,
+		[UWB_RSV_STATE_O_MOVE_COMBINING]          = false,
+		[UWB_RSV_STATE_O_MOVE_REDUCING]           = false,
+		[UWB_RSV_STATE_O_MOVE_EXPANDING]          = true,
+		[UWB_RSV_STATE_T_ACCEPTED]                = false,
+		[UWB_RSV_STATE_T_CONFLICT]                = false,
+		[UWB_RSV_STATE_T_PENDING]                 = false,
+		[UWB_RSV_STATE_T_DENIED]                  = false,
+		[UWB_RSV_STATE_T_RESIZED]                 = false,
+		[UWB_RSV_STATE_T_EXPANDING_ACCEPTED]      = true,
+		[UWB_RSV_STATE_T_EXPANDING_CONFLICT]      = true,
+		[UWB_RSV_STATE_T_EXPANDING_PENDING]       = true,
+		[UWB_RSV_STATE_T_EXPANDING_DENIED]        = true,
+	};
+
+	return has_two_drp_ies[rsv->state];
+}
+
 /**
  * uwb_rsv_state_str - return a string for a reservation state
  * @state: the reservation state.
@@ -65,7 +101,7 @@ const char *uwb_rsv_type_str(enum uwb_drp_type type)
 }
 EXPORT_SYMBOL_GPL(uwb_rsv_type_str);
 
-static void uwb_rsv_dump(struct uwb_rsv *rsv)
+void uwb_rsv_dump(char *text, struct uwb_rsv *rsv)
 {
 	struct device *dev = &rsv->rc->uwb_dev.dev;
 	struct uwb_dev_addr devaddr;
@@ -88,12 +124,12 @@ static void uwb_rsv_release(struct kref *kref)
 	kfree(rsv);
 }
 
-static void uwb_rsv_get(struct uwb_rsv *rsv)
+void uwb_rsv_get(struct uwb_rsv *rsv)
 {
 	kref_get(&rsv->kref);
 }
 
-static void uwb_rsv_put(struct uwb_rsv *rsv)
+void uwb_rsv_put(struct uwb_rsv *rsv)
 {
 	kref_put(&rsv->kref, uwb_rsv_release);
 }
@@ -108,6 +144,7 @@ static void uwb_rsv_put(struct uwb_rsv *rsv)
 static int uwb_rsv_get_stream(struct uwb_rsv *rsv)
 {
 	struct uwb_rc *rc = rsv->rc;
+	struct device *dev = &rc->uwb_dev.dev;
 	unsigned long *streams_bm;
 	int stream;
 
@@ -129,12 +166,15 @@ static int uwb_rsv_get_stream(struct uwb_rsv *rsv)
 	rsv->stream = stream;
 	set_bit(stream, streams_bm);
 
+	dev_dbg(dev, "get stream %d\n", rsv->stream);
+
 	return 0;
 }
 
 static void uwb_rsv_put_stream(struct uwb_rsv *rsv)
 {
 	struct uwb_rc *rc = rsv->rc;
+	struct device *dev = &rc->uwb_dev.dev;
 	unsigned long *streams_bm;
 
 	switch (rsv->target.type) {
@@ -149,86 +189,52 @@ static void uwb_rsv_put_stream(struct uwb_rsv *rsv)
 	}
 
 	clear_bit(rsv->stream, streams_bm);
+
+	dev_dbg(dev, "put stream %d\n", rsv->stream);
 }
 
-/*
- * Generate a MAS allocation with a single row component.
- */
-static void uwb_rsv_gen_alloc_row(struct uwb_mas_bm *mas,
-				  int first_mas, int mas_per_zone,
-				  int zs, int ze)
+void uwb_rsv_backoff_win_timer(unsigned long arg)
 {
-	struct uwb_mas_bm col;
-	int z;
+	struct uwb_drp_backoff_win *bow = (struct uwb_drp_backoff_win *)arg;
+	struct uwb_rc *rc = container_of(bow, struct uwb_rc, bow);
+	struct device *dev = &rc->uwb_dev.dev;
 
-	bitmap_zero(mas->bm, UWB_NUM_MAS);
-	bitmap_zero(col.bm, UWB_NUM_MAS);
-	bitmap_fill(col.bm, mas_per_zone);
-	bitmap_shift_left(col.bm, col.bm, first_mas + zs * UWB_MAS_PER_ZONE, UWB_NUM_MAS);
-
-	for (z = zs; z <= ze; z++) {
-		bitmap_or(mas->bm, mas->bm, col.bm, UWB_NUM_MAS);
-		bitmap_shift_left(col.bm, col.bm, UWB_MAS_PER_ZONE, UWB_NUM_MAS);
+	bow->can_reserve_extra_mases = true;
+	if (bow->total_expired <= 4) {
+		bow->total_expired++;
+	} else {
+		/* after 4 backoff window has expired we can exit from
+		 * the backoff procedure */
+		bow->total_expired = 0;
+		bow->window = UWB_DRP_BACKOFF_WIN_MIN >> 1;
 	}
+	dev_dbg(dev, "backoff_win_timer total_expired=%d, n=%d\n: ", bow->total_expired, bow->n);
+
+	/* try to relocate all the "to be moved" relocations */
+	uwb_rsv_handle_drp_avail_change(rc);
 }
 
-/*
- * Allocate some MAS for this reservation based on current local
- * availability, the reservation parameters (max_mas, min_mas,
- * sparsity), and the WiMedia rules for MAS allocations.
- *
- * Returns -EBUSY is insufficient free MAS are available.
- *
- * FIXME: to simplify this, only safe reservations with a single row
- * component in zones 1 to 15 are tried (zone 0 is skipped to avoid
- * problems with the MAS reserved for the BP).
- *
- * [ECMA-368] section B.2.
- */
-static int uwb_rsv_alloc_mas(struct uwb_rsv *rsv)
+void uwb_rsv_backoff_win_increment(struct uwb_rc *rc)
 {
-	static const int safe_mas_in_row[UWB_NUM_ZONES] = {
-		8, 7, 6, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 2, 1,
-	};
-	int n, r;
-	struct uwb_mas_bm mas;
-	bool found = false;
+	struct uwb_drp_backoff_win *bow = &rc->bow;
+	struct device *dev = &rc->uwb_dev.dev;
+	unsigned timeout_us;
 
-	/*
-	 * Search all valid safe allocations until either: too few MAS
-	 * are available; or the smallest allocation with sufficient
-	 * MAS is found.
-	 *
-	 * The top of the zones are preferred, so space for larger
-	 * allocations is available in the bottom of the zone (e.g., a
-	 * 15 MAS allocation should start in row 14 leaving space for
-	 * a 120 MAS allocation at row 0).
-	 */
-	for (n = safe_mas_in_row[0]; n >= 1; n--) {
-		int num_mas;
+	dev_dbg(dev, "backoff_win_increment: window=%d\n", bow->window);
 
-		num_mas = n * (UWB_NUM_ZONES - 1);
-		if (num_mas < rsv->min_mas)
-			break;
-		if (found && num_mas < rsv->max_mas)
-			break;
+	bow->can_reserve_extra_mases = false;
 
-		for (r = UWB_MAS_PER_ZONE-1;  r >= 0; r--) {
-			if (safe_mas_in_row[r] < n)
-				continue;
-			uwb_rsv_gen_alloc_row(&mas, r, n, 1, UWB_NUM_ZONES);
-			if (uwb_drp_avail_reserve_pending(rsv->rc, &mas) == 0) {
-				found = true;
-				break;
-			}
-		}
-	}
+	if((bow->window << 1) == UWB_DRP_BACKOFF_WIN_MAX)
+		return;
 
-	if (!found)
-		return -EBUSY;
+	bow->window <<= 1;
+	bow->n = random32() & (bow->window - 1);
+	dev_dbg(dev, "new_window=%d, n=%d\n: ", bow->window, bow->n);
 
-	bitmap_copy(rsv->mas.bm, mas.bm, UWB_NUM_MAS);
-	return 0;
+	/* reset the timer associated variables */
+	timeout_us = bow->n * UWB_SUPERFRAME_LENGTH_US;
+	bow->total_expired = 0;
+	mod_timer(&bow->timer, jiffies + usecs_to_jiffies(timeout_us));		
 }
 
 static void uwb_rsv_stroke_timer(struct uwb_rsv *rsv)
@@ -241,13 +247,16 @@ static void uwb_rsv_stroke_timer(struct uwb_rsv *rsv)
 	 * received.
 	 */
 	if (rsv->is_multicast) {
-		if (rsv->state == UWB_RSV_STATE_O_INITIATED)
+		if (rsv->state == UWB_RSV_STATE_O_INITIATED
+		    || rsv->state == UWB_RSV_STATE_O_MOVE_EXPANDING
+		    || rsv->state == UWB_RSV_STATE_O_MOVE_COMBINING
+		    || rsv->state == UWB_RSV_STATE_O_MOVE_REDUCING)
 			sframes = 1;
 		if (rsv->state == UWB_RSV_STATE_O_ESTABLISHED)
 			sframes = 0;
+		
 	}
 
-	rsv->expired = false;
 	if (sframes > 0) {
 		/*
 		 * Add an additional 2 superframes to account for the
@@ -269,7 +278,7 @@ static void uwb_rsv_state_update(struct uwb_rsv *rsv,
 	rsv->state = new_state;
 	rsv->ie_valid = false;
 
-	uwb_rsv_dump(rsv);
+	uwb_rsv_dump("SU", rsv);
 
 	uwb_rsv_stroke_timer(rsv);
 	uwb_rsv_sched_update(rsv->rc);
@@ -283,10 +292,17 @@ static void uwb_rsv_callback(struct uwb_rsv *rsv)
 
 void uwb_rsv_set_state(struct uwb_rsv *rsv, enum uwb_rsv_state new_state)
 {
+	struct uwb_rsv_move *mv = &rsv->mv;
+
 	if (rsv->state == new_state) {
 		switch (rsv->state) {
 		case UWB_RSV_STATE_O_ESTABLISHED:
+		case UWB_RSV_STATE_O_MOVE_EXPANDING:
+		case UWB_RSV_STATE_O_MOVE_COMBINING:
+		case UWB_RSV_STATE_O_MOVE_REDUCING:
 		case UWB_RSV_STATE_T_ACCEPTED:
+		case UWB_RSV_STATE_T_EXPANDING_ACCEPTED:
+		case UWB_RSV_STATE_T_RESIZED:
 		case UWB_RSV_STATE_NONE:
 			uwb_rsv_stroke_timer(rsv);
 			break;
@@ -298,11 +314,10 @@ void uwb_rsv_set_state(struct uwb_rsv *rsv, enum uwb_rsv_state new_state)
 		return;
 	}
 
+	uwb_rsv_dump("SC", rsv);
+
 	switch (new_state) {
 	case UWB_RSV_STATE_NONE:
-		uwb_drp_avail_release(rsv->rc, &rsv->mas);
-		if (uwb_rsv_is_owner(rsv))
-			uwb_rsv_put_stream(rsv);
 		uwb_rsv_state_update(rsv, UWB_RSV_STATE_NONE);
 		uwb_rsv_callback(rsv);
 		break;
@@ -312,12 +327,45 @@ void uwb_rsv_set_state(struct uwb_rsv *rsv, enum uwb_rsv_state new_state)
 	case UWB_RSV_STATE_O_PENDING:
 		uwb_rsv_state_update(rsv, UWB_RSV_STATE_O_PENDING);
 		break;
+	case UWB_RSV_STATE_O_MODIFIED:
+		/* in the companion there are the MASes to drop */
+		bitmap_andnot(rsv->mas.bm, rsv->mas.bm, mv->companion_mas.bm, UWB_NUM_MAS);
+		uwb_rsv_state_update(rsv, UWB_RSV_STATE_O_MODIFIED);
+		break;
 	case UWB_RSV_STATE_O_ESTABLISHED:
+		if (rsv->state == UWB_RSV_STATE_O_MODIFIED
+		    || rsv->state == UWB_RSV_STATE_O_MOVE_REDUCING) {
+			uwb_drp_avail_release(rsv->rc, &mv->companion_mas);
+			rsv->needs_release_companion_mas = false;
+		}
 		uwb_drp_avail_reserve(rsv->rc, &rsv->mas);
 		uwb_rsv_state_update(rsv, UWB_RSV_STATE_O_ESTABLISHED);
 		uwb_rsv_callback(rsv);
 		break;
+	case UWB_RSV_STATE_O_MOVE_EXPANDING:
+		rsv->needs_release_companion_mas = true;
+		uwb_rsv_state_update(rsv, UWB_RSV_STATE_O_MOVE_EXPANDING);
+		break;
+	case UWB_RSV_STATE_O_MOVE_COMBINING:
+		rsv->needs_release_companion_mas = false;
+		uwb_drp_avail_reserve(rsv->rc, &mv->companion_mas);
+		bitmap_or(rsv->mas.bm, rsv->mas.bm, mv->companion_mas.bm, UWB_NUM_MAS);
+		rsv->mas.safe   += mv->companion_mas.safe;
+		rsv->mas.unsafe += mv->companion_mas.unsafe;
+		uwb_rsv_state_update(rsv, UWB_RSV_STATE_O_MOVE_COMBINING);
+		break;
+	case UWB_RSV_STATE_O_MOVE_REDUCING:
+		bitmap_andnot(mv->companion_mas.bm, rsv->mas.bm, mv->final_mas.bm, UWB_NUM_MAS);
+		rsv->needs_release_companion_mas = true;
+		rsv->mas.safe   = mv->final_mas.safe;
+		rsv->mas.unsafe = mv->final_mas.unsafe;
+		bitmap_copy(rsv->mas.bm, mv->final_mas.bm, UWB_NUM_MAS);
+		bitmap_copy(rsv->mas.unsafe_bm, mv->final_mas.unsafe_bm, UWB_NUM_MAS);
+		uwb_rsv_state_update(rsv, UWB_RSV_STATE_O_MOVE_REDUCING);
+		break;
 	case UWB_RSV_STATE_T_ACCEPTED:
+	case UWB_RSV_STATE_T_RESIZED:
+		rsv->needs_release_companion_mas = false;
 		uwb_drp_avail_reserve(rsv->rc, &rsv->mas);
 		uwb_rsv_state_update(rsv, UWB_RSV_STATE_T_ACCEPTED);
 		uwb_rsv_callback(rsv);
@@ -325,12 +373,82 @@ void uwb_rsv_set_state(struct uwb_rsv *rsv, enum uwb_rsv_state new_state)
 	case UWB_RSV_STATE_T_DENIED:
 		uwb_rsv_state_update(rsv, UWB_RSV_STATE_T_DENIED);
 		break;
+	case UWB_RSV_STATE_T_CONFLICT:
+		uwb_rsv_state_update(rsv, UWB_RSV_STATE_T_CONFLICT);
+		break;
+	case UWB_RSV_STATE_T_PENDING:
+		uwb_rsv_state_update(rsv, UWB_RSV_STATE_T_PENDING);
+		break;
+	case UWB_RSV_STATE_T_EXPANDING_ACCEPTED:
+		rsv->needs_release_companion_mas = true;
+		uwb_drp_avail_reserve(rsv->rc, &mv->companion_mas);
+		uwb_rsv_state_update(rsv, UWB_RSV_STATE_T_EXPANDING_ACCEPTED);
+		break;
 	default:
 		dev_err(&rsv->rc->uwb_dev.dev, "unhandled state: %s (%d)\n",
 			uwb_rsv_state_str(new_state), new_state);
 	}
 }
 
+static void uwb_rsv_handle_timeout_work(struct work_struct *work)
+{
+	struct uwb_rsv *rsv = container_of(work, struct uwb_rsv,
+					   handle_timeout_work);
+	struct uwb_rc *rc = rsv->rc;
+
+	mutex_lock(&rc->rsvs_mutex);
+
+	uwb_rsv_dump("TO", rsv);
+
+	switch (rsv->state) {
+	case UWB_RSV_STATE_O_INITIATED:
+		if (rsv->is_multicast) {
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_ESTABLISHED);
+			goto unlock;
+		}
+		break;
+	case UWB_RSV_STATE_O_MOVE_EXPANDING:
+		if (rsv->is_multicast) {
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_COMBINING);
+			goto unlock;
+		}
+		break;
+	case UWB_RSV_STATE_O_MOVE_COMBINING:
+		if (rsv->is_multicast) {
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_REDUCING);
+			goto unlock;
+		}
+		break;
+	case UWB_RSV_STATE_O_MOVE_REDUCING:
+		if (rsv->is_multicast) {
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_ESTABLISHED);
+			goto unlock;
+		}
+		break;
+	case UWB_RSV_STATE_O_ESTABLISHED:
+		if (rsv->is_multicast)
+			goto unlock;
+		break;
+	case UWB_RSV_STATE_T_EXPANDING_ACCEPTED:
+		/*
+		 * The time out could be for the main or of the
+		 * companion DRP, assume it's for the companion and
+		 * drop that first.  A further time out is required to
+		 * drop the main.
+		 */
+		uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_ACCEPTED);
+		uwb_drp_avail_release(rsv->rc, &rsv->mv.companion_mas);
+		goto unlock;
+	default:
+		break;
+	}
+
+	uwb_rsv_remove(rsv);
+
+unlock:
+	mutex_unlock(&rc->rsvs_mutex);
+}
+
 static struct uwb_rsv *uwb_rsv_alloc(struct uwb_rc *rc)
 {
 	struct uwb_rsv *rsv;
@@ -347,6 +465,7 @@ static struct uwb_rsv *uwb_rsv_alloc(struct uwb_rc *rc)
 	rsv->timer.data     = (unsigned long)rsv;
 
 	rsv->rc = rc;
+	INIT_WORK(&rsv->handle_timeout_work, uwb_rsv_handle_timeout_work);
 
 	return rsv;
 }
@@ -381,8 +500,18 @@ EXPORT_SYMBOL_GPL(uwb_rsv_create);
 
 void uwb_rsv_remove(struct uwb_rsv *rsv)
 {
+	uwb_rsv_dump("RM", rsv);
+
 	if (rsv->state != UWB_RSV_STATE_NONE)
 		uwb_rsv_set_state(rsv, UWB_RSV_STATE_NONE);
+
+	if (rsv->needs_release_companion_mas)
+		uwb_drp_avail_release(rsv->rc, &rsv->mv.companion_mas);
+	uwb_drp_avail_release(rsv->rc, &rsv->mas);
+
+	if (uwb_rsv_is_owner(rsv))
+		uwb_rsv_put_stream(rsv);
+	
 	del_timer_sync(&rsv->timer);
 	uwb_dev_put(rsv->owner);
 	if (rsv->target.type == UWB_RSV_TARGET_DEV)
@@ -409,7 +538,7 @@ EXPORT_SYMBOL_GPL(uwb_rsv_destroy);
  * @rsv: the reservation
  *
  * The PAL should fill in @rsv's owner, target, type, max_mas,
- * min_mas, sparsity and is_multicast fields.  If the target is a
+ * min_mas, max_interval and is_multicast fields.  If the target is a
  * uwb_dev it must be referenced.
  *
  * The reservation's callback will be called when the reservation is
@@ -418,16 +547,27 @@ EXPORT_SYMBOL_GPL(uwb_rsv_destroy);
 int uwb_rsv_establish(struct uwb_rsv *rsv)
 {
 	struct uwb_rc *rc = rsv->rc;
+	struct uwb_mas_bm available;
 	int ret;
 
 	mutex_lock(&rc->rsvs_mutex);
-
 	ret = uwb_rsv_get_stream(rsv);
 	if (ret)
 		goto out;
 
-	ret = uwb_rsv_alloc_mas(rsv);
-	if (ret) {
+	rsv->tiebreaker = random32() & 1;
+	/* get available mas bitmap */
+	uwb_drp_available(rc, &available);
+
+	ret = uwb_rsv_find_best_allocation(rsv, &available, &rsv->mas);
+	if (ret == UWB_RSV_ALLOC_NOT_FOUND) {
+		ret = -EBUSY;
+		uwb_rsv_put_stream(rsv);
+		goto out;
+	}
+
+	ret = uwb_drp_avail_reserve_pending(rc, &rsv->mas);
+	if (ret != 0) {
 		uwb_rsv_put_stream(rsv);
 		goto out;
 	}
@@ -448,16 +588,71 @@ EXPORT_SYMBOL_GPL(uwb_rsv_establish);
  * @rsv: the reservation to modify
  * @max_mas: new maximum MAS to reserve
  * @min_mas: new minimum MAS to reserve
- * @sparsity: new sparsity to use
+ * @max_interval: new max_interval to use
  *
  * FIXME: implement this once there are PALs that use it.
  */
-int uwb_rsv_modify(struct uwb_rsv *rsv, int max_mas, int min_mas, int sparsity)
+int uwb_rsv_modify(struct uwb_rsv *rsv, int max_mas, int min_mas, int max_interval)
 {
 	return -ENOSYS;
 }
 EXPORT_SYMBOL_GPL(uwb_rsv_modify);
 
+/*
+ * move an already established reservation (rc->rsvs_mutex must to be
+ * taken when tis function is called)
+ */
+int uwb_rsv_try_move(struct uwb_rsv *rsv, struct uwb_mas_bm *available)
+{
+	struct uwb_rc *rc = rsv->rc;
+	struct uwb_drp_backoff_win *bow = &rc->bow;
+	struct device *dev = &rc->uwb_dev.dev;
+	struct uwb_rsv_move *mv;
+	int ret = 0;
+ 
+	if (bow->can_reserve_extra_mases == false)
+		return -EBUSY;
+
+	mv = &rsv->mv;
+
+	if (uwb_rsv_find_best_allocation(rsv, available, &mv->final_mas) == UWB_RSV_ALLOC_FOUND) {
+
+		if (!bitmap_equal(rsv->mas.bm, mv->final_mas.bm, UWB_NUM_MAS)) {
+			/* We want to move the reservation */
+			bitmap_andnot(mv->companion_mas.bm, mv->final_mas.bm, rsv->mas.bm, UWB_NUM_MAS);
+			uwb_drp_avail_reserve_pending(rc, &mv->companion_mas);
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_EXPANDING);
+		}
+	} else {
+		dev_dbg(dev, "new allocation not found\n");
+	}
+	
+	return ret;
+}
+
+/* It will try to move every reservation in state O_ESTABLISHED giving
+ * to the MAS allocator algorithm an availability that is the real one
+ * plus the allocation already established from the reservation. */
+void uwb_rsv_handle_drp_avail_change(struct uwb_rc *rc)
+{
+	struct uwb_drp_backoff_win *bow = &rc->bow;
+	struct uwb_rsv *rsv;
+	struct uwb_mas_bm mas;
+	
+	if (bow->can_reserve_extra_mases == false)
+		return;
+
+	list_for_each_entry(rsv, &rc->reservations, rc_node) {
+		if (rsv->state == UWB_RSV_STATE_O_ESTABLISHED ||
+		    rsv->state == UWB_RSV_STATE_O_TO_BE_MOVED) {
+			uwb_drp_available(rc, &mas);
+			bitmap_or(mas.bm, mas.bm, rsv->mas.bm, UWB_NUM_MAS);
+			uwb_rsv_try_move(rsv, &mas);
+		}
+	}
+	
+}
+
 /**
  * uwb_rsv_terminate - terminate an established reservation
  * @rsv: the reservation to terminate
@@ -546,6 +741,7 @@ static struct uwb_rsv *uwb_rsv_new_target(struct uwb_rc *rc,
 	uwb_dev_get(rsv->owner);
 	rsv->target.type = UWB_RSV_TARGET_DEV;
 	rsv->target.dev  = &rc->uwb_dev;
+	uwb_dev_get(&rc->uwb_dev);
 	rsv->type        = uwb_ie_drp_type(drp_ie);
 	rsv->stream      = uwb_ie_drp_stream_index(drp_ie);
 	uwb_drp_ie_to_bm(&rsv->mas, drp_ie);
@@ -567,11 +763,33 @@ static struct uwb_rsv *uwb_rsv_new_target(struct uwb_rc *rc,
 	list_add_tail(&rsv->rc_node, &rc->reservations);
 	state = rsv->state;
 	rsv->state = UWB_RSV_STATE_NONE;
-	uwb_rsv_set_state(rsv, state);
+
+	/* FIXME: do something sensible here */
+	if (state == UWB_RSV_STATE_T_ACCEPTED
+	    && uwb_drp_avail_reserve_pending(rc, &rsv->mas) == -EBUSY) {
+		/* FIXME: do something sensible here */
+	} else {
+		uwb_rsv_set_state(rsv, state);
+	}
 
 	return rsv;
 }
 
+/**
+ * uwb_rsv_get_usable_mas - get the bitmap of the usable MAS of a reservations
+ * @rsv: the reservation.
+ * @mas: returns the available MAS.
+ *
+ * The usable MAS of a reservation may be less than the negotiated MAS
+ * if alien BPs are present.
+ */
+void uwb_rsv_get_usable_mas(struct uwb_rsv *rsv, struct uwb_mas_bm *mas)
+{
+	bitmap_zero(mas->bm, UWB_NUM_MAS);
+	bitmap_andnot(mas->bm, rsv->mas.bm, rsv->rc->cnflt_alien_bitmap.bm, UWB_NUM_MAS);
+}
+EXPORT_SYMBOL_GPL(uwb_rsv_get_usable_mas);
+
 /**
  * uwb_rsv_find - find a reservation for a received DRP IE.
  * @rc: the radio controller
@@ -611,8 +829,6 @@ static bool uwb_rsv_update_all(struct uwb_rc *rc)
 	bool ie_updated = false;
 
 	list_for_each_entry_safe(rsv, t, &rc->reservations, rc_node) {
-		if (rsv->expired)
-			uwb_drp_handle_timeout(rsv);
 		if (!rsv->ie_valid) {
 			uwb_drp_ie_update(rsv);
 			ie_updated = true;
@@ -622,9 +838,47 @@ static bool uwb_rsv_update_all(struct uwb_rc *rc)
 	return ie_updated;
 }
 
+void uwb_rsv_queue_update(struct uwb_rc *rc)
+{
+	unsigned long delay_us = UWB_MAS_LENGTH_US * UWB_MAS_PER_ZONE;
+
+	queue_delayed_work(rc->rsv_workq, &rc->rsv_update_work, usecs_to_jiffies(delay_us));
+}
+
+/**
+ * uwb_rsv_sched_update - schedule an update of the DRP IEs
+ * @rc: the radio controller.
+ *
+ * To improve performance and ensure correctness with [ECMA-368] the
+ * number of SET-DRP-IE commands that are done are limited.
+ *
+ * DRP IEs update come from two sources: DRP events from the hardware
+ * which all occur at the beginning of the superframe ('syncronous'
+ * events) and reservation establishment/termination requests from
+ * PALs or timers ('asynchronous' events).
+ *
+ * A delayed work ensures that all the synchronous events result in
+ * one SET-DRP-IE command.
+ *
+ * Additional logic (the set_drp_ie_pending and rsv_updated_postponed
+ * flags) will prevent an asynchrous event starting a SET-DRP-IE
+ * command if one is currently awaiting a response.
+ *
+ * FIXME: this does leave a window where an asynchrous event can delay
+ * the SET-DRP-IE for a synchronous event by one superframe.
+ */
 void uwb_rsv_sched_update(struct uwb_rc *rc)
 {
-	queue_work(rc->rsv_workq, &rc->rsv_update_work);
+	spin_lock(&rc->rsvs_lock);
+	if (!delayed_work_pending(&rc->rsv_update_work)) {
+		if (rc->set_drp_ie_pending > 0) {
+			rc->set_drp_ie_pending++;
+			goto unlock;
+		}
+		uwb_rsv_queue_update(rc);
+	}
+unlock:
+	spin_unlock(&rc->rsvs_lock);
 }
 
 /*
@@ -633,7 +887,8 @@ void uwb_rsv_sched_update(struct uwb_rc *rc)
  */
 static void uwb_rsv_update_work(struct work_struct *work)
 {
-	struct uwb_rc *rc = container_of(work, struct uwb_rc, rsv_update_work);
+	struct uwb_rc *rc = container_of(work, struct uwb_rc,
+					 rsv_update_work.work);
 	bool ie_updated;
 
 	mutex_lock(&rc->rsvs_mutex);
@@ -645,18 +900,34 @@ static void uwb_rsv_update_work(struct work_struct *work)
 		ie_updated = true;
 	}
 
-	if (ie_updated)
+	if (ie_updated && (rc->set_drp_ie_pending == 0))
 		uwb_rc_send_all_drp_ie(rc);
 
 	mutex_unlock(&rc->rsvs_mutex);
 }
 
+static void uwb_rsv_alien_bp_work(struct work_struct *work)
+{
+	struct uwb_rc *rc = container_of(work, struct uwb_rc,
+					 rsv_alien_bp_work.work);
+	struct uwb_rsv *rsv;
+
+	mutex_lock(&rc->rsvs_mutex);
+	
+	list_for_each_entry(rsv, &rc->reservations, rc_node) {
+		if (rsv->type != UWB_DRP_TYPE_ALIEN_BP) {
+			rsv->callback(rsv);
+		}
+	}
+
+	mutex_unlock(&rc->rsvs_mutex);
+}
+
 static void uwb_rsv_timer(unsigned long arg)
 {
 	struct uwb_rsv *rsv = (struct uwb_rsv *)arg;
 
-	rsv->expired = true;
-	uwb_rsv_sched_update(rsv->rc);
+	queue_work(rsv->rc->rsv_workq, &rsv->handle_timeout_work);
 }
 
 /**
@@ -673,16 +944,27 @@ void uwb_rsv_remove_all(struct uwb_rc *rc)
 	list_for_each_entry_safe(rsv, t, &rc->reservations, rc_node) {
 		uwb_rsv_remove(rsv);
 	}
+	/* Cancel any postponed update. */
+	rc->set_drp_ie_pending = 0;
 	mutex_unlock(&rc->rsvs_mutex);
 
-	cancel_work_sync(&rc->rsv_update_work);
+	cancel_delayed_work_sync(&rc->rsv_update_work);
 }
 
 void uwb_rsv_init(struct uwb_rc *rc)
 {
 	INIT_LIST_HEAD(&rc->reservations);
+	INIT_LIST_HEAD(&rc->cnflt_alien_list);
 	mutex_init(&rc->rsvs_mutex);
-	INIT_WORK(&rc->rsv_update_work, uwb_rsv_update_work);
+	spin_lock_init(&rc->rsvs_lock);
+	INIT_DELAYED_WORK(&rc->rsv_update_work, uwb_rsv_update_work);
+	INIT_DELAYED_WORK(&rc->rsv_alien_bp_work, uwb_rsv_alien_bp_work);
+	rc->bow.can_reserve_extra_mases = true;
+	rc->bow.total_expired = 0;
+	rc->bow.window = UWB_DRP_BACKOFF_WIN_MIN >> 1;
+	init_timer(&rc->bow.timer);
+	rc->bow.timer.function = uwb_rsv_backoff_win_timer;
+	rc->bow.timer.data     = (unsigned long)&rc->bow;
 
 	bitmap_complement(rc->uwb_dev.streams, rc->uwb_dev.streams, UWB_NUM_STREAMS);
 }
diff --git a/drivers/uwb/uwb-debug.c b/drivers/uwb/uwb-debug.c
index a6debb9baf38..89b2e6a7214c 100644
--- a/drivers/uwb/uwb-debug.c
+++ b/drivers/uwb/uwb-debug.c
@@ -82,29 +82,21 @@ struct uwb_dbg {
 	struct dentry *reservations_f;
 	struct dentry *accept_f;
 	struct dentry *drp_avail_f;
+	spinlock_t list_lock;
 };
 
 static struct dentry *root_dir;
 
 static void uwb_dbg_rsv_cb(struct uwb_rsv *rsv)
 {
-	struct uwb_rc *rc = rsv->rc;
-	struct device *dev = &rc->uwb_dev.dev;
-	struct uwb_dev_addr devaddr;
-	char owner[UWB_ADDR_STRSIZE], target[UWB_ADDR_STRSIZE];
-
-	uwb_dev_addr_print(owner, sizeof(owner), &rsv->owner->dev_addr);
-	if (rsv->target.type == UWB_RSV_TARGET_DEV)
-		devaddr = rsv->target.dev->dev_addr;
-	else
-		devaddr = rsv->target.devaddr;
-	uwb_dev_addr_print(target, sizeof(target), &devaddr);
+	struct uwb_dbg *dbg = rsv->pal_priv;
 
-	dev_dbg(dev, "debug: rsv %s -> %s: %s\n",
-		owner, target, uwb_rsv_state_str(rsv->state));
+	uwb_rsv_dump("debug", rsv);
 
 	if (rsv->state == UWB_RSV_STATE_NONE) {
+		spin_lock(&dbg->list_lock);
 		list_del(&rsv->pal_node);
+		spin_unlock(&dbg->list_lock);
 		uwb_rsv_destroy(rsv);
 	}
 }
@@ -128,20 +120,21 @@ static int cmd_rsv_establish(struct uwb_rc *rc,
 		return -ENOMEM;
 	}
 
-	rsv->owner       = &rc->uwb_dev;
-	rsv->target.type = UWB_RSV_TARGET_DEV;
-	rsv->target.dev  = target;
-	rsv->type        = cmd->type;
-	rsv->max_mas     = cmd->max_mas;
-	rsv->min_mas     = cmd->min_mas;
-	rsv->sparsity    = cmd->sparsity;
+	rsv->target.type  = UWB_RSV_TARGET_DEV;
+	rsv->target.dev   = target;
+	rsv->type         = cmd->type;
+	rsv->max_mas      = cmd->max_mas;
+	rsv->min_mas      = cmd->min_mas;
+	rsv->max_interval = cmd->max_interval;
 
 	ret = uwb_rsv_establish(rsv);
 	if (ret)
 		uwb_rsv_destroy(rsv);
-	else
+	else {
+		spin_lock(&(rc->dbg)->list_lock);
 		list_add_tail(&rsv->pal_node, &rc->dbg->rsvs);
-
+		spin_unlock(&(rc->dbg)->list_lock);
+	}
 	return ret;
 }
 
@@ -151,17 +144,24 @@ static int cmd_rsv_terminate(struct uwb_rc *rc,
 	struct uwb_rsv *rsv, *found = NULL;
 	int i = 0;
 
+	spin_lock(&(rc->dbg)->list_lock);
+
 	list_for_each_entry(rsv, &rc->dbg->rsvs, pal_node) {
 		if (i == cmd->index) {
 			found = rsv;
+			uwb_rsv_get(found);
 			break;
 		}
 		i++;
 	}
+
+	spin_unlock(&(rc->dbg)->list_lock);
+
 	if (!found)
 		return -EINVAL;
 
 	uwb_rsv_terminate(found);
+	uwb_rsv_put(found);
 
 	return 0;
 }
@@ -191,7 +191,7 @@ static ssize_t command_write(struct file *file, const char __user *buf,
 	struct uwb_rc *rc = file->private_data;
 	struct uwb_dbg_cmd cmd;
 	int ret = 0;
-
+	
 	if (len != sizeof(struct uwb_dbg_cmd))
 		return -EINVAL;
 
@@ -325,7 +325,9 @@ static void uwb_dbg_new_rsv(struct uwb_pal *pal, struct uwb_rsv *rsv)
 	struct uwb_dbg *dbg = container_of(pal, struct uwb_dbg, pal);
 
 	if (dbg->accept) {
+		spin_lock(&dbg->list_lock);
 		list_add_tail(&rsv->pal_node, &dbg->rsvs);
+		spin_unlock(&dbg->list_lock);
 		uwb_rsv_accept(rsv, uwb_dbg_rsv_cb, dbg);
 	}
 }
@@ -341,6 +343,7 @@ void uwb_dbg_add_rc(struct uwb_rc *rc)
 		return;
 
 	INIT_LIST_HEAD(&rc->dbg->rsvs);
+	spin_lock_init(&(rc->dbg)->list_lock);
 
 	uwb_pal_init(&rc->dbg->pal);
 	rc->dbg->pal.rc = rc;
diff --git a/drivers/uwb/uwb-internal.h b/drivers/uwb/uwb-internal.h
index f0f21f406bf0..d5bcfc1c227a 100644
--- a/drivers/uwb/uwb-internal.h
+++ b/drivers/uwb/uwb-internal.h
@@ -92,6 +92,12 @@ extern const char *uwb_rc_strerror(unsigned code);
 
 struct uwb_rc_neh;
 
+extern int uwb_rc_cmd_async(struct uwb_rc *rc, const char *cmd_name,
+			    struct uwb_rccb *cmd, size_t cmd_size,
+			    u8 expected_type, u16 expected_event,
+			    uwb_rc_cmd_cb_f cb, void *arg);
+
+
 void uwb_rc_neh_create(struct uwb_rc *rc);
 void uwb_rc_neh_destroy(struct uwb_rc *rc);
 
@@ -106,7 +112,69 @@ void uwb_rc_neh_put(struct uwb_rc_neh *neh);
 extern int uwb_est_create(void);
 extern void uwb_est_destroy(void);
 
+/*
+ * UWB conflicting alien reservations
+ */
+struct uwb_cnflt_alien {
+	struct uwb_rc *rc;
+	struct list_head rc_node;
+	struct uwb_mas_bm mas;
+	struct timer_list timer;
+	struct work_struct cnflt_update_work;
+};
+
+enum uwb_uwb_rsv_alloc_result {
+	UWB_RSV_ALLOC_FOUND = 0,
+	UWB_RSV_ALLOC_NOT_FOUND,
+};
+
+enum uwb_rsv_mas_status {
+	UWB_RSV_MAS_NOT_AVAIL = 1,
+	UWB_RSV_MAS_SAFE,
+	UWB_RSV_MAS_UNSAFE,
+};
+
+struct uwb_rsv_col_set_info {
+	unsigned char start_col;
+	unsigned char interval;
+	unsigned char safe_mas_per_col;
+	unsigned char unsafe_mas_per_col;
+};
+
+struct uwb_rsv_col_info {
+	unsigned char max_avail_safe;
+	unsigned char max_avail_unsafe;
+	unsigned char highest_mas[UWB_MAS_PER_ZONE];
+	struct uwb_rsv_col_set_info csi;
+};
+
+struct uwb_rsv_row_info {
+	unsigned char avail[UWB_MAS_PER_ZONE];
+	unsigned char free_rows;
+	unsigned char used_rows;
+};
+
+/*
+ * UWB find allocation
+ */
+struct uwb_rsv_alloc_info {
+	unsigned char bm[UWB_MAS_PER_ZONE * UWB_NUM_ZONES];
+	struct uwb_rsv_col_info ci[UWB_NUM_ZONES];
+	struct uwb_rsv_row_info ri;
+	struct uwb_mas_bm *not_available;
+	struct uwb_mas_bm *result;
+	int min_mas;
+	int max_mas;
+	int max_interval;
+	int total_allocated_mases;
+	int safe_allocated_mases;
+	int unsafe_allocated_mases;
+	int interval;
+};
 
+int uwb_rsv_find_best_allocation(struct uwb_rsv *rsv, struct uwb_mas_bm *available, 
+				 struct uwb_mas_bm *result);
+void uwb_rsv_handle_drp_avail_change(struct uwb_rc *rc);
 /*
  * UWB Events & management daemon
  */
@@ -254,18 +322,28 @@ void uwb_rsv_init(struct uwb_rc *rc);
 int uwb_rsv_setup(struct uwb_rc *rc);
 void uwb_rsv_cleanup(struct uwb_rc *rc);
 void uwb_rsv_remove_all(struct uwb_rc *rc);
+void uwb_rsv_get(struct uwb_rsv *rsv);
+void uwb_rsv_put(struct uwb_rsv *rsv);
+bool uwb_rsv_has_two_drp_ies(struct uwb_rsv *rsv);
+void uwb_rsv_dump(char *text, struct uwb_rsv *rsv);
+int uwb_rsv_try_move(struct uwb_rsv *rsv, struct uwb_mas_bm *available);
+void uwb_rsv_backoff_win_timer(unsigned long arg);
+void uwb_rsv_backoff_win_increment(struct uwb_rc *rc);
+int uwb_rsv_status(struct uwb_rsv *rsv);
+int uwb_rsv_companion_status(struct uwb_rsv *rsv);
 
 void uwb_rsv_set_state(struct uwb_rsv *rsv, enum uwb_rsv_state new_state);
 void uwb_rsv_remove(struct uwb_rsv *rsv);
 struct uwb_rsv *uwb_rsv_find(struct uwb_rc *rc, struct uwb_dev *src,
 			     struct uwb_ie_drp *drp_ie);
 void uwb_rsv_sched_update(struct uwb_rc *rc);
+void uwb_rsv_queue_update(struct uwb_rc *rc);
 
-void uwb_drp_handle_timeout(struct uwb_rsv *rsv);
 int uwb_drp_ie_update(struct uwb_rsv *rsv);
 void uwb_drp_ie_to_bm(struct uwb_mas_bm *bm, const struct uwb_ie_drp *drp_ie);
 
 void uwb_drp_avail_init(struct uwb_rc *rc);
+void uwb_drp_available(struct uwb_rc *rc, struct uwb_mas_bm *avail);
 int  uwb_drp_avail_reserve_pending(struct uwb_rc *rc, struct uwb_mas_bm *mas);
 void uwb_drp_avail_reserve(struct uwb_rc *rc, struct uwb_mas_bm *mas);
 void uwb_drp_avail_release(struct uwb_rc *rc, struct uwb_mas_bm *mas);
diff --git a/include/linux/uwb.h b/include/linux/uwb.h
index d7ed5201ade6..c02128991ff7 100644
--- a/include/linux/uwb.h
+++ b/include/linux/uwb.h
@@ -67,6 +67,7 @@ struct uwb_dev {
 	struct uwb_dev_addr dev_addr;
 	int beacon_slot;
 	DECLARE_BITMAP(streams, UWB_NUM_STREAMS);
+	DECLARE_BITMAP(last_availability_bm, UWB_NUM_MAS);
 };
 #define to_uwb_dev(d) container_of(d, struct uwb_dev, dev)
 
@@ -109,6 +110,9 @@ struct uwbd {
  */
 struct uwb_mas_bm {
 	DECLARE_BITMAP(bm, UWB_NUM_MAS);
+	DECLARE_BITMAP(unsafe_bm, UWB_NUM_MAS);
+	int safe;
+	int unsafe;
 };
 
 /**
@@ -134,14 +138,24 @@ struct uwb_mas_bm {
  * FIXME: further target states TBD.
  */
 enum uwb_rsv_state {
-	UWB_RSV_STATE_NONE,
+	UWB_RSV_STATE_NONE = 0,
 	UWB_RSV_STATE_O_INITIATED,
 	UWB_RSV_STATE_O_PENDING,
 	UWB_RSV_STATE_O_MODIFIED,
 	UWB_RSV_STATE_O_ESTABLISHED,
+	UWB_RSV_STATE_O_TO_BE_MOVED,
+	UWB_RSV_STATE_O_MOVE_EXPANDING,
+	UWB_RSV_STATE_O_MOVE_COMBINING,
+	UWB_RSV_STATE_O_MOVE_REDUCING,
 	UWB_RSV_STATE_T_ACCEPTED,
 	UWB_RSV_STATE_T_DENIED,
+	UWB_RSV_STATE_T_CONFLICT,
 	UWB_RSV_STATE_T_PENDING,
+	UWB_RSV_STATE_T_EXPANDING_ACCEPTED,
+	UWB_RSV_STATE_T_EXPANDING_CONFLICT,
+	UWB_RSV_STATE_T_EXPANDING_PENDING,
+	UWB_RSV_STATE_T_EXPANDING_DENIED,
+	UWB_RSV_STATE_T_RESIZED,
 
 	UWB_RSV_STATE_LAST,
 };
@@ -166,6 +180,12 @@ struct uwb_rsv_target {
 	};
 };
 
+struct uwb_rsv_move {
+	struct uwb_mas_bm final_mas;
+	struct uwb_ie_drp *companion_drp_ie;
+	struct uwb_mas_bm companion_mas;
+};
+
 /*
  * Number of streams reserved for reservations targeted at DevAddrs.
  */
@@ -203,6 +223,7 @@ typedef void (*uwb_rsv_cb_f)(struct uwb_rsv *rsv);
  *
  * @status:         negotiation status
  * @stream:         stream index allocated for this reservation
+ * @tiebreaker:     conflict tiebreaker for this reservation
  * @mas:            reserved MAS
  * @drp_ie:         the DRP IE
  * @ie_valid:       true iff the DRP IE matches the reservation parameters
@@ -225,19 +246,22 @@ struct uwb_rsv {
 	enum uwb_drp_type type;
 	int max_mas;
 	int min_mas;
-	int sparsity;
+	int max_interval;
 	bool is_multicast;
 
 	uwb_rsv_cb_f callback;
 	void *pal_priv;
 
 	enum uwb_rsv_state state;
+	bool needs_release_companion_mas;
 	u8 stream;
+	u8 tiebreaker;
 	struct uwb_mas_bm mas;
 	struct uwb_ie_drp *drp_ie;
+	struct uwb_rsv_move mv;
 	bool ie_valid;
 	struct timer_list timer;
-	bool expired;
+	struct work_struct handle_timeout_work;
 };
 
 static const
@@ -279,6 +303,13 @@ struct uwb_drp_avail {
 	bool ie_valid;
 };
 
+struct uwb_drp_backoff_win {
+	u8 window;
+	u8 n;
+	int total_expired;
+	struct timer_list timer;
+	bool can_reserve_extra_mases;
+};
 
 const char *uwb_rsv_state_str(enum uwb_rsv_state state);
 const char *uwb_rsv_type_str(enum uwb_drp_type type);
@@ -294,6 +325,8 @@ void uwb_rsv_terminate(struct uwb_rsv *rsv);
 
 void uwb_rsv_accept(struct uwb_rsv *rsv, uwb_rsv_cb_f cb, void *pal_priv);
 
+void uwb_rsv_get_usable_mas(struct uwb_rsv *orig_rsv, struct uwb_mas_bm *mas);
+
 /**
  * Radio Control Interface instance
  *
@@ -364,12 +397,18 @@ struct uwb_rc {
 
 	struct uwbd uwbd;
 
+	struct uwb_drp_backoff_win bow;
 	struct uwb_drp_avail drp_avail;
 	struct list_head reservations;
+	struct list_head cnflt_alien_list;
+	struct uwb_mas_bm cnflt_alien_bitmap;
 	struct mutex rsvs_mutex;
+	spinlock_t rsvs_lock;
 	struct workqueue_struct *rsv_workq;
-	struct work_struct rsv_update_work;
 
+	struct delayed_work rsv_update_work;
+	struct delayed_work rsv_alien_bp_work;
+	int set_drp_ie_pending;
 	struct mutex ies_mutex;
 	struct uwb_rc_cmd_set_ie *ies;
 	size_t ies_capacity;
diff --git a/include/linux/uwb/debug-cmd.h b/include/linux/uwb/debug-cmd.h
index 07efbe17db53..8da004e25628 100644
--- a/include/linux/uwb/debug-cmd.h
+++ b/include/linux/uwb/debug-cmd.h
@@ -43,7 +43,7 @@ struct uwb_dbg_cmd_rsv_establish {
 	__u8  type;
 	__u16 max_mas;
 	__u16 min_mas;
-	__u8  sparsity;
+	__u8  max_interval;
 };
 
 struct uwb_dbg_cmd_rsv_terminate {
diff --git a/include/linux/uwb/spec.h b/include/linux/uwb/spec.h
index a30436ea53aa..b52e44f1bd33 100644
--- a/include/linux/uwb/spec.h
+++ b/include/linux/uwb/spec.h
@@ -58,6 +58,11 @@ enum { UWB_NUM_ZONES = 16 };
  */
 #define UWB_MAS_PER_ZONE (UWB_NUM_MAS / UWB_NUM_ZONES)
 
+/*
+ * Number of MAS required before a row can be considered available.
+ */
+#define UWB_USABLE_MAS_PER_ROW (UWB_NUM_ZONES - 1)
+
 /*
  * Number of streams per DRP reservation between a pair of devices.
  *
@@ -93,6 +98,26 @@ enum { UWB_BEACON_SLOT_LENGTH_US = 85 };
  */
 enum { UWB_MAX_LOST_BEACONS = 3 };
 
+/*
+ * mDRPBackOffWinMin
+ *
+ * The minimum number of superframes to wait before trying to reserve
+ * extra MAS.
+ *
+ * [ECMA-368] section 17.16
+ */
+enum { UWB_DRP_BACKOFF_WIN_MIN = 2 };
+
+/*
+ * mDRPBackOffWinMax
+ *
+ * The maximum number of superframes to wait before trying to reserve
+ * extra MAS.
+ *
+ * [ECMA-368] section 17.16
+ */
+enum { UWB_DRP_BACKOFF_WIN_MAX = 16 };
+
 /*
  * Length of a superframe in microseconds.
  */
-- 
cgit v1.2.3


From 27af4245b6ce99e08c6a7c38825383bf51119cc9 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Mon, 1 Dec 2008 14:18:13 -0800
Subject: posix-timers: use "struct pid*" instead of "struct task_struct*"

Impact: restructure, clean up code

k_itimer holds the ref to the ->it_process until sys_timer_delete(). This
allows to pin up to RLIMIT_SIGPENDING dead task_struct's. Change the code
to use "struct pid *" instead.

The patch doesn't kill ->it_process, it places ->it_pid into the union.
->it_process is still used by do_cpu_nanosleep() as before. It would be
trivial to change the nanosleep code as well, but since it uses it_process
in a special way I think it is better to keep this field for grep.

The patch bloats the kernel by 104 bytes and it also adds the new pointer,
->it_signal, to k_itimer. It is used by lock_timer() to verify that the
found timer was not created by another process. It is not clear why do we
use the global database (and thus the global idr_lock) for posix timers.
We still need the signal_struct->posix_timers which contains all useable
timers, perhaps it is better to use some form of per-process array
instead.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/posix-timers.h |  6 +++++-
 kernel/posix-timers.c        | 43 +++++++++++++++++++++++--------------------
 2 files changed, 28 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index a7c721355549..4f71bf4e628c 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -45,7 +45,11 @@ struct k_itimer {
 	int it_requeue_pending;		/* waiting to requeue this timer */
 #define REQUEUE_PENDING 1
 	int it_sigev_notify;		/* notify word of sigevent struct */
-	struct task_struct *it_process;	/* process to send signal to */
+	struct signal_struct *it_signal;
+	union {
+		struct pid *it_pid;	/* pid of process to send signal to */
+		struct task_struct *it_process;	/* for clock_nanosleep */
+	};
 	struct sigqueue *sigq;		/* signal queue entry. */
 	union {
 		struct {
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 5e79c662294b..42a39afd694a 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -116,7 +116,7 @@ static DEFINE_SPINLOCK(idr_lock);
  *	    must supply functions here, even if the function just returns
  *	    ENOSYS.  The standard POSIX timer management code assumes the
  *	    following: 1.) The k_itimer struct (sched.h) is used for the
- *	    timer.  2.) The list, it_lock, it_clock, it_id and it_process
+ *	    timer.  2.) The list, it_lock, it_clock, it_id and it_pid
  *	    fields are not modified by timer code.
  *
  *          At this time all functions EXCEPT clock_nanosleep can be
@@ -313,7 +313,8 @@ void do_schedule_next_timer(struct siginfo *info)
 
 int posix_timer_event(struct k_itimer *timr, int si_private)
 {
-	int shared, ret;
+	struct task_struct *task;
+	int shared, ret = -1;
 	/*
 	 * FIXME: if ->sigq is queued we can race with
 	 * dequeue_signal()->do_schedule_next_timer().
@@ -327,8 +328,13 @@ int posix_timer_event(struct k_itimer *timr, int si_private)
 	 */
 	timr->sigq->info.si_sys_private = si_private;
 
-	shared = !(timr->it_sigev_notify & SIGEV_THREAD_ID);
-	ret = send_sigqueue(timr->sigq, timr->it_process, shared);
+	rcu_read_lock();
+	task = pid_task(timr->it_pid, PIDTYPE_PID);
+	if (task) {
+		shared = !(timr->it_sigev_notify & SIGEV_THREAD_ID);
+		ret = send_sigqueue(timr->sigq, task, shared);
+	}
+	rcu_read_unlock();
 	/* If we failed to send the signal the timer stops. */
 	return ret > 0;
 }
@@ -405,7 +411,7 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
 	return ret;
 }
 
-static struct task_struct * good_sigevent(sigevent_t * event)
+static struct pid *good_sigevent(sigevent_t * event)
 {
 	struct task_struct *rtn = current->group_leader;
 
@@ -419,7 +425,7 @@ static struct task_struct * good_sigevent(sigevent_t * event)
 	    ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX)))
 		return NULL;
 
-	return rtn;
+	return task_pid(rtn);
 }
 
 void register_posix_clock(const clockid_t clock_id, struct k_clock *new_clock)
@@ -471,7 +477,7 @@ sys_timer_create(const clockid_t which_clock,
 {
 	struct k_itimer *new_timer;
 	int error, new_timer_id;
-	struct task_struct *process;
+	struct pid *it_pid;
 	sigevent_t event;
 	int it_id_set = IT_ID_NOT_SET;
 
@@ -525,11 +531,9 @@ sys_timer_create(const clockid_t which_clock,
 			goto out;
 		}
 		rcu_read_lock();
-		process = good_sigevent(&event);
-		if (process)
-			get_task_struct(process);
+		it_pid = get_pid(good_sigevent(&event));
 		rcu_read_unlock();
-		if (!process) {
+		if (!it_pid) {
 			error = -EINVAL;
 			goto out;
 		}
@@ -537,8 +541,7 @@ sys_timer_create(const clockid_t which_clock,
 		event.sigev_notify = SIGEV_SIGNAL;
 		event.sigev_signo = SIGALRM;
 		event.sigev_value.sival_int = new_timer->it_id;
-		process = current->group_leader;
-		get_task_struct(process);
+		it_pid = get_pid(task_tgid(current));
 	}
 
 	new_timer->it_sigev_notify     = event.sigev_notify;
@@ -548,7 +551,8 @@ sys_timer_create(const clockid_t which_clock,
 	new_timer->sigq->info.si_code  = SI_TIMER;
 
 	spin_lock_irq(&current->sighand->siglock);
-	new_timer->it_process = process;
+	new_timer->it_pid = it_pid;
+	new_timer->it_signal = current->signal;
 	list_add(&new_timer->list, &current->signal->posix_timers);
 	spin_unlock_irq(&current->sighand->siglock);
 
@@ -583,8 +587,7 @@ static struct k_itimer *lock_timer(timer_t timer_id, unsigned long *flags)
 	timr = idr_find(&posix_timers_id, (int)timer_id);
 	if (timr) {
 		spin_lock(&timr->it_lock);
-		if (timr->it_process &&
-		    same_thread_group(timr->it_process, current)) {
+		if (timr->it_pid && timr->it_signal == current->signal) {
 			spin_unlock(&idr_lock);
 			return timr;
 		}
@@ -831,8 +834,8 @@ retry_delete:
 	 * This keeps any tasks waiting on the spin lock from thinking
 	 * they got something (see the lock code above).
 	 */
-	put_task_struct(timer->it_process);
-	timer->it_process = NULL;
+	put_pid(timer->it_pid);
+	timer->it_pid = NULL;
 
 	unlock_timer(timer, flags);
 	release_posix_timer(timer, IT_ID_SET);
@@ -858,8 +861,8 @@ retry_delete:
 	 * This keeps any tasks waiting on the spin lock from thinking
 	 * they got something (see the lock code above).
 	 */
-	put_task_struct(timer->it_process);
-	timer->it_process = NULL;
+	put_pid(timer->it_pid);
+	timer->it_pid = NULL;
 
 	unlock_timer(timer, flags);
 	release_posix_timer(timer, IT_ID_SET);
-- 
cgit v1.2.3


From c29541b24fb2c6301021637229ae5347c877330a Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Mon, 1 Dec 2008 14:18:11 -0800
Subject: linux/timex.h: cleanup for userspace

Impact: fix user-space exported use

Move all the kernel-specific defines and includes into the __KERNEL__
section so that they don't get leaked into userspace.

[akpm@linux-foundation.org: coding-style fixes]

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/timex.h | 73 ++++++++++++++++++++++++++-------------------------
 1 file changed, 37 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/timex.h b/include/linux/timex.h
index 9007313b5b71..998a55d80acf 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -53,46 +53,10 @@
 #ifndef _LINUX_TIMEX_H
 #define _LINUX_TIMEX_H
 
-#include <linux/compiler.h>
 #include <linux/time.h>
 
-#include <asm/param.h>
-
 #define NTP_API		4	/* NTP API version */
 
-/*
- * SHIFT_KG and SHIFT_KF establish the damping of the PLL and are chosen
- * for a slightly underdamped convergence characteristic. SHIFT_KH
- * establishes the damping of the FLL and is chosen by wisdom and black
- * art.
- *
- * MAXTC establishes the maximum time constant of the PLL. With the
- * SHIFT_KG and SHIFT_KF values given and a time constant range from
- * zero to MAXTC, the PLL will converge in 15 minutes to 16 hours,
- * respectively.
- */
-#define SHIFT_PLL	4	/* PLL frequency factor (shift) */
-#define SHIFT_FLL	2	/* FLL frequency factor (shift) */
-#define MAXTC		10	/* maximum time constant (shift) */
-
-/*
- * SHIFT_USEC defines the scaling (shift) of the time_freq and
- * time_tolerance variables, which represent the current frequency
- * offset and maximum frequency tolerance.
- */
-#define SHIFT_USEC 16		/* frequency offset scale (shift) */
-#define PPM_SCALE (NSEC_PER_USEC << (NTP_SCALE_SHIFT - SHIFT_USEC))
-#define PPM_SCALE_INV_SHIFT 19
-#define PPM_SCALE_INV ((1ll << (PPM_SCALE_INV_SHIFT + NTP_SCALE_SHIFT)) / \
-		       PPM_SCALE + 1)
-
-#define MAXPHASE 500000000l	/* max phase error (ns) */
-#define MAXFREQ 500000		/* max frequency error (ns/s) */
-#define MAXFREQ_SCALED ((s64)MAXFREQ << NTP_SCALE_SHIFT)
-#define MINSEC 256		/* min interval between updates (s) */
-#define MAXSEC 2048		/* max interval between updates (s) */
-#define NTP_PHASE_LIMIT ((MAXPHASE / NSEC_PER_USEC) << 5) /* beyond max. dispersion */
-
 /*
  * syscall interface - used (mainly by NTP daemon)
  * to discipline kernel clock oscillator
@@ -199,8 +163,45 @@ struct timex {
 #define TIME_BAD	TIME_ERROR /* bw compat */
 
 #ifdef __KERNEL__
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <linux/param.h>
+
 #include <asm/timex.h>
 
+/*
+ * SHIFT_KG and SHIFT_KF establish the damping of the PLL and are chosen
+ * for a slightly underdamped convergence characteristic. SHIFT_KH
+ * establishes the damping of the FLL and is chosen by wisdom and black
+ * art.
+ *
+ * MAXTC establishes the maximum time constant of the PLL. With the
+ * SHIFT_KG and SHIFT_KF values given and a time constant range from
+ * zero to MAXTC, the PLL will converge in 15 minutes to 16 hours,
+ * respectively.
+ */
+#define SHIFT_PLL	4	/* PLL frequency factor (shift) */
+#define SHIFT_FLL	2	/* FLL frequency factor (shift) */
+#define MAXTC		10	/* maximum time constant (shift) */
+
+/*
+ * SHIFT_USEC defines the scaling (shift) of the time_freq and
+ * time_tolerance variables, which represent the current frequency
+ * offset and maximum frequency tolerance.
+ */
+#define SHIFT_USEC 16		/* frequency offset scale (shift) */
+#define PPM_SCALE (NSEC_PER_USEC << (NTP_SCALE_SHIFT - SHIFT_USEC))
+#define PPM_SCALE_INV_SHIFT 19
+#define PPM_SCALE_INV ((1ll << (PPM_SCALE_INV_SHIFT + NTP_SCALE_SHIFT)) / \
+		       PPM_SCALE + 1)
+
+#define MAXPHASE 500000000l	/* max phase error (ns) */
+#define MAXFREQ 500000		/* max frequency error (ns/s) */
+#define MAXFREQ_SCALED ((s64)MAXFREQ << NTP_SCALE_SHIFT)
+#define MINSEC 256		/* min interval between updates (s) */
+#define MAXSEC 2048		/* max interval between updates (s) */
+#define NTP_PHASE_LIMIT ((MAXPHASE / NSEC_PER_USEC) << 5) /* beyond max. dispersion */
+
 /*
  * kernel variables
  * Note: maximum error = NTP synch distance = dispersion + delay / 2;
-- 
cgit v1.2.3


From 29c0177e6a4ac094302bed54a1d4bbb6b740a9ef Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Sat, 13 Dec 2008 21:20:25 +1030
Subject: cpumask: change cpumask_scnprintf, cpumask_parse_user, cpulist_parse,
 and cpulist_scnprintf to take pointers.

Impact: change calling convention of existing cpumask APIs

Most cpumask functions started with cpus_: these have been replaced by
cpumask_ ones which take struct cpumask pointers as expected.

These four functions don't have good replacement names; fortunately
they're rarely used, so we just change them over.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: paulus@samba.org
Cc: mingo@redhat.com
Cc: tony.luck@intel.com
Cc: ralf@linux-mips.org
Cc: Greg Kroah-Hartman <gregkh@suse.de>
Cc: cl@linux-foundation.org
Cc: srostedt@redhat.com
---
 arch/ia64/kernel/topology.c           |  2 +-
 arch/mips/kernel/smp-cmp.c            |  4 +-
 arch/powerpc/platforms/pseries/xics.c |  2 +-
 arch/x86/kernel/cpu/intel_cacheinfo.c |  4 +-
 arch/x86/kernel/setup_percpu.c        |  2 +-
 drivers/base/cpu.c                    |  2 +-
 drivers/base/node.c                   |  4 +-
 drivers/base/topology.c               |  4 +-
 drivers/pci/pci-sysfs.c               |  4 +-
 drivers/pci/probe.c                   |  4 +-
 include/linux/cpumask.h               | 87 +++++++++++++++++++++++------------
 kernel/cpuset.c                       |  4 +-
 kernel/irq/proc.c                     |  4 +-
 kernel/profile.c                      |  4 +-
 kernel/sched.c                        |  4 +-
 kernel/sched_stats.h                  |  2 +-
 kernel/taskstats.c                    |  2 +-
 kernel/trace/trace.c                  |  4 +-
 mm/slub.c                             |  2 +-
 19 files changed, 86 insertions(+), 59 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
index c75b914f2d6b..a8d61a3e9a94 100644
--- a/arch/ia64/kernel/topology.c
+++ b/arch/ia64/kernel/topology.c
@@ -219,7 +219,7 @@ static ssize_t show_shared_cpu_map(struct cache_info *this_leaf, char *buf)
 	cpumask_t shared_cpu_map;
 
 	cpus_and(shared_cpu_map, this_leaf->shared_cpu_map, cpu_online_map);
-	len = cpumask_scnprintf(buf, NR_CPUS+1, shared_cpu_map);
+	len = cpumask_scnprintf(buf, NR_CPUS+1, &shared_cpu_map);
 	len += sprintf(buf+len, "\n");
 	return len;
 }
diff --git a/arch/mips/kernel/smp-cmp.c b/arch/mips/kernel/smp-cmp.c
index 6789c1a12120..f27beca4b26d 100644
--- a/arch/mips/kernel/smp-cmp.c
+++ b/arch/mips/kernel/smp-cmp.c
@@ -51,10 +51,10 @@ static int __init allowcpus(char *str)
 	int len;
 
 	cpus_clear(cpu_allow_map);
-	if (cpulist_parse(str, cpu_allow_map) == 0) {
+	if (cpulist_parse(str, &cpu_allow_map) == 0) {
 		cpu_set(0, cpu_allow_map);
 		cpus_and(cpu_possible_map, cpu_possible_map, cpu_allow_map);
-		len = cpulist_scnprintf(buf, sizeof(buf)-1, cpu_possible_map);
+		len = cpulist_scnprintf(buf, sizeof(buf)-1, &cpu_possible_map);
 		buf[len] = '\0';
 		pr_debug("Allowable CPUs: %s\n", buf);
 		return 1;
diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c
index e1904774a70f..64d24310ce7e 100644
--- a/arch/powerpc/platforms/pseries/xics.c
+++ b/arch/powerpc/platforms/pseries/xics.c
@@ -358,7 +358,7 @@ static void xics_set_affinity(unsigned int virq, cpumask_t cpumask)
 	irq_server = get_irq_server(virq, 1);
 	if (irq_server == -1) {
 		char cpulist[128];
-		cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask);
+		cpumask_scnprintf(cpulist, sizeof(cpulist), &cpumask);
 		printk(KERN_WARNING
 			"%s: No online cpus in the mask %s for irq %d\n",
 			__func__, cpulist, virq);
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 3f46afbb1cf1..43ea612d3e9d 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -626,8 +626,8 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
 		cpumask_t *mask = &this_leaf->shared_cpu_map;
 
 		n = type?
-			cpulist_scnprintf(buf, len-2, *mask):
-			cpumask_scnprintf(buf, len-2, *mask);
+			cpulist_scnprintf(buf, len-2, mask) :
+			cpumask_scnprintf(buf, len-2, mask);
 		buf[n++] = '\n';
 		buf[n] = '\0';
 	}
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index ae0c0d3bb770..1c2084291f97 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -282,7 +282,7 @@ static void __cpuinit numa_set_cpumask(int cpu, int enable)
 	else
 		cpu_clear(cpu, *mask);
 
-	cpulist_scnprintf(buf, sizeof(buf), *mask);
+	cpulist_scnprintf(buf, sizeof(buf), mask);
 	printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
 		enable? "numa_add_cpu":"numa_remove_cpu", cpu, node, buf);
  }
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 64f5d54f7edc..4259072f5bd0 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -109,7 +109,7 @@ static SYSDEV_ATTR(crash_notes, 0400, show_crash_notes, NULL);
  */
 static ssize_t print_cpus_map(char *buf, cpumask_t *map)
 {
-	int n = cpulist_scnprintf(buf, PAGE_SIZE-2, *map);
+	int n = cpulist_scnprintf(buf, PAGE_SIZE-2, map);
 
 	buf[n++] = '\n';
 	buf[n] = '\0';
diff --git a/drivers/base/node.c b/drivers/base/node.c
index f5207090885a..91636cd8b6c9 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -30,8 +30,8 @@ static ssize_t node_read_cpumap(struct sys_device *dev, int type, char *buf)
 	BUILD_BUG_ON((NR_CPUS/32 * 9) > (PAGE_SIZE-1));
 
 	len = type?
-		cpulist_scnprintf(buf, PAGE_SIZE-2, *mask):
-		cpumask_scnprintf(buf, PAGE_SIZE-2, *mask);
+		cpulist_scnprintf(buf, PAGE_SIZE-2, mask) :
+		cpumask_scnprintf(buf, PAGE_SIZE-2, mask);
  	buf[len++] = '\n';
  	buf[len] = '\0';
 	return len;
diff --git a/drivers/base/topology.c b/drivers/base/topology.c
index 199cd97e32e6..a8bc1cbcfa7c 100644
--- a/drivers/base/topology.c
+++ b/drivers/base/topology.c
@@ -49,8 +49,8 @@ static ssize_t show_cpumap(int type, cpumask_t *mask, char *buf)
 
 	if (len > 1) {
 		n = type?
-			cpulist_scnprintf(buf, len-2, *mask):
-			cpumask_scnprintf(buf, len-2, *mask);
+			cpulist_scnprintf(buf, len-2, mask) :
+			cpumask_scnprintf(buf, len-2, mask);
 		buf[n++] = '\n';
 		buf[n] = '\0';
 	}
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 5d72866897a8..c88485860a0a 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -74,7 +74,7 @@ static ssize_t local_cpus_show(struct device *dev,
 	int len;
 
 	mask = pcibus_to_cpumask(to_pci_dev(dev)->bus);
-	len = cpumask_scnprintf(buf, PAGE_SIZE-2, mask);
+	len = cpumask_scnprintf(buf, PAGE_SIZE-2, &mask);
 	buf[len++] = '\n';
 	buf[len] = '\0';
 	return len;
@@ -88,7 +88,7 @@ static ssize_t local_cpulist_show(struct device *dev,
 	int len;
 
 	mask = pcibus_to_cpumask(to_pci_dev(dev)->bus);
-	len = cpulist_scnprintf(buf, PAGE_SIZE-2, mask);
+	len = cpulist_scnprintf(buf, PAGE_SIZE-2, &mask);
 	buf[len++] = '\n';
 	buf[len] = '\0';
 	return len;
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 003a9b3c293f..5b3f5937ecf5 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -55,8 +55,8 @@ static ssize_t pci_bus_show_cpuaffinity(struct device *dev,
 
 	cpumask = pcibus_to_cpumask(to_pci_bus(dev));
 	ret = type?
-		cpulist_scnprintf(buf, PAGE_SIZE-2, cpumask):
-		cpumask_scnprintf(buf, PAGE_SIZE-2, cpumask);
+		cpulist_scnprintf(buf, PAGE_SIZE-2, &cpumask) :
+		cpumask_scnprintf(buf, PAGE_SIZE-2, &cpumask);
 	buf[ret++] = '\n';
 	buf[ret] = '\0';
 	return ret;
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 21e1dd43e52a..94a2ab88ae85 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -339,36 +339,6 @@ extern cpumask_t cpu_mask_all;
 #endif
 #define	CPUMASK_PTR(v, m) 	cpumask_t *v = &(m->v)
 
-#define cpumask_scnprintf(buf, len, src) \
-			__cpumask_scnprintf((buf), (len), &(src), NR_CPUS)
-static inline int __cpumask_scnprintf(char *buf, int len,
-					const cpumask_t *srcp, int nbits)
-{
-	return bitmap_scnprintf(buf, len, srcp->bits, nbits);
-}
-
-#define cpumask_parse_user(ubuf, ulen, dst) \
-			__cpumask_parse_user((ubuf), (ulen), &(dst), NR_CPUS)
-static inline int __cpumask_parse_user(const char __user *buf, int len,
-					cpumask_t *dstp, int nbits)
-{
-	return bitmap_parse_user(buf, len, dstp->bits, nbits);
-}
-
-#define cpulist_scnprintf(buf, len, src) \
-			__cpulist_scnprintf((buf), (len), &(src), NR_CPUS)
-static inline int __cpulist_scnprintf(char *buf, int len,
-					const cpumask_t *srcp, int nbits)
-{
-	return bitmap_scnlistprintf(buf, len, srcp->bits, nbits);
-}
-
-#define cpulist_parse(buf, dst) __cpulist_parse((buf), &(dst), NR_CPUS)
-static inline int __cpulist_parse(const char *buf, cpumask_t *dstp, int nbits)
-{
-	return bitmap_parselist(buf, dstp->bits, nbits);
-}
-
 #define cpu_remap(oldbit, old, new) \
 		__cpu_remap((oldbit), &(old), &(new), NR_CPUS)
 static inline int __cpu_remap(int oldbit,
@@ -945,6 +915,63 @@ static inline void cpumask_copy(struct cpumask *dstp,
  */
 #define cpumask_of(cpu) (get_cpu_mask(cpu))
 
+/**
+ * cpumask_scnprintf - print a cpumask into a string as comma-separated hex
+ * @buf: the buffer to sprintf into
+ * @len: the length of the buffer
+ * @srcp: the cpumask to print
+ *
+ * If len is zero, returns zero.  Otherwise returns the length of the
+ * (nul-terminated) @buf string.
+ */
+static inline int cpumask_scnprintf(char *buf, int len,
+				    const struct cpumask *srcp)
+{
+	return bitmap_scnprintf(buf, len, srcp->bits, nr_cpumask_bits);
+}
+
+/**
+ * cpumask_parse_user - extract a cpumask from a user string
+ * @buf: the buffer to extract from
+ * @len: the length of the buffer
+ * @dstp: the cpumask to set.
+ *
+ * Returns -errno, or 0 for success.
+ */
+static inline int cpumask_parse_user(const char __user *buf, int len,
+				     struct cpumask *dstp)
+{
+	return bitmap_parse_user(buf, len, dstp->bits, nr_cpumask_bits);
+}
+
+/**
+ * cpulist_scnprintf - print a cpumask into a string as comma-separated list
+ * @buf: the buffer to sprintf into
+ * @len: the length of the buffer
+ * @srcp: the cpumask to print
+ *
+ * If len is zero, returns zero.  Otherwise returns the length of the
+ * (nul-terminated) @buf string.
+ */
+static inline int cpulist_scnprintf(char *buf, int len,
+				    const struct cpumask *srcp)
+{
+	return bitmap_scnlistprintf(buf, len, srcp->bits, nr_cpumask_bits);
+}
+
+/**
+ * cpulist_parse_user - extract a cpumask from a user string of ranges
+ * @buf: the buffer to extract from
+ * @len: the length of the buffer
+ * @dstp: the cpumask to set.
+ *
+ * Returns -errno, or 0 for success.
+ */
+static inline int cpulist_parse(const char *buf, struct cpumask *dstp)
+{
+	return bitmap_parselist(buf, dstp->bits, nr_cpumask_bits);
+}
+
 /**
  * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask *
  * @bitmap: the bitmap
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 96c0ba13b8cd..39c1a4c1c5a9 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -896,7 +896,7 @@ static int update_cpumask(struct cpuset *cs, const char *buf)
 	if (!*buf) {
 		cpus_clear(trialcs.cpus_allowed);
 	} else {
-		retval = cpulist_parse(buf, trialcs.cpus_allowed);
+		retval = cpulist_parse(buf, &trialcs.cpus_allowed);
 		if (retval < 0)
 			return retval;
 
@@ -1482,7 +1482,7 @@ static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
 	mask = cs->cpus_allowed;
 	mutex_unlock(&callback_mutex);
 
-	return cpulist_scnprintf(page, PAGE_SIZE, mask);
+	return cpulist_scnprintf(page, PAGE_SIZE, &mask);
 }
 
 static int cpuset_sprintf_memlist(char *page, struct cpuset *cs)
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index d257e7d6a8a4..f293349d49d0 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -47,7 +47,7 @@ static ssize_t irq_affinity_proc_write(struct file *file,
 	    irq_balancing_disabled(irq))
 		return -EIO;
 
-	err = cpumask_parse_user(buffer, count, new_value);
+	err = cpumask_parse_user(buffer, count, &new_value);
 	if (err)
 		return err;
 
@@ -95,7 +95,7 @@ static ssize_t default_affinity_write(struct file *file,
 	cpumask_t new_value;
 	int err;
 
-	err = cpumask_parse_user(buffer, count, new_value);
+	err = cpumask_parse_user(buffer, count, &new_value);
 	if (err)
 		return err;
 
diff --git a/kernel/profile.c b/kernel/profile.c
index dc41827fbfee..7d620dfdde59 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -442,7 +442,7 @@ void profile_tick(int type)
 static int prof_cpu_mask_read_proc(char *page, char **start, off_t off,
 			int count, int *eof, void *data)
 {
-	int len = cpumask_scnprintf(page, count, *(cpumask_t *)data);
+	int len = cpumask_scnprintf(page, count, (cpumask_t *)data);
 	if (count - len < 2)
 		return -EINVAL;
 	len += sprintf(page + len, "\n");
@@ -456,7 +456,7 @@ static int prof_cpu_mask_write_proc(struct file *file,
 	unsigned long full_count = count, err;
 	cpumask_t new_value;
 
-	err = cpumask_parse_user(buffer, count, new_value);
+	err = cpumask_parse_user(buffer, count, &new_value);
 	if (err)
 		return err;
 
diff --git a/kernel/sched.c b/kernel/sched.c
index e4bb1dd7b308..d2d16d1273b1 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -6666,7 +6666,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 	struct sched_group *group = sd->groups;
 	char str[256];
 
-	cpulist_scnprintf(str, sizeof(str), sd->span);
+	cpulist_scnprintf(str, sizeof(str), &sd->span);
 	cpus_clear(*groupmask);
 
 	printk(KERN_DEBUG "%*s domain %d: ", level, "", level);
@@ -6720,7 +6720,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 
 		cpus_or(*groupmask, *groupmask, group->cpumask);
 
-		cpulist_scnprintf(str, sizeof(str), group->cpumask);
+		cpulist_scnprintf(str, sizeof(str), &group->cpumask);
 		printk(KERN_CONT " %s", str);
 
 		group = group->next;
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index 7dbf72a2b02c..6beff1e4eeae 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -42,7 +42,7 @@ static int show_schedstat(struct seq_file *seq, void *v)
 		for_each_domain(cpu, sd) {
 			enum cpu_idle_type itype;
 
-			cpumask_scnprintf(mask_str, mask_len, sd->span);
+			cpumask_scnprintf(mask_str, mask_len, &sd->span);
 			seq_printf(seq, "domain%d %s", dcount++, mask_str);
 			for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES;
 					itype++) {
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index bd6be76303cf..6d7dc4ec4aa5 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -352,7 +352,7 @@ static int parse(struct nlattr *na, cpumask_t *mask)
 	if (!data)
 		return -ENOMEM;
 	nla_strlcpy(data, na, len);
-	ret = cpulist_parse(data, *mask);
+	ret = cpulist_parse(data, mask);
 	kfree(data);
 	return ret;
 }
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d86e3252f300..d2e75479dc50 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2126,7 +2126,7 @@ tracing_cpumask_read(struct file *filp, char __user *ubuf,
 
 	mutex_lock(&tracing_cpumask_update_lock);
 
-	len = cpumask_scnprintf(mask_str, count, tracing_cpumask);
+	len = cpumask_scnprintf(mask_str, count, &tracing_cpumask);
 	if (count - len < 2) {
 		count = -EINVAL;
 		goto out_err;
@@ -2147,7 +2147,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
 	int err, cpu;
 
 	mutex_lock(&tracing_cpumask_update_lock);
-	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
+	err = cpumask_parse_user(ubuf, count, &tracing_cpumask_new);
 	if (err)
 		goto err_unlock;
 
diff --git a/mm/slub.c b/mm/slub.c
index a2cd47d89e0a..8e516e29f989 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3626,7 +3626,7 @@ static int list_locations(struct kmem_cache *s, char *buf,
 				len < PAGE_SIZE - 60) {
 			len += sprintf(buf + len, " cpus=");
 			len += cpulist_scnprintf(buf + len, PAGE_SIZE - len - 50,
-					l->cpus);
+					&l->cpus);
 		}
 
 		if (num_online_nodes() > 1 && !nodes_empty(l->nodes) &&
-- 
cgit v1.2.3


From 0de26520c7cabf36e1de090ea8092f011a6106ce Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Sat, 13 Dec 2008 21:20:26 +1030
Subject: cpumask: make irq_set_affinity() take a const struct cpumask

Impact: change existing irq_chip API

Not much point with gentle transition here: the struct irq_chip's
setaffinity method signature needs to change.

Fortunately, not widely used code, but hits a few architectures.

Note: In irq_select_affinity() I save a temporary in by mangling
irq_desc[irq].affinity directly.  Ingo, does this break anything?

(Folded in fix from KOSAKI Motohiro)

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Reviewed-by: Grant Grundler <grundler@parisc-linux.org>
Acked-by: Ingo Molnar <mingo@redhat.com>
Cc: ralf@linux-mips.org
Cc: grundler@parisc-linux.org
Cc: jeremy@xensource.com
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
---
 arch/alpha/kernel/irq.c               |  2 +-
 arch/alpha/kernel/sys_dp264.c         |  8 ++--
 arch/alpha/kernel/sys_titan.c         |  4 +-
 arch/arm/common/gic.c                 |  4 +-
 arch/arm/kernel/irq.c                 |  2 +-
 arch/arm/oprofile/op_model_mpcore.c   |  4 +-
 arch/cris/arch-v32/kernel/irq.c       |  4 +-
 arch/ia64/hp/sim/hpsim_irq.c          |  2 +-
 arch/ia64/kernel/iosapic.c            | 12 +++---
 arch/ia64/kernel/irq.c                |  9 ++--
 arch/ia64/kernel/msi_ia64.c           | 12 +++---
 arch/ia64/kernel/smpboot.c            |  4 +-
 arch/ia64/sn/kernel/irq.c             |  6 +--
 arch/ia64/sn/kernel/msi_sn.c          |  7 +--
 arch/mips/include/asm/irq.h           |  3 +-
 arch/mips/kernel/cevt-bcm1480.c       |  2 +-
 arch/mips/kernel/cevt-sb1250.c        |  2 +-
 arch/mips/kernel/irq-gic.c            |  6 +--
 arch/mips/mti-malta/malta-smtc.c      |  6 +--
 arch/mips/sibyte/bcm1480/irq.c        |  8 ++--
 arch/mips/sibyte/sb1250/irq.c         |  8 ++--
 arch/parisc/kernel/irq.c              |  6 +--
 arch/powerpc/kernel/irq.c             |  2 +-
 arch/powerpc/platforms/pseries/xics.c |  6 +--
 arch/powerpc/sysdev/mpic.c            |  4 +-
 arch/powerpc/sysdev/mpic.h            |  2 +-
 arch/sparc64/kernel/irq.c             | 11 +++--
 arch/sparc64/kernel/of_device.c       |  2 +-
 arch/sparc64/kernel/pci_msi.c         |  2 +-
 arch/x86/kernel/hpet.c                |  4 +-
 arch/x86/kernel/io_apic.c             | 81 +++++++++++++++++------------------
 arch/x86/kernel/irq_32.c              |  2 +-
 arch/x86/kernel/irq_64.c              |  2 +-
 drivers/parisc/iosapic.c              |  7 +--
 drivers/xen/events.c                  |  6 +--
 include/linux/interrupt.h             |  4 +-
 include/linux/irq.h                   |  3 +-
 kernel/irq/chip.c                     |  2 +-
 kernel/irq/manage.c                   | 22 +++++-----
 kernel/irq/migration.c                | 14 +++---
 kernel/irq/proc.c                     | 29 ++++++++-----
 kernel/time/tick-common.c             |  6 +--
 42 files changed, 171 insertions(+), 161 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c
index c626a821cdcb..d0f1620007f7 100644
--- a/arch/alpha/kernel/irq.c
+++ b/arch/alpha/kernel/irq.c
@@ -55,7 +55,7 @@ int irq_select_affinity(unsigned int irq)
 	last_cpu = cpu;
 
 	irq_desc[irq].affinity = cpumask_of_cpu(cpu);
-	irq_desc[irq].chip->set_affinity(irq, cpumask_of_cpu(cpu));
+	irq_desc[irq].chip->set_affinity(irq, cpumask_of(cpu));
 	return 0;
 }
 #endif /* CONFIG_SMP */
diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c
index c71b0fd7a61f..ab44c164d9d4 100644
--- a/arch/alpha/kernel/sys_dp264.c
+++ b/arch/alpha/kernel/sys_dp264.c
@@ -177,19 +177,19 @@ cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity)
 }
 
 static void
-dp264_set_affinity(unsigned int irq, cpumask_t affinity)
+dp264_set_affinity(unsigned int irq, const struct cpumask *affinity)
 { 
 	spin_lock(&dp264_irq_lock);
-	cpu_set_irq_affinity(irq, affinity);
+	cpu_set_irq_affinity(irq, *affinity);
 	tsunami_update_irq_hw(cached_irq_mask);
 	spin_unlock(&dp264_irq_lock);
 }
 
 static void
-clipper_set_affinity(unsigned int irq, cpumask_t affinity)
+clipper_set_affinity(unsigned int irq, const struct cpumask *affinity)
 { 
 	spin_lock(&dp264_irq_lock);
-	cpu_set_irq_affinity(irq - 16, affinity);
+	cpu_set_irq_affinity(irq - 16, *affinity);
 	tsunami_update_irq_hw(cached_irq_mask);
 	spin_unlock(&dp264_irq_lock);
 }
diff --git a/arch/alpha/kernel/sys_titan.c b/arch/alpha/kernel/sys_titan.c
index 52c91ccc1648..27f840a4ad3d 100644
--- a/arch/alpha/kernel/sys_titan.c
+++ b/arch/alpha/kernel/sys_titan.c
@@ -158,10 +158,10 @@ titan_cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity)
 }
 
 static void
-titan_set_irq_affinity(unsigned int irq, cpumask_t affinity)
+titan_set_irq_affinity(unsigned int irq, const struct cpumask *affinity)
 { 
 	spin_lock(&titan_irq_lock);
-	titan_cpu_set_irq_affinity(irq - 16, affinity);
+	titan_cpu_set_irq_affinity(irq - 16, *affinity);
 	titan_update_irq_hw(titan_cached_irq_mask);
 	spin_unlock(&titan_irq_lock);
 }
diff --git a/arch/arm/common/gic.c b/arch/arm/common/gic.c
index 7fc9860a97d7..c6884ba1d5ed 100644
--- a/arch/arm/common/gic.c
+++ b/arch/arm/common/gic.c
@@ -109,11 +109,11 @@ static void gic_unmask_irq(unsigned int irq)
 }
 
 #ifdef CONFIG_SMP
-static void gic_set_cpu(unsigned int irq, cpumask_t mask_val)
+static void gic_set_cpu(unsigned int irq, const struct cpumask *mask_val)
 {
 	void __iomem *reg = gic_dist_base(irq) + GIC_DIST_TARGET + (gic_irq(irq) & ~3);
 	unsigned int shift = (irq % 4) * 8;
-	unsigned int cpu = first_cpu(mask_val);
+	unsigned int cpu = cpumask_first(mask_val);
 	u32 val;
 
 	spin_lock(&irq_controller_lock);
diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index 2f3eb795fa6e..7141cee1fab7 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -174,7 +174,7 @@ static void route_irq(struct irq_desc *desc, unsigned int irq, unsigned int cpu)
 	pr_debug("IRQ%u: moving from cpu%u to cpu%u\n", irq, desc->cpu, cpu);
 
 	spin_lock_irq(&desc->lock);
-	desc->chip->set_affinity(irq, cpumask_of_cpu(cpu));
+	desc->chip->set_affinity(irq, cpumask_of(cpu));
 	spin_unlock_irq(&desc->lock);
 }
 
diff --git a/arch/arm/oprofile/op_model_mpcore.c b/arch/arm/oprofile/op_model_mpcore.c
index 4de366e8b4c5..6d6bd5899240 100644
--- a/arch/arm/oprofile/op_model_mpcore.c
+++ b/arch/arm/oprofile/op_model_mpcore.c
@@ -260,10 +260,10 @@ static void em_stop(void)
 static void em_route_irq(int irq, unsigned int cpu)
 {
 	struct irq_desc *desc = irq_desc + irq;
-	cpumask_t mask = cpumask_of_cpu(cpu);
+	const struct cpumask *mask = cpumask_of(cpu);
 
 	spin_lock_irq(&desc->lock);
-	desc->affinity = mask;
+	desc->affinity = *mask;
 	desc->chip->set_affinity(irq, mask);
 	spin_unlock_irq(&desc->lock);
 }
diff --git a/arch/cris/arch-v32/kernel/irq.c b/arch/cris/arch-v32/kernel/irq.c
index 173c141ac9ba..295131fee710 100644
--- a/arch/cris/arch-v32/kernel/irq.c
+++ b/arch/cris/arch-v32/kernel/irq.c
@@ -325,11 +325,11 @@ static void end_crisv32_irq(unsigned int irq)
 {
 }
 
-void set_affinity_crisv32_irq(unsigned int irq, cpumask_t dest)
+void set_affinity_crisv32_irq(unsigned int irq, const struct cpumask *dest)
 {
 	unsigned long flags;
 	spin_lock_irqsave(&irq_lock, flags);
-	irq_allocations[irq - FIRST_IRQ].mask = dest;
+	irq_allocations[irq - FIRST_IRQ].mask = *dest;
 	spin_unlock_irqrestore(&irq_lock, flags);
 }
 
diff --git a/arch/ia64/hp/sim/hpsim_irq.c b/arch/ia64/hp/sim/hpsim_irq.c
index c2f58ff364e7..cc0a3182db3c 100644
--- a/arch/ia64/hp/sim/hpsim_irq.c
+++ b/arch/ia64/hp/sim/hpsim_irq.c
@@ -22,7 +22,7 @@ hpsim_irq_noop (unsigned int irq)
 }
 
 static void
-hpsim_set_affinity_noop (unsigned int a, cpumask_t b)
+hpsim_set_affinity_noop(unsigned int a, const struct cpumask *b)
 {
 }
 
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index 5c4674ae8aea..c8adecd5b416 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -330,25 +330,25 @@ unmask_irq (unsigned int irq)
 
 
 static void
-iosapic_set_affinity (unsigned int irq, cpumask_t mask)
+iosapic_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 #ifdef CONFIG_SMP
 	u32 high32, low32;
-	int dest, rte_index;
+	int cpu, dest, rte_index;
 	int redir = (irq & IA64_IRQ_REDIRECTED) ? 1 : 0;
 	struct iosapic_rte_info *rte;
 	struct iosapic *iosapic;
 
 	irq &= (~IA64_IRQ_REDIRECTED);
 
-	cpus_and(mask, mask, cpu_online_map);
-	if (cpus_empty(mask))
+	cpu = cpumask_first_and(cpu_online_mask, mask);
+	if (cpu >= nr_cpu_ids)
 		return;
 
-	if (irq_prepare_move(irq, first_cpu(mask)))
+	if (irq_prepare_move(irq, cpu))
 		return;
 
-	dest = cpu_physical_id(first_cpu(mask));
+	dest = cpu_physical_id(cpu);
 
 	if (!iosapic_intr_info[irq].count)
 		return;			/* not an IOSAPIC interrupt */
diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c
index 7fd18f54c056..0b6db53fedcf 100644
--- a/arch/ia64/kernel/irq.c
+++ b/arch/ia64/kernel/irq.c
@@ -133,7 +133,6 @@ unsigned int vectors_in_migration[NR_IRQS];
  */
 static void migrate_irqs(void)
 {
-	cpumask_t	mask;
 	irq_desc_t *desc;
 	int 		irq, new_cpu;
 
@@ -152,15 +151,14 @@ static void migrate_irqs(void)
 		if (desc->status == IRQ_PER_CPU)
 			continue;
 
-		cpus_and(mask, irq_desc[irq].affinity, cpu_online_map);
-		if (any_online_cpu(mask) == NR_CPUS) {
+		if (cpumask_any_and(&irq_desc[irq].affinity, cpu_online_mask)
+		    >= nr_cpu_ids) {
 			/*
 			 * Save it for phase 2 processing
 			 */
 			vectors_in_migration[irq] = irq;
 
 			new_cpu = any_online_cpu(cpu_online_map);
-			mask = cpumask_of_cpu(new_cpu);
 
 			/*
 			 * Al three are essential, currently WARN_ON.. maybe panic?
@@ -168,7 +166,8 @@ static void migrate_irqs(void)
 			if (desc->chip && desc->chip->disable &&
 				desc->chip->enable && desc->chip->set_affinity) {
 				desc->chip->disable(irq);
-				desc->chip->set_affinity(irq, mask);
+				desc->chip->set_affinity(irq,
+							 cpumask_of(new_cpu));
 				desc->chip->enable(irq);
 			} else {
 				WARN_ON((!(desc->chip) || !(desc->chip->disable) ||
diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c
index 702a09c13238..890339339035 100644
--- a/arch/ia64/kernel/msi_ia64.c
+++ b/arch/ia64/kernel/msi_ia64.c
@@ -49,11 +49,12 @@
 static struct irq_chip	ia64_msi_chip;
 
 #ifdef CONFIG_SMP
-static void ia64_set_msi_irq_affinity(unsigned int irq, cpumask_t cpu_mask)
+static void ia64_set_msi_irq_affinity(unsigned int irq,
+				      const cpumask_t *cpu_mask)
 {
 	struct msi_msg msg;
 	u32 addr, data;
-	int cpu = first_cpu(cpu_mask);
+	int cpu = first_cpu(*cpu_mask);
 
 	if (!cpu_online(cpu))
 		return;
@@ -166,12 +167,11 @@ void arch_teardown_msi_irq(unsigned int irq)
 
 #ifdef CONFIG_DMAR
 #ifdef CONFIG_SMP
-static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
+static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_cfg *cfg = irq_cfg + irq;
 	struct msi_msg msg;
-	int cpu = first_cpu(mask);
-
+	int cpu = cpumask_first(mask);
 
 	if (!cpu_online(cpu))
 		return;
@@ -187,7 +187,7 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
 	msg.address_lo |= MSI_ADDR_DESTID_CPU(cpu_physical_id(cpu));
 
 	dmar_msi_write(irq, &msg);
-	irq_desc[irq].affinity = mask;
+	irq_desc[irq].affinity = *mask;
 }
 #endif /* CONFIG_SMP */
 
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index 4ede6e571c38..11463994a7d5 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -682,7 +682,7 @@ int migrate_platform_irqs(unsigned int cpu)
 {
 	int new_cpei_cpu;
 	irq_desc_t *desc = NULL;
-	cpumask_t 	mask;
+	const struct cpumask *mask;
 	int 		retval = 0;
 
 	/*
@@ -695,7 +695,7 @@ int migrate_platform_irqs(unsigned int cpu)
 			 * Now re-target the CPEI to a different processor
 			 */
 			new_cpei_cpu = any_online_cpu(cpu_online_map);
-			mask = cpumask_of_cpu(new_cpei_cpu);
+			mask = cpumask_of(new_cpei_cpu);
 			set_cpei_target_cpu(new_cpei_cpu);
 			desc = irq_desc + ia64_cpe_irq;
 			/*
diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c
index 0c66dbdd1d72..66fd705e82c0 100644
--- a/arch/ia64/sn/kernel/irq.c
+++ b/arch/ia64/sn/kernel/irq.c
@@ -227,14 +227,14 @@ finish_up:
 	return new_irq_info;
 }
 
-static void sn_set_affinity_irq(unsigned int irq, cpumask_t mask)
+static void sn_set_affinity_irq(unsigned int irq, const struct cpumask *mask)
 {
 	struct sn_irq_info *sn_irq_info, *sn_irq_info_safe;
 	nasid_t nasid;
 	int slice;
 
-	nasid = cpuid_to_nasid(first_cpu(mask));
-	slice = cpuid_to_slice(first_cpu(mask));
+	nasid = cpuid_to_nasid(cpumask_first(mask));
+	slice = cpuid_to_slice(cpumask_first(mask));
 
 	list_for_each_entry_safe(sn_irq_info, sn_irq_info_safe,
 				 sn_irq_lh[irq], list)
diff --git a/arch/ia64/sn/kernel/msi_sn.c b/arch/ia64/sn/kernel/msi_sn.c
index 83f190ffe350..ca553b0429ce 100644
--- a/arch/ia64/sn/kernel/msi_sn.c
+++ b/arch/ia64/sn/kernel/msi_sn.c
@@ -151,7 +151,8 @@ int sn_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *entry)
 }
 
 #ifdef CONFIG_SMP
-static void sn_set_msi_irq_affinity(unsigned int irq, cpumask_t cpu_mask)
+static void sn_set_msi_irq_affinity(unsigned int irq,
+				    const struct cpumask *cpu_mask)
 {
 	struct msi_msg msg;
 	int slice;
@@ -164,7 +165,7 @@ static void sn_set_msi_irq_affinity(unsigned int irq, cpumask_t cpu_mask)
 	struct sn_pcibus_provider *provider;
 	unsigned int cpu;
 
-	cpu = first_cpu(cpu_mask);
+	cpu = cpumask_first(cpu_mask);
 	sn_irq_info = sn_msi_info[irq].sn_irq_info;
 	if (sn_irq_info == NULL || sn_irq_info->irq_int_bit >= 0)
 		return;
@@ -204,7 +205,7 @@ static void sn_set_msi_irq_affinity(unsigned int irq, cpumask_t cpu_mask)
 	msg.address_lo = (u32)(bus_addr & 0x00000000ffffffff);
 
 	write_msi_msg(irq, &msg);
-	irq_desc[irq].affinity = cpu_mask;
+	irq_desc[irq].affinity = *cpu_mask;
 }
 #endif /* CONFIG_SMP */
 
diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h
index a58f0eecc68f..abc62aa744ac 100644
--- a/arch/mips/include/asm/irq.h
+++ b/arch/mips/include/asm/irq.h
@@ -49,7 +49,8 @@ static inline void smtc_im_ack_irq(unsigned int irq)
 #ifdef CONFIG_MIPS_MT_SMTC_IRQAFF
 #include <linux/cpumask.h>
 
-extern void plat_set_irq_affinity(unsigned int irq, cpumask_t affinity);
+extern void plat_set_irq_affinity(unsigned int irq,
+				  const struct cpumask *affinity);
 extern void smtc_forward_irq(unsigned int irq);
 
 /*
diff --git a/arch/mips/kernel/cevt-bcm1480.c b/arch/mips/kernel/cevt-bcm1480.c
index 0a57f86945f1..d7e21bc8cd21 100644
--- a/arch/mips/kernel/cevt-bcm1480.c
+++ b/arch/mips/kernel/cevt-bcm1480.c
@@ -148,6 +148,6 @@ void __cpuinit sb1480_clockevent_init(void)
 	action->name	= name;
 	action->dev_id	= cd;
 
-	irq_set_affinity(irq, cpumask_of_cpu(cpu));
+	irq_set_affinity(irq, cpumask_of(cpu));
 	setup_irq(irq, action);
 }
diff --git a/arch/mips/kernel/cevt-sb1250.c b/arch/mips/kernel/cevt-sb1250.c
index 63ac3ad462bc..0f188cd46e03 100644
--- a/arch/mips/kernel/cevt-sb1250.c
+++ b/arch/mips/kernel/cevt-sb1250.c
@@ -147,6 +147,6 @@ void __cpuinit sb1250_clockevent_init(void)
 	action->name	= name;
 	action->dev_id	= cd;
 
-	irq_set_affinity(irq, cpumask_of_cpu(cpu));
+	irq_set_affinity(irq, cpumask_of(cpu));
 	setup_irq(irq, action);
 }
diff --git a/arch/mips/kernel/irq-gic.c b/arch/mips/kernel/irq-gic.c
index f0a4bb19e096..494a49a317e9 100644
--- a/arch/mips/kernel/irq-gic.c
+++ b/arch/mips/kernel/irq-gic.c
@@ -155,7 +155,7 @@ static void gic_unmask_irq(unsigned int irq)
 
 static DEFINE_SPINLOCK(gic_lock);
 
-static void gic_set_affinity(unsigned int irq, cpumask_t cpumask)
+static void gic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
 {
 	cpumask_t	tmp = CPU_MASK_NONE;
 	unsigned long	flags;
@@ -164,7 +164,7 @@ static void gic_set_affinity(unsigned int irq, cpumask_t cpumask)
 	pr_debug(KERN_DEBUG "%s called\n", __func__);
 	irq -= _irqbase;
 
-	cpus_and(tmp, cpumask, cpu_online_map);
+	cpumask_and(&tmp, cpumask, cpu_online_mask);
 	if (cpus_empty(tmp))
 		return;
 
@@ -187,7 +187,7 @@ static void gic_set_affinity(unsigned int irq, cpumask_t cpumask)
 		set_bit(irq, pcpu_masks[first_cpu(tmp)].pcpu_mask);
 
 	}
-	irq_desc[irq].affinity = cpumask;
+	irq_desc[irq].affinity = *cpumask;
 	spin_unlock_irqrestore(&gic_lock, flags);
 
 }
diff --git a/arch/mips/mti-malta/malta-smtc.c b/arch/mips/mti-malta/malta-smtc.c
index f84a46a8ae6e..aabd7274507b 100644
--- a/arch/mips/mti-malta/malta-smtc.c
+++ b/arch/mips/mti-malta/malta-smtc.c
@@ -114,9 +114,9 @@ struct plat_smp_ops msmtc_smp_ops = {
  */
 
 
-void plat_set_irq_affinity(unsigned int irq, cpumask_t affinity)
+void plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity)
 {
-	cpumask_t tmask = affinity;
+	cpumask_t tmask = *affinity;
 	int cpu = 0;
 	void smtc_set_irq_affinity(unsigned int irq, cpumask_t aff);
 
@@ -139,7 +139,7 @@ void plat_set_irq_affinity(unsigned int irq, cpumask_t affinity)
 	 * be made to forward to an offline "CPU".
 	 */
 
-	for_each_cpu_mask(cpu, affinity) {
+	for_each_cpu(cpu, affinity) {
 		if ((cpu_data[cpu].vpe_id != 0) || !cpu_online(cpu))
 			cpu_clear(cpu, tmask);
 	}
diff --git a/arch/mips/sibyte/bcm1480/irq.c b/arch/mips/sibyte/bcm1480/irq.c
index a35818ed4263..12b465d404df 100644
--- a/arch/mips/sibyte/bcm1480/irq.c
+++ b/arch/mips/sibyte/bcm1480/irq.c
@@ -50,7 +50,7 @@ static void enable_bcm1480_irq(unsigned int irq);
 static void disable_bcm1480_irq(unsigned int irq);
 static void ack_bcm1480_irq(unsigned int irq);
 #ifdef CONFIG_SMP
-static void bcm1480_set_affinity(unsigned int irq, cpumask_t mask);
+static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask);
 #endif
 
 #ifdef CONFIG_PCI
@@ -109,7 +109,7 @@ void bcm1480_unmask_irq(int cpu, int irq)
 }
 
 #ifdef CONFIG_SMP
-static void bcm1480_set_affinity(unsigned int irq, cpumask_t mask)
+static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	int i = 0, old_cpu, cpu, int_on, k;
 	u64 cur_ints;
@@ -117,11 +117,11 @@ static void bcm1480_set_affinity(unsigned int irq, cpumask_t mask)
 	unsigned long flags;
 	unsigned int irq_dirty;
 
-	if (cpus_weight(mask) != 1) {
+	if (cpumask_weight(mask) != 1) {
 		printk("attempted to set irq affinity for irq %d to multiple CPUs\n", irq);
 		return;
 	}
-	i = first_cpu(mask);
+	i = cpumask_first(mask);
 
 	/* Convert logical CPU to physical CPU */
 	cpu = cpu_logical_map(i);
diff --git a/arch/mips/sibyte/sb1250/irq.c b/arch/mips/sibyte/sb1250/irq.c
index a5158483986e..808ac2959b8c 100644
--- a/arch/mips/sibyte/sb1250/irq.c
+++ b/arch/mips/sibyte/sb1250/irq.c
@@ -50,7 +50,7 @@ static void enable_sb1250_irq(unsigned int irq);
 static void disable_sb1250_irq(unsigned int irq);
 static void ack_sb1250_irq(unsigned int irq);
 #ifdef CONFIG_SMP
-static void sb1250_set_affinity(unsigned int irq, cpumask_t mask);
+static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask);
 #endif
 
 #ifdef CONFIG_SIBYTE_HAS_LDT
@@ -103,16 +103,16 @@ void sb1250_unmask_irq(int cpu, int irq)
 }
 
 #ifdef CONFIG_SMP
-static void sb1250_set_affinity(unsigned int irq, cpumask_t mask)
+static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	int i = 0, old_cpu, cpu, int_on;
 	u64 cur_ints;
 	struct irq_desc *desc = irq_desc + irq;
 	unsigned long flags;
 
-	i = first_cpu(mask);
+	i = cpumask_first(mask);
 
-	if (cpus_weight(mask) > 1) {
+	if (cpumask_weight(mask) > 1) {
 		printk("attempted to set irq affinity for irq %d to multiple CPUs\n", irq);
 		return;
 	}
diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c
index 23ef950df008..4cea935e2f99 100644
--- a/arch/parisc/kernel/irq.c
+++ b/arch/parisc/kernel/irq.c
@@ -131,12 +131,12 @@ int cpu_check_affinity(unsigned int irq, cpumask_t *dest)
 	return 0;
 }
 
-static void cpu_set_affinity_irq(unsigned int irq, cpumask_t dest)
+static void cpu_set_affinity_irq(unsigned int irq, const struct cpumask *dest)
 {
-	if (cpu_check_affinity(irq, &dest))
+	if (cpu_check_affinity(irq, dest))
 		return;
 
-	irq_desc[irq].affinity = dest;
+	irq_desc[irq].affinity = *dest;
 }
 #endif
 
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index ac222d0ab12e..23b8b5e36f98 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -237,7 +237,7 @@ void fixup_irqs(cpumask_t map)
 			mask = map;
 		}
 		if (irq_desc[irq].chip->set_affinity)
-			irq_desc[irq].chip->set_affinity(irq, mask);
+			irq_desc[irq].chip->set_affinity(irq, &mask);
 		else if (irq_desc[irq].action && !(warned++))
 			printk("Cannot set affinity for irq %i\n", irq);
 	}
diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c
index 64d24310ce7e..424b335a71c8 100644
--- a/arch/powerpc/platforms/pseries/xics.c
+++ b/arch/powerpc/platforms/pseries/xics.c
@@ -332,7 +332,7 @@ static void xics_eoi_lpar(unsigned int virq)
 	lpar_xirr_info_set((0xff << 24) | irq);
 }
 
-static void xics_set_affinity(unsigned int virq, cpumask_t cpumask)
+static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
 {
 	unsigned int irq;
 	int status;
@@ -358,7 +358,7 @@ static void xics_set_affinity(unsigned int virq, cpumask_t cpumask)
 	irq_server = get_irq_server(virq, 1);
 	if (irq_server == -1) {
 		char cpulist[128];
-		cpumask_scnprintf(cpulist, sizeof(cpulist), &cpumask);
+		cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask);
 		printk(KERN_WARNING
 			"%s: No online cpus in the mask %s for irq %d\n",
 			__func__, cpulist, virq);
@@ -845,7 +845,7 @@ void xics_migrate_irqs_away(void)
 
 		/* Reset affinity to all cpus */
 		irq_desc[virq].affinity = CPU_MASK_ALL;
-		desc->chip->set_affinity(virq, CPU_MASK_ALL);
+		desc->chip->set_affinity(virq, cpu_all_mask);
 unlock:
 		spin_unlock_irqrestore(&desc->lock, flags);
 	}
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index 1890fb085cde..5d7f9f0c93c3 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -817,7 +817,7 @@ static void mpic_end_ipi(unsigned int irq)
 
 #endif /* CONFIG_SMP */
 
-void mpic_set_affinity(unsigned int irq, cpumask_t cpumask)
+void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
 {
 	struct mpic *mpic = mpic_from_irq(irq);
 	unsigned int src = mpic_irq_to_hw(irq);
@@ -829,7 +829,7 @@ void mpic_set_affinity(unsigned int irq, cpumask_t cpumask)
 	} else {
 		cpumask_t tmp;
 
-		cpus_and(tmp, cpumask, cpu_online_map);
+		cpumask_and(&tmp, cpumask, cpu_online_mask);
 
 		mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION),
 			       mpic_physmask(cpus_addr(tmp)[0]));
diff --git a/arch/powerpc/sysdev/mpic.h b/arch/powerpc/sysdev/mpic.h
index 6209c62a426d..3cef2af10f42 100644
--- a/arch/powerpc/sysdev/mpic.h
+++ b/arch/powerpc/sysdev/mpic.h
@@ -36,6 +36,6 @@ static inline int mpic_pasemi_msi_init(struct mpic *mpic)
 
 extern int mpic_set_irq_type(unsigned int virq, unsigned int flow_type);
 extern void mpic_set_vector(unsigned int virq, unsigned int vector);
-extern void mpic_set_affinity(unsigned int irq, cpumask_t cpumask);
+extern void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask);
 
 #endif /* _POWERPC_SYSDEV_MPIC_H */
diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c
index 52fc836f464d..4aaf18e83c8c 100644
--- a/arch/sparc64/kernel/irq.c
+++ b/arch/sparc64/kernel/irq.c
@@ -312,7 +312,8 @@ static void sun4u_irq_enable(unsigned int virt_irq)
 	}
 }
 
-static void sun4u_set_affinity(unsigned int virt_irq, cpumask_t mask)
+static void sun4u_set_affinity(unsigned int virt_irq,
+			       const struct cpumask *mask)
 {
 	sun4u_irq_enable(virt_irq);
 }
@@ -362,7 +363,8 @@ static void sun4v_irq_enable(unsigned int virt_irq)
 		       ino, err);
 }
 
-static void sun4v_set_affinity(unsigned int virt_irq, cpumask_t mask)
+static void sun4v_set_affinity(unsigned int virt_irq,
+			       const struct cpumask *mask)
 {
 	unsigned int ino = virt_irq_table[virt_irq].dev_ino;
 	unsigned long cpuid = irq_choose_cpu(virt_irq);
@@ -429,7 +431,8 @@ static void sun4v_virq_enable(unsigned int virt_irq)
 		       dev_handle, dev_ino, err);
 }
 
-static void sun4v_virt_set_affinity(unsigned int virt_irq, cpumask_t mask)
+static void sun4v_virt_set_affinity(unsigned int virt_irq,
+				    const struct cpumask *mask)
 {
 	unsigned long cpuid, dev_handle, dev_ino;
 	int err;
@@ -788,7 +791,7 @@ void fixup_irqs(void)
 		    !(irq_desc[irq].status & IRQ_PER_CPU)) {
 			if (irq_desc[irq].chip->set_affinity)
 				irq_desc[irq].chip->set_affinity(irq,
-					irq_desc[irq].affinity);
+					&irq_desc[irq].affinity);
 		}
 		spin_unlock_irqrestore(&irq_desc[irq].lock, flags);
 	}
diff --git a/arch/sparc64/kernel/of_device.c b/arch/sparc64/kernel/of_device.c
index 0f616ae3246c..df2efb7fc14c 100644
--- a/arch/sparc64/kernel/of_device.c
+++ b/arch/sparc64/kernel/of_device.c
@@ -780,7 +780,7 @@ out:
 	if (nid != -1) {
 		cpumask_t numa_mask = node_to_cpumask(nid);
 
-		irq_set_affinity(irq, numa_mask);
+		irq_set_affinity(irq, &numa_mask);
 	}
 
 	return irq;
diff --git a/arch/sparc64/kernel/pci_msi.c b/arch/sparc64/kernel/pci_msi.c
index 2e680f34f727..0d0cd815e83e 100644
--- a/arch/sparc64/kernel/pci_msi.c
+++ b/arch/sparc64/kernel/pci_msi.c
@@ -288,7 +288,7 @@ static int bringup_one_msi_queue(struct pci_pbm_info *pbm,
 	if (nid != -1) {
 		cpumask_t numa_mask = node_to_cpumask(nid);
 
-		irq_set_affinity(irq, numa_mask);
+		irq_set_affinity(irq, &numa_mask);
 	}
 	err = request_irq(irq, sparc64_msiq_interrupt, 0,
 			  "MSIQ",
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 067d8de913f6..940f25851e1e 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -301,7 +301,7 @@ static void hpet_set_mode(enum clock_event_mode mode,
 			struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
 			hpet_setup_msi_irq(hdev->irq);
 			disable_irq(hdev->irq);
-			irq_set_affinity(hdev->irq, cpumask_of_cpu(hdev->cpu));
+			irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu));
 			enable_irq(hdev->irq);
 		}
 		break;
@@ -449,7 +449,7 @@ static int hpet_setup_irq(struct hpet_dev *dev)
 		return -1;
 
 	disable_irq(dev->irq);
-	irq_set_affinity(dev->irq, cpumask_of_cpu(dev->cpu));
+	irq_set_affinity(dev->irq, cpumask_of(dev->cpu));
 	enable_irq(dev->irq);
 
 	printk(KERN_DEBUG "hpet: %s irq %d for MSI\n",
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 9043251210fb..1184210e6d0c 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -361,7 +361,8 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
 
 static int assign_irq_vector(int irq, cpumask_t mask);
 
-static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+static void set_ioapic_affinity_irq(unsigned int irq,
+				    const struct cpumask *mask)
 {
 	struct irq_cfg *cfg;
 	unsigned long flags;
@@ -369,15 +370,14 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 	cpumask_t tmp;
 	struct irq_desc *desc;
 
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
+	if (!cpumask_intersects(mask, cpu_online_mask))
 		return;
 
 	cfg = irq_cfg(irq);
-	if (assign_irq_vector(irq, mask))
+	if (assign_irq_vector(irq, *mask))
 		return;
 
-	cpus_and(tmp, cfg->domain, mask);
+	cpumask_and(&tmp, &cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 	/*
 	 * Only the high 8 bits are valid.
@@ -387,7 +387,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 	desc = irq_to_desc(irq);
 	spin_lock_irqsave(&ioapic_lock, flags);
 	__target_IO_APIC_irq(irq, dest, cfg->vector);
-	desc->affinity = mask;
+	cpumask_copy(&desc->affinity, mask);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 #endif /* CONFIG_SMP */
@@ -2189,7 +2189,7 @@ static void ir_irq_migration(struct work_struct *work)
 				continue;
 			}
 
-			desc->chip->set_affinity(irq, desc->pending_mask);
+			desc->chip->set_affinity(irq, &desc->pending_mask);
 			spin_unlock_irqrestore(&desc->lock, flags);
 		}
 	}
@@ -2198,18 +2198,19 @@ static void ir_irq_migration(struct work_struct *work)
 /*
  * Migrates the IRQ destination in the process context.
  */
-static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+static void set_ir_ioapic_affinity_irq(unsigned int irq,
+				       const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 
 	if (desc->status & IRQ_LEVEL) {
 		desc->status |= IRQ_MOVE_PENDING;
-		desc->pending_mask = mask;
+		cpumask_copy(&desc->pending_mask, mask);
 		migrate_irq_remapped_level(irq);
 		return;
 	}
 
-	migrate_ioapic_irq(irq, mask);
+	migrate_ioapic_irq(irq, *mask);
 }
 #endif
 
@@ -3027,7 +3028,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
 }
 
 #ifdef CONFIG_SMP
-static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
+static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_cfg *cfg;
 	struct msi_msg msg;
@@ -3035,15 +3036,14 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 	cpumask_t tmp;
 	struct irq_desc *desc;
 
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
+	if (!cpumask_intersects(mask, cpu_online_mask))
 		return;
 
-	if (assign_irq_vector(irq, mask))
+	if (assign_irq_vector(irq, *mask))
 		return;
 
 	cfg = irq_cfg(irq);
-	cpus_and(tmp, cfg->domain, mask);
+	cpumask_and(&tmp, &cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
 	read_msi_msg(irq, &msg);
@@ -3055,7 +3055,7 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 
 	write_msi_msg(irq, &msg);
 	desc = irq_to_desc(irq);
-	desc->affinity = mask;
+	cpumask_copy(&desc->affinity, mask);
 }
 
 #ifdef CONFIG_INTR_REMAP
@@ -3063,7 +3063,8 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
  * Migrate the MSI irq to another cpumask. This migration is
  * done in the process context using interrupt-remapping hardware.
  */
-static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
+static void ir_set_msi_irq_affinity(unsigned int irq,
+				    const struct cpumask *mask)
 {
 	struct irq_cfg *cfg;
 	unsigned int dest;
@@ -3071,18 +3072,17 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 	struct irte irte;
 	struct irq_desc *desc;
 
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
+	if (!cpumask_intersects(mask, cpu_online_mask))
 		return;
 
 	if (get_irte(irq, &irte))
 		return;
 
-	if (assign_irq_vector(irq, mask))
+	if (assign_irq_vector(irq, *mask))
 		return;
 
 	cfg = irq_cfg(irq);
-	cpus_and(tmp, cfg->domain, mask);
+	cpumask_and(&tmp, &cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
 	irte.vector = cfg->vector;
@@ -3106,7 +3106,7 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 	}
 
 	desc = irq_to_desc(irq);
-	desc->affinity = mask;
+	cpumask_copy(&desc->affinity, mask);
 }
 #endif
 #endif /* CONFIG_SMP */
@@ -3308,7 +3308,7 @@ void arch_teardown_msi_irq(unsigned int irq)
 
 #ifdef CONFIG_DMAR
 #ifdef CONFIG_SMP
-static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
+static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_cfg *cfg;
 	struct msi_msg msg;
@@ -3316,15 +3316,14 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
 	cpumask_t tmp;
 	struct irq_desc *desc;
 
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
+	if (!cpumask_intersects(mask, cpu_online_mask))
 		return;
 
-	if (assign_irq_vector(irq, mask))
+	if (assign_irq_vector(irq, *mask))
 		return;
 
 	cfg = irq_cfg(irq);
-	cpus_and(tmp, cfg->domain, mask);
+	cpumask_and(&tmp, &cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
 	dmar_msi_read(irq, &msg);
@@ -3336,7 +3335,7 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
 
 	dmar_msi_write(irq, &msg);
 	desc = irq_to_desc(irq);
-	desc->affinity = mask;
+	cpumask_copy(&desc->affinity, mask);
 }
 #endif /* CONFIG_SMP */
 
@@ -3369,7 +3368,7 @@ int arch_setup_dmar_msi(unsigned int irq)
 #ifdef CONFIG_HPET_TIMER
 
 #ifdef CONFIG_SMP
-static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
+static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_cfg *cfg;
 	struct irq_desc *desc;
@@ -3377,15 +3376,14 @@ static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
 	unsigned int dest;
 	cpumask_t tmp;
 
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
+	if (!cpumask_intersects(mask, cpu_online_mask))
 		return;
 
-	if (assign_irq_vector(irq, mask))
+	if (assign_irq_vector(irq, *mask))
 		return;
 
 	cfg = irq_cfg(irq);
-	cpus_and(tmp, cfg->domain, mask);
+	cpumask_and(&tmp, &cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
 	hpet_msi_read(irq, &msg);
@@ -3397,7 +3395,7 @@ static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
 
 	hpet_msi_write(irq, &msg);
 	desc = irq_to_desc(irq);
-	desc->affinity = mask;
+	cpumask_copy(&desc->affinity, mask);
 }
 #endif /* CONFIG_SMP */
 
@@ -3451,27 +3449,26 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
 	write_ht_irq_msg(irq, &msg);
 }
 
-static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
+static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_cfg *cfg;
 	unsigned int dest;
 	cpumask_t tmp;
 	struct irq_desc *desc;
 
-	cpus_and(tmp, mask, cpu_online_map);
-	if (cpus_empty(tmp))
+	if (!cpumask_intersects(mask, cpu_online_mask))
 		return;
 
-	if (assign_irq_vector(irq, mask))
+	if (assign_irq_vector(irq, *mask))
 		return;
 
 	cfg = irq_cfg(irq);
-	cpus_and(tmp, cfg->domain, mask);
+	cpumask_and(&tmp, &cfg->domain, mask);
 	dest = cpu_mask_to_apicid(tmp);
 
 	target_ht_irq(irq, dest, cfg->vector);
 	desc = irq_to_desc(irq);
-	desc->affinity = mask;
+	cpumask_copy(&desc->affinity, mask);
 }
 #endif
 
@@ -3794,10 +3791,10 @@ void __init setup_ioapic_dest(void)
 
 #ifdef CONFIG_INTR_REMAP
 			if (intr_remapping_enabled)
-				set_ir_ioapic_affinity_irq(irq, mask);
+				set_ir_ioapic_affinity_irq(irq, &mask);
 			else
 #endif
-				set_ioapic_affinity_irq(irq, mask);
+				set_ioapic_affinity_irq(irq, &mask);
 		}
 
 	}
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index a51382672de0..87870a49be4e 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -251,7 +251,7 @@ void fixup_irqs(cpumask_t map)
 			mask = map;
 		}
 		if (desc->chip->set_affinity)
-			desc->chip->set_affinity(irq, mask);
+			desc->chip->set_affinity(irq, &mask);
 		else if (desc->action && !(warned++))
 			printk("Cannot set affinity for irq %i\n", irq);
 	}
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 60eb84eb77a0..7d37f847544d 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -116,7 +116,7 @@ void fixup_irqs(cpumask_t map)
 			desc->chip->mask(irq);
 
 		if (desc->chip->set_affinity)
-			desc->chip->set_affinity(irq, mask);
+			desc->chip->set_affinity(irq, &mask);
 		else if (!(warned++))
 			set_affinity = 0;
 
diff --git a/drivers/parisc/iosapic.c b/drivers/parisc/iosapic.c
index 7beffcab2745..9dedbbd218c3 100644
--- a/drivers/parisc/iosapic.c
+++ b/drivers/parisc/iosapic.c
@@ -704,16 +704,17 @@ static unsigned int iosapic_startup_irq(unsigned int irq)
 }
 
 #ifdef CONFIG_SMP
-static void iosapic_set_affinity_irq(unsigned int irq, cpumask_t dest)
+static void iosapic_set_affinity_irq(unsigned int irq,
+				     const struct cpumask *dest)
 {
 	struct vector_info *vi = iosapic_get_vector(irq);
 	u32 d0, d1, dummy_d0;
 	unsigned long flags;
 
-	if (cpu_check_affinity(irq, &dest))
+	if (cpu_check_affinity(irq, dest))
 		return;
 
-	vi->txn_addr = txn_affinity_addr(irq, first_cpu(dest));
+	vi->txn_addr = txn_affinity_addr(irq, cpumask_first(dest));
 
 	spin_lock_irqsave(&iosapic_lock, flags);
 	/* d1 contains the destination CPU, so only want to set that
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 1e3b934a4cf7..eba5ec5b020e 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -579,7 +579,7 @@ void rebind_evtchn_irq(int evtchn, int irq)
 	spin_unlock(&irq_mapping_update_lock);
 
 	/* new event channels are always bound to cpu 0 */
-	irq_set_affinity(irq, cpumask_of_cpu(0));
+	irq_set_affinity(irq, cpumask_of(0));
 
 	/* Unmask the event channel. */
 	enable_irq(irq);
@@ -608,9 +608,9 @@ static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
 }
 
 
-static void set_affinity_irq(unsigned irq, cpumask_t dest)
+static void set_affinity_irq(unsigned irq, const struct cpumask *dest)
 {
-	unsigned tcpu = first_cpu(dest);
+	unsigned tcpu = cpumask_first(dest);
 	rebind_irq_to_cpu(irq, tcpu);
 }
 
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index f58a0cf8929a..48e63934fabe 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -109,13 +109,13 @@ extern void enable_irq(unsigned int irq);
 
 extern cpumask_t irq_default_affinity;
 
-extern int irq_set_affinity(unsigned int irq, cpumask_t cpumask);
+extern int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask);
 extern int irq_can_set_affinity(unsigned int irq);
 extern int irq_select_affinity(unsigned int irq);
 
 #else /* CONFIG_SMP */
 
-static inline int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
+static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m)
 {
 	return -EINVAL;
 }
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 3dddfa703ebd..ab70fd604d3a 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -113,7 +113,8 @@ struct irq_chip {
 	void		(*eoi)(unsigned int irq);
 
 	void		(*end)(unsigned int irq);
-	void		(*set_affinity)(unsigned int irq, cpumask_t dest);
+	void		(*set_affinity)(unsigned int irq,
+					const struct cpumask *dest);
 	int		(*retrigger)(unsigned int irq);
 	int		(*set_type)(unsigned int irq, unsigned int flow_type);
 	int		(*set_wake)(unsigned int irq, unsigned int on);
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 10b5092e9bfe..58d8e31daa49 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -45,7 +45,7 @@ void dynamic_irq_init(unsigned int irq)
 	desc->irq_count = 0;
 	desc->irqs_unhandled = 0;
 #ifdef CONFIG_SMP
-	cpus_setall(desc->affinity);
+	cpumask_setall(&desc->affinity);
 #endif
 	spin_unlock_irqrestore(&desc->lock, flags);
 }
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 801addda3c43..10ad2f87ed9a 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -79,7 +79,7 @@ int irq_can_set_affinity(unsigned int irq)
  *	@cpumask:	cpumask
  *
  */
-int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
+int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
@@ -91,14 +91,14 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	if (desc->status & IRQ_MOVE_PCNTXT || desc->status & IRQ_DISABLED) {
-		desc->affinity = cpumask;
+		cpumask_copy(&desc->affinity, cpumask);
 		desc->chip->set_affinity(irq, cpumask);
 	} else {
 		desc->status |= IRQ_MOVE_PENDING;
-		desc->pending_mask = cpumask;
+		cpumask_copy(&desc->pending_mask, cpumask);
 	}
 #else
-	desc->affinity = cpumask;
+	cpumask_copy(&desc->affinity, cpumask);
 	desc->chip->set_affinity(irq, cpumask);
 #endif
 	desc->status |= IRQ_AFFINITY_SET;
@@ -112,26 +112,24 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
  */
 int do_irq_select_affinity(unsigned int irq, struct irq_desc *desc)
 {
-	cpumask_t mask;
-
 	if (!irq_can_set_affinity(irq))
 		return 0;
 
-	cpus_and(mask, cpu_online_map, irq_default_affinity);
-
 	/*
 	 * Preserve an userspace affinity setup, but make sure that
 	 * one of the targets is online.
 	 */
 	if (desc->status & (IRQ_AFFINITY_SET | IRQ_NO_BALANCING)) {
-		if (cpus_intersects(desc->affinity, cpu_online_map))
-			mask = desc->affinity;
+		if (cpumask_any_and(&desc->affinity, cpu_online_mask)
+		    < nr_cpu_ids)
+			goto set_affinity;
 		else
 			desc->status &= ~IRQ_AFFINITY_SET;
 	}
 
-	desc->affinity = mask;
-	desc->chip->set_affinity(irq, mask);
+	cpumask_and(&desc->affinity, cpu_online_mask, &irq_default_affinity);
+set_affinity:
+	desc->chip->set_affinity(irq, &desc->affinity);
 
 	return 0;
 }
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index 9db681d95814..bd72329e630c 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -4,7 +4,6 @@
 void move_masked_irq(int irq)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
-	cpumask_t tmp;
 
 	if (likely(!(desc->status & IRQ_MOVE_PENDING)))
 		return;
@@ -19,7 +18,7 @@ void move_masked_irq(int irq)
 
 	desc->status &= ~IRQ_MOVE_PENDING;
 
-	if (unlikely(cpus_empty(desc->pending_mask)))
+	if (unlikely(cpumask_empty(&desc->pending_mask)))
 		return;
 
 	if (!desc->chip->set_affinity)
@@ -27,8 +26,6 @@ void move_masked_irq(int irq)
 
 	assert_spin_locked(&desc->lock);
 
-	cpus_and(tmp, desc->pending_mask, cpu_online_map);
-
 	/*
 	 * If there was a valid mask to work with, please
 	 * do the disable, re-program, enable sequence.
@@ -41,10 +38,13 @@ void move_masked_irq(int irq)
 	 * For correct operation this depends on the caller
 	 * masking the irqs.
 	 */
-	if (likely(!cpus_empty(tmp))) {
-		desc->chip->set_affinity(irq,tmp);
+	if (likely(cpumask_any_and(&desc->pending_mask, cpu_online_mask)
+		   < nr_cpu_ids)) {
+		cpumask_and(&desc->affinity,
+			    &desc->pending_mask, cpu_online_mask);
+		desc->chip->set_affinity(irq, &desc->affinity);
 	}
-	cpus_clear(desc->pending_mask);
+	cpumask_clear(&desc->pending_mask);
 }
 
 void move_native_irq(int irq)
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index f293349d49d0..8e91c9762520 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -40,33 +40,42 @@ static ssize_t irq_affinity_proc_write(struct file *file,
 		const char __user *buffer, size_t count, loff_t *pos)
 {
 	unsigned int irq = (int)(long)PDE(file->f_path.dentry->d_inode)->data;
-	cpumask_t new_value;
+	cpumask_var_t new_value;
 	int err;
 
 	if (!irq_to_desc(irq)->chip->set_affinity || no_irq_affinity ||
 	    irq_balancing_disabled(irq))
 		return -EIO;
 
-	err = cpumask_parse_user(buffer, count, &new_value);
+	if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
+		return -ENOMEM;
+
+	err = cpumask_parse_user(buffer, count, new_value);
 	if (err)
-		return err;
+		goto free_cpumask;
 
-	if (!is_affinity_mask_valid(new_value))
-		return -EINVAL;
+	if (!is_affinity_mask_valid(*new_value)) {
+		err = -EINVAL;
+		goto free_cpumask;
+	}
 
 	/*
 	 * Do not allow disabling IRQs completely - it's a too easy
 	 * way to make the system unusable accidentally :-) At least
 	 * one online CPU still has to be targeted.
 	 */
-	if (!cpus_intersects(new_value, cpu_online_map))
+	if (!cpumask_intersects(new_value, cpu_online_mask)) {
 		/* Special case for empty set - allow the architecture
 		   code to set default SMP affinity. */
-		return irq_select_affinity_usr(irq) ? -EINVAL : count;
-
-	irq_set_affinity(irq, new_value);
+		err = irq_select_affinity_usr(irq) ? -EINVAL : count;
+	} else {
+		irq_set_affinity(irq, new_value);
+		err = count;
+	}
 
-	return count;
+free_cpumask:
+	free_cpumask_var(new_value);
+	return err;
 }
 
 static int irq_affinity_proc_open(struct inode *inode, struct file *file)
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index df12434b43ca..ab65d217583f 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -136,7 +136,7 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
  */
 static void tick_setup_device(struct tick_device *td,
 			      struct clock_event_device *newdev, int cpu,
-			      const cpumask_t *cpumask)
+			      const struct cpumask *cpumask)
 {
 	ktime_t next_event;
 	void (*handler)(struct clock_event_device *) = NULL;
@@ -171,8 +171,8 @@ static void tick_setup_device(struct tick_device *td,
 	 * When the device is not per cpu, pin the interrupt to the
 	 * current cpu:
 	 */
-	if (!cpus_equal(newdev->cpumask, *cpumask))
-		irq_set_affinity(newdev->irq, *cpumask);
+	if (!cpumask_equal(&newdev->cpumask, cpumask))
+		irq_set_affinity(newdev->irq, cpumask);
 
 	/*
 	 * When global broadcasting is active, check if the current
-- 
cgit v1.2.3


From 320ab2b0b1e08e3805a3e1084a2f0eb1938d5d67 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Sat, 13 Dec 2008 21:20:26 +1030
Subject: cpumask: convert struct clock_event_device to cpumask pointers.

Impact: change calling convention of existing clock_event APIs

struct clock_event_timer's cpumask field gets changed to take pointer,
as does the ->broadcast function.

Another single-patch change.  For safety, we BUG_ON() in
clockevents_register_device() if it's not set.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Ingo Molnar <mingo@elte.hu>
---
 arch/arm/mach-at91/at91rm9200_time.c    | 3 +--
 arch/arm/mach-at91/at91sam926x_time.c   | 2 +-
 arch/arm/mach-davinci/time.c            | 2 +-
 arch/arm/mach-imx/time.c                | 2 +-
 arch/arm/mach-ixp4xx/common.c           | 2 +-
 arch/arm/mach-msm/timer.c               | 2 +-
 arch/arm/mach-ns9xxx/time-ns9360.c      | 2 +-
 arch/arm/mach-omap1/time.c              | 2 +-
 arch/arm/mach-omap1/timer32k.c          | 2 +-
 arch/arm/mach-omap2/timer-gp.c          | 2 +-
 arch/arm/mach-pxa/time.c                | 2 +-
 arch/arm/mach-realview/core.c           | 2 +-
 arch/arm/mach-realview/localtimer.c     | 4 ++--
 arch/arm/mach-sa1100/time.c             | 2 +-
 arch/arm/mach-versatile/core.c          | 2 +-
 arch/arm/plat-mxc/time.c                | 2 +-
 arch/arm/plat-orion/time.c              | 2 +-
 arch/avr32/kernel/time.c                | 2 +-
 arch/blackfin/kernel/time-ts.c          | 2 +-
 arch/m68knommu/platform/coldfire/pit.c  | 2 +-
 arch/mips/jazz/irq.c                    | 2 +-
 arch/mips/kernel/cevt-bcm1480.c         | 2 +-
 arch/mips/kernel/cevt-ds1287.c          | 2 +-
 arch/mips/kernel/cevt-gt641xx.c         | 2 +-
 arch/mips/kernel/cevt-r4k.c             | 2 +-
 arch/mips/kernel/cevt-sb1250.c          | 2 +-
 arch/mips/kernel/cevt-smtc.c            | 2 +-
 arch/mips/kernel/cevt-txx9.c            | 2 +-
 arch/mips/kernel/i8253.c                | 2 +-
 arch/mips/nxp/pnx8550/common/time.c     | 1 +
 arch/mips/sgi-ip27/ip27-timer.c         | 2 +-
 arch/mips/sni/time.c                    | 2 +-
 arch/powerpc/kernel/time.c              | 2 +-
 arch/s390/kernel/time.c                 | 2 +-
 arch/sh/include/asm/smp.h               | 2 +-
 arch/sh/kernel/smp.c                    | 4 ++--
 arch/sh/kernel/timers/timer-broadcast.c | 2 +-
 arch/sh/kernel/timers/timer-tmu.c       | 2 +-
 arch/sparc64/kernel/time.c              | 2 +-
 arch/um/kernel/time.c                   | 2 +-
 arch/x86/kernel/apic.c                  | 8 ++++----
 arch/x86/kernel/hpet.c                  | 4 ++--
 arch/x86/kernel/i8253.c                 | 2 +-
 arch/x86/kernel/mfgpt_32.c              | 2 +-
 arch/x86/kernel/vmiclock_32.c           | 2 +-
 arch/x86/lguest/boot.c                  | 2 +-
 arch/x86/xen/time.c                     | 2 +-
 drivers/clocksource/tcb_clksrc.c        | 2 +-
 include/linux/clockchips.h              | 4 ++--
 kernel/time/clockevents.c               | 2 ++
 kernel/time/tick-broadcast.c            | 2 +-
 kernel/time/tick-common.c               | 8 ++++----
 52 files changed, 63 insertions(+), 61 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-at91/at91rm9200_time.c b/arch/arm/mach-at91/at91rm9200_time.c
index a72e798a2a40..72f51d39202c 100644
--- a/arch/arm/mach-at91/at91rm9200_time.c
+++ b/arch/arm/mach-at91/at91rm9200_time.c
@@ -169,7 +169,6 @@ static struct clock_event_device clkevt = {
 	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
 	.shift		= 32,
 	.rating		= 150,
-	.cpumask	= CPU_MASK_CPU0,
 	.set_next_event	= clkevt32k_next_event,
 	.set_mode	= clkevt32k_mode,
 };
@@ -197,7 +196,7 @@ void __init at91rm9200_timer_init(void)
 	clkevt.mult = div_sc(AT91_SLOW_CLOCK, NSEC_PER_SEC, clkevt.shift);
 	clkevt.max_delta_ns = clockevent_delta2ns(AT91_ST_ALMV, &clkevt);
 	clkevt.min_delta_ns = clockevent_delta2ns(2, &clkevt) + 1;
-	clkevt.cpumask = cpumask_of_cpu(0);
+	clkevt.cpumask = cpumask_of(0);
 	clockevents_register_device(&clkevt);
 
 	/* register clocksource */
diff --git a/arch/arm/mach-at91/at91sam926x_time.c b/arch/arm/mach-at91/at91sam926x_time.c
index 122fd77ed580..b63e1d5f1bad 100644
--- a/arch/arm/mach-at91/at91sam926x_time.c
+++ b/arch/arm/mach-at91/at91sam926x_time.c
@@ -91,7 +91,6 @@ static struct clock_event_device pit_clkevt = {
 	.features	= CLOCK_EVT_FEAT_PERIODIC,
 	.shift		= 32,
 	.rating		= 100,
-	.cpumask	= CPU_MASK_CPU0,
 	.set_mode	= pit_clkevt_mode,
 };
 
@@ -173,6 +172,7 @@ static void __init at91sam926x_pit_init(void)
 
 	/* Set up and register clockevents */
 	pit_clkevt.mult = div_sc(pit_rate, NSEC_PER_SEC, pit_clkevt.shift);
+	pit_clkevt.cpumask = cpumask_of(0);
 	clockevents_register_device(&pit_clkevt);
 }
 
diff --git a/arch/arm/mach-davinci/time.c b/arch/arm/mach-davinci/time.c
index 3b9a296b5c4b..f8bcd29d17a6 100644
--- a/arch/arm/mach-davinci/time.c
+++ b/arch/arm/mach-davinci/time.c
@@ -322,7 +322,7 @@ static void __init davinci_timer_init(void)
 	clockevent_davinci.min_delta_ns =
 		clockevent_delta2ns(1, &clockevent_davinci);
 
-	clockevent_davinci.cpumask = cpumask_of_cpu(0);
+	clockevent_davinci.cpumask = cpumask_of(0);
 	clockevents_register_device(&clockevent_davinci);
 }
 
diff --git a/arch/arm/mach-imx/time.c b/arch/arm/mach-imx/time.c
index a11765f5f23b..aff0ebcfa847 100644
--- a/arch/arm/mach-imx/time.c
+++ b/arch/arm/mach-imx/time.c
@@ -184,7 +184,7 @@ static int __init imx_clockevent_init(unsigned long rate)
 	clockevent_imx.min_delta_ns =
 		clockevent_delta2ns(0xf, &clockevent_imx);
 
-	clockevent_imx.cpumask = cpumask_of_cpu(0);
+	clockevent_imx.cpumask = cpumask_of(0);
 
 	clockevents_register_device(&clockevent_imx);
 
diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c
index 7766f469456b..f4656d2ac8a8 100644
--- a/arch/arm/mach-ixp4xx/common.c
+++ b/arch/arm/mach-ixp4xx/common.c
@@ -487,7 +487,7 @@ static int __init ixp4xx_clockevent_init(void)
 		clockevent_delta2ns(0xfffffffe, &clockevent_ixp4xx);
 	clockevent_ixp4xx.min_delta_ns =
 		clockevent_delta2ns(0xf, &clockevent_ixp4xx);
-	clockevent_ixp4xx.cpumask = cpumask_of_cpu(0);
+	clockevent_ixp4xx.cpumask = cpumask_of(0);
 
 	clockevents_register_device(&clockevent_ixp4xx);
 	return 0;
diff --git a/arch/arm/mach-msm/timer.c b/arch/arm/mach-msm/timer.c
index 345a14cb73c3..444d9c0f5ca6 100644
--- a/arch/arm/mach-msm/timer.c
+++ b/arch/arm/mach-msm/timer.c
@@ -182,7 +182,7 @@ static void __init msm_timer_init(void)
 			clockevent_delta2ns(0xf0000000 >> clock->shift, ce);
 		/* 4 gets rounded down to 3 */
 		ce->min_delta_ns = clockevent_delta2ns(4, ce);
-		ce->cpumask = cpumask_of_cpu(0);
+		ce->cpumask = cpumask_of(0);
 
 		cs->mult = clocksource_hz2mult(clock->freq, cs->shift);
 		res = clocksource_register(cs);
diff --git a/arch/arm/mach-ns9xxx/time-ns9360.c b/arch/arm/mach-ns9xxx/time-ns9360.c
index a63424d083d9..41df69721769 100644
--- a/arch/arm/mach-ns9xxx/time-ns9360.c
+++ b/arch/arm/mach-ns9xxx/time-ns9360.c
@@ -173,7 +173,7 @@ static void __init ns9360_timer_init(void)
 	ns9360_clockevent_device.min_delta_ns =
 		clockevent_delta2ns(1, &ns9360_clockevent_device);
 
-	ns9360_clockevent_device.cpumask = cpumask_of_cpu(0);
+	ns9360_clockevent_device.cpumask = cpumask_of(0);
 	clockevents_register_device(&ns9360_clockevent_device);
 
 	setup_irq(IRQ_NS9360_TIMER0 + TIMER_CLOCKEVENT,
diff --git a/arch/arm/mach-omap1/time.c b/arch/arm/mach-omap1/time.c
index 2cf7e32bd293..495a32c287b4 100644
--- a/arch/arm/mach-omap1/time.c
+++ b/arch/arm/mach-omap1/time.c
@@ -173,7 +173,7 @@ static __init void omap_init_mpu_timer(unsigned long rate)
 	clockevent_mpu_timer1.min_delta_ns =
 		clockevent_delta2ns(1, &clockevent_mpu_timer1);
 
-	clockevent_mpu_timer1.cpumask = cpumask_of_cpu(0);
+	clockevent_mpu_timer1.cpumask = cpumask_of(0);
 	clockevents_register_device(&clockevent_mpu_timer1);
 }
 
diff --git a/arch/arm/mach-omap1/timer32k.c b/arch/arm/mach-omap1/timer32k.c
index 705367ece174..fd3f7396e162 100644
--- a/arch/arm/mach-omap1/timer32k.c
+++ b/arch/arm/mach-omap1/timer32k.c
@@ -187,7 +187,7 @@ static __init void omap_init_32k_timer(void)
 	clockevent_32k_timer.min_delta_ns =
 		clockevent_delta2ns(1, &clockevent_32k_timer);
 
-	clockevent_32k_timer.cpumask = cpumask_of_cpu(0);
+	clockevent_32k_timer.cpumask = cpumask_of(0);
 	clockevents_register_device(&clockevent_32k_timer);
 }
 
diff --git a/arch/arm/mach-omap2/timer-gp.c b/arch/arm/mach-omap2/timer-gp.c
index 589393bedade..ae6036300f60 100644
--- a/arch/arm/mach-omap2/timer-gp.c
+++ b/arch/arm/mach-omap2/timer-gp.c
@@ -120,7 +120,7 @@ static void __init omap2_gp_clockevent_init(void)
 	clockevent_gpt.min_delta_ns =
 		clockevent_delta2ns(1, &clockevent_gpt);
 
-	clockevent_gpt.cpumask = cpumask_of_cpu(0);
+	clockevent_gpt.cpumask = cpumask_of(0);
 	clockevents_register_device(&clockevent_gpt);
 }
 
diff --git a/arch/arm/mach-pxa/time.c b/arch/arm/mach-pxa/time.c
index f8a9a62959e5..bf3c9a4aad50 100644
--- a/arch/arm/mach-pxa/time.c
+++ b/arch/arm/mach-pxa/time.c
@@ -122,7 +122,6 @@ static struct clock_event_device ckevt_pxa_osmr0 = {
 	.features	= CLOCK_EVT_FEAT_ONESHOT,
 	.shift		= 32,
 	.rating		= 200,
-	.cpumask	= CPU_MASK_CPU0,
 	.set_next_event	= pxa_osmr0_set_next_event,
 	.set_mode	= pxa_osmr0_set_mode,
 };
@@ -170,6 +169,7 @@ static void __init pxa_timer_init(void)
 		clockevent_delta2ns(0x7fffffff, &ckevt_pxa_osmr0);
 	ckevt_pxa_osmr0.min_delta_ns =
 		clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_pxa_osmr0) + 1;
+	ckevt_pxa_osmr0.cpumask = cpumask_of(0);
 
 	cksrc_pxa_oscr0.mult =
 		clocksource_hz2mult(clock_tick_rate, cksrc_pxa_oscr0.shift);
diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c
index 2f04d54711e7..b07cb9b7adb1 100644
--- a/arch/arm/mach-realview/core.c
+++ b/arch/arm/mach-realview/core.c
@@ -511,7 +511,7 @@ static struct clock_event_device timer0_clockevent =	 {
 	.set_mode	= timer_set_mode,
 	.set_next_event	= timer_set_next_event,
 	.rating		= 300,
-	.cpumask	= CPU_MASK_ALL,
+	.cpumask	= cpu_all_mask,
 };
 
 static void __init realview_clockevents_init(unsigned int timer_irq)
diff --git a/arch/arm/mach-realview/localtimer.c b/arch/arm/mach-realview/localtimer.c
index 44d178cd5733..504961ef343c 100644
--- a/arch/arm/mach-realview/localtimer.c
+++ b/arch/arm/mach-realview/localtimer.c
@@ -161,7 +161,7 @@ void __cpuinit local_timer_setup(unsigned int cpu)
 	clk->set_mode		= local_timer_set_mode;
 	clk->set_next_event	= local_timer_set_next_event;
 	clk->irq		= IRQ_LOCALTIMER;
-	clk->cpumask		= cpumask_of_cpu(cpu);
+	clk->cpumask		= cpumask_of(cpu);
 	clk->shift		= 20;
 	clk->mult		= div_sc(mpcore_timer_rate, NSEC_PER_SEC, clk->shift);
 	clk->max_delta_ns	= clockevent_delta2ns(0xffffffff, clk);
@@ -199,7 +199,7 @@ void __cpuinit local_timer_setup(unsigned int cpu)
 	clk->rating		= 200;
 	clk->set_mode		= dummy_timer_set_mode;
 	clk->broadcast		= smp_timer_broadcast;
-	clk->cpumask		= cpumask_of_cpu(cpu);
+	clk->cpumask		= cpumask_of(cpu);
 
 	clockevents_register_device(clk);
 }
diff --git a/arch/arm/mach-sa1100/time.c b/arch/arm/mach-sa1100/time.c
index 24c0a4bae850..1cac4ac0b4b8 100644
--- a/arch/arm/mach-sa1100/time.c
+++ b/arch/arm/mach-sa1100/time.c
@@ -73,7 +73,6 @@ static struct clock_event_device ckevt_sa1100_osmr0 = {
 	.features	= CLOCK_EVT_FEAT_ONESHOT,
 	.shift		= 32,
 	.rating		= 200,
-	.cpumask	= CPU_MASK_CPU0,
 	.set_next_event	= sa1100_osmr0_set_next_event,
 	.set_mode	= sa1100_osmr0_set_mode,
 };
@@ -110,6 +109,7 @@ static void __init sa1100_timer_init(void)
 		clockevent_delta2ns(0x7fffffff, &ckevt_sa1100_osmr0);
 	ckevt_sa1100_osmr0.min_delta_ns =
 		clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_sa1100_osmr0) + 1;
+	ckevt_sa1100_osmr0.cpumask = cpumask_of(0);
 
 	cksrc_sa1100_oscr.mult =
 		clocksource_hz2mult(CLOCK_TICK_RATE, cksrc_sa1100_oscr.shift);
diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c
index 565e0ba0d67e..a3f1933434e2 100644
--- a/arch/arm/mach-versatile/core.c
+++ b/arch/arm/mach-versatile/core.c
@@ -965,7 +965,7 @@ static void __init versatile_timer_init(void)
 	timer0_clockevent.min_delta_ns =
 		clockevent_delta2ns(0xf, &timer0_clockevent);
 
-	timer0_clockevent.cpumask = cpumask_of_cpu(0);
+	timer0_clockevent.cpumask = cpumask_of(0);
 	clockevents_register_device(&timer0_clockevent);
 }
 
diff --git a/arch/arm/plat-mxc/time.c b/arch/arm/plat-mxc/time.c
index fd28f5194f71..758a1293bcfa 100644
--- a/arch/arm/plat-mxc/time.c
+++ b/arch/arm/plat-mxc/time.c
@@ -190,7 +190,7 @@ static int __init mxc_clockevent_init(void)
 	clockevent_mxc.min_delta_ns =
 			clockevent_delta2ns(0xff, &clockevent_mxc);
 
-	clockevent_mxc.cpumask = cpumask_of_cpu(0);
+	clockevent_mxc.cpumask = cpumask_of(0);
 
 	clockevents_register_device(&clockevent_mxc);
 
diff --git a/arch/arm/plat-orion/time.c b/arch/arm/plat-orion/time.c
index 544d6b327f3a..6fa2923e6dca 100644
--- a/arch/arm/plat-orion/time.c
+++ b/arch/arm/plat-orion/time.c
@@ -149,7 +149,6 @@ static struct clock_event_device orion_clkevt = {
 	.features	= CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC,
 	.shift		= 32,
 	.rating		= 300,
-	.cpumask	= CPU_MASK_CPU0,
 	.set_next_event	= orion_clkevt_next_event,
 	.set_mode	= orion_clkevt_mode,
 };
@@ -199,5 +198,6 @@ void __init orion_time_init(unsigned int irq, unsigned int tclk)
 	orion_clkevt.mult = div_sc(tclk, NSEC_PER_SEC, orion_clkevt.shift);
 	orion_clkevt.max_delta_ns = clockevent_delta2ns(0xfffffffe, &orion_clkevt);
 	orion_clkevt.min_delta_ns = clockevent_delta2ns(1, &orion_clkevt);
+	orion_clkevt.cpumask = cpumask_of(0);
 	clockevents_register_device(&orion_clkevt);
 }
diff --git a/arch/avr32/kernel/time.c b/arch/avr32/kernel/time.c
index 283481d74a5b..0ff46bf873b0 100644
--- a/arch/avr32/kernel/time.c
+++ b/arch/avr32/kernel/time.c
@@ -106,7 +106,6 @@ static struct clock_event_device comparator = {
 	.features	= CLOCK_EVT_FEAT_ONESHOT,
 	.shift		= 16,
 	.rating		= 50,
-	.cpumask	= CPU_MASK_CPU0,
 	.set_next_event	= comparator_next_event,
 	.set_mode	= comparator_mode,
 };
@@ -134,6 +133,7 @@ void __init time_init(void)
 	comparator.mult = div_sc(counter_hz, NSEC_PER_SEC, comparator.shift);
 	comparator.max_delta_ns = clockevent_delta2ns((u32)~0, &comparator);
 	comparator.min_delta_ns = clockevent_delta2ns(50, &comparator) + 1;
+	comparator.cpumask = cpumask_of(0);
 
 	sysreg_write(COMPARE, 0);
 	timer_irqaction.dev_id = &comparator;
diff --git a/arch/blackfin/kernel/time-ts.c b/arch/blackfin/kernel/time-ts.c
index e887efc86c29..0ed2badfd746 100644
--- a/arch/blackfin/kernel/time-ts.c
+++ b/arch/blackfin/kernel/time-ts.c
@@ -162,7 +162,6 @@ static struct clock_event_device clockevent_bfin = {
 	.name		= "bfin_core_timer",
 	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
 	.shift		= 32,
-	.cpumask	= CPU_MASK_CPU0,
 	.set_next_event = bfin_timer_set_next_event,
 	.set_mode	= bfin_timer_set_mode,
 };
@@ -193,6 +192,7 @@ static int __init bfin_clockevent_init(void)
 	clockevent_bfin.mult = div_sc(timer_clk, NSEC_PER_SEC, clockevent_bfin.shift);
 	clockevent_bfin.max_delta_ns = clockevent_delta2ns(-1, &clockevent_bfin);
 	clockevent_bfin.min_delta_ns = clockevent_delta2ns(100, &clockevent_bfin);
+	clockevent_bfin.cpumask = cpumask_of(0);
 	clockevents_register_device(&clockevent_bfin);
 
 	return 0;
diff --git a/arch/m68knommu/platform/coldfire/pit.c b/arch/m68knommu/platform/coldfire/pit.c
index c5b916700b22..2a12e7fa9748 100644
--- a/arch/m68knommu/platform/coldfire/pit.c
+++ b/arch/m68knommu/platform/coldfire/pit.c
@@ -156,7 +156,7 @@ void hw_timer_init(void)
 {
 	u32 imr;
 
-	cf_pit_clockevent.cpumask = cpumask_of_cpu(smp_processor_id());
+	cf_pit_clockevent.cpumask = cpumask_of(smp_processor_id());
 	cf_pit_clockevent.mult = div_sc(FREQ, NSEC_PER_SEC, 32);
 	cf_pit_clockevent.max_delta_ns =
 		clockevent_delta2ns(0xFFFF, &cf_pit_clockevent);
diff --git a/arch/mips/jazz/irq.c b/arch/mips/jazz/irq.c
index d7f8a782aae4..03965cb1b252 100644
--- a/arch/mips/jazz/irq.c
+++ b/arch/mips/jazz/irq.c
@@ -146,7 +146,7 @@ void __init plat_time_init(void)
 
 	BUG_ON(HZ != 100);
 
-	cd->cpumask             = cpumask_of_cpu(cpu);
+	cd->cpumask             = cpumask_of(cpu);
 	clockevents_register_device(cd);
 	action->dev_id = cd;
 	setup_irq(JAZZ_TIMER_IRQ, action);
diff --git a/arch/mips/kernel/cevt-bcm1480.c b/arch/mips/kernel/cevt-bcm1480.c
index d7e21bc8cd21..b820661678b0 100644
--- a/arch/mips/kernel/cevt-bcm1480.c
+++ b/arch/mips/kernel/cevt-bcm1480.c
@@ -126,7 +126,7 @@ void __cpuinit sb1480_clockevent_init(void)
 	cd->min_delta_ns	= clockevent_delta2ns(2, cd);
 	cd->rating		= 200;
 	cd->irq			= irq;
-	cd->cpumask		= cpumask_of_cpu(cpu);
+	cd->cpumask		= cpumask_of(cpu);
 	cd->set_next_event	= sibyte_next_event;
 	cd->set_mode		= sibyte_set_mode;
 	clockevents_register_device(cd);
diff --git a/arch/mips/kernel/cevt-ds1287.c b/arch/mips/kernel/cevt-ds1287.c
index df4acb68bfb5..1ada45ea0700 100644
--- a/arch/mips/kernel/cevt-ds1287.c
+++ b/arch/mips/kernel/cevt-ds1287.c
@@ -88,7 +88,6 @@ static void ds1287_event_handler(struct clock_event_device *dev)
 static struct clock_event_device ds1287_clockevent = {
 	.name		= "ds1287",
 	.features	= CLOCK_EVT_FEAT_PERIODIC,
-	.cpumask	= CPU_MASK_CPU0,
 	.set_next_event	= ds1287_set_next_event,
 	.set_mode	= ds1287_set_mode,
 	.event_handler	= ds1287_event_handler,
@@ -122,6 +121,7 @@ int __init ds1287_clockevent_init(int irq)
 	clockevent_set_clock(cd, 32768);
 	cd->max_delta_ns = clockevent_delta2ns(0x7fffffff, cd);
 	cd->min_delta_ns = clockevent_delta2ns(0x300, cd);
+	cd->cpumask = cpumask_of(0);
 
 	clockevents_register_device(&ds1287_clockevent);
 
diff --git a/arch/mips/kernel/cevt-gt641xx.c b/arch/mips/kernel/cevt-gt641xx.c
index 6e2f58520afb..e9b787feedcb 100644
--- a/arch/mips/kernel/cevt-gt641xx.c
+++ b/arch/mips/kernel/cevt-gt641xx.c
@@ -96,7 +96,6 @@ static void gt641xx_timer0_event_handler(struct clock_event_device *dev)
 static struct clock_event_device gt641xx_timer0_clockevent = {
 	.name		= "gt641xx-timer0",
 	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
-	.cpumask	= CPU_MASK_CPU0,
 	.irq		= GT641XX_TIMER0_IRQ,
 	.set_next_event	= gt641xx_timer0_set_next_event,
 	.set_mode	= gt641xx_timer0_set_mode,
@@ -132,6 +131,7 @@ static int __init gt641xx_timer0_clockevent_init(void)
 	clockevent_set_clock(cd, gt641xx_base_clock);
 	cd->max_delta_ns = clockevent_delta2ns(0x7fffffff, cd);
 	cd->min_delta_ns = clockevent_delta2ns(0x300, cd);
+	cd->cpumask = cpumask_of(0);
 
 	clockevents_register_device(&gt641xx_timer0_clockevent);
 
diff --git a/arch/mips/kernel/cevt-r4k.c b/arch/mips/kernel/cevt-r4k.c
index 4a4c59f2737a..e1ec83b68031 100644
--- a/arch/mips/kernel/cevt-r4k.c
+++ b/arch/mips/kernel/cevt-r4k.c
@@ -195,7 +195,7 @@ int __cpuinit mips_clockevent_init(void)
 
 	cd->rating		= 300;
 	cd->irq			= irq;
-	cd->cpumask		= cpumask_of_cpu(cpu);
+	cd->cpumask		= cpumask_of(cpu);
 	cd->set_next_event	= mips_next_event;
 	cd->set_mode		= mips_set_clock_mode;
 	cd->event_handler	= mips_event_handler;
diff --git a/arch/mips/kernel/cevt-sb1250.c b/arch/mips/kernel/cevt-sb1250.c
index 0f188cd46e03..a2eebaafda52 100644
--- a/arch/mips/kernel/cevt-sb1250.c
+++ b/arch/mips/kernel/cevt-sb1250.c
@@ -125,7 +125,7 @@ void __cpuinit sb1250_clockevent_init(void)
 	cd->min_delta_ns	= clockevent_delta2ns(2, cd);
 	cd->rating		= 200;
 	cd->irq			= irq;
-	cd->cpumask		= cpumask_of_cpu(cpu);
+	cd->cpumask		= cpumask_of(cpu);
 	cd->set_next_event	= sibyte_next_event;
 	cd->set_mode		= sibyte_set_mode;
 	clockevents_register_device(cd);
diff --git a/arch/mips/kernel/cevt-smtc.c b/arch/mips/kernel/cevt-smtc.c
index 5162fe4b5952..6d45e24db5bf 100644
--- a/arch/mips/kernel/cevt-smtc.c
+++ b/arch/mips/kernel/cevt-smtc.c
@@ -292,7 +292,7 @@ int __cpuinit mips_clockevent_init(void)
 
 	cd->rating		= 300;
 	cd->irq			= irq;
-	cd->cpumask		= cpumask_of_cpu(cpu);
+	cd->cpumask		= cpumask_of(cpu);
 	cd->set_next_event	= mips_next_event;
 	cd->set_mode		= mips_set_clock_mode;
 	cd->event_handler	= mips_event_handler;
diff --git a/arch/mips/kernel/cevt-txx9.c b/arch/mips/kernel/cevt-txx9.c
index b5fc4eb412d2..eccf7d6096bd 100644
--- a/arch/mips/kernel/cevt-txx9.c
+++ b/arch/mips/kernel/cevt-txx9.c
@@ -112,7 +112,6 @@ static struct clock_event_device txx9tmr_clock_event_device = {
 	.name		= "TXx9",
 	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
 	.rating		= 200,
-	.cpumask	= CPU_MASK_CPU0,
 	.set_mode	= txx9tmr_set_mode,
 	.set_next_event	= txx9tmr_set_next_event,
 };
@@ -150,6 +149,7 @@ void __init txx9_clockevent_init(unsigned long baseaddr, int irq,
 		clockevent_delta2ns(0xffffffff >> (32 - TXX9_TIMER_BITS), cd);
 	cd->min_delta_ns = clockevent_delta2ns(0xf, cd);
 	cd->irq = irq;
+	cd->cpumask = cpumask_of(0),
 	clockevents_register_device(cd);
 	setup_irq(irq, &txx9tmr_irq);
 	printk(KERN_INFO "TXx9: clockevent device at 0x%lx, irq %d\n",
diff --git a/arch/mips/kernel/i8253.c b/arch/mips/kernel/i8253.c
index b6ac55162b9a..f4d187825f96 100644
--- a/arch/mips/kernel/i8253.c
+++ b/arch/mips/kernel/i8253.c
@@ -115,7 +115,7 @@ void __init setup_pit_timer(void)
 	 * Start pit with the boot cpu mask and make it global after the
 	 * IO_APIC has been initialized.
 	 */
-	cd->cpumask = cpumask_of_cpu(cpu);
+	cd->cpumask = cpumask_of(cpu);
 	clockevent_set_clock(cd, CLOCK_TICK_RATE);
 	cd->max_delta_ns = clockevent_delta2ns(0x7FFF, cd);
 	cd->min_delta_ns = clockevent_delta2ns(0xF, cd);
diff --git a/arch/mips/nxp/pnx8550/common/time.c b/arch/mips/nxp/pnx8550/common/time.c
index 62f495b57f93..cf293b279098 100644
--- a/arch/mips/nxp/pnx8550/common/time.c
+++ b/arch/mips/nxp/pnx8550/common/time.c
@@ -102,6 +102,7 @@ __init void plat_time_init(void)
 	unsigned int p;
 	unsigned int pow2p;
 
+	pnx8xxx_clockevent.cpumask = cpu_none_mask;
 	clockevents_register_device(&pnx8xxx_clockevent);
 	clocksource_register(&pnx_clocksource);
 
diff --git a/arch/mips/sgi-ip27/ip27-timer.c b/arch/mips/sgi-ip27/ip27-timer.c
index 1327c2746fb7..f024057a35f8 100644
--- a/arch/mips/sgi-ip27/ip27-timer.c
+++ b/arch/mips/sgi-ip27/ip27-timer.c
@@ -134,7 +134,7 @@ void __cpuinit hub_rt_clock_event_init(void)
 	cd->min_delta_ns        = clockevent_delta2ns(0x300, cd);
 	cd->rating		= 200;
 	cd->irq			= irq;
-	cd->cpumask		= cpumask_of_cpu(cpu);
+	cd->cpumask		= cpumask_of(cpu);
 	cd->set_next_event	= rt_next_event;
 	cd->set_mode		= rt_set_mode;
 	clockevents_register_device(cd);
diff --git a/arch/mips/sni/time.c b/arch/mips/sni/time.c
index 796e3ce28720..69f5f88711cc 100644
--- a/arch/mips/sni/time.c
+++ b/arch/mips/sni/time.c
@@ -80,7 +80,7 @@ static void __init sni_a20r_timer_setup(void)
 	struct irqaction *action = &a20r_irqaction;
 	unsigned int cpu = smp_processor_id();
 
-	cd->cpumask             = cpumask_of_cpu(cpu);
+	cd->cpumask             = cpumask_of(cpu);
 	clockevents_register_device(cd);
 	action->dev_id = cd;
 	setup_irq(SNI_A20R_IRQ_TIMER, &a20r_irqaction);
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index e2ee66b5831d..6f39d35d6f55 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -869,7 +869,7 @@ static void register_decrementer_clockevent(int cpu)
 	struct clock_event_device *dec = &per_cpu(decrementers, cpu).event;
 
 	*dec = decrementer_clockevent;
-	dec->cpumask = cpumask_of_cpu(cpu);
+	dec->cpumask = cpumask_of(cpu);
 
 	printk(KERN_DEBUG "clockevent: %s mult[%lx] shift[%d] cpu[%d]\n",
 	       dec->name, dec->mult, dec->shift, cpu);
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index eccefbbff887..f5bd141c8443 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -154,7 +154,7 @@ void init_cpu_timer(void)
 	cd->min_delta_ns	= 1;
 	cd->max_delta_ns	= LONG_MAX;
 	cd->rating		= 400;
-	cd->cpumask		= cpumask_of_cpu(cpu);
+	cd->cpumask		= cpumask_of(cpu);
 	cd->set_next_event	= s390_next_event;
 	cd->set_mode		= s390_set_mode;
 
diff --git a/arch/sh/include/asm/smp.h b/arch/sh/include/asm/smp.h
index 85b660c17eb0..c24e9c6a1736 100644
--- a/arch/sh/include/asm/smp.h
+++ b/arch/sh/include/asm/smp.h
@@ -31,7 +31,7 @@ enum {
 };
 
 void smp_message_recv(unsigned int msg);
-void smp_timer_broadcast(cpumask_t mask);
+void smp_timer_broadcast(const struct cpumask *mask);
 
 void local_timer_interrupt(void);
 void local_timer_setup(unsigned int cpu);
diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c
index 593937d0c495..8f4027412614 100644
--- a/arch/sh/kernel/smp.c
+++ b/arch/sh/kernel/smp.c
@@ -184,11 +184,11 @@ void arch_send_call_function_single_ipi(int cpu)
 	plat_send_ipi(cpu, SMP_MSG_FUNCTION_SINGLE);
 }
 
-void smp_timer_broadcast(cpumask_t mask)
+void smp_timer_broadcast(const struct cpumask *mask)
 {
 	int cpu;
 
-	for_each_cpu_mask(cpu, mask)
+	for_each_cpu(cpu, mask)
 		plat_send_ipi(cpu, SMP_MSG_TIMER);
 }
 
diff --git a/arch/sh/kernel/timers/timer-broadcast.c b/arch/sh/kernel/timers/timer-broadcast.c
index c2317635230f..96e8eaea1e62 100644
--- a/arch/sh/kernel/timers/timer-broadcast.c
+++ b/arch/sh/kernel/timers/timer-broadcast.c
@@ -51,7 +51,7 @@ void __cpuinit local_timer_setup(unsigned int cpu)
 	clk->mult		= 1;
 	clk->set_mode		= dummy_timer_set_mode;
 	clk->broadcast		= smp_timer_broadcast;
-	clk->cpumask		= cpumask_of_cpu(cpu);
+	clk->cpumask		= cpumask_of(cpu);
 
 	clockevents_register_device(clk);
 }
diff --git a/arch/sh/kernel/timers/timer-tmu.c b/arch/sh/kernel/timers/timer-tmu.c
index 3c61ddd4d43e..0db3f9510336 100644
--- a/arch/sh/kernel/timers/timer-tmu.c
+++ b/arch/sh/kernel/timers/timer-tmu.c
@@ -263,7 +263,7 @@ static int tmu_timer_init(void)
 	tmu0_clockevent.min_delta_ns =
 			clockevent_delta2ns(1, &tmu0_clockevent);
 
-	tmu0_clockevent.cpumask = cpumask_of_cpu(0);
+	tmu0_clockevent.cpumask = cpumask_of(0);
 
 	clockevents_register_device(&tmu0_clockevent);
 
diff --git a/arch/sparc64/kernel/time.c b/arch/sparc64/kernel/time.c
index 141da3759091..9df8f095a8b1 100644
--- a/arch/sparc64/kernel/time.c
+++ b/arch/sparc64/kernel/time.c
@@ -763,7 +763,7 @@ void __devinit setup_sparc64_timer(void)
 	sevt = &__get_cpu_var(sparc64_events);
 
 	memcpy(sevt, &sparc64_clockevent, sizeof(*sevt));
-	sevt->cpumask = cpumask_of_cpu(smp_processor_id());
+	sevt->cpumask = cpumask_of(smp_processor_id());
 
 	clockevents_register_device(sevt);
 }
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index 47f04f4a3464..b13a87a3ec95 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -50,7 +50,7 @@ static int itimer_next_event(unsigned long delta,
 static struct clock_event_device itimer_clockevent = {
 	.name		= "itimer",
 	.rating		= 250,
-	.cpumask	= CPU_MASK_ALL,
+	.cpumask	= cpu_all_mask,
 	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
 	.set_mode	= itimer_set_mode,
 	.set_next_event = itimer_next_event,
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 16f94879b525..b2cef49f3085 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -141,7 +141,7 @@ static int lapic_next_event(unsigned long delta,
 			    struct clock_event_device *evt);
 static void lapic_timer_setup(enum clock_event_mode mode,
 			      struct clock_event_device *evt);
-static void lapic_timer_broadcast(cpumask_t mask);
+static void lapic_timer_broadcast(const struct cpumask *mask);
 static void apic_pm_activate(void);
 
 /*
@@ -453,10 +453,10 @@ static void lapic_timer_setup(enum clock_event_mode mode,
 /*
  * Local APIC timer broadcast function
  */
-static void lapic_timer_broadcast(cpumask_t mask)
+static void lapic_timer_broadcast(const struct cpumask *mask)
 {
 #ifdef CONFIG_SMP
-	send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
+	send_IPI_mask(*mask, LOCAL_TIMER_VECTOR);
 #endif
 }
 
@@ -469,7 +469,7 @@ static void __cpuinit setup_APIC_timer(void)
 	struct clock_event_device *levt = &__get_cpu_var(lapic_events);
 
 	memcpy(levt, &lapic_clockevent, sizeof(*levt));
-	levt->cpumask = cpumask_of_cpu(smp_processor_id());
+	levt->cpumask = cpumask_of(smp_processor_id());
 
 	clockevents_register_device(levt);
 }
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 940f25851e1e..e76d7e272974 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -246,7 +246,7 @@ static void hpet_legacy_clockevent_register(void)
 	 * Start hpet with the boot cpu mask and make it
 	 * global after the IO_APIC has been initialized.
 	 */
-	hpet_clockevent.cpumask = cpumask_of_cpu(smp_processor_id());
+	hpet_clockevent.cpumask = cpumask_of(smp_processor_id());
 	clockevents_register_device(&hpet_clockevent);
 	global_clock_event = &hpet_clockevent;
 	printk(KERN_DEBUG "hpet clockevent registered\n");
@@ -500,7 +500,7 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)
 	/* 5 usec minimum reprogramming delta. */
 	evt->min_delta_ns = 5000;
 
-	evt->cpumask = cpumask_of_cpu(hdev->cpu);
+	evt->cpumask = cpumask_of(hdev->cpu);
 	clockevents_register_device(evt);
 }
 
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index c1b5e3ece1f2..10f92fb532f3 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -114,7 +114,7 @@ void __init setup_pit_timer(void)
 	 * Start pit with the boot cpu mask and make it global after the
 	 * IO_APIC has been initialized.
 	 */
-	pit_clockevent.cpumask = cpumask_of_cpu(smp_processor_id());
+	pit_clockevent.cpumask = cpumask_of(smp_processor_id());
 	pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC,
 				     pit_clockevent.shift);
 	pit_clockevent.max_delta_ns =
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c
index 3b599518c322..c12314c9e86f 100644
--- a/arch/x86/kernel/mfgpt_32.c
+++ b/arch/x86/kernel/mfgpt_32.c
@@ -287,7 +287,7 @@ static struct clock_event_device mfgpt_clockevent = {
 	.set_mode = mfgpt_set_mode,
 	.set_next_event = mfgpt_next_event,
 	.rating = 250,
-	.cpumask = CPU_MASK_ALL,
+	.cpumask = cpu_all_mask,
 	.shift = 32
 };
 
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index 254ee07f8635..c4c1f9e09402 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -226,7 +226,7 @@ static void __devinit vmi_time_init_clockevent(void)
 	/* Upper bound is clockevent's use of ulong for cycle deltas. */
 	evt->max_delta_ns = clockevent_delta2ns(ULONG_MAX, evt);
 	evt->min_delta_ns = clockevent_delta2ns(1, evt);
-	evt->cpumask = cpumask_of_cpu(cpu);
+	evt->cpumask = cpumask_of(cpu);
 
 	printk(KERN_WARNING "vmi: registering clock event %s. mult=%lu shift=%u\n",
 	       evt->name, evt->mult, evt->shift);
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index a5d8e1ace1cf..104c8220a383 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -737,7 +737,7 @@ static void lguest_time_init(void)
 
 	/* We can't set cpumask in the initializer: damn C limitations!  Set it
 	 * here and register our timer device. */
-	lguest_clockevent.cpumask = cpumask_of_cpu(0);
+	lguest_clockevent.cpumask = cpumask_of(0);
 	clockevents_register_device(&lguest_clockevent);
 
 	/* Finally, we unblock the timer interrupt. */
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index c9f7cda48ed7..65d75a6be0ba 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -437,7 +437,7 @@ void xen_setup_timer(int cpu)
 	evt = &per_cpu(xen_clock_events, cpu);
 	memcpy(evt, xen_clockevent, sizeof(*evt));
 
-	evt->cpumask = cpumask_of_cpu(cpu);
+	evt->cpumask = cpumask_of(cpu);
 	evt->irq = irq;
 
 	setup_runstate_info(cpu);
diff --git a/drivers/clocksource/tcb_clksrc.c b/drivers/clocksource/tcb_clksrc.c
index f450588e5858..254f1064d973 100644
--- a/drivers/clocksource/tcb_clksrc.c
+++ b/drivers/clocksource/tcb_clksrc.c
@@ -154,7 +154,6 @@ static struct tc_clkevt_device clkevt = {
 		.shift		= 32,
 		/* Should be lower than at91rm9200's system timer */
 		.rating		= 125,
-		.cpumask	= CPU_MASK_CPU0,
 		.set_next_event	= tc_next_event,
 		.set_mode	= tc_mode,
 	},
@@ -195,6 +194,7 @@ static void __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx)
 	clkevt.clkevt.max_delta_ns
 		= clockevent_delta2ns(0xffff, &clkevt.clkevt);
 	clkevt.clkevt.min_delta_ns = clockevent_delta2ns(1, &clkevt.clkevt) + 1;
+	clkevt.clkevt.cpumask = cpumask_of(0);
 
 	setup_irq(irq, &tc_irqaction);
 
diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index ed3a5d473e52..cea153697ec7 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -82,13 +82,13 @@ struct clock_event_device {
 	int			shift;
 	int			rating;
 	int			irq;
-	cpumask_t		cpumask;
+	const struct cpumask	*cpumask;
 	int			(*set_next_event)(unsigned long evt,
 						  struct clock_event_device *);
 	void			(*set_mode)(enum clock_event_mode mode,
 					    struct clock_event_device *);
 	void			(*event_handler)(struct clock_event_device *);
-	void			(*broadcast)(cpumask_t mask);
+	void			(*broadcast)(const struct cpumask *mask);
 	struct list_head	list;
 	enum clock_event_mode	mode;
 	ktime_t			next_event;
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index f8d968063cea..ea2f48af83cf 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -166,6 +166,8 @@ static void clockevents_notify_released(void)
 void clockevents_register_device(struct clock_event_device *dev)
 {
 	BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
+	BUG_ON(!dev->cpumask);
+
 	/*
 	 * A nsec2cyc multiplicator of 0 is invalid and we'd crash
 	 * on it, so fix it up and emit a warning:
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index f98a1b7b16e9..9590af2327be 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -150,7 +150,7 @@ static void tick_do_broadcast(cpumask_t mask)
 		 */
 		cpu = first_cpu(mask);
 		td = &per_cpu(tick_cpu_device, cpu);
-		td->evtdev->broadcast(mask);
+		td->evtdev->broadcast(&mask);
 	}
 }
 
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index ab65d217583f..f8372be74122 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -171,7 +171,7 @@ static void tick_setup_device(struct tick_device *td,
 	 * When the device is not per cpu, pin the interrupt to the
 	 * current cpu:
 	 */
-	if (!cpumask_equal(&newdev->cpumask, cpumask))
+	if (!cpumask_equal(newdev->cpumask, cpumask))
 		irq_set_affinity(newdev->irq, cpumask);
 
 	/*
@@ -202,14 +202,14 @@ static int tick_check_new_device(struct clock_event_device *newdev)
 	spin_lock_irqsave(&tick_device_lock, flags);
 
 	cpu = smp_processor_id();
-	if (!cpu_isset(cpu, newdev->cpumask))
+	if (!cpumask_test_cpu(cpu, newdev->cpumask))
 		goto out_bc;
 
 	td = &per_cpu(tick_cpu_device, cpu);
 	curdev = td->evtdev;
 
 	/* cpu local device ? */
-	if (!cpus_equal(newdev->cpumask, cpumask_of_cpu(cpu))) {
+	if (!cpumask_equal(newdev->cpumask, cpumask_of(cpu))) {
 
 		/*
 		 * If the cpu affinity of the device interrupt can not
@@ -222,7 +222,7 @@ static int tick_check_new_device(struct clock_event_device *newdev)
 		 * If we have a cpu local device already, do not replace it
 		 * by a non cpu local device
 		 */
-		if (curdev && cpus_equal(curdev->cpumask, cpumask_of_cpu(cpu)))
+		if (curdev && cpumask_equal(curdev->cpumask, cpumask_of(cpu)))
 			goto out_bc;
 	}
 
-- 
cgit v1.2.3


From 7be7585393d311866653564fbcd10a3232773c0b Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Sat, 13 Dec 2008 21:20:28 +1030
Subject: cpumask: Use all NR_CPUS bits unless CONFIG_CPUMASK_OFFSTACK

Impact: futureproof as we convert more code to new APIs

The old cpumask operators treat all NR_CPUS bits as relevent, the new
ones use nr_cpumask_bits.  For large NR_CPUS and small nr_cpu_ids, this
makes a difference.

However, mixing the two can cause problems with undefined bits.  An
arch which sets CONFIG_CPUMASK_OFFSTACK should have converted across
to the new operators, so it's safe in that case.

(Thanks to Stephen Rothwell for bisecting the initial unused-bits bug,
and Mike Travis for this solution).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Mike Travis <travis@sgi.com>
---
 include/linux/cpumask.h | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 94a2ab88ae85..d4bf52603e6b 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -510,9 +510,6 @@ extern cpumask_t cpu_active_map;
 	[BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD	\
 }
 
-/* This produces more efficient code. */
-#define nr_cpumask_bits	NR_CPUS
-
 #else /* NR_CPUS > BITS_PER_LONG */
 
 #define CPU_BITS_ALL						\
@@ -520,9 +517,15 @@ extern cpumask_t cpu_active_map;
 	[0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL,		\
 	[BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD		\
 }
+#endif /* NR_CPUS > BITS_PER_LONG */
 
+#ifdef CONFIG_CPUMASK_OFFSTACK
+/* Assuming NR_CPUS is huge, a runtime limit is more efficient.  Also,
+ * not all bits may be allocated. */
 #define nr_cpumask_bits	nr_cpu_ids
-#endif /* NR_CPUS > BITS_PER_LONG */
+#else
+#define nr_cpumask_bits	NR_CPUS
+#endif
 
 /* verify cpu argument to cpumask_* operators */
 static inline unsigned int cpumask_check(unsigned int cpu)
-- 
cgit v1.2.3


From b690ace50be7d10d77cb7a6d5ef1bd9de649852f Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Tue, 21 Oct 2008 14:07:03 +0100
Subject: [ARM] S3C6400: serial support for S3C6400 and S3C6410 SoCs

Add support to the Samsung serial driver for the S3C6400
and S3C6410 serial ports.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>
---
 arch/arm/mach-s3c2410/include/mach/map.h     |   2 +
 arch/arm/mach-s3c24a0/include/mach/map.h     |   2 +
 arch/arm/plat-s3c/include/plat/regs-serial.h |   6 ++
 drivers/serial/Kconfig                       |  10 +-
 drivers/serial/Makefile                      |   1 +
 drivers/serial/s3c6400.c                     | 151 +++++++++++++++++++++++++++
 drivers/serial/samsung.c                     |   8 +-
 include/linux/serial_core.h                  |   2 +
 8 files changed, 178 insertions(+), 4 deletions(-)
 create mode 100644 drivers/serial/s3c6400.c

(limited to 'include/linux')

diff --git a/arch/arm/mach-s3c2410/include/mach/map.h b/arch/arm/mach-s3c2410/include/mach/map.h
index 6b30361a0805..918e3463297f 100644
--- a/arch/arm/mach-s3c2410/include/mach/map.h
+++ b/arch/arm/mach-s3c2410/include/mach/map.h
@@ -101,4 +101,6 @@
 #define S3C24XX_PA_SDI      S3C2410_PA_SDI
 #define S3C24XX_PA_NAND	    S3C2410_PA_NAND
 
+#define S3C_PA_UART	    S3C24XX_PA_UART
+
 #endif /* __ASM_ARCH_MAP_H */
diff --git a/arch/arm/mach-s3c24a0/include/mach/map.h b/arch/arm/mach-s3c24a0/include/mach/map.h
index 2ce1839de4ee..6667355a47a1 100644
--- a/arch/arm/mach-s3c24a0/include/mach/map.h
+++ b/arch/arm/mach-s3c24a0/include/mach/map.h
@@ -80,4 +80,6 @@
 #define S3C24XX_PA_SDI		S3C24A0_PA_SDI
 #define S3C24XX_PA_NAND		S3C24A0_PA_NAND
 
+#define S3C_PA_UART		S3C24A0_PA_UART
+
 #endif /* __ASM_ARCH_24A0_MAP_H */
diff --git a/arch/arm/plat-s3c/include/plat/regs-serial.h b/arch/arm/plat-s3c/include/plat/regs-serial.h
index 18ba31c7174c..3ca28585cf80 100644
--- a/arch/arm/plat-s3c/include/plat/regs-serial.h
+++ b/arch/arm/plat-s3c/include/plat/regs-serial.h
@@ -77,6 +77,12 @@
 #define S3C2440_UCON_FCLK	  (3<<10)
 #define S3C2443_UCON_EPLL	  (3<<10)
 
+#define S3C6400_UCON_CLKMASK	(3<<10)
+#define S3C6400_UCON_PCLK	(0<<10)
+#define S3C6400_UCON_PCLK2	(2<<10)
+#define S3C6400_UCON_UCLK0	(1<<10)
+#define S3C6400_UCON_UCLK1	(3<<10)
+
 #define S3C2440_UCON2_FCLK_EN	  (1<<15)
 #define S3C2440_UCON0_DIVMASK	  (15 << 12)
 #define S3C2440_UCON1_DIVMASK	  (15 << 12)
diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig
index f71a2e8a5f60..e4ae499e587e 100644
--- a/drivers/serial/Kconfig
+++ b/drivers/serial/Kconfig
@@ -447,7 +447,7 @@ config SERIAL_CLPS711X_CONSOLE
 
 config SERIAL_SAMSUNG
 	tristate "Samsung SoC serial support"
-	depends on ARM && PLAT_S3C24XX
+	depends on ARM && PLAT_S3C
 	select SERIAL_CORE
 	help
 	  Support for the on-chip UARTs on the Samsung S3C24XX series CPUs,
@@ -515,6 +515,14 @@ config SERIAL_S3C24A0
 	help
 	  Serial port support for the Samsung S3C24A0 SoC
 
+config SERIAL_S3C6400
+	tristate "Samsung S3C6400/S3C6410 Serial port support"
+	depends on SERIAL_SAMSUNG && (CPU_S3C600 || CPU_S3C6410)
+	default y
+	help
+	  Serial port support for the Samsung S3C6400 and S3C6410
+	  SoCs
+
 config SERIAL_DZ
 	bool "DECstation DZ serial driver"
 	depends on MACH_DECSTATION && 32BIT
diff --git a/drivers/serial/Makefile b/drivers/serial/Makefile
index 7769aece54ca..dfe775ac45b2 100644
--- a/drivers/serial/Makefile
+++ b/drivers/serial/Makefile
@@ -42,6 +42,7 @@ obj-$(CONFIG_SERIAL_S3C2410) += s3c2410.o
 obj-$(CONFIG_SERIAL_S3C2412) += s3c2412.o
 obj-$(CONFIG_SERIAL_S3C2440) += s3c2440.o
 obj-$(CONFIG_SERIAL_S3C24A0) += s3c24a0.o
+obj-$(CONFIG_SERIAL_S3C6400) += s3c6400.o
 obj-$(CONFIG_SERIAL_IP22_ZILOG) += ip22zilog.o
 obj-$(CONFIG_SERIAL_MUX) += mux.o
 obj-$(CONFIG_SERIAL_68328) += 68328serial.o
diff --git a/drivers/serial/s3c6400.c b/drivers/serial/s3c6400.c
new file mode 100644
index 000000000000..06936d13393f
--- /dev/null
+++ b/drivers/serial/s3c6400.c
@@ -0,0 +1,151 @@
+/* linux/drivers/serial/s3c6400.c
+ *
+ * Driver for Samsung S3C6400 and S3C6410 SoC onboard UARTs.
+ *
+ * Copyright 2008 Openmoko,  Inc.
+ * Copyright 2008 Simtec Electronics
+ *	Ben Dooks <ben@simtec.co.uk>
+ *	http://armlinux.simtec.co.uk/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+#include <linux/module.h>
+#include <linux/ioport.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
+#include <linux/init.h>
+#include <linux/serial_core.h>
+#include <linux/serial.h>
+
+#include <asm/irq.h>
+#include <mach/hardware.h>
+
+#include <plat/regs-serial.h>
+
+#include "samsung.h"
+
+static int s3c6400_serial_setsource(struct uart_port *port,
+				    struct s3c24xx_uart_clksrc *clk)
+{
+	unsigned long ucon = rd_regl(port, S3C2410_UCON);
+
+	if (strcmp(clk->name, "uclk0") == 0) {
+		ucon &= ~S3C6400_UCON_CLKMASK;
+		ucon |= S3C6400_UCON_UCLK0;
+	} else if (strcmp(clk->name, "uclk1") == 0)
+		ucon |= S3C6400_UCON_UCLK1;
+	else if (strcmp(clk->name, "pclk") == 0) {
+		/* See notes about transitioning from UCLK to PCLK */
+		ucon &= ~S3C6400_UCON_UCLK0;
+	} else {
+		printk(KERN_ERR "unknown clock source %s\n", clk->name);
+		return -EINVAL;
+	}
+
+	wr_regl(port, S3C2410_UCON, ucon);
+	return 0;
+}
+
+
+static int s3c6400_serial_getsource(struct uart_port *port,
+				    struct s3c24xx_uart_clksrc *clk)
+{
+	u32 ucon = rd_regl(port, S3C2410_UCON);
+
+	clk->divisor = 1;
+
+	switch (ucon & S3C6400_UCON_CLKMASK) {
+	case S3C6400_UCON_UCLK0:
+		clk->name = "uclk0";
+		break;
+
+	case S3C6400_UCON_UCLK1:
+		clk->name = "uclk1";
+		break;
+
+	case S3C6400_UCON_PCLK:
+	case S3C6400_UCON_PCLK2:
+		clk->name = "pclk";
+		break;
+	}
+
+	return 0;
+}
+
+static int s3c6400_serial_resetport(struct uart_port *port,
+				    struct s3c2410_uartcfg *cfg)
+{
+	unsigned long ucon = rd_regl(port, S3C2410_UCON);
+
+	dbg("s3c6400_serial_resetport: port=%p (%08lx), cfg=%p\n",
+	    port, port->mapbase, cfg);
+
+	/* ensure we don't change the clock settings... */
+
+	ucon &= S3C6400_UCON_CLKMASK;
+
+	wr_regl(port, S3C2410_UCON,  ucon | cfg->ucon);
+	wr_regl(port, S3C2410_ULCON, cfg->ulcon);
+
+	/* reset both fifos */
+
+	wr_regl(port, S3C2410_UFCON, cfg->ufcon | S3C2410_UFCON_RESETBOTH);
+	wr_regl(port, S3C2410_UFCON, cfg->ufcon);
+
+	return 0;
+}
+
+static struct s3c24xx_uart_info s3c6400_uart_inf = {
+	.name		= "Samsung S3C6400 UART",
+	.type		= PORT_S3C6400,
+	.fifosize	= 64,
+	.rx_fifomask	= S3C2440_UFSTAT_RXMASK,
+	.rx_fifoshift	= S3C2440_UFSTAT_RXSHIFT,
+	.rx_fifofull	= S3C2440_UFSTAT_RXFULL,
+	.tx_fifofull	= S3C2440_UFSTAT_TXFULL,
+	.tx_fifomask	= S3C2440_UFSTAT_TXMASK,
+	.tx_fifoshift	= S3C2440_UFSTAT_TXSHIFT,
+	.get_clksrc	= s3c6400_serial_getsource,
+	.set_clksrc	= s3c6400_serial_setsource,
+	.reset_port	= s3c6400_serial_resetport,
+};
+
+/* device management */
+
+static int s3c6400_serial_probe(struct platform_device *dev)
+{
+	dbg("s3c6400_serial_probe: dev=%p\n", dev);
+	return s3c24xx_serial_probe(dev, &s3c6400_uart_inf);
+}
+
+static struct platform_driver s3c6400_serial_drv = {
+	.probe		= s3c6400_serial_probe,
+	.remove		= s3c24xx_serial_remove,
+	.driver		= {
+		.name	= "s3c6400-uart",
+		.owner	= THIS_MODULE,
+	},
+};
+
+s3c24xx_console_init(&s3c6400_serial_drv, &s3c6400_uart_inf);
+
+static int __init s3c6400_serial_init(void)
+{
+	return s3c24xx_serial_init(&s3c6400_serial_drv, &s3c6400_uart_inf);
+}
+
+static void __exit s3c6400_serial_exit(void)
+{
+	platform_driver_unregister(&s3c6400_serial_drv);
+}
+
+module_init(s3c6400_serial_init);
+module_exit(s3c6400_serial_exit);
+
+MODULE_DESCRIPTION("Samsung S3C6400,S3C6410 SoC Serial port driver");
+MODULE_AUTHOR("Ben Dooks <ben@simtec.co.uk>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:s3c6400-uart");
diff --git a/drivers/serial/samsung.c b/drivers/serial/samsung.c
index bb8b57aae3af..44fc38afa228 100644
--- a/drivers/serial/samsung.c
+++ b/drivers/serial/samsung.c
@@ -47,9 +47,9 @@
 #include <asm/irq.h>
 
 #include <mach/hardware.h>
+#include <mach/map.h>
 
 #include <plat/regs-serial.h>
-#include <mach/regs-gpio.h>
 
 #include "samsung.h"
 
@@ -756,6 +756,8 @@ static const char *s3c24xx_serial_type(struct uart_port *port)
 		return "S3C2440";
 	case PORT_S3C2412:
 		return "S3C2412";
+	case PORT_S3C6400:
+		return "S3C6400/10";
 	default:
 		return NULL;
 	}
@@ -1034,8 +1036,8 @@ static int s3c24xx_serial_init_port(struct s3c24xx_uart_port *ourport,
 
 	dbg("resource %p (%lx..%lx)\n", res, res->start, res->end);
 
-	port->mapbase	= res->start;
-	port->membase	= S3C24XX_VA_UART + (res->start - S3C24XX_PA_UART);
+	port->mapbase = res->start;
+	port->membase = S3C_VA_UART + res->start - (S3C_PA_UART & 0xfff00000);
 	ret = platform_get_irq(platdev, 0);
 	if (ret < 0)
 		port->irq = 0;
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 4e4f1277f3bf..feb3b939ec4b 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -158,6 +158,8 @@
 /* SH-SCI */
 #define PORT_SCIFA	83
 
+#define PORT_S3C6400	84
+
 #ifdef __KERNEL__
 
 #include <linux/compiler.h>
-- 
cgit v1.2.3


From 8c5df16bec8a60bb8589fc232b9e26cac0ed4b2c Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Tue, 16 Dec 2008 12:17:26 -0800
Subject: swiotlb: allow architectures to override swiotlb pool allocation

Impact: generalize swiotlb allocation code

Architectures may need to allocate memory specially for use with
the swiotlb.  Create the weak function swiotlb_alloc_boot() and
swiotlb_alloc() defaulting to the current behaviour.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/swiotlb.h |  3 +++
 lib/swiotlb.c           | 16 +++++++++++++---
 2 files changed, 16 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index b18ec5533e8c..b8c5fc766a56 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -10,6 +10,9 @@ struct scatterlist;
 extern void
 swiotlb_init(void);
 
+extern void *swiotlb_alloc_boot(size_t bytes, unsigned long nslabs);
+extern void *swiotlb_alloc(unsigned order, unsigned long nslabs);
+
 extern void
 *swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 			dma_addr_t *dma_handle, gfp_t flags);
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 5f6c629a924d..abecb2857556 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -21,6 +21,7 @@
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/spinlock.h>
+#include <linux/swiotlb.h>
 #include <linux/string.h>
 #include <linux/types.h>
 #include <linux/ctype.h>
@@ -126,6 +127,16 @@ setup_io_tlb_npages(char *str)
 __setup("swiotlb=", setup_io_tlb_npages);
 /* make io_tlb_overflow tunable too? */
 
+void * __weak swiotlb_alloc_boot(size_t size, unsigned long nslabs)
+{
+	return alloc_bootmem_low_pages(size);
+}
+
+void * __weak swiotlb_alloc(unsigned order, unsigned long nslabs)
+{
+	return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order);
+}
+
 /*
  * Statically reserve bounce buffer space and initialize bounce buffer data
  * structures for the software IO TLB used to implement the DMA API.
@@ -145,7 +156,7 @@ swiotlb_init_with_default_size(size_t default_size)
 	/*
 	 * Get IO TLB memory from the low pages
 	 */
-	io_tlb_start = alloc_bootmem_low_pages(bytes);
+	io_tlb_start = swiotlb_alloc_boot(bytes, io_tlb_nslabs);
 	if (!io_tlb_start)
 		panic("Cannot allocate SWIOTLB buffer");
 	io_tlb_end = io_tlb_start + bytes;
@@ -202,8 +213,7 @@ swiotlb_late_init_with_default_size(size_t default_size)
 	bytes = io_tlb_nslabs << IO_TLB_SHIFT;
 
 	while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
-		io_tlb_start = (char *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
-		                                        order);
+		io_tlb_start = swiotlb_alloc(order, io_tlb_nslabs);
 		if (io_tlb_start)
 			break;
 		order--;
-- 
cgit v1.2.3


From 0016fdee927f7aa0f428494bcf11ae60c7470a02 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Tue, 16 Dec 2008 12:17:27 -0800
Subject: swiotlb: move some definitions to header

Impact: cleanup

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/swiotlb.h | 14 ++++++++++++++
 lib/swiotlb.c           | 14 +-------------
 2 files changed, 15 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index b8c5fc766a56..58b996a642f9 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -7,6 +7,20 @@ struct device;
 struct dma_attrs;
 struct scatterlist;
 
+/*
+ * Maximum allowable number of contiguous slabs to map,
+ * must be a power of 2.  What is the appropriate value ?
+ * The complexity of {map,unmap}_single is linearly dependent on this value.
+ */
+#define IO_TLB_SEGSIZE	128
+
+
+/*
+ * log of the size of each IO TLB slab.  The number of slabs is command line
+ * controllable.
+ */
+#define IO_TLB_SHIFT 11
+
 extern void
 swiotlb_init(void);
 
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index abecb2857556..db724ba7ebf6 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -23,6 +23,7 @@
 #include <linux/spinlock.h>
 #include <linux/swiotlb.h>
 #include <linux/string.h>
+#include <linux/swiotlb.h>
 #include <linux/types.h>
 #include <linux/ctype.h>
 
@@ -40,19 +41,6 @@
 #define SG_ENT_VIRT_ADDRESS(sg)	(sg_virt((sg)))
 #define SG_ENT_PHYS_ADDRESS(sg)	virt_to_bus(SG_ENT_VIRT_ADDRESS(sg))
 
-/*
- * Maximum allowable number of contiguous slabs to map,
- * must be a power of 2.  What is the appropriate value ?
- * The complexity of {map,unmap}_single is linearly dependent on this value.
- */
-#define IO_TLB_SEGSIZE	128
-
-/*
- * log of the size of each IO TLB slab.  The number of slabs is command line
- * controllable.
- */
-#define IO_TLB_SHIFT 11
-
 #define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
 
 /*
-- 
cgit v1.2.3


From 48a1b10aff588833b73994704c47bbd0deb73e9c Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Thu, 11 Dec 2008 00:15:01 -0800
Subject: x86, sparseirq: move irq_desc according to smp_affinity, v7

Impact: improve NUMA handling by migrating irq_desc on smp_affinity changes

if CONFIG_NUMA_MIGRATE_IRQ_DESC is set:

-  make irq_desc to go with affinity aka irq_desc moving etc
-  call move_irq_desc in irq_complete_move()
-  legacy irq_desc is not moved, because they are allocated via static array

for logical apic mode, need to add move_desc_in_progress_in_same_domain,
otherwise it will not be moved ==> also could need two phases to get
irq_desc moved.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig          |   9 +++
 arch/x86/kernel/io_apic.c | 142 +++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/irq.h       |  10 ++++
 kernel/irq/Makefile       |   1 +
 kernel/irq/chip.c         |  12 +++-
 kernel/irq/handle.c       |  15 +++--
 kernel/irq/internals.h    |   5 ++
 kernel/irq/numa_migrate.c | 127 +++++++++++++++++++++++++++++++++++++++++
 8 files changed, 313 insertions(+), 8 deletions(-)
 create mode 100644 kernel/irq/numa_migrate.c

(limited to 'include/linux')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8943c13502c6..29073532f94c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -248,6 +248,15 @@ config SPARSE_IRQ
 
 	  If you don't know what to do here, say Y.
 
+config NUMA_MIGRATE_IRQ_DESC
+	bool "Move irq desc when changing irq smp_affinity"
+	depends on SPARSE_IRQ && SMP
+	default n
+	help
+	  This enables moving irq_desc to cpu/node that irq will use handled.
+
+	  If you don't know what to do here, say N.
+
 config X86_FIND_SMP_CONFIG
 	def_bool y
 	depends on X86_MPPARSE || X86_VOYAGER
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index a1a2e070f31a..bfe1245b1a3e 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -141,6 +141,9 @@ struct irq_cfg {
 	unsigned move_cleanup_count;
 	u8 vector;
 	u8 move_in_progress : 1;
+#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+	u8 move_desc_pending : 1;
+#endif
 };
 
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
@@ -223,6 +226,121 @@ void arch_init_chip_data(struct irq_desc *desc, int cpu)
 	}
 }
 
+#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+
+static void
+init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
+{
+	struct irq_pin_list *old_entry, *head, *tail, *entry;
+
+	cfg->irq_2_pin = NULL;
+	old_entry = old_cfg->irq_2_pin;
+	if (!old_entry)
+		return;
+
+	entry = get_one_free_irq_2_pin(cpu);
+	if (!entry)
+		return;
+
+	entry->apic	= old_entry->apic;
+	entry->pin	= old_entry->pin;
+	head		= entry;
+	tail		= entry;
+	old_entry	= old_entry->next;
+	while (old_entry) {
+		entry = get_one_free_irq_2_pin(cpu);
+		if (!entry) {
+			entry = head;
+			while (entry) {
+				head = entry->next;
+				kfree(entry);
+				entry = head;
+			}
+			/* still use the old one */
+			return;
+		}
+		entry->apic	= old_entry->apic;
+		entry->pin	= old_entry->pin;
+		tail->next	= entry;
+		tail		= entry;
+		old_entry	= old_entry->next;
+	}
+
+	tail->next = NULL;
+	cfg->irq_2_pin = head;
+}
+
+static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg)
+{
+	struct irq_pin_list *entry, *next;
+
+	if (old_cfg->irq_2_pin == cfg->irq_2_pin)
+		return;
+
+	entry = old_cfg->irq_2_pin;
+
+	while (entry) {
+		next = entry->next;
+		kfree(entry);
+		entry = next;
+	}
+	old_cfg->irq_2_pin = NULL;
+}
+
+void arch_init_copy_chip_data(struct irq_desc *old_desc,
+				 struct irq_desc *desc, int cpu)
+{
+	struct irq_cfg *cfg;
+	struct irq_cfg *old_cfg;
+
+	cfg = get_one_free_irq_cfg(cpu);
+
+	if (!cfg)
+		return;
+
+	desc->chip_data = cfg;
+
+	old_cfg = old_desc->chip_data;
+
+	memcpy(cfg, old_cfg, sizeof(struct irq_cfg));
+
+	init_copy_irq_2_pin(old_cfg, cfg, cpu);
+}
+
+static void free_irq_cfg(struct irq_cfg *old_cfg)
+{
+	kfree(old_cfg);
+}
+
+void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
+{
+	struct irq_cfg *old_cfg, *cfg;
+
+	old_cfg = old_desc->chip_data;
+	cfg = desc->chip_data;
+
+	if (old_cfg == cfg)
+		return;
+
+	if (old_cfg) {
+		free_irq_2_pin(old_cfg, cfg);
+		free_irq_cfg(old_cfg);
+		old_desc->chip_data = NULL;
+	}
+}
+
+static void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask)
+{
+	struct irq_cfg *cfg = desc->chip_data;
+
+	if (!cfg->move_in_progress) {
+		/* it means that domain is not changed */
+		if (!cpus_intersects(desc->affinity, mask))
+			cfg->move_desc_pending = 1;
+	}
+}
+#endif
+
 #else
 static struct irq_cfg *irq_cfg(unsigned int irq)
 {
@@ -231,9 +349,11 @@ static struct irq_cfg *irq_cfg(unsigned int irq)
 
 #endif
 
+#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC
 static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask)
 {
 }
+#endif
 
 struct io_apic {
 	unsigned int index;
@@ -2346,14 +2466,34 @@ static void irq_complete_move(struct irq_desc **descp)
 	struct irq_cfg *cfg = desc->chip_data;
 	unsigned vector, me;
 
-	if (likely(!cfg->move_in_progress))
+	if (likely(!cfg->move_in_progress)) {
+#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+		if (likely(!cfg->move_desc_pending))
+			return;
+
+		/* domain is not change, but affinity is changed */
+		me = smp_processor_id();
+		if (cpu_isset(me, desc->affinity)) {
+			*descp = desc = move_irq_desc(desc, me);
+			/* get the new one */
+			cfg = desc->chip_data;
+			cfg->move_desc_pending = 0;
+		}
+#endif
 		return;
+	}
 
 	vector = ~get_irq_regs()->orig_ax;
 	me = smp_processor_id();
 	if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
 		cpumask_t cleanup_mask;
 
+#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+		*descp = desc = move_irq_desc(desc, me);
+		/* get the new one */
+		cfg = desc->chip_data;
+#endif
+
 		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
 		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
 		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
diff --git a/include/linux/irq.h b/include/linux/irq.h
index b5749db3e5a1..36a015746788 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -227,6 +227,16 @@ extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int cpu);
 
 #endif
 
+static inline struct irq_desc *
+irq_remap_to_desc(unsigned int irq, struct irq_desc *desc)
+{
+#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+	return irq_to_desc(irq);
+#else
+	return desc;
+#endif
+}
+
 /*
  * Migration helpers for obsolete names, they will go away:
  */
diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile
index 681c52dbfe22..4dd5b1edac98 100644
--- a/kernel/irq/Makefile
+++ b/kernel/irq/Makefile
@@ -3,3 +3,4 @@ obj-y := handle.o manage.o spurious.o resend.o chip.o devres.o
 obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o
 obj-$(CONFIG_PROC_FS) += proc.o
 obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o
+obj-$(CONFIG_NUMA_MIGRATE_IRQ_DESC) += numa_migrate.o
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 8e4fce4a1b1f..de210f4b7a92 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -353,6 +353,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
 
 	spin_lock(&desc->lock);
 	mask_ack_irq(desc, irq);
+	desc = irq_remap_to_desc(irq, desc);
 
 	if (unlikely(desc->status & IRQ_INPROGRESS))
 		goto out_unlock;
@@ -430,6 +431,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
 	desc->status &= ~IRQ_INPROGRESS;
 out:
 	desc->chip->eoi(irq);
+	desc = irq_remap_to_desc(irq, desc);
 
 	spin_unlock(&desc->lock);
 }
@@ -466,12 +468,14 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
 		    !desc->action)) {
 		desc->status |= (IRQ_PENDING | IRQ_MASKED);
 		mask_ack_irq(desc, irq);
+		desc = irq_remap_to_desc(irq, desc);
 		goto out_unlock;
 	}
 	kstat_incr_irqs_this_cpu(irq, desc);
 
 	/* Start handling the irq */
 	desc->chip->ack(irq);
+	desc = irq_remap_to_desc(irq, desc);
 
 	/* Mark the IRQ currently in progress.*/
 	desc->status |= IRQ_INPROGRESS;
@@ -532,8 +536,10 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
 	if (!noirqdebug)
 		note_interrupt(irq, desc, action_ret);
 
-	if (desc->chip->eoi)
+	if (desc->chip->eoi) {
 		desc->chip->eoi(irq);
+		desc = irq_remap_to_desc(irq, desc);
+	}
 }
 
 void
@@ -568,8 +574,10 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
 
 	/* Uninstall? */
 	if (handle == handle_bad_irq) {
-		if (desc->chip != &no_irq_chip)
+		if (desc->chip != &no_irq_chip) {
 			mask_ack_irq(desc, irq);
+			desc = irq_remap_to_desc(irq, desc);
+		}
 		desc->status |= IRQ_DISABLED;
 		desc->depth = 1;
 	}
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 8aa09547f5ef..f1a23069c20a 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -23,7 +23,7 @@
 /*
  * lockdep: we want to handle all irq_desc locks as a single lock-class:
  */
-static struct lock_class_key irq_desc_lock_class;
+struct lock_class_key irq_desc_lock_class;
 
 /**
  * handle_bad_irq - handle spurious and unhandled irqs
@@ -73,7 +73,7 @@ static struct irq_desc irq_desc_init = {
 #endif
 };
 
-static void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr)
+void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr)
 {
 	unsigned long bytes;
 	char *ptr;
@@ -113,7 +113,7 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu)
 /*
  * Protect the sparse_irqs:
  */
-static DEFINE_SPINLOCK(sparse_irq_lock);
+DEFINE_SPINLOCK(sparse_irq_lock);
 
 struct irq_desc *irq_desc_ptrs[NR_IRQS] __read_mostly;
 
@@ -337,8 +337,11 @@ unsigned int __do_IRQ(unsigned int irq)
 		/*
 		 * No locking required for CPU-local interrupts:
 		 */
-		if (desc->chip->ack)
+		if (desc->chip->ack) {
 			desc->chip->ack(irq);
+			/* get new one */
+			desc = irq_remap_to_desc(irq, desc);
+		}
 		if (likely(!(desc->status & IRQ_DISABLED))) {
 			action_ret = handle_IRQ_event(irq, desc->action);
 			if (!noirqdebug)
@@ -349,8 +352,10 @@ unsigned int __do_IRQ(unsigned int irq)
 	}
 
 	spin_lock(&desc->lock);
-	if (desc->chip->ack)
+	if (desc->chip->ack) {
 		desc->chip->ack(irq);
+		desc = irq_remap_to_desc(irq, desc);
+	}
 	/*
 	 * REPLAY is when Linux resends an IRQ that was dropped earlier
 	 * WAITING is used by probe to mark irqs that are being tested
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 64c1c7253dae..e6d0a43cc125 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -13,6 +13,11 @@ extern void compat_irq_chip_set_default_handler(struct irq_desc *desc);
 extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
 		unsigned long flags);
 
+extern struct lock_class_key irq_desc_lock_class;
+extern void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr);
+extern spinlock_t sparse_irq_lock;
+extern struct irq_desc *irq_desc_ptrs[NR_IRQS];
+
 #ifdef CONFIG_PROC_FS
 extern void register_irq_proc(unsigned int irq, struct irq_desc *desc);
 extern void register_handler_proc(unsigned int irq, struct irqaction *action);
diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c
new file mode 100644
index 000000000000..0178e2296990
--- /dev/null
+++ b/kernel/irq/numa_migrate.c
@@ -0,0 +1,127 @@
+/*
+ * linux/kernel/irq/handle.c
+ *
+ * Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar
+ * Copyright (C) 2005-2006, Thomas Gleixner, Russell King
+ *
+ * This file contains the core interrupt handling code.
+ *
+ * Detailed information is available in Documentation/DocBook/genericirq
+ *
+ */
+
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/random.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+
+#include "internals.h"
+
+static void init_copy_kstat_irqs(struct irq_desc *old_desc,
+				 struct irq_desc *desc,
+				 int cpu, int nr)
+{
+	unsigned long bytes;
+
+	init_kstat_irqs(desc, cpu, nr);
+
+	if (desc->kstat_irqs != old_desc->kstat_irqs) {
+		/* Compute how many bytes we need per irq and allocate them */
+		bytes = nr * sizeof(unsigned int);
+
+		memcpy(desc->kstat_irqs, old_desc->kstat_irqs, bytes);
+	}
+}
+
+static void free_kstat_irqs(struct irq_desc *old_desc, struct irq_desc *desc)
+{
+	if (old_desc->kstat_irqs == desc->kstat_irqs)
+		return;
+
+	kfree(old_desc->kstat_irqs);
+	old_desc->kstat_irqs = NULL;
+}
+
+static void init_copy_one_irq_desc(int irq, struct irq_desc *old_desc,
+		 struct irq_desc *desc, int cpu)
+{
+	memcpy(desc, old_desc, sizeof(struct irq_desc));
+	desc->cpu = cpu;
+	lockdep_set_class(&desc->lock, &irq_desc_lock_class);
+	init_copy_kstat_irqs(old_desc, desc, cpu, nr_cpu_ids);
+	arch_init_copy_chip_data(old_desc, desc, cpu);
+}
+
+static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc)
+{
+	free_kstat_irqs(old_desc, desc);
+	arch_free_chip_data(old_desc, desc);
+}
+
+static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
+						int cpu)
+{
+	struct irq_desc *desc;
+	unsigned int irq;
+	unsigned long flags;
+	int node;
+
+	irq = old_desc->irq;
+
+	spin_lock_irqsave(&sparse_irq_lock, flags);
+
+	/* We have to check it to avoid races with another CPU */
+	desc = irq_desc_ptrs[irq];
+
+	if (desc && old_desc != desc)
+			goto out_unlock;
+
+	node = cpu_to_node(cpu);
+	desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
+	printk(KERN_DEBUG "  move irq_desc for %d to cpu %d node %d\n",
+		 irq, cpu, node);
+	if (!desc) {
+		printk(KERN_ERR "can not get new irq_desc for moving\n");
+		/* still use old one */
+		desc = old_desc;
+		goto out_unlock;
+	}
+	init_copy_one_irq_desc(irq, old_desc, desc, cpu);
+
+	irq_desc_ptrs[irq] = desc;
+
+	/* free the old one */
+	free_one_irq_desc(old_desc, desc);
+	kfree(old_desc);
+
+out_unlock:
+	spin_unlock_irqrestore(&sparse_irq_lock, flags);
+
+	return desc;
+}
+
+struct irq_desc *move_irq_desc(struct irq_desc *desc, int cpu)
+{
+	int old_cpu;
+	int node, old_node;
+
+	/* those all static, do move them */
+	if (desc->irq < NR_IRQS_LEGACY)
+		return desc;
+
+	old_cpu = desc->cpu;
+	printk(KERN_DEBUG
+		 "try to move irq_desc from cpu %d to %d\n", old_cpu, cpu);
+	if (old_cpu != cpu) {
+		node = cpu_to_node(cpu);
+		old_node = cpu_to_node(old_cpu);
+		if (old_node != node)
+			desc = __real_move_irq_desc(desc, cpu);
+		else
+			desc->cpu = cpu;
+	}
+
+	return desc;
+}
+
-- 
cgit v1.2.3


From e08e1f7adba522378e8d2ae941bf25443866136d Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Tue, 16 Dec 2008 12:17:30 -0800
Subject: swiotlb: allow architectures to override phys<->bus<->phys
 conversions

Impact: generalize phys<->bus<->phys conversions in the swiotlb code

Architectures may need to override these conversions. Implement a
__weak hook point containing the default implementation.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/swiotlb.h |  3 +++
 lib/swiotlb.c           | 52 ++++++++++++++++++++++++++++++++++---------------
 2 files changed, 39 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 58b996a642f9..694f1839cbc0 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -27,6 +27,9 @@ swiotlb_init(void);
 extern void *swiotlb_alloc_boot(size_t bytes, unsigned long nslabs);
 extern void *swiotlb_alloc(unsigned order, unsigned long nslabs);
 
+extern dma_addr_t swiotlb_phys_to_bus(phys_addr_t address);
+extern phys_addr_t swiotlb_bus_to_phys(dma_addr_t address);
+
 extern void
 *swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 			dma_addr_t *dma_handle, gfp_t flags);
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 1272b23e4769..3494263cdd9a 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -125,6 +125,26 @@ void * __weak swiotlb_alloc(unsigned order, unsigned long nslabs)
 	return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order);
 }
 
+dma_addr_t __weak swiotlb_phys_to_bus(phys_addr_t paddr)
+{
+	return paddr;
+}
+
+phys_addr_t __weak swiotlb_bus_to_phys(dma_addr_t baddr)
+{
+	return baddr;
+}
+
+static dma_addr_t swiotlb_virt_to_bus(volatile void *address)
+{
+	return swiotlb_phys_to_bus(virt_to_phys(address));
+}
+
+static void *swiotlb_bus_to_virt(dma_addr_t address)
+{
+	return phys_to_virt(swiotlb_bus_to_phys(address));
+}
+
 /*
  * Statically reserve bounce buffer space and initialize bounce buffer data
  * structures for the software IO TLB used to implement the DMA API.
@@ -168,7 +188,7 @@ swiotlb_init_with_default_size(size_t default_size)
 		panic("Cannot allocate SWIOTLB overflow buffer!\n");
 
 	printk(KERN_INFO "Placing software IO TLB between 0x%lx - 0x%lx\n",
-	       virt_to_bus(io_tlb_start), virt_to_bus(io_tlb_end));
+	       swiotlb_virt_to_bus(io_tlb_start), swiotlb_virt_to_bus(io_tlb_end));
 }
 
 void __init
@@ -250,7 +270,7 @@ swiotlb_late_init_with_default_size(size_t default_size)
 
 	printk(KERN_INFO "Placing %luMB software IO TLB between 0x%lx - "
 	       "0x%lx\n", bytes >> 20,
-	       virt_to_bus(io_tlb_start), virt_to_bus(io_tlb_end));
+	       swiotlb_virt_to_bus(io_tlb_start), swiotlb_virt_to_bus(io_tlb_end));
 
 	return 0;
 
@@ -298,7 +318,7 @@ map_single(struct device *hwdev, char *buffer, size_t size, int dir)
 	unsigned long max_slots;
 
 	mask = dma_get_seg_boundary(hwdev);
-	start_dma_addr = virt_to_bus(io_tlb_start) & mask;
+	start_dma_addr = swiotlb_virt_to_bus(io_tlb_start) & mask;
 
 	offset_slots = ALIGN(start_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
 
@@ -475,7 +495,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 		dma_mask = hwdev->coherent_dma_mask;
 
 	ret = (void *)__get_free_pages(flags, order);
-	if (ret && !is_buffer_dma_capable(dma_mask, virt_to_bus(ret), size)) {
+	if (ret && !is_buffer_dma_capable(dma_mask, swiotlb_virt_to_bus(ret), size)) {
 		/*
 		 * The allocated memory isn't reachable by the device.
 		 * Fall back on swiotlb_map_single().
@@ -496,7 +516,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 	}
 
 	memset(ret, 0, size);
-	dev_addr = virt_to_bus(ret);
+	dev_addr = swiotlb_virt_to_bus(ret);
 
 	/* Confirm address can be DMA'd by device */
 	if (!is_buffer_dma_capable(dma_mask, dev_addr, size)) {
@@ -556,7 +576,7 @@ dma_addr_t
 swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size,
 			 int dir, struct dma_attrs *attrs)
 {
-	dma_addr_t dev_addr = virt_to_bus(ptr);
+	dma_addr_t dev_addr = swiotlb_virt_to_bus(ptr);
 	void *map;
 
 	BUG_ON(dir == DMA_NONE);
@@ -577,7 +597,7 @@ swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size,
 		map = io_tlb_overflow_buffer;
 	}
 
-	dev_addr = virt_to_bus(map);
+	dev_addr = swiotlb_virt_to_bus(map);
 
 	/*
 	 * Ensure that the address returned is DMA'ble
@@ -607,7 +627,7 @@ void
 swiotlb_unmap_single_attrs(struct device *hwdev, dma_addr_t dev_addr,
 			   size_t size, int dir, struct dma_attrs *attrs)
 {
-	char *dma_addr = bus_to_virt(dev_addr);
+	char *dma_addr = swiotlb_bus_to_virt(dev_addr);
 
 	BUG_ON(dir == DMA_NONE);
 	if (is_swiotlb_buffer(dma_addr))
@@ -637,7 +657,7 @@ static void
 swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
 		    size_t size, int dir, int target)
 {
-	char *dma_addr = bus_to_virt(dev_addr);
+	char *dma_addr = swiotlb_bus_to_virt(dev_addr);
 
 	BUG_ON(dir == DMA_NONE);
 	if (is_swiotlb_buffer(dma_addr))
@@ -668,7 +688,7 @@ swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr,
 			  unsigned long offset, size_t size,
 			  int dir, int target)
 {
-	char *dma_addr = bus_to_virt(dev_addr) + offset;
+	char *dma_addr = swiotlb_bus_to_virt(dev_addr) + offset;
 
 	BUG_ON(dir == DMA_NONE);
 	if (is_swiotlb_buffer(dma_addr))
@@ -724,7 +744,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
 
 	for_each_sg(sgl, sg, nelems, i) {
 		addr = SG_ENT_VIRT_ADDRESS(sg);
-		dev_addr = virt_to_bus(addr);
+		dev_addr = swiotlb_virt_to_bus(addr);
 		if (swiotlb_force ||
 		    address_needs_mapping(hwdev, dev_addr, sg->length)) {
 			void *map = map_single(hwdev, addr, sg->length, dir);
@@ -737,7 +757,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
 				sgl[0].dma_length = 0;
 				return 0;
 			}
-			sg->dma_address = virt_to_bus(map);
+			sg->dma_address = swiotlb_virt_to_bus(map);
 		} else
 			sg->dma_address = dev_addr;
 		sg->dma_length = sg->length;
@@ -768,7 +788,7 @@ swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
 
 	for_each_sg(sgl, sg, nelems, i) {
 		if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
-			unmap_single(hwdev, bus_to_virt(sg->dma_address),
+			unmap_single(hwdev, swiotlb_bus_to_virt(sg->dma_address),
 				     sg->dma_length, dir);
 		else if (dir == DMA_FROM_DEVICE)
 			dma_mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length);
@@ -801,7 +821,7 @@ swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
 
 	for_each_sg(sgl, sg, nelems, i) {
 		if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
-			sync_single(hwdev, bus_to_virt(sg->dma_address),
+			sync_single(hwdev, swiotlb_bus_to_virt(sg->dma_address),
 				    sg->dma_length, dir, target);
 		else if (dir == DMA_FROM_DEVICE)
 			dma_mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length);
@@ -825,7 +845,7 @@ swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
 int
 swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
 {
-	return (dma_addr == virt_to_bus(io_tlb_overflow_buffer));
+	return (dma_addr == swiotlb_virt_to_bus(io_tlb_overflow_buffer));
 }
 
 /*
@@ -837,7 +857,7 @@ swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
 int
 swiotlb_dma_supported(struct device *hwdev, u64 mask)
 {
-	return virt_to_bus(io_tlb_end - 1) <= mask;
+	return swiotlb_virt_to_bus(io_tlb_end - 1) <= mask;
 }
 
 EXPORT_SYMBOL(swiotlb_map_single);
-- 
cgit v1.2.3


From b81ea27b2329bf44b30c427800954f845896d476 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Tue, 16 Dec 2008 12:17:31 -0800
Subject: swiotlb: add arch hook to force mapping

Impact: generalize the sw-IOTLB range checks

Some architectures require special rules to determine whether a range
needs mapping or not.  This adds a weak function for architectures to
override.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/swiotlb.h |  2 ++
 lib/swiotlb.c           | 15 +++++++++++++--
 2 files changed, 15 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 694f1839cbc0..325af1de0351 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -30,6 +30,8 @@ extern void *swiotlb_alloc(unsigned order, unsigned long nslabs);
 extern dma_addr_t swiotlb_phys_to_bus(phys_addr_t address);
 extern phys_addr_t swiotlb_bus_to_phys(dma_addr_t address);
 
+extern int swiotlb_arch_range_needs_mapping(void *ptr, size_t size);
+
 extern void
 *swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 			dma_addr_t *dma_handle, gfp_t flags);
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 3494263cdd9a..d8b09051c455 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -145,6 +145,11 @@ static void *swiotlb_bus_to_virt(dma_addr_t address)
 	return phys_to_virt(swiotlb_bus_to_phys(address));
 }
 
+int __weak swiotlb_arch_range_needs_mapping(void *ptr, size_t size)
+{
+	return 0;
+}
+
 /*
  * Statically reserve bounce buffer space and initialize bounce buffer data
  * structures for the software IO TLB used to implement the DMA API.
@@ -297,6 +302,11 @@ address_needs_mapping(struct device *hwdev, dma_addr_t addr, size_t size)
 	return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size);
 }
 
+static inline int range_needs_mapping(void *ptr, size_t size)
+{
+	return swiotlb_force || swiotlb_arch_range_needs_mapping(ptr, size);
+}
+
 static int is_swiotlb_buffer(char *addr)
 {
 	return addr >= io_tlb_start && addr < io_tlb_end;
@@ -585,7 +595,8 @@ swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size,
 	 * we can safely return the device addr and not worry about bounce
 	 * buffering it.
 	 */
-	if (!address_needs_mapping(hwdev, dev_addr, size) && !swiotlb_force)
+	if (!address_needs_mapping(hwdev, dev_addr, size) &&
+	    !range_needs_mapping(ptr, size))
 		return dev_addr;
 
 	/*
@@ -745,7 +756,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
 	for_each_sg(sgl, sg, nelems, i) {
 		addr = SG_ENT_VIRT_ADDRESS(sg);
 		dev_addr = swiotlb_virt_to_bus(addr);
-		if (swiotlb_force ||
+		if (range_needs_mapping(sg_virt(sg), sg->length) ||
 		    address_needs_mapping(hwdev, dev_addr, sg->length)) {
 			void *map = map_single(hwdev, addr, sg->length, dir);
 			if (!map) {
-- 
cgit v1.2.3


From 74c8a6130486bed224e960790f4aa72dd09c061e Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Wed, 17 Dec 2008 19:40:33 +0900
Subject: locking, irq: enclose irq_desc_lock_class in CONFIG_LOCKDEP

Impact: simplify code

commit "08678b0: generic: sparse irqs: use irq_desc() [...]" introduced
the irq_desc_lock_class variable.

But it is used only if CONFIG_SPARSE_IRQ=Y or CONFIG_TRACE_IRQFLAGS=Y.
Otherwise, following warnings happen:

	CC      kernel/irq/handle.o
	kernel/irq/handle.c:26: warning: 'irq_desc_lock_class' defined but not used

Actually, current early_init_irq_lock_class has a bit strange and messy ifdef.
In addition, it is not valueable.

1. this function is protected by !CONFIG_SPARSE_IRQ, but that is not necessary.
   if CONFIG_SPARSE_IRQ=Y, desc of all irq number are initialized by NULL
   at first - then this function calling is safe.

2. this function protected by CONFIG_TRACE_IRQFLAGS too. but it is not
   necessary either, because lockdep_set_class() doesn't have bad side
   effect even if CONFIG_TRACE_IRQFLAGS=n.

This patch bloat kernel size a bit on CONFIG_TRACE_IRQFLAGS=n and
CONFIG_SPARSE_IRQ=Y - but that's ok. early_init_irq_lock_class() is not
a fastpatch at all.

To avoid messy ifdefs is more important than a few bytes diet.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/lockdep.h | 2 +-
 kernel/irq/handle.c     | 5 -----
 2 files changed, 1 insertion(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 29aec6e10020..9dba554c802c 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -377,7 +377,7 @@ do {								\
 
 #endif /* CONFIG_LOCK_STAT */
 
-#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_GENERIC_HARDIRQS)
+#ifdef CONFIG_GENERIC_HARDIRQS
 extern void early_init_irq_lock_class(void);
 #else
 static inline void early_init_irq_lock_class(void)
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index f1a23069c20a..6492400cb50d 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -422,11 +422,8 @@ out:
 }
 #endif
 
-
-#ifdef CONFIG_TRACE_IRQFLAGS
 void early_init_irq_lock_class(void)
 {
-#ifndef CONFIG_SPARSE_IRQ
 	struct irq_desc *desc;
 	int i;
 
@@ -436,9 +433,7 @@ void early_init_irq_lock_class(void)
 
 		lockdep_set_class(&desc->lock, &irq_desc_lock_class);
 	}
-#endif
 }
-#endif
 
 #ifdef CONFIG_SPARSE_IRQ
 unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
-- 
cgit v1.2.3


From 64db4cfff99c04cd5f550357edcc8780f96b54a2 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 18 Dec 2008 21:55:32 +0100
Subject: "Tree RCU": scalable classic RCU implementation

This patch fixes a long-standing performance bug in classic RCU that
results in massive internal-to-RCU lock contention on systems with
more than a few hundred CPUs.  Although this patch creates a separate
flavor of RCU for ease of review and patch maintenance, it is intended
to replace classic RCU.

This patch still handles stress better than does mainline, so I am still
calling it ready for inclusion.  This patch is against the -tip tree.
Nevertheless, experience on an actual 1000+ CPU machine would still be
most welcome.

Most of the changes noted below were found while creating an rcutiny
(which should permit ejecting the current rcuclassic) and while doing
detailed line-by-line documentation.

Updates from v9 (http://lkml.org/lkml/2008/12/2/334):

o	Fixes from remainder of line-by-line code walkthrough,
	including comment spelling, initialization, undesirable
	narrowing due to type conversion, removing redundant memory
	barriers, removing redundant local-variable initialization,
	and removing redundant local variables.

	I do not believe that any of these fixes address the CPU-hotplug
	issues that Andi Kleen was seeing, but please do give it a whirl
	in case the machine is smarter than I am.

	A writeup from the walkthrough may be found at the following
	URL, in case you are suffering from terminal insomnia or
	masochism:

	http://www.kernel.org/pub/linux/kernel/people/paulmck/tmp/rcutree-walkthrough.2008.12.16a.pdf

o	Made rcutree tracing use seq_file, as suggested some time
	ago by Lai Jiangshan.

o	Added a .csv variant of the rcudata debugfs trace file, to allow
	people having thousands of CPUs to drop the data into
	a spreadsheet.	Tested with oocalc and gnumeric.  Updated
	documentation to suit.

Updates from v8 (http://lkml.org/lkml/2008/11/15/139):

o	Fix a theoretical race between grace-period initialization and
	force_quiescent_state() that could occur if more than three
	jiffies were required to carry out the grace-period
	initialization.  Which it might, if you had enough CPUs.

o	Apply Ingo's printk-standardization patch.

o	Substitute local variables for repeated accesses to global
	variables.

o	Fix comment misspellings and redundant (but harmless) increments
	of ->n_rcu_pending (this latter after having explicitly added it).

o	Apply checkpatch fixes.

Updates from v7 (http://lkml.org/lkml/2008/10/10/291):

o	Fixed a number of problems noted by Gautham Shenoy, including
	the cpu-stall-detection bug that he was having difficulty
	convincing me was real.  ;-)

o	Changed cpu-stall detection to wait for ten seconds rather than
	three in order to reduce false positive, as suggested by Ingo
	Molnar.

o	Produced a design document (http://lwn.net/Articles/305782/).
	The act of writing this document uncovered a number of both
	theoretical and "here and now" bugs as noted below.

o	Fix dynticks_nesting accounting confusion, simplify WARN_ON()
	condition, fix kerneldoc comments, and add memory barriers
	in dynticks interface functions.

o	Add more data to tracing.

o	Remove unused "rcu_barrier" field from rcu_data structure.

o	Count calls to rcu_pending() from scheduling-clock interrupt
	to use as a surrogate timebase should jiffies stop counting.

o	Fix a theoretical race between force_quiescent_state() and
	grace-period initialization.  Yes, initialization does have to
	go on for some jiffies for this race to occur, but given enough
	CPUs...

Updates from v6 (http://lkml.org/lkml/2008/9/23/448):

o	Fix a number of checkpatch.pl complaints.

o	Apply review comments from Ingo Molnar and Lai Jiangshan
	on the stall-detection code.

o	Fix several bugs in !CONFIG_SMP builds.

o	Fix a misspelled config-parameter name so that RCU now announces
	at boot time if stall detection is configured.

o	Run tests on numerous combinations of configurations parameters,
	which after the fixes above, now build and run correctly.

Updates from v5 (http://lkml.org/lkml/2008/9/15/92, bad subject line):

o	Fix a compiler error in the !CONFIG_FANOUT_EXACT case (blew a
	changeset some time ago, and finally got around to retesting
	this option).

o	Fix some tracing bugs in rcupreempt that caused incorrect
	totals to be printed.

o	I now test with a more brutal random-selection online/offline
	script (attached).  Probably more brutal than it needs to be
	on the people reading it as well, but so it goes.

o	A number of optimizations and usability improvements:

	o	Make rcu_pending() ignore the grace-period timeout when
		there is no grace period in progress.

	o	Make force_quiescent_state() avoid going for a global
		lock in the case where there is no grace period in
		progress.

	o	Rearrange struct fields to improve struct layout.

	o	Make call_rcu() initiate a grace period if RCU was
		idle, rather than waiting for the next scheduling
		clock interrupt.

	o	Invoke rcu_irq_enter() and rcu_irq_exit() only when
		idle, as suggested by Andi Kleen.  I still don't
		completely trust this change, and might back it out.

	o	Make CONFIG_RCU_TRACE be the single config variable
		manipulated for all forms of RCU, instead of the prior
		confusion.

	o	Document tracing files and formats for both rcupreempt
		and rcutree.

Updates from v4 for those missing v5 given its bad subject line:

o	Separated dynticks interface so that NMIs and irqs call separate
	functions, greatly simplifying it.  In particular, this code
	no longer requires a proof of correctness.  ;-)

o	Separated dynticks state out into its own per-CPU structure,
	avoiding the duplicated accounting.

o	The case where a dynticks-idle CPU runs an irq handler that
	invokes call_rcu() is now correctly handled, forcing that CPU
	out of dynticks-idle mode.

o	Review comments have been applied (thank you all!!!).
	For but one example, fixed the dynticks-ordering issue that
	Manfred pointed out, saving me much debugging.  ;-)

o	Adjusted rcuclassic and rcupreempt to handle dynticks changes.

Attached is an updated patch to Classic RCU that applies a hierarchy,
greatly reducing the contention on the top-level lock for large machines.
This passes 10-hour concurrent rcutorture and online-offline testing on
128-CPU ppc64 without dynticks enabled, and exposes some timekeeping
bugs in presence of dynticks (exciting working on a system where
"sleep 1" hangs until interrupted...), which were fixed in the
2.6.27 kernel.  It is getting more reliable than mainline by some
measures, so the next version will be against -tip for inclusion.
See also Manfred Spraul's recent patches (or his earlier work from
2004 at http://marc.info/?l=linux-kernel&m=108546384711797&w=2).
We will converge onto a common patch in the fullness of time, but are
currently exploring different regions of the design space.  That said,
I have already gratefully stolen quite a few of Manfred's ideas.

This patch provides CONFIG_RCU_FANOUT, which controls the bushiness
of the RCU hierarchy.  Defaults to 32 on 32-bit machines and 64 on
64-bit machines.  If CONFIG_NR_CPUS is less than CONFIG_RCU_FANOUT,
there is no hierarchy.  By default, the RCU initialization code will
adjust CONFIG_RCU_FANOUT to balance the hierarchy, so strongly NUMA
architectures may choose to set CONFIG_RCU_FANOUT_EXACT to disable
this balancing, allowing the hierarchy to be exactly aligned to the
underlying hardware.  Up to two levels of hierarchy are permitted
(in addition to the root node), allowing up to 16,384 CPUs on 32-bit
systems and up to 262,144 CPUs on 64-bit systems.  I just know that I
am going to regret saying this, but this seems more than sufficient
for the foreseeable future.  (Some architectures might wish to set
CONFIG_RCU_FANOUT=4, which would limit such architectures to 64 CPUs.
If this becomes a real problem, additional levels can be added, but I
doubt that it will make a significant difference on real hardware.)

In the common case, a given CPU will manipulate its private rcu_data
structure and the rcu_node structure that it shares with its immediate
neighbors.  This can reduce both lock and memory contention by multiple
orders of magnitude, which should eliminate the need for the strange
manipulations that are reported to be required when running Linux on
very large systems.

Some shortcomings:

o	More bugs will probably surface as a result of an ongoing
	line-by-line code inspection.

	Patches will be provided as required.

o	There are probably hangs, rcutorture failures, &c.  Seems
	quite stable on a 128-CPU machine, but that is kind of small
	compared to 4096 CPUs.  However, seems to do better than
	mainline.

	Patches will be provided as required.

o	The memory footprint of this version is several KB larger
	than rcuclassic.

	A separate UP-only rcutiny patch will be provided, which will
	reduce the memory footprint significantly, even compared
	to the old rcuclassic.  One such patch passes light testing,
	and has a memory footprint smaller even than rcuclassic.
	Initial reaction from various embedded guys was "it is not
	worth it", so am putting it aside.

Credits:

o	Manfred Spraul for ideas, review comments, and bugs spotted,
	as well as some good friendly competition.  ;-)

o	Josh Triplett, Ingo Molnar, Peter Zijlstra, Mathieu Desnoyers,
	Lai Jiangshan, Andi Kleen, Andy Whitcroft, and Andrew Morton
	for reviews and comments.

o	Thomas Gleixner for much-needed help with some timer issues
	(see patches below).

o	Jon M. Tollefson, Tim Pepper, Andrew Theurer, Jose R. Santos,
	Andy Whitcroft, Darrick Wong, Nishanth Aravamudan, Anton
	Blanchard, Dave Kleikamp, and Nathan Lynch for keeping machines
	alive despite my heavy abuse^Wtesting.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/RCU/00-INDEX             |    2 +
 Documentation/RCU/trace.txt            |  413 +++++++++
 arch/powerpc/platforms/pseries/rtasd.c |    4 +
 include/linux/hardirq.h                |   14 +-
 include/linux/rcupdate.h               |   10 +-
 include/linux/rcutree.h                |  329 +++++++
 init/Kconfig                           |   18 +-
 kernel/Kconfig.preempt                 |   62 +-
 kernel/Makefile                        |    6 +-
 kernel/rcupreempt.c                    |   10 +
 kernel/rcupreempt_trace.c              |   10 +-
 kernel/rcutree.c                       | 1535 ++++++++++++++++++++++++++++++++
 kernel/rcutree_trace.c                 |  271 ++++++
 kernel/softirq.c                       |    5 +-
 lib/Kconfig.debug                      |   13 +
 15 files changed, 2671 insertions(+), 31 deletions(-)
 create mode 100644 Documentation/RCU/trace.txt
 create mode 100644 include/linux/rcutree.h
 create mode 100644 kernel/rcutree.c
 create mode 100644 kernel/rcutree_trace.c

(limited to 'include/linux')

diff --git a/Documentation/RCU/00-INDEX b/Documentation/RCU/00-INDEX
index 461481dfb7c3..7dc0695a8f90 100644
--- a/Documentation/RCU/00-INDEX
+++ b/Documentation/RCU/00-INDEX
@@ -16,6 +16,8 @@ RTFP.txt
 	- List of RCU papers (bibliography) going back to 1980.
 torture.txt
 	- RCU Torture Test Operation (CONFIG_RCU_TORTURE_TEST)
+trace.txt
+	- CONFIG_RCU_TRACE debugfs files and formats
 UP.txt
 	- RCU on Uniprocessor Systems
 whatisRCU.txt
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt
new file mode 100644
index 000000000000..068848240a8b
--- /dev/null
+++ b/Documentation/RCU/trace.txt
@@ -0,0 +1,413 @@
+CONFIG_RCU_TRACE debugfs Files and Formats
+
+
+The rcupreempt and rcutree implementations of RCU provide debugfs trace
+output that summarizes counters and state.  This information is useful for
+debugging RCU itself, and can sometimes also help to debug abuses of RCU.
+Note that the rcuclassic implementation of RCU does not provide debugfs
+trace output.
+
+The following sections describe the debugfs files and formats for
+preemptable RCU (rcupreempt) and hierarchical RCU (rcutree).
+
+
+Preemptable RCU debugfs Files and Formats
+
+This implementation of RCU provides three debugfs files under the
+top-level directory RCU: rcu/rcuctrs (which displays the per-CPU
+counters used by preemptable RCU) rcu/rcugp (which displays grace-period
+counters), and rcu/rcustats (which internal counters for debugging RCU).
+
+The output of "cat rcu/rcuctrs" looks as follows:
+
+CPU last cur F M
+  0    5  -5 0 0
+  1   -1   0 0 0
+  2    0   1 0 0
+  3    0   1 0 0
+  4    0   1 0 0
+  5    0   1 0 0
+  6    0   2 0 0
+  7    0  -1 0 0
+  8    0   1 0 0
+ggp = 26226, state = waitzero
+
+The per-CPU fields are as follows:
+
+o	"CPU" gives the CPU number.  Offline CPUs are not displayed.
+
+o	"last" gives the value of the counter that is being decremented
+	for the current grace period phase.  In the example above,
+	the counters sum to 4, indicating that there are still four
+	RCU read-side critical sections still running that started
+	before the last counter flip.
+
+o	"cur" gives the value of the counter that is currently being
+	both incremented (by rcu_read_lock()) and decremented (by
+	rcu_read_unlock()).  In the example above, the counters sum to
+	1, indicating that there is only one RCU read-side critical section
+	still running that started after the last counter flip.
+
+o	"F" indicates whether RCU is waiting for this CPU to acknowledge
+	a counter flip.  In the above example, RCU is not waiting on any,
+	which is consistent with the state being "waitzero" rather than
+	"waitack".
+
+o	"M" indicates whether RCU is waiting for this CPU to execute a
+	memory barrier.  In the above example, RCU is not waiting on any,
+	which is consistent with the state being "waitzero" rather than
+	"waitmb".
+
+o	"ggp" is the global grace-period counter.
+
+o	"state" is the RCU state, which can be one of the following:
+
+	o	"idle": there is no grace period in progress.
+
+	o	"waitack": RCU just incremented the global grace-period
+		counter, which has the effect of reversing the roles of
+		the "last" and "cur" counters above, and is waiting for
+		all the CPUs to acknowledge the flip.  Once the flip has
+		been acknowledged, CPUs will no longer be incrementing
+		what are now the "last" counters, so that their sum will
+		decrease monotonically down to zero.
+
+	o	"waitzero": RCU is waiting for the sum of the "last" counters
+		to decrease to zero.
+
+	o	"waitmb": RCU is waiting for each CPU to execute a memory
+		barrier, which ensures that instructions from a given CPU's
+		last RCU read-side critical section cannot be reordered
+		with instructions following the memory-barrier instruction.
+
+The output of "cat rcu/rcugp" looks as follows:
+
+oldggp=48870  newggp=48873
+
+Note that reading from this file provokes a synchronize_rcu().  The
+"oldggp" value is that of "ggp" from rcu/rcuctrs above, taken before
+executing the synchronize_rcu(), and the "newggp" value is also the
+"ggp" value, but taken after the synchronize_rcu() command returns.
+
+
+The output of "cat rcu/rcugp" looks as follows:
+
+na=1337955 nl=40 wa=1337915 wl=44 da=1337871 dl=0 dr=1337871 di=1337871
+1=50989 e1=6138 i1=49722 ie1=82 g1=49640 a1=315203 ae1=265563 a2=49640
+z1=1401244 ze1=1351605 z2=49639 m1=5661253 me1=5611614 m2=49639
+
+These are counters tracking internal preemptable-RCU events, however,
+some of them may be useful for debugging algorithms using RCU.  In
+particular, the "nl", "wl", and "dl" values track the number of RCU
+callbacks in various states.  The fields are as follows:
+
+o	"na" is the total number of RCU callbacks that have been enqueued
+	since boot.
+
+o	"nl" is the number of RCU callbacks waiting for the previous
+	grace period to end so that they can start waiting on the next
+	grace period.
+
+o	"wa" is the total number of RCU callbacks that have started waiting
+	for a grace period since boot.  "na" should be roughly equal to
+	"nl" plus "wa".
+
+o	"wl" is the number of RCU callbacks currently waiting for their
+	grace period to end.
+
+o	"da" is the total number of RCU callbacks whose grace periods
+	have completed since boot.  "wa" should be roughly equal to
+	"wl" plus "da".
+
+o	"dr" is the total number of RCU callbacks that have been removed
+	from the list of callbacks ready to invoke.  "dr" should be roughly
+	equal to "da".
+
+o	"di" is the total number of RCU callbacks that have been invoked
+	since boot.  "di" should be roughly equal to "da", though some
+	early versions of preemptable RCU had a bug so that only the
+	last CPU's count of invocations was displayed, rather than the
+	sum of all CPU's counts.
+
+o	"1" is the number of calls to rcu_try_flip().  This should be
+	roughly equal to the sum of "e1", "i1", "a1", "z1", and "m1"
+	described below.  In other words, the number of times that
+	the state machine is visited should be equal to the sum of the
+	number of times that each state is visited plus the number of
+	times that the state-machine lock acquisition failed.
+
+o	"e1" is the number of times that rcu_try_flip() was unable to
+	acquire the fliplock.
+
+o	"i1" is the number of calls to rcu_try_flip_idle().
+
+o	"ie1" is the number of times rcu_try_flip_idle() exited early
+	due to the calling CPU having no work for RCU.
+
+o	"g1" is the number of times that rcu_try_flip_idle() decided
+	to start a new grace period.  "i1" should be roughly equal to
+	"ie1" plus "g1".
+
+o	"a1" is the number of calls to rcu_try_flip_waitack().
+
+o	"ae1" is the number of times that rcu_try_flip_waitack() found
+	that at least one CPU had not yet acknowledge the new grace period
+	(AKA "counter flip").
+
+o	"a2" is the number of time rcu_try_flip_waitack() found that
+	all CPUs had acknowledged.  "a1" should be roughly equal to
+	"ae1" plus "a2".  (This particular output was collected on
+	a 128-CPU machine, hence the smaller-than-usual fraction of
+	calls to rcu_try_flip_waitack() finding all CPUs having already
+	acknowledged.)
+
+o	"z1" is the number of calls to rcu_try_flip_waitzero().
+
+o	"ze1" is the number of times that rcu_try_flip_waitzero() found
+	that not all of the old RCU read-side critical sections had
+	completed.
+
+o	"z2" is the number of times that rcu_try_flip_waitzero() finds
+	the sum of the counters equal to zero, in other words, that
+	all of the old RCU read-side critical sections had completed.
+	The value of "z1" should be roughly equal to "ze1" plus
+	"z2".
+
+o	"m1" is the number of calls to rcu_try_flip_waitmb().
+
+o	"me1" is the number of times that rcu_try_flip_waitmb() finds
+	that at least one CPU has not yet executed a memory barrier.
+
+o	"m2" is the number of times that rcu_try_flip_waitmb() finds that
+	all CPUs have executed a memory barrier.
+
+
+Hierarchical RCU debugfs Files and Formats
+
+This implementation of RCU provides three debugfs files under the
+top-level directory RCU: rcu/rcudata (which displays fields in struct
+rcu_data), rcu/rcugp (which displays grace-period counters), and
+rcu/rcuhier (which displays the struct rcu_node hierarchy).
+
+The output of "cat rcu/rcudata" looks as follows:
+
+rcu:
+  0 c=4011 g=4012 pq=1 pqc=4011 qp=0 rpfq=1 rp=3c2a dt=23301/73 dn=2 df=1882 of=0 ri=2126 ql=2 b=10
+  1 c=4011 g=4012 pq=1 pqc=4011 qp=0 rpfq=3 rp=39a6 dt=78073/1 dn=2 df=1402 of=0 ri=1875 ql=46 b=10
+  2 c=4010 g=4010 pq=1 pqc=4010 qp=0 rpfq=-5 rp=1d12 dt=16646/0 dn=2 df=3140 of=0 ri=2080 ql=0 b=10
+  3 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=2b50 dt=21159/1 dn=2 df=2230 of=0 ri=1923 ql=72 b=10
+  4 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=1644 dt=5783/1 dn=2 df=3348 of=0 ri=2805 ql=7 b=10
+  5 c=4012 g=4013 pq=0 pqc=4011 qp=1 rpfq=3 rp=1aac dt=5879/1 dn=2 df=3140 of=0 ri=2066 ql=10 b=10
+  6 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=ed8 dt=5847/1 dn=2 df=3797 of=0 ri=1266 ql=10 b=10
+  7 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=1fa2 dt=6199/1 dn=2 df=2795 of=0 ri=2162 ql=28 b=10
+rcu_bh:
+  0 c=-268 g=-268 pq=1 pqc=-268 qp=0 rpfq=-145 rp=21d6 dt=23301/73 dn=2 df=0 of=0 ri=0 ql=0 b=10
+  1 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-170 rp=20ce dt=78073/1 dn=2 df=26 of=0 ri=5 ql=0 b=10
+  2 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-83 rp=fbd dt=16646/0 dn=2 df=28 of=0 ri=4 ql=0 b=10
+  3 c=-268 g=-268 pq=1 pqc=-268 qp=0 rpfq=-105 rp=178c dt=21159/1 dn=2 df=28 of=0 ri=2 ql=0 b=10
+  4 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-30 rp=b54 dt=5783/1 dn=2 df=32 of=0 ri=0 ql=0 b=10
+  5 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-29 rp=df5 dt=5879/1 dn=2 df=30 of=0 ri=3 ql=0 b=10
+  6 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-28 rp=788 dt=5847/1 dn=2 df=32 of=0 ri=0 ql=0 b=10
+  7 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-53 rp=1098 dt=6199/1 dn=2 df=30 of=0 ri=3 ql=0 b=10
+
+The first section lists the rcu_data structures for rcu, the second for
+rcu_bh.  Each section has one line per CPU, or eight for this 8-CPU system.
+The fields are as follows:
+
+o	The number at the beginning of each line is the CPU number.
+	CPUs numbers followed by an exclamation mark are offline,
+	but have been online at least once since boot.	There will be
+	no output for CPUs that have never been online, which can be
+	a good thing in the surprisingly common case where NR_CPUS is
+	substantially larger than the number of actual CPUs.
+
+o	"c" is the count of grace periods that this CPU believes have
+	completed.  CPUs in dynticks idle mode may lag quite a ways
+	behind, for example, CPU 4 under "rcu" above, which has slept
+	through the past 25 RCU grace periods.	It is not unusual to
+	see CPUs lagging by thousands of grace periods.
+
+o	"g" is the count of grace periods that this CPU believes have
+	started.  Again, CPUs in dynticks idle mode may lag behind.
+	If the "c" and "g" values are equal, this CPU has already
+	reported a quiescent state for the last RCU grace period that
+	it is aware of, otherwise, the CPU believes that it owes RCU a
+	quiescent state.
+
+o	"pq" indicates that this CPU has passed through a quiescent state
+	for the current grace period.  It is possible for "pq" to be
+	"1" and "c" different than "g", which indicates that although
+	the CPU has passed through a quiescent state, either (1) this
+	CPU has not yet reported that fact, (2) some other CPU has not
+	yet reported for this grace period, or (3) both.
+
+o	"pqc" indicates which grace period the last-observed quiescent
+	state for this CPU corresponds to.  This is important for handling
+	the race between CPU 0 reporting an extended dynticks-idle
+	quiescent state for CPU 1 and CPU 1 suddenly waking up and
+	reporting its own quiescent state.  If CPU 1 was the last CPU
+	for the current grace period, then the CPU that loses this race
+	will attempt to incorrectly mark CPU 1 as having checked in for
+	the next grace period!
+
+o	"qp" indicates that RCU still expects a quiescent state from
+	this CPU.
+
+o	"rpfq" is the number of rcu_pending() calls on this CPU required
+	to induce this CPU to invoke force_quiescent_state().
+
+o	"rp" is low-order four hex digits of the count of how many times
+	rcu_pending() has been invoked on this CPU.
+
+o	"dt" is the current value of the dyntick counter that is incremented
+	when entering or leaving dynticks idle state, either by the
+	scheduler or by irq.  The number after the "/" is the interrupt
+	nesting depth when in dyntick-idle state, or one greater than
+	the interrupt-nesting depth otherwise.
+
+	This field is displayed only for CONFIG_NO_HZ kernels.
+
+o	"dn" is the current value of the dyntick counter that is incremented
+	when entering or leaving dynticks idle state via NMI.  If both
+	the "dt" and "dn" values are even, then this CPU is in dynticks
+	idle mode and may be ignored by RCU.  If either of these two
+	counters is odd, then RCU must be alert to the possibility of
+	an RCU read-side critical section running on this CPU.
+
+	This field is displayed only for CONFIG_NO_HZ kernels.
+
+o	"df" is the number of times that some other CPU has forced a
+	quiescent state on behalf of this CPU due to this CPU being in
+	dynticks-idle state.
+
+	This field is displayed only for CONFIG_NO_HZ kernels.
+
+o	"of" is the number of times that some other CPU has forced a
+	quiescent state on behalf of this CPU due to this CPU being
+	offline.  In a perfect world, this might neve happen, but it
+	turns out that offlining and onlining a CPU can take several grace
+	periods, and so there is likely to be an extended period of time
+	when RCU believes that the CPU is online when it really is not.
+	Please note that erring in the other direction (RCU believing a
+	CPU is offline when it is really alive and kicking) is a fatal
+	error, so it makes sense to err conservatively.
+
+o	"ri" is the number of times that RCU has seen fit to send a
+	reschedule IPI to this CPU in order to get it to report a
+	quiescent state.
+
+o	"ql" is the number of RCU callbacks currently residing on
+	this CPU.  This is the total number of callbacks, regardless
+	of what state they are in (new, waiting for grace period to
+	start, waiting for grace period to end, ready to invoke).
+
+o	"b" is the batch limit for this CPU.  If more than this number
+	of RCU callbacks is ready to invoke, then the remainder will
+	be deferred.
+
+
+The output of "cat rcu/rcugp" looks as follows:
+
+rcu: completed=33062  gpnum=33063
+rcu_bh: completed=464  gpnum=464
+
+Again, this output is for both "rcu" and "rcu_bh".  The fields are
+taken from the rcu_state structure, and are as follows:
+
+o	"completed" is the number of grace periods that have completed.
+	It is comparable to the "c" field from rcu/rcudata in that a
+	CPU whose "c" field matches the value of "completed" is aware
+	that the corresponding RCU grace period has completed.
+
+o	"gpnum" is the number of grace periods that have started.  It is
+	comparable to the "g" field from rcu/rcudata in that a CPU
+	whose "g" field matches the value of "gpnum" is aware that the
+	corresponding RCU grace period has started.
+
+	If these two fields are equal (as they are for "rcu_bh" above),
+	then there is no grace period in progress, in other words, RCU
+	is idle.  On the other hand, if the two fields differ (as they
+	do for "rcu" above), then an RCU grace period is in progress.
+
+
+The output of "cat rcu/rcuhier" looks as follows, with very long lines:
+
+c=6902 g=6903 s=2 jfq=3 j=72c7 nfqs=13142/nfqsng=0(13142) fqlh=6
+1/1 0:127 ^0    
+3/3 0:35 ^0    0/0 36:71 ^1    0/0 72:107 ^2    0/0 108:127 ^3    
+3/3f 0:5 ^0    2/3 6:11 ^1    0/0 12:17 ^2    0/0 18:23 ^3    0/0 24:29 ^4    0/0 30:35 ^5    0/0 36:41 ^0    0/0 42:47 ^1    0/0 48:53 ^2    0/0 54:59 ^3    0/0 60:65 ^4    0/0 66:71 ^5    0/0 72:77 ^0    0/0 78:83 ^1    0/0 84:89 ^2    0/0 90:95 ^3    0/0 96:101 ^4    0/0 102:107 ^5    0/0 108:113 ^0    0/0 114:119 ^1    0/0 120:125 ^2    0/0 126:127 ^3    
+rcu_bh:
+c=-226 g=-226 s=1 jfq=-5701 j=72c7 nfqs=88/nfqsng=0(88) fqlh=0
+0/1 0:127 ^0    
+0/3 0:35 ^0    0/0 36:71 ^1    0/0 72:107 ^2    0/0 108:127 ^3    
+0/3f 0:5 ^0    0/3 6:11 ^1    0/0 12:17 ^2    0/0 18:23 ^3    0/0 24:29 ^4    0/0 30:35 ^5    0/0 36:41 ^0    0/0 42:47 ^1    0/0 48:53 ^2    0/0 54:59 ^3    0/0 60:65 ^4    0/0 66:71 ^5    0/0 72:77 ^0    0/0 78:83 ^1    0/0 84:89 ^2    0/0 90:95 ^3    0/0 96:101 ^4    0/0 102:107 ^5    0/0 108:113 ^0    0/0 114:119 ^1    0/0 120:125 ^2    0/0 126:127 ^3
+
+This is once again split into "rcu" and "rcu_bh" portions.  The fields are
+as follows:
+
+o	"c" is exactly the same as "completed" under rcu/rcugp.
+
+o	"g" is exactly the same as "gpnum" under rcu/rcugp.
+
+o	"s" is the "signaled" state that drives force_quiescent_state()'s
+	state machine.
+
+o	"jfq" is the number of jiffies remaining for this grace period
+	before force_quiescent_state() is invoked to help push things
+	along.  Note that CPUs in dyntick-idle mode thoughout the grace
+	period will not report on their own, but rather must be check by
+	some other CPU via force_quiescent_state().
+
+o	"j" is the low-order four hex digits of the jiffies counter.
+	Yes, Paul did run into a number of problems that turned out to
+	be due to the jiffies counter no longer counting.  Why do you ask?
+
+o	"nfqs" is the number of calls to force_quiescent_state() since
+	boot.
+
+o	"nfqsng" is the number of useless calls to force_quiescent_state(),
+	where there wasn't actually a grace period active.  This can
+	happen due to races.  The number in parentheses is the difference
+	between "nfqs" and "nfqsng", or the number of times that
+	force_quiescent_state() actually did some real work.
+
+o	"fqlh" is the number of calls to force_quiescent_state() that
+	exited immediately (without even being counted in nfqs above)
+	due to contention on ->fqslock.
+
+o	Each element of the form "1/1 0:127 ^0" represents one struct
+	rcu_node.  Each line represents one level of the hierarchy, from
+	root to leaves.  It is best to think of the rcu_data structures
+	as forming yet another level after the leaves.  Note that there
+	might be either one, two, or three levels of rcu_node structures,
+	depending on the relationship between CONFIG_RCU_FANOUT and
+	CONFIG_NR_CPUS.
+	
+	o	The numbers separated by the "/" are the qsmask followed
+		by the qsmaskinit.  The qsmask will have one bit
+		set for each entity in the next lower level that
+		has not yet checked in for the current grace period.
+		The qsmaskinit will have one bit for each entity that is
+		currently expected to check in during each grace period.
+		The value of qsmaskinit is assigned to that of qsmask
+		at the beginning of each grace period.
+
+		For example, for "rcu", the qsmask of the first entry
+		of the lowest level is 0x14, meaning that we are still
+		waiting for CPUs 2 and 4 to check in for the current
+		grace period.
+
+	o	The numbers separated by the ":" are the range of CPUs
+		served by this struct rcu_node.  This can be helpful
+		in working out how the hierarchy is wired together.
+
+		For example, the first entry at the lowest level shows
+		"0:5", indicating that it covers CPUs 0 through 5.
+
+	o	The number after the "^" indicates the bit in the
+		next higher level rcu_node structure that this
+		rcu_node structure corresponds to.
+
+		For example, the first entry at the lowest level shows
+		"^0", indicating that it corresponds to bit zero in
+		the first entry at the middle level.
diff --git a/arch/powerpc/platforms/pseries/rtasd.c b/arch/powerpc/platforms/pseries/rtasd.c
index f4e55be2eea9..afad9f5ac0ac 100644
--- a/arch/powerpc/platforms/pseries/rtasd.c
+++ b/arch/powerpc/platforms/pseries/rtasd.c
@@ -208,6 +208,7 @@ void pSeries_log_error(char *buf, unsigned int err_type, int fatal)
 		break;
 	case ERR_TYPE_KERNEL_PANIC:
 	default:
+		WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */
 		spin_unlock_irqrestore(&rtasd_log_lock, s);
 		return;
 	}
@@ -227,6 +228,7 @@ void pSeries_log_error(char *buf, unsigned int err_type, int fatal)
 	/* Check to see if we need to or have stopped logging */
 	if (fatal || !logging_enabled) {
 		logging_enabled = 0;
+		WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */
 		spin_unlock_irqrestore(&rtasd_log_lock, s);
 		return;
 	}
@@ -249,11 +251,13 @@ void pSeries_log_error(char *buf, unsigned int err_type, int fatal)
 		else
 			rtas_log_start += 1;
 
+		WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */
 		spin_unlock_irqrestore(&rtasd_log_lock, s);
 		wake_up_interruptible(&rtas_log_wait);
 		break;
 	case ERR_TYPE_KERNEL_PANIC:
 	default:
+		WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */
 		spin_unlock_irqrestore(&rtasd_log_lock, s);
 		return;
 	}
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 181006cc94a0..9b70b9231693 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -118,13 +118,17 @@ static inline void account_system_vtime(struct task_struct *tsk)
 }
 #endif
 
-#if defined(CONFIG_PREEMPT_RCU) && defined(CONFIG_NO_HZ)
+#if defined(CONFIG_NO_HZ) && !defined(CONFIG_CLASSIC_RCU)
 extern void rcu_irq_enter(void);
 extern void rcu_irq_exit(void);
+extern void rcu_nmi_enter(void);
+extern void rcu_nmi_exit(void);
 #else
 # define rcu_irq_enter() do { } while (0)
 # define rcu_irq_exit() do { } while (0)
-#endif /* CONFIG_PREEMPT_RCU */
+# define rcu_nmi_enter() do { } while (0)
+# define rcu_nmi_exit() do { } while (0)
+#endif /* #if defined(CONFIG_NO_HZ) && !defined(CONFIG_CLASSIC_RCU) */
 
 /*
  * It is safe to do non-atomic ops on ->hardirq_context,
@@ -134,7 +138,6 @@ extern void rcu_irq_exit(void);
  */
 #define __irq_enter()					\
 	do {						\
-		rcu_irq_enter();			\
 		account_system_vtime(current);		\
 		add_preempt_count(HARDIRQ_OFFSET);	\
 		trace_hardirq_enter();			\
@@ -153,7 +156,6 @@ extern void irq_enter(void);
 		trace_hardirq_exit();			\
 		account_system_vtime(current);		\
 		sub_preempt_count(HARDIRQ_OFFSET);	\
-		rcu_irq_exit();				\
 	} while (0)
 
 /*
@@ -161,7 +163,7 @@ extern void irq_enter(void);
  */
 extern void irq_exit(void);
 
-#define nmi_enter()		do { lockdep_off(); __irq_enter(); } while (0)
-#define nmi_exit()		do { __irq_exit(); lockdep_on(); } while (0)
+#define nmi_enter()		do { lockdep_off(); rcu_nmi_enter(); __irq_enter(); } while (0)
+#define nmi_exit()		do { __irq_exit(); rcu_nmi_exit(); lockdep_on(); } while (0)
 
 #endif /* LINUX_HARDIRQ_H */
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 86f1f5e43e33..bfd289aff576 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -52,11 +52,15 @@ struct rcu_head {
 	void (*func)(struct rcu_head *head);
 };
 
-#ifdef CONFIG_CLASSIC_RCU
+#if defined(CONFIG_CLASSIC_RCU)
 #include <linux/rcuclassic.h>
-#else /* #ifdef CONFIG_CLASSIC_RCU */
+#elif defined(CONFIG_TREE_RCU)
+#include <linux/rcutree.h>
+#elif defined(CONFIG_PREEMPT_RCU)
 #include <linux/rcupreempt.h>
-#endif /* #else #ifdef CONFIG_CLASSIC_RCU */
+#else
+#error "Unknown RCU implementation specified to kernel configuration"
+#endif /* #else #if defined(CONFIG_CLASSIC_RCU) */
 
 #define RCU_HEAD_INIT 	{ .next = NULL, .func = NULL }
 #define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
new file mode 100644
index 000000000000..d4368b7975c3
--- /dev/null
+++ b/include/linux/rcutree.h
@@ -0,0 +1,329 @@
+/*
+ * Read-Copy Update mechanism for mutual exclusion (tree-based version)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corporation, 2008
+ *
+ * Author: Dipankar Sarma <dipankar@in.ibm.com>
+ *	   Paul E. McKenney <paulmck@linux.vnet.ibm.com> Hierarchical algorithm
+ *
+ * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
+ * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
+ *
+ * For detailed explanation of Read-Copy Update mechanism see -
+ * 	Documentation/RCU
+ */
+
+#ifndef __LINUX_RCUTREE_H
+#define __LINUX_RCUTREE_H
+
+#include <linux/cache.h>
+#include <linux/spinlock.h>
+#include <linux/threads.h>
+#include <linux/percpu.h>
+#include <linux/cpumask.h>
+#include <linux/seqlock.h>
+
+/*
+ * Define shape of hierarchy based on NR_CPUS and CONFIG_RCU_FANOUT.
+ * In theory, it should be possible to add more levels straightforwardly.
+ * In practice, this has not been tested, so there is probably some
+ * bug somewhere.
+ */
+#define MAX_RCU_LVLS 3
+#define RCU_FANOUT	      (CONFIG_RCU_FANOUT)
+#define RCU_FANOUT_SQ	      (RCU_FANOUT * RCU_FANOUT)
+#define RCU_FANOUT_CUBE	      (RCU_FANOUT_SQ * RCU_FANOUT)
+
+#if NR_CPUS <= RCU_FANOUT
+#  define NUM_RCU_LVLS	      1
+#  define NUM_RCU_LVL_0	      1
+#  define NUM_RCU_LVL_1	      (NR_CPUS)
+#  define NUM_RCU_LVL_2	      0
+#  define NUM_RCU_LVL_3	      0
+#elif NR_CPUS <= RCU_FANOUT_SQ
+#  define NUM_RCU_LVLS	      2
+#  define NUM_RCU_LVL_0	      1
+#  define NUM_RCU_LVL_1	      (((NR_CPUS) + RCU_FANOUT - 1) / RCU_FANOUT)
+#  define NUM_RCU_LVL_2	      (NR_CPUS)
+#  define NUM_RCU_LVL_3	      0
+#elif NR_CPUS <= RCU_FANOUT_CUBE
+#  define NUM_RCU_LVLS	      3
+#  define NUM_RCU_LVL_0	      1
+#  define NUM_RCU_LVL_1	      (((NR_CPUS) + RCU_FANOUT_SQ - 1) / RCU_FANOUT_SQ)
+#  define NUM_RCU_LVL_2	      (((NR_CPUS) + (RCU_FANOUT) - 1) / (RCU_FANOUT))
+#  define NUM_RCU_LVL_3	      NR_CPUS
+#else
+# error "CONFIG_RCU_FANOUT insufficient for NR_CPUS"
+#endif /* #if (NR_CPUS) <= RCU_FANOUT */
+
+#define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3)
+#define NUM_RCU_NODES (RCU_SUM - NR_CPUS)
+
+/*
+ * Dynticks per-CPU state.
+ */
+struct rcu_dynticks {
+	int dynticks_nesting;	/* Track nesting level, sort of. */
+	int dynticks;		/* Even value for dynticks-idle, else odd. */
+	int dynticks_nmi;	/* Even value for either dynticks-idle or */
+				/*  not in nmi handler, else odd.  So this */
+				/*  remains even for nmi from irq handler. */
+};
+
+/*
+ * Definition for node within the RCU grace-period-detection hierarchy.
+ */
+struct rcu_node {
+	spinlock_t lock;
+	unsigned long qsmask;	/* CPUs or groups that need to switch in */
+				/*  order for current grace period to proceed.*/
+	unsigned long qsmaskinit;
+				/* Per-GP initialization for qsmask. */
+	unsigned long grpmask;	/* Mask to apply to parent qsmask. */
+	int	grplo;		/* lowest-numbered CPU or group here. */
+	int	grphi;		/* highest-numbered CPU or group here. */
+	u8	grpnum;		/* CPU/group number for next level up. */
+	u8	level;		/* root is at level 0. */
+	struct rcu_node *parent;
+} ____cacheline_internodealigned_in_smp;
+
+/* Index values for nxttail array in struct rcu_data. */
+#define RCU_DONE_TAIL		0	/* Also RCU_WAIT head. */
+#define RCU_WAIT_TAIL		1	/* Also RCU_NEXT_READY head. */
+#define RCU_NEXT_READY_TAIL	2	/* Also RCU_NEXT head. */
+#define RCU_NEXT_TAIL		3
+#define RCU_NEXT_SIZE		4
+
+/* Per-CPU data for read-copy update. */
+struct rcu_data {
+	/* 1) quiescent-state and grace-period handling : */
+	long		completed;	/* Track rsp->completed gp number */
+					/*  in order to detect GP end. */
+	long		gpnum;		/* Highest gp number that this CPU */
+					/*  is aware of having started. */
+	long		passed_quiesc_completed;
+					/* Value of completed at time of qs. */
+	bool		passed_quiesc;	/* User-mode/idle loop etc. */
+	bool		qs_pending;	/* Core waits for quiesc state. */
+	bool		beenonline;	/* CPU online at least once. */
+	struct rcu_node *mynode;	/* This CPU's leaf of hierarchy */
+	unsigned long grpmask;		/* Mask to apply to leaf qsmask. */
+
+	/* 2) batch handling */
+	/*
+	 * If nxtlist is not NULL, it is partitioned as follows.
+	 * Any of the partitions might be empty, in which case the
+	 * pointer to that partition will be equal to the pointer for
+	 * the following partition.  When the list is empty, all of
+	 * the nxttail elements point to nxtlist, which is NULL.
+	 *
+	 * [*nxttail[RCU_NEXT_READY_TAIL], NULL = *nxttail[RCU_NEXT_TAIL]):
+	 *	Entries that might have arrived after current GP ended
+	 * [*nxttail[RCU_WAIT_TAIL], *nxttail[RCU_NEXT_READY_TAIL]):
+	 *	Entries known to have arrived before current GP ended
+	 * [*nxttail[RCU_DONE_TAIL], *nxttail[RCU_WAIT_TAIL]):
+	 *	Entries that batch # <= ->completed - 1: waiting for current GP
+	 * [nxtlist, *nxttail[RCU_DONE_TAIL]):
+	 *	Entries that batch # <= ->completed
+	 *	The grace period for these entries has completed, and
+	 *	the other grace-period-completed entries may be moved
+	 *	here temporarily in rcu_process_callbacks().
+	 */
+	struct rcu_head *nxtlist;
+	struct rcu_head **nxttail[RCU_NEXT_SIZE];
+	long		qlen; 	 	/* # of queued callbacks */
+	long		blimit;		/* Upper limit on a processed batch */
+
+#ifdef CONFIG_NO_HZ
+	/* 3) dynticks interface. */
+	struct rcu_dynticks *dynticks;	/* Shared per-CPU dynticks state. */
+	int dynticks_snap;		/* Per-GP tracking for dynticks. */
+	int dynticks_nmi_snap;		/* Per-GP tracking for dynticks_nmi. */
+#endif /* #ifdef CONFIG_NO_HZ */
+
+	/* 4) reasons this CPU needed to be kicked by force_quiescent_state */
+#ifdef CONFIG_NO_HZ
+	unsigned long dynticks_fqs;	/* Kicked due to dynticks idle. */
+#endif /* #ifdef CONFIG_NO_HZ */
+	unsigned long offline_fqs;	/* Kicked due to being offline. */
+	unsigned long resched_ipi;	/* Sent a resched IPI. */
+
+	/* 5) state to allow this CPU to force_quiescent_state on others */
+	long n_rcu_pending;		/* rcu_pending() calls since boot. */
+	long n_rcu_pending_force_qs;	/* when to force quiescent states. */
+
+	int cpu;
+};
+
+/* Values for signaled field in struct rcu_state. */
+#define RCU_GP_INIT		0	/* Grace period being initialized. */
+#define RCU_SAVE_DYNTICK	1	/* Need to scan dyntick state. */
+#define RCU_FORCE_QS		2	/* Need to force quiescent state. */
+#ifdef CONFIG_NO_HZ
+#define RCU_SIGNAL_INIT		RCU_SAVE_DYNTICK
+#else /* #ifdef CONFIG_NO_HZ */
+#define RCU_SIGNAL_INIT		RCU_FORCE_QS
+#endif /* #else #ifdef CONFIG_NO_HZ */
+
+#define RCU_JIFFIES_TILL_FORCE_QS	 3	/* for rsp->jiffies_force_qs */
+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+#define RCU_SECONDS_TILL_STALL_CHECK   (10 * HZ)  /* for rsp->jiffies_stall */
+#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ)  /* for rsp->jiffies_stall */
+#define RCU_STALL_RAT_DELAY		2	  /* Allow other CPUs time */
+						  /*  to take at least one */
+						  /*  scheduling clock irq */
+						  /*  before ratting on them. */
+
+#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
+
+/*
+ * RCU global state, including node hierarchy.  This hierarchy is
+ * represented in "heap" form in a dense array.  The root (first level)
+ * of the hierarchy is in ->node[0] (referenced by ->level[0]), the second
+ * level in ->node[1] through ->node[m] (->node[1] referenced by ->level[1]),
+ * and the third level in ->node[m+1] and following (->node[m+1] referenced
+ * by ->level[2]).  The number of levels is determined by the number of
+ * CPUs and by CONFIG_RCU_FANOUT.  Small systems will have a "hierarchy"
+ * consisting of a single rcu_node.
+ */
+struct rcu_state {
+	struct rcu_node node[NUM_RCU_NODES];	/* Hierarchy. */
+	struct rcu_node *level[NUM_RCU_LVLS];	/* Hierarchy levels. */
+	u32 levelcnt[MAX_RCU_LVLS + 1];		/* # nodes in each level. */
+	u8 levelspread[NUM_RCU_LVLS];		/* kids/node in each level. */
+	struct rcu_data *rda[NR_CPUS];		/* array of rdp pointers. */
+
+	/* The following fields are guarded by the root rcu_node's lock. */
+
+	u8	signaled ____cacheline_internodealigned_in_smp;
+						/* Force QS state. */
+	long	gpnum;				/* Current gp number. */
+	long	completed;			/* # of last completed gp. */
+	spinlock_t onofflock;			/* exclude on/offline and */
+						/*  starting new GP. */
+	spinlock_t fqslock;			/* Only one task forcing */
+						/*  quiescent states. */
+	unsigned long jiffies_force_qs;		/* Time at which to invoke */
+						/*  force_quiescent_state(). */
+	unsigned long n_force_qs;		/* Number of calls to */
+						/*  force_quiescent_state(). */
+	unsigned long n_force_qs_lh;		/* ~Number of calls leaving */
+						/*  due to lock unavailable. */
+	unsigned long n_force_qs_ngp;		/* Number of calls leaving */
+						/*  due to no GP active. */
+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+	unsigned long gp_start;			/* Time at which GP started, */
+						/*  but in jiffies. */
+	unsigned long jiffies_stall;		/* Time at which to check */
+						/*  for CPU stalls. */
+#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
+#ifdef CONFIG_NO_HZ
+	long dynticks_completed;		/* Value of completed @ snap. */
+#endif /* #ifdef CONFIG_NO_HZ */
+};
+
+extern struct rcu_state rcu_state;
+DECLARE_PER_CPU(struct rcu_data, rcu_data);
+
+extern struct rcu_state rcu_bh_state;
+DECLARE_PER_CPU(struct rcu_data, rcu_bh_data);
+
+/*
+ * Increment the quiescent state counter.
+ * The counter is a bit degenerated: We do not need to know
+ * how many quiescent states passed, just if there was at least
+ * one since the start of the grace period. Thus just a flag.
+ */
+static inline void rcu_qsctr_inc(int cpu)
+{
+	struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
+	rdp->passed_quiesc = 1;
+	rdp->passed_quiesc_completed = rdp->completed;
+}
+static inline void rcu_bh_qsctr_inc(int cpu)
+{
+	struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
+	rdp->passed_quiesc = 1;
+	rdp->passed_quiesc_completed = rdp->completed;
+}
+
+extern int rcu_pending(int cpu);
+extern int rcu_needs_cpu(int cpu);
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+extern struct lockdep_map rcu_lock_map;
+# define rcu_read_acquire()	\
+			lock_acquire(&rcu_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_)
+# define rcu_read_release()	lock_release(&rcu_lock_map, 1, _THIS_IP_)
+#else
+# define rcu_read_acquire()	do { } while (0)
+# define rcu_read_release()	do { } while (0)
+#endif
+
+static inline void __rcu_read_lock(void)
+{
+	preempt_disable();
+	__acquire(RCU);
+	rcu_read_acquire();
+}
+static inline void __rcu_read_unlock(void)
+{
+	rcu_read_release();
+	__release(RCU);
+	preempt_enable();
+}
+static inline void __rcu_read_lock_bh(void)
+{
+	local_bh_disable();
+	__acquire(RCU_BH);
+	rcu_read_acquire();
+}
+static inline void __rcu_read_unlock_bh(void)
+{
+	rcu_read_release();
+	__release(RCU_BH);
+	local_bh_enable();
+}
+
+#define __synchronize_sched() synchronize_rcu()
+
+#define call_rcu_sched(head, func) call_rcu(head, func)
+
+static inline void rcu_init_sched(void)
+{
+}
+
+extern void __rcu_init(void);
+extern void rcu_check_callbacks(int cpu, int user);
+extern void rcu_restart_cpu(int cpu);
+
+extern long rcu_batches_completed(void);
+extern long rcu_batches_completed_bh(void);
+
+#ifdef CONFIG_NO_HZ
+void rcu_enter_nohz(void);
+void rcu_exit_nohz(void);
+#else /* CONFIG_NO_HZ */
+static inline void rcu_enter_nohz(void)
+{
+}
+static inline void rcu_exit_nohz(void)
+{
+}
+#endif /* CONFIG_NO_HZ */
+
+#endif /* __LINUX_RCUTREE_H */
diff --git a/init/Kconfig b/init/Kconfig
index f763762d544a..9dd7958a71f0 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -928,10 +928,16 @@ source "block/Kconfig"
 config PREEMPT_NOTIFIERS
 	bool
 
-config CLASSIC_RCU
-	def_bool !PREEMPT_RCU
+config TREE_RCU_TRACE
+	def_bool RCU_TRACE && TREE_RCU
+	select DEBUG_FS
 	help
-	  This option selects the classic RCU implementation that is
-	  designed for best read-side performance on non-realtime
-	  systems.  Classic RCU is the default.  Note that the
-	  PREEMPT_RCU symbol is used to select/deselect this option.
+	  This option provides tracing for the TREE_RCU implementation,
+	  permitting Makefile to trivially select kernel/rcutree_trace.c.
+
+config PREEMPT_RCU_TRACE
+	def_bool RCU_TRACE && PREEMPT_RCU
+	select DEBUG_FS
+	help
+	  This option provides tracing for the PREEMPT_RCU implementation,
+	  permitting Makefile to trivially select kernel/rcupreempt_trace.c.
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index 9fdba03dc1fc..463f29743ea0 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -52,10 +52,29 @@ config PREEMPT
 
 endchoice
 
+choice
+	prompt "RCU Implementation"
+	default CLASSIC_RCU
+
+config CLASSIC_RCU
+	bool "Classic RCU"
+	help
+	  This option selects the classic RCU implementation that is
+	  designed for best read-side performance on non-realtime
+	  systems.
+	  
+	  Select this option if you are unsure.
+
+config TREE_RCU
+	bool "Tree-based hierarchical RCU"
+	help
+	  This option selects the RCU implementation that is
+	  designed for very large SMP system with hundreds or
+	  thousands of CPUs.
+
 config PREEMPT_RCU
 	bool "Preemptible RCU"
 	depends on PREEMPT
-	default n
 	help
 	  This option reduces the latency of the kernel by making certain
 	  RCU sections preemptible. Normally RCU code is non-preemptible, if
@@ -64,16 +83,47 @@ config PREEMPT_RCU
 	  now-naive assumptions about each RCU read-side critical section
 	  remaining on a given CPU through its execution.
 
-	  Say N if you are unsure.
+endchoice
 
 config RCU_TRACE
-	bool "Enable tracing for RCU - currently stats in debugfs"
-	depends on PREEMPT_RCU
-	select DEBUG_FS
-	default y
+	bool "Enable tracing for RCU"
+	depends on TREE_RCU || PREEMPT_RCU
 	help
 	  This option provides tracing in RCU which presents stats
 	  in debugfs for debugging RCU implementation.
 
 	  Say Y here if you want to enable RCU tracing
 	  Say N if you are unsure.
+
+config RCU_FANOUT
+	int "Tree-based hierarchical RCU fanout value"
+	range 2 64 if 64BIT
+	range 2 32 if !64BIT
+	depends on TREE_RCU
+	default 64 if 64BIT
+	default 32 if !64BIT
+	help
+	  This option controls the fanout of hierarchical implementations
+	  of RCU, allowing RCU to work efficiently on machines with
+	  large numbers of CPUs.  This value must be at least the cube
+	  root of NR_CPUS, which allows NR_CPUS up to 32,768 for 32-bit
+	  systems and up to 262,144 for 64-bit systems.
+
+	  Select a specific number if testing RCU itself.
+	  Take the default if unsure.
+
+config RCU_FANOUT_EXACT
+	bool "Disable tree-based hierarchical RCU auto-balancing"
+	depends on TREE_RCU
+	default n
+	help
+	  This option forces use of the exact RCU_FANOUT value specified,
+	  regardless of imbalances in the hierarchy.  This is useful for
+	  testing RCU itself, and might one day be useful on systems with
+	  strong NUMA behavior.
+
+	  Without RCU_FANOUT_EXACT, the code will balance the hierarchy.
+
+	  Say n if unsure.
+
+	
diff --git a/kernel/Makefile b/kernel/Makefile
index 19fad003b19d..b4fdbbff5ec0 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -74,10 +74,10 @@ obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
 obj-$(CONFIG_SECCOMP) += seccomp.o
 obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
 obj-$(CONFIG_CLASSIC_RCU) += rcuclassic.o
+obj-$(CONFIG_TREE_RCU) += rcutree.o
 obj-$(CONFIG_PREEMPT_RCU) += rcupreempt.o
-ifeq ($(CONFIG_PREEMPT_RCU),y)
-obj-$(CONFIG_RCU_TRACE) += rcupreempt_trace.o
-endif
+obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o
+obj-$(CONFIG_PREEMPT_RCU_TRACE) += rcupreempt_trace.o
 obj-$(CONFIG_RELAY) += relay.o
 obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
 obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c
index 59236e8b9daa..04982659875a 100644
--- a/kernel/rcupreempt.c
+++ b/kernel/rcupreempt.c
@@ -551,6 +551,16 @@ void rcu_irq_exit(void)
 	}
 }
 
+void rcu_nmi_enter(void)
+{
+	rcu_irq_enter();
+}
+
+void rcu_nmi_exit(void)
+{
+	rcu_irq_exit();
+}
+
 static void dyntick_save_progress_counter(int cpu)
 {
 	struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
diff --git a/kernel/rcupreempt_trace.c b/kernel/rcupreempt_trace.c
index 35c2d3360ecf..7c2665cac172 100644
--- a/kernel/rcupreempt_trace.c
+++ b/kernel/rcupreempt_trace.c
@@ -149,12 +149,12 @@ static void rcupreempt_trace_sum(struct rcupreempt_trace *sp)
 		sp->done_length += cp->done_length;
 		sp->done_add += cp->done_add;
 		sp->done_remove += cp->done_remove;
-		atomic_set(&sp->done_invoked, atomic_read(&cp->done_invoked));
+		atomic_add(atomic_read(&cp->done_invoked), &sp->done_invoked);
 		sp->rcu_check_callbacks += cp->rcu_check_callbacks;
-		atomic_set(&sp->rcu_try_flip_1,
-			   atomic_read(&cp->rcu_try_flip_1));
-		atomic_set(&sp->rcu_try_flip_e1,
-			   atomic_read(&cp->rcu_try_flip_e1));
+		atomic_add(atomic_read(&cp->rcu_try_flip_1),
+			   &sp->rcu_try_flip_1);
+		atomic_add(atomic_read(&cp->rcu_try_flip_e1),
+			   &sp->rcu_try_flip_e1);
 		sp->rcu_try_flip_i1 += cp->rcu_try_flip_i1;
 		sp->rcu_try_flip_ie1 += cp->rcu_try_flip_ie1;
 		sp->rcu_try_flip_g1 += cp->rcu_try_flip_g1;
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
new file mode 100644
index 000000000000..a342b032112c
--- /dev/null
+++ b/kernel/rcutree.c
@@ -0,0 +1,1535 @@
+/*
+ * Read-Copy Update mechanism for mutual exclusion
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corporation, 2008
+ *
+ * Authors: Dipankar Sarma <dipankar@in.ibm.com>
+ *	    Manfred Spraul <manfred@colorfullife.com>
+ *	    Paul E. McKenney <paulmck@linux.vnet.ibm.com> Hierarchical version
+ *
+ * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
+ * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
+ *
+ * For detailed explanation of Read-Copy Update mechanism see -
+ * 	Documentation/RCU
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/smp.h>
+#include <linux/rcupdate.h>
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <asm/atomic.h>
+#include <linux/bitops.h>
+#include <linux/module.h>
+#include <linux/completion.h>
+#include <linux/moduleparam.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/mutex.h>
+#include <linux/time.h>
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+static struct lock_class_key rcu_lock_key;
+struct lockdep_map rcu_lock_map =
+	STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key);
+EXPORT_SYMBOL_GPL(rcu_lock_map);
+#endif
+
+/* Data structures. */
+
+#define RCU_STATE_INITIALIZER(name) { \
+	.level = { &name.node[0] }, \
+	.levelcnt = { \
+		NUM_RCU_LVL_0,  /* root of hierarchy. */ \
+		NUM_RCU_LVL_1, \
+		NUM_RCU_LVL_2, \
+		NUM_RCU_LVL_3, /* == MAX_RCU_LVLS */ \
+	}, \
+	.signaled = RCU_SIGNAL_INIT, \
+	.gpnum = -300, \
+	.completed = -300, \
+	.onofflock = __SPIN_LOCK_UNLOCKED(&name.onofflock), \
+	.fqslock = __SPIN_LOCK_UNLOCKED(&name.fqslock), \
+	.n_force_qs = 0, \
+	.n_force_qs_ngp = 0, \
+}
+
+struct rcu_state rcu_state = RCU_STATE_INITIALIZER(rcu_state);
+DEFINE_PER_CPU(struct rcu_data, rcu_data);
+
+struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state);
+DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
+
+#ifdef CONFIG_NO_HZ
+DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks);
+#endif /* #ifdef CONFIG_NO_HZ */
+
+static int blimit = 10;		/* Maximum callbacks per softirq. */
+static int qhimark = 10000;	/* If this many pending, ignore blimit. */
+static int qlowmark = 100;	/* Once only this many pending, use blimit. */
+
+static void force_quiescent_state(struct rcu_state *rsp, int relaxed);
+
+/*
+ * Return the number of RCU batches processed thus far for debug & stats.
+ */
+long rcu_batches_completed(void)
+{
+	return rcu_state.completed;
+}
+EXPORT_SYMBOL_GPL(rcu_batches_completed);
+
+/*
+ * Return the number of RCU BH batches processed thus far for debug & stats.
+ */
+long rcu_batches_completed_bh(void)
+{
+	return rcu_bh_state.completed;
+}
+EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
+
+/*
+ * Does the CPU have callbacks ready to be invoked?
+ */
+static int
+cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp)
+{
+	return &rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL];
+}
+
+/*
+ * Does the current CPU require a yet-as-unscheduled grace period?
+ */
+static int
+cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
+{
+	/* ACCESS_ONCE() because we are accessing outside of lock. */
+	return *rdp->nxttail[RCU_DONE_TAIL] &&
+	       ACCESS_ONCE(rsp->completed) == ACCESS_ONCE(rsp->gpnum);
+}
+
+/*
+ * Return the root node of the specified rcu_state structure.
+ */
+static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
+{
+	return &rsp->node[0];
+}
+
+#ifdef CONFIG_SMP
+
+/*
+ * If the specified CPU is offline, tell the caller that it is in
+ * a quiescent state.  Otherwise, whack it with a reschedule IPI.
+ * Grace periods can end up waiting on an offline CPU when that
+ * CPU is in the process of coming online -- it will be added to the
+ * rcu_node bitmasks before it actually makes it online.  The same thing
+ * can happen while a CPU is in the process of coming online.  Because this
+ * race is quite rare, we check for it after detecting that the grace
+ * period has been delayed rather than checking each and every CPU
+ * each and every time we start a new grace period.
+ */
+static int rcu_implicit_offline_qs(struct rcu_data *rdp)
+{
+	/*
+	 * If the CPU is offline, it is in a quiescent state.  We can
+	 * trust its state not to change because interrupts are disabled.
+	 */
+	if (cpu_is_offline(rdp->cpu)) {
+		rdp->offline_fqs++;
+		return 1;
+	}
+
+	/* The CPU is online, so send it a reschedule IPI. */
+	if (rdp->cpu != smp_processor_id())
+		smp_send_reschedule(rdp->cpu);
+	else
+		set_need_resched();
+	rdp->resched_ipi++;
+	return 0;
+}
+
+#endif /* #ifdef CONFIG_SMP */
+
+#ifdef CONFIG_NO_HZ
+static DEFINE_RATELIMIT_STATE(rcu_rs, 10 * HZ, 5);
+
+/**
+ * rcu_enter_nohz - inform RCU that current CPU is entering nohz
+ *
+ * Enter nohz mode, in other words, -leave- the mode in which RCU
+ * read-side critical sections can occur.  (Though RCU read-side
+ * critical sections can occur in irq handlers in nohz mode, a possibility
+ * handled by rcu_irq_enter() and rcu_irq_exit()).
+ */
+void rcu_enter_nohz(void)
+{
+	unsigned long flags;
+	struct rcu_dynticks *rdtp;
+
+	smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
+	local_irq_save(flags);
+	rdtp = &__get_cpu_var(rcu_dynticks);
+	rdtp->dynticks++;
+	rdtp->dynticks_nesting--;
+	WARN_ON_RATELIMIT(rdtp->dynticks & 0x1, &rcu_rs);
+	local_irq_restore(flags);
+}
+
+/*
+ * rcu_exit_nohz - inform RCU that current CPU is leaving nohz
+ *
+ * Exit nohz mode, in other words, -enter- the mode in which RCU
+ * read-side critical sections normally occur.
+ */
+void rcu_exit_nohz(void)
+{
+	unsigned long flags;
+	struct rcu_dynticks *rdtp;
+
+	local_irq_save(flags);
+	rdtp = &__get_cpu_var(rcu_dynticks);
+	rdtp->dynticks++;
+	rdtp->dynticks_nesting++;
+	WARN_ON_RATELIMIT(!(rdtp->dynticks & 0x1), &rcu_rs);
+	local_irq_restore(flags);
+	smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
+}
+
+/**
+ * rcu_nmi_enter - inform RCU of entry to NMI context
+ *
+ * If the CPU was idle with dynamic ticks active, and there is no
+ * irq handler running, this updates rdtp->dynticks_nmi to let the
+ * RCU grace-period handling know that the CPU is active.
+ */
+void rcu_nmi_enter(void)
+{
+	struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
+
+	if (rdtp->dynticks & 0x1)
+		return;
+	rdtp->dynticks_nmi++;
+	WARN_ON_RATELIMIT(!(rdtp->dynticks_nmi & 0x1), &rcu_rs);
+	smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
+}
+
+/**
+ * rcu_nmi_exit - inform RCU of exit from NMI context
+ *
+ * If the CPU was idle with dynamic ticks active, and there is no
+ * irq handler running, this updates rdtp->dynticks_nmi to let the
+ * RCU grace-period handling know that the CPU is no longer active.
+ */
+void rcu_nmi_exit(void)
+{
+	struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
+
+	if (rdtp->dynticks & 0x1)
+		return;
+	smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
+	rdtp->dynticks_nmi++;
+	WARN_ON_RATELIMIT(rdtp->dynticks_nmi & 0x1, &rcu_rs);
+}
+
+/**
+ * rcu_irq_enter - inform RCU of entry to hard irq context
+ *
+ * If the CPU was idle with dynamic ticks active, this updates the
+ * rdtp->dynticks to let the RCU handling know that the CPU is active.
+ */
+void rcu_irq_enter(void)
+{
+	struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
+
+	if (rdtp->dynticks_nesting++)
+		return;
+	rdtp->dynticks++;
+	WARN_ON_RATELIMIT(!(rdtp->dynticks & 0x1), &rcu_rs);
+	smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
+}
+
+/**
+ * rcu_irq_exit - inform RCU of exit from hard irq context
+ *
+ * If the CPU was idle with dynamic ticks active, update the rdp->dynticks
+ * to put let the RCU handling be aware that the CPU is going back to idle
+ * with no ticks.
+ */
+void rcu_irq_exit(void)
+{
+	struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
+
+	if (--rdtp->dynticks_nesting)
+		return;
+	smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
+	rdtp->dynticks++;
+	WARN_ON_RATELIMIT(rdtp->dynticks & 0x1, &rcu_rs);
+
+	/* If the interrupt queued a callback, get out of dyntick mode. */
+	if (__get_cpu_var(rcu_data).nxtlist ||
+	    __get_cpu_var(rcu_bh_data).nxtlist)
+		set_need_resched();
+}
+
+/*
+ * Record the specified "completed" value, which is later used to validate
+ * dynticks counter manipulations.  Specify "rsp->completed - 1" to
+ * unconditionally invalidate any future dynticks manipulations (which is
+ * useful at the beginning of a grace period).
+ */
+static void dyntick_record_completed(struct rcu_state *rsp, long comp)
+{
+	rsp->dynticks_completed = comp;
+}
+
+#ifdef CONFIG_SMP
+
+/*
+ * Recall the previously recorded value of the completion for dynticks.
+ */
+static long dyntick_recall_completed(struct rcu_state *rsp)
+{
+	return rsp->dynticks_completed;
+}
+
+/*
+ * Snapshot the specified CPU's dynticks counter so that we can later
+ * credit them with an implicit quiescent state.  Return 1 if this CPU
+ * is already in a quiescent state courtesy of dynticks idle mode.
+ */
+static int dyntick_save_progress_counter(struct rcu_data *rdp)
+{
+	int ret;
+	int snap;
+	int snap_nmi;
+
+	snap = rdp->dynticks->dynticks;
+	snap_nmi = rdp->dynticks->dynticks_nmi;
+	smp_mb();	/* Order sampling of snap with end of grace period. */
+	rdp->dynticks_snap = snap;
+	rdp->dynticks_nmi_snap = snap_nmi;
+	ret = ((snap & 0x1) == 0) && ((snap_nmi & 0x1) == 0);
+	if (ret)
+		rdp->dynticks_fqs++;
+	return ret;
+}
+
+/*
+ * Return true if the specified CPU has passed through a quiescent
+ * state by virtue of being in or having passed through an dynticks
+ * idle state since the last call to dyntick_save_progress_counter()
+ * for this same CPU.
+ */
+static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
+{
+	long curr;
+	long curr_nmi;
+	long snap;
+	long snap_nmi;
+
+	curr = rdp->dynticks->dynticks;
+	snap = rdp->dynticks_snap;
+	curr_nmi = rdp->dynticks->dynticks_nmi;
+	snap_nmi = rdp->dynticks_nmi_snap;
+	smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
+
+	/*
+	 * If the CPU passed through or entered a dynticks idle phase with
+	 * no active irq/NMI handlers, then we can safely pretend that the CPU
+	 * already acknowledged the request to pass through a quiescent
+	 * state.  Either way, that CPU cannot possibly be in an RCU
+	 * read-side critical section that started before the beginning
+	 * of the current RCU grace period.
+	 */
+	if ((curr != snap || (curr & 0x1) == 0) &&
+	    (curr_nmi != snap_nmi || (curr_nmi & 0x1) == 0)) {
+		rdp->dynticks_fqs++;
+		return 1;
+	}
+
+	/* Go check for the CPU being offline. */
+	return rcu_implicit_offline_qs(rdp);
+}
+
+#endif /* #ifdef CONFIG_SMP */
+
+#else /* #ifdef CONFIG_NO_HZ */
+
+static void dyntick_record_completed(struct rcu_state *rsp, long comp)
+{
+}
+
+#ifdef CONFIG_SMP
+
+/*
+ * If there are no dynticks, then the only way that a CPU can passively
+ * be in a quiescent state is to be offline.  Unlike dynticks idle, which
+ * is a point in time during the prior (already finished) grace period,
+ * an offline CPU is always in a quiescent state, and thus can be
+ * unconditionally applied.  So just return the current value of completed.
+ */
+static long dyntick_recall_completed(struct rcu_state *rsp)
+{
+	return rsp->completed;
+}
+
+static int dyntick_save_progress_counter(struct rcu_data *rdp)
+{
+	return 0;
+}
+
+static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
+{
+	return rcu_implicit_offline_qs(rdp);
+}
+
+#endif /* #ifdef CONFIG_SMP */
+
+#endif /* #else #ifdef CONFIG_NO_HZ */
+
+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+
+static void record_gp_stall_check_time(struct rcu_state *rsp)
+{
+	rsp->gp_start = jiffies;
+	rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK;
+}
+
+static void print_other_cpu_stall(struct rcu_state *rsp)
+{
+	int cpu;
+	long delta;
+	unsigned long flags;
+	struct rcu_node *rnp = rcu_get_root(rsp);
+	struct rcu_node *rnp_cur = rsp->level[NUM_RCU_LVLS - 1];
+	struct rcu_node *rnp_end = &rsp->node[NUM_RCU_NODES];
+
+	/* Only let one CPU complain about others per time interval. */
+
+	spin_lock_irqsave(&rnp->lock, flags);
+	delta = jiffies - rsp->jiffies_stall;
+	if (delta < RCU_STALL_RAT_DELAY || rsp->gpnum == rsp->completed) {
+		spin_unlock_irqrestore(&rnp->lock, flags);
+		return;
+	}
+	rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
+	spin_unlock_irqrestore(&rnp->lock, flags);
+
+	/* OK, time to rat on our buddy... */
+
+	printk(KERN_ERR "INFO: RCU detected CPU stalls:");
+	for (; rnp_cur < rnp_end; rnp_cur++) {
+		if (rnp_cur->qsmask == 0)
+			continue;
+		for (cpu = 0; cpu <= rnp_cur->grphi - rnp_cur->grplo; cpu++)
+			if (rnp_cur->qsmask & (1UL << cpu))
+				printk(" %d", rnp_cur->grplo + cpu);
+	}
+	printk(" (detected by %d, t=%ld jiffies)\n",
+	       smp_processor_id(), (long)(jiffies - rsp->gp_start));
+	force_quiescent_state(rsp, 0);  /* Kick them all. */
+}
+
+static void print_cpu_stall(struct rcu_state *rsp)
+{
+	unsigned long flags;
+	struct rcu_node *rnp = rcu_get_root(rsp);
+
+	printk(KERN_ERR "INFO: RCU detected CPU %d stall (t=%lu jiffies)\n",
+			smp_processor_id(), jiffies - rsp->gp_start);
+	dump_stack();
+	spin_lock_irqsave(&rnp->lock, flags);
+	if ((long)(jiffies - rsp->jiffies_stall) >= 0)
+		rsp->jiffies_stall =
+			jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
+	spin_unlock_irqrestore(&rnp->lock, flags);
+	set_need_resched();  /* kick ourselves to get things going. */
+}
+
+static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
+{
+	long delta;
+	struct rcu_node *rnp;
+
+	delta = jiffies - rsp->jiffies_stall;
+	rnp = rdp->mynode;
+	if ((rnp->qsmask & rdp->grpmask) && delta >= 0) {
+
+		/* We haven't checked in, so go dump stack. */
+		print_cpu_stall(rsp);
+
+	} else if (rsp->gpnum != rsp->completed &&
+		   delta >= RCU_STALL_RAT_DELAY) {
+
+		/* They had two time units to dump stack, so complain. */
+		print_other_cpu_stall(rsp);
+	}
+}
+
+#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
+
+static void record_gp_stall_check_time(struct rcu_state *rsp)
+{
+}
+
+static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
+{
+}
+
+#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
+
+/*
+ * Update CPU-local rcu_data state to record the newly noticed grace period.
+ * This is used both when we started the grace period and when we notice
+ * that someone else started the grace period.
+ */
+static void note_new_gpnum(struct rcu_state *rsp, struct rcu_data *rdp)
+{
+	rdp->qs_pending = 1;
+	rdp->passed_quiesc = 0;
+	rdp->gpnum = rsp->gpnum;
+	rdp->n_rcu_pending_force_qs = rdp->n_rcu_pending +
+				      RCU_JIFFIES_TILL_FORCE_QS;
+}
+
+/*
+ * Did someone else start a new RCU grace period start since we last
+ * checked?  Update local state appropriately if so.  Must be called
+ * on the CPU corresponding to rdp.
+ */
+static int
+check_for_new_grace_period(struct rcu_state *rsp, struct rcu_data *rdp)
+{
+	unsigned long flags;
+	int ret = 0;
+
+	local_irq_save(flags);
+	if (rdp->gpnum != rsp->gpnum) {
+		note_new_gpnum(rsp, rdp);
+		ret = 1;
+	}
+	local_irq_restore(flags);
+	return ret;
+}
+
+/*
+ * Start a new RCU grace period if warranted, re-initializing the hierarchy
+ * in preparation for detecting the next grace period.  The caller must hold
+ * the root node's ->lock, which is released before return.  Hard irqs must
+ * be disabled.
+ */
+static void
+rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
+	__releases(rcu_get_root(rsp)->lock)
+{
+	struct rcu_data *rdp = rsp->rda[smp_processor_id()];
+	struct rcu_node *rnp = rcu_get_root(rsp);
+	struct rcu_node *rnp_cur;
+	struct rcu_node *rnp_end;
+
+	if (!cpu_needs_another_gp(rsp, rdp)) {
+		spin_unlock_irqrestore(&rnp->lock, flags);
+		return;
+	}
+
+	/* Advance to a new grace period and initialize state. */
+	rsp->gpnum++;
+	rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */
+	rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
+	rdp->n_rcu_pending_force_qs = rdp->n_rcu_pending +
+				      RCU_JIFFIES_TILL_FORCE_QS;
+	record_gp_stall_check_time(rsp);
+	dyntick_record_completed(rsp, rsp->completed - 1);
+	note_new_gpnum(rsp, rdp);
+
+	/*
+	 * Because we are first, we know that all our callbacks will
+	 * be covered by this upcoming grace period, even the ones
+	 * that were registered arbitrarily recently.
+	 */
+	rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
+	rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
+
+	/* Special-case the common single-level case. */
+	if (NUM_RCU_NODES == 1) {
+		rnp->qsmask = rnp->qsmaskinit;
+		spin_unlock_irqrestore(&rnp->lock, flags);
+		return;
+	}
+
+	spin_unlock(&rnp->lock);  /* leave irqs disabled. */
+
+
+	/* Exclude any concurrent CPU-hotplug operations. */
+	spin_lock(&rsp->onofflock);  /* irqs already disabled. */
+
+	/*
+	 * Set the quiescent-state-needed bits in all the non-leaf RCU
+	 * nodes for all currently online CPUs.  This operation relies
+	 * on the layout of the hierarchy within the rsp->node[] array.
+	 * Note that other CPUs will access only the leaves of the
+	 * hierarchy, which still indicate that no grace period is in
+	 * progress.  In addition, we have excluded CPU-hotplug operations.
+	 *
+	 * We therefore do not need to hold any locks.  Any required
+	 * memory barriers will be supplied by the locks guarding the
+	 * leaf rcu_nodes in the hierarchy.
+	 */
+
+	rnp_end = rsp->level[NUM_RCU_LVLS - 1];
+	for (rnp_cur = &rsp->node[0]; rnp_cur < rnp_end; rnp_cur++)
+		rnp_cur->qsmask = rnp_cur->qsmaskinit;
+
+	/*
+	 * Now set up the leaf nodes.  Here we must be careful.  First,
+	 * we need to hold the lock in order to exclude other CPUs, which
+	 * might be contending for the leaf nodes' locks.  Second, as
+	 * soon as we initialize a given leaf node, its CPUs might run
+	 * up the rest of the hierarchy.  We must therefore acquire locks
+	 * for each node that we touch during this stage.  (But we still
+	 * are excluding CPU-hotplug operations.)
+	 *
+	 * Note that the grace period cannot complete until we finish
+	 * the initialization process, as there will be at least one
+	 * qsmask bit set in the root node until that time, namely the
+	 * one corresponding to this CPU.
+	 */
+	rnp_end = &rsp->node[NUM_RCU_NODES];
+	rnp_cur = rsp->level[NUM_RCU_LVLS - 1];
+	for (; rnp_cur < rnp_end; rnp_cur++) {
+		spin_lock(&rnp_cur->lock);	/* irqs already disabled. */
+		rnp_cur->qsmask = rnp_cur->qsmaskinit;
+		spin_unlock(&rnp_cur->lock);	/* irqs already disabled. */
+	}
+
+	rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */
+	spin_unlock_irqrestore(&rsp->onofflock, flags);
+}
+
+/*
+ * Advance this CPU's callbacks, but only if the current grace period
+ * has ended.  This may be called only from the CPU to whom the rdp
+ * belongs.
+ */
+static void
+rcu_process_gp_end(struct rcu_state *rsp, struct rcu_data *rdp)
+{
+	long completed_snap;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	completed_snap = ACCESS_ONCE(rsp->completed);  /* outside of lock. */
+
+	/* Did another grace period end? */
+	if (rdp->completed != completed_snap) {
+
+		/* Advance callbacks.  No harm if list empty. */
+		rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[RCU_WAIT_TAIL];
+		rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_READY_TAIL];
+		rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
+
+		/* Remember that we saw this grace-period completion. */
+		rdp->completed = completed_snap;
+	}
+	local_irq_restore(flags);
+}
+
+/*
+ * Similar to cpu_quiet(), for which it is a helper function.  Allows
+ * a group of CPUs to be quieted at one go, though all the CPUs in the
+ * group must be represented by the same leaf rcu_node structure.
+ * That structure's lock must be held upon entry, and it is released
+ * before return.
+ */
+static void
+cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp, struct rcu_node *rnp,
+	      unsigned long flags)
+	__releases(rnp->lock)
+{
+	/* Walk up the rcu_node hierarchy. */
+	for (;;) {
+		if (!(rnp->qsmask & mask)) {
+
+			/* Our bit has already been cleared, so done. */
+			spin_unlock_irqrestore(&rnp->lock, flags);
+			return;
+		}
+		rnp->qsmask &= ~mask;
+		if (rnp->qsmask != 0) {
+
+			/* Other bits still set at this level, so done. */
+			spin_unlock_irqrestore(&rnp->lock, flags);
+			return;
+		}
+		mask = rnp->grpmask;
+		if (rnp->parent == NULL) {
+
+			/* No more levels.  Exit loop holding root lock. */
+
+			break;
+		}
+		spin_unlock_irqrestore(&rnp->lock, flags);
+		rnp = rnp->parent;
+		spin_lock_irqsave(&rnp->lock, flags);
+	}
+
+	/*
+	 * Get here if we are the last CPU to pass through a quiescent
+	 * state for this grace period.  Clean up and let rcu_start_gp()
+	 * start up the next grace period if one is needed.  Note that
+	 * we still hold rnp->lock, as required by rcu_start_gp(), which
+	 * will release it.
+	 */
+	rsp->completed = rsp->gpnum;
+	rcu_process_gp_end(rsp, rsp->rda[smp_processor_id()]);
+	rcu_start_gp(rsp, flags);  /* releases rnp->lock. */
+}
+
+/*
+ * Record a quiescent state for the specified CPU, which must either be
+ * the current CPU or an offline CPU.  The lastcomp argument is used to
+ * make sure we are still in the grace period of interest.  We don't want
+ * to end the current grace period based on quiescent states detected in
+ * an earlier grace period!
+ */
+static void
+cpu_quiet(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp)
+{
+	unsigned long flags;
+	unsigned long mask;
+	struct rcu_node *rnp;
+
+	rnp = rdp->mynode;
+	spin_lock_irqsave(&rnp->lock, flags);
+	if (lastcomp != ACCESS_ONCE(rsp->completed)) {
+
+		/*
+		 * Someone beat us to it for this grace period, so leave.
+		 * The race with GP start is resolved by the fact that we
+		 * hold the leaf rcu_node lock, so that the per-CPU bits
+		 * cannot yet be initialized -- so we would simply find our
+		 * CPU's bit already cleared in cpu_quiet_msk() if this race
+		 * occurred.
+		 */
+		rdp->passed_quiesc = 0;	/* try again later! */
+		spin_unlock_irqrestore(&rnp->lock, flags);
+		return;
+	}
+	mask = rdp->grpmask;
+	if ((rnp->qsmask & mask) == 0) {
+		spin_unlock_irqrestore(&rnp->lock, flags);
+	} else {
+		rdp->qs_pending = 0;
+
+		/*
+		 * This GP can't end until cpu checks in, so all of our
+		 * callbacks can be processed during the next GP.
+		 */
+		rdp = rsp->rda[smp_processor_id()];
+		rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
+
+		cpu_quiet_msk(mask, rsp, rnp, flags); /* releases rnp->lock */
+	}
+}
+
+/*
+ * Check to see if there is a new grace period of which this CPU
+ * is not yet aware, and if so, set up local rcu_data state for it.
+ * Otherwise, see if this CPU has just passed through its first
+ * quiescent state for this grace period, and record that fact if so.
+ */
+static void
+rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
+{
+	/* If there is now a new grace period, record and return. */
+	if (check_for_new_grace_period(rsp, rdp))
+		return;
+
+	/*
+	 * Does this CPU still need to do its part for current grace period?
+	 * If no, return and let the other CPUs do their part as well.
+	 */
+	if (!rdp->qs_pending)
+		return;
+
+	/*
+	 * Was there a quiescent state since the beginning of the grace
+	 * period? If no, then exit and wait for the next call.
+	 */
+	if (!rdp->passed_quiesc)
+		return;
+
+	/* Tell RCU we are done (but cpu_quiet() will be the judge of that). */
+	cpu_quiet(rdp->cpu, rsp, rdp, rdp->passed_quiesc_completed);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+/*
+ * Remove the outgoing CPU from the bitmasks in the rcu_node hierarchy
+ * and move all callbacks from the outgoing CPU to the current one.
+ */
+static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
+{
+	int i;
+	unsigned long flags;
+	long lastcomp;
+	unsigned long mask;
+	struct rcu_data *rdp = rsp->rda[cpu];
+	struct rcu_data *rdp_me;
+	struct rcu_node *rnp;
+
+	/* Exclude any attempts to start a new grace period. */
+	spin_lock_irqsave(&rsp->onofflock, flags);
+
+	/* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */
+	rnp = rdp->mynode;
+	mask = rdp->grpmask;	/* rnp->grplo is constant. */
+	do {
+		spin_lock(&rnp->lock);		/* irqs already disabled. */
+		rnp->qsmaskinit &= ~mask;
+		if (rnp->qsmaskinit != 0) {
+			spin_unlock(&rnp->lock); /* irqs already disabled. */
+			break;
+		}
+		mask = rnp->grpmask;
+		spin_unlock(&rnp->lock);	/* irqs already disabled. */
+		rnp = rnp->parent;
+	} while (rnp != NULL);
+	lastcomp = rsp->completed;
+
+	spin_unlock(&rsp->onofflock);		/* irqs remain disabled. */
+
+	/* Being offline is a quiescent state, so go record it. */
+	cpu_quiet(cpu, rsp, rdp, lastcomp);
+
+	/*
+	 * Move callbacks from the outgoing CPU to the running CPU.
+	 * Note that the outgoing CPU is now quiscent, so it is now
+	 * (uncharacteristically) safe to access it rcu_data structure.
+	 * Note also that we must carefully retain the order of the
+	 * outgoing CPU's callbacks in order for rcu_barrier() to work
+	 * correctly.  Finally, note that we start all the callbacks
+	 * afresh, even those that have passed through a grace period
+	 * and are therefore ready to invoke.  The theory is that hotplug
+	 * events are rare, and that if they are frequent enough to
+	 * indefinitely delay callbacks, you have far worse things to
+	 * be worrying about.
+	 */
+	rdp_me = rsp->rda[smp_processor_id()];
+	if (rdp->nxtlist != NULL) {
+		*rdp_me->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist;
+		rdp_me->nxttail[RCU_NEXT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
+		rdp->nxtlist = NULL;
+		for (i = 0; i < RCU_NEXT_SIZE; i++)
+			rdp->nxttail[i] = &rdp->nxtlist;
+		rdp_me->qlen += rdp->qlen;
+		rdp->qlen = 0;
+	}
+	local_irq_restore(flags);
+}
+
+/*
+ * Remove the specified CPU from the RCU hierarchy and move any pending
+ * callbacks that it might have to the current CPU.  This code assumes
+ * that at least one CPU in the system will remain running at all times.
+ * Any attempt to offline -all- CPUs is likely to strand RCU callbacks.
+ */
+static void rcu_offline_cpu(int cpu)
+{
+	__rcu_offline_cpu(cpu, &rcu_state);
+	__rcu_offline_cpu(cpu, &rcu_bh_state);
+}
+
+#else /* #ifdef CONFIG_HOTPLUG_CPU */
+
+static void rcu_offline_cpu(int cpu)
+{
+}
+
+#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */
+
+/*
+ * Invoke any RCU callbacks that have made it to the end of their grace
+ * period.  Thottle as specified by rdp->blimit.
+ */
+static void rcu_do_batch(struct rcu_data *rdp)
+{
+	unsigned long flags;
+	struct rcu_head *next, *list, **tail;
+	int count;
+
+	/* If no callbacks are ready, just return.*/
+	if (!cpu_has_callbacks_ready_to_invoke(rdp))
+		return;
+
+	/*
+	 * Extract the list of ready callbacks, disabling to prevent
+	 * races with call_rcu() from interrupt handlers.
+	 */
+	local_irq_save(flags);
+	list = rdp->nxtlist;
+	rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL];
+	*rdp->nxttail[RCU_DONE_TAIL] = NULL;
+	tail = rdp->nxttail[RCU_DONE_TAIL];
+	for (count = RCU_NEXT_SIZE - 1; count >= 0; count--)
+		if (rdp->nxttail[count] == rdp->nxttail[RCU_DONE_TAIL])
+			rdp->nxttail[count] = &rdp->nxtlist;
+	local_irq_restore(flags);
+
+	/* Invoke callbacks. */
+	count = 0;
+	while (list) {
+		next = list->next;
+		prefetch(next);
+		list->func(list);
+		list = next;
+		if (++count >= rdp->blimit)
+			break;
+	}
+
+	local_irq_save(flags);
+
+	/* Update count, and requeue any remaining callbacks. */
+	rdp->qlen -= count;
+	if (list != NULL) {
+		*tail = rdp->nxtlist;
+		rdp->nxtlist = list;
+		for (count = 0; count < RCU_NEXT_SIZE; count++)
+			if (&rdp->nxtlist == rdp->nxttail[count])
+				rdp->nxttail[count] = tail;
+			else
+				break;
+	}
+
+	/* Reinstate batch limit if we have worked down the excess. */
+	if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark)
+		rdp->blimit = blimit;
+
+	local_irq_restore(flags);
+
+	/* Re-raise the RCU softirq if there are callbacks remaining. */
+	if (cpu_has_callbacks_ready_to_invoke(rdp))
+		raise_softirq(RCU_SOFTIRQ);
+}
+
+/*
+ * Check to see if this CPU is in a non-context-switch quiescent state
+ * (user mode or idle loop for rcu, non-softirq execution for rcu_bh).
+ * Also schedule the RCU softirq handler.
+ *
+ * This function must be called with hardirqs disabled.  It is normally
+ * invoked from the scheduling-clock interrupt.  If rcu_pending returns
+ * false, there is no point in invoking rcu_check_callbacks().
+ */
+void rcu_check_callbacks(int cpu, int user)
+{
+	if (user ||
+	    (idle_cpu(cpu) && !in_softirq() &&
+				hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
+
+		/*
+		 * Get here if this CPU took its interrupt from user
+		 * mode or from the idle loop, and if this is not a
+		 * nested interrupt.  In this case, the CPU is in
+		 * a quiescent state, so count it.
+		 *
+		 * No memory barrier is required here because both
+		 * rcu_qsctr_inc() and rcu_bh_qsctr_inc() reference
+		 * only CPU-local variables that other CPUs neither
+		 * access nor modify, at least not while the corresponding
+		 * CPU is online.
+		 */
+
+		rcu_qsctr_inc(cpu);
+		rcu_bh_qsctr_inc(cpu);
+
+	} else if (!in_softirq()) {
+
+		/*
+		 * Get here if this CPU did not take its interrupt from
+		 * softirq, in other words, if it is not interrupting
+		 * a rcu_bh read-side critical section.  This is an _bh
+		 * critical section, so count it.
+		 */
+
+		rcu_bh_qsctr_inc(cpu);
+	}
+	raise_softirq(RCU_SOFTIRQ);
+}
+
+#ifdef CONFIG_SMP
+
+/*
+ * Scan the leaf rcu_node structures, processing dyntick state for any that
+ * have not yet encountered a quiescent state, using the function specified.
+ * Returns 1 if the current grace period ends while scanning (possibly
+ * because we made it end).
+ */
+static int rcu_process_dyntick(struct rcu_state *rsp, long lastcomp,
+			       int (*f)(struct rcu_data *))
+{
+	unsigned long bit;
+	int cpu;
+	unsigned long flags;
+	unsigned long mask;
+	struct rcu_node *rnp_cur = rsp->level[NUM_RCU_LVLS - 1];
+	struct rcu_node *rnp_end = &rsp->node[NUM_RCU_NODES];
+
+	for (; rnp_cur < rnp_end; rnp_cur++) {
+		mask = 0;
+		spin_lock_irqsave(&rnp_cur->lock, flags);
+		if (rsp->completed != lastcomp) {
+			spin_unlock_irqrestore(&rnp_cur->lock, flags);
+			return 1;
+		}
+		if (rnp_cur->qsmask == 0) {
+			spin_unlock_irqrestore(&rnp_cur->lock, flags);
+			continue;
+		}
+		cpu = rnp_cur->grplo;
+		bit = 1;
+		for (; cpu <= rnp_cur->grphi; cpu++, bit <<= 1) {
+			if ((rnp_cur->qsmask & bit) != 0 && f(rsp->rda[cpu]))
+				mask |= bit;
+		}
+		if (mask != 0 && rsp->completed == lastcomp) {
+
+			/* cpu_quiet_msk() releases rnp_cur->lock. */
+			cpu_quiet_msk(mask, rsp, rnp_cur, flags);
+			continue;
+		}
+		spin_unlock_irqrestore(&rnp_cur->lock, flags);
+	}
+	return 0;
+}
+
+/*
+ * Force quiescent states on reluctant CPUs, and also detect which
+ * CPUs are in dyntick-idle mode.
+ */
+static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
+{
+	unsigned long flags;
+	long lastcomp;
+	struct rcu_data *rdp = rsp->rda[smp_processor_id()];
+	struct rcu_node *rnp = rcu_get_root(rsp);
+	u8 signaled;
+
+	if (ACCESS_ONCE(rsp->completed) == ACCESS_ONCE(rsp->gpnum))
+		return;  /* No grace period in progress, nothing to force. */
+	if (!spin_trylock_irqsave(&rsp->fqslock, flags)) {
+		rsp->n_force_qs_lh++; /* Inexact, can lose counts.  Tough! */
+		return;	/* Someone else is already on the job. */
+	}
+	if (relaxed &&
+	    (long)(rsp->jiffies_force_qs - jiffies) >= 0 &&
+	    (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) >= 0)
+		goto unlock_ret; /* no emergency and done recently. */
+	rsp->n_force_qs++;
+	spin_lock(&rnp->lock);
+	lastcomp = rsp->completed;
+	signaled = rsp->signaled;
+	rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
+	rdp->n_rcu_pending_force_qs = rdp->n_rcu_pending +
+				      RCU_JIFFIES_TILL_FORCE_QS;
+	if (lastcomp == rsp->gpnum) {
+		rsp->n_force_qs_ngp++;
+		spin_unlock(&rnp->lock);
+		goto unlock_ret;  /* no GP in progress, time updated. */
+	}
+	spin_unlock(&rnp->lock);
+	switch (signaled) {
+	case RCU_GP_INIT:
+
+		break; /* grace period still initializing, ignore. */
+
+	case RCU_SAVE_DYNTICK:
+
+		if (RCU_SIGNAL_INIT != RCU_SAVE_DYNTICK)
+			break; /* So gcc recognizes the dead code. */
+
+		/* Record dyntick-idle state. */
+		if (rcu_process_dyntick(rsp, lastcomp,
+					dyntick_save_progress_counter))
+			goto unlock_ret;
+
+		/* Update state, record completion counter. */
+		spin_lock(&rnp->lock);
+		if (lastcomp == rsp->completed) {
+			rsp->signaled = RCU_FORCE_QS;
+			dyntick_record_completed(rsp, lastcomp);
+		}
+		spin_unlock(&rnp->lock);
+		break;
+
+	case RCU_FORCE_QS:
+
+		/* Check dyntick-idle state, send IPI to laggarts. */
+		if (rcu_process_dyntick(rsp, dyntick_recall_completed(rsp),
+					rcu_implicit_dynticks_qs))
+			goto unlock_ret;
+
+		/* Leave state in case more forcing is required. */
+
+		break;
+	}
+unlock_ret:
+	spin_unlock_irqrestore(&rsp->fqslock, flags);
+}
+
+#else /* #ifdef CONFIG_SMP */
+
+static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
+{
+	set_need_resched();
+}
+
+#endif /* #else #ifdef CONFIG_SMP */
+
+/*
+ * This does the RCU processing work from softirq context for the
+ * specified rcu_state and rcu_data structures.  This may be called
+ * only from the CPU to whom the rdp belongs.
+ */
+static void
+__rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
+{
+	unsigned long flags;
+
+	/*
+	 * If an RCU GP has gone long enough, go check for dyntick
+	 * idle CPUs and, if needed, send resched IPIs.
+	 */
+	if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0 ||
+	    (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) < 0)
+		force_quiescent_state(rsp, 1);
+
+	/*
+	 * Advance callbacks in response to end of earlier grace
+	 * period that some other CPU ended.
+	 */
+	rcu_process_gp_end(rsp, rdp);
+
+	/* Update RCU state based on any recent quiescent states. */
+	rcu_check_quiescent_state(rsp, rdp);
+
+	/* Does this CPU require a not-yet-started grace period? */
+	if (cpu_needs_another_gp(rsp, rdp)) {
+		spin_lock_irqsave(&rcu_get_root(rsp)->lock, flags);
+		rcu_start_gp(rsp, flags);  /* releases above lock */
+	}
+
+	/* If there are callbacks ready, invoke them. */
+	rcu_do_batch(rdp);
+}
+
+/*
+ * Do softirq processing for the current CPU.
+ */
+static void rcu_process_callbacks(struct softirq_action *unused)
+{
+	/*
+	 * Memory references from any prior RCU read-side critical sections
+	 * executed by the interrupted code must be seen before any RCU
+	 * grace-period manipulations below.
+	 */
+	smp_mb(); /* See above block comment. */
+
+	__rcu_process_callbacks(&rcu_state, &__get_cpu_var(rcu_data));
+	__rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
+
+	/*
+	 * Memory references from any later RCU read-side critical sections
+	 * executed by the interrupted code must be seen after any RCU
+	 * grace-period manipulations above.
+	 */
+	smp_mb(); /* See above block comment. */
+}
+
+static void
+__call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
+	   struct rcu_state *rsp)
+{
+	unsigned long flags;
+	struct rcu_data *rdp;
+
+	head->func = func;
+	head->next = NULL;
+
+	smp_mb(); /* Ensure RCU update seen before callback registry. */
+
+	/*
+	 * Opportunistically note grace-period endings and beginnings.
+	 * Note that we might see a beginning right after we see an
+	 * end, but never vice versa, since this CPU has to pass through
+	 * a quiescent state betweentimes.
+	 */
+	local_irq_save(flags);
+	rdp = rsp->rda[smp_processor_id()];
+	rcu_process_gp_end(rsp, rdp);
+	check_for_new_grace_period(rsp, rdp);
+
+	/* Add the callback to our list. */
+	*rdp->nxttail[RCU_NEXT_TAIL] = head;
+	rdp->nxttail[RCU_NEXT_TAIL] = &head->next;
+
+	/* Start a new grace period if one not already started. */
+	if (ACCESS_ONCE(rsp->completed) == ACCESS_ONCE(rsp->gpnum)) {
+		unsigned long nestflag;
+		struct rcu_node *rnp_root = rcu_get_root(rsp);
+
+		spin_lock_irqsave(&rnp_root->lock, nestflag);
+		rcu_start_gp(rsp, nestflag);  /* releases rnp_root->lock. */
+	}
+
+	/* Force the grace period if too many callbacks or too long waiting. */
+	if (unlikely(++rdp->qlen > qhimark)) {
+		rdp->blimit = LONG_MAX;
+		force_quiescent_state(rsp, 0);
+	} else if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0 ||
+		   (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) < 0)
+		force_quiescent_state(rsp, 1);
+	local_irq_restore(flags);
+}
+
+/*
+ * Queue an RCU callback for invocation after a grace period.
+ */
+void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
+{
+	__call_rcu(head, func, &rcu_state);
+}
+EXPORT_SYMBOL_GPL(call_rcu);
+
+/*
+ * Queue an RCU for invocation after a quicker grace period.
+ */
+void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
+{
+	__call_rcu(head, func, &rcu_bh_state);
+}
+EXPORT_SYMBOL_GPL(call_rcu_bh);
+
+/*
+ * Check to see if there is any immediate RCU-related work to be done
+ * by the current CPU, for the specified type of RCU, returning 1 if so.
+ * The checks are in order of increasing expense: checks that can be
+ * carried out against CPU-local state are performed first.  However,
+ * we must check for CPU stalls first, else we might not get a chance.
+ */
+static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
+{
+	rdp->n_rcu_pending++;
+
+	/* Check for CPU stalls, if enabled. */
+	check_cpu_stall(rsp, rdp);
+
+	/* Is the RCU core waiting for a quiescent state from this CPU? */
+	if (rdp->qs_pending)
+		return 1;
+
+	/* Does this CPU have callbacks ready to invoke? */
+	if (cpu_has_callbacks_ready_to_invoke(rdp))
+		return 1;
+
+	/* Has RCU gone idle with this CPU needing another grace period? */
+	if (cpu_needs_another_gp(rsp, rdp))
+		return 1;
+
+	/* Has another RCU grace period completed?  */
+	if (ACCESS_ONCE(rsp->completed) != rdp->completed) /* outside of lock */
+		return 1;
+
+	/* Has a new RCU grace period started? */
+	if (ACCESS_ONCE(rsp->gpnum) != rdp->gpnum) /* outside of lock */
+		return 1;
+
+	/* Has an RCU GP gone long enough to send resched IPIs &c? */
+	if (ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum) &&
+	    ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0 ||
+	     (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) < 0))
+		return 1;
+
+	/* nothing to do */
+	return 0;
+}
+
+/*
+ * Check to see if there is any immediate RCU-related work to be done
+ * by the current CPU, returning 1 if so.  This function is part of the
+ * RCU implementation; it is -not- an exported member of the RCU API.
+ */
+int rcu_pending(int cpu)
+{
+	return __rcu_pending(&rcu_state, &per_cpu(rcu_data, cpu)) ||
+	       __rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu));
+}
+
+/*
+ * Check to see if any future RCU-related work will need to be done
+ * by the current CPU, even if none need be done immediately, returning
+ * 1 if so.  This function is part of the RCU implementation; it is -not-
+ * an exported member of the RCU API.
+ */
+int rcu_needs_cpu(int cpu)
+{
+	/* RCU callbacks either ready or pending? */
+	return per_cpu(rcu_data, cpu).nxtlist ||
+	       per_cpu(rcu_bh_data, cpu).nxtlist;
+}
+
+/*
+ * Initialize a CPU's per-CPU RCU data.  We take this "scorched earth"
+ * approach so that we don't have to worry about how long the CPU has
+ * been gone, or whether it ever was online previously.  We do trust the
+ * ->mynode field, as it is constant for a given struct rcu_data and
+ * initialized during early boot.
+ *
+ * Note that only one online or offline event can be happening at a given
+ * time.  Note also that we can accept some slop in the rsp->completed
+ * access due to the fact that this CPU cannot possibly have any RCU
+ * callbacks in flight yet.
+ */
+static void
+rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
+{
+	unsigned long flags;
+	int i;
+	long lastcomp;
+	unsigned long mask;
+	struct rcu_data *rdp = rsp->rda[cpu];
+	struct rcu_node *rnp = rcu_get_root(rsp);
+
+	/* Set up local state, ensuring consistent view of global state. */
+	spin_lock_irqsave(&rnp->lock, flags);
+	lastcomp = rsp->completed;
+	rdp->completed = lastcomp;
+	rdp->gpnum = lastcomp;
+	rdp->passed_quiesc = 0;  /* We could be racing with new GP, */
+	rdp->qs_pending = 1;	 /*  so set up to respond to current GP. */
+	rdp->beenonline = 1;	 /* We have now been online. */
+	rdp->passed_quiesc_completed = lastcomp - 1;
+	rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo);
+	rdp->nxtlist = NULL;
+	for (i = 0; i < RCU_NEXT_SIZE; i++)
+		rdp->nxttail[i] = &rdp->nxtlist;
+	rdp->qlen = 0;
+	rdp->blimit = blimit;
+#ifdef CONFIG_NO_HZ
+	rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
+#endif /* #ifdef CONFIG_NO_HZ */
+	rdp->cpu = cpu;
+	spin_unlock(&rnp->lock);		/* irqs remain disabled. */
+
+	/*
+	 * A new grace period might start here.  If so, we won't be part
+	 * of it, but that is OK, as we are currently in a quiescent state.
+	 */
+
+	/* Exclude any attempts to start a new GP on large systems. */
+	spin_lock(&rsp->onofflock);		/* irqs already disabled. */
+
+	/* Add CPU to rcu_node bitmasks. */
+	rnp = rdp->mynode;
+	mask = rdp->grpmask;
+	do {
+		/* Exclude any attempts to start a new GP on small systems. */
+		spin_lock(&rnp->lock);	/* irqs already disabled. */
+		rnp->qsmaskinit |= mask;
+		mask = rnp->grpmask;
+		spin_unlock(&rnp->lock); /* irqs already disabled. */
+		rnp = rnp->parent;
+	} while (rnp != NULL && !(rnp->qsmaskinit & mask));
+
+	spin_unlock(&rsp->onofflock);		/* irqs remain disabled. */
+
+	/*
+	 * A new grace period might start here.  If so, we will be part of
+	 * it, and its gpnum will be greater than ours, so we will
+	 * participate.  It is also possible for the gpnum to have been
+	 * incremented before this function was called, and the bitmasks
+	 * to not be filled out until now, in which case we will also
+	 * participate due to our gpnum being behind.
+	 */
+
+	/* Since it is coming online, the CPU is in a quiescent state. */
+	cpu_quiet(cpu, rsp, rdp, lastcomp);
+	local_irq_restore(flags);
+}
+
+static void __cpuinit rcu_online_cpu(int cpu)
+{
+#ifdef CONFIG_NO_HZ
+	struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
+
+	rdtp->dynticks_nesting = 1;
+	rdtp->dynticks |= 1; 	/* need consecutive #s even for hotplug. */
+	rdtp->dynticks_nmi = (rdtp->dynticks_nmi + 1) & ~0x1;
+#endif /* #ifdef CONFIG_NO_HZ */
+	rcu_init_percpu_data(cpu, &rcu_state);
+	rcu_init_percpu_data(cpu, &rcu_bh_state);
+	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
+}
+
+/*
+ * Handle CPU online/offline notifcation events.
+ */
+static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
+				unsigned long action, void *hcpu)
+{
+	long cpu = (long)hcpu;
+
+	switch (action) {
+	case CPU_UP_PREPARE:
+	case CPU_UP_PREPARE_FROZEN:
+		rcu_online_cpu(cpu);
+		break;
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+	case CPU_UP_CANCELED:
+	case CPU_UP_CANCELED_FROZEN:
+		rcu_offline_cpu(cpu);
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+/*
+ * Compute the per-level fanout, either using the exact fanout specified
+ * or balancing the tree, depending on CONFIG_RCU_FANOUT_EXACT.
+ */
+#ifdef CONFIG_RCU_FANOUT_EXACT
+static void __init rcu_init_levelspread(struct rcu_state *rsp)
+{
+	int i;
+
+	for (i = NUM_RCU_LVLS - 1; i >= 0; i--)
+		rsp->levelspread[i] = CONFIG_RCU_FANOUT;
+}
+#else /* #ifdef CONFIG_RCU_FANOUT_EXACT */
+static void __init rcu_init_levelspread(struct rcu_state *rsp)
+{
+	int ccur;
+	int cprv;
+	int i;
+
+	cprv = NR_CPUS;
+	for (i = NUM_RCU_LVLS - 1; i >= 0; i--) {
+		ccur = rsp->levelcnt[i];
+		rsp->levelspread[i] = (cprv + ccur - 1) / ccur;
+		cprv = ccur;
+	}
+}
+#endif /* #else #ifdef CONFIG_RCU_FANOUT_EXACT */
+
+/*
+ * Helper function for rcu_init() that initializes one rcu_state structure.
+ */
+static void __init rcu_init_one(struct rcu_state *rsp)
+{
+	int cpustride = 1;
+	int i;
+	int j;
+	struct rcu_node *rnp;
+
+	/* Initialize the level-tracking arrays. */
+
+	for (i = 1; i < NUM_RCU_LVLS; i++)
+		rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1];
+	rcu_init_levelspread(rsp);
+
+	/* Initialize the elements themselves, starting from the leaves. */
+
+	for (i = NUM_RCU_LVLS - 1; i >= 0; i--) {
+		cpustride *= rsp->levelspread[i];
+		rnp = rsp->level[i];
+		for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) {
+			spin_lock_init(&rnp->lock);
+			rnp->qsmask = 0;
+			rnp->qsmaskinit = 0;
+			rnp->grplo = j * cpustride;
+			rnp->grphi = (j + 1) * cpustride - 1;
+			if (rnp->grphi >= NR_CPUS)
+				rnp->grphi = NR_CPUS - 1;
+			if (i == 0) {
+				rnp->grpnum = 0;
+				rnp->grpmask = 0;
+				rnp->parent = NULL;
+			} else {
+				rnp->grpnum = j % rsp->levelspread[i - 1];
+				rnp->grpmask = 1UL << rnp->grpnum;
+				rnp->parent = rsp->level[i - 1] +
+					      j / rsp->levelspread[i - 1];
+			}
+			rnp->level = i;
+		}
+	}
+}
+
+/*
+ * Helper macro for __rcu_init().  To be used nowhere else!
+ * Assigns leaf node pointers into each CPU's rcu_data structure.
+ */
+#define RCU_DATA_PTR_INIT(rsp, rcu_data) \
+do { \
+	rnp = (rsp)->level[NUM_RCU_LVLS - 1]; \
+	j = 0; \
+	for_each_possible_cpu(i) { \
+		if (i > rnp[j].grphi) \
+			j++; \
+		per_cpu(rcu_data, i).mynode = &rnp[j]; \
+		(rsp)->rda[i] = &per_cpu(rcu_data, i); \
+	} \
+} while (0)
+
+static struct notifier_block __cpuinitdata rcu_nb = {
+	.notifier_call	= rcu_cpu_notify,
+};
+
+void __init __rcu_init(void)
+{
+	int i;			/* All used by RCU_DATA_PTR_INIT(). */
+	int j;
+	struct rcu_node *rnp;
+
+	printk(KERN_WARNING "Experimental hierarchical RCU implementation.\n");
+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+	printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
+#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
+	rcu_init_one(&rcu_state);
+	RCU_DATA_PTR_INIT(&rcu_state, rcu_data);
+	rcu_init_one(&rcu_bh_state);
+	RCU_DATA_PTR_INIT(&rcu_bh_state, rcu_bh_data);
+
+	for_each_online_cpu(i)
+		rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE, (void *)(long)i);
+	/* Register notifier for non-boot CPUs */
+	register_cpu_notifier(&rcu_nb);
+	printk(KERN_WARNING "Experimental hierarchical RCU init done.\n");
+}
+
+module_param(blimit, int, 0);
+module_param(qhimark, int, 0);
+module_param(qlowmark, int, 0);
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
new file mode 100644
index 000000000000..d6db3e837826
--- /dev/null
+++ b/kernel/rcutree_trace.c
@@ -0,0 +1,271 @@
+/*
+ * Read-Copy Update tracing for classic implementation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corporation, 2008
+ *
+ * Papers:  http://www.rdrop.com/users/paulmck/RCU
+ *
+ * For detailed explanation of Read-Copy Update mechanism see -
+ * 		Documentation/RCU
+ *
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/smp.h>
+#include <linux/rcupdate.h>
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <asm/atomic.h>
+#include <linux/bitops.h>
+#include <linux/module.h>
+#include <linux/completion.h>
+#include <linux/moduleparam.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/mutex.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
+{
+	if (!rdp->beenonline)
+		return;
+	seq_printf(m, "%3d%cc=%ld g=%ld pq=%d pqc=%ld qp=%d rpfq=%ld rp=%x",
+		   rdp->cpu,
+		   cpu_is_offline(rdp->cpu) ? '!' : ' ',
+		   rdp->completed, rdp->gpnum,
+		   rdp->passed_quiesc, rdp->passed_quiesc_completed,
+		   rdp->qs_pending,
+		   rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending,
+		   (int)(rdp->n_rcu_pending & 0xffff));
+#ifdef CONFIG_NO_HZ
+	seq_printf(m, " dt=%d/%d dn=%d df=%lu",
+		   rdp->dynticks->dynticks,
+		   rdp->dynticks->dynticks_nesting,
+		   rdp->dynticks->dynticks_nmi,
+		   rdp->dynticks_fqs);
+#endif /* #ifdef CONFIG_NO_HZ */
+	seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi);
+	seq_printf(m, " ql=%ld b=%ld\n", rdp->qlen, rdp->blimit);
+}
+
+#define PRINT_RCU_DATA(name, func, m) \
+	do { \
+		int _p_r_d_i; \
+		\
+		for_each_possible_cpu(_p_r_d_i) \
+			func(m, &per_cpu(name, _p_r_d_i)); \
+	} while (0)
+
+static int show_rcudata(struct seq_file *m, void *unused)
+{
+	seq_puts(m, "rcu:\n");
+	PRINT_RCU_DATA(rcu_data, print_one_rcu_data, m);
+	seq_puts(m, "rcu_bh:\n");
+	PRINT_RCU_DATA(rcu_bh_data, print_one_rcu_data, m);
+	return 0;
+}
+
+static int rcudata_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, show_rcudata, NULL);
+}
+
+static struct file_operations rcudata_fops = {
+	.owner = THIS_MODULE,
+	.open = rcudata_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
+{
+	if (!rdp->beenonline)
+		return;
+	seq_printf(m, "%d,%s,%ld,%ld,%d,%ld,%d,%ld,%ld",
+		   rdp->cpu,
+		   cpu_is_offline(rdp->cpu) ? "\"Y\"" : "\"N\"",
+		   rdp->completed, rdp->gpnum,
+		   rdp->passed_quiesc, rdp->passed_quiesc_completed,
+		   rdp->qs_pending,
+		   rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending,
+		   rdp->n_rcu_pending);
+#ifdef CONFIG_NO_HZ
+	seq_printf(m, ",%d,%d,%d,%lu",
+		   rdp->dynticks->dynticks,
+		   rdp->dynticks->dynticks_nesting,
+		   rdp->dynticks->dynticks_nmi,
+		   rdp->dynticks_fqs);
+#endif /* #ifdef CONFIG_NO_HZ */
+	seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi);
+	seq_printf(m, ",%ld,%ld\n", rdp->qlen, rdp->blimit);
+}
+
+static int show_rcudata_csv(struct seq_file *m, void *unused)
+{
+	seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pqc\",\"pq\",\"rpfq\",\"rp\",");
+#ifdef CONFIG_NO_HZ
+	seq_puts(m, "\"dt\",\"dt nesting\",\"dn\",\"df\",");
+#endif /* #ifdef CONFIG_NO_HZ */
+	seq_puts(m, "\"of\",\"ri\",\"ql\",\"b\"\n");
+	seq_puts(m, "\"rcu:\"\n");
+	PRINT_RCU_DATA(rcu_data, print_one_rcu_data_csv, m);
+	seq_puts(m, "\"rcu_bh:\"\n");
+	PRINT_RCU_DATA(rcu_bh_data, print_one_rcu_data_csv, m);
+	return 0;
+}
+
+static int rcudata_csv_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, show_rcudata_csv, NULL);
+}
+
+static struct file_operations rcudata_csv_fops = {
+	.owner = THIS_MODULE,
+	.open = rcudata_csv_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
+{
+	int level = 0;
+	struct rcu_node *rnp;
+
+	seq_printf(m, "c=%ld g=%ld s=%d jfq=%ld j=%x "
+	              "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu\n",
+		   rsp->completed, rsp->gpnum, rsp->signaled,
+		   (long)(rsp->jiffies_force_qs - jiffies),
+		   (int)(jiffies & 0xffff),
+		   rsp->n_force_qs, rsp->n_force_qs_ngp,
+		   rsp->n_force_qs - rsp->n_force_qs_ngp,
+		   rsp->n_force_qs_lh);
+	for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < NUM_RCU_NODES; rnp++) {
+		if (rnp->level != level) {
+			seq_puts(m, "\n");
+			level = rnp->level;
+		}
+		seq_printf(m, "%lx/%lx %d:%d ^%d    ",
+			   rnp->qsmask, rnp->qsmaskinit,
+			   rnp->grplo, rnp->grphi, rnp->grpnum);
+	}
+	seq_puts(m, "\n");
+}
+
+static int show_rcuhier(struct seq_file *m, void *unused)
+{
+	seq_puts(m, "rcu:\n");
+	print_one_rcu_state(m, &rcu_state);
+	seq_puts(m, "rcu_bh:\n");
+	print_one_rcu_state(m, &rcu_bh_state);
+	return 0;
+}
+
+static int rcuhier_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, show_rcuhier, NULL);
+}
+
+static struct file_operations rcuhier_fops = {
+	.owner = THIS_MODULE,
+	.open = rcuhier_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static int show_rcugp(struct seq_file *m, void *unused)
+{
+	seq_printf(m, "rcu: completed=%ld  gpnum=%ld\n",
+		   rcu_state.completed, rcu_state.gpnum);
+	seq_printf(m, "rcu_bh: completed=%ld  gpnum=%ld\n",
+		   rcu_bh_state.completed, rcu_bh_state.gpnum);
+	return 0;
+}
+
+static int rcugp_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, show_rcugp, NULL);
+}
+
+static struct file_operations rcugp_fops = {
+	.owner = THIS_MODULE,
+	.open = rcugp_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static struct dentry *rcudir, *datadir, *datadir_csv, *hierdir, *gpdir;
+static int __init rcuclassic_trace_init(void)
+{
+	rcudir = debugfs_create_dir("rcu", NULL);
+	if (!rcudir)
+		goto out;
+
+	datadir = debugfs_create_file("rcudata", 0444, rcudir,
+						NULL, &rcudata_fops);
+	if (!datadir)
+		goto free_out;
+
+	datadir_csv = debugfs_create_file("rcudata.csv", 0444, rcudir,
+						NULL, &rcudata_csv_fops);
+	if (!datadir_csv)
+		goto free_out;
+
+	gpdir = debugfs_create_file("rcugp", 0444, rcudir, NULL, &rcugp_fops);
+	if (!gpdir)
+		goto free_out;
+
+	hierdir = debugfs_create_file("rcuhier", 0444, rcudir,
+						NULL, &rcuhier_fops);
+	if (!hierdir)
+		goto free_out;
+	return 0;
+free_out:
+	if (datadir)
+		debugfs_remove(datadir);
+	if (datadir_csv)
+		debugfs_remove(datadir_csv);
+	if (gpdir)
+		debugfs_remove(gpdir);
+	debugfs_remove(rcudir);
+out:
+	return 1;
+}
+
+static void __exit rcuclassic_trace_cleanup(void)
+{
+	debugfs_remove(datadir);
+	debugfs_remove(datadir_csv);
+	debugfs_remove(gpdir);
+	debugfs_remove(hierdir);
+	debugfs_remove(rcudir);
+}
+
+
+module_init(rcuclassic_trace_init);
+module_exit(rcuclassic_trace_cleanup);
+
+MODULE_AUTHOR("Paul E. McKenney");
+MODULE_DESCRIPTION("Read-Copy Update tracing for hierarchical implementation");
+MODULE_LICENSE("GPL");
diff --git a/kernel/softirq.c b/kernel/softirq.c
index e7c69a720d69..80d323e6f61a 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -269,6 +269,7 @@ void irq_enter(void)
 {
 	int cpu = smp_processor_id();
 
+	rcu_irq_enter();
 	if (idle_cpu(cpu) && !in_interrupt()) {
 		__irq_enter();
 		tick_check_idle(cpu);
@@ -295,9 +296,9 @@ void irq_exit(void)
 
 #ifdef CONFIG_NO_HZ
 	/* Make sure that timer wheel updates are propagated */
-	if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched())
-		tick_nohz_stop_sched_tick(0);
 	rcu_irq_exit();
+	if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched())
+		tick_nohz_stop_sched_tick(0);
 #endif
 	preempt_enable_no_resched();
 }
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index b0f239e443bc..465d822f3f5d 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -619,6 +619,19 @@ config RCU_CPU_STALL_DETECTOR
 
 	  Say N if you are unsure.
 
+config RCU_CPU_STALL_DETECTOR
+	bool "Check for stalled CPUs delaying RCU grace periods"
+	depends on CLASSIC_RCU || TREE_RCU
+	default n
+	help
+	  This option causes RCU to printk information on which
+	  CPUs are delaying the current grace period, but only when
+	  the grace period extends for excessive time periods.
+
+	  Say Y if you want RCU to perform such checks.
+
+	  Say N if you are unsure.
+
 config KPROBES_SANITY_TEST
 	bool "Kprobes sanity tests"
 	depends on DEBUG_KERNEL
-- 
cgit v1.2.3


From 078a55db075bf4dcc03115dc94875b3dd83f69d4 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Thu, 18 Dec 2008 16:57:52 -0800
Subject: sparseirq: add kernel-doc notation for new member in irq_desc, -v2

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Acked-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/irq.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 36a015746788..7fa7f30fccb6 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -134,6 +134,9 @@ struct irq_2_iommu;
 /**
  * struct irq_desc - interrupt descriptor
  * @irq:		interrupt number for this descriptor
+ * @timer_rand_state:	pointer to timer rand state struct
+ * @kstat_irqs:		irq stats per cpu
+ * @irq_2_iommu:	iommu with this irq
  * @handle_irq:		highlevel irq-events handler [if NULL, __do_IRQ()]
  * @chip:		low level interrupt hardware access
  * @msi_desc:		MSI descriptor
-- 
cgit v1.2.3


From 7b4967c532045a1983d6d4af5c69cc7c5109f62b Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Fri, 19 Dec 2008 16:56:37 +1030
Subject: cpumask: Add alloc_cpumask_var_node()

Impact: New API

This will be needed in x86 code to allocate the domain and old_domain
cpumasks on the same node as where the containing irq_cfg struct is
allocated.

(Also fixes double-dump_stack on rare CONFIG_DEBUG_PER_CPU_MAPS case)

Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (re-impl alloc_cpumask_var)
---
 include/linux/cpumask.h |  7 +++++++
 lib/cpumask.c           | 11 ++++++++---
 2 files changed, 15 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index d4bf52603e6b..b5ad19a6f43f 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -1025,6 +1025,7 @@ static inline size_t cpumask_size(void)
 #ifdef CONFIG_CPUMASK_OFFSTACK
 typedef struct cpumask *cpumask_var_t;
 
+bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node);
 bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags);
 void alloc_bootmem_cpumask_var(cpumask_var_t *mask);
 void free_cpumask_var(cpumask_var_t mask);
@@ -1038,6 +1039,12 @@ static inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
 	return true;
 }
 
+static inline bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags,
+					  int node)
+{
+	return true;
+}
+
 static inline void alloc_bootmem_cpumask_var(cpumask_var_t *mask)
 {
 }
diff --git a/lib/cpumask.c b/lib/cpumask.c
index 8d03f22c6ced..3f258f58c85b 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -76,15 +76,14 @@ int cpumask_any_but(const struct cpumask *mask, unsigned int cpu)
 
 /* These are not inline because of header tangles. */
 #ifdef CONFIG_CPUMASK_OFFSTACK
-bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
+bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node)
 {
 	if (likely(slab_is_available()))
-		*mask = kmalloc(cpumask_size(), flags);
+		*mask = kmalloc_node(cpumask_size(), flags, node);
 	else {
 #ifdef CONFIG_DEBUG_PER_CPU_MAPS
 		printk(KERN_ERR
 			"=> alloc_cpumask_var: kmalloc not available!\n");
-		dump_stack();
 #endif
 		*mask = NULL;
 	}
@@ -96,6 +95,12 @@ bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
 #endif
 	return *mask != NULL;
 }
+EXPORT_SYMBOL(alloc_cpumask_var_node);
+
+bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
+{
+	return alloc_cpumask_var_node(mask, flags, numa_node_id());
+}
 EXPORT_SYMBOL(alloc_cpumask_var);
 
 void __init alloc_bootmem_cpumask_var(cpumask_var_t *mask)
-- 
cgit v1.2.3


From e057d7aea9d8f2a46cd440d8bfb72245d4e72d79 Mon Sep 17 00:00:00 2001
From: Mike Travis <travis@sgi.com>
Date: Mon, 15 Dec 2008 20:26:48 -0800
Subject: cpumask: add sysfs displays for configured and disabled cpu maps

Impact: add new sysfs files.

Add sysfs files "kernel_max" and "offline" to display the max CPU index
allowed (NR_CPUS-1), and the map of cpus that are offline.

Cpus can be offlined via HOTPLUG, disabled by the BIOS ACPI tables, or
if they exceed the number of cpus allowed by the NR_CPUS config option,
or the "maxcpus=NUM" kernel start parameter.

The "possible_cpus=NUM" parameter can also extend the number of possible
cpus allowed, in which case the cpus not present at startup will be
in the offline state.  (These cpus can be HOTPLUGGED ON after system
startup [pending a follow-on patch to provide the capability via the
/sys/devices/sys/cpu/cpuN/online mechanism to bring them online.])

By design, the "offlined cpus > possible cpus" display will always
use the following formats:

  * all possible cpus online:   "x$"    or "x-y$"
  * some possible cpus offline: ".*,x$" or ".*,x-y$"

where:
  x == number of possible cpus (nr_cpu_ids); and
  y == number of cpus >= NR_CPUS or maxcpus (if y > x).

One use of this feature is for distros to select (or configure) the
appropriate kernel to install for the resident system.

Notes:
  * cpus offlined <= possible cpus will be printed for all architectures.
  * cpus offlined >  possible cpus will only be printed for arches that
  	set 'total_cpus' [X86 only in this patch].

Based on tip/cpus4096 + .../rusty/linux-2.6-for-ingo.git/master +
	 x86-only-patches sent 12/15.

Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/base/cpu.c  | 44 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/smp.h |  3 +++
 2 files changed, 47 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 4259072f5bd0..2aef96f20b30 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -128,10 +128,54 @@ print_cpus_func(online);
 print_cpus_func(possible);
 print_cpus_func(present);
 
+/*
+ * Print values for NR_CPUS and offlined cpus
+ */
+static ssize_t print_cpus_kernel_max(struct sysdev_class *class, char *buf)
+{
+	int n = snprintf(buf, PAGE_SIZE-2, "%d\n", CONFIG_NR_CPUS - 1);
+	return n;
+}
+static SYSDEV_CLASS_ATTR(kernel_max, 0444, print_cpus_kernel_max, NULL);
+
+/* arch-optional setting to enable display of offline cpus >= nr_cpu_ids */
+unsigned int total_cpus;
+
+static ssize_t print_cpus_offline(struct sysdev_class *class, char *buf)
+{
+	int n = 0, len = PAGE_SIZE-2;
+	cpumask_var_t offline;
+
+	/* display offline cpus < nr_cpu_ids */
+	if (!alloc_cpumask_var(&offline, GFP_KERNEL))
+		return -ENOMEM;
+	cpumask_complement(offline, cpu_online_mask);
+	n = cpulist_scnprintf(buf, len, offline);
+	free_cpumask_var(offline);
+
+	/* display offline cpus >= nr_cpu_ids */
+	if (total_cpus && nr_cpu_ids < total_cpus) {
+		if (n && n < len)
+			buf[n++] = ',';
+
+		if (nr_cpu_ids == total_cpus-1)
+			n += snprintf(&buf[n], len - n, "%d", nr_cpu_ids);
+		else
+			n += snprintf(&buf[n], len - n, "%d-%d",
+						      nr_cpu_ids, total_cpus-1);
+	}
+
+	n += snprintf(&buf[n], len - n, "\n");
+	return n;
+}
+static SYSDEV_CLASS_ATTR(offline, 0444, print_cpus_offline, NULL);
+
 static struct sysdev_class_attribute *cpu_state_attr[] = {
 	&attr_online_map,
 	&attr_possible_map,
 	&attr_present_map,
+	&attr_kernel_max,
+	&attr_offline,
 };
 
 static int cpu_states_init(void)
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 3f9a60043a97..0d5770c2e43a 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -21,6 +21,9 @@ struct call_single_data {
 	u16 priv;
 };
 
+/* total number of cpus in this system (may exceed NR_CPUS) */
+extern unsigned int total_cpus;
+
 #ifdef CONFIG_SMP
 
 #include <linux/preempt.h>
-- 
cgit v1.2.3


From 716707b29906e1d8d190defe3d646610b097a861 Mon Sep 17 00:00:00 2001
From: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Date: Thu, 18 Dec 2008 23:26:02 +0530
Subject: sched: convert BALANCE_FOR_xx_POWER to inline functions

Impact: cleanup

BALANCE_FOR_MC_POWER and similar macros defined in sched.h are
not constants and have various condition checks and significant
amount of code that is not suitable to be contain in a macro.
Also there could be side effects on the expressions passed to
some of them like test_sd_parent().

This patch converts all complex macros related to power savings
balance to inline functions.

Signed-off-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h    | 33 ++++++++++++++++++++++++---------
 include/linux/topology.h |  4 ++--
 2 files changed, 26 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4240f6bfa812..1210fb0e45ff 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -763,15 +763,23 @@ enum cpu_idle_type {
 #define SD_SERIALIZE		1024	/* Only a single load balancing instance */
 #define SD_WAKE_IDLE_FAR	2048	/* Gain latency sacrificing cache hit */
 
-#define BALANCE_FOR_MC_POWER	\
-	(sched_smt_power_savings ? SD_POWERSAVINGS_BALANCE : 0)
+extern int sched_mc_power_savings, sched_smt_power_savings;
+
+static inline int sd_balance_for_mc_power(void)
+{
+	if (sched_smt_power_savings)
+		return SD_POWERSAVINGS_BALANCE;
 
-#define BALANCE_FOR_PKG_POWER	\
-	((sched_mc_power_savings || sched_smt_power_savings) ?	\
-	 SD_POWERSAVINGS_BALANCE : 0)
+	return 0;
+}
 
-#define test_sd_parent(sd, flag)	((sd->parent &&		\
-					 (sd->parent->flags & flag)) ? 1 : 0)
+static inline int sd_balance_for_package_power(void)
+{
+	if (sched_mc_power_savings | sched_smt_power_savings)
+		return SD_POWERSAVINGS_BALANCE;
+
+	return 0;
+}
 
 
 struct sched_group {
@@ -1399,6 +1407,15 @@ struct task_struct {
 #endif
 };
 
+/* Test a flag in parent sched domain */
+static inline int test_sd_parent(struct sched_domain *sd, int flag)
+{
+	if (sd->parent && (sd->parent->flags & flag))
+		return 1;
+
+	return 0;
+}
+
 /*
  * Priority of a process goes from 0..MAX_PRIO-1, valid RT
  * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
@@ -2256,8 +2273,6 @@ __trace_special(void *__tr, void *__data,
 extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
 extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
 
-extern int sched_mc_power_savings, sched_smt_power_savings;
-
 extern void normalize_rt_tasks(void);
 
 #ifdef CONFIG_GROUP_SCHED
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 0c5b5ac36d8e..0ce7c0dac06c 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -125,7 +125,7 @@ int arch_update_cpu_topology(void);
 				| SD_WAKE_AFFINE	\
 				| SD_WAKE_BALANCE	\
 				| SD_SHARE_PKG_RESOURCES\
-				| BALANCE_FOR_MC_POWER,	\
+				| sd_balance_for_mc_power(),\
 	.last_balance		= jiffies,		\
 	.balance_interval	= 1,			\
 }
@@ -150,7 +150,7 @@ int arch_update_cpu_topology(void);
 				| SD_BALANCE_FORK	\
 				| SD_WAKE_AFFINE	\
 				| SD_WAKE_BALANCE	\
-				| BALANCE_FOR_PKG_POWER,\
+				| sd_balance_for_package_power(),\
 	.last_balance		= jiffies,		\
 	.balance_interval	= 1,			\
 }
-- 
cgit v1.2.3


From afb8a9b70b86866a60e08b2956ae4e1406390336 Mon Sep 17 00:00:00 2001
From: Gautham R Shenoy <ego@in.ibm.com>
Date: Thu, 18 Dec 2008 23:26:09 +0530
Subject: sched: framework for sched_mc/smt_power_savings=N

Impact: extend range of /sys/devices/system/cpu/sched_mc_power_savings

Currently the sched_mc/smt_power_savings variable is a boolean,
which either enables or disables topology based power savings.
This patch extends the behaviour of the variable from boolean to
multivalued, such that based on the value, we decide how
aggressively do we want to perform powersavings balance at
appropriate sched domain based on topology.

Variable levels of power saving tunable would benefit end user to
match the required level of power savings vs performance
trade-off depending on the system configuration and workloads.

This version makes the sched_mc_power_savings global variable to
take more values (0,1,2).  Later versions can have a single
tunable called sched_power_savings instead of
sched_{mc,smt}_power_savings.

Signed-off-by: Gautham R Shenoy <ego@in.ibm.com>
Signed-off-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 11 +++++++++++
 kernel/sched.c        | 17 ++++++++++++++---
 2 files changed, 25 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1210fb0e45ff..a96726658eca 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -763,6 +763,17 @@ enum cpu_idle_type {
 #define SD_SERIALIZE		1024	/* Only a single load balancing instance */
 #define SD_WAKE_IDLE_FAR	2048	/* Gain latency sacrificing cache hit */
 
+enum powersavings_balance_level {
+	POWERSAVINGS_BALANCE_NONE = 0,  /* No power saving load balance */
+	POWERSAVINGS_BALANCE_BASIC,	/* Fill one thread/core/package
+					 * first for long running threads
+					 */
+	POWERSAVINGS_BALANCE_WAKEUP,	/* Also bias task wakeups to semi-idle
+					 * cpu package for power savings
+					 */
+	MAX_POWERSAVINGS_BALANCE_LEVELS
+};
+
 extern int sched_mc_power_savings, sched_smt_power_savings;
 
 static inline int sd_balance_for_mc_power(void)
diff --git a/kernel/sched.c b/kernel/sched.c
index b309027bf9e8..56b285cd5350 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -7906,14 +7906,25 @@ int arch_reinit_sched_domains(void)
 static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
 {
 	int ret;
+	unsigned int level = 0;
 
-	if (buf[0] != '0' && buf[0] != '1')
+	if (sscanf(buf, "%u", &level) != 1)
+		return -EINVAL;
+
+	/*
+	 * level is always be positive so don't check for
+	 * level < POWERSAVINGS_BALANCE_NONE which is 0
+	 * What happens on 0 or 1 byte write,
+	 * need to check for count as well?
+	 */
+
+	if (level >= MAX_POWERSAVINGS_BALANCE_LEVELS)
 		return -EINVAL;
 
 	if (smt)
-		sched_smt_power_savings = (buf[0] == '1');
+		sched_smt_power_savings = level;
 	else
-		sched_mc_power_savings = (buf[0] == '1');
+		sched_mc_power_savings = level;
 
 	ret = arch_reinit_sched_domains();
 
-- 
cgit v1.2.3


From 100fdaee70ebf5f31b9451fbc01300c627091328 Mon Sep 17 00:00:00 2001
From: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Date: Thu, 18 Dec 2008 23:26:47 +0530
Subject: sched: add SD_BALANCE_NEWIDLE at MC and CPU level for sched_mc>0

Impact: change task balancing to save power more agressively

Add SD_BALANCE_NEWIDLE flag at MC level and CPU level
if sched_mc is set.  This helps power savings and
will not affect performance when sched_mc=0

Ingo and Mike Galbraith have optimised the SD flags by
removing SD_BALANCE_NEWIDLE at MC and CPU level.  This
helps performance but hurts power savings since this
slows down task consolidation by reducing the number
of times load_balance is run.

    sched: fine-tune SD_MC_INIT
        commit 14800984706bf6936bbec5187f736e928be5c218
        Author: Mike Galbraith <efault@gmx.de>
        Date:   Fri Nov 7 15:26:50 2008 +0100

    sched: re-tune balancing -- revert
        commit 9fcd18c9e63e325dbd2b4c726623f760788d5aa8
        Author: Ingo Molnar <mingo@elte.hu>
        Date:   Wed Nov 5 16:52:08 2008 +0100

This patch selectively enables SD_BALANCE_NEWIDLE flag
only when sched_mc is set to 1 or 2.  This helps power savings
by task consolidation and also does not hurt performance at
sched_mc=0 where all power saving optimisations are turned off.

Signed-off-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h    | 13 +++++++++++++
 include/linux/topology.h |  6 ++++--
 2 files changed, 17 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index a96726658eca..5a933d925473 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -792,6 +792,19 @@ static inline int sd_balance_for_package_power(void)
 	return 0;
 }
 
+/*
+ * Optimise SD flags for power savings:
+ * SD_BALANCE_NEWIDLE helps agressive task consolidation and power savings.
+ * Keep default SD flags if sched_{smt,mc}_power_saving=0
+ */
+
+static inline int sd_power_saving_flags(void)
+{
+	if (sched_mc_power_savings | sched_smt_power_savings)
+		return SD_BALANCE_NEWIDLE;
+
+	return 0;
+}
 
 struct sched_group {
 	struct sched_group *next;	/* Must be a circular list */
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 0ce7c0dac06c..e632d29f0544 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -125,7 +125,8 @@ int arch_update_cpu_topology(void);
 				| SD_WAKE_AFFINE	\
 				| SD_WAKE_BALANCE	\
 				| SD_SHARE_PKG_RESOURCES\
-				| sd_balance_for_mc_power(),\
+				| sd_balance_for_mc_power()\
+				| sd_power_saving_flags(),\
 	.last_balance		= jiffies,		\
 	.balance_interval	= 1,			\
 }
@@ -150,7 +151,8 @@ int arch_update_cpu_topology(void);
 				| SD_BALANCE_FORK	\
 				| SD_WAKE_AFFINE	\
 				| SD_WAKE_BALANCE	\
-				| sd_balance_for_package_power(),\
+				| sd_balance_for_package_power()\
+				| sd_power_saving_flags(),\
 	.last_balance		= jiffies,		\
 	.balance_interval	= 1,			\
 }
-- 
cgit v1.2.3


From 06aaf76a7e2e4cc57eabcb8f43ec99c961fe55fe Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 18 Dec 2008 21:30:23 +0100
Subject: sched: move test_sd_parent() to an SMP section of sched.h

Impact: build fix

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5a933d925473..e5f928a079e8 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -920,6 +920,15 @@ extern void partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
 				    struct sched_domain_attr *dattr_new);
 extern int arch_reinit_sched_domains(void);
 
+/* Test a flag in parent sched domain */
+static inline int test_sd_parent(struct sched_domain *sd, int flag)
+{
+	if (sd->parent && (sd->parent->flags & flag))
+		return 1;
+
+	return 0;
+}
+
 #else /* CONFIG_SMP */
 
 struct sched_domain_attr;
@@ -1431,15 +1440,6 @@ struct task_struct {
 #endif
 };
 
-/* Test a flag in parent sched domain */
-static inline int test_sd_parent(struct sched_domain *sd, int flag)
-{
-	if (sd->parent && (sd->parent->flags & flag))
-		return 1;
-
-	return 0;
-}
-
 /*
  * Priority of a process goes from 0..MAX_PRIO-1, valid RT
  * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
-- 
cgit v1.2.3


From 50b6f1f4a430608f7345f66ecd68a129bff11649 Mon Sep 17 00:00:00 2001
From: Kwangwoo Lee <kwangwoo.lee@gmail.com>
Date: Sat, 20 Dec 2008 04:26:01 -0500
Subject: Input: add tsc2007 based touchscreen driver

This drive has been tested on ARM9 based SoC - MV86XX.

Signed-off-by: Kwangwoo Lee <kwangwoo.lee@gmail.com>
Acked-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/touchscreen/Kconfig   |  11 ++
 drivers/input/touchscreen/Makefile  |   1 +
 drivers/input/touchscreen/tsc2007.c | 381 ++++++++++++++++++++++++++++++++++++
 include/linux/i2c/tsc2007.h         |  17 ++
 4 files changed, 410 insertions(+)
 create mode 100644 drivers/input/touchscreen/tsc2007.c
 create mode 100644 include/linux/i2c/tsc2007.h

(limited to 'include/linux')

diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig
index 20eb52ed176d..83747dc9ba05 100644
--- a/drivers/input/touchscreen/Kconfig
+++ b/drivers/input/touchscreen/Kconfig
@@ -389,4 +389,15 @@ config TOUCHSCREEN_TOUCHIT213
 	  To compile this driver as a module, choose M here: the
 	  module will be called touchit213.
 
+config TOUCHSCREEN_TSC2007
+	tristate "TSC2007 based touchscreens"
+	depends on I2C
+	help
+	  Say Y here if you have a TSC2007 based touchscreen.
+
+	  If unsure, say N.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called tsc2007.
+
 endif
diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile
index 3dc84d3846c1..127f87cc2e2e 100644
--- a/drivers/input/touchscreen/Makefile
+++ b/drivers/input/touchscreen/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_TOUCHSCREEN_PENMOUNT)	+= penmount.o
 obj-$(CONFIG_TOUCHSCREEN_TOUCHIT213)	+= touchit213.o
 obj-$(CONFIG_TOUCHSCREEN_TOUCHRIGHT)	+= touchright.o
 obj-$(CONFIG_TOUCHSCREEN_TOUCHWIN)	+= touchwin.o
+obj-$(CONFIG_TOUCHSCREEN_TSC2007)	+= tsc2007.o
 obj-$(CONFIG_TOUCHSCREEN_UCB1400)	+= ucb1400_ts.o
 obj-$(CONFIG_TOUCHSCREEN_WACOM_W8001)	+= wacom_w8001.o
 obj-$(CONFIG_TOUCHSCREEN_WM97XX)	+= wm97xx-ts.o
diff --git a/drivers/input/touchscreen/tsc2007.c b/drivers/input/touchscreen/tsc2007.c
new file mode 100644
index 000000000000..b75dc2990574
--- /dev/null
+++ b/drivers/input/touchscreen/tsc2007.c
@@ -0,0 +1,381 @@
+/*
+ * drivers/input/touchscreen/tsc2007.c
+ *
+ * Copyright (c) 2008 MtekVision Co., Ltd.
+ *	Kwangwoo Lee <kwlee@mtekvision.com>
+ *
+ * Using code from:
+ *  - ads7846.c
+ *	Copyright (c) 2005 David Brownell
+ *	Copyright (c) 2006 Nokia Corporation
+ *  - corgi_ts.c
+ *	Copyright (C) 2004-2005 Richard Purdie
+ *  - omap_ts.[hc], ads7846.h, ts_osk.c
+ *	Copyright (C) 2002 MontaVista Software
+ *	Copyright (C) 2004 Texas Instruments
+ *	Copyright (C) 2005 Dirk Behme
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/hrtimer.h>
+#include <linux/slab.h>
+#include <linux/input.h>
+#include <linux/interrupt.h>
+#include <linux/i2c.h>
+#include <linux/i2c/tsc2007.h>
+
+#define TS_POLL_DELAY	(10 * 1000)	/* ns delay before the first sample */
+#define TS_POLL_PERIOD	(5 * 1000)	/* ns delay between samples */
+
+#define TSC2007_MEASURE_TEMP0		(0x0 << 4)
+#define TSC2007_MEASURE_AUX		(0x2 << 4)
+#define TSC2007_MEASURE_TEMP1		(0x4 << 4)
+#define TSC2007_ACTIVATE_XN		(0x8 << 4)
+#define TSC2007_ACTIVATE_YN		(0x9 << 4)
+#define TSC2007_ACTIVATE_YP_XN		(0xa << 4)
+#define TSC2007_SETUP			(0xb << 4)
+#define TSC2007_MEASURE_X		(0xc << 4)
+#define TSC2007_MEASURE_Y		(0xd << 4)
+#define TSC2007_MEASURE_Z1		(0xe << 4)
+#define TSC2007_MEASURE_Z2		(0xf << 4)
+
+#define TSC2007_POWER_OFF_IRQ_EN	(0x0 << 2)
+#define TSC2007_ADC_ON_IRQ_DIS0		(0x1 << 2)
+#define TSC2007_ADC_OFF_IRQ_EN		(0x2 << 2)
+#define TSC2007_ADC_ON_IRQ_DIS1		(0x3 << 2)
+
+#define TSC2007_12BIT			(0x0 << 1)
+#define TSC2007_8BIT			(0x1 << 1)
+
+#define	MAX_12BIT			((1 << 12) - 1)
+
+#define ADC_ON_12BIT	(TSC2007_12BIT | TSC2007_ADC_ON_IRQ_DIS0)
+
+#define READ_Y		(ADC_ON_12BIT | TSC2007_MEASURE_Y)
+#define READ_Z1		(ADC_ON_12BIT | TSC2007_MEASURE_Z1)
+#define READ_Z2		(ADC_ON_12BIT | TSC2007_MEASURE_Z2)
+#define READ_X		(ADC_ON_12BIT | TSC2007_MEASURE_X)
+#define PWRDOWN		(TSC2007_12BIT | TSC2007_POWER_OFF_IRQ_EN)
+
+struct ts_event {
+	u16	x;
+	u16	y;
+	u16	z1, z2;
+};
+
+struct tsc2007 {
+	struct input_dev	*input;
+	char			phys[32];
+	struct hrtimer		timer;
+	struct ts_event		tc;
+
+	struct i2c_client	*client;
+
+	spinlock_t		lock;
+
+	u16			model;
+	u16			x_plate_ohms;
+
+	unsigned		pendown;
+	int			irq;
+
+	int			(*get_pendown_state)(void);
+	void			(*clear_penirq)(void);
+};
+
+static inline int tsc2007_xfer(struct tsc2007 *tsc, u8 cmd)
+{
+	s32 data;
+	u16 val;
+
+	data = i2c_smbus_read_word_data(tsc->client, cmd);
+	if (data < 0) {
+		dev_err(&tsc->client->dev, "i2c io error: %d\n", data);
+		return data;
+	}
+
+	/* The protocol and raw data format from i2c interface:
+	 * S Addr Wr [A] Comm [A] S Addr Rd [A] [DataLow] A [DataHigh] NA P
+	 * Where DataLow has [D11-D4], DataHigh has [D3-D0 << 4 | Dummy 4bit].
+	 */
+	val = swab16(data) >> 4;
+
+	dev_dbg(&tsc->client->dev, "data: 0x%x, val: 0x%x\n", data, val);
+
+	return val;
+}
+
+static void tsc2007_send_event(void *tsc)
+{
+	struct tsc2007	*ts = tsc;
+	u32		rt;
+	u16		x, y, z1, z2;
+
+	x = ts->tc.x;
+	y = ts->tc.y;
+	z1 = ts->tc.z1;
+	z2 = ts->tc.z2;
+
+	/* range filtering */
+	if (x == MAX_12BIT)
+		x = 0;
+
+	if (likely(x && z1)) {
+		/* compute touch pressure resistance using equation #1 */
+		rt = z2;
+		rt -= z1;
+		rt *= x;
+		rt *= ts->x_plate_ohms;
+		rt /= z1;
+		rt = (rt + 2047) >> 12;
+	} else
+		rt = 0;
+
+	/* Sample found inconsistent by debouncing or pressure is beyond
+	 * the maximum. Don't report it to user space, repeat at least
+	 * once more the measurement
+	 */
+	if (rt > MAX_12BIT) {
+		dev_dbg(&ts->client->dev, "ignored pressure %d\n", rt);
+
+		hrtimer_start(&ts->timer, ktime_set(0, TS_POLL_PERIOD),
+			      HRTIMER_MODE_REL);
+		return;
+	}
+
+	/* NOTE: We can't rely on the pressure to determine the pen down
+	 * state, even this controller has a pressure sensor.  The pressure
+	 * value can fluctuate for quite a while after lifting the pen and
+	 * in some cases may not even settle at the expected value.
+	 *
+	 * The only safe way to check for the pen up condition is in the
+	 * timer by reading the pen signal state (it's a GPIO _and_ IRQ).
+	 */
+	if (rt) {
+		struct input_dev *input = ts->input;
+
+		if (!ts->pendown) {
+			dev_dbg(&ts->client->dev, "DOWN\n");
+
+			input_report_key(input, BTN_TOUCH, 1);
+			ts->pendown = 1;
+		}
+
+		input_report_abs(input, ABS_X, x);
+		input_report_abs(input, ABS_Y, y);
+		input_report_abs(input, ABS_PRESSURE, rt);
+
+		input_sync(input);
+
+		dev_dbg(&ts->client->dev, "point(%4d,%4d), pressure (%4u)\n",
+			x, y, rt);
+	}
+
+	hrtimer_start(&ts->timer, ktime_set(0, TS_POLL_PERIOD),
+			HRTIMER_MODE_REL);
+}
+
+static int tsc2007_read_values(struct tsc2007 *tsc)
+{
+	/* y- still on; turn on only y+ (and ADC) */
+	tsc->tc.y = tsc2007_xfer(tsc, READ_Y);
+
+	/* turn y- off, x+ on, then leave in lowpower */
+	tsc->tc.x = tsc2007_xfer(tsc, READ_X);
+
+	/* turn y+ off, x- on; we'll use formula #1 */
+	tsc->tc.z1 = tsc2007_xfer(tsc, READ_Z1);
+	tsc->tc.z2 = tsc2007_xfer(tsc, READ_Z2);
+
+	/* power down */
+	tsc2007_xfer(tsc, PWRDOWN);
+
+	return 0;
+}
+
+static enum hrtimer_restart tsc2007_timer(struct hrtimer *handle)
+{
+	struct tsc2007 *ts = container_of(handle, struct tsc2007, timer);
+
+	spin_lock_irq(&ts->lock);
+
+	if (unlikely(!ts->get_pendown_state() && ts->pendown)) {
+		struct input_dev *input = ts->input;
+
+		dev_dbg(&ts->client->dev, "UP\n");
+
+		input_report_key(input, BTN_TOUCH, 0);
+		input_report_abs(input, ABS_PRESSURE, 0);
+		input_sync(input);
+
+		ts->pendown = 0;
+		enable_irq(ts->irq);
+	} else {
+		/* pen is still down, continue with the measurement */
+		dev_dbg(&ts->client->dev, "pen is still down\n");
+
+		tsc2007_read_values(ts);
+		tsc2007_send_event(ts);
+	}
+
+	spin_unlock_irq(&ts->lock);
+
+	return HRTIMER_NORESTART;
+}
+
+static irqreturn_t tsc2007_irq(int irq, void *handle)
+{
+	struct tsc2007 *ts = handle;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ts->lock, flags);
+
+	if (likely(ts->get_pendown_state())) {
+		disable_irq(ts->irq);
+		hrtimer_start(&ts->timer, ktime_set(0, TS_POLL_DELAY),
+					HRTIMER_MODE_REL);
+	}
+
+	if (ts->clear_penirq)
+		ts->clear_penirq();
+
+	spin_unlock_irqrestore(&ts->lock, flags);
+
+	return IRQ_HANDLED;
+}
+
+static int tsc2007_probe(struct i2c_client *client,
+			const struct i2c_device_id *id)
+{
+	struct tsc2007 *ts;
+	struct tsc2007_platform_data *pdata = pdata = client->dev.platform_data;
+	struct input_dev *input_dev;
+	int err;
+
+	if (!pdata) {
+		dev_err(&client->dev, "platform data is required!\n");
+		return -EINVAL;
+	}
+
+	if (!i2c_check_functionality(client->adapter,
+				     I2C_FUNC_SMBUS_READ_WORD_DATA))
+		return -EIO;
+
+	ts = kzalloc(sizeof(struct tsc2007), GFP_KERNEL);
+	input_dev = input_allocate_device();
+	if (!ts || !input_dev) {
+		err = -ENOMEM;
+		goto err_free_mem;
+	}
+
+	ts->client = client;
+	i2c_set_clientdata(client, ts);
+
+	ts->input = input_dev;
+
+	hrtimer_init(&ts->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	ts->timer.function = tsc2007_timer;
+
+	spin_lock_init(&ts->lock);
+
+	ts->model             = pdata->model;
+	ts->x_plate_ohms      = pdata->x_plate_ohms;
+	ts->get_pendown_state = pdata->get_pendown_state;
+	ts->clear_penirq      = pdata->clear_penirq;
+
+	pdata->init_platform_hw();
+
+	snprintf(ts->phys, sizeof(ts->phys), "%s/input0", client->dev.bus_id);
+
+	input_dev->name = "TSC2007 Touchscreen";
+	input_dev->phys = ts->phys;
+	input_dev->id.bustype = BUS_I2C;
+
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+	input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
+
+	input_set_abs_params(input_dev, ABS_X, 0, MAX_12BIT, 0, 0);
+	input_set_abs_params(input_dev, ABS_Y, 0, MAX_12BIT, 0, 0);
+	input_set_abs_params(input_dev, ABS_PRESSURE, 0, MAX_12BIT, 0, 0);
+
+	tsc2007_read_values(ts);
+
+	ts->irq = client->irq;
+
+	err = request_irq(ts->irq, tsc2007_irq, 0,
+			client->dev.driver->name, ts);
+	if (err < 0) {
+		dev_err(&client->dev, "irq %d busy?\n", ts->irq);
+		goto err_free_mem;
+	}
+
+	err = input_register_device(input_dev);
+	if (err)
+		goto err_free_irq;
+
+	dev_info(&client->dev, "registered with irq (%d)\n", ts->irq);
+
+	return 0;
+
+ err_free_irq:
+	free_irq(ts->irq, ts);
+	hrtimer_cancel(&ts->timer);
+ err_free_mem:
+	input_free_device(input_dev);
+	kfree(ts);
+	return err;
+}
+
+static int tsc2007_remove(struct i2c_client *client)
+{
+	struct tsc2007	*ts = i2c_get_clientdata(client);
+	struct tsc2007_platform_data *pdata;
+
+	pdata = client->dev.platform_data;
+	pdata->exit_platform_hw();
+
+	free_irq(ts->irq, ts);
+	hrtimer_cancel(&ts->timer);
+	input_unregister_device(ts->input);
+	kfree(ts);
+
+	return 0;
+}
+
+static struct i2c_device_id tsc2007_idtable[] = {
+	{ "tsc2007", 0 },
+	{ }
+};
+
+MODULE_DEVICE_TABLE(i2c, tsc2007_idtable);
+
+static struct i2c_driver tsc2007_driver = {
+	.driver = {
+		.owner	= THIS_MODULE,
+		.name	= "tsc2007"
+	},
+	.id_table	= tsc2007_idtable,
+	.probe		= tsc2007_probe,
+	.remove		= tsc2007_remove,
+};
+
+static int __init tsc2007_init(void)
+{
+	return i2c_add_driver(&tsc2007_driver);
+}
+
+static void __exit tsc2007_exit(void)
+{
+	i2c_del_driver(&tsc2007_driver);
+}
+
+module_init(tsc2007_init);
+module_exit(tsc2007_exit);
+
+MODULE_AUTHOR("Kwangwoo Lee <kwlee@mtekvision.com>");
+MODULE_DESCRIPTION("TSC2007 TouchScreen Driver");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/i2c/tsc2007.h b/include/linux/i2c/tsc2007.h
new file mode 100644
index 000000000000..c6361fbb7bf9
--- /dev/null
+++ b/include/linux/i2c/tsc2007.h
@@ -0,0 +1,17 @@
+#ifndef __LINUX_I2C_TSC2007_H
+#define __LINUX_I2C_TSC2007_H
+
+/* linux/i2c/tsc2007.h */
+
+struct tsc2007_platform_data {
+	u16	model;				/* 2007. */
+	u16	x_plate_ohms;
+
+	int	(*get_pendown_state)(void);
+	void	(*clear_penirq)(void);		/* If needed, clear 2nd level
+						   interrupt source */
+	int	(*init_platform_hw)(void);
+	void	(*exit_platform_hw)(void);
+};
+
+#endif
-- 
cgit v1.2.3


From a01777ecf227de735d7e525ecda48fe74b838a17 Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@csr.com>
Date: Mon, 22 Dec 2008 18:30:29 +0000
Subject: uwb: remove unused include/linux/uwb/debug.h

Signed-off-by: David Vrabel <david.vrabel@csr.com>
---
 drivers/uwb/address.c                 |  2 +-
 drivers/uwb/driver.c                  |  2 +-
 drivers/uwb/i1480/i1480u-wlp/lc.c     |  2 +-
 drivers/uwb/i1480/i1480u-wlp/netdev.c |  2 +-
 drivers/uwb/i1480/i1480u-wlp/sysfs.c  |  2 +-
 include/linux/uwb/debug.h             | 82 -----------------------------------
 6 files changed, 5 insertions(+), 87 deletions(-)
 delete mode 100644 include/linux/uwb/debug.h

(limited to 'include/linux')

diff --git a/drivers/uwb/address.c b/drivers/uwb/address.c
index 1664ae5f1706..ad21b1d7218c 100644
--- a/drivers/uwb/address.c
+++ b/drivers/uwb/address.c
@@ -28,7 +28,7 @@
 #include <linux/device.h>
 #include <linux/random.h>
 #include <linux/etherdevice.h>
-#include <linux/uwb/debug.h>
+
 #include "uwb-internal.h"
 
 
diff --git a/drivers/uwb/driver.c b/drivers/uwb/driver.c
index f57c26580de2..da77e41de990 100644
--- a/drivers/uwb/driver.c
+++ b/drivers/uwb/driver.c
@@ -53,7 +53,7 @@
 #include <linux/err.h>
 #include <linux/kdev_t.h>
 #include <linux/random.h>
-#include <linux/uwb/debug.h>
+
 #include "uwb-internal.h"
 
 
diff --git a/drivers/uwb/i1480/i1480u-wlp/lc.c b/drivers/uwb/i1480/i1480u-wlp/lc.c
index 488b2e30a0a8..049c05d4cc6a 100644
--- a/drivers/uwb/i1480/i1480u-wlp/lc.c
+++ b/drivers/uwb/i1480/i1480u-wlp/lc.c
@@ -57,7 +57,7 @@
  */
 #include <linux/if_arp.h>
 #include <linux/etherdevice.h>
-#include <linux/uwb/debug.h>
+
 #include "i1480u-wlp.h"
 
 
diff --git a/drivers/uwb/i1480/i1480u-wlp/netdev.c b/drivers/uwb/i1480/i1480u-wlp/netdev.c
index 2eafb973cd05..e3873ffb942c 100644
--- a/drivers/uwb/i1480/i1480u-wlp/netdev.c
+++ b/drivers/uwb/i1480/i1480u-wlp/netdev.c
@@ -41,7 +41,7 @@
 
 #include <linux/if_arp.h>
 #include <linux/etherdevice.h>
-#include <linux/uwb/debug.h>
+
 #include "i1480u-wlp.h"
 
 struct i1480u_cmd_set_ip_mas {
diff --git a/drivers/uwb/i1480/i1480u-wlp/sysfs.c b/drivers/uwb/i1480/i1480u-wlp/sysfs.c
index a92a787725fc..4ffaf546cc6c 100644
--- a/drivers/uwb/i1480/i1480u-wlp/sysfs.c
+++ b/drivers/uwb/i1480/i1480u-wlp/sysfs.c
@@ -25,8 +25,8 @@
 
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
-#include <linux/uwb/debug.h>
 #include <linux/device.h>
+
 #include "i1480u-wlp.h"
 
 
diff --git a/include/linux/uwb/debug.h b/include/linux/uwb/debug.h
deleted file mode 100644
index 67a240527145..000000000000
--- a/include/linux/uwb/debug.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Ultra Wide Band
- * Debug Support
- *
- * Copyright (C) 2005-2006 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- *
- *
- * FIXME: doc
- * Invoke like:
- *
- * #define D_LOCAL 4
- * #include <linux/uwb/debug.h>
- *
- * At the end of your include files.
- */
-#include <linux/types.h>
-
-struct device;
-extern void dump_bytes(struct device *dev, const void *_buf, size_t rsize);
-
-/* Master debug switch; !0 enables, 0 disables */
-#define D_MASTER (!0)
-
-/* Local (per-file) debug switch; #define before #including */
-#ifndef D_LOCAL
-#define D_LOCAL 0
-#endif
-
-#undef __d_printf
-#undef d_fnstart
-#undef d_fnend
-#undef d_printf
-#undef d_dump
-
-#define __d_printf(l, _tag, _dev, f, a...)				\
-do {									\
-	struct device *__dev = (_dev);					\
-	if (D_MASTER && D_LOCAL >= (l)) {				\
-		char __head[64] = "";					\
-		if (_dev != NULL) {					\
-			if ((unsigned long)__dev < 4096)		\
-				printk(KERN_ERR "E: Corrupt dev %p\n",	\
-					__dev);				\
-			else						\
-				snprintf(__head, sizeof(__head),	\
-					 "%s %s: ",			\
-					 dev_driver_string(__dev),	\
-					 dev_name(__dev));		\
-		}							\
-		printk(KERN_ERR "%s%s" _tag ": " f, __head,		\
-			__func__, ## a);				\
-	}								\
-} while (0 && _dev)
-
-#define d_fnstart(l, _dev, f, a...)	\
-	__d_printf(l, " FNSTART", _dev, f, ## a)
-#define d_fnend(l, _dev, f, a...)	\
-	__d_printf(l, " FNEND", _dev, f, ## a)
-#define d_printf(l, _dev, f, a...)	\
-	__d_printf(l, "", _dev, f, ## a)
-#define d_dump(l, _dev, ptr, size)		\
-do {						\
-	struct device *__dev = _dev;		\
-	if (D_MASTER && D_LOCAL >= (l))		\
-		dump_bytes(__dev, ptr, size);	\
-} while (0 && _dev)
-#define d_test(l) (D_MASTER && D_LOCAL >= (l))
-- 
cgit v1.2.3


From 88a9fe8cae3bb52e82489447f45e8d7ba1409ca8 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 23 Dec 2008 15:21:31 -0500
Subject: SUNRPC: Remove the last remnant of the BKL...

Somehow, this escaped the previous purge. There should be no need to keep
any extra locks in the XDR callbacks.

The NFS client XDR code only writes into private objects, whereas all reads
of shared objects are confined to fields that do not change, such as
filehandles...

Ditto for lockd, the NFSv2/v3 client mount code, and rpcbind.

The nfsd XDR code may require the BKL, but since it does a synchronous RPC
call from a thread that already holds the lock, that issue is moot.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/xdr.h     | 15 ---------------
 net/sunrpc/auth.c              |  4 ++--
 net/sunrpc/auth_gss/auth_gss.c | 10 +++++-----
 3 files changed, 7 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index e4057d729f03..49e1eb454465 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -36,21 +36,6 @@ struct xdr_netobj {
  */
 typedef int	(*kxdrproc_t)(void *rqstp, __be32 *data, void *obj);
 
-/*
- * We're still requiring the BKL in the xdr code until it's been
- * more carefully audited, at which point this wrapper will become
- * unnecessary.
- */
-static inline int rpc_call_xdrproc(kxdrproc_t xdrproc, void *rqstp, __be32 *data, void *obj)
-{
-	int ret;
-
-	lock_kernel();
-	ret = xdrproc(rqstp, data, obj);
-	unlock_kernel();
-	return ret;
-}
-
 /*
  * Basic structure for transmission/reception of a client XDR message.
  * Features a header (for a linear buffer containing RPC headers
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index cb216b2df666..6e28744b1709 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -513,7 +513,7 @@ rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp,
 	if (cred->cr_ops->crwrap_req)
 		return cred->cr_ops->crwrap_req(task, encode, rqstp, data, obj);
 	/* By default, we encode the arguments normally. */
-	return rpc_call_xdrproc(encode, rqstp, data, obj);
+	return encode(rqstp, data, obj);
 }
 
 int
@@ -528,7 +528,7 @@ rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp,
 		return cred->cr_ops->crunwrap_resp(task, decode, rqstp,
 						   data, obj);
 	/* By default, we decode the arguments normally. */
-	return rpc_call_xdrproc(decode, rqstp, data, obj);
+	return decode(rqstp, data, obj);
 }
 
 int
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 853a4142cea1..b8561597f0c8 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1017,7 +1017,7 @@ gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
 	offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
 	*p++ = htonl(rqstp->rq_seqno);
 
-	status = rpc_call_xdrproc(encode, rqstp, p, obj);
+	status = encode(rqstp, p, obj);
 	if (status)
 		return status;
 
@@ -1111,7 +1111,7 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
 	offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
 	*p++ = htonl(rqstp->rq_seqno);
 
-	status = rpc_call_xdrproc(encode, rqstp, p, obj);
+	status = encode(rqstp, p, obj);
 	if (status)
 		return status;
 
@@ -1170,12 +1170,12 @@ gss_wrap_req(struct rpc_task *task,
 		/* The spec seems a little ambiguous here, but I think that not
 		 * wrapping context destruction requests makes the most sense.
 		 */
-		status = rpc_call_xdrproc(encode, rqstp, p, obj);
+		status = encode(rqstp, p, obj);
 		goto out;
 	}
 	switch (gss_cred->gc_service) {
 		case RPC_GSS_SVC_NONE:
-			status = rpc_call_xdrproc(encode, rqstp, p, obj);
+			status = encode(rqstp, p, obj);
 			break;
 		case RPC_GSS_SVC_INTEGRITY:
 			status = gss_wrap_req_integ(cred, ctx, encode,
@@ -1291,7 +1291,7 @@ gss_unwrap_resp(struct rpc_task *task,
 	cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize + (p - savedp)
 						+ (savedlen - head->iov_len);
 out_decode:
-	status = rpc_call_xdrproc(decode, rqstp, p, obj);
+	status = decode(rqstp, p, obj);
 out:
 	gss_put_ctx(ctx);
 	dprintk("RPC: %5u gss_unwrap_resp returning %d\n", task->tk_pid,
-- 
cgit v1.2.3


From 146ec944bbd31d241a44a00518b054fb01921d22 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 23 Dec 2008 15:21:34 -0500
Subject: NFS: Move declaration of nfs_mount() to fs/nfs/internal.h

Clean up:  The nfs_mount() function is not to be used outside of the
NFS client.  Move its public declaration to fs/nfs/internal.h.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/internal.h      | 4 ++++
 fs/nfs/nfsroot.c       | 2 ++
 include/linux/nfs_fs.h | 6 ------
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index d212ee41caf2..7a38cc7b4137 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -63,6 +63,10 @@ struct nfs_parsed_mount_data {
 	struct security_mnt_opts lsm_opts;
 };
 
+/* mount_clnt.c */
+extern int nfs_mount(struct sockaddr *, size_t, char *, char *,
+					int, int, struct nfs_fh *);
+
 /* client.c */
 extern struct rpc_program nfs_program;
 
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index f1c0a066439f..a96e5fdb38af 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -86,6 +86,8 @@
 #include <net/ipconfig.h>
 #include <linux/parser.h>
 
+#include "internal.h"
+
 /* Define this to allow debugging output */
 #undef NFSROOT_DEBUG
 #define NFSDBG_FACILITY NFSDBG_ROOT
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 4eaa8347a0d9..f11077285a6c 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -532,12 +532,6 @@ static inline void nfs3_forget_cached_acls(struct inode *inode)
 }
 #endif /* CONFIG_NFS_V3_ACL */
 
-/*
- * linux/fs/mount_clnt.c
- */
-extern int  nfs_mount(struct sockaddr *, size_t, char *, char *,
-		      int, int, struct nfs_fh *);
-
 /*
  * inline functions
  */
-- 
cgit v1.2.3


From d740351bf0960e89ce1aef45cfe00167cb0f9e5b Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 23 Dec 2008 15:21:37 -0500
Subject: NFS: add "[no]resvport" mount option

The standard default security setting for NFS is AUTH_SYS.  An NFS
client connects to NFS servers via a privileged source port and a
fixed standard destination port (2049).  The client sends raw uid and
gid numbers to identify users making NFS requests, and the server
assumes an appropriate authority on the client has vetted these
values because the source port is privileged.

On Linux, by default in-kernel RPC services use a privileged port in
the range between 650 and 1023 to avoid using source ports of well-
known IP services.  Using such a small range limits the number of NFS
mount points and the number of unique NFS servers to which a client
can connect concurrently.

An NFS client can use unprivileged source ports to expand the range of
source port numbers, allowing more concurrent server connections and
more NFS mount points.  Servers must explicitly allow NFS connections
from unprivileged ports for this to work.

In the past, bumping the value of the sunrpc.max_resvport sysctl on
the client would permit the NFS client to use unprivileged ports.
Bumping this setting also changes the maximum port number used by
other in-kernel RPC services, some of which still required a port
number less than 1023.

This is exacerbated by the way source port numbers are chosen by the
Linux RPC client, which starts at the top of the range and works
downwards.  It means that bumping the maximum means all RPC services
requesting a source port will likely get an unprivileged port instead
of a privileged one.

Changing this setting effects all NFS mount points on a client.  A
sysadmin could not selectively choose which mount points would use
non-privileged ports and which could not.

Lastly, this mechanism of expanding the limit on the number of NFS
mount points was entirely undocumented.

To address the need for the NFS client to use a large range of source
ports without interfering with the activity of other in-kernel RPC
services, we introduce a new NFS mount option.  This option explicitly
tells only the NFS client to use a non-privileged source port when
communicating with the NFS server for one specific mount point.

This new mount option is called "resvport," like the similar NFS mount
option on FreeBSD and Mac OS X.  A sister patch for nfs-utils will be
submitted that documents this new option in nfs(5).

The default setting for this new mount option requires the NFS client
to use a privileged port, as before.  Explicitly specifying the
"noresvport" mount option allows the NFS client to use an unprivileged
source port for this mount point when connecting to the NFS server
port.

This mount option is supported only for text-based NFS mounts.

[ Sidebar: it is widely known that security mechanisms based on the
  use of privileged source ports are ineffective.  However, the NFS
  client can combine the use of unprivileged ports with the use of
  secure authentication mechanisms, such as Kerberos.  This allows a
  large number of connections and mount points while ensuring a useful
  level of security.

  Eventually we may change the default setting for this option
  depending on the security flavor used for the mount.  For example,
  if the mount is using only AUTH_SYS, then the default setting will
  be "resvport;" if the mount is using a strong security flavor such
  as krb5, the default setting will be "noresvport." ]

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
[Trond.Myklebust@netapp.com: Fixed a bug whereby nfs4_init_client()
was being called with incorrect arguments.]
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c           | 11 +++++++----
 fs/nfs/super.c            | 12 +++++++++++-
 include/linux/nfs_mount.h |  3 ++-
 3 files changed, 20 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 27190337fc13..3a69cacc4fa4 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -627,7 +627,8 @@ static int nfs_init_client(struct nfs_client *clp,
 	 * Create a client RPC handle for doing FSSTAT with UNIX auth only
 	 * - RFC 2623, sec 2.3.2
 	 */
-	error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX, 0, 0);
+	error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX,
+				      0, data->flags & NFS_MOUNT_NORESVPORT);
 	if (error < 0)
 		goto error;
 	nfs_mark_client_ready(clp, NFS_CS_READY);
@@ -969,7 +970,8 @@ error:
 static int nfs4_init_client(struct nfs_client *clp,
 		const struct rpc_timeout *timeparms,
 		const char *ip_addr,
-		rpc_authflavor_t authflavour)
+		rpc_authflavor_t authflavour,
+		int flags)
 {
 	int error;
 
@@ -983,7 +985,7 @@ static int nfs4_init_client(struct nfs_client *clp,
 	clp->rpc_ops = &nfs_v4_clientops;
 
 	error = nfs_create_rpc_client(clp, timeparms, authflavour,
-				      1, 0);
+				      1, flags & NFS_MOUNT_NORESVPORT);
 	if (error < 0)
 		goto error;
 	memcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr));
@@ -1034,7 +1036,8 @@ static int nfs4_set_client(struct nfs_server *server,
 		error = PTR_ERR(clp);
 		goto error;
 	}
-	error = nfs4_init_client(clp, timeparms, ip_addr, authflavour);
+	error = nfs4_init_client(clp, timeparms, ip_addr, authflavour,
+					server->flags);
 	if (error < 0)
 		goto error_put;
 
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 2b0c8e132b54..e05a77be3068 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -75,6 +75,7 @@ enum {
 	Opt_acl, Opt_noacl,
 	Opt_rdirplus, Opt_nordirplus,
 	Opt_sharecache, Opt_nosharecache,
+	Opt_resvport, Opt_noresvport,
 
 	/* Mount options that take integer arguments */
 	Opt_port,
@@ -129,6 +130,8 @@ static const match_table_t nfs_mount_option_tokens = {
 	{ Opt_nordirplus, "nordirplus" },
 	{ Opt_sharecache, "sharecache" },
 	{ Opt_nosharecache, "nosharecache" },
+	{ Opt_resvport, "resvport" },
+	{ Opt_noresvport, "noresvport" },
 
 	{ Opt_port, "port=%u" },
 	{ Opt_rsize, "rsize=%u" },
@@ -514,7 +517,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
 		{ NFS_MOUNT_NONLM, ",nolock", "" },
 		{ NFS_MOUNT_NOACL, ",noacl", "" },
 		{ NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" },
-		{ NFS_MOUNT_UNSHARED, ",nosharecache", ""},
+		{ NFS_MOUNT_UNSHARED, ",nosharecache", "" },
+		{ NFS_MOUNT_NORESVPORT, ",noresvport", "" },
 		{ 0, NULL, NULL }
 	};
 	const struct proc_nfs_info *nfs_infop;
@@ -1035,6 +1039,12 @@ static int nfs_parse_mount_options(char *raw,
 		case Opt_nosharecache:
 			mnt->flags |= NFS_MOUNT_UNSHARED;
 			break;
+		case Opt_resvport:
+			mnt->flags &= ~NFS_MOUNT_NORESVPORT;
+			break;
+		case Opt_noresvport:
+			mnt->flags |= NFS_MOUNT_NORESVPORT;
+			break;
 
 		/*
 		 * options that take numeric values
diff --git a/include/linux/nfs_mount.h b/include/linux/nfs_mount.h
index 6549a06ac16e..4499016e6d0d 100644
--- a/include/linux/nfs_mount.h
+++ b/include/linux/nfs_mount.h
@@ -45,7 +45,7 @@ struct nfs_mount_data {
 	char		context[NFS_MAX_CONTEXT_LEN + 1];	/* 6 */
 };
 
-/* bits in the flags field */
+/* bits in the flags field visible to user space */
 
 #define NFS_MOUNT_SOFT		0x0001	/* 1 */
 #define NFS_MOUNT_INTR		0x0002	/* 1 */ /* now unused, but ABI */
@@ -68,5 +68,6 @@ struct nfs_mount_data {
 /* The following are for internal use only */
 #define NFS_MOUNT_LOOKUP_CACHE_NONEG	0x10000
 #define NFS_MOUNT_LOOKUP_CACHE_NONE	0x20000
+#define NFS_MOUNT_NORESVPORT		0x40000
 
 #endif
-- 
cgit v1.2.3


From 0cb2659b818eca99235e17c04291cfa9985c14f7 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 23 Dec 2008 15:21:38 -0500
Subject: NLM: allow lockd requests from an unprivileged port

If the admin has specified the "noresvport" option for an NFS mount
point, the kernel's NFS client uses an unprivileged source port for
the main NFS transport.  The kernel's lockd client should use an
unprivileged port in this case as well.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/clntlock.c         |  2 +-
 fs/lockd/host.c             | 10 +++++++++-
 fs/nfs/client.c             |  2 ++
 include/linux/lockd/bind.h  |  1 +
 include/linux/lockd/lockd.h |  4 +++-
 5 files changed, 16 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 94d42cc4e393..1f3b0fc0d351 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -61,7 +61,7 @@ struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init)
 
 	host = nlmclnt_lookup_host(nlm_init->address, nlm_init->addrlen,
 				   nlm_init->protocol, nlm_version,
-				   nlm_init->hostname);
+				   nlm_init->hostname, nlm_init->noresvport);
 	if (host == NULL) {
 		lockd_down();
 		return ERR_PTR(-ENOLCK);
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 70fc63a1727b..acc2aa5021d1 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -48,6 +48,7 @@ struct nlm_lookup_host_info {
 	const size_t		hostname_len;	/* it's length */
 	const struct sockaddr	*src_sap;	/* our address (optional) */
 	const size_t		src_len;	/* it's length */
+	const int		noresvport;	/* use non-priv port */
 };
 
 /*
@@ -222,6 +223,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni)
 	host->h_nsmstate   = 0;			/* real NSM state */
 	host->h_nsmhandle  = nsm;
 	host->h_server	   = ni->server;
+	host->h_noresvport = ni->noresvport;
 	hlist_add_head(&host->h_hash, chain);
 	INIT_LIST_HEAD(&host->h_lockowners);
 	spin_lock_init(&host->h_lock);
@@ -272,6 +274,7 @@ nlm_destroy_host(struct nlm_host *host)
  * @protocol: transport protocol to use
  * @version: NLM protocol version
  * @hostname: '\0'-terminated hostname of server
+ * @noresvport: 1 if non-privileged port should be used
  *
  * Returns an nlm_host structure that matches the passed-in
  * [server address, transport protocol, NLM version, server hostname].
@@ -281,7 +284,9 @@ nlm_destroy_host(struct nlm_host *host)
 struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
 				     const size_t salen,
 				     const unsigned short protocol,
-				     const u32 version, const char *hostname)
+				     const u32 version,
+				     const char *hostname,
+				     int noresvport)
 {
 	const struct sockaddr source = {
 		.sa_family	= AF_UNSPEC,
@@ -296,6 +301,7 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
 		.hostname_len	= strlen(hostname),
 		.src_sap	= &source,
 		.src_len	= sizeof(source),
+		.noresvport	= noresvport,
 	};
 
 	dprintk("lockd: %s(host='%s', vers=%u, proto=%s)\n", __func__,
@@ -417,6 +423,8 @@ nlm_bind_host(struct nlm_host *host)
 		 */
 		if (!host->h_server)
 			args.flags |= RPC_CLNT_CREATE_HARDRTRY;
+		if (host->h_noresvport)
+			args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
 
 		clnt = rpc_create(&args);
 		if (!IS_ERR(clnt))
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 3a69cacc4fa4..70b6d9e8517d 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -526,6 +526,8 @@ static int nfs_start_lockd(struct nfs_server *server)
 		.protocol	= server->flags & NFS_MOUNT_TCP ?
 						IPPROTO_TCP : IPPROTO_UDP,
 		.nfs_version	= clp->rpc_ops->version,
+		.noresvport	= server->flags & NFS_MOUNT_NORESVPORT ?
+					1 : 0,
 	};
 
 	if (nlm_init.nfs_version > 3)
diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h
index e5872dc994c0..fbc48f898521 100644
--- a/include/linux/lockd/bind.h
+++ b/include/linux/lockd/bind.h
@@ -41,6 +41,7 @@ struct nlmclnt_initdata {
 	size_t			addrlen;
 	unsigned short		protocol;
 	u32			nfs_version;
+	int			noresvport;
 };
 
 /*
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index b56d5aa9b194..23da3fa69efa 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -49,6 +49,7 @@ struct nlm_host {
 	unsigned short		h_proto;	/* transport proto */
 	unsigned short		h_reclaiming : 1,
 				h_server     : 1, /* server side, not client side */
+				h_noresvport : 1,
 				h_inuse      : 1;
 	wait_queue_head_t	h_gracewait;	/* wait while reclaiming */
 	struct rw_semaphore	h_rwsem;	/* Reboot recovery lock */
@@ -220,7 +221,8 @@ struct nlm_host  *nlmclnt_lookup_host(const struct sockaddr *sap,
 					const size_t salen,
 					const unsigned short protocol,
 					const u32 version,
-					const char *hostname);
+					const char *hostname,
+					int noresvport);
 struct nlm_host  *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
 					const char *hostname,
 					const size_t hostname_len);
-- 
cgit v1.2.3


From 95d35cb4c473c754824967c0b069bbeb7efa4847 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 23 Dec 2008 15:21:45 -0500
Subject: NFSv4: Remove nfs_client->cl_sem

Now that we're using the flags to indicate state that needs to be
recovered, as well as having implemented proper refcounting and spinlocking
on the state and open_owners, we can get rid of nfs_client->cl_sem. The
only remaining case that was dubious was the file locking, and that case is
now covered by the nfsi->rwsem.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c           |  1 -
 fs/nfs/delegation.c       |  5 -----
 fs/nfs/nfs4proc.c         | 20 +-------------------
 fs/nfs/nfs4renewd.c       |  3 ---
 fs/nfs/nfs4state.c        | 17 -----------------
 include/linux/nfs_fs_sb.h |  6 ------
 6 files changed, 1 insertion(+), 51 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 70b6d9e8517d..b643f0ec5c21 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -143,7 +143,6 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
 	clp->cl_proto = cl_init->proto;
 
 #ifdef CONFIG_NFS_V4
-	init_rwsem(&clp->cl_sem);
 	INIT_LIST_HEAD(&clp->cl_delegations);
 	spin_lock_init(&clp->cl_lock);
 	INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state);
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 646ba3e75a1e..ebc06f2da31a 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -243,16 +243,13 @@ static void nfs_msync_inode(struct inode *inode)
  */
 static int __nfs_inode_return_delegation(struct inode *inode, struct nfs_delegation *delegation)
 {
-	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
 	struct nfs_inode *nfsi = NFS_I(inode);
 
 	nfs_msync_inode(inode);
-	down_read(&clp->cl_sem);
 	/* Guard against new delegated open calls */
 	down_write(&nfsi->rwsem);
 	nfs_delegation_claim_opens(inode, &delegation->stateid);
 	up_write(&nfsi->rwsem);
-	up_read(&clp->cl_sem);
 	nfs_msync_inode(inode);
 
 	return nfs_do_return_delegation(inode, delegation, 1);
@@ -425,7 +422,6 @@ static int recall_thread(void *data)
 	daemonize("nfsv4-delegreturn");
 
 	nfs_msync_inode(inode);
-	down_read(&clp->cl_sem);
 	down_write(&nfsi->rwsem);
 	spin_lock(&clp->cl_lock);
 	delegation = nfs_detach_delegation_locked(nfsi, args->stateid);
@@ -437,7 +433,6 @@ static int recall_thread(void *data)
 	complete(&args->started);
 	nfs_delegation_claim_opens(inode, args->stateid);
 	up_write(&nfsi->rwsem);
-	up_read(&clp->cl_sem);
 	nfs_msync_inode(inode);
 
 	if (delegation != NULL)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index aec4e47c462d..26dcd77abb07 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -207,12 +207,8 @@ static int nfs4_wait_clnt_recover(struct nfs_client *clp)
 
 	might_sleep();
 
-	rwsem_acquire(&clp->cl_sem.dep_map, 0, 0, _RET_IP_);
-
 	res = wait_on_bit(&clp->cl_state, NFS4CLNT_STATE_RECOVER,
 			nfs4_wait_bit_killable, TASK_KILLABLE);
-
-	rwsem_release(&clp->cl_sem.dep_map, 1, _RET_IP_);
 	return res;
 }
 
@@ -1135,7 +1131,6 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, int flags, struct
 	struct nfs4_state_owner  *sp;
 	struct nfs4_state     *state = NULL;
 	struct nfs_server       *server = NFS_SERVER(dir);
-	struct nfs_client *clp = server->nfs_client;
 	struct nfs4_opendata *opendata;
 	int status;
 
@@ -1150,11 +1145,10 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, int flags, struct
 		goto err_put_state_owner;
 	if (path->dentry->d_inode != NULL)
 		nfs4_return_incompatible_delegation(path->dentry->d_inode, flags & (FMODE_READ|FMODE_WRITE));
-	down_read(&clp->cl_sem);
 	status = -ENOMEM;
 	opendata = nfs4_opendata_alloc(path, sp, flags, sattr);
 	if (opendata == NULL)
-		goto err_release_rwsem;
+		goto err_put_state_owner;
 
 	if (path->dentry->d_inode != NULL)
 		opendata->state = nfs4_get_open_state(path->dentry->d_inode, sp);
@@ -1172,13 +1166,10 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, int flags, struct
 		goto err_opendata_put;
 	nfs4_opendata_put(opendata);
 	nfs4_put_state_owner(sp);
-	up_read(&clp->cl_sem);
 	*res = state;
 	return 0;
 err_opendata_put:
 	nfs4_opendata_put(opendata);
-err_release_rwsem:
-	up_read(&clp->cl_sem);
 err_put_state_owner:
 	nfs4_put_state_owner(sp);
 out_err:
@@ -3099,7 +3090,6 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
 	struct nfs4_lock_state *lsp;
 	int status;
 
-	down_read(&clp->cl_sem);
 	arg.lock_owner.clientid = clp->cl_clientid;
 	status = nfs4_set_lock_state(state, request);
 	if (status != 0)
@@ -3116,7 +3106,6 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
 	}
 	request->fl_ops->fl_release_private(request);
 out:
-	up_read(&clp->cl_sem);
 	return status;
 }
 
@@ -3273,7 +3262,6 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
 
 static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request)
 {
-	struct nfs_client *clp = state->owner->so_client;
 	struct nfs_inode *nfsi = NFS_I(state->inode);
 	struct nfs_seqid *seqid;
 	struct nfs4_lock_state *lsp;
@@ -3284,15 +3272,12 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
 	status = nfs4_set_lock_state(state, request);
 	/* Unlock _before_ we do the RPC call */
 	request->fl_flags |= FL_EXISTS;
-	down_read(&clp->cl_sem);
 	down_read(&nfsi->rwsem);
 	if (do_vfs_lock(request->fl_file, request) == -ENOENT) {
 		up_read(&nfsi->rwsem);
-		up_read(&clp->cl_sem);
 		goto out;
 	}
 	up_read(&nfsi->rwsem);
-	up_read(&clp->cl_sem);
 	if (status != 0)
 		goto out;
 	/* Is this a delegated lock? */
@@ -3518,7 +3503,6 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request
 
 static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
 {
-	struct nfs_client *clp = state->owner->so_client;
 	struct nfs_inode *nfsi = NFS_I(state->inode);
 	unsigned char fl_flags = request->fl_flags;
 	int status;
@@ -3531,7 +3515,6 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
 	status = do_vfs_lock(request->fl_file, request);
 	if (status < 0)
 		goto out;
-	down_read(&clp->cl_sem);
 	down_read(&nfsi->rwsem);
 	if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
 		/* Yes: cache locks! */
@@ -3549,7 +3532,6 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
 		printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __func__);
 out_unlock:
 	up_read(&nfsi->rwsem);
-	up_read(&clp->cl_sem);
 out:
 	request->fl_flags = fl_flags;
 	return status;
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index 9fe8640a88eb..6101f955f231 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -65,7 +65,6 @@ nfs4_renew_state(struct work_struct *work)
 	long lease, timeout;
 	unsigned long last, now;
 
-	down_read(&clp->cl_sem);
 	dprintk("%s: start\n", __func__);
 	/* Are there any active superblocks? */
 	if (list_empty(&clp->cl_superblocks))
@@ -101,11 +100,9 @@ nfs4_renew_state(struct work_struct *work)
 	schedule_delayed_work(&clp->cl_renewd, timeout);
 	spin_unlock(&clp->cl_lock);
 out:
-	up_read(&clp->cl_sem);
 	dprintk("%s: done\n", __func__);
 }
 
-/* Must be called with clp->cl_sem locked for writes */
 void
 nfs4_schedule_state_renewal(struct nfs_client *clp)
 {
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 16c9fbdf97b4..ec0cbe00a804 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -305,10 +305,6 @@ nfs4_drop_state_owner(struct nfs4_state_owner *sp)
 	}
 }
 
-/*
- * Note: must be called with clp->cl_sem held in order to prevent races
- *       with reboot recovery!
- */
 struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred)
 {
 	struct nfs_client *clp = server->nfs_client;
@@ -337,10 +333,6 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct
 	return sp;
 }
 
-/*
- * Must be called with clp->cl_sem held in order to avoid races
- * with state recovery...
- */
 void nfs4_put_state_owner(struct nfs4_state_owner *sp)
 {
 	struct nfs_client *clp = sp->so_client;
@@ -442,10 +434,6 @@ out:
 	return state;
 }
 
-/*
- * Beware! Caller must be holding exactly one
- * reference to clp->cl_sem!
- */
 void nfs4_put_open_state(struct nfs4_state *state)
 {
 	struct inode *inode = state->inode;
@@ -578,7 +566,6 @@ static void nfs4_free_lock_state(struct nfs4_lock_state *lsp)
  * Return a compatible lock_state. If no initialized lock_state structure
  * exists, return an uninitialized one.
  *
- * The caller must be holding clp->cl_sem
  */
 static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner)
 {
@@ -1127,7 +1114,6 @@ static int reclaimer(void *ptr)
 	allow_signal(SIGKILL);
 
 	/* Ensure exclusive access to NFSv4 state */
-	down_write(&clp->cl_sem);
 	while (!list_empty(&clp->cl_superblocks)) {
 		if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) {
 			/* We're going to have to re-establish a clientid */
@@ -1149,7 +1135,6 @@ static int reclaimer(void *ptr)
 
 		/* First recover reboot state... */
 		if (test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) {
-			/* Note: list is protected by exclusive lock on cl->cl_sem */
 			status = nfs4_do_reclaim(clp, &nfs4_reboot_recovery_ops);
 			if (status == -NFS4ERR_STALE_CLIENTID)
 				continue;
@@ -1159,7 +1144,6 @@ static int reclaimer(void *ptr)
 
 		/* Now recover expired state... */
 		if (test_and_clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) {
-			/* Note: list is protected by exclusive lock on cl->cl_sem */
 			status = nfs4_do_reclaim(clp, &nfs4_nograce_recovery_ops);
 			if (status < 0) {
 				set_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state);
@@ -1175,7 +1159,6 @@ static int reclaimer(void *ptr)
 		break;
 	}
 out:
-	up_write(&clp->cl_sem);
 	if (test_and_clear_bit(NFS4CLNT_CB_PATH_DOWN, &clp->cl_state))
 		nfs_handle_cb_pathdown(clp);
 	nfs4_clear_recover_bit(clp);
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 4e477ae58699..9bb81aec91cf 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -42,12 +42,6 @@ struct nfs_client {
 	struct rb_root		cl_openowner_id;
 	struct rb_root		cl_lockowner_id;
 
-	/*
-	 * The following rwsem ensures exclusive access to the server
-	 * while we recover the state following a lease expiration.
-	 */
-	struct rw_semaphore	cl_sem;
-
 	struct list_head	cl_delegations;
 	struct rb_root		cl_state_owners;
 	spinlock_t		cl_lock;
-- 
cgit v1.2.3


From bd7bf9d540c001055fba796ebf146d90e4dd2eb2 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 23 Dec 2008 15:21:53 -0500
Subject: NFSv4: Convert delegation->type field to fmode_t

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/delegation.c     | 2 +-
 fs/nfs/delegation.h     | 6 +++---
 fs/nfs/nfs4xdr.c        | 4 ++--
 include/linux/nfs_fs.h  | 2 +-
 include/linux/nfs_xdr.h | 4 ++--
 5 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index e75f2f8c5245..968225a88015 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -48,7 +48,7 @@ void nfs_mark_delegation_referenced(struct nfs_delegation *delegation)
 	set_bit(NFS_DELEGATION_REFERENCED, &delegation->flags);
 }
 
-int nfs_have_delegation(struct inode *inode, int flags)
+int nfs_have_delegation(struct inode *inode, fmode_t flags)
 {
 	struct nfs_delegation *delegation;
 	int ret = 0;
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 2a74882e5d50..09f383795174 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -17,7 +17,7 @@ struct nfs_delegation {
 	struct rpc_cred *cred;
 	struct inode *inode;
 	nfs4_stateid stateid;
-	int type;
+	fmode_t type;
 	loff_t maxsize;
 	__u64 change_attr;
 	unsigned long flags;
@@ -54,10 +54,10 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl);
 int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode);
 
 void nfs_mark_delegation_referenced(struct nfs_delegation *delegation);
-int nfs_have_delegation(struct inode *inode, int flags);
+int nfs_have_delegation(struct inode *inode, fmode_t flags);
 
 #else
-static inline int nfs_have_delegation(struct inode *inode, int flags)
+static inline int nfs_have_delegation(struct inode *inode, fmode_t flags)
 {
 	return 0;
 }
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index b916297d2334..879911c99030 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1024,7 +1024,7 @@ static void encode_opentype(struct xdr_stream *xdr, const struct nfs_openargs *a
 	}
 }
 
-static inline void encode_delegation_type(struct xdr_stream *xdr, int delegation_type)
+static inline void encode_delegation_type(struct xdr_stream *xdr, fmode_t delegation_type)
 {
 	__be32 *p;
 
@@ -1053,7 +1053,7 @@ static inline void encode_claim_null(struct xdr_stream *xdr, const struct qstr *
 	encode_string(xdr, name->len, name->name);
 }
 
-static inline void encode_claim_previous(struct xdr_stream *xdr, int type)
+static inline void encode_claim_previous(struct xdr_stream *xdr, fmode_t type)
 {
 	__be32 *p;
 
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index f11077285a6c..8d71d7b7c78a 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -180,7 +180,7 @@ struct nfs_inode {
         /* NFSv4 state */
 	struct list_head	open_states;
 	struct nfs_delegation	*delegation;
-	int			 delegation_state;
+	fmode_t			 delegation_state;
 	struct rw_semaphore	rwsem;
 #endif /* CONFIG_NFS_V4*/
 	struct inode		vfs_inode;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index c1c31acb8a2b..32c1a0ecdbff 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -126,7 +126,7 @@ struct nfs_openargs {
 		struct iattr *  attrs;    /* UNCHECKED, GUARDED */
 		nfs4_verifier   verifier; /* EXCLUSIVE */
 		nfs4_stateid	delegation;		/* CLAIM_DELEGATE_CUR */
-		int		delegation_type;	/* CLAIM_PREVIOUS */
+		fmode_t		delegation_type;	/* CLAIM_PREVIOUS */
 	} u;
 	const struct qstr *	name;
 	const struct nfs_server *server;	 /* Needed for ID mapping */
@@ -143,7 +143,7 @@ struct nfs_openres {
 	struct nfs_fattr *      dir_attr;
 	struct nfs_seqid *	seqid;
 	const struct nfs_server *server;
-	int			delegation_type;
+	fmode_t			delegation_type;
 	nfs4_stateid		delegation;
 	__u32			do_recall;
 	__u64			maxsize;
-- 
cgit v1.2.3


From dc0b027dfadfcb8a5504f7d8052754bf8d501ab9 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 23 Dec 2008 15:21:56 -0500
Subject: NFSv4: Convert the open and close ops to use fmode

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c          |   2 +-
 fs/nfs/nfs4_fs.h        |   8 +--
 fs/nfs/nfs4proc.c       | 126 ++++++++++++++++++++++++++----------------------
 fs/nfs/nfs4state.c      |  24 ++++-----
 fs/nfs/nfs4xdr.c        |  10 ++--
 include/linux/nfs_fs.h  |   4 +-
 include/linux/nfs_xdr.h |   3 +-
 7 files changed, 94 insertions(+), 83 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index d22eb383e1cf..1cf39202c3ea 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -592,7 +592,7 @@ static void nfs_file_set_open_context(struct file *filp, struct nfs_open_context
 /*
  * Given an inode, search for an open context with the desired characteristics
  */
-struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, int mode)
+struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, fmode_t mode)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_open_context *pos, *ctx = NULL;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index ee5f51ef696d..d5f5efaf2b26 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -161,7 +161,7 @@ struct nfs4_state {
 	unsigned int n_rdonly;		/* Number of read-only references */
 	unsigned int n_wronly;		/* Number of write-only references */
 	unsigned int n_rdwr;		/* Number of read/write references */
-	int state;			/* State on the server (R,W, or RW) */
+	fmode_t state;			/* State on the server (R,W, or RW) */
 	atomic_t count;
 };
 
@@ -223,9 +223,9 @@ extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struc
 extern void nfs4_put_state_owner(struct nfs4_state_owner *);
 extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
 extern void nfs4_put_open_state(struct nfs4_state *);
-extern void nfs4_close_state(struct path *, struct nfs4_state *, mode_t);
-extern void nfs4_close_sync(struct path *, struct nfs4_state *, mode_t);
-extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t);
+extern void nfs4_close_state(struct path *, struct nfs4_state *, fmode_t);
+extern void nfs4_close_sync(struct path *, struct nfs4_state *, fmode_t);
+extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t);
 extern void nfs4_schedule_state_recovery(struct nfs_client *);
 extern void nfs4_schedule_state_manager(struct nfs_client *);
 extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index fc0c9d255cf7..e8df52dc2bc5 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -323,7 +323,7 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p)
 }
 
 static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path,
-		struct nfs4_state_owner *sp, int flags,
+		struct nfs4_state_owner *sp, fmode_t fmode, int flags,
 		const struct iattr *attrs)
 {
 	struct dentry *parent = dget_parent(path->dentry);
@@ -343,7 +343,8 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path,
 	p->owner = sp;
 	atomic_inc(&sp->so_count);
 	p->o_arg.fh = NFS_FH(dir);
-	p->o_arg.open_flags = flags,
+	p->o_arg.open_flags = flags;
+	p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE);
 	p->o_arg.clientid = server->nfs_client->cl_clientid;
 	p->o_arg.id = sp->so_owner_id.id;
 	p->o_arg.name = &p->path.dentry->d_name;
@@ -399,10 +400,13 @@ static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task)
 	return ret;
 }
 
-static int can_open_cached(struct nfs4_state *state, int mode)
+static int can_open_cached(struct nfs4_state *state, fmode_t mode, int open_mode)
 {
 	int ret = 0;
-	switch (mode & (FMODE_READ|FMODE_WRITE|O_EXCL)) {
+
+	if (open_mode & O_EXCL)
+		goto out;
+	switch (mode & (FMODE_READ|FMODE_WRITE)) {
 		case FMODE_READ:
 			ret |= test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0;
 			break;
@@ -412,12 +416,13 @@ static int can_open_cached(struct nfs4_state *state, int mode)
 		case FMODE_READ|FMODE_WRITE:
 			ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0;
 	}
+out:
 	return ret;
 }
 
-static int can_open_delegated(struct nfs_delegation *delegation, mode_t open_flags)
+static int can_open_delegated(struct nfs_delegation *delegation, fmode_t fmode)
 {
-	if ((delegation->type & open_flags) != open_flags)
+	if ((delegation->type & fmode) != fmode)
 		return 0;
 	if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags))
 		return 0;
@@ -425,9 +430,9 @@ static int can_open_delegated(struct nfs_delegation *delegation, mode_t open_fla
 	return 1;
 }
 
-static void update_open_stateflags(struct nfs4_state *state, mode_t open_flags)
+static void update_open_stateflags(struct nfs4_state *state, fmode_t fmode)
 {
-	switch (open_flags) {
+	switch (fmode) {
 		case FMODE_WRITE:
 			state->n_wronly++;
 			break;
@@ -437,15 +442,15 @@ static void update_open_stateflags(struct nfs4_state *state, mode_t open_flags)
 		case FMODE_READ|FMODE_WRITE:
 			state->n_rdwr++;
 	}
-	nfs4_state_set_mode_locked(state, state->state | open_flags);
+	nfs4_state_set_mode_locked(state, state->state | fmode);
 }
 
-static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags)
+static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode)
 {
 	if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
 		memcpy(state->stateid.data, stateid->data, sizeof(state->stateid.data));
 	memcpy(state->open_stateid.data, stateid->data, sizeof(state->open_stateid.data));
-	switch (open_flags) {
+	switch (fmode) {
 		case FMODE_READ:
 			set_bit(NFS_O_RDONLY_STATE, &state->flags);
 			break;
@@ -457,14 +462,14 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *
 	}
 }
 
-static void nfs_set_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags)
+static void nfs_set_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode)
 {
 	write_seqlock(&state->seqlock);
-	nfs_set_open_stateid_locked(state, stateid, open_flags);
+	nfs_set_open_stateid_locked(state, stateid, fmode);
 	write_sequnlock(&state->seqlock);
 }
 
-static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, const nfs4_stateid *deleg_stateid, int open_flags)
+static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, const nfs4_stateid *deleg_stateid, fmode_t fmode)
 {
 	/*
 	 * Protect the call to nfs4_state_set_mode_locked and
@@ -476,20 +481,20 @@ static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_s
 		set_bit(NFS_DELEGATED_STATE, &state->flags);
 	}
 	if (open_stateid != NULL)
-		nfs_set_open_stateid_locked(state, open_stateid, open_flags);
+		nfs_set_open_stateid_locked(state, open_stateid, fmode);
 	write_sequnlock(&state->seqlock);
 	spin_lock(&state->owner->so_lock);
-	update_open_stateflags(state, open_flags);
+	update_open_stateflags(state, fmode);
 	spin_unlock(&state->owner->so_lock);
 }
 
-static int update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, nfs4_stateid *delegation, int open_flags)
+static int update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, nfs4_stateid *delegation, fmode_t fmode)
 {
 	struct nfs_inode *nfsi = NFS_I(state->inode);
 	struct nfs_delegation *deleg_cur;
 	int ret = 0;
 
-	open_flags &= (FMODE_READ|FMODE_WRITE);
+	fmode &= (FMODE_READ|FMODE_WRITE);
 
 	rcu_read_lock();
 	deleg_cur = rcu_dereference(nfsi->delegation);
@@ -498,7 +503,7 @@ static int update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stat
 
 	spin_lock(&deleg_cur->lock);
 	if (nfsi->delegation != deleg_cur ||
-	    (deleg_cur->type & open_flags) != open_flags)
+	    (deleg_cur->type & fmode) != fmode)
 		goto no_delegation_unlock;
 
 	if (delegation == NULL)
@@ -507,7 +512,7 @@ static int update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stat
 		goto no_delegation_unlock;
 
 	nfs_mark_delegation_referenced(deleg_cur);
-	__update_open_stateid(state, open_stateid, &deleg_cur->stateid, open_flags);
+	__update_open_stateid(state, open_stateid, &deleg_cur->stateid, fmode);
 	ret = 1;
 no_delegation_unlock:
 	spin_unlock(&deleg_cur->lock);
@@ -515,7 +520,7 @@ no_delegation:
 	rcu_read_unlock();
 
 	if (!ret && open_stateid != NULL) {
-		__update_open_stateid(state, open_stateid, NULL, open_flags);
+		__update_open_stateid(state, open_stateid, NULL, fmode);
 		ret = 1;
 	}
 
@@ -523,13 +528,13 @@ no_delegation:
 }
 
 
-static void nfs4_return_incompatible_delegation(struct inode *inode, mode_t open_flags)
+static void nfs4_return_incompatible_delegation(struct inode *inode, fmode_t fmode)
 {
 	struct nfs_delegation *delegation;
 
 	rcu_read_lock();
 	delegation = rcu_dereference(NFS_I(inode)->delegation);
-	if (delegation == NULL || (delegation->type & open_flags) == open_flags) {
+	if (delegation == NULL || (delegation->type & fmode) == fmode) {
 		rcu_read_unlock();
 		return;
 	}
@@ -542,15 +547,16 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
 	struct nfs4_state *state = opendata->state;
 	struct nfs_inode *nfsi = NFS_I(state->inode);
 	struct nfs_delegation *delegation;
-	int open_mode = opendata->o_arg.open_flags & (FMODE_READ|FMODE_WRITE|O_EXCL);
+	int open_mode = opendata->o_arg.open_flags & O_EXCL;
+	fmode_t fmode = opendata->o_arg.fmode;
 	nfs4_stateid stateid;
 	int ret = -EAGAIN;
 
 	for (;;) {
-		if (can_open_cached(state, open_mode)) {
+		if (can_open_cached(state, fmode, open_mode)) {
 			spin_lock(&state->owner->so_lock);
-			if (can_open_cached(state, open_mode)) {
-				update_open_stateflags(state, open_mode);
+			if (can_open_cached(state, fmode, open_mode)) {
+				update_open_stateflags(state, fmode);
 				spin_unlock(&state->owner->so_lock);
 				goto out_return_state;
 			}
@@ -559,7 +565,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
 		rcu_read_lock();
 		delegation = rcu_dereference(nfsi->delegation);
 		if (delegation == NULL ||
-		    !can_open_delegated(delegation, open_mode)) {
+		    !can_open_delegated(delegation, fmode)) {
 			rcu_read_unlock();
 			break;
 		}
@@ -572,7 +578,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
 		ret = -EAGAIN;
 
 		/* Try to update the stateid using the delegation */
-		if (update_open_stateid(state, NULL, &stateid, open_mode))
+		if (update_open_stateid(state, NULL, &stateid, fmode))
 			goto out_return_state;
 	}
 out:
@@ -624,7 +630,7 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data
 	}
 
 	update_open_stateid(state, &data->o_res.stateid, NULL,
-			data->o_arg.open_flags);
+			data->o_arg.fmode);
 	iput(inode);
 out:
 	return state;
@@ -655,7 +661,7 @@ static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context
 {
 	struct nfs4_opendata *opendata;
 
-	opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, NULL);
+	opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, 0, NULL);
 	if (opendata == NULL)
 		return ERR_PTR(-ENOMEM);
 	opendata->state = state;
@@ -663,12 +669,13 @@ static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context
 	return opendata;
 }
 
-static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, mode_t openflags, struct nfs4_state **res)
+static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, fmode_t fmode, struct nfs4_state **res)
 {
 	struct nfs4_state *newstate;
 	int ret;
 
-	opendata->o_arg.open_flags = openflags;
+	opendata->o_arg.open_flags = 0;
+	opendata->o_arg.fmode = fmode;
 	memset(&opendata->o_res, 0, sizeof(opendata->o_res));
 	memset(&opendata->c_res, 0, sizeof(opendata->c_res));
 	nfs4_init_opendata_res(opendata);
@@ -678,7 +685,7 @@ static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, mode_t openf
 	newstate = nfs4_opendata_to_nfs4_state(opendata);
 	if (IS_ERR(newstate))
 		return PTR_ERR(newstate);
-	nfs4_close_state(&opendata->path, newstate, openflags);
+	nfs4_close_state(&opendata->path, newstate, fmode);
 	*res = newstate;
 	return 0;
 }
@@ -734,7 +741,7 @@ static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state
 {
 	struct nfs_delegation *delegation;
 	struct nfs4_opendata *opendata;
-	int delegation_type = 0;
+	fmode_t delegation_type = 0;
 	int status;
 
 	opendata = nfs4_open_recoverdata_alloc(ctx, state);
@@ -847,7 +854,7 @@ static void nfs4_open_confirm_release(void *calldata)
 		goto out_free;
 	state = nfs4_opendata_to_nfs4_state(data);
 	if (!IS_ERR(state))
-		nfs4_close_state(&data->path, state, data->o_arg.open_flags);
+		nfs4_close_state(&data->path, state, data->o_arg.fmode);
 out_free:
 	nfs4_opendata_put(data);
 }
@@ -911,7 +918,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
 	if (data->state != NULL) {
 		struct nfs_delegation *delegation;
 
-		if (can_open_cached(data->state, data->o_arg.open_flags & (FMODE_READ|FMODE_WRITE|O_EXCL)))
+		if (can_open_cached(data->state, data->o_arg.fmode, data->o_arg.open_flags))
 			goto out_no_action;
 		rcu_read_lock();
 		delegation = rcu_dereference(NFS_I(data->state->inode)->delegation);
@@ -980,7 +987,7 @@ static void nfs4_open_release(void *calldata)
 		goto out_free;
 	state = nfs4_opendata_to_nfs4_state(data);
 	if (!IS_ERR(state))
-		nfs4_close_state(&data->path, state, data->o_arg.open_flags);
+		nfs4_close_state(&data->path, state, data->o_arg.fmode);
 out_free:
 	nfs4_opendata_put(data);
 }
@@ -1135,7 +1142,7 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct
 /*
  * Returns a referenced nfs4_state
  */
-static int _nfs4_do_open(struct inode *dir, struct path *path, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
+static int _nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
 {
 	struct nfs4_state_owner  *sp;
 	struct nfs4_state     *state = NULL;
@@ -1153,9 +1160,9 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, int flags, struct
 	if (status != 0)
 		goto err_put_state_owner;
 	if (path->dentry->d_inode != NULL)
-		nfs4_return_incompatible_delegation(path->dentry->d_inode, flags & (FMODE_READ|FMODE_WRITE));
+		nfs4_return_incompatible_delegation(path->dentry->d_inode, fmode);
 	status = -ENOMEM;
-	opendata = nfs4_opendata_alloc(path, sp, flags, sattr);
+	opendata = nfs4_opendata_alloc(path, sp, fmode, flags, sattr);
 	if (opendata == NULL)
 		goto err_put_state_owner;
 
@@ -1187,14 +1194,14 @@ out_err:
 }
 
 
-static struct nfs4_state *nfs4_do_open(struct inode *dir, struct path *path, int flags, struct iattr *sattr, struct rpc_cred *cred)
+static struct nfs4_state *nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred)
 {
 	struct nfs4_exception exception = { };
 	struct nfs4_state *res;
 	int status;
 
 	do {
-		status = _nfs4_do_open(dir, path, flags, sattr, cred, &res);
+		status = _nfs4_do_open(dir, path, fmode, flags, sattr, cred, &res);
 		if (status == 0)
 			break;
 		/* NOTE: BAD_SEQID means the server and client disagree about the
@@ -1332,7 +1339,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
 		case -NFS4ERR_OLD_STATEID:
 		case -NFS4ERR_BAD_STATEID:
 		case -NFS4ERR_EXPIRED:
-			if (calldata->arg.open_flags == 0)
+			if (calldata->arg.fmode == 0)
 				break;
 		default:
 			if (nfs4_async_handle_error(task, server, state) == -EAGAIN) {
@@ -1374,10 +1381,10 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
 	nfs_fattr_init(calldata->res.fattr);
 	if (test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0) {
 		task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
-		calldata->arg.open_flags = FMODE_READ;
+		calldata->arg.fmode = FMODE_READ;
 	} else if (test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0) {
 		task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
-		calldata->arg.open_flags = FMODE_WRITE;
+		calldata->arg.fmode = FMODE_WRITE;
 	}
 	calldata->timestamp = jiffies;
 	rpc_call_start(task);
@@ -1430,7 +1437,7 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait)
 	calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid);
 	if (calldata->arg.seqid == NULL)
 		goto out_free_calldata;
-	calldata->arg.open_flags = 0;
+	calldata->arg.fmode = 0;
 	calldata->arg.bitmask = server->attr_bitmask;
 	calldata->res.fattr = &calldata->fattr;
 	calldata->res.seqid = calldata->arg.seqid;
@@ -1457,13 +1464,13 @@ out:
 	return status;
 }
 
-static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct nfs4_state *state)
+static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct nfs4_state *state, fmode_t fmode)
 {
 	struct file *filp;
 	int ret;
 
 	/* If the open_intent is for execute, we have an extra check to make */
-	if (nd->intent.open.flags & FMODE_EXEC) {
+	if (fmode & FMODE_EXEC) {
 		ret = nfs_may_open(state->inode,
 				state->owner->so_cred,
 				nd->intent.open.flags);
@@ -1479,7 +1486,7 @@ static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct
 	}
 	ret = PTR_ERR(filp);
 out_close:
-	nfs4_close_sync(path, state, nd->intent.open.flags);
+	nfs4_close_sync(path, state, fmode & (FMODE_READ|FMODE_WRITE));
 	return ret;
 }
 
@@ -1495,6 +1502,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 	struct rpc_cred *cred;
 	struct nfs4_state *state;
 	struct dentry *res;
+	fmode_t fmode = nd->intent.open.flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
 
 	if (nd->flags & LOOKUP_CREATE) {
 		attr.ia_mode = nd->intent.open.create_mode;
@@ -1512,7 +1520,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 	parent = dentry->d_parent;
 	/* Protect against concurrent sillydeletes */
 	nfs_block_sillyrename(parent);
-	state = nfs4_do_open(dir, &path, nd->intent.open.flags, &attr, cred);
+	state = nfs4_do_open(dir, &path, fmode, nd->intent.open.flags, &attr, cred);
 	put_rpccred(cred);
 	if (IS_ERR(state)) {
 		if (PTR_ERR(state) == -ENOENT) {
@@ -1527,7 +1535,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 		path.dentry = res;
 	nfs_set_verifier(path.dentry, nfs_save_change_attribute(dir));
 	nfs_unblock_sillyrename(parent);
-	nfs4_intent_set_file(nd, &path, state);
+	nfs4_intent_set_file(nd, &path, state, fmode);
 	return res;
 }
 
@@ -1540,11 +1548,12 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st
 	};
 	struct rpc_cred *cred;
 	struct nfs4_state *state;
+	fmode_t fmode = openflags & (FMODE_READ | FMODE_WRITE);
 
 	cred = rpc_lookup_cred();
 	if (IS_ERR(cred))
 		return PTR_ERR(cred);
-	state = nfs4_do_open(dir, &path, openflags, NULL, cred);
+	state = nfs4_do_open(dir, &path, fmode, openflags, NULL, cred);
 	put_rpccred(cred);
 	if (IS_ERR(state)) {
 		switch (PTR_ERR(state)) {
@@ -1561,10 +1570,10 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st
 	}
 	if (state->inode == dentry->d_inode) {
 		nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
-		nfs4_intent_set_file(nd, &path, state);
+		nfs4_intent_set_file(nd, &path, state, fmode);
 		return 1;
 	}
-	nfs4_close_sync(&path, state, openflags);
+	nfs4_close_sync(&path, state, fmode);
 out_drop:
 	d_drop(dentry);
 	return 0;
@@ -1990,6 +1999,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 	};
 	struct nfs4_state *state;
 	struct rpc_cred *cred;
+	fmode_t fmode = flags & (FMODE_READ | FMODE_WRITE);
 	int status = 0;
 
 	cred = rpc_lookup_cred();
@@ -1997,7 +2007,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		status = PTR_ERR(cred);
 		goto out;
 	}
-	state = nfs4_do_open(dir, &path, flags, sattr, cred);
+	state = nfs4_do_open(dir, &path, fmode, flags, sattr, cred);
 	d_drop(dentry);
 	if (IS_ERR(state)) {
 		status = PTR_ERR(state);
@@ -2013,9 +2023,9 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		nfs_post_op_update_inode(state->inode, &fattr);
 	}
 	if (status == 0 && (nd->flags & LOOKUP_OPEN) != 0)
-		status = nfs4_intent_set_file(nd, &path, state);
+		status = nfs4_intent_set_file(nd, &path, state, fmode);
 	else
-		nfs4_close_sync(&path, state, flags);
+		nfs4_close_sync(&path, state, fmode);
 out_putcred:
 	put_rpccred(cred);
 out:
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index cd16b301f13c..2022fe47966f 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -363,18 +363,18 @@ nfs4_alloc_open_state(void)
 }
 
 void
-nfs4_state_set_mode_locked(struct nfs4_state *state, mode_t mode)
+nfs4_state_set_mode_locked(struct nfs4_state *state, fmode_t fmode)
 {
-	if (state->state == mode)
+	if (state->state == fmode)
 		return;
 	/* NB! List reordering - see the reclaim code for why.  */
-	if ((mode & FMODE_WRITE) != (state->state & FMODE_WRITE)) {
-		if (mode & FMODE_WRITE)
+	if ((fmode & FMODE_WRITE) != (state->state & FMODE_WRITE)) {
+		if (fmode & FMODE_WRITE)
 			list_move(&state->open_states, &state->owner->so_states);
 		else
 			list_move_tail(&state->open_states, &state->owner->so_states);
 	}
-	state->state = mode;
+	state->state = fmode;
 }
 
 static struct nfs4_state *
@@ -454,16 +454,16 @@ void nfs4_put_open_state(struct nfs4_state *state)
 /*
  * Close the current file.
  */
-static void __nfs4_close(struct path *path, struct nfs4_state *state, mode_t mode, int wait)
+static void __nfs4_close(struct path *path, struct nfs4_state *state, fmode_t fmode, int wait)
 {
 	struct nfs4_state_owner *owner = state->owner;
 	int call_close = 0;
-	int newstate;
+	fmode_t newstate;
 
 	atomic_inc(&owner->so_count);
 	/* Protect against nfs4_find_state() */
 	spin_lock(&owner->so_lock);
-	switch (mode & (FMODE_READ | FMODE_WRITE)) {
+	switch (fmode & (FMODE_READ | FMODE_WRITE)) {
 		case FMODE_READ:
 			state->n_rdonly--;
 			break;
@@ -498,14 +498,14 @@ static void __nfs4_close(struct path *path, struct nfs4_state *state, mode_t mod
 		nfs4_do_close(path, state, wait);
 }
 
-void nfs4_close_state(struct path *path, struct nfs4_state *state, mode_t mode)
+void nfs4_close_state(struct path *path, struct nfs4_state *state, fmode_t fmode)
 {
-	__nfs4_close(path, state, mode, 0);
+	__nfs4_close(path, state, fmode, 0);
 }
 
-void nfs4_close_sync(struct path *path, struct nfs4_state *state, mode_t mode)
+void nfs4_close_sync(struct path *path, struct nfs4_state *state, fmode_t fmode)
 {
-	__nfs4_close(path, state, mode, 1);
+	__nfs4_close(path, state, fmode, 1);
 }
 
 /*
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 879911c99030..d8ddfc5467d6 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -953,12 +953,12 @@ static int encode_lookup(struct xdr_stream *xdr, const struct qstr *name)
 	return 0;
 }
 
-static void encode_share_access(struct xdr_stream *xdr, int open_flags)
+static void encode_share_access(struct xdr_stream *xdr, fmode_t fmode)
 {
 	__be32 *p;
 
 	RESERVE_SPACE(8);
-	switch (open_flags & (FMODE_READ|FMODE_WRITE)) {
+	switch (fmode & (FMODE_READ|FMODE_WRITE)) {
 		case FMODE_READ:
 			WRITE32(NFS4_SHARE_ACCESS_READ);
 			break;
@@ -969,7 +969,7 @@ static void encode_share_access(struct xdr_stream *xdr, int open_flags)
 			WRITE32(NFS4_SHARE_ACCESS_BOTH);
 			break;
 		default:
-			BUG();
+			WRITE32(0);
 	}
 	WRITE32(0);		/* for linux, share_deny = 0 always */
 }
@@ -984,7 +984,7 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena
 	RESERVE_SPACE(8);
 	WRITE32(OP_OPEN);
 	WRITE32(arg->seqid->sequence->counter);
-	encode_share_access(xdr, arg->open_flags);
+	encode_share_access(xdr, arg->fmode);
 	RESERVE_SPACE(28);
 	WRITE64(arg->clientid);
 	WRITE32(16);
@@ -1112,7 +1112,7 @@ static int encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closea
 	WRITE32(OP_OPEN_DOWNGRADE);
 	WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE);
 	WRITE32(arg->seqid->sequence->counter);
-	encode_share_access(xdr, arg->open_flags);
+	encode_share_access(xdr, arg->fmode);
 	return 0;
 }
 
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 8d71d7b7c78a..b8d9c6dd4f63 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -83,7 +83,7 @@ struct nfs_open_context {
 	struct rpc_cred *cred;
 	struct nfs4_state *state;
 	fl_owner_t lockowner;
-	int mode;
+	fmode_t mode;
 
 	unsigned long flags;
 #define NFS_CONTEXT_ERROR_WRITE		(0)
@@ -342,7 +342,7 @@ extern int nfs_setattr(struct dentry *, struct iattr *);
 extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr);
 extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx);
 extern void put_nfs_open_context(struct nfs_open_context *ctx);
-extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, int mode);
+extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, fmode_t mode);
 extern u64 nfs_compat_user_ino64(u64 fileid);
 extern void nfs_fattr_init(struct nfs_fattr *fattr);
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 32c1a0ecdbff..a550b528319f 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -120,6 +120,7 @@ struct nfs_openargs {
 	const struct nfs_fh *	fh;
 	struct nfs_seqid *	seqid;
 	int			open_flags;
+	fmode_t			fmode;
 	__u64                   clientid;
 	__u64                   id;
 	union {
@@ -171,7 +172,7 @@ struct nfs_closeargs {
 	struct nfs_fh *         fh;
 	nfs4_stateid *		stateid;
 	struct nfs_seqid *	seqid;
-	int			open_flags;
+	fmode_t			fmode;
 	const u32 *		bitmask;
 };
 
-- 
cgit v1.2.3


From 64672d55d93c26fb4035fd1a84a803cbc09cb058 Mon Sep 17 00:00:00 2001
From: Peter Staubach <staubach@redhat.com>
Date: Tue, 23 Dec 2008 15:21:56 -0500
Subject: optimize attribute timeouts for "noac" and "actimeo=0"

Hi.

I've been looking at a bugzilla which describes a problem where
a customer was advised to use either the "noac" or "actimeo=0"
mount options to solve a consistency problem that they were
seeing in the file attributes.  It turned out that this solution
did not work reliably for them because sometimes, the local
attribute cache was believed to be valid and not timed out.
(With an attribute cache timeout of 0, the cache should always
appear to be timed out.)

In looking at this situation, it appears to me that the problem
is that the attribute cache timeout code has an off-by-one
error in it.  It is assuming that the cache is valid in the
region, [read_cache_jiffies, read_cache_jiffies + attrtimeo].  The
cache should be considered valid only in the region,
[read_cache_jiffies, read_cache_jiffies + attrtimeo).  With this
change, the options, "noac" and "actimeo=0", work as originally
expected.

This problem was previously addressed by special casing the
attrtimeo == 0 case.  However, since the problem is only an off-
by-one error, the cleaner solution is address the off-by-one
error and thus, not require the special case.

    Thanx...

        ps

Signed-off-by: Peter Staubach <staubach@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c            |  2 +-
 fs/nfs/inode.c          | 11 ++---------
 include/linux/jiffies.h | 10 ++++++++++
 include/linux/nfs_fs.h  |  5 ++++-
 net/sunrpc/auth.c       |  2 +-
 5 files changed, 18 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index ed7024c34885..e35c8199f82f 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1798,7 +1798,7 @@ static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, str
 	if (cache == NULL)
 		goto out;
 	if (!nfs_have_delegation(inode, FMODE_READ) &&
-	    !time_in_range(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo))
+	    !time_in_range_open(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo))
 		goto out_stale;
 	res->jiffies = cache->jiffies;
 	res->cred = cache->cred;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 1cf39202c3ea..0c381686171e 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -712,14 +712,7 @@ int nfs_attribute_timeout(struct inode *inode)
 
 	if (nfs_have_delegation(inode, FMODE_READ))
 		return 0;
-	/*
-	 * Special case: if the attribute timeout is set to 0, then always
-	 * 		 treat the cache as having expired (unless holding
-	 * 		 a delegation).
-	 */
-	if (nfsi->attrtimeo == 0)
-		return 1;
-	return !time_in_range(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo);
+	return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo);
 }
 
 /**
@@ -1182,7 +1175,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 		nfsi->attrtimeo_timestamp = now;
 		nfsi->attr_gencount = nfs_inc_attr_generation_counter();
 	} else {
-		if (!time_in_range(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) {
+		if (!time_in_range_open(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) {
 			if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode))
 				nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode);
 			nfsi->attrtimeo_timestamp = now;
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index abb6ac639e8e..1a9cf78bfce5 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -115,10 +115,20 @@ static inline u64 get_jiffies_64(void)
 	 ((long)(a) - (long)(b) >= 0))
 #define time_before_eq(a,b)	time_after_eq(b,a)
 
+/*
+ * Calculate whether a is in the range of [b, c].
+ */
 #define time_in_range(a,b,c) \
 	(time_after_eq(a,b) && \
 	 time_before_eq(a,c))
 
+/*
+ * Calculate whether a is in the range of [b, c).
+ */
+#define time_in_range_open(a,b,c) \
+	(time_after_eq(a,b) && \
+	 time_before(a,c))
+
 /* Same as above, but does so with platform independent 64bit types.
  * These must be used when utilizing jiffies_64 (i.e. return value of
  * get_jiffies_64() */
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index b8d9c6dd4f63..db867b04ac3c 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -130,7 +130,10 @@ struct nfs_inode {
 	 *
 	 * We need to revalidate the cached attrs for this inode if
 	 *
-	 *	jiffies - read_cache_jiffies > attrtimeo
+	 *	jiffies - read_cache_jiffies >= attrtimeo
+	 *
+	 * Please note the comparison is greater than or equal
+	 * so that zero timeout values can be specified.
 	 */
 	unsigned long		read_cache_jiffies;
 	unsigned long		attrtimeo;
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 6e28744b1709..050e4e84d9e3 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -234,7 +234,7 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
 	list_for_each_entry_safe(cred, next, &cred_unused, cr_lru) {
 
 		/* Enforce a 60 second garbage collection moratorium */
-		if (time_in_range(cred->cr_expire, expired, jiffies) &&
+		if (time_in_range_open(cred->cr_expire, expired, jiffies) &&
 		    test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0)
 			continue;
 
-- 
cgit v1.2.3


From c977a2ef40a38c45537ad03823d0a004f06373f0 Mon Sep 17 00:00:00 2001
From: Benny Halevy <bhalevy@panasas.com>
Date: Tue, 23 Dec 2008 16:06:13 -0500
Subject: sunrpc: get rid of rpc_rqst.rq_bufsize

rq_bufsize is not used.

Signed-off-by: Mike Sager <Mike.Sager@netapp.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/xprt.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 4d80a118d538..11fc71d50c1e 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -76,8 +76,7 @@ struct rpc_rqst {
 	struct list_head	rq_list;
 
 	__u32 *			rq_buffer;	/* XDR encode buffer */
-	size_t			rq_bufsize,
-				rq_callsize,
+	size_t			rq_callsize,
 				rq_rcvsize;
 
 	struct xdr_buf		rq_private_buf;		/* The receive buffer
-- 
cgit v1.2.3


From c381060869317b3c84430d4f54965d409cbfe65f Mon Sep 17 00:00:00 2001
From: "\\\"J. Bruce Fields\\" <bfields@citi.umich.edu>
Date: Tue, 23 Dec 2008 16:08:32 -0500
Subject: rpc: add an rpc_pipe_open method

We want to transition to a new gssd upcall which is text-based and more
easily extensible.

To simplify upgrades, as well as testing and debugging, it will help if
we can upgrade gssd (to a version which understands the new upcall)
without having to choose at boot (or module-load) time whether we want
the new or the old upcall.

We will do this by providing two different pipes: one named, as
currently, after the mechanism (normally "krb5"), and supporting the
old upcall.  One named "gssd" and supporting the new upcall version.

We allow gssd to indicate which version it supports by its choice of
which pipe to open.

As we have no interest in supporting *simultaneous* use of both
versions, we'll forbid opening both pipes at the same time.

So, add a new pipe_open callback to the rpc_pipefs api, which the gss
code can use to track which pipes have been open, and to refuse opens of
incompatible pipes.

We only need this to be called on the first open of a given pipe.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/rpc_pipe_fs.h |  1 +
 net/sunrpc/rpc_pipe.c              | 22 +++++++++++++++-------
 2 files changed, 16 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h
index 51b977a4ca20..cea764c2359f 100644
--- a/include/linux/sunrpc/rpc_pipe_fs.h
+++ b/include/linux/sunrpc/rpc_pipe_fs.h
@@ -15,6 +15,7 @@ struct rpc_pipe_ops {
 	ssize_t (*upcall)(struct file *, struct rpc_pipe_msg *, char __user *, size_t);
 	ssize_t (*downcall)(struct file *, const char __user *, size_t);
 	void (*release_pipe)(struct inode *);
+	int (*open_pipe)(struct inode *);
 	void (*destroy_msg)(struct rpc_pipe_msg *);
 };
 
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 55b2049834c4..c9b57f47108c 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -169,16 +169,24 @@ static int
 rpc_pipe_open(struct inode *inode, struct file *filp)
 {
 	struct rpc_inode *rpci = RPC_I(inode);
+	int first_open;
 	int res = -ENXIO;
 
 	mutex_lock(&inode->i_mutex);
-	if (rpci->ops != NULL) {
-		if (filp->f_mode & FMODE_READ)
-			rpci->nreaders ++;
-		if (filp->f_mode & FMODE_WRITE)
-			rpci->nwriters ++;
-		res = 0;
+	if (rpci->ops == NULL)
+		goto out;
+	first_open = rpci->nreaders == 0 && rpci->nwriters == 0;
+	if (first_open && rpci->ops->open_pipe) {
+		res = rpci->ops->open_pipe(inode);
+		if (res)
+			goto out;
 	}
+	if (filp->f_mode & FMODE_READ)
+		rpci->nreaders++;
+	if (filp->f_mode & FMODE_WRITE)
+		rpci->nwriters++;
+	res = 0;
+out:
 	mutex_unlock(&inode->i_mutex);
 	return res;
 }
@@ -748,7 +756,7 @@ rpc_rmdir(struct dentry *dentry)
  * @name: name of pipe
  * @private: private data to associate with the pipe, for the caller's use
  * @ops: operations defining the behavior of the pipe: upcall, downcall,
- *	release_pipe, and destroy_msg.
+ *	release_pipe, open_pipe, and destroy_msg.
  * @flags: rpc_inode flags
  *
  * Data is made available for userspace to read by calls to
-- 
cgit v1.2.3


From 68e76ad0baf8f5d5060377c2423ee6eed5c63057 Mon Sep 17 00:00:00 2001
From: Olga Kornievskaia <aglo@citi.umich.edu>
Date: Tue, 23 Dec 2008 16:17:15 -0500
Subject: nfsd: pass client principal name in rsc downcall

Two principals are involved in krb5 authentication: the target, who we
authenticate *to* (normally the name of the server, like
nfs/server.citi.umich.edu@CITI.UMICH.EDU), and the source, we we
authenticate *as* (normally a user, like bfields@UMICH.EDU)

In the case of NFSv4 callbacks, the target of the callback should be the
source of the client's setclientid call, and the source should be the
nfs server's own principal.

Therefore we allow svcgssd to pass down the name of the principal that
just authenticated, so that on setclientid we can store that principal
name with the new client, to be used later on callbacks.

Signed-off-by: Olga Kornievskaia <aglo@citi.umich.edu>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfsd/nfs4state.c                | 11 +++++++++++
 include/linux/nfsd/state.h         |  1 +
 include/linux/sunrpc/svcauth_gss.h |  1 +
 net/sunrpc/auth_gss/svcauth_gss.c  | 23 +++++++++++++++++++++++
 4 files changed, 36 insertions(+)

(limited to 'include/linux')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 1a052ac2bde9..f3b9a8d064f3 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -54,6 +54,7 @@
 #include <linux/mutex.h>
 #include <linux/lockd/bind.h>
 #include <linux/module.h>
+#include <linux/sunrpc/svcauth_gss.h>
 
 #define NFSDDBG_FACILITY                NFSDDBG_PROC
 
@@ -377,6 +378,7 @@ free_client(struct nfs4_client *clp)
 	shutdown_callback_client(clp);
 	if (clp->cl_cred.cr_group_info)
 		put_group_info(clp->cl_cred.cr_group_info);
+	kfree(clp->cl_principal);
 	kfree(clp->cl_name.data);
 	kfree(clp);
 }
@@ -696,6 +698,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	unsigned int 		strhashval;
 	struct nfs4_client	*conf, *unconf, *new;
 	__be32 			status;
+	char			*princ;
 	char                    dname[HEXDIR_LEN];
 	
 	if (!check_name(clname))
@@ -783,6 +786,14 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	}
 	copy_verf(new, &clverifier);
 	new->cl_addr = sin->sin_addr.s_addr;
+	princ = svc_gss_principal(rqstp);
+	if (princ) {
+		new->cl_principal = kstrdup(princ, GFP_KERNEL);
+		if (new->cl_principal == NULL) {
+			free_client(new);
+			goto out;
+		}
+	}
 	copy_cred(&new->cl_cred, &rqstp->rq_cred);
 	gen_confirm(new);
 	gen_callback(new, setclid);
diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index d0fe2e378452..ce7cbf4b7c93 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -124,6 +124,7 @@ struct nfs4_client {
 	nfs4_verifier		cl_verifier; 	/* generated by client */
 	time_t                  cl_time;        /* time of last lease renewal */
 	__be32			cl_addr; 	/* client ipaddress */
+	char			*cl_principal;	/* setclientid principal name */
 	struct svc_cred		cl_cred; 	/* setclientid principal */
 	clientid_t		cl_clientid;	/* generated by server */
 	nfs4_verifier		cl_confirm;	/* generated by server */
diff --git a/include/linux/sunrpc/svcauth_gss.h b/include/linux/sunrpc/svcauth_gss.h
index c9165d9771a8..ca7d725861fc 100644
--- a/include/linux/sunrpc/svcauth_gss.h
+++ b/include/linux/sunrpc/svcauth_gss.h
@@ -20,6 +20,7 @@ int gss_svc_init(void);
 void gss_svc_shutdown(void);
 int svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name);
 u32 svcauth_gss_flavor(struct auth_domain *dom);
+char *svc_gss_principal(struct svc_rqst *);
 
 #endif /* __KERNEL__ */
 #endif /* _LINUX_SUNRPC_SVCAUTH_GSS_H */
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 12803da95dc4..e9baa6ebb1dd 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -332,6 +332,7 @@ struct rsc {
 	struct svc_cred		cred;
 	struct gss_svc_seq_data	seqdata;
 	struct gss_ctx		*mechctx;
+	char			*client_name;
 };
 
 static struct cache_head *rsc_table[RSC_HASHMAX];
@@ -346,6 +347,7 @@ static void rsc_free(struct rsc *rsci)
 		gss_delete_sec_context(&rsci->mechctx);
 	if (rsci->cred.cr_group_info)
 		put_group_info(rsci->cred.cr_group_info);
+	kfree(rsci->client_name);
 }
 
 static void rsc_put(struct kref *ref)
@@ -383,6 +385,7 @@ rsc_init(struct cache_head *cnew, struct cache_head *ctmp)
 	tmp->handle.data = NULL;
 	new->mechctx = NULL;
 	new->cred.cr_group_info = NULL;
+	new->client_name = NULL;
 }
 
 static void
@@ -397,6 +400,8 @@ update_rsc(struct cache_head *cnew, struct cache_head *ctmp)
 	spin_lock_init(&new->seqdata.sd_lock);
 	new->cred = tmp->cred;
 	tmp->cred.cr_group_info = NULL;
+	new->client_name = tmp->client_name;
+	tmp->client_name = NULL;
 }
 
 static struct cache_head *
@@ -486,6 +491,15 @@ static int rsc_parse(struct cache_detail *cd,
 		status = gss_import_sec_context(buf, len, gm, &rsci.mechctx);
 		if (status)
 			goto out;
+
+		/* get client name */
+		len = qword_get(&mesg, buf, mlen);
+		if (len > 0) {
+			rsci.client_name = kstrdup(buf, GFP_KERNEL);
+			if (!rsci.client_name)
+				goto out;
+		}
+
 	}
 	rsci.h.expiry_time = expiry;
 	rscp = rsc_update(&rsci, rscp);
@@ -913,6 +927,15 @@ struct gss_svc_data {
 	struct rsc			*rsci;
 };
 
+char *svc_gss_principal(struct svc_rqst *rqstp)
+{
+	struct gss_svc_data *gd = (struct gss_svc_data *)rqstp->rq_auth_data;
+
+	if (gd && gd->rsci)
+		return gd->rsci->client_name;
+	return NULL;
+}
+
 static int
 svcauth_gss_set_client(struct svc_rqst *rqstp)
 {
-- 
cgit v1.2.3


From 608207e8884e083ad8b8d33eda868da70f0d63e8 Mon Sep 17 00:00:00 2001
From: Olga Kornievskaia <aglo@citi.umich.edu>
Date: Tue, 23 Dec 2008 16:17:40 -0500
Subject: rpc: pass target name down to rpc level on callbacks

The rpc client needs to know the principal that the setclientid was done
as, so it can tell gssd who to authenticate to.

Signed-off-by: Olga Kornievskaia <aglo@citi.umich.edu>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfsd/nfs4callback.c      |  6 ++++++
 include/linux/sunrpc/clnt.h |  2 ++
 net/sunrpc/clnt.c           | 16 ++++++++++++++++
 3 files changed, 24 insertions(+)

(limited to 'include/linux')

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 094747a1227c..3ca141782145 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -384,6 +384,7 @@ static int do_probe_callback(void *data)
 		.version	= nfs_cb_version[1]->number,
 		.authflavor	= RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */
 		.flags		= (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET),
+		.client_name    = clp->cl_principal,
 	};
 	struct rpc_message msg = {
 		.rpc_proc       = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
@@ -392,6 +393,11 @@ static int do_probe_callback(void *data)
 	struct rpc_clnt *client;
 	int status;
 
+	if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) {
+		status = nfserr_cb_path_down;
+		goto out_err;
+	}
+
 	/* Initialize address */
 	memset(&addr, 0, sizeof(addr));
 	addr.sin_family = AF_INET;
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 6f0ee1b84a4f..c39a21040dcb 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -58,6 +58,7 @@ struct rpc_clnt {
 	struct rpc_timeout	cl_timeout_default;
 	struct rpc_program *	cl_program;
 	char			cl_inline_name[32];
+	char			*cl_principal;	/* target to authenticate to */
 };
 
 /*
@@ -108,6 +109,7 @@ struct rpc_create_args {
 	u32			version;
 	rpc_authflavor_t	authflavor;
 	unsigned long		flags;
+	char			*client_name;
 };
 
 /* Values for "flags" field */
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 4895c341e46d..347f2a25abb6 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -197,6 +197,12 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru
 
 	clnt->cl_rtt = &clnt->cl_rtt_default;
 	rpc_init_rtt(&clnt->cl_rtt_default, clnt->cl_timeout->to_initval);
+	clnt->cl_principal = NULL;
+	if (args->client_name) {
+		clnt->cl_principal = kstrdup(args->client_name, GFP_KERNEL);
+		if (!clnt->cl_principal)
+			goto out_no_principal;
+	}
 
 	kref_init(&clnt->cl_kref);
 
@@ -226,6 +232,8 @@ out_no_auth:
 		rpc_put_mount();
 	}
 out_no_path:
+	kfree(clnt->cl_principal);
+out_no_principal:
 	rpc_free_iostats(clnt->cl_metrics);
 out_no_stats:
 	if (clnt->cl_server != clnt->cl_inline_name)
@@ -354,6 +362,11 @@ rpc_clone_client(struct rpc_clnt *clnt)
 	new->cl_metrics = rpc_alloc_iostats(clnt);
 	if (new->cl_metrics == NULL)
 		goto out_no_stats;
+	if (clnt->cl_principal) {
+		new->cl_principal = kstrdup(clnt->cl_principal, GFP_KERNEL);
+		if (new->cl_principal == NULL)
+			goto out_no_principal;
+	}
 	kref_init(&new->cl_kref);
 	err = rpc_setup_pipedir(new, clnt->cl_program->pipe_dir_name);
 	if (err != 0)
@@ -366,6 +379,8 @@ rpc_clone_client(struct rpc_clnt *clnt)
 	rpciod_up();
 	return new;
 out_no_path:
+	kfree(new->cl_principal);
+out_no_principal:
 	rpc_free_iostats(new->cl_metrics);
 out_no_stats:
 	kfree(new);
@@ -417,6 +432,7 @@ rpc_free_client(struct kref *kref)
 out_free:
 	rpc_unregister_client(clnt);
 	rpc_free_iostats(clnt->cl_metrics);
+	kfree(clnt->cl_principal);
 	clnt->cl_metrics = NULL;
 	xprt_put(clnt->cl_xprt);
 	rpciod_down();
-- 
cgit v1.2.3


From 61054b14d545e257b9415d5ca0cd5f43762b4d0c Mon Sep 17 00:00:00 2001
From: Olga Kornievskaia <aglo@citi.umich.edu>
Date: Tue, 23 Dec 2008 16:19:00 -0500
Subject: nfsd: support callbacks with gss flavors

This patch adds server-side support for callbacks other than AUTH_SYS.

Signed-off-by: Olga Kornievskaia <aglo@citi.umich.edu>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfsd/nfs4callback.c     | 3 ++-
 fs/nfsd/nfs4state.c        | 1 +
 include/linux/nfsd/state.h | 1 +
 net/sunrpc/rpc_pipe.c      | 5 +++++
 4 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 3ca141782145..6d7d8c02c197 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -358,6 +358,7 @@ static struct rpc_program cb_program = {
 		.nrvers		= ARRAY_SIZE(nfs_cb_version),
 		.version	= nfs_cb_version,
 		.stats		= &cb_stats,
+		.pipe_dir_name  = "/nfsd4_cb",
 };
 
 /* Reference counting, callback cleanup, etc., all look racy as heck.
@@ -382,7 +383,7 @@ static int do_probe_callback(void *data)
 		.program	= &cb_program,
 		.prognumber	= cb->cb_prog,
 		.version	= nfs_cb_version[1]->number,
-		.authflavor	= RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */
+		.authflavor	= clp->cl_flavor,
 		.flags		= (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET),
 		.client_name    = clp->cl_principal,
 	};
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index f3b9a8d064f3..07db31568ac9 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -786,6 +786,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	}
 	copy_verf(new, &clverifier);
 	new->cl_addr = sin->sin_addr.s_addr;
+	new->cl_flavor = rqstp->rq_flavor;
 	princ = svc_gss_principal(rqstp);
 	if (princ) {
 		new->cl_principal = kstrdup(princ, GFP_KERNEL);
diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index ce7cbf4b7c93..128298c0362d 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -124,6 +124,7 @@ struct nfs4_client {
 	nfs4_verifier		cl_verifier; 	/* generated by client */
 	time_t                  cl_time;        /* time of last lease renewal */
 	__be32			cl_addr; 	/* client ipaddress */
+	u32			cl_flavor;	/* setclientid pseudoflavor */
 	char			*cl_principal;	/* setclientid principal name */
 	struct svc_cred		cl_cred; 	/* setclientid principal */
 	clientid_t		cl_clientid;	/* generated by server */
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 3105efbb182d..192453248870 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -407,6 +407,7 @@ enum {
 	RPCAUTH_nfs,
 	RPCAUTH_portmap,
 	RPCAUTH_statd,
+	RPCAUTH_nfsd4_cb,
 	RPCAUTH_RootEOF
 };
 
@@ -440,6 +441,10 @@ static struct rpc_filelist files[] = {
 		.name = "statd",
 		.mode = S_IFDIR | S_IRUGO | S_IXUGO,
 	},
+	[RPCAUTH_nfsd4_cb] = {
+		.name = "nfsd4_cb",
+		.mode = S_IFDIR | S_IRUGO | S_IXUGO,
+	},
 };
 
 enum {
-- 
cgit v1.2.3


From f9af0e70911e9d6cc9a68f784dca86415486084d Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Fri, 26 Dec 2008 12:24:24 +0900
Subject: irq: for_each_irq_desc() move to irqnr.h

Impact: cleanup

before CONFIG_SPARSE_IRQ age, for_each_irq_desc() sat in irqnr.h and
could be called from generic code.

CONFIG_SPARSE_IRQ breaks this assumption, but SPARSE_IRQ version
for_each_irq_desc() also can move into irqnr.h easily.

Also, this patch unifies CONFIG_SPARSE_IRQ and !CONFIG_SPARSE_IRQ
for_each_irq_desc().

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/irq.h   | 24 ++++--------------------
 include/linux/irqnr.h | 19 +++++++++----------
 kernel/irq/handle.c   | 13 +++++++++++--
 3 files changed, 24 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 98564dc64476..69da275c0ebd 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -202,33 +202,17 @@ extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc
 
 #ifndef CONFIG_SPARSE_IRQ
 extern struct irq_desc irq_desc[NR_IRQS];
-
-static inline struct irq_desc *irq_to_desc(unsigned int irq)
-{
-	return (irq < NR_IRQS) ? irq_desc + irq : NULL;
-}
-static inline struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
-{
-	return irq_to_desc(irq);
-}
-
-#else
-
-extern struct irq_desc *irq_to_desc(unsigned int irq);
-extern struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu);
+#else /* CONFIG_SPARSE_IRQ */
 extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int cpu);
 
-# define for_each_irq_desc(irq, desc)		\
-	for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs; irq++, desc = irq_to_desc(irq))
-# define for_each_irq_desc_reverse(irq, desc)                          \
-	for (irq = nr_irqs - 1, desc = irq_to_desc(irq); irq >= 0; irq--, desc = irq_to_desc(irq))
-
 #define kstat_irqs_this_cpu(DESC) \
 	((DESC)->kstat_irqs[smp_processor_id()])
 #define kstat_incr_irqs_this_cpu(irqno, DESC) \
 	((DESC)->kstat_irqs[smp_processor_id()]++)
 
-#endif
+#endif /* CONFIG_SPARSE_IRQ */
+
+extern struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu);
 
 static inline struct irq_desc *
 irq_remap_to_desc(unsigned int irq, struct irq_desc *desc)
diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h
index 95d2b74641f5..c4a59c7a478b 100644
--- a/include/linux/irqnr.h
+++ b/include/linux/irqnr.h
@@ -15,20 +15,19 @@
 
 # define for_each_irq_desc_reverse(irq, desc)                          \
 	for (irq = nr_irqs - 1; irq >= 0; irq--)
-#else
+#else /* CONFIG_GENERIC_HARDIRQS */
 
 extern int nr_irqs;
+extern struct irq_desc *irq_to_desc(unsigned int irq);
 
-#ifndef CONFIG_SPARSE_IRQ
+# define for_each_irq_desc(irq, desc)					\
+	for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs;		\
+	     irq++, desc = irq_to_desc(irq))
+# define for_each_irq_desc_reverse(irq, desc)				\
+	for (irq = nr_irqs - 1, desc = irq_to_desc(irq); irq >= 0;	\
+	     irq--, desc = irq_to_desc(irq))
 
-struct irq_desc;
-# define for_each_irq_desc(irq, desc)		\
-	for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc++)
-# define for_each_irq_desc_reverse(irq, desc)                          \
-	for (irq = nr_irqs - 1, desc = irq_desc + (nr_irqs - 1);        \
-	    irq >= 0; irq--, desc--)
-#endif
-#endif
+#endif /* CONFIG_GENERIC_HARDIRQS */
 
 #define for_each_irq_nr(irq)                   \
        for (irq = 0; irq < nr_irqs; irq++)
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 6492400cb50d..4db7d2df86b6 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -203,7 +203,7 @@ out_unlock:
 	return desc;
 }
 
-#else
+#else /* !CONFIG_SPARSE_IRQ */
 
 struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
 	[0 ... NR_IRQS-1] = {
@@ -218,7 +218,16 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
 	}
 };
 
-#endif
+struct irq_desc *irq_to_desc(unsigned int irq)
+{
+	return (irq < NR_IRQS) ? irq_desc + irq : NULL;
+}
+
+struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
+{
+	return irq_to_desc(irq);
+}
+#endif /* !CONFIG_SPARSE_IRQ */
 
 /*
  * What should we do if we get a hw irq event on an illegal vector?
-- 
cgit v1.2.3


From 18eefedfe8ad33e8fc7614c13359e29a9fab4644 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Fri, 26 Dec 2008 12:29:48 +0900
Subject: irq: simplify for_each_irq_desc() usage

Impact: cleanup

all for_each_irq_desc() usage point have !desc check.
then its check can move into for_each_irq_desc() macro.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 10 ----------
 drivers/xen/events.c      |  3 ---
 include/linux/irqnr.h     |  8 ++++++--
 kernel/irq/autoprobe.c    | 15 ---------------
 kernel/irq/handle.c       |  3 ---
 kernel/irq/spurious.c     |  5 -----
 6 files changed, 6 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index a74887b416cc..2fe543f58ac8 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -1345,8 +1345,6 @@ void __setup_vector_irq(int cpu)
 
 	/* Mark the inuse vectors */
 	for_each_irq_desc(irq, desc) {
-		if (!desc)
-			continue;
 		cfg = desc->chip_data;
 		if (!cpu_isset(cpu, cfg->domain))
 			continue;
@@ -1730,8 +1728,6 @@ __apicdebuginit(void) print_IO_APIC(void)
 	for_each_irq_desc(irq, desc) {
 		struct irq_pin_list *entry;
 
-		if (!desc)
-			continue;
 		cfg = desc->chip_data;
 		entry = cfg->irq_2_pin;
 		if (!entry)
@@ -2378,9 +2374,6 @@ static void ir_irq_migration(struct work_struct *work)
 	struct irq_desc *desc;
 
 	for_each_irq_desc(irq, desc) {
-		if (!desc)
-			continue;
-
 		if (desc->status & IRQ_MOVE_PENDING) {
 			unsigned long flags;
 
@@ -2671,9 +2664,6 @@ static inline void init_IO_APIC_traps(void)
 	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
 	 */
 	for_each_irq_desc(irq, desc) {
-		if (!desc)
-			continue;
-
 		cfg = desc->chip_data;
 		if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
 			/*
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 46625cd38743..e26733a9df21 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -142,9 +142,6 @@ static void init_evtchn_cpu_bindings(void)
 
 	/* By default all event channels notify CPU#0. */
 	for_each_irq_desc(i, desc) {
-		if (!desc)
-			continue;
-
 		desc->affinity = cpumask_of_cpu(0);
 	}
 #endif
diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h
index c4a59c7a478b..5504a5c97836 100644
--- a/include/linux/irqnr.h
+++ b/include/linux/irqnr.h
@@ -22,10 +22,14 @@ extern struct irq_desc *irq_to_desc(unsigned int irq);
 
 # define for_each_irq_desc(irq, desc)					\
 	for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs;		\
-	     irq++, desc = irq_to_desc(irq))
+	     irq++, desc = irq_to_desc(irq))				\
+		if (desc)
+
+
 # define for_each_irq_desc_reverse(irq, desc)				\
 	for (irq = nr_irqs - 1, desc = irq_to_desc(irq); irq >= 0;	\
-	     irq--, desc = irq_to_desc(irq))
+	     irq--, desc = irq_to_desc(irq))				\
+		if (desc)
 
 #endif /* CONFIG_GENERIC_HARDIRQS */
 
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index 650ce4102a63..cc0f7321b8ce 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -40,9 +40,6 @@ unsigned long probe_irq_on(void)
 	 * flush such a longstanding irq before considering it as spurious.
 	 */
 	for_each_irq_desc_reverse(i, desc) {
-		if (!desc)
-			continue;
-
 		spin_lock_irq(&desc->lock);
 		if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
 			/*
@@ -71,9 +68,6 @@ unsigned long probe_irq_on(void)
 	 * happened in the previous stage, it may have masked itself)
 	 */
 	for_each_irq_desc_reverse(i, desc) {
-		if (!desc)
-			continue;
-
 		spin_lock_irq(&desc->lock);
 		if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
 			desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
@@ -92,9 +86,6 @@ unsigned long probe_irq_on(void)
 	 * Now filter out any obviously spurious interrupts
 	 */
 	for_each_irq_desc(i, desc) {
-		if (!desc)
-			continue;
-
 		spin_lock_irq(&desc->lock);
 		status = desc->status;
 
@@ -133,9 +124,6 @@ unsigned int probe_irq_mask(unsigned long val)
 	int i;
 
 	for_each_irq_desc(i, desc) {
-		if (!desc)
-			continue;
-
 		spin_lock_irq(&desc->lock);
 		status = desc->status;
 
@@ -178,9 +166,6 @@ int probe_irq_off(unsigned long val)
 	unsigned int status;
 
 	for_each_irq_desc(i, desc) {
-		if (!desc)
-			continue;
-
 		spin_lock_irq(&desc->lock);
 		status = desc->status;
 
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 03479dfdebb8..7dbdfe524693 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -437,9 +437,6 @@ void early_init_irq_lock_class(void)
 	int i;
 
 	for_each_irq_desc(i, desc) {
-		if (!desc)
-			continue;
-
 		lockdep_set_class(&desc->lock, &irq_desc_lock_class);
 	}
 }
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 3738107531fd..dd364c11e56e 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -91,9 +91,6 @@ static int misrouted_irq(int irq)
 	int i, ok = 0;
 
 	for_each_irq_desc(i, desc) {
-		if (!desc)
-			continue;
-
 		if (!i)
 			 continue;
 
@@ -115,8 +112,6 @@ static void poll_spurious_irqs(unsigned long dummy)
 	for_each_irq_desc(i, desc) {
 		unsigned int status;
 
-		if (!desc)
-			continue;
 		if (!i)
 			 continue;
 
-- 
cgit v1.2.3


From 13a0c3c269b223f60abfac8a9811d77111a8b4ba Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Fri, 26 Dec 2008 02:05:47 -0800
Subject: sparseirq: work around compiler optimizing away __weak functions

Impact: fix panic on null pointer with sparseirq

Some GCC versions seem to inline the weak global function,
when that function is empty.

Work it around, by making the functions return a (dummy) integer.

Signed-off-by: Yinghai <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 8 ++++++--
 include/linux/irq.h       | 6 +++---
 init/main.c               | 7 ++++---
 kernel/irq/handle.c       | 7 ++++---
 4 files changed, 17 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 2fe543f58ac8..976039377846 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -170,7 +170,7 @@ static struct irq_cfg irq_cfgx[NR_IRQS] = {
 	[15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
 };
 
-void __init arch_early_irq_init(void)
+int __init arch_early_irq_init(void)
 {
 	struct irq_cfg *cfg;
 	struct irq_desc *desc;
@@ -184,6 +184,8 @@ void __init arch_early_irq_init(void)
 		desc = irq_to_desc(i);
 		desc->chip_data = &cfg[i];
 	}
+
+	return 0;
 }
 
 #ifdef CONFIG_SPARSE_IRQ
@@ -212,7 +214,7 @@ static struct irq_cfg *get_one_free_irq_cfg(int cpu)
 	return cfg;
 }
 
-void arch_init_chip_data(struct irq_desc *desc, int cpu)
+int arch_init_chip_data(struct irq_desc *desc, int cpu)
 {
 	struct irq_cfg *cfg;
 
@@ -224,6 +226,8 @@ void arch_init_chip_data(struct irq_desc *desc, int cpu)
 			BUG_ON(1);
 		}
 	}
+
+	return 0;
 }
 
 #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 69da275c0ebd..0e40af4bac40 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -193,9 +193,9 @@ struct irq_desc {
 	const char		*name;
 } ____cacheline_internodealigned_in_smp;
 
-extern void early_irq_init(void);
-extern void arch_early_irq_init(void);
-extern void arch_init_chip_data(struct irq_desc *desc, int cpu);
+extern int early_irq_init(void);
+extern int arch_early_irq_init(void);
+extern int arch_init_chip_data(struct irq_desc *desc, int cpu);
 extern void arch_init_copy_chip_data(struct irq_desc *old_desc,
 					struct irq_desc *desc, int cpu);
 extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc);
diff --git a/init/main.c b/init/main.c
index c1f999a3cf31..c314aa15370e 100644
--- a/init/main.c
+++ b/init/main.c
@@ -539,13 +539,14 @@ void __init __weak thread_info_cache_init(void)
 {
 }
 
-void __init __weak arch_early_irq_init(void)
+int __init __weak arch_early_irq_init(void)
 {
+	return 0;
 }
 
-void __init __weak early_irq_init(void)
+int __init __weak early_irq_init(void)
 {
-	arch_early_irq_init();
+	return arch_early_irq_init();
 }
 
 asmlinkage void __init start_kernel(void)
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 893da67b7781..0bef3ecb7a0e 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -86,8 +86,9 @@ void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr)
 		desc->kstat_irqs = (unsigned int *)ptr;
 }
 
-void __attribute__((weak)) arch_init_chip_data(struct irq_desc *desc, int cpu)
+int __weak arch_init_chip_data(struct irq_desc *desc, int cpu)
 {
+	return 0;
 }
 
 static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu)
@@ -132,7 +133,7 @@ static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_sm
 /* FIXME: use bootmem alloc ...*/
 static unsigned int kstat_irqs_legacy[NR_IRQS_LEGACY][NR_CPUS];
 
-void __init early_irq_init(void)
+int __init early_irq_init(void)
 {
 	struct irq_desc *desc;
 	int legacy_count;
@@ -151,7 +152,7 @@ void __init early_irq_init(void)
 	for (i = legacy_count; i < NR_IRQS; i++)
 		irq_desc_ptrs[i] = NULL;
 
-	arch_early_irq_init();
+	return arch_early_irq_init();
 }
 
 struct irq_desc *irq_to_desc(unsigned int irq)
-- 
cgit v1.2.3


From 70a7d3cc1308a55104fbe505d76f2aca8a4cf53e Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 22 Dec 2008 10:26:05 -0800
Subject: swiotlb: add hwdev to swiotlb_phys_to_bus() / swiotlb_sg_to_bus()

Impact: extend functions with a (yet unused) parameter, update callsites

Some architectures need it - in preparation for highmem swiotlb.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/pci-swiotlb_64.c |  2 +-
 include/linux/swiotlb.h          |  3 ++-
 lib/swiotlb.c                    | 53 +++++++++++++++++-----------------------
 3 files changed, 25 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c
index 242c3440687f..6cf8a816dc29 100644
--- a/arch/x86/kernel/pci-swiotlb_64.c
+++ b/arch/x86/kernel/pci-swiotlb_64.c
@@ -23,7 +23,7 @@ void *swiotlb_alloc(unsigned order, unsigned long nslabs)
 	return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order);
 }
 
-dma_addr_t swiotlb_phys_to_bus(phys_addr_t paddr)
+dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr)
 {
 	return paddr;
 }
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 325af1de0351..dedd3c0cfe30 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -27,7 +27,8 @@ swiotlb_init(void);
 extern void *swiotlb_alloc_boot(size_t bytes, unsigned long nslabs);
 extern void *swiotlb_alloc(unsigned order, unsigned long nslabs);
 
-extern dma_addr_t swiotlb_phys_to_bus(phys_addr_t address);
+extern dma_addr_t swiotlb_phys_to_bus(struct device *hwdev,
+				      phys_addr_t address);
 extern phys_addr_t swiotlb_bus_to_phys(dma_addr_t address);
 
 extern int swiotlb_arch_range_needs_mapping(void *ptr, size_t size);
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index fa2dc4e5f9ba..3657da8ebbc3 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -126,7 +126,7 @@ void * __weak swiotlb_alloc(unsigned order, unsigned long nslabs)
 	return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order);
 }
 
-dma_addr_t __weak swiotlb_phys_to_bus(phys_addr_t paddr)
+dma_addr_t __weak swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr)
 {
 	return paddr;
 }
@@ -136,9 +136,10 @@ phys_addr_t __weak swiotlb_bus_to_phys(dma_addr_t baddr)
 	return baddr;
 }
 
-static dma_addr_t swiotlb_virt_to_bus(volatile void *address)
+static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
+				      volatile void *address)
 {
-	return swiotlb_phys_to_bus(virt_to_phys(address));
+	return swiotlb_phys_to_bus(hwdev, virt_to_phys(address));
 }
 
 static void *swiotlb_bus_to_virt(dma_addr_t address)
@@ -151,35 +152,23 @@ int __weak swiotlb_arch_range_needs_mapping(void *ptr, size_t size)
 	return 0;
 }
 
-static dma_addr_t swiotlb_sg_to_bus(struct scatterlist *sg)
+static dma_addr_t swiotlb_sg_to_bus(struct device *hwdev, struct scatterlist *sg)
 {
-	return swiotlb_phys_to_bus(page_to_phys(sg_page(sg)) + sg->offset);
+	return swiotlb_phys_to_bus(hwdev, page_to_phys(sg_page(sg)) + sg->offset);
 }
 
 static void swiotlb_print_info(unsigned long bytes)
 {
 	phys_addr_t pstart, pend;
-	dma_addr_t bstart, bend;
 
 	pstart = virt_to_phys(io_tlb_start);
 	pend = virt_to_phys(io_tlb_end);
 
-	bstart = swiotlb_phys_to_bus(pstart);
-	bend = swiotlb_phys_to_bus(pend);
-
 	printk(KERN_INFO "Placing %luMB software IO TLB between %p - %p\n",
 	       bytes >> 20, io_tlb_start, io_tlb_end);
-	if (pstart != bstart || pend != bend)
-		printk(KERN_INFO "software IO TLB at phys %#llx - %#llx"
-		       " bus %#llx - %#llx\n",
-		       (unsigned long long)pstart,
-		       (unsigned long long)pend,
-		       (unsigned long long)bstart,
-		       (unsigned long long)bend);
-	else
-		printk(KERN_INFO "software IO TLB at phys %#llx - %#llx\n",
-		       (unsigned long long)pstart,
-		       (unsigned long long)pend);
+	printk(KERN_INFO "software IO TLB at phys %#llx - %#llx\n",
+	       (unsigned long long)pstart,
+	       (unsigned long long)pend);
 }
 
 /*
@@ -406,7 +395,7 @@ map_single(struct device *hwdev, struct swiotlb_phys_addr buffer, size_t size, i
 	struct swiotlb_phys_addr slot_buf;
 
 	mask = dma_get_seg_boundary(hwdev);
-	start_dma_addr = swiotlb_virt_to_bus(io_tlb_start) & mask;
+	start_dma_addr = swiotlb_virt_to_bus(hwdev, io_tlb_start) & mask;
 
 	offset_slots = ALIGN(start_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
 
@@ -585,7 +574,9 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 		dma_mask = hwdev->coherent_dma_mask;
 
 	ret = (void *)__get_free_pages(flags, order);
-	if (ret && !is_buffer_dma_capable(dma_mask, swiotlb_virt_to_bus(ret), size)) {
+	if (ret &&
+	    !is_buffer_dma_capable(dma_mask, swiotlb_virt_to_bus(hwdev, ret),
+				   size)) {
 		/*
 		 * The allocated memory isn't reachable by the device.
 		 * Fall back on swiotlb_map_single().
@@ -609,7 +600,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 	}
 
 	memset(ret, 0, size);
-	dev_addr = swiotlb_virt_to_bus(ret);
+	dev_addr = swiotlb_virt_to_bus(hwdev, ret);
 
 	/* Confirm address can be DMA'd by device */
 	if (!is_buffer_dma_capable(dma_mask, dev_addr, size)) {
@@ -669,7 +660,7 @@ dma_addr_t
 swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size,
 			 int dir, struct dma_attrs *attrs)
 {
-	dma_addr_t dev_addr = swiotlb_virt_to_bus(ptr);
+	dma_addr_t dev_addr = swiotlb_virt_to_bus(hwdev, ptr);
 	void *map;
 	struct swiotlb_phys_addr buffer;
 
@@ -694,7 +685,7 @@ swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size,
 		map = io_tlb_overflow_buffer;
 	}
 
-	dev_addr = swiotlb_virt_to_bus(map);
+	dev_addr = swiotlb_virt_to_bus(hwdev, map);
 
 	/*
 	 * Ensure that the address returned is DMA'ble
@@ -840,7 +831,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
 	BUG_ON(dir == DMA_NONE);
 
 	for_each_sg(sgl, sg, nelems, i) {
-		dev_addr = swiotlb_sg_to_bus(sg);
+		dev_addr = swiotlb_sg_to_bus(hwdev, sg);
 		if (range_needs_mapping(sg_virt(sg), sg->length) ||
 		    address_needs_mapping(hwdev, dev_addr, sg->length)) {
 			void *map;
@@ -856,7 +847,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
 				sgl[0].dma_length = 0;
 				return 0;
 			}
-			sg->dma_address = swiotlb_virt_to_bus(map);
+			sg->dma_address = swiotlb_virt_to_bus(hwdev, map);
 		} else
 			sg->dma_address = dev_addr;
 		sg->dma_length = sg->length;
@@ -886,7 +877,7 @@ swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
 	BUG_ON(dir == DMA_NONE);
 
 	for_each_sg(sgl, sg, nelems, i) {
-		if (sg->dma_address != swiotlb_sg_to_bus(sg))
+		if (sg->dma_address != swiotlb_sg_to_bus(hwdev, sg))
 			unmap_single(hwdev, swiotlb_bus_to_virt(sg->dma_address),
 				     sg->dma_length, dir);
 		else if (dir == DMA_FROM_DEVICE)
@@ -919,7 +910,7 @@ swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
 	BUG_ON(dir == DMA_NONE);
 
 	for_each_sg(sgl, sg, nelems, i) {
-		if (sg->dma_address != swiotlb_sg_to_bus(sg))
+		if (sg->dma_address != swiotlb_sg_to_bus(hwdev, sg))
 			sync_single(hwdev, swiotlb_bus_to_virt(sg->dma_address),
 				    sg->dma_length, dir, target);
 		else if (dir == DMA_FROM_DEVICE)
@@ -944,7 +935,7 @@ swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
 int
 swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
 {
-	return (dma_addr == swiotlb_virt_to_bus(io_tlb_overflow_buffer));
+	return (dma_addr == swiotlb_virt_to_bus(hwdev, io_tlb_overflow_buffer));
 }
 
 /*
@@ -956,7 +947,7 @@ swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
 int
 swiotlb_dma_supported(struct device *hwdev, u64 mask)
 {
-	return swiotlb_virt_to_bus(io_tlb_end - 1) <= mask;
+	return swiotlb_virt_to_bus(hwdev, io_tlb_end - 1) <= mask;
 }
 
 EXPORT_SYMBOL(swiotlb_map_single);
-- 
cgit v1.2.3


From 1eca4365be25c540650693e941bc06a66cf38f94 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 3 Nov 2008 20:03:17 +0900
Subject: libata: beef up iterators

There currently are the following looping constructs.

* __ata_port_for_each_link() for all available links
* ata_port_for_each_link() for edge links
* ata_link_for_each_dev() for all devices
* ata_link_for_each_dev_reverse() for all devices in reverse order

Now there's a need for looping construct which is similar to
__ata_port_for_each_link() but iterates over PMP links before the host
link.  Instead of adding another one with long name, do the following
cleanup.

* Implement and export ata_link_next() and ata_dev_next() which take
  @mode parameter and can be used to build custom loop.
* Implement ata_for_each_link() and ata_for_each_dev() which take
  looping mode explicitly.

The following iteration modes are implemented.

* ATA_LITER_EDGE		: loop over edge links
* ATA_LITER_HOST_FIRST		: loop over all links, host link first
* ATA_LITER_PMP_FIRST		: loop over all links, PMP links first

* ATA_DITER_ENABLED		: loop over enabled devices
* ATA_DITER_ENABLED_REVERSE	: loop over enabled devices in reverse order
* ATA_DITER_ALL			: loop over all devices
* ATA_DITER_ALL_REVERSE		: loop over all devices in reverse order

This change removes exlicit device enabledness checks from many loops
and makes it clear which ones are iterated over in which direction.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/ahci.c           |   8 +-
 drivers/ata/ata_generic.c    |   5 +-
 drivers/ata/libata-acpi.c    |  19 ++---
 drivers/ata/libata-core.c    | 183 ++++++++++++++++++++++++++++---------------
 drivers/ata/libata-eh.c      |  84 +++++++++-----------
 drivers/ata/libata-pmp.c     |  22 +++---
 drivers/ata/libata-scsi.c    |  22 +++---
 drivers/ata/pata_it821x.c    |  34 ++++----
 drivers/ata/pata_ixp4xx_cf.c |  14 ++--
 drivers/ata/pata_legacy.c    |  17 ++--
 drivers/ata/pata_pdc2027x.c  |  31 ++++----
 drivers/ata/pata_platform.c  |  14 ++--
 drivers/ata/pata_rz1000.c    |  16 ++--
 drivers/ata/sata_sil.c       |   2 +-
 include/linux/libata.h       |  76 +++++++++++++-----
 15 files changed, 307 insertions(+), 240 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index a67b8e7c712d..656448c7fef9 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -1119,14 +1119,14 @@ static void ahci_start_port(struct ata_port *ap)
 
 	/* turn on LEDs */
 	if (ap->flags & ATA_FLAG_EM) {
-		ata_port_for_each_link(link, ap) {
+		ata_for_each_link(link, ap, EDGE) {
 			emp = &pp->em_priv[link->pmp];
 			ahci_transmit_led_message(ap, emp->led_state, 4);
 		}
 	}
 
 	if (ap->flags & ATA_FLAG_SW_ACTIVITY)
-		ata_port_for_each_link(link, ap)
+		ata_for_each_link(link, ap, EDGE)
 			ahci_init_sw_activity(link);
 
 }
@@ -1361,7 +1361,7 @@ static ssize_t ahci_led_show(struct ata_port *ap, char *buf)
 	struct ahci_em_priv *emp;
 	int rc = 0;
 
-	ata_port_for_each_link(link, ap) {
+	ata_for_each_link(link, ap, EDGE) {
 		emp = &pp->em_priv[link->pmp];
 		rc += sprintf(buf, "%lx\n", emp->led_state);
 	}
@@ -1941,7 +1941,7 @@ static void ahci_error_intr(struct ata_port *ap, u32 irq_stat)
 	u32 serror;
 
 	/* determine active link */
-	ata_port_for_each_link(link, ap)
+	ata_for_each_link(link, ap, EDGE)
 		if (ata_link_active(link))
 			break;
 	if (!link)
diff --git a/drivers/ata/ata_generic.c b/drivers/ata/ata_generic.c
index 5c33767e66de..dc48a6398abe 100644
--- a/drivers/ata/ata_generic.c
+++ b/drivers/ata/ata_generic.c
@@ -57,10 +57,7 @@ static int generic_set_mode(struct ata_link *link, struct ata_device **unused)
 	if (pdev->vendor == PCI_VENDOR_ID_CENATEK)
 		dma_enabled = 0xFF;
 
-	ata_link_for_each_dev(dev, link) {
-		if (!ata_dev_enabled(dev))
-			continue;
-
+	ata_for_each_dev(dev, link, ENABLED) {
 		/* We don't really care */
 		dev->pio_mode = XFER_PIO_0;
 		dev->dma_mode = XFER_MW_DMA_0;
diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c
index c012307d0ba6..ef02e488d468 100644
--- a/drivers/ata/libata-acpi.c
+++ b/drivers/ata/libata-acpi.c
@@ -89,7 +89,7 @@ void ata_acpi_associate_sata_port(struct ata_port *ap)
 
 		ap->link.device->acpi_handle = NULL;
 
-		ata_port_for_each_link(link, ap) {
+		ata_for_each_link(link, ap, EDGE) {
 			acpi_integer adr = SATA_ADR(ap->port_no, link->pmp);
 
 			link->device->acpi_handle =
@@ -129,8 +129,8 @@ static void ata_acpi_detach_device(struct ata_port *ap, struct ata_device *dev)
 		struct ata_link *tlink;
 		struct ata_device *tdev;
 
-		ata_port_for_each_link(tlink, ap)
-			ata_link_for_each_dev(tdev, tlink)
+		ata_for_each_link(tlink, ap, EDGE)
+			ata_for_each_dev(tdev, tlink, ALL)
 				tdev->flags |= ATA_DFLAG_DETACH;
 	}
 
@@ -588,12 +588,9 @@ int ata_acpi_cbl_80wire(struct ata_port *ap, const struct ata_acpi_gtm *gtm)
 {
 	struct ata_device *dev;
 
-	ata_link_for_each_dev(dev, &ap->link) {
+	ata_for_each_dev(dev, &ap->link, ENABLED) {
 		unsigned long xfer_mask, udma_mask;
 
-		if (!ata_dev_enabled(dev))
-			continue;
-
 		xfer_mask = ata_acpi_gtm_xfermask(dev, gtm);
 		ata_unpack_xfermask(xfer_mask, NULL, NULL, &udma_mask);
 
@@ -893,7 +890,7 @@ void ata_acpi_on_resume(struct ata_port *ap)
 		 * use values set by _STM.  Cache _GTF result and
 		 * schedule _GTF.
 		 */
-		ata_link_for_each_dev(dev, &ap->link) {
+		ata_for_each_dev(dev, &ap->link, ALL) {
 			ata_acpi_clear_gtf(dev);
 			if (ata_dev_enabled(dev) &&
 			    ata_dev_get_GTF(dev, NULL) >= 0)
@@ -904,7 +901,7 @@ void ata_acpi_on_resume(struct ata_port *ap)
 		 * there's no reason to evaluate IDE _GTF early
 		 * without _STM.  Clear cache and schedule _GTF.
 		 */
-		ata_link_for_each_dev(dev, &ap->link) {
+		ata_for_each_dev(dev, &ap->link, ALL) {
 			ata_acpi_clear_gtf(dev);
 			if (ata_dev_enabled(dev))
 				dev->flags |= ATA_DFLAG_ACPI_PENDING;
@@ -932,8 +929,8 @@ void ata_acpi_set_state(struct ata_port *ap, pm_message_t state)
 	if (state.event == PM_EVENT_ON)
 		acpi_bus_set_power(ap->acpi_handle, ACPI_STATE_D0);
 
-	ata_link_for_each_dev(dev, &ap->link) {
-		if (dev->acpi_handle && ata_dev_enabled(dev))
+	ata_for_each_dev(dev, &ap->link, ENABLED) {
+		if (dev->acpi_handle)
 			acpi_bus_set_power(dev->acpi_handle,
 				state.event == PM_EVENT_ON ?
 					ACPI_STATE_D0 : ACPI_STATE_D3);
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index bc6695e3c848..ffd98e4e65b4 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -163,42 +163,118 @@ MODULE_LICENSE("GPL");
 MODULE_VERSION(DRV_VERSION);
 
 
-/*
- * Iterator helpers.  Don't use directly.
+/**
+ *	ata_link_next - link iteration helper
+ *	@link: the previous link, NULL to start
+ *	@ap: ATA port containing links to iterate
+ *	@mode: iteration mode, one of ATA_LITER_*
+ *
+ *	LOCKING:
+ *	Host lock or EH context.
  *
- * LOCKING:
- * Host lock or EH context.
+ *	RETURNS:
+ *	Pointer to the next link.
  */
-struct ata_link *__ata_port_next_link(struct ata_port *ap,
-				      struct ata_link *link, bool dev_only)
+struct ata_link *ata_link_next(struct ata_link *link, struct ata_port *ap,
+			       enum ata_link_iter_mode mode)
 {
+	BUG_ON(mode != ATA_LITER_EDGE &&
+	       mode != ATA_LITER_PMP_FIRST && mode != ATA_LITER_HOST_FIRST);
+
 	/* NULL link indicates start of iteration */
-	if (!link) {
-		if (dev_only && sata_pmp_attached(ap))
-			return ap->pmp_link;
-		return &ap->link;
-	}
+	if (!link)
+		switch (mode) {
+		case ATA_LITER_EDGE:
+		case ATA_LITER_PMP_FIRST:
+			if (sata_pmp_attached(ap))
+				return ap->pmp_link;
+			/* fall through */
+		case ATA_LITER_HOST_FIRST:
+			return &ap->link;
+		}
 
-	/* we just iterated over the host master link, what's next? */
-	if (link == &ap->link) {
-		if (!sata_pmp_attached(ap)) {
-			if (unlikely(ap->slave_link) && !dev_only)
+	/* we just iterated over the host link, what's next? */
+	if (link == &ap->link)
+		switch (mode) {
+		case ATA_LITER_HOST_FIRST:
+			if (sata_pmp_attached(ap))
+				return ap->pmp_link;
+			/* fall through */
+		case ATA_LITER_PMP_FIRST:
+			if (unlikely(ap->slave_link))
 				return ap->slave_link;
+			/* fall through */
+		case ATA_LITER_EDGE:
 			return NULL;
 		}
-		return ap->pmp_link;
-	}
 
 	/* slave_link excludes PMP */
 	if (unlikely(link == ap->slave_link))
 		return NULL;
 
-	/* iterate to the next PMP link */
+	/* we were over a PMP link */
 	if (++link < ap->pmp_link + ap->nr_pmp_links)
 		return link;
+
+	if (mode == ATA_LITER_PMP_FIRST)
+		return &ap->link;
+
 	return NULL;
 }
 
+/**
+ *	ata_dev_next - device iteration helper
+ *	@dev: the previous device, NULL to start
+ *	@link: ATA link containing devices to iterate
+ *	@mode: iteration mode, one of ATA_DITER_*
+ *
+ *	LOCKING:
+ *	Host lock or EH context.
+ *
+ *	RETURNS:
+ *	Pointer to the next device.
+ */
+struct ata_device *ata_dev_next(struct ata_device *dev, struct ata_link *link,
+				enum ata_dev_iter_mode mode)
+{
+	BUG_ON(mode != ATA_DITER_ENABLED && mode != ATA_DITER_ENABLED_REVERSE &&
+	       mode != ATA_DITER_ALL && mode != ATA_DITER_ALL_REVERSE);
+
+	/* NULL dev indicates start of iteration */
+	if (!dev)
+		switch (mode) {
+		case ATA_DITER_ENABLED:
+		case ATA_DITER_ALL:
+			dev = link->device;
+			goto check;
+		case ATA_DITER_ENABLED_REVERSE:
+		case ATA_DITER_ALL_REVERSE:
+			dev = link->device + ata_link_max_devices(link) - 1;
+			goto check;
+		}
+
+ next:
+	/* move to the next one */
+	switch (mode) {
+	case ATA_DITER_ENABLED:
+	case ATA_DITER_ALL:
+		if (++dev < link->device + ata_link_max_devices(link))
+			goto check;
+		return NULL;
+	case ATA_DITER_ENABLED_REVERSE:
+	case ATA_DITER_ALL_REVERSE:
+		if (--dev >= link->device)
+			goto check;
+		return NULL;
+	}
+
+ check:
+	if ((mode == ATA_DITER_ENABLED || mode == ATA_DITER_ENABLED_REVERSE) &&
+	    !ata_dev_enabled(dev))
+		goto next;
+	return dev;
+}
+
 /**
  *	ata_dev_phys_link - find physical link for a device
  *	@dev: ATA device to look up physical link for
@@ -1107,8 +1183,8 @@ static void ata_lpm_enable(struct ata_host *host)
 
 	for (i = 0; i < host->n_ports; i++) {
 		ap = host->ports[i];
-		ata_port_for_each_link(link, ap) {
-			ata_link_for_each_dev(dev, link)
+		ata_for_each_link(link, ap, EDGE) {
+			ata_for_each_dev(dev, link, ALL)
 				ata_dev_disable_pm(dev);
 		}
 	}
@@ -2594,11 +2670,11 @@ int ata_bus_probe(struct ata_port *ap)
 
 	ata_port_probe(ap);
 
-	ata_link_for_each_dev(dev, &ap->link)
+	ata_for_each_dev(dev, &ap->link, ALL)
 		tries[dev->devno] = ATA_PROBE_MAX_TRIES;
 
  retry:
-	ata_link_for_each_dev(dev, &ap->link) {
+	ata_for_each_dev(dev, &ap->link, ALL) {
 		/* If we issue an SRST then an ATA drive (not ATAPI)
 		 * may change configuration and be in PIO0 timing. If
 		 * we do a hard reset (or are coming from power on)
@@ -2620,7 +2696,7 @@ int ata_bus_probe(struct ata_port *ap)
 	/* reset and determine device classes */
 	ap->ops->phy_reset(ap);
 
-	ata_link_for_each_dev(dev, &ap->link) {
+	ata_for_each_dev(dev, &ap->link, ALL) {
 		if (!(ap->flags & ATA_FLAG_DISABLED) &&
 		    dev->class != ATA_DEV_UNKNOWN)
 			classes[dev->devno] = dev->class;
@@ -2636,7 +2712,7 @@ int ata_bus_probe(struct ata_port *ap)
 	   specific sequence bass-ackwards so that PDIAG- is released by
 	   the slave device */
 
-	ata_link_for_each_dev_reverse(dev, &ap->link) {
+	ata_for_each_dev(dev, &ap->link, ALL_REVERSE) {
 		if (tries[dev->devno])
 			dev->class = classes[dev->devno];
 
@@ -2653,24 +2729,19 @@ int ata_bus_probe(struct ata_port *ap)
 	if (ap->ops->cable_detect)
 		ap->cbl = ap->ops->cable_detect(ap);
 
-	/* We may have SATA bridge glue hiding here irrespective of the
-	   reported cable types and sensed types */
-	ata_link_for_each_dev(dev, &ap->link) {
-		if (!ata_dev_enabled(dev))
-			continue;
-		/* SATA drives indicate we have a bridge. We don't know which
-		   end of the link the bridge is which is a problem */
+	/* We may have SATA bridge glue hiding here irrespective of
+	 * the reported cable types and sensed types.  When SATA
+	 * drives indicate we have a bridge, we don't know which end
+	 * of the link the bridge is which is a problem.
+	 */
+	ata_for_each_dev(dev, &ap->link, ENABLED)
 		if (ata_id_is_sata(dev->id))
 			ap->cbl = ATA_CBL_SATA;
-	}
 
 	/* After the identify sequence we can now set up the devices. We do
 	   this in the normal order so that the user doesn't get confused */
 
-	ata_link_for_each_dev(dev, &ap->link) {
-		if (!ata_dev_enabled(dev))
-			continue;
-
+	ata_for_each_dev(dev, &ap->link, ENABLED) {
 		ap->link.eh_context.i.flags |= ATA_EHI_PRINTINFO;
 		rc = ata_dev_configure(dev);
 		ap->link.eh_context.i.flags &= ~ATA_EHI_PRINTINFO;
@@ -2683,9 +2754,8 @@ int ata_bus_probe(struct ata_port *ap)
 	if (rc)
 		goto fail;
 
-	ata_link_for_each_dev(dev, &ap->link)
-		if (ata_dev_enabled(dev))
-			return 0;
+	ata_for_each_dev(dev, &ap->link, ENABLED)
+		return 0;
 
 	/* no device present, disable port */
 	ata_port_disable(ap);
@@ -3331,13 +3401,10 @@ int ata_do_set_mode(struct ata_link *link, struct ata_device **r_failed_dev)
 	int rc = 0, used_dma = 0, found = 0;
 
 	/* step 1: calculate xfer_mask */
-	ata_link_for_each_dev(dev, link) {
+	ata_for_each_dev(dev, link, ENABLED) {
 		unsigned long pio_mask, dma_mask;
 		unsigned int mode_mask;
 
-		if (!ata_dev_enabled(dev))
-			continue;
-
 		mode_mask = ATA_DMA_MASK_ATA;
 		if (dev->class == ATA_DEV_ATAPI)
 			mode_mask = ATA_DMA_MASK_ATAPI;
@@ -3366,10 +3433,7 @@ int ata_do_set_mode(struct ata_link *link, struct ata_device **r_failed_dev)
 		goto out;
 
 	/* step 2: always set host PIO timings */
-	ata_link_for_each_dev(dev, link) {
-		if (!ata_dev_enabled(dev))
-			continue;
-
+	ata_for_each_dev(dev, link, ENABLED) {
 		if (dev->pio_mode == 0xff) {
 			ata_dev_printk(dev, KERN_WARNING, "no PIO support\n");
 			rc = -EINVAL;
@@ -3383,8 +3447,8 @@ int ata_do_set_mode(struct ata_link *link, struct ata_device **r_failed_dev)
 	}
 
 	/* step 3: set host DMA timings */
-	ata_link_for_each_dev(dev, link) {
-		if (!ata_dev_enabled(dev) || !ata_dma_enabled(dev))
+	ata_for_each_dev(dev, link, ENABLED) {
+		if (!ata_dma_enabled(dev))
 			continue;
 
 		dev->xfer_mode = dev->dma_mode;
@@ -3394,11 +3458,7 @@ int ata_do_set_mode(struct ata_link *link, struct ata_device **r_failed_dev)
 	}
 
 	/* step 4: update devices' xfer mode */
-	ata_link_for_each_dev(dev, link) {
-		/* don't update suspended devices' xfer mode */
-		if (!ata_dev_enabled(dev))
-			continue;
-
+	ata_for_each_dev(dev, link, ENABLED) {
 		rc = ata_dev_set_mode(dev);
 		if (rc)
 			goto out;
@@ -4263,9 +4323,9 @@ static int cable_is_40wire(struct ata_port *ap)
 	 * - if you have a non detect capable drive you don't want it
 	 *   to colour the choice
 	 */
-	ata_port_for_each_link(link, ap) {
-		ata_link_for_each_dev(dev, link) {
-			if (ata_dev_enabled(dev) && !ata_is_40wire(dev))
+	ata_for_each_link(link, ap, EDGE) {
+		ata_for_each_dev(dev, link, ENABLED) {
+			if (!ata_is_40wire(dev))
 				return 0;
 		}
 	}
@@ -5218,7 +5278,7 @@ static int ata_host_request_pm(struct ata_host *host, pm_message_t mesg,
 		}
 
 		ap->pflags |= ATA_PFLAG_PM_PENDING;
-		__ata_port_for_each_link(link, ap) {
+		ata_for_each_link(link, ap, HOST_FIRST) {
 			link->eh_info.action |= action;
 			link->eh_info.flags |= ehi_flags;
 		}
@@ -6063,9 +6123,9 @@ static void ata_port_detach(struct ata_port *ap)
 	/* EH is now guaranteed to see UNLOADING - EH context belongs
 	 * to us.  Restore SControl and disable all existing devices.
 	 */
-	__ata_port_for_each_link(link, ap) {
+	ata_for_each_link(link, ap, HOST_FIRST) {
 		sata_scr_write(link, SCR_CONTROL, link->saved_scontrol & 0xff0);
-		ata_link_for_each_dev(dev, link)
+		ata_for_each_dev(dev, link, ALL)
 			ata_dev_disable(dev);
 	}
 
@@ -6528,7 +6588,8 @@ EXPORT_SYMBOL_GPL(ata_base_port_ops);
 EXPORT_SYMBOL_GPL(sata_port_ops);
 EXPORT_SYMBOL_GPL(ata_dummy_port_ops);
 EXPORT_SYMBOL_GPL(ata_dummy_port_info);
-EXPORT_SYMBOL_GPL(__ata_port_next_link);
+EXPORT_SYMBOL_GPL(ata_link_next);
+EXPORT_SYMBOL_GPL(ata_dev_next);
 EXPORT_SYMBOL_GPL(ata_std_bios_param);
 EXPORT_SYMBOL_GPL(ata_host_init);
 EXPORT_SYMBOL_GPL(ata_host_alloc);
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 32da9a93ce44..d673f3712bdc 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -422,7 +422,7 @@ static void ata_eh_clear_action(struct ata_link *link, struct ata_device *dev,
 
 	if (!dev) {
 		ehi->action &= ~action;
-		ata_link_for_each_dev(tdev, link)
+		ata_for_each_dev(tdev, link, ALL)
 			ehi->dev_action[tdev->devno] &= ~action;
 	} else {
 		/* doesn't make sense for port-wide EH actions */
@@ -430,7 +430,7 @@ static void ata_eh_clear_action(struct ata_link *link, struct ata_device *dev,
 
 		/* break ehi->action into ehi->dev_action */
 		if (ehi->action & action) {
-			ata_link_for_each_dev(tdev, link)
+			ata_for_each_dev(tdev, link, ALL)
 				ehi->dev_action[tdev->devno] |=
 					ehi->action & action;
 			ehi->action &= ~action;
@@ -592,7 +592,7 @@ void ata_scsi_error(struct Scsi_Host *host)
 		/* fetch & clear EH info */
 		spin_lock_irqsave(ap->lock, flags);
 
-		__ata_port_for_each_link(link, ap) {
+		ata_for_each_link(link, ap, HOST_FIRST) {
 			struct ata_eh_context *ehc = &link->eh_context;
 			struct ata_device *dev;
 
@@ -600,12 +600,9 @@ void ata_scsi_error(struct Scsi_Host *host)
 			link->eh_context.i = link->eh_info;
 			memset(&link->eh_info, 0, sizeof(link->eh_info));
 
-			ata_link_for_each_dev(dev, link) {
+			ata_for_each_dev(dev, link, ENABLED) {
 				int devno = dev->devno;
 
-				if (!ata_dev_enabled(dev))
-					continue;
-
 				ehc->saved_xfer_mode[devno] = dev->xfer_mode;
 				if (ata_ncq_enabled(dev))
 					ehc->saved_ncq_enabled |= 1 << devno;
@@ -644,7 +641,7 @@ void ata_scsi_error(struct Scsi_Host *host)
 		}
 
 		/* this run is complete, make sure EH info is clear */
-		__ata_port_for_each_link(link, ap)
+		ata_for_each_link(link, ap, HOST_FIRST)
 			memset(&link->eh_info, 0, sizeof(link->eh_info));
 
 		/* Clear host_eh_scheduled while holding ap->lock such
@@ -1025,7 +1022,7 @@ int sata_async_notification(struct ata_port *ap)
 		struct ata_link *link;
 
 		/* check and notify ATAPI AN */
-		ata_port_for_each_link(link, ap) {
+		ata_for_each_link(link, ap, EDGE) {
 			if (!(sntf & (1 << link->pmp)))
 				continue;
 
@@ -2005,7 +2002,7 @@ void ata_eh_autopsy(struct ata_port *ap)
 {
 	struct ata_link *link;
 
-	ata_port_for_each_link(link, ap)
+	ata_for_each_link(link, ap, EDGE)
 		ata_eh_link_autopsy(link);
 
 	/* Handle the frigging slave link.  Autopsy is done similarly
@@ -2219,7 +2216,7 @@ void ata_eh_report(struct ata_port *ap)
 {
 	struct ata_link *link;
 
-	__ata_port_for_each_link(link, ap)
+	ata_for_each_link(link, ap, HOST_FIRST)
 		ata_eh_link_report(link);
 }
 
@@ -2230,7 +2227,7 @@ static int ata_do_reset(struct ata_link *link, ata_reset_fn_t reset,
 	struct ata_device *dev;
 
 	if (clear_classes)
-		ata_link_for_each_dev(dev, link)
+		ata_for_each_dev(dev, link, ALL)
 			classes[dev->devno] = ATA_DEV_UNKNOWN;
 
 	return reset(link, classes, deadline);
@@ -2294,7 +2291,7 @@ int ata_eh_reset(struct ata_link *link, int classify,
 
 	ata_eh_about_to_do(link, NULL, ATA_EH_RESET);
 
-	ata_link_for_each_dev(dev, link) {
+	ata_for_each_dev(dev, link, ALL) {
 		/* If we issue an SRST then an ATA drive (not ATAPI)
 		 * may change configuration and be in PIO0 timing. If
 		 * we do a hard reset (or are coming from power on)
@@ -2355,7 +2352,7 @@ int ata_eh_reset(struct ata_link *link, int classify,
 						"port disabled. ignoring.\n");
 				ehc->i.action &= ~ATA_EH_RESET;
 
-				ata_link_for_each_dev(dev, link)
+				ata_for_each_dev(dev, link, ALL)
 					classes[dev->devno] = ATA_DEV_NONE;
 
 				rc = 0;
@@ -2369,7 +2366,7 @@ int ata_eh_reset(struct ata_link *link, int classify,
 		 * bang classes and return.
 		 */
 		if (reset && !(ehc->i.action & ATA_EH_RESET)) {
-			ata_link_for_each_dev(dev, link)
+			ata_for_each_dev(dev, link, ALL)
 				classes[dev->devno] = ATA_DEV_NONE;
 			rc = 0;
 			goto out;
@@ -2454,7 +2451,7 @@ int ata_eh_reset(struct ata_link *link, int classify,
 	/*
 	 * Post-reset processing
 	 */
-	ata_link_for_each_dev(dev, link) {
+	ata_for_each_dev(dev, link, ALL) {
 		/* After the reset, the device state is PIO 0 and the
 		 * controller state is undefined.  Reset also wakes up
 		 * drives from sleeping mode.
@@ -2510,7 +2507,7 @@ int ata_eh_reset(struct ata_link *link, int classify,
 	 * can be reliably detected and retried.
 	 */
 	nr_unknown = 0;
-	ata_link_for_each_dev(dev, link) {
+	ata_for_each_dev(dev, link, ALL) {
 		/* convert all ATA_DEV_UNKNOWN to ATA_DEV_NONE */
 		if (classes[dev->devno] == ATA_DEV_UNKNOWN) {
 			classes[dev->devno] = ATA_DEV_NONE;
@@ -2619,8 +2616,8 @@ static inline void ata_eh_pull_park_action(struct ata_port *ap)
 
 	spin_lock_irqsave(ap->lock, flags);
 	INIT_COMPLETION(ap->park_req_pending);
-	ata_port_for_each_link(link, ap) {
-		ata_link_for_each_dev(dev, link) {
+	ata_for_each_link(link, ap, EDGE) {
+		ata_for_each_dev(dev, link, ALL) {
 			struct ata_eh_info *ehi = &link->eh_info;
 
 			link->eh_context.i.dev_action[dev->devno] |=
@@ -2675,7 +2672,7 @@ static int ata_eh_revalidate_and_attach(struct ata_link *link,
 	 * be done backwards such that PDIAG- is released by the slave
 	 * device before the master device is identified.
 	 */
-	ata_link_for_each_dev_reverse(dev, link) {
+	ata_for_each_dev(dev, link, ALL_REVERSE) {
 		unsigned int action = ata_eh_dev_action(dev);
 		unsigned int readid_flags = 0;
 
@@ -2744,7 +2741,7 @@ static int ata_eh_revalidate_and_attach(struct ata_link *link,
 	/* Configure new devices forward such that user doesn't see
 	 * device detection messages backwards.
 	 */
-	ata_link_for_each_dev(dev, link) {
+	ata_for_each_dev(dev, link, ALL) {
 		if (!(new_mask & (1 << dev->devno)) ||
 		    dev->class == ATA_DEV_PMP)
 			continue;
@@ -2793,10 +2790,7 @@ int ata_set_mode(struct ata_link *link, struct ata_device **r_failed_dev)
 	int rc;
 
 	/* if data transfer is verified, clear DUBIOUS_XFER on ering top */
-	ata_link_for_each_dev(dev, link) {
-		if (!ata_dev_enabled(dev))
-			continue;
-
+	ata_for_each_dev(dev, link, ENABLED) {
 		if (!(dev->flags & ATA_DFLAG_DUBIOUS_XFER)) {
 			struct ata_ering_entry *ent;
 
@@ -2813,14 +2807,11 @@ int ata_set_mode(struct ata_link *link, struct ata_device **r_failed_dev)
 		rc = ata_do_set_mode(link, r_failed_dev);
 
 	/* if transfer mode has changed, set DUBIOUS_XFER on device */
-	ata_link_for_each_dev(dev, link) {
+	ata_for_each_dev(dev, link, ENABLED) {
 		struct ata_eh_context *ehc = &link->eh_context;
 		u8 saved_xfer_mode = ehc->saved_xfer_mode[dev->devno];
 		u8 saved_ncq = !!(ehc->saved_ncq_enabled & (1 << dev->devno));
 
-		if (!ata_dev_enabled(dev))
-			continue;
-
 		if (dev->xfer_mode != saved_xfer_mode ||
 		    ata_ncq_enabled(dev) != saved_ncq)
 			dev->flags |= ATA_DFLAG_DUBIOUS_XFER;
@@ -2881,9 +2872,8 @@ static int ata_link_nr_enabled(struct ata_link *link)
 	struct ata_device *dev;
 	int cnt = 0;
 
-	ata_link_for_each_dev(dev, link)
-		if (ata_dev_enabled(dev))
-			cnt++;
+	ata_for_each_dev(dev, link, ENABLED)
+		cnt++;
 	return cnt;
 }
 
@@ -2892,7 +2882,7 @@ static int ata_link_nr_vacant(struct ata_link *link)
 	struct ata_device *dev;
 	int cnt = 0;
 
-	ata_link_for_each_dev(dev, link)
+	ata_for_each_dev(dev, link, ALL)
 		if (dev->class == ATA_DEV_UNKNOWN)
 			cnt++;
 	return cnt;
@@ -2918,7 +2908,7 @@ static int ata_eh_skip_recovery(struct ata_link *link)
 		return 0;
 
 	/* skip if class codes for all vacant slots are ATA_DEV_NONE */
-	ata_link_for_each_dev(dev, link) {
+	ata_for_each_dev(dev, link, ALL) {
 		if (dev->class == ATA_DEV_UNKNOWN &&
 		    ehc->classes[dev->devno] != ATA_DEV_NONE)
 			return 0;
@@ -3026,7 +3016,7 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
 	DPRINTK("ENTER\n");
 
 	/* prep for recovery */
-	ata_port_for_each_link(link, ap) {
+	ata_for_each_link(link, ap, EDGE) {
 		struct ata_eh_context *ehc = &link->eh_context;
 
 		/* re-enable link? */
@@ -3038,7 +3028,7 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
 			ata_eh_done(link, NULL, ATA_EH_ENABLE_LINK);
 		}
 
-		ata_link_for_each_dev(dev, link) {
+		ata_for_each_dev(dev, link, ALL) {
 			if (link->flags & ATA_LFLAG_NO_RETRY)
 				ehc->tries[dev->devno] = 1;
 			else
@@ -3068,19 +3058,19 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
 		goto out;
 
 	/* prep for EH */
-	ata_port_for_each_link(link, ap) {
+	ata_for_each_link(link, ap, EDGE) {
 		struct ata_eh_context *ehc = &link->eh_context;
 
 		/* skip EH if possible. */
 		if (ata_eh_skip_recovery(link))
 			ehc->i.action = 0;
 
-		ata_link_for_each_dev(dev, link)
+		ata_for_each_dev(dev, link, ALL)
 			ehc->classes[dev->devno] = ATA_DEV_UNKNOWN;
 	}
 
 	/* reset */
-	ata_port_for_each_link(link, ap) {
+	ata_for_each_link(link, ap, EDGE) {
 		struct ata_eh_context *ehc = &link->eh_context;
 
 		if (!(ehc->i.action & ATA_EH_RESET))
@@ -3105,8 +3095,8 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
 		ata_eh_pull_park_action(ap);
 
 		deadline = jiffies;
-		ata_port_for_each_link(link, ap) {
-			ata_link_for_each_dev(dev, link) {
+		ata_for_each_link(link, ap, EDGE) {
+			ata_for_each_dev(dev, link, ALL) {
 				struct ata_eh_context *ehc = &link->eh_context;
 				unsigned long tmp;
 
@@ -3134,8 +3124,8 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
 		deadline = wait_for_completion_timeout(&ap->park_req_pending,
 						       deadline - now);
 	} while (deadline);
-	ata_port_for_each_link(link, ap) {
-		ata_link_for_each_dev(dev, link) {
+	ata_for_each_link(link, ap, EDGE) {
+		ata_for_each_dev(dev, link, ALL) {
 			if (!(link->eh_context.unloaded_mask &
 			      (1 << dev->devno)))
 				continue;
@@ -3146,7 +3136,7 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
 	}
 
 	/* the rest */
-	ata_port_for_each_link(link, ap) {
+	ata_for_each_link(link, ap, EDGE) {
 		struct ata_eh_context *ehc = &link->eh_context;
 
 		/* revalidate existing devices and attach new ones */
@@ -3172,7 +3162,7 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
 		 * disrupting the current users of the device.
 		 */
 		if (ehc->i.flags & ATA_EHI_DID_RESET) {
-			ata_link_for_each_dev(dev, link) {
+			ata_for_each_dev(dev, link, ALL) {
 				if (dev->class != ATA_DEV_ATAPI)
 					continue;
 				rc = atapi_eh_clear_ua(dev);
@@ -3183,7 +3173,7 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
 
 		/* configure link power saving */
 		if (ehc->i.action & ATA_EH_LPM)
-			ata_link_for_each_dev(dev, link)
+			ata_for_each_dev(dev, link, ALL)
 				ata_dev_enable_pm(dev, ap->pm_policy);
 
 		/* this link is okay now */
@@ -3288,7 +3278,7 @@ void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset,
 	rc = ata_eh_recover(ap, prereset, softreset, hardreset, postreset,
 			    NULL);
 	if (rc) {
-		ata_link_for_each_dev(dev, &ap->link)
+		ata_for_each_dev(dev, &ap->link, ALL)
 			ata_dev_disable(dev);
 	}
 
diff --git a/drivers/ata/libata-pmp.c b/drivers/ata/libata-pmp.c
index b65db309c181..98ca07a2db87 100644
--- a/drivers/ata/libata-pmp.c
+++ b/drivers/ata/libata-pmp.c
@@ -321,7 +321,7 @@ static void sata_pmp_quirks(struct ata_port *ap)
 
 	if (vendor == 0x1095 && devid == 0x3726) {
 		/* sil3726 quirks */
-		ata_port_for_each_link(link, ap) {
+		ata_for_each_link(link, ap, EDGE) {
 			/* Class code report is unreliable and SRST
 			 * times out under certain configurations.
 			 */
@@ -336,7 +336,7 @@ static void sata_pmp_quirks(struct ata_port *ap)
 		}
 	} else if (vendor == 0x1095 && devid == 0x4723) {
 		/* sil4723 quirks */
-		ata_port_for_each_link(link, ap) {
+		ata_for_each_link(link, ap, EDGE) {
 			/* class code report is unreliable */
 			if (link->pmp < 2)
 				link->flags |= ATA_LFLAG_ASSUME_ATA;
@@ -348,7 +348,7 @@ static void sata_pmp_quirks(struct ata_port *ap)
 		}
 	} else if (vendor == 0x1095 && devid == 0x4726) {
 		/* sil4726 quirks */
-		ata_port_for_each_link(link, ap) {
+		ata_for_each_link(link, ap, EDGE) {
 			/* Class code report is unreliable and SRST
 			 * times out under certain configurations.
 			 * Config device can be at port 0 or 5 and
@@ -450,7 +450,7 @@ int sata_pmp_attach(struct ata_device *dev)
 	if (ap->ops->pmp_attach)
 		ap->ops->pmp_attach(ap);
 
-	ata_port_for_each_link(tlink, ap)
+	ata_for_each_link(tlink, ap, EDGE)
 		sata_link_init_spd(tlink);
 
 	ata_acpi_associate_sata_port(ap);
@@ -487,7 +487,7 @@ static void sata_pmp_detach(struct ata_device *dev)
 	if (ap->ops->pmp_detach)
 		ap->ops->pmp_detach(ap);
 
-	ata_port_for_each_link(tlink, ap)
+	ata_for_each_link(tlink, ap, EDGE)
 		ata_eh_detach_dev(tlink->device);
 
 	spin_lock_irqsave(ap->lock, flags);
@@ -700,7 +700,7 @@ static int sata_pmp_eh_recover_pmp(struct ata_port *ap,
 		}
 
 		/* PMP is reset, SErrors cannot be trusted, scan all */
-		ata_port_for_each_link(tlink, ap) {
+		ata_for_each_link(tlink, ap, EDGE) {
 			struct ata_eh_context *ehc = &tlink->eh_context;
 
 			ehc->i.probe_mask |= ATA_ALL_DEVICES;
@@ -768,7 +768,7 @@ static int sata_pmp_eh_handle_disabled_links(struct ata_port *ap)
 
 	spin_lock_irqsave(ap->lock, flags);
 
-	ata_port_for_each_link(link, ap) {
+	ata_for_each_link(link, ap, EDGE) {
 		if (!(link->flags & ATA_LFLAG_DISABLED))
 			continue;
 
@@ -852,7 +852,7 @@ static int sata_pmp_eh_recover(struct ata_port *ap)
 	int cnt, rc;
 
 	pmp_tries = ATA_EH_PMP_TRIES;
-	ata_port_for_each_link(link, ap)
+	ata_for_each_link(link, ap, EDGE)
 		link_tries[link->pmp] = ATA_EH_PMP_LINK_TRIES;
 
  retry:
@@ -861,7 +861,7 @@ static int sata_pmp_eh_recover(struct ata_port *ap)
 		rc = ata_eh_recover(ap, ops->prereset, ops->softreset,
 				    ops->hardreset, ops->postreset, NULL);
 		if (rc) {
-			ata_link_for_each_dev(dev, &ap->link)
+			ata_for_each_dev(dev, &ap->link, ALL)
 				ata_dev_disable(dev);
 			return rc;
 		}
@@ -870,7 +870,7 @@ static int sata_pmp_eh_recover(struct ata_port *ap)
 			return 0;
 
 		/* new PMP online */
-		ata_port_for_each_link(link, ap)
+		ata_for_each_link(link, ap, EDGE)
 			link_tries[link->pmp] = ATA_EH_PMP_LINK_TRIES;
 
 		/* fall through */
@@ -942,7 +942,7 @@ static int sata_pmp_eh_recover(struct ata_port *ap)
 	}
 
 	cnt = 0;
-	ata_port_for_each_link(link, ap) {
+	ata_for_each_link(link, ap, EDGE) {
 		if (!(gscr_error & (1 << link->pmp)))
 			continue;
 
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 47c7afcb36f2..0b2e14f67655 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -3229,12 +3229,12 @@ void ata_scsi_scan_host(struct ata_port *ap, int sync)
 		return;
 
  repeat:
-	ata_port_for_each_link(link, ap) {
-		ata_link_for_each_dev(dev, link) {
+	ata_for_each_link(link, ap, EDGE) {
+		ata_for_each_dev(dev, link, ENABLED) {
 			struct scsi_device *sdev;
 			int channel = 0, id = 0;
 
-			if (!ata_dev_enabled(dev) || dev->sdev)
+			if (dev->sdev)
 				continue;
 
 			if (ata_is_host_link(link))
@@ -3255,9 +3255,9 @@ void ata_scsi_scan_host(struct ata_port *ap, int sync)
 	 * failure occurred, scan would have failed silently.  Check
 	 * whether all devices are attached.
 	 */
-	ata_port_for_each_link(link, ap) {
-		ata_link_for_each_dev(dev, link) {
-			if (ata_dev_enabled(dev) && !dev->sdev)
+	ata_for_each_link(link, ap, EDGE) {
+		ata_for_each_dev(dev, link, ENABLED) {
+			if (!dev->sdev)
 				goto exit_loop;
 		}
 	}
@@ -3381,7 +3381,7 @@ static void ata_scsi_handle_link_detach(struct ata_link *link)
 	struct ata_port *ap = link->ap;
 	struct ata_device *dev;
 
-	ata_link_for_each_dev(dev, link) {
+	ata_for_each_dev(dev, link, ALL) {
 		unsigned long flags;
 
 		if (!(dev->flags & ATA_DFLAG_DETACHED))
@@ -3496,7 +3496,7 @@ static int ata_scsi_user_scan(struct Scsi_Host *shost, unsigned int channel,
 	if (devno == SCAN_WILD_CARD) {
 		struct ata_link *link;
 
-		ata_port_for_each_link(link, ap) {
+		ata_for_each_link(link, ap, EDGE) {
 			struct ata_eh_info *ehi = &link->eh_info;
 			ehi->probe_mask |= ATA_ALL_DEVICES;
 			ehi->action |= ATA_EH_RESET;
@@ -3544,11 +3544,11 @@ void ata_scsi_dev_rescan(struct work_struct *work)
 
 	spin_lock_irqsave(ap->lock, flags);
 
-	ata_port_for_each_link(link, ap) {
-		ata_link_for_each_dev(dev, link) {
+	ata_for_each_link(link, ap, EDGE) {
+		ata_for_each_dev(dev, link, ENABLED) {
 			struct scsi_device *sdev = dev->sdev;
 
-			if (!ata_dev_enabled(dev) || !sdev)
+			if (!sdev)
 				continue;
 			if (scsi_device_get(sdev))
 				continue;
diff --git a/drivers/ata/pata_it821x.c b/drivers/ata/pata_it821x.c
index 860ede526282..f828a29d7756 100644
--- a/drivers/ata/pata_it821x.c
+++ b/drivers/ata/pata_it821x.c
@@ -465,24 +465,22 @@ static int it821x_smart_set_mode(struct ata_link *link, struct ata_device **unus
 {
 	struct ata_device *dev;
 
-	ata_link_for_each_dev(dev, link) {
-		if (ata_dev_enabled(dev)) {
-			/* We don't really care */
-			dev->pio_mode = XFER_PIO_0;
-			dev->dma_mode = XFER_MW_DMA_0;
-			/* We do need the right mode information for DMA or PIO
-			   and this comes from the current configuration flags */
-			if (ata_id_has_dma(dev->id)) {
-				ata_dev_printk(dev, KERN_INFO, "configured for DMA\n");
-				dev->xfer_mode = XFER_MW_DMA_0;
-				dev->xfer_shift = ATA_SHIFT_MWDMA;
-				dev->flags &= ~ATA_DFLAG_PIO;
-			} else {
-				ata_dev_printk(dev, KERN_INFO, "configured for PIO\n");
-				dev->xfer_mode = XFER_PIO_0;
-				dev->xfer_shift = ATA_SHIFT_PIO;
-				dev->flags |= ATA_DFLAG_PIO;
-			}
+	ata_for_each_dev(dev, link, ENABLED) {
+		/* We don't really care */
+		dev->pio_mode = XFER_PIO_0;
+		dev->dma_mode = XFER_MW_DMA_0;
+		/* We do need the right mode information for DMA or PIO
+		   and this comes from the current configuration flags */
+		if (ata_id_has_dma(dev->id)) {
+			ata_dev_printk(dev, KERN_INFO, "configured for DMA\n");
+			dev->xfer_mode = XFER_MW_DMA_0;
+			dev->xfer_shift = ATA_SHIFT_MWDMA;
+			dev->flags &= ~ATA_DFLAG_PIO;
+		} else {
+			ata_dev_printk(dev, KERN_INFO, "configured for PIO\n");
+			dev->xfer_mode = XFER_PIO_0;
+			dev->xfer_shift = ATA_SHIFT_PIO;
+			dev->flags |= ATA_DFLAG_PIO;
 		}
 	}
 	return 0;
diff --git a/drivers/ata/pata_ixp4xx_cf.c b/drivers/ata/pata_ixp4xx_cf.c
index 2014253f6c88..b173c157ab00 100644
--- a/drivers/ata/pata_ixp4xx_cf.c
+++ b/drivers/ata/pata_ixp4xx_cf.c
@@ -30,14 +30,12 @@ static int ixp4xx_set_mode(struct ata_link *link, struct ata_device **error)
 {
 	struct ata_device *dev;
 
-	ata_link_for_each_dev(dev, link) {
-		if (ata_dev_enabled(dev)) {
-			ata_dev_printk(dev, KERN_INFO, "configured for PIO0\n");
-			dev->pio_mode = XFER_PIO_0;
-			dev->xfer_mode = XFER_PIO_0;
-			dev->xfer_shift = ATA_SHIFT_PIO;
-			dev->flags |= ATA_DFLAG_PIO;
-		}
+	ata_for_each_dev(dev, link, ENABLED) {
+		ata_dev_printk(dev, KERN_INFO, "configured for PIO0\n");
+		dev->pio_mode = XFER_PIO_0;
+		dev->xfer_mode = XFER_PIO_0;
+		dev->xfer_shift = ATA_SHIFT_PIO;
+		dev->flags |= ATA_DFLAG_PIO;
 	}
 	return 0;
 }
diff --git a/drivers/ata/pata_legacy.c b/drivers/ata/pata_legacy.c
index 930c2208640b..cbd98c16eea4 100644
--- a/drivers/ata/pata_legacy.c
+++ b/drivers/ata/pata_legacy.c
@@ -194,15 +194,12 @@ static int legacy_set_mode(struct ata_link *link, struct ata_device **unused)
 {
 	struct ata_device *dev;
 
-	ata_link_for_each_dev(dev, link) {
-		if (ata_dev_enabled(dev)) {
-			ata_dev_printk(dev, KERN_INFO,
-						"configured for PIO\n");
-			dev->pio_mode = XFER_PIO_0;
-			dev->xfer_mode = XFER_PIO_0;
-			dev->xfer_shift = ATA_SHIFT_PIO;
-			dev->flags |= ATA_DFLAG_PIO;
-		}
+	ata_for_each_dev(dev, link, ENABLED) {
+		ata_dev_printk(dev, KERN_INFO, "configured for PIO\n");
+		dev->pio_mode = XFER_PIO_0;
+		dev->xfer_mode = XFER_PIO_0;
+		dev->xfer_shift = ATA_SHIFT_PIO;
+		dev->flags |= ATA_DFLAG_PIO;
 	}
 	return 0;
 }
@@ -1028,7 +1025,7 @@ static __init int legacy_init_one(struct legacy_probe *probe)
 	/* Nothing found means we drop the port as its probably not there */
 
 	ret = -ENODEV;
-	ata_link_for_each_dev(dev, &ap->link) {
+	ata_for_each_dev(dev, &ap->link, ALL) {
 		if (!ata_dev_absent(dev)) {
 			legacy_host[probe->slot] = host;
 			ld->platform_dev = pdev;
diff --git a/drivers/ata/pata_pdc2027x.c b/drivers/ata/pata_pdc2027x.c
index 0e1c2c1134d3..d43e8be96b88 100644
--- a/drivers/ata/pata_pdc2027x.c
+++ b/drivers/ata/pata_pdc2027x.c
@@ -406,23 +406,20 @@ static int pdc2027x_set_mode(struct ata_link *link, struct ata_device **r_failed
 	if (rc < 0)
 		return rc;
 
-	ata_link_for_each_dev(dev, link) {
-		if (ata_dev_enabled(dev)) {
-
-			pdc2027x_set_piomode(ap, dev);
-
-			/*
-			 * Enable prefetch if the device support PIO only.
-			 */
-			if (dev->xfer_shift == ATA_SHIFT_PIO) {
-				u32 ctcr1 = ioread32(dev_mmio(ap, dev, PDC_CTCR1));
-				ctcr1 |= (1 << 25);
-				iowrite32(ctcr1, dev_mmio(ap, dev, PDC_CTCR1));
-
-				PDPRINTK("Turn on prefetch\n");
-			} else {
-				pdc2027x_set_dmamode(ap, dev);
-			}
+	ata_for_each_dev(dev, link, ENABLED) {
+		pdc2027x_set_piomode(ap, dev);
+
+		/*
+		 * Enable prefetch if the device support PIO only.
+		 */
+		if (dev->xfer_shift == ATA_SHIFT_PIO) {
+			u32 ctcr1 = ioread32(dev_mmio(ap, dev, PDC_CTCR1));
+			ctcr1 |= (1 << 25);
+			iowrite32(ctcr1, dev_mmio(ap, dev, PDC_CTCR1));
+
+			PDPRINTK("Turn on prefetch\n");
+		} else {
+			pdc2027x_set_dmamode(ap, dev);
 		}
 	}
 	return 0;
diff --git a/drivers/ata/pata_platform.c b/drivers/ata/pata_platform.c
index 77e4e3b17f54..6afa07a37648 100644
--- a/drivers/ata/pata_platform.c
+++ b/drivers/ata/pata_platform.c
@@ -34,14 +34,12 @@ static int pata_platform_set_mode(struct ata_link *link, struct ata_device **unu
 {
 	struct ata_device *dev;
 
-	ata_link_for_each_dev(dev, link) {
-		if (ata_dev_enabled(dev)) {
-			/* We don't really care */
-			dev->pio_mode = dev->xfer_mode = XFER_PIO_0;
-			dev->xfer_shift = ATA_SHIFT_PIO;
-			dev->flags |= ATA_DFLAG_PIO;
-			ata_dev_printk(dev, KERN_INFO, "configured for PIO\n");
-		}
+	ata_for_each_dev(dev, link, ENABLED) {
+		/* We don't really care */
+		dev->pio_mode = dev->xfer_mode = XFER_PIO_0;
+		dev->xfer_shift = ATA_SHIFT_PIO;
+		dev->flags |= ATA_DFLAG_PIO;
+		ata_dev_printk(dev, KERN_INFO, "configured for PIO\n");
 	}
 	return 0;
 }
diff --git a/drivers/ata/pata_rz1000.c b/drivers/ata/pata_rz1000.c
index 7dfd1f3f6f3a..46d6bc1bf1e9 100644
--- a/drivers/ata/pata_rz1000.c
+++ b/drivers/ata/pata_rz1000.c
@@ -38,15 +38,13 @@ static int rz1000_set_mode(struct ata_link *link, struct ata_device **unused)
 {
 	struct ata_device *dev;
 
-	ata_link_for_each_dev(dev, link) {
-		if (ata_dev_enabled(dev)) {
-			/* We don't really care */
-			dev->pio_mode = XFER_PIO_0;
-			dev->xfer_mode = XFER_PIO_0;
-			dev->xfer_shift = ATA_SHIFT_PIO;
-			dev->flags |= ATA_DFLAG_PIO;
-			ata_dev_printk(dev, KERN_INFO, "configured for PIO\n");
-		}
+	ata_for_each_dev(dev, link, ENABLED) {
+		/* We don't really care */
+		dev->pio_mode = XFER_PIO_0;
+		dev->xfer_mode = XFER_PIO_0;
+		dev->xfer_shift = ATA_SHIFT_PIO;
+		dev->flags |= ATA_DFLAG_PIO;
+		ata_dev_printk(dev, KERN_INFO, "configured for PIO\n");
 	}
 	return 0;
 }
diff --git a/drivers/ata/sata_sil.c b/drivers/ata/sata_sil.c
index 031d7b7dee34..177370921774 100644
--- a/drivers/ata/sata_sil.c
+++ b/drivers/ata/sata_sil.c
@@ -278,7 +278,7 @@ static int sil_set_mode(struct ata_link *link, struct ata_device **r_failed)
 	if (rc)
 		return rc;
 
-	ata_link_for_each_dev(dev, link) {
+	ata_for_each_dev(dev, link, ALL) {
 		if (!ata_dev_enabled(dev))
 			dev_mode[dev->devno] = 0;	/* PIO0/1/2 */
 		else if (dev->flags & ATA_DFLAG_PIO)
diff --git a/include/linux/libata.h b/include/linux/libata.h
index ed3f26eb5df1..3b2a0c6444ee 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1285,26 +1285,62 @@ static inline int ata_link_active(struct ata_link *link)
 	return ata_tag_valid(link->active_tag) || link->sactive;
 }
 
-extern struct ata_link *__ata_port_next_link(struct ata_port *ap,
-					     struct ata_link *link,
-					     bool dev_only);
-
-#define __ata_port_for_each_link(link, ap) \
-	for ((link) = __ata_port_next_link((ap), NULL, false); (link); \
-	     (link) = __ata_port_next_link((ap), (link), false))
-
-#define ata_port_for_each_link(link, ap) \
-	for ((link) = __ata_port_next_link((ap), NULL, true); (link); \
-	     (link) = __ata_port_next_link((ap), (link), true))
-
-#define ata_link_for_each_dev(dev, link) \
-	for ((dev) = (link)->device; \
-	     (dev) < (link)->device + ata_link_max_devices(link) || ((dev) = NULL); \
-	     (dev)++)
-
-#define ata_link_for_each_dev_reverse(dev, link) \
-	for ((dev) = (link)->device + ata_link_max_devices(link) - 1; \
-	     (dev) >= (link)->device || ((dev) = NULL); (dev)--)
+/*
+ * Iterators
+ *
+ * ATA_LITER_* constants are used to select link iteration mode and
+ * ATA_DITER_* device iteration mode.
+ *
+ * For a custom iteration directly using ata_{link|dev}_next(), if
+ * @link or @dev, respectively, is NULL, the first element is
+ * returned.  @dev and @link can be any valid device or link and the
+ * next element according to the iteration mode will be returned.
+ * After the last element, NULL is returned.
+ */
+enum ata_link_iter_mode {
+	ATA_LITER_EDGE,		/* if present, PMP links only; otherwise,
+				 * host link.  no slave link */
+	ATA_LITER_HOST_FIRST,	/* host link followed by PMP or slave links */
+	ATA_LITER_PMP_FIRST,	/* PMP links followed by host link,
+				 * slave link still comes after host link */
+};
+
+enum ata_dev_iter_mode {
+	ATA_DITER_ENABLED,
+	ATA_DITER_ENABLED_REVERSE,
+	ATA_DITER_ALL,
+	ATA_DITER_ALL_REVERSE,
+};
+
+extern struct ata_link *ata_link_next(struct ata_link *link,
+				      struct ata_port *ap,
+				      enum ata_link_iter_mode mode);
+
+extern struct ata_device *ata_dev_next(struct ata_device *dev,
+				       struct ata_link *link,
+				       enum ata_dev_iter_mode mode);
+
+/*
+ * Shortcut notation for iterations
+ *
+ * ata_for_each_link() iterates over each link of @ap according to
+ * @mode.  @link points to the current link in the loop.  @link is
+ * NULL after loop termination.  ata_for_each_dev() works the same way
+ * except that it iterates over each device of @link.
+ *
+ * Note that the mode prefixes ATA_{L|D}ITER_ shouldn't need to be
+ * specified when using the following shorthand notations.  Only the
+ * mode itself (EDGE, HOST_FIRST, ENABLED, etc...) should be
+ * specified.  This not only increases brevity but also makes it
+ * impossible to use ATA_LITER_* for device iteration or vice-versa.
+ */
+#define ata_for_each_link(link, ap, mode) \
+	for ((link) = ata_link_next(NULL, (ap), ATA_LITER_##mode); (link); \
+	     (link) = ata_link_next((link), (ap), ATA_LITER_##mode))
+
+#define ata_for_each_dev(dev, link, mode) \
+	for ((dev) = ata_dev_next(NULL, (link), ATA_DITER_##mode); (dev); \
+	     (dev) = ata_dev_next((dev), (link), ATA_DITER_##mode))
 
 /**
  *	ata_ncq_enabled - Test whether NCQ is enabled
-- 
cgit v1.2.3


From ece180d1cfe5fa751eaa85bf796cf28b2150af15 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 3 Nov 2008 20:04:37 +0900
Subject: libata: perform port detach in EH

ata_port_detach() first made sure EH saw ATA_PFLAG_UNLOADING and then
assumed EH context belongs to it and performed detach operation
itself.  However, UNLOADING doesn't disable all of EH and this could
lead to problems including triggering WARN_ON()'s in EH path.

This patch makes port detach behave more like other EH actions such
that ata_port_detach() requests EH to detach and waits for completion.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libata-core.c | 23 ++++-------------------
 drivers/ata/libata-eh.c   | 32 +++++++++++++++++++++++++++++++-
 include/linux/libata.h    |  3 ++-
 3 files changed, 37 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 1ecc3cb0b722..837fb60a6dcc 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -6107,8 +6107,6 @@ int ata_host_activate(struct ata_host *host, int irq,
 static void ata_port_detach(struct ata_port *ap)
 {
 	unsigned long flags;
-	struct ata_link *link;
-	struct ata_device *dev;
 
 	if (!ap->ops->error_handler)
 		goto skip_eh;
@@ -6116,28 +6114,15 @@ static void ata_port_detach(struct ata_port *ap)
 	/* tell EH we're leaving & flush EH */
 	spin_lock_irqsave(ap->lock, flags);
 	ap->pflags |= ATA_PFLAG_UNLOADING;
+	ata_port_schedule_eh(ap);
 	spin_unlock_irqrestore(ap->lock, flags);
 
+	/* wait till EH commits suicide */
 	ata_port_wait_eh(ap);
 
-	/* EH is now guaranteed to see UNLOADING - EH context belongs
-	 * to us.  Restore SControl and disable all existing devices.
-	 */
-	ata_for_each_link(link, ap, PMP_FIRST) {
-		sata_scr_write(link, SCR_CONTROL, link->saved_scontrol & 0xff0);
-		ata_for_each_dev(dev, link, ALL)
-			ata_dev_disable(dev);
-	}
-
-	/* Final freeze & EH.  All in-flight commands are aborted.  EH
-	 * will be skipped and retrials will be terminated with bad
-	 * target.
-	 */
-	spin_lock_irqsave(ap->lock, flags);
-	ata_port_freeze(ap);	/* won't be thawed */
-	spin_unlock_irqrestore(ap->lock, flags);
+	/* it better be dead now */
+	WARN_ON(!(ap->pflags & ATA_PFLAG_UNLOADED));
 
-	ata_port_wait_eh(ap);
 	cancel_rearming_delayed_work(&ap->hotplug_task);
 
  skip_eh:
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index d673f3712bdc..8147a8386370 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -491,6 +491,31 @@ enum blk_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd)
 	return ret;
 }
 
+static void ata_eh_unload(struct ata_port *ap)
+{
+	struct ata_link *link;
+	struct ata_device *dev;
+	unsigned long flags;
+
+	/* Restore SControl IPM and SPD for the next driver and
+	 * disable attached devices.
+	 */
+	ata_for_each_link(link, ap, PMP_FIRST) {
+		sata_scr_write(link, SCR_CONTROL, link->saved_scontrol & 0xff0);
+		ata_for_each_dev(dev, link, ALL)
+			ata_dev_disable(dev);
+	}
+
+	/* freeze and set UNLOADED */
+	spin_lock_irqsave(ap->lock, flags);
+
+	ata_port_freeze(ap);			/* won't be thawed */
+	ap->pflags &= ~ATA_PFLAG_EH_PENDING;	/* clear pending from freeze */
+	ap->pflags |= ATA_PFLAG_UNLOADED;
+
+	spin_unlock_irqrestore(ap->lock, flags);
+}
+
 /**
  *	ata_scsi_error - SCSI layer error handler callback
  *	@host: SCSI host on which error occurred
@@ -618,8 +643,13 @@ void ata_scsi_error(struct Scsi_Host *host)
 		/* invoke EH, skip if unloading or suspended */
 		if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED)))
 			ap->ops->error_handler(ap);
-		else
+		else {
+			/* if unloading, commence suicide */
+			if ((ap->pflags & ATA_PFLAG_UNLOADING) &&
+			    !(ap->pflags & ATA_PFLAG_UNLOADED))
+				ata_eh_unload(ap);
 			ata_eh_finish(ap);
+		}
 
 		/* process port suspend request */
 		ata_eh_handle_port_suspend(ap);
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 3b2a0c6444ee..3449de597eff 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -213,10 +213,11 @@ enum {
 	ATA_PFLAG_FROZEN	= (1 << 2), /* port is frozen */
 	ATA_PFLAG_RECOVERED	= (1 << 3), /* recovery action performed */
 	ATA_PFLAG_LOADING	= (1 << 4), /* boot/loading probe */
-	ATA_PFLAG_UNLOADING	= (1 << 5), /* module is unloading */
 	ATA_PFLAG_SCSI_HOTPLUG	= (1 << 6), /* SCSI hotplug scheduled */
 	ATA_PFLAG_INITIALIZING	= (1 << 7), /* being initialized, don't touch */
 	ATA_PFLAG_RESETTING	= (1 << 8), /* reset in progress */
+	ATA_PFLAG_UNLOADING	= (1 << 9), /* driver is being unloaded */
+	ATA_PFLAG_UNLOADED	= (1 << 10), /* driver is unloaded */
 
 	ATA_PFLAG_SUSPENDED	= (1 << 17), /* port is suspended (power) */
 	ATA_PFLAG_PM_PENDING	= (1 << 18), /* PM operation pending */
-- 
cgit v1.2.3


From 88e740f1654bf28565edd528a060695c1f2b5ad7 Mon Sep 17 00:00:00 2001
From: Fernando Luis Vázquez Cao <fernando@oss.ntt.co.jp>
Date: Mon, 27 Oct 2008 18:44:46 +0900
Subject: block: add queue flag for paravirt frontend drivers

As is the case with SSD devices, we do not want to idle in AS/CFQ when
the block device is a paravirt front-end driver. This patch adds a flag
(QUEUE_FLAG_VIRT) which should be used by front-end drivers such as
virtio_blk and xen-blkfront to indicate a paravirtualized device.

Signed-off-by: Fernando Luis Vazquez Cao <fernando@oss.ntt.co.jp>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 include/linux/blkdev.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 031a315c0509..482e9600f7a2 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -449,6 +449,7 @@ struct request_queue
 #define QUEUE_FLAG_FAIL_IO     12	/* fake timeout */
 #define QUEUE_FLAG_STACKABLE   13	/* supports request stacking */
 #define QUEUE_FLAG_NONROT      14	/* non-rotational device (SSD) */
+#define QUEUE_FLAG_VIRT        QUEUE_FLAG_NONROT /* paravirt device */
 
 static inline int queue_is_locked(struct request_queue *q)
 {
-- 
cgit v1.2.3


From 08bafc0341f2f7920e9045bc32c40299cac8c21b Mon Sep 17 00:00:00 2001
From: Keith Mannthey <kmannth@us.ibm.com>
Date: Tue, 25 Nov 2008 10:24:35 +0100
Subject: block: Supress Buffer I/O errors when SCSI REQ_QUIET flag set

Allow the scsi request REQ_QUIET flag to be propagated to the buffer
file system layer. The basic ideas is to pass the flag from the scsi
request to the bio (block IO) and then to the buffer layer.  The buffer
layer can then suppress needless printks.

This patch declutters the kernel log by removed the 40-50 (per lun)
buffer io error messages seen during a boot in my multipath setup . It
is a good chance any real errors will be missed in the "noise" it the
logs without this patch.

During boot I see blocks of messages like
"
__ratelimit: 211 callbacks suppressed
Buffer I/O error on device sdm, logical block 5242879
Buffer I/O error on device sdm, logical block 5242879
Buffer I/O error on device sdm, logical block 5242847
Buffer I/O error on device sdm, logical block 1
Buffer I/O error on device sdm, logical block 5242878
Buffer I/O error on device sdm, logical block 5242879
Buffer I/O error on device sdm, logical block 5242879
Buffer I/O error on device sdm, logical block 5242879
Buffer I/O error on device sdm, logical block 5242879
Buffer I/O error on device sdm, logical block 5242872
"
in my logs.

My disk environment is multipath fiber channel using the SCSI_DH_RDAC
code and multipathd.  This topology includes an "active" and "ghost"
path for each lun. IO's to the "ghost" path will never complete and the
SCSI layer, via the scsi device handler rdac code, quick returns the IOs
to theses paths and sets the REQ_QUIET scsi flag to suppress the scsi
layer messages.

 I am wanting to extend the QUIET behavior to include the buffer file
system layer to deal with these errors as well. I have been running this
patch for a while now on several boxes without issue.  A few runs of
bonnie++ show no noticeable difference in performance in my setup.

Thanks for John Stultz for the quiet_error finalization.

Submitted-by:  Keith Mannthey <kmannth@us.ibm.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-core.c            |  3 +++
 fs/buffer.c                 | 19 +++++++++++++++----
 include/linux/bio.h         |  1 +
 include/linux/buffer_head.h |  1 +
 4 files changed, 20 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 243d18b4ceb0..20e1724ccb4c 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -153,6 +153,9 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
 			nbytes = bio->bi_size;
 		}
 
+		if (unlikely(rq->cmd_flags & REQ_QUIET))
+			set_bit(BIO_QUIET, &bio->bi_flags);
+
 		bio->bi_size -= nbytes;
 		bio->bi_sector += (nbytes >> 9);
 
diff --git a/fs/buffer.c b/fs/buffer.c
index 10179cfa1152..776ae091d3b0 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -99,10 +99,18 @@ __clear_page_buffers(struct page *page)
 	page_cache_release(page);
 }
 
+
+static int quiet_error(struct buffer_head *bh)
+{
+	if (!test_bit(BH_Quiet, &bh->b_state) && printk_ratelimit())
+		return 0;
+	return 1;
+}
+
+
 static void buffer_io_error(struct buffer_head *bh)
 {
 	char b[BDEVNAME_SIZE];
-
 	printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n",
 			bdevname(bh->b_bdev, b),
 			(unsigned long long)bh->b_blocknr);
@@ -144,7 +152,7 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
 	if (uptodate) {
 		set_buffer_uptodate(bh);
 	} else {
-		if (!buffer_eopnotsupp(bh) && printk_ratelimit()) {
+		if (!buffer_eopnotsupp(bh) && !quiet_error(bh)) {
 			buffer_io_error(bh);
 			printk(KERN_WARNING "lost page write due to "
 					"I/O error on %s\n",
@@ -394,7 +402,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
 		set_buffer_uptodate(bh);
 	} else {
 		clear_buffer_uptodate(bh);
-		if (printk_ratelimit())
+		if (!quiet_error(bh))
 			buffer_io_error(bh);
 		SetPageError(page);
 	}
@@ -455,7 +463,7 @@ static void end_buffer_async_write(struct buffer_head *bh, int uptodate)
 	if (uptodate) {
 		set_buffer_uptodate(bh);
 	} else {
-		if (printk_ratelimit()) {
+		if (!quiet_error(bh)) {
 			buffer_io_error(bh);
 			printk(KERN_WARNING "lost page write due to "
 					"I/O error on %s\n",
@@ -2913,6 +2921,9 @@ static void end_bio_bh_io_sync(struct bio *bio, int err)
 		set_bit(BH_Eopnotsupp, &bh->b_state);
 	}
 
+	if (unlikely (test_bit(BIO_QUIET,&bio->bi_flags)))
+		set_bit(BH_Quiet, &bh->b_state);
+
 	bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags));
 	bio_put(bio);
 }
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 6a642098e5c3..cf132bfbbacf 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -117,6 +117,7 @@ struct bio {
 #define BIO_CPU_AFFINE	8	/* complete bio on same CPU as submitted */
 #define BIO_NULL_MAPPED 9	/* contains invalid user pages */
 #define BIO_FS_INTEGRITY 10	/* fs owns integrity data, not block layer */
+#define BIO_QUIET	11	/* Make BIO Quiet */
 #define bio_flagged(bio, flag)	((bio)->bi_flags & (1 << (flag)))
 
 /*
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 3ce64b90118c..8605f8a74df9 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -35,6 +35,7 @@ enum bh_state_bits {
 	BH_Ordered,	/* ordered write */
 	BH_Eopnotsupp,	/* operation not supported (barrier) */
 	BH_Unwritten,	/* Buffer is allocated on disk but not written */
+	BH_Quiet,	/* Buffer Error Prinks to be quiet */
 
 	BH_PrivateStart,/* not a state bit, but the first bit available
 			 * for private allocation by other entities
-- 
cgit v1.2.3


From 64d01dc9e1927e6535627d73f2336c75d1dd3fe2 Mon Sep 17 00:00:00 2001
From: Cheng Renquan <crquan@gmail.com>
Date: Wed, 3 Dec 2008 12:41:39 +0100
Subject: block: use cancel_work_sync() instead of kblockd_flush_work()

After many improvements on kblockd_flush_work, it is now identical to
cancel_work_sync, so a direct call to cancel_work_sync is suggested.

The only difference is that cancel_work_sync is a GPL symbol,
so no non-GPL modules anymore.

Signed-off-by: Cheng Renquan <crquan@gmail.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/as-iosched.c     | 2 +-
 block/blk-core.c       | 8 +-------
 block/cfq-iosched.c    | 2 +-
 include/linux/blkdev.h | 1 -
 4 files changed, 3 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/block/as-iosched.c b/block/as-iosched.c
index 71f0abb219ee..802b5d0d8536 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -1344,7 +1344,7 @@ static void as_exit_queue(elevator_t *e)
 	struct as_data *ad = e->elevator_data;
 
 	del_timer_sync(&ad->antic_timer);
-	kblockd_flush_work(&ad->antic_work);
+	cancel_work_sync(&ad->antic_work);
 
 	BUG_ON(!list_empty(&ad->fifo_list[REQ_SYNC]));
 	BUG_ON(!list_empty(&ad->fifo_list[REQ_ASYNC]));
diff --git a/block/blk-core.c b/block/blk-core.c
index 20e1724ccb4c..2fdcd0cff57f 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -408,7 +408,7 @@ void blk_sync_queue(struct request_queue *q)
 {
 	del_timer_sync(&q->unplug_timer);
 	del_timer_sync(&q->timeout);
-	kblockd_flush_work(&q->unplug_work);
+	cancel_work_sync(&q->unplug_work);
 }
 EXPORT_SYMBOL(blk_sync_queue);
 
@@ -2147,12 +2147,6 @@ int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
 }
 EXPORT_SYMBOL(kblockd_schedule_work);
 
-void kblockd_flush_work(struct work_struct *work)
-{
-	cancel_work_sync(work);
-}
-EXPORT_SYMBOL(kblockd_flush_work);
-
 int __init blk_dev_init(void)
 {
 	kblockd_workqueue = create_workqueue("kblockd");
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 6a062eebbd15..a2bfec7d6b36 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -2160,7 +2160,7 @@ out_cont:
 static void cfq_shutdown_timer_wq(struct cfq_data *cfqd)
 {
 	del_timer_sync(&cfqd->idle_slice_timer);
-	kblockd_flush_work(&cfqd->unplug_work);
+	cancel_work_sync(&cfqd->unplug_work);
 }
 
 static void cfq_put_async_queues(struct cfq_data *cfqd)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 482e9600f7a2..e9bb73ff1d64 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -978,7 +978,6 @@ static inline void put_dev_sector(Sector p)
 
 struct work_struct;
 int kblockd_schedule_work(struct request_queue *q, struct work_struct *work);
-void kblockd_flush_work(struct work_struct *work);
 
 #define MODULE_ALIAS_BLOCKDEV(major,minor) \
 	MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor))
-- 
cgit v1.2.3


From ba744d5e290055d171c68067259fcc1e2721f542 Mon Sep 17 00:00:00 2001
From: Richard Kennedy <richard@rsk.demon.co.uk>
Date: Wed, 3 Dec 2008 12:41:40 +0100
Subject: block: reorder struct bio to remove padding on 64bit

Remove 8 bytes of padding from struct bio which also removes 16 bytes from
struct bio_pair to make it 248 bytes.  bio_pair then fits into one fewer
cache lines & into a smaller slab.

Signed-off-by: Richard Kennedy <richard@rsk.demon.co.uk>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 include/linux/bio.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index cf132bfbbacf..3ed714eb54d9 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -90,10 +90,11 @@ struct bio {
 
 	unsigned int		bi_comp_cpu;	/* completion CPU */
 
+	atomic_t		bi_cnt;		/* pin count */
+
 	struct bio_vec		*bi_io_vec;	/* the actual vec list */
 
 	bio_end_io_t		*bi_end_io;
-	atomic_t		bi_cnt;		/* pin count */
 
 	void			*bi_private;
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
-- 
cgit v1.2.3


From 313e42999dbc0f234ca5909a236f78f082cb43b1 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 28 Nov 2008 13:32:02 +0900
Subject: block: reorganize QUEUE_ORDERED_* constants

Separate out ordering type (drain,) and action masks (preflush,
postflush, fua) from visible ordering mode selectors
(QUEUE_ORDERED_*).  Ordering types are now named QUEUE_ORDERED_BY_*
while action masks are named QUEUE_ORDERED_DO_*.

This change is necessary to add QUEUE_ORDERED_DO_BAR and make it
optional to improve empty barrier implementation.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-barrier.c    | 20 ++++++++++----------
 include/linux/blkdev.h | 39 +++++++++++++++++++++++----------------
 2 files changed, 33 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 6e72d661ae42..1d7adc72c95d 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -24,8 +24,8 @@
 int blk_queue_ordered(struct request_queue *q, unsigned ordered,
 		      prepare_flush_fn *prepare_flush_fn)
 {
-	if (ordered & (QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH) &&
-	    prepare_flush_fn == NULL) {
+	if (!prepare_flush_fn && (ordered & (QUEUE_ORDERED_DO_PREFLUSH |
+					     QUEUE_ORDERED_DO_POSTFLUSH))) {
 		printk(KERN_ERR "%s: prepare_flush_fn required\n", __func__);
 		return -EINVAL;
 	}
@@ -134,7 +134,7 @@ static void queue_flush(struct request_queue *q, unsigned which)
 	struct request *rq;
 	rq_end_io_fn *end_io;
 
-	if (which == QUEUE_ORDERED_PREFLUSH) {
+	if (which == QUEUE_ORDERED_DO_PREFLUSH) {
 		rq = &q->pre_flush_rq;
 		end_io = pre_flush_end_io;
 	} else {
@@ -167,7 +167,7 @@ static inline struct request *start_ordered(struct request_queue *q,
 	blk_rq_init(q, rq);
 	if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
 		rq->cmd_flags |= REQ_RW;
-	if (q->ordered & QUEUE_ORDERED_FUA)
+	if (q->ordered & QUEUE_ORDERED_DO_FUA)
 		rq->cmd_flags |= REQ_FUA;
 	init_request_from_bio(rq, q->orig_bar_rq->bio);
 	rq->end_io = bar_end_io;
@@ -181,20 +181,20 @@ static inline struct request *start_ordered(struct request_queue *q,
 	 * there will be no data written between the pre and post flush.
 	 * Hence a single flush will suffice.
 	 */
-	if ((q->ordered & QUEUE_ORDERED_POSTFLUSH) && !blk_empty_barrier(rq))
-		queue_flush(q, QUEUE_ORDERED_POSTFLUSH);
+	if ((q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) && !blk_empty_barrier(rq))
+		queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH);
 	else
 		q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH;
 
 	elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
 
-	if (q->ordered & QUEUE_ORDERED_PREFLUSH) {
-		queue_flush(q, QUEUE_ORDERED_PREFLUSH);
+	if (q->ordered & QUEUE_ORDERED_DO_PREFLUSH) {
+		queue_flush(q, QUEUE_ORDERED_DO_PREFLUSH);
 		rq = &q->pre_flush_rq;
 	} else
 		q->ordseq |= QUEUE_ORDSEQ_PREFLUSH;
 
-	if ((q->ordered & QUEUE_ORDERED_TAG) || q->in_flight == 0)
+	if ((q->ordered & QUEUE_ORDERED_BY_TAG) || q->in_flight == 0)
 		q->ordseq |= QUEUE_ORDSEQ_DRAIN;
 	else
 		rq = NULL;
@@ -237,7 +237,7 @@ int blk_do_ordered(struct request_queue *q, struct request **rqp)
 	    rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
 		return 1;
 
-	if (q->ordered & QUEUE_ORDERED_TAG) {
+	if (q->ordered & QUEUE_ORDERED_BY_TAG) {
 		/* Ordered by tag.  Blocking the next barrier is enough. */
 		if (is_barrier && rq != &q->bar_rq)
 			*rqp = NULL;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index e9bb73ff1d64..5c92b4432399 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -523,22 +523,29 @@ enum {
 	 * TAG_FLUSH	: ordering by tag w/ pre and post flushes
 	 * TAG_FUA	: ordering by tag w/ pre flush and FUA write
 	 */
-	QUEUE_ORDERED_NONE	= 0x00,
-	QUEUE_ORDERED_DRAIN	= 0x01,
-	QUEUE_ORDERED_TAG	= 0x02,
-
-	QUEUE_ORDERED_PREFLUSH	= 0x10,
-	QUEUE_ORDERED_POSTFLUSH	= 0x20,
-	QUEUE_ORDERED_FUA	= 0x40,
-
-	QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN |
-			QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH,
-	QUEUE_ORDERED_DRAIN_FUA	= QUEUE_ORDERED_DRAIN |
-			QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA,
-	QUEUE_ORDERED_TAG_FLUSH	= QUEUE_ORDERED_TAG |
-			QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH,
-	QUEUE_ORDERED_TAG_FUA	= QUEUE_ORDERED_TAG |
-			QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA,
+	QUEUE_ORDERED_BY_DRAIN		= 0x01,
+	QUEUE_ORDERED_BY_TAG		= 0x02,
+	QUEUE_ORDERED_DO_PREFLUSH	= 0x10,
+	QUEUE_ORDERED_DO_POSTFLUSH	= 0x40,
+	QUEUE_ORDERED_DO_FUA		= 0x80,
+
+	QUEUE_ORDERED_NONE		= 0x00,
+
+	QUEUE_ORDERED_DRAIN		= QUEUE_ORDERED_BY_DRAIN,
+	QUEUE_ORDERED_DRAIN_FLUSH	= QUEUE_ORDERED_DRAIN |
+					  QUEUE_ORDERED_DO_PREFLUSH |
+					  QUEUE_ORDERED_DO_POSTFLUSH,
+	QUEUE_ORDERED_DRAIN_FUA		= QUEUE_ORDERED_DRAIN |
+					  QUEUE_ORDERED_DO_PREFLUSH |
+					  QUEUE_ORDERED_DO_FUA,
+
+	QUEUE_ORDERED_TAG		= QUEUE_ORDERED_BY_TAG,
+	QUEUE_ORDERED_TAG_FLUSH		= QUEUE_ORDERED_TAG |
+					  QUEUE_ORDERED_DO_PREFLUSH |
+					  QUEUE_ORDERED_DO_POSTFLUSH,
+	QUEUE_ORDERED_TAG_FUA		= QUEUE_ORDERED_TAG |
+					  QUEUE_ORDERED_DO_PREFLUSH |
+					  QUEUE_ORDERED_DO_FUA,
 
 	/*
 	 * Ordered operation sequence
-- 
cgit v1.2.3


From f671620e7d895af221bdfeda751d54fa55ed9546 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 28 Nov 2008 13:32:04 +0900
Subject: block: make every barrier action optional

In all barrier sequences, the barrier write itself was always assumed
to be issued and thus didn't have corresponding control flag.  This
patch adds QUEUE_ORDERED_DO_BAR and unify action mask handling in
start_ordered() such that any barrier action can be skipped.

This patch doesn't introduce any visible behavior changes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-barrier.c    | 41 ++++++++++++++++++++++++-----------------
 include/linux/blkdev.h |  7 +++++--
 2 files changed, 29 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 43d479a1e664..1efabf829c53 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -158,19 +158,10 @@ static inline struct request *start_ordered(struct request_queue *q,
 	q->ordered = q->next_ordered;
 	q->ordseq |= QUEUE_ORDSEQ_STARTED;
 
-	/*
-	 * Prep proxy barrier request.
-	 */
+	/* stash away the original request */
 	elv_dequeue_request(q, rq);
 	q->orig_bar_rq = rq;
-	rq = &q->bar_rq;
-	blk_rq_init(q, rq);
-	if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
-		rq->cmd_flags |= REQ_RW;
-	if (q->ordered & QUEUE_ORDERED_DO_FUA)
-		rq->cmd_flags |= REQ_FUA;
-	init_request_from_bio(rq, q->orig_bar_rq->bio);
-	rq->end_io = bar_end_io;
+	rq = NULL;
 
 	/*
 	 * Queue ordered sequence.  As we stack them at the head, we
@@ -181,12 +172,28 @@ static inline struct request *start_ordered(struct request_queue *q,
 	 * there will be no data written between the pre and post flush.
 	 * Hence a single flush will suffice.
 	 */
-	if ((q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) && !blk_empty_barrier(rq))
+	if ((q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) &&
+	    !blk_empty_barrier(q->orig_bar_rq)) {
 		queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH);
-	else
+		rq = &q->post_flush_rq;
+	} else
 		q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH;
 
-	elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
+	if (q->ordered & QUEUE_ORDERED_DO_BAR) {
+		rq = &q->bar_rq;
+
+		/* initialize proxy request and queue it */
+		blk_rq_init(q, rq);
+		if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
+			rq->cmd_flags |= REQ_RW;
+		if (q->ordered & QUEUE_ORDERED_DO_FUA)
+			rq->cmd_flags |= REQ_FUA;
+		init_request_from_bio(rq, q->orig_bar_rq->bio);
+		rq->end_io = bar_end_io;
+
+		elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
+	} else
+		q->ordseq |= QUEUE_ORDSEQ_BAR;
 
 	if (q->ordered & QUEUE_ORDERED_DO_PREFLUSH) {
 		queue_flush(q, QUEUE_ORDERED_DO_PREFLUSH);
@@ -194,10 +201,10 @@ static inline struct request *start_ordered(struct request_queue *q,
 	} else
 		q->ordseq |= QUEUE_ORDSEQ_PREFLUSH;
 
-	if ((q->ordered & QUEUE_ORDERED_BY_TAG) || q->in_flight == 0)
-		q->ordseq |= QUEUE_ORDSEQ_DRAIN;
-	else
+	if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && q->in_flight)
 		rq = NULL;
+	else
+		q->ordseq |= QUEUE_ORDSEQ_DRAIN;
 
 	return rq;
 }
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 5c92b4432399..b044267009ed 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -526,12 +526,14 @@ enum {
 	QUEUE_ORDERED_BY_DRAIN		= 0x01,
 	QUEUE_ORDERED_BY_TAG		= 0x02,
 	QUEUE_ORDERED_DO_PREFLUSH	= 0x10,
+	QUEUE_ORDERED_DO_BAR		= 0x20,
 	QUEUE_ORDERED_DO_POSTFLUSH	= 0x40,
 	QUEUE_ORDERED_DO_FUA		= 0x80,
 
 	QUEUE_ORDERED_NONE		= 0x00,
 
-	QUEUE_ORDERED_DRAIN		= QUEUE_ORDERED_BY_DRAIN,
+	QUEUE_ORDERED_DRAIN		= QUEUE_ORDERED_BY_DRAIN |
+					  QUEUE_ORDERED_DO_BAR,
 	QUEUE_ORDERED_DRAIN_FLUSH	= QUEUE_ORDERED_DRAIN |
 					  QUEUE_ORDERED_DO_PREFLUSH |
 					  QUEUE_ORDERED_DO_POSTFLUSH,
@@ -539,7 +541,8 @@ enum {
 					  QUEUE_ORDERED_DO_PREFLUSH |
 					  QUEUE_ORDERED_DO_FUA,
 
-	QUEUE_ORDERED_TAG		= QUEUE_ORDERED_BY_TAG,
+	QUEUE_ORDERED_TAG		= QUEUE_ORDERED_BY_TAG |
+					  QUEUE_ORDERED_DO_BAR,
 	QUEUE_ORDERED_TAG_FLUSH		= QUEUE_ORDERED_TAG |
 					  QUEUE_ORDERED_DO_PREFLUSH |
 					  QUEUE_ORDERED_DO_POSTFLUSH,
-- 
cgit v1.2.3


From 8f11b3e99a1136fcbb67316c3260f085299c0bff Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 28 Nov 2008 13:32:05 +0900
Subject: block: make barrier completion more robust

Barrier completion had the following assumptions.

* start_ordered() couldn't finish the whole sequence properly.  If all
  actions are to be skipped, q->ordseq is set correctly but the actual
  completion was never triggered thus hanging the barrier request.

* Drain completion in elv_complete_request() assumed that there's
  always at least one request in the queue when drain completes.

Both assumptions are true but these assumptions need to be removed to
improve empty barrier implementation.  This patch makes the following
changes.

* Make start_ordered() use blk_ordered_complete_seq() to mark skipped
  steps complete and notify __elv_next_request() that it should fetch
  the next request if the whole barrier has completed inside
  start_ordered().

* Make drain completion path in elv_complete_request() check whether
  the queue is empty.  Empty queue also indicates drain completion.

* While at it, convert 0/1 return from blk_do_ordered() to false/true.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-barrier.c    | 45 +++++++++++++++++++++++++++------------------
 block/elevator.c       | 10 +++++++---
 include/linux/blkdev.h |  4 ++--
 3 files changed, 36 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 1efabf829c53..b03d88013e1e 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -88,7 +88,7 @@ unsigned blk_ordered_req_seq(struct request *rq)
 		return QUEUE_ORDSEQ_DONE;
 }
 
-void blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
+bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
 {
 	struct request *rq;
 
@@ -99,7 +99,7 @@ void blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
 	q->ordseq |= seq;
 
 	if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE)
-		return;
+		return false;
 
 	/*
 	 * Okay, sequence complete.
@@ -109,6 +109,8 @@ void blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
 
 	if (__blk_end_request(rq, q->orderr, blk_rq_bytes(rq)))
 		BUG();
+
+	return true;
 }
 
 static void pre_flush_end_io(struct request *rq, int error)
@@ -151,9 +153,11 @@ static void queue_flush(struct request_queue *q, unsigned which)
 	elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
 }
 
-static inline struct request *start_ordered(struct request_queue *q,
-					    struct request *rq)
+static inline bool start_ordered(struct request_queue *q, struct request **rqp)
 {
+	struct request *rq = *rqp;
+	unsigned skip = 0;
+
 	q->orderr = 0;
 	q->ordered = q->next_ordered;
 	q->ordseq |= QUEUE_ORDSEQ_STARTED;
@@ -177,7 +181,7 @@ static inline struct request *start_ordered(struct request_queue *q,
 		queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH);
 		rq = &q->post_flush_rq;
 	} else
-		q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH;
+		skip |= QUEUE_ORDSEQ_POSTFLUSH;
 
 	if (q->ordered & QUEUE_ORDERED_DO_BAR) {
 		rq = &q->bar_rq;
@@ -193,35 +197,40 @@ static inline struct request *start_ordered(struct request_queue *q,
 
 		elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
 	} else
-		q->ordseq |= QUEUE_ORDSEQ_BAR;
+		skip |= QUEUE_ORDSEQ_BAR;
 
 	if (q->ordered & QUEUE_ORDERED_DO_PREFLUSH) {
 		queue_flush(q, QUEUE_ORDERED_DO_PREFLUSH);
 		rq = &q->pre_flush_rq;
 	} else
-		q->ordseq |= QUEUE_ORDSEQ_PREFLUSH;
+		skip |= QUEUE_ORDSEQ_PREFLUSH;
 
 	if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && q->in_flight)
 		rq = NULL;
 	else
-		q->ordseq |= QUEUE_ORDSEQ_DRAIN;
+		skip |= QUEUE_ORDSEQ_DRAIN;
+
+	*rqp = rq;
 
-	return rq;
+	/*
+	 * Complete skipped sequences.  If whole sequence is complete,
+	 * return false to tell elevator that this request is gone.
+	 */
+	return !blk_ordered_complete_seq(q, skip, 0);
 }
 
-int blk_do_ordered(struct request_queue *q, struct request **rqp)
+bool blk_do_ordered(struct request_queue *q, struct request **rqp)
 {
 	struct request *rq = *rqp;
 	const int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);
 
 	if (!q->ordseq) {
 		if (!is_barrier)
-			return 1;
+			return true;
 
-		if (q->next_ordered != QUEUE_ORDERED_NONE) {
-			*rqp = start_ordered(q, rq);
-			return 1;
-		} else {
+		if (q->next_ordered != QUEUE_ORDERED_NONE)
+			return start_ordered(q, rqp);
+		else {
 			/*
 			 * Queue ordering not supported.  Terminate
 			 * with prejudice.
@@ -231,7 +240,7 @@ int blk_do_ordered(struct request_queue *q, struct request **rqp)
 					      blk_rq_bytes(rq)))
 				BUG();
 			*rqp = NULL;
-			return 0;
+			return false;
 		}
 	}
 
@@ -242,7 +251,7 @@ int blk_do_ordered(struct request_queue *q, struct request **rqp)
 	/* Special requests are not subject to ordering rules. */
 	if (!blk_fs_request(rq) &&
 	    rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
-		return 1;
+		return true;
 
 	if (q->ordered & QUEUE_ORDERED_BY_TAG) {
 		/* Ordered by tag.  Blocking the next barrier is enough. */
@@ -255,7 +264,7 @@ int blk_do_ordered(struct request_queue *q, struct request **rqp)
 			*rqp = NULL;
 	}
 
-	return 1;
+	return true;
 }
 
 static void bio_end_empty_barrier(struct bio *bio, int err)
diff --git a/block/elevator.c b/block/elevator.c
index 86836dd179c0..261ffaaf47bd 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -944,10 +944,14 @@ void elv_completed_request(struct request_queue *q, struct request *rq)
 	 * drained for flush sequence.
 	 */
 	if (unlikely(q->ordseq)) {
-		struct request *first_rq = list_entry_rq(q->queue_head.next);
-		if (q->in_flight == 0 &&
+		struct request *next = NULL;
+
+		if (!list_empty(&q->queue_head))
+			next = list_entry_rq(q->queue_head.next);
+
+		if (!q->in_flight &&
 		    blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
-		    blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) {
+		    (!next || blk_ordered_req_seq(next) > QUEUE_ORDSEQ_DRAIN)) {
 			blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
 			blk_start_queueing(q);
 		}
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b044267009ed..3c7078e0129d 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -866,10 +866,10 @@ extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *);
 extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
 extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *);
-extern int blk_do_ordered(struct request_queue *, struct request **);
+extern bool blk_do_ordered(struct request_queue *, struct request **);
 extern unsigned blk_ordered_cur_seq(struct request_queue *);
 extern unsigned blk_ordered_req_seq(struct request *);
-extern void blk_ordered_complete_seq(struct request_queue *, unsigned, int);
+extern bool blk_ordered_complete_seq(struct request_queue *, unsigned, int);
 
 extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *);
 extern void blk_dump_rq_flags(struct request *, char *);
-- 
cgit v1.2.3


From 58eea927d2de43dc6f03d1ba2c46e55854b31540 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 28 Nov 2008 13:32:06 +0900
Subject: block: simplify empty barrier implementation

Empty barrier required special handling in __elv_next_request() to
complete it without letting the low level driver see it.

With previous changes, barrier code is now flexible enough to skip the
BAR step using the same barrier sequence selection mechanism.  Drop
the special handling and mask off q->ordered from start_ordered().

Remove blk_empty_barrier() test which now has no user.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-barrier.c    | 16 ++++++++++------
 block/elevator.c       |  8 --------
 include/linux/blkdev.h |  1 -
 3 files changed, 10 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index b03d88013e1e..c63044e9c4c0 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -162,6 +162,14 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp)
 	q->ordered = q->next_ordered;
 	q->ordseq |= QUEUE_ORDSEQ_STARTED;
 
+	/*
+	 * For an empty barrier, there's no actual BAR request, which
+	 * in turn makes POSTFLUSH unnecessary.  Mask them off.
+	 */
+	if (!rq->hard_nr_sectors)
+		q->ordered &= ~(QUEUE_ORDERED_DO_BAR |
+				QUEUE_ORDERED_DO_POSTFLUSH);
+
 	/* stash away the original request */
 	elv_dequeue_request(q, rq);
 	q->orig_bar_rq = rq;
@@ -171,13 +179,9 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp)
 	 * Queue ordered sequence.  As we stack them at the head, we
 	 * need to queue in reverse order.  Note that we rely on that
 	 * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
-	 * request gets inbetween ordered sequence. If this request is
-	 * an empty barrier, we don't need to do a postflush ever since
-	 * there will be no data written between the pre and post flush.
-	 * Hence a single flush will suffice.
+	 * request gets inbetween ordered sequence.
 	 */
-	if ((q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) &&
-	    !blk_empty_barrier(q->orig_bar_rq)) {
+	if (q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) {
 		queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH);
 		rq = &q->post_flush_rq;
 	} else
diff --git a/block/elevator.c b/block/elevator.c
index 261ffaaf47bd..ff60177a3bab 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -755,14 +755,6 @@ struct request *elv_next_request(struct request_queue *q)
 	int ret;
 
 	while ((rq = __elv_next_request(q)) != NULL) {
-		/*
-		 * Kill the empty barrier place holder, the driver must
-		 * not ever see it.
-		 */
-		if (blk_empty_barrier(rq)) {
-			__blk_end_request(rq, 0, blk_rq_bytes(rq));
-			continue;
-		}
 		if (!(rq->cmd_flags & REQ_STARTED)) {
 			/*
 			 * This is the first time the device driver
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 3c7078e0129d..41bbadfd17f6 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -596,7 +596,6 @@ enum {
 #define blk_fua_rq(rq)		((rq)->cmd_flags & REQ_FUA)
 #define blk_discard_rq(rq)	((rq)->cmd_flags & REQ_DISCARD)
 #define blk_bidi_rq(rq)		((rq)->next_rq != NULL)
-#define blk_empty_barrier(rq)	(blk_barrier_rq(rq) && blk_fs_request(rq) && !(rq)->hard_nr_sectors)
 /* rq->queuelist of dequeued request must be list_empty() */
 #define blk_queued_rq(rq)	(!list_empty(&(rq)->queuelist))
 
-- 
cgit v1.2.3


From 7ff9345ffac56743b5001561bc2dc1e041b79149 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Thu, 11 Dec 2008 11:53:43 +0100
Subject: bio: only mempool back the largest bio_vec slab cache

We only very rarely need the mempool backing, so it makes sense to
get rid of all but one of the mempool in a bio_set. So keep the
largest bio_vec count mempool so we can always honor the largest
allocation, and "upgrade" callers that fail.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/bio.c            | 125 ++++++++++++++++++++++++++++++----------------------
 include/linux/bio.h |   3 +-
 2 files changed, 74 insertions(+), 54 deletions(-)

(limited to 'include/linux')

diff --git a/fs/bio.c b/fs/bio.c
index df99c882b807..eb6b4683a265 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -58,7 +58,8 @@ unsigned int bvec_nr_vecs(unsigned short idx)
 	return bvec_slabs[idx].nr_vecs;
 }
 
-struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct bio_set *bs)
+struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx,
+			      struct bio_set *bs)
 {
 	struct bio_vec *bvl;
 
@@ -67,60 +68,90 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct
 	 * If not, this is a bio_kmalloc() allocation and just do a
 	 * kzalloc() for the exact number of vecs right away.
 	 */
-	if (bs) {
+	if (!bs)
+		bvl = kzalloc(nr * sizeof(struct bio_vec), gfp_mask);
+
+	/*
+	 * see comment near bvec_array define!
+	 */
+	switch (nr) {
+	case 1:
+		*idx = 0;
+		break;
+	case 2 ... 4:
+		*idx = 1;
+		break;
+	case 5 ... 16:
+		*idx = 2;
+		break;
+	case 17 ... 64:
+		*idx = 3;
+		break;
+	case 65 ... 128:
+		*idx = 4;
+		break;
+	case 129 ... BIO_MAX_PAGES:
+		*idx = 5;
+		break;
+	default:
+		return NULL;
+	}
+
+	/*
+	 * idx now points to the pool we want to allocate from. only the
+	 * 1-vec entry pool is mempool backed.
+	 */
+	if (*idx == BIOVEC_MAX_IDX) {
+fallback:
+		bvl = mempool_alloc(bs->bvec_pool, gfp_mask);
+	} else {
+		struct biovec_slab *bvs = bvec_slabs + *idx;
+		gfp_t __gfp_mask = gfp_mask & ~(__GFP_WAIT | __GFP_IO);
+
 		/*
-		 * see comment near bvec_array define!
+		 * Make this allocation restricted and don't dump info on
+		 * allocation failures, since we'll fallback to the mempool
+		 * in case of failure.
 		 */
-		switch (nr) {
-		case 1:
-			*idx = 0;
-			break;
-		case 2 ... 4:
-			*idx = 1;
-			break;
-		case 5 ... 16:
-			*idx = 2;
-			break;
-		case 17 ... 64:
-			*idx = 3;
-			break;
-		case 65 ... 128:
-			*idx = 4;
-			break;
-		case 129 ... BIO_MAX_PAGES:
-			*idx = 5;
-			break;
-		default:
-			return NULL;
-		}
+		__gfp_mask |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
 
 		/*
-		 * idx now points to the pool we want to allocate from
+		 * Try a slab allocation. If this fails and __GFP_WAIT
+		 * is set, retry with the 1-entry mempool
 		 */
-		bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask);
-		if (bvl)
-			memset(bvl, 0,
-				bvec_nr_vecs(*idx) * sizeof(struct bio_vec));
-	} else
-		bvl = kzalloc(nr * sizeof(struct bio_vec), gfp_mask);
+		bvl = kmem_cache_alloc(bvs->slab, __gfp_mask);
+		if (unlikely(!bvl && (gfp_mask & __GFP_WAIT))) {
+			*idx = BIOVEC_MAX_IDX;
+			goto fallback;
+		}
+	}
+
+	if (bvl)
+		memset(bvl, 0, bvec_nr_vecs(*idx) * sizeof(struct bio_vec));
 
 	return bvl;
 }
 
-void bio_free(struct bio *bio, struct bio_set *bio_set)
+void bio_free(struct bio *bio, struct bio_set *bs)
 {
 	if (bio->bi_io_vec) {
 		const int pool_idx = BIO_POOL_IDX(bio);
 
 		BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS);
 
-		mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]);
+		if (pool_idx == BIOVEC_MAX_IDX)
+			mempool_free(bio->bi_io_vec, bs->bvec_pool);
+		else {
+			struct biovec_slab *bvs = bvec_slabs + pool_idx;
+
+			kmem_cache_free(bvs->slab, bio->bi_io_vec);
+		}
 	}
 
 	if (bio_integrity(bio))
-		bio_integrity_free(bio, bio_set);
+		bio_integrity_free(bio, bs);
 
-	mempool_free(bio, bio_set->bio_pool);
+	mempool_free(bio, bs->bio_pool);
 }
 
 /*
@@ -1346,30 +1377,18 @@ EXPORT_SYMBOL(bio_sector_offset);
  */
 static int biovec_create_pools(struct bio_set *bs, int pool_entries)
 {
-	int i;
+	struct biovec_slab *bp = bvec_slabs + BIOVEC_MAX_IDX;
 
-	for (i = 0; i < BIOVEC_NR_POOLS; i++) {
-		struct biovec_slab *bp = bvec_slabs + i;
-		mempool_t **bvp = bs->bvec_pools + i;
+	bs->bvec_pool = mempool_create_slab_pool(pool_entries, bp->slab);
+	if (!bs->bvec_pool)
+		return -ENOMEM;
 
-		*bvp = mempool_create_slab_pool(pool_entries, bp->slab);
-		if (!*bvp)
-			return -ENOMEM;
-	}
 	return 0;
 }
 
 static void biovec_free_pools(struct bio_set *bs)
 {
-	int i;
-
-	for (i = 0; i < BIOVEC_NR_POOLS; i++) {
-		mempool_t *bvp = bs->bvec_pools[i];
-
-		if (bvp)
-			mempool_destroy(bvp);
-	}
-
+	mempool_destroy(bs->bvec_pool);
 }
 
 void bioset_free(struct bio_set *bs)
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 3ed714eb54d9..d76e4bf22f29 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -397,13 +397,14 @@ static inline void bio_set_completion_cpu(struct bio *bio, unsigned int cpu)
  */
 #define BIO_POOL_SIZE 2
 #define BIOVEC_NR_POOLS 6
+#define BIOVEC_MAX_IDX	(BIOVEC_NR_POOLS - 1)
 
 struct bio_set {
 	mempool_t *bio_pool;
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
 	mempool_t *bio_integrity_pool;
 #endif
-	mempool_t *bvec_pools[BIOVEC_NR_POOLS];
+	mempool_t *bvec_pool;
 };
 
 struct biovec_slab {
-- 
cgit v1.2.3


From 1b4344986926da324b5cd10b683e5a1a5e1b7db3 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Wed, 22 Oct 2008 20:32:58 +0200
Subject: bio: move the slab pointer inside the bio_set

In preparation for adding differently sized bios.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/bio.c            | 7 +++++--
 include/linux/bio.h | 1 +
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/bio.c b/fs/bio.c
index eb6b4683a265..1ab8986b0411 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -1404,12 +1404,15 @@ void bioset_free(struct bio_set *bs)
 
 struct bio_set *bioset_create(int bio_pool_size, int bvec_pool_size)
 {
-	struct bio_set *bs = kzalloc(sizeof(*bs), GFP_KERNEL);
+	struct bio_set *bs;
 
+	bs = kzalloc(sizeof(*bs), GFP_KERNEL);
 	if (!bs)
 		return NULL;
 
-	bs->bio_pool = mempool_create_slab_pool(bio_pool_size, bio_slab);
+	bs->bio_slab = bio_slab;
+
+	bs->bio_pool = mempool_create_slab_pool(bio_pool_size, bs->bio_slab);
 	if (!bs->bio_pool)
 		goto bad;
 
diff --git a/include/linux/bio.h b/include/linux/bio.h
index d76e4bf22f29..9340098d75dc 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -400,6 +400,7 @@ static inline void bio_set_completion_cpu(struct bio *bio, unsigned int cpu)
 #define BIOVEC_MAX_IDX	(BIOVEC_NR_POOLS - 1)
 
 struct bio_set {
+	struct kmem_cache *bio_slab;
 	mempool_t *bio_pool;
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
 	mempool_t *bio_integrity_pool;
-- 
cgit v1.2.3


From bb799ca0202a360fa74d5f17039b9100caebdde7 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Wed, 10 Dec 2008 15:35:05 +0100
Subject: bio: allow individual slabs in the bio_set

Instead of having a global bio slab cache, add a reference to one
in each bio_set that is created. This allows for personalized slabs
in each bio_set, so that they can have bios of different sizes.

This means we can personalize the bios we return. File systems may
want to embed the bio inside another structure, to avoid allocation
more items (and stuffing them in ->bi_private) after the get a bio.
Or we may want to embed a number of bio_vecs directly at the end
of a bio, to avoid doing two allocations to return a bio. This is now
possible.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 drivers/md/dm-crypt.c |   2 +-
 drivers/md/dm-io.c    |   2 +-
 drivers/md/dm.c       |   2 +-
 fs/bio-integrity.c    |   2 +-
 fs/bio.c              | 191 ++++++++++++++++++++++++++++++++++++++++++--------
 include/linux/bio.h   |   6 +-
 6 files changed, 170 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index ce26c84af064..3326750ec02c 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1060,7 +1060,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad_page_pool;
 	}
 
-	cc->bs = bioset_create(MIN_IOS, MIN_IOS);
+	cc->bs = bioset_create(MIN_IOS, 0);
 	if (!cc->bs) {
 		ti->error = "Cannot allocate crypt bioset";
 		goto bad_bs;
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 2fd6d4450637..a34338567a2a 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -56,7 +56,7 @@ struct dm_io_client *dm_io_client_create(unsigned num_pages)
 	if (!client->pool)
 		goto bad;
 
-	client->bios = bioset_create(16, 16);
+	client->bios = bioset_create(16, 0);
 	if (!client->bios)
 		goto bad;
 
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 343094c3feeb..421c9f02d8ca 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1093,7 +1093,7 @@ static struct mapped_device *alloc_dev(int minor)
 	if (!md->tio_pool)
 		goto bad_tio_pool;
 
-	md->bs = bioset_create(16, 16);
+	md->bs = bioset_create(16, 0);
 	if (!md->bs)
 		goto bad_no_bioset;
 
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 19caf7c962ac..77ebc3c263d6 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -111,7 +111,7 @@ void bio_integrity_free(struct bio *bio, struct bio_set *bs)
 	    && bip->bip_buf != NULL)
 		kfree(bip->bip_buf);
 
-	mempool_free(bip->bip_vec, bs->bvec_pools[bip->bip_pool]);
+	bvec_free_bs(bs, bip->bip_vec, bip->bip_pool);
 	mempool_free(bip, bs->bio_integrity_pool);
 
 	bio->bi_integrity = NULL;
diff --git a/fs/bio.c b/fs/bio.c
index 1ab8986b0411..0146f80789e9 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -31,8 +31,6 @@
 
 DEFINE_TRACE(block_split);
 
-static struct kmem_cache *bio_slab __read_mostly;
-
 static mempool_t *bio_split_pool __read_mostly;
 
 /*
@@ -40,9 +38,8 @@ static mempool_t *bio_split_pool __read_mostly;
  * break badly! cannot be bigger than what you can fit into an
  * unsigned short
  */
-
 #define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) }
-static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
+struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
 	BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
 };
 #undef BV
@@ -53,11 +50,119 @@ static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
  */
 struct bio_set *fs_bio_set;
 
+/*
+ * Our slab pool management
+ */
+struct bio_slab {
+	struct kmem_cache *slab;
+	unsigned int slab_ref;
+	unsigned int slab_size;
+	char name[8];
+};
+static DEFINE_MUTEX(bio_slab_lock);
+static struct bio_slab *bio_slabs;
+static unsigned int bio_slab_nr, bio_slab_max;
+
+static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
+{
+	unsigned int sz = sizeof(struct bio) + extra_size;
+	struct kmem_cache *slab = NULL;
+	struct bio_slab *bslab;
+	unsigned int i, entry = -1;
+
+	mutex_lock(&bio_slab_lock);
+
+	i = 0;
+	while (i < bio_slab_nr) {
+		struct bio_slab *bslab = &bio_slabs[i];
+
+		if (!bslab->slab && entry == -1)
+			entry = i;
+		else if (bslab->slab_size == sz) {
+			slab = bslab->slab;
+			bslab->slab_ref++;
+			break;
+		}
+		i++;
+	}
+
+	if (slab)
+		goto out_unlock;
+
+	if (bio_slab_nr == bio_slab_max && entry == -1) {
+		bio_slab_max <<= 1;
+		bio_slabs = krealloc(bio_slabs,
+				     bio_slab_max * sizeof(struct bio_slab),
+				     GFP_KERNEL);
+		if (!bio_slabs)
+			goto out_unlock;
+	}
+	if (entry == -1)
+		entry = bio_slab_nr++;
+
+	bslab = &bio_slabs[entry];
+
+	snprintf(bslab->name, sizeof(bslab->name), "bio-%d", entry);
+	slab = kmem_cache_create(bslab->name, sz, 0, SLAB_HWCACHE_ALIGN, NULL);
+	if (!slab)
+		goto out_unlock;
+
+	printk("bio: create slab <%s> at %d\n", bslab->name, entry);
+	bslab->slab = slab;
+	bslab->slab_ref = 1;
+	bslab->slab_size = sz;
+out_unlock:
+	mutex_unlock(&bio_slab_lock);
+	return slab;
+}
+
+static void bio_put_slab(struct bio_set *bs)
+{
+	struct bio_slab *bslab = NULL;
+	unsigned int i;
+
+	mutex_lock(&bio_slab_lock);
+
+	for (i = 0; i < bio_slab_nr; i++) {
+		if (bs->bio_slab == bio_slabs[i].slab) {
+			bslab = &bio_slabs[i];
+			break;
+		}
+	}
+
+	if (WARN(!bslab, KERN_ERR "bio: unable to find slab!\n"))
+		goto out;
+
+	WARN_ON(!bslab->slab_ref);
+
+	if (--bslab->slab_ref)
+		goto out;
+
+	kmem_cache_destroy(bslab->slab);
+	bslab->slab = NULL;
+
+out:
+	mutex_unlock(&bio_slab_lock);
+}
+
 unsigned int bvec_nr_vecs(unsigned short idx)
 {
 	return bvec_slabs[idx].nr_vecs;
 }
 
+void bvec_free_bs(struct bio_set *bs, struct bio_vec *bv, unsigned int idx)
+{
+	BIO_BUG_ON(idx >= BIOVEC_NR_POOLS);
+
+	if (idx == BIOVEC_MAX_IDX)
+		mempool_free(bv, bs->bvec_pool);
+	else {
+		struct biovec_slab *bvs = bvec_slabs + idx;
+
+		kmem_cache_free(bvs->slab, bv);
+	}
+}
+
 struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx,
 			      struct bio_set *bs)
 {
@@ -134,24 +239,22 @@ fallback:
 
 void bio_free(struct bio *bio, struct bio_set *bs)
 {
-	if (bio->bi_io_vec) {
-		const int pool_idx = BIO_POOL_IDX(bio);
+	void *p;
 
-		BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS);
-
-		if (pool_idx == BIOVEC_MAX_IDX)
-			mempool_free(bio->bi_io_vec, bs->bvec_pool);
-		else {
-			struct biovec_slab *bvs = bvec_slabs + pool_idx;
-
-			kmem_cache_free(bvs->slab, bio->bi_io_vec);
-		}
-	}
+	if (bio->bi_io_vec)
+		bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio));
 
 	if (bio_integrity(bio))
 		bio_integrity_free(bio, bs);
 
-	mempool_free(bio, bs->bio_pool);
+	/*
+	 * If we have front padding, adjust the bio pointer before freeing
+	 */
+	p = bio;
+	if (bs->front_pad)
+		p -= bs->front_pad;
+
+	mempool_free(p, bs->bio_pool);
 }
 
 /*
@@ -188,16 +291,20 @@ void bio_init(struct bio *bio)
  *   for a &struct bio to become free. If a %NULL @bs is passed in, we will
  *   fall back to just using @kmalloc to allocate the required memory.
  *
- *   allocate bio and iovecs from the memory pools specified by the
- *   bio_set structure, or @kmalloc if none given.
+ *   Note that the caller must set ->bi_destructor on succesful return
+ *   of a bio, to do the appropriate freeing of the bio once the reference
+ *   count drops to zero.
  **/
 struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
 {
-	struct bio *bio;
+	struct bio *bio = NULL;
+
+	if (bs) {
+		void *p = mempool_alloc(bs->bio_pool, gfp_mask);
 
-	if (bs)
-		bio = mempool_alloc(bs->bio_pool, gfp_mask);
-	else
+		if (p)
+			bio = p + bs->front_pad;
+	} else
 		bio = kmalloc(sizeof(*bio), gfp_mask);
 
 	if (likely(bio)) {
@@ -1398,11 +1505,25 @@ void bioset_free(struct bio_set *bs)
 
 	bioset_integrity_free(bs);
 	biovec_free_pools(bs);
+	bio_put_slab(bs);
 
 	kfree(bs);
 }
 
-struct bio_set *bioset_create(int bio_pool_size, int bvec_pool_size)
+/**
+ * bioset_create  - Create a bio_set
+ * @pool_size:	Number of bio and bio_vecs to cache in the mempool
+ * @front_pad:	Number of bytes to allocate in front of the returned bio
+ *
+ * Description:
+ *    Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
+ *    to ask for a number of bytes to be allocated in front of the bio.
+ *    Front pad allocation is useful for embedding the bio inside
+ *    another structure, to avoid allocating extra data to go with the bio.
+ *    Note that the bio must be embedded at the END of that structure always,
+ *    or things will break badly.
+ */
+struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
 {
 	struct bio_set *bs;
 
@@ -1410,16 +1531,22 @@ struct bio_set *bioset_create(int bio_pool_size, int bvec_pool_size)
 	if (!bs)
 		return NULL;
 
-	bs->bio_slab = bio_slab;
+	bs->front_pad = front_pad;
 
-	bs->bio_pool = mempool_create_slab_pool(bio_pool_size, bs->bio_slab);
+	bs->bio_slab = bio_find_or_create_slab(front_pad);
+	if (!bs->bio_slab) {
+		kfree(bs);
+		return NULL;
+	}
+
+	bs->bio_pool = mempool_create_slab_pool(pool_size, bs->bio_slab);
 	if (!bs->bio_pool)
 		goto bad;
 
-	if (bioset_integrity_create(bs, bio_pool_size))
+	if (bioset_integrity_create(bs, pool_size))
 		goto bad;
 
-	if (!biovec_create_pools(bs, bvec_pool_size))
+	if (!biovec_create_pools(bs, pool_size))
 		return bs;
 
 bad:
@@ -1443,12 +1570,16 @@ static void __init biovec_init_slabs(void)
 
 static int __init init_bio(void)
 {
-	bio_slab = KMEM_CACHE(bio, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
+	bio_slab_max = 2;
+	bio_slab_nr = 0;
+	bio_slabs = kzalloc(bio_slab_max * sizeof(struct bio_slab), GFP_KERNEL);
+	if (!bio_slabs)
+		panic("bio: can't allocate bios\n");
 
 	bio_integrity_init_slab();
 	biovec_init_slabs();
 
-	fs_bio_set = bioset_create(BIO_POOL_SIZE, 2);
+	fs_bio_set = bioset_create(BIO_POOL_SIZE, 0);
 	if (!fs_bio_set)
 		panic("bio: can't allocate bios\n");
 
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 9340098d75dc..4b80d3537f97 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -334,7 +334,7 @@ struct bio_pair {
 extern struct bio_pair *bio_split(struct bio *bi, int first_sectors);
 extern void bio_pair_release(struct bio_pair *dbio);
 
-extern struct bio_set *bioset_create(int, int);
+extern struct bio_set *bioset_create(unsigned int, unsigned int);
 extern void bioset_free(struct bio_set *);
 
 extern struct bio *bio_alloc(gfp_t, int);
@@ -379,6 +379,7 @@ extern struct bio *bio_copy_user_iov(struct request_queue *,
 extern int bio_uncopy_user(struct bio *);
 void zero_fill_bio(struct bio *bio);
 extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set *);
+extern void bvec_free_bs(struct bio_set *, struct bio_vec *, unsigned int);
 extern unsigned int bvec_nr_vecs(unsigned short idx);
 
 /*
@@ -401,6 +402,8 @@ static inline void bio_set_completion_cpu(struct bio *bio, unsigned int cpu)
 
 struct bio_set {
 	struct kmem_cache *bio_slab;
+	unsigned int front_pad;
+
 	mempool_t *bio_pool;
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
 	mempool_t *bio_integrity_pool;
@@ -415,6 +418,7 @@ struct biovec_slab {
 };
 
 extern struct bio_set *fs_bio_set;
+extern struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly;
 
 /*
  * a small number of entries is fine, not going to be performance critical.
-- 
cgit v1.2.3


From 392ddc32982a5c661dd90dd49a3cb37f1c68b782 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Tue, 23 Dec 2008 12:42:54 +0100
Subject: bio: add support for inlining a number of bio_vecs inside the bio

When we go and allocate a bio for IO, we actually do two allocations.
One for the bio itself, and one for the bi_io_vec that holds the
actual pages we are interested in.

This feature inlines a definable amount of io vecs inside the bio
itself, so we eliminate the bio_vec array allocation for IO's up
to a certain size. It defaults to 4 vecs, which is typically 16k
of IO.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/bio.c            | 27 ++++++++++++++++++++++-----
 include/linux/bio.h | 12 ++++++++++++
 2 files changed, 34 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/bio.c b/fs/bio.c
index 0146f80789e9..75e6be18ecd3 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -31,6 +31,12 @@
 
 DEFINE_TRACE(block_split);
 
+/*
+ * Test patch to inline a certain number of bi_io_vec's inside the bio
+ * itself, to shrink a bio data allocation from two mempool calls to one
+ */
+#define BIO_INLINE_VECS		4
+
 static mempool_t *bio_split_pool __read_mostly;
 
 /*
@@ -241,7 +247,7 @@ void bio_free(struct bio *bio, struct bio_set *bs)
 {
 	void *p;
 
-	if (bio->bi_io_vec)
+	if (bio_has_allocated_vec(bio))
 		bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio));
 
 	if (bio_integrity(bio))
@@ -267,7 +273,8 @@ static void bio_fs_destructor(struct bio *bio)
 
 static void bio_kmalloc_destructor(struct bio *bio)
 {
-	kfree(bio->bi_io_vec);
+	if (bio_has_allocated_vec(bio))
+		kfree(bio->bi_io_vec);
 	kfree(bio);
 }
 
@@ -314,7 +321,16 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
 		if (likely(nr_iovecs)) {
 			unsigned long uninitialized_var(idx);
 
-			bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs);
+			if (nr_iovecs <= BIO_INLINE_VECS) {
+				idx = 0;
+				bvl = bio->bi_inline_vecs;
+				nr_iovecs = BIO_INLINE_VECS;
+				memset(bvl, 0, BIO_INLINE_VECS * sizeof(*bvl));
+			} else {
+				bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx,
+							bs);
+				nr_iovecs = bvec_nr_vecs(idx);
+			}
 			if (unlikely(!bvl)) {
 				if (bs)
 					mempool_free(bio, bs->bio_pool);
@@ -324,7 +340,7 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
 				goto out;
 			}
 			bio->bi_flags |= idx << BIO_POOL_OFFSET;
-			bio->bi_max_vecs = bvec_nr_vecs(idx);
+			bio->bi_max_vecs = nr_iovecs;
 		}
 		bio->bi_io_vec = bvl;
 	}
@@ -1525,6 +1541,7 @@ void bioset_free(struct bio_set *bs)
  */
 struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
 {
+	unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
 	struct bio_set *bs;
 
 	bs = kzalloc(sizeof(*bs), GFP_KERNEL);
@@ -1533,7 +1550,7 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
 
 	bs->front_pad = front_pad;
 
-	bs->bio_slab = bio_find_or_create_slab(front_pad);
+	bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad);
 	if (!bs->bio_slab) {
 		kfree(bs);
 		return NULL;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 4b80d3537f97..18462c5b8fff 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -102,6 +102,13 @@ struct bio {
 #endif
 
 	bio_destructor_t	*bi_destructor;	/* destructor */
+
+	/*
+	 * We can inline a number of vecs at the end of the bio, to avoid
+	 * double allocations for a small number of bio_vecs. This member
+	 * MUST obviously be kept at the very end of the bio.
+	 */
+	struct bio_vec		bi_inline_vecs[0];
 };
 
 /*
@@ -213,6 +220,11 @@ static inline void *bio_data(struct bio *bio)
 	return NULL;
 }
 
+static inline int bio_has_allocated_vec(struct bio *bio)
+{
+	return bio->bi_io_vec && bio->bi_io_vec != bio->bi_inline_vecs;
+}
+
 /*
  * will die
  */
-- 
cgit v1.2.3


From abf137dd7712132ee56d5b3143c2ff61a72a5faa Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Tue, 9 Dec 2008 08:11:22 +0100
Subject: aio: make the lookup_ioctx() lockless

The mm->ioctx_list is currently protected by a reader-writer lock,
so we always grab that lock on the read side for doing ioctx
lookups. As the workload is extremely reader biased, turn this into
an rcu hlist so we can make lookup_ioctx() lockless. Get rid of
the rwlock and use a spinlock for providing update side exclusion.

There's usually only 1 entry on this list, so it doesn't make sense
to look into fancier data structures.

Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 arch/s390/mm/pgtable.c   |   4 +-
 fs/aio.c                 | 100 ++++++++++++++++++++++++++---------------------
 include/linux/aio.h      |   5 ++-
 include/linux/mm_types.h |   5 ++-
 kernel/fork.c            |   4 +-
 5 files changed, 67 insertions(+), 51 deletions(-)

(limited to 'include/linux')

diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index ef3635b52fc0..0767827540b1 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -263,7 +263,7 @@ int s390_enable_sie(void)
 	/* lets check if we are allowed to replace the mm */
 	task_lock(tsk);
 	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
-	    tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) {
+	    tsk->mm != tsk->active_mm || !hlist_empty(&tsk->mm->ioctx_list)) {
 		task_unlock(tsk);
 		return -EINVAL;
 	}
@@ -279,7 +279,7 @@ int s390_enable_sie(void)
 	/* Now lets check again if something happened */
 	task_lock(tsk);
 	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
-	    tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) {
+	    tsk->mm != tsk->active_mm || !hlist_empty(&tsk->mm->ioctx_list)) {
 		mmput(mm);
 		task_unlock(tsk);
 		return -EINVAL;
diff --git a/fs/aio.c b/fs/aio.c
index f658441d5666..d6f89d3c15e8 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -191,6 +191,20 @@ static int aio_setup_ring(struct kioctx *ctx)
 	kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK), km); \
 } while(0)
 
+static void ctx_rcu_free(struct rcu_head *head)
+{
+	struct kioctx *ctx = container_of(head, struct kioctx, rcu_head);
+	unsigned nr_events = ctx->max_reqs;
+
+	kmem_cache_free(kioctx_cachep, ctx);
+
+	if (nr_events) {
+		spin_lock(&aio_nr_lock);
+		BUG_ON(aio_nr - nr_events > aio_nr);
+		aio_nr -= nr_events;
+		spin_unlock(&aio_nr_lock);
+	}
+}
 
 /* __put_ioctx
  *	Called when the last user of an aio context has gone away,
@@ -198,8 +212,6 @@ static int aio_setup_ring(struct kioctx *ctx)
  */
 static void __put_ioctx(struct kioctx *ctx)
 {
-	unsigned nr_events = ctx->max_reqs;
-
 	BUG_ON(ctx->reqs_active);
 
 	cancel_delayed_work(&ctx->wq);
@@ -208,14 +220,7 @@ static void __put_ioctx(struct kioctx *ctx)
 	mmdrop(ctx->mm);
 	ctx->mm = NULL;
 	pr_debug("__put_ioctx: freeing %p\n", ctx);
-	kmem_cache_free(kioctx_cachep, ctx);
-
-	if (nr_events) {
-		spin_lock(&aio_nr_lock);
-		BUG_ON(aio_nr - nr_events > aio_nr);
-		aio_nr -= nr_events;
-		spin_unlock(&aio_nr_lock);
-	}
+	call_rcu(&ctx->rcu_head, ctx_rcu_free);
 }
 
 #define get_ioctx(kioctx) do {						\
@@ -235,6 +240,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 {
 	struct mm_struct *mm;
 	struct kioctx *ctx;
+	int did_sync = 0;
 
 	/* Prevent overflows */
 	if ((nr_events > (0x10000000U / sizeof(struct io_event))) ||
@@ -267,21 +273,30 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 		goto out_freectx;
 
 	/* limit the number of system wide aios */
-	spin_lock(&aio_nr_lock);
-	if (aio_nr + ctx->max_reqs > aio_max_nr ||
-	    aio_nr + ctx->max_reqs < aio_nr)
-		ctx->max_reqs = 0;
-	else
-		aio_nr += ctx->max_reqs;
-	spin_unlock(&aio_nr_lock);
+	do {
+		spin_lock_bh(&aio_nr_lock);
+		if (aio_nr + nr_events > aio_max_nr ||
+		    aio_nr + nr_events < aio_nr)
+			ctx->max_reqs = 0;
+		else
+			aio_nr += ctx->max_reqs;
+		spin_unlock_bh(&aio_nr_lock);
+		if (ctx->max_reqs || did_sync)
+			break;
+
+		/* wait for rcu callbacks to have completed before giving up */
+		synchronize_rcu();
+		did_sync = 1;
+		ctx->max_reqs = nr_events;
+	} while (1);
+
 	if (ctx->max_reqs == 0)
 		goto out_cleanup;
 
 	/* now link into global list. */
-	write_lock(&mm->ioctx_list_lock);
-	ctx->next = mm->ioctx_list;
-	mm->ioctx_list = ctx;
-	write_unlock(&mm->ioctx_list_lock);
+	spin_lock(&mm->ioctx_lock);
+	hlist_add_head_rcu(&ctx->list, &mm->ioctx_list);
+	spin_unlock(&mm->ioctx_lock);
 
 	dprintk("aio: allocated ioctx %p[%ld]: mm=%p mask=0x%x\n",
 		ctx, ctx->user_id, current->mm, ctx->ring_info.nr);
@@ -375,11 +390,12 @@ ssize_t wait_on_sync_kiocb(struct kiocb *iocb)
  */
 void exit_aio(struct mm_struct *mm)
 {
-	struct kioctx *ctx = mm->ioctx_list;
-	mm->ioctx_list = NULL;
-	while (ctx) {
-		struct kioctx *next = ctx->next;
-		ctx->next = NULL;
+	struct kioctx *ctx;
+
+	while (!hlist_empty(&mm->ioctx_list)) {
+		ctx = hlist_entry(mm->ioctx_list.first, struct kioctx, list);
+		hlist_del_rcu(&ctx->list);
+
 		aio_cancel_all(ctx);
 
 		wait_for_all_aios(ctx);
@@ -394,7 +410,6 @@ void exit_aio(struct mm_struct *mm)
 				atomic_read(&ctx->users), ctx->dead,
 				ctx->reqs_active);
 		put_ioctx(ctx);
-		ctx = next;
 	}
 }
 
@@ -555,19 +570,21 @@ int aio_put_req(struct kiocb *req)
 
 static struct kioctx *lookup_ioctx(unsigned long ctx_id)
 {
-	struct kioctx *ioctx;
-	struct mm_struct *mm;
+	struct mm_struct *mm = current->mm;
+	struct kioctx *ctx = NULL;
+	struct hlist_node *n;
 
-	mm = current->mm;
-	read_lock(&mm->ioctx_list_lock);
-	for (ioctx = mm->ioctx_list; ioctx; ioctx = ioctx->next)
-		if (likely(ioctx->user_id == ctx_id && !ioctx->dead)) {
-			get_ioctx(ioctx);
+	rcu_read_lock();
+
+	hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) {
+		if (ctx->user_id == ctx_id && !ctx->dead) {
+			get_ioctx(ctx);
 			break;
 		}
-	read_unlock(&mm->ioctx_list_lock);
+	}
 
-	return ioctx;
+	rcu_read_unlock();
+	return ctx;
 }
 
 /*
@@ -1215,19 +1232,14 @@ out:
 static void io_destroy(struct kioctx *ioctx)
 {
 	struct mm_struct *mm = current->mm;
-	struct kioctx **tmp;
 	int was_dead;
 
 	/* delete the entry from the list is someone else hasn't already */
-	write_lock(&mm->ioctx_list_lock);
+	spin_lock(&mm->ioctx_lock);
 	was_dead = ioctx->dead;
 	ioctx->dead = 1;
-	for (tmp = &mm->ioctx_list; *tmp && *tmp != ioctx;
-	     tmp = &(*tmp)->next)
-		;
-	if (*tmp)
-		*tmp = ioctx->next;
-	write_unlock(&mm->ioctx_list_lock);
+	hlist_del_rcu(&ioctx->list);
+	spin_unlock(&mm->ioctx_lock);
 
 	dprintk("aio_release(%p)\n", ioctx);
 	if (likely(!was_dead))
diff --git a/include/linux/aio.h b/include/linux/aio.h
index f6b8cf99b596..b16a957030f8 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -5,6 +5,7 @@
 #include <linux/workqueue.h>
 #include <linux/aio_abi.h>
 #include <linux/uio.h>
+#include <linux/rcupdate.h>
 
 #include <asm/atomic.h>
 
@@ -183,7 +184,7 @@ struct kioctx {
 
 	/* This needs improving */
 	unsigned long		user_id;
-	struct kioctx		*next;
+	struct hlist_node	list;
 
 	wait_queue_head_t	wait;
 
@@ -199,6 +200,8 @@ struct kioctx {
 	struct aio_ring_info	ring_info;
 
 	struct delayed_work	wq;
+
+	struct rcu_head		rcu_head;
 };
 
 /* prototypes */
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index fe825471d5aa..9cfc9b627fdd 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -232,8 +232,9 @@ struct mm_struct {
 	struct core_state *core_state; /* coredumping support */
 
 	/* aio bits */
-	rwlock_t		ioctx_list_lock;	/* aio lock */
-	struct kioctx		*ioctx_list;
+	spinlock_t		ioctx_lock;
+	struct hlist_head	ioctx_list;
+
 #ifdef CONFIG_MM_OWNER
 	/*
 	 * "owner" points to a task that is regarded as the canonical
diff --git a/kernel/fork.c b/kernel/fork.c
index 6144b36cd897..43cbf30669e6 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -415,8 +415,8 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
 	set_mm_counter(mm, file_rss, 0);
 	set_mm_counter(mm, anon_rss, 0);
 	spin_lock_init(&mm->page_table_lock);
-	rwlock_init(&mm->ioctx_list_lock);
-	mm->ioctx_list = NULL;
+	spin_lock_init(&mm->ioctx_lock);
+	INIT_HLIST_HEAD(&mm->ioctx_list);
 	mm->free_area_cache = TASK_UNMAPPED_BASE;
 	mm->cached_hole_size = ~0UL;
 	mm_init_owner(mm, p);
-- 
cgit v1.2.3


From b374d18a4bfce705e4a99ae9f501b53e86ecb283 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Fri, 31 Oct 2008 10:05:07 +0100
Subject: block: get rid of elevator_t typedef

Just use struct elevator_queue everywhere instead.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/as-iosched.c       |  8 +++----
 block/cfq-iosched.c      |  6 +++---
 block/deadline-iosched.c |  6 +++---
 block/elevator.c         | 55 +++++++++++++++++++++++++-----------------------
 block/noop-iosched.c     |  2 +-
 drivers/block/nbd.c      |  2 +-
 include/linux/blkdev.h   |  3 +--
 include/linux/elevator.h |  8 +++----
 8 files changed, 46 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/block/as-iosched.c b/block/as-iosched.c
index 802b5d0d8536..631f6f44460a 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -1339,7 +1339,7 @@ static int as_may_queue(struct request_queue *q, int rw)
 	return ret;
 }
 
-static void as_exit_queue(elevator_t *e)
+static void as_exit_queue(struct elevator_queue *e)
 {
 	struct as_data *ad = e->elevator_data;
 
@@ -1409,7 +1409,7 @@ as_var_store(unsigned long *var, const char *page, size_t count)
 	return count;
 }
 
-static ssize_t est_time_show(elevator_t *e, char *page)
+static ssize_t est_time_show(struct elevator_queue *e, char *page)
 {
 	struct as_data *ad = e->elevator_data;
 	int pos = 0;
@@ -1427,7 +1427,7 @@ static ssize_t est_time_show(elevator_t *e, char *page)
 }
 
 #define SHOW_FUNCTION(__FUNC, __VAR)				\
-static ssize_t __FUNC(elevator_t *e, char *page)		\
+static ssize_t __FUNC(struct elevator_queue *e, char *page)	\
 {								\
 	struct as_data *ad = e->elevator_data;			\
 	return as_var_show(jiffies_to_msecs((__VAR)), (page));	\
@@ -1440,7 +1440,7 @@ SHOW_FUNCTION(as_write_batch_expire_show, ad->batch_expire[REQ_ASYNC]);
 #undef SHOW_FUNCTION
 
 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX)				\
-static ssize_t __FUNC(elevator_t *e, const char *page, size_t count)	\
+static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)	\
 {									\
 	struct as_data *ad = e->elevator_data;				\
 	int ret = as_var_store(__PTR, (page), count);			\
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index a2bfec7d6b36..adaf93a9d19d 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -2178,7 +2178,7 @@ static void cfq_put_async_queues(struct cfq_data *cfqd)
 		cfq_put_queue(cfqd->async_idle_cfqq);
 }
 
-static void cfq_exit_queue(elevator_t *e)
+static void cfq_exit_queue(struct elevator_queue *e)
 {
 	struct cfq_data *cfqd = e->elevator_data;
 	struct request_queue *q = cfqd->queue;
@@ -2288,7 +2288,7 @@ cfq_var_store(unsigned int *var, const char *page, size_t count)
 }
 
 #define SHOW_FUNCTION(__FUNC, __VAR, __CONV)				\
-static ssize_t __FUNC(elevator_t *e, char *page)			\
+static ssize_t __FUNC(struct elevator_queue *e, char *page)		\
 {									\
 	struct cfq_data *cfqd = e->elevator_data;			\
 	unsigned int __data = __VAR;					\
@@ -2308,7 +2308,7 @@ SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0);
 #undef SHOW_FUNCTION
 
 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)			\
-static ssize_t __FUNC(elevator_t *e, const char *page, size_t count)	\
+static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)	\
 {									\
 	struct cfq_data *cfqd = e->elevator_data;			\
 	unsigned int __data;						\
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index fd311179f44c..c4d991d4adef 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -334,7 +334,7 @@ static int deadline_queue_empty(struct request_queue *q)
 		&& list_empty(&dd->fifo_list[READ]);
 }
 
-static void deadline_exit_queue(elevator_t *e)
+static void deadline_exit_queue(struct elevator_queue *e)
 {
 	struct deadline_data *dd = e->elevator_data;
 
@@ -387,7 +387,7 @@ deadline_var_store(int *var, const char *page, size_t count)
 }
 
 #define SHOW_FUNCTION(__FUNC, __VAR, __CONV)				\
-static ssize_t __FUNC(elevator_t *e, char *page)			\
+static ssize_t __FUNC(struct elevator_queue *e, char *page)		\
 {									\
 	struct deadline_data *dd = e->elevator_data;			\
 	int __data = __VAR;						\
@@ -403,7 +403,7 @@ SHOW_FUNCTION(deadline_fifo_batch_show, dd->fifo_batch, 0);
 #undef SHOW_FUNCTION
 
 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)			\
-static ssize_t __FUNC(elevator_t *e, const char *page, size_t count)	\
+static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)	\
 {									\
 	struct deadline_data *dd = e->elevator_data;			\
 	int __data;							\
diff --git a/block/elevator.c b/block/elevator.c
index ff60177a3bab..98259eda0ef6 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -65,7 +65,7 @@ DEFINE_TRACE(block_rq_issue);
 static int elv_iosched_allow_merge(struct request *rq, struct bio *bio)
 {
 	struct request_queue *q = rq->q;
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 
 	if (e->ops->elevator_allow_merge_fn)
 		return e->ops->elevator_allow_merge_fn(q, rq, bio);
@@ -208,13 +208,13 @@ __setup("elevator=", elevator_setup);
 
 static struct kobj_type elv_ktype;
 
-static elevator_t *elevator_alloc(struct request_queue *q,
+static struct elevator_queue *elevator_alloc(struct request_queue *q,
 				  struct elevator_type *e)
 {
-	elevator_t *eq;
+	struct elevator_queue *eq;
 	int i;
 
-	eq = kmalloc_node(sizeof(elevator_t), GFP_KERNEL | __GFP_ZERO, q->node);
+	eq = kmalloc_node(sizeof(*eq), GFP_KERNEL | __GFP_ZERO, q->node);
 	if (unlikely(!eq))
 		goto err;
 
@@ -240,8 +240,9 @@ err:
 
 static void elevator_release(struct kobject *kobj)
 {
-	elevator_t *e = container_of(kobj, elevator_t, kobj);
+	struct elevator_queue *e;
 
+	e = container_of(kobj, struct elevator_queue, kobj);
 	elevator_put(e->elevator_type);
 	kfree(e->hash);
 	kfree(e);
@@ -297,7 +298,7 @@ int elevator_init(struct request_queue *q, char *name)
 }
 EXPORT_SYMBOL(elevator_init);
 
-void elevator_exit(elevator_t *e)
+void elevator_exit(struct elevator_queue *e)
 {
 	mutex_lock(&e->sysfs_lock);
 	if (e->ops->elevator_exit_fn)
@@ -311,7 +312,7 @@ EXPORT_SYMBOL(elevator_exit);
 
 static void elv_activate_rq(struct request_queue *q, struct request *rq)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 
 	if (e->ops->elevator_activate_req_fn)
 		e->ops->elevator_activate_req_fn(q, rq);
@@ -319,7 +320,7 @@ static void elv_activate_rq(struct request_queue *q, struct request *rq)
 
 static void elv_deactivate_rq(struct request_queue *q, struct request *rq)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 
 	if (e->ops->elevator_deactivate_req_fn)
 		e->ops->elevator_deactivate_req_fn(q, rq);
@@ -338,7 +339,7 @@ static void elv_rqhash_del(struct request_queue *q, struct request *rq)
 
 static void elv_rqhash_add(struct request_queue *q, struct request *rq)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 
 	BUG_ON(ELV_ON_HASH(rq));
 	hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]);
@@ -352,7 +353,7 @@ static void elv_rqhash_reposition(struct request_queue *q, struct request *rq)
 
 static struct request *elv_rqhash_find(struct request_queue *q, sector_t offset)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 	struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)];
 	struct hlist_node *entry, *next;
 	struct request *rq;
@@ -494,7 +495,7 @@ EXPORT_SYMBOL(elv_dispatch_add_tail);
 
 int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 	struct request *__rq;
 	int ret;
 
@@ -529,7 +530,7 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
 
 void elv_merged_request(struct request_queue *q, struct request *rq, int type)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 
 	if (e->ops->elevator_merged_fn)
 		e->ops->elevator_merged_fn(q, rq, type);
@@ -543,7 +544,7 @@ void elv_merged_request(struct request_queue *q, struct request *rq, int type)
 void elv_merge_requests(struct request_queue *q, struct request *rq,
 			     struct request *next)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 
 	if (e->ops->elevator_merge_req_fn)
 		e->ops->elevator_merge_req_fn(q, rq, next);
@@ -846,7 +847,7 @@ void elv_dequeue_request(struct request_queue *q, struct request *rq)
 
 int elv_queue_empty(struct request_queue *q)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 
 	if (!list_empty(&q->queue_head))
 		return 0;
@@ -860,7 +861,7 @@ EXPORT_SYMBOL(elv_queue_empty);
 
 struct request *elv_latter_request(struct request_queue *q, struct request *rq)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 
 	if (e->ops->elevator_latter_req_fn)
 		return e->ops->elevator_latter_req_fn(q, rq);
@@ -869,7 +870,7 @@ struct request *elv_latter_request(struct request_queue *q, struct request *rq)
 
 struct request *elv_former_request(struct request_queue *q, struct request *rq)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 
 	if (e->ops->elevator_former_req_fn)
 		return e->ops->elevator_former_req_fn(q, rq);
@@ -878,7 +879,7 @@ struct request *elv_former_request(struct request_queue *q, struct request *rq)
 
 int elv_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 
 	if (e->ops->elevator_set_req_fn)
 		return e->ops->elevator_set_req_fn(q, rq, gfp_mask);
@@ -889,7 +890,7 @@ int elv_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
 
 void elv_put_request(struct request_queue *q, struct request *rq)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 
 	if (e->ops->elevator_put_req_fn)
 		e->ops->elevator_put_req_fn(rq);
@@ -897,7 +898,7 @@ void elv_put_request(struct request_queue *q, struct request *rq)
 
 int elv_may_queue(struct request_queue *q, int rw)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 
 	if (e->ops->elevator_may_queue_fn)
 		return e->ops->elevator_may_queue_fn(q, rw);
@@ -920,7 +921,7 @@ EXPORT_SYMBOL(elv_abort_queue);
 
 void elv_completed_request(struct request_queue *q, struct request *rq)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 
 	/*
 	 * request is released from the driver, io must be done
@@ -955,13 +956,14 @@ void elv_completed_request(struct request_queue *q, struct request *rq)
 static ssize_t
 elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
 {
-	elevator_t *e = container_of(kobj, elevator_t, kobj);
 	struct elv_fs_entry *entry = to_elv(attr);
+	struct elevator_queue *e;
 	ssize_t error;
 
 	if (!entry->show)
 		return -EIO;
 
+	e = container_of(kobj, struct elevator_queue, kobj);
 	mutex_lock(&e->sysfs_lock);
 	error = e->ops ? entry->show(e, page) : -ENOENT;
 	mutex_unlock(&e->sysfs_lock);
@@ -972,13 +974,14 @@ static ssize_t
 elv_attr_store(struct kobject *kobj, struct attribute *attr,
 	       const char *page, size_t length)
 {
-	elevator_t *e = container_of(kobj, elevator_t, kobj);
 	struct elv_fs_entry *entry = to_elv(attr);
+	struct elevator_queue *e;
 	ssize_t error;
 
 	if (!entry->store)
 		return -EIO;
 
+	e = container_of(kobj, struct elevator_queue, kobj);
 	mutex_lock(&e->sysfs_lock);
 	error = e->ops ? entry->store(e, page, length) : -ENOENT;
 	mutex_unlock(&e->sysfs_lock);
@@ -997,7 +1000,7 @@ static struct kobj_type elv_ktype = {
 
 int elv_register_queue(struct request_queue *q)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 	int error;
 
 	error = kobject_add(&e->kobj, &q->kobj, "%s", "iosched");
@@ -1015,7 +1018,7 @@ int elv_register_queue(struct request_queue *q)
 	return error;
 }
 
-static void __elv_unregister_queue(elevator_t *e)
+static void __elv_unregister_queue(struct elevator_queue *e)
 {
 	kobject_uevent(&e->kobj, KOBJ_REMOVE);
 	kobject_del(&e->kobj);
@@ -1078,7 +1081,7 @@ EXPORT_SYMBOL_GPL(elv_unregister);
  */
 static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 {
-	elevator_t *old_elevator, *e;
+	struct elevator_queue *old_elevator, *e;
 	void *data;
 
 	/*
@@ -1184,7 +1187,7 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name,
 
 ssize_t elv_iosched_show(struct request_queue *q, char *name)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 	struct elevator_type *elv = e->elevator_type;
 	struct elevator_type *__e;
 	int len = 0;
diff --git a/block/noop-iosched.c b/block/noop-iosched.c
index c23e02969650..3a0d369d08c7 100644
--- a/block/noop-iosched.c
+++ b/block/noop-iosched.c
@@ -76,7 +76,7 @@ static void *noop_init_queue(struct request_queue *q)
 	return nd;
 }
 
-static void noop_exit_queue(elevator_t *e)
+static void noop_exit_queue(struct elevator_queue *e)
 {
 	struct noop_data *nd = e->elevator_data;
 
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index d3a91cacee8c..0766ce6187a9 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -722,7 +722,7 @@ static int __init nbd_init(void)
 
 	for (i = 0; i < nbds_max; i++) {
 		struct gendisk *disk = alloc_disk(1 << part_shift);
-		elevator_t *old_e;
+		struct elevator_queue *old_e;
 		if (!disk)
 			goto out;
 		nbd_dev[i].disk = disk;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 41bbadfd17f6..7035cec583b6 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -26,7 +26,6 @@ struct scsi_ioctl_command;
 
 struct request_queue;
 struct elevator_queue;
-typedef struct elevator_queue elevator_t;
 struct request_pm_state;
 struct blk_trace;
 struct request;
@@ -313,7 +312,7 @@ struct request_queue
 	 */
 	struct list_head	queue_head;
 	struct request		*last_merge;
-	elevator_t		*elevator;
+	struct elevator_queue	*elevator;
 
 	/*
 	 * the queue request freelist, one for reads and one for writes
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 92f6f634e3e6..7a204256b155 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -28,7 +28,7 @@ typedef void (elevator_activate_req_fn) (struct request_queue *, struct request
 typedef void (elevator_deactivate_req_fn) (struct request_queue *, struct request *);
 
 typedef void *(elevator_init_fn) (struct request_queue *);
-typedef void (elevator_exit_fn) (elevator_t *);
+typedef void (elevator_exit_fn) (struct elevator_queue *);
 
 struct elevator_ops
 {
@@ -62,8 +62,8 @@ struct elevator_ops
 
 struct elv_fs_entry {
 	struct attribute attr;
-	ssize_t (*show)(elevator_t *, char *);
-	ssize_t (*store)(elevator_t *, const char *, size_t);
+	ssize_t (*show)(struct elevator_queue *, char *);
+	ssize_t (*store)(struct elevator_queue *, const char *, size_t);
 };
 
 /*
@@ -130,7 +130,7 @@ extern ssize_t elv_iosched_show(struct request_queue *, char *);
 extern ssize_t elv_iosched_store(struct request_queue *, const char *, size_t);
 
 extern int elevator_init(struct request_queue *, char *);
-extern void elevator_exit(elevator_t *);
+extern void elevator_exit(struct elevator_queue *);
 extern int elv_rq_merge_ok(struct request *, struct bio *);
 
 /*
-- 
cgit v1.2.3


From a6f23657d3072bde6844055bbc2290e497f33fbc Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Fri, 24 Oct 2008 12:52:42 +0200
Subject: block: add one-hit cache for disk partition lookup

disk_map_sector_rcu() returns a partition from a sector offset,
which we use for IO statistics on a per-partition basis. The
lookup itself is an O(N) list lookup, where N is the number of
partitions. This actually hurts performance quite a bit, even
on the lower end partitions. On higher numbered partitions,
it can get pretty bad.

Solve this by adding a one-hit cache for partition lookup.
This makes the lookup O(1) for the case where we do most IO to
one partition. Even for mixed partition workloads, amortized cost
is pretty close to O(1) since the natural IO batching makes the
one-hit cache last for lots of IOs.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/genhd.c         | 23 +++++++++++++++++++----
 include/linux/genhd.h |  1 +
 2 files changed, 20 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/block/genhd.c b/block/genhd.c
index 2f7feda61e35..d84a7df1e2a0 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -181,6 +181,12 @@ void disk_part_iter_exit(struct disk_part_iter *piter)
 }
 EXPORT_SYMBOL_GPL(disk_part_iter_exit);
 
+static inline int sector_in_part(struct hd_struct *part, sector_t sector)
+{
+	return part->start_sect <= sector &&
+		sector < part->start_sect + part->nr_sects;
+}
+
 /**
  * disk_map_sector_rcu - map sector to partition
  * @disk: gendisk of interest
@@ -199,16 +205,22 @@ EXPORT_SYMBOL_GPL(disk_part_iter_exit);
 struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector)
 {
 	struct disk_part_tbl *ptbl;
+	struct hd_struct *part;
 	int i;
 
 	ptbl = rcu_dereference(disk->part_tbl);
 
+	part = rcu_dereference(ptbl->last_lookup);
+	if (part && sector_in_part(part, sector))
+		return part;
+
 	for (i = 1; i < ptbl->len; i++) {
-		struct hd_struct *part = rcu_dereference(ptbl->part[i]);
+		part = rcu_dereference(ptbl->part[i]);
 
-		if (part && part->start_sect <= sector &&
-		    sector < part->start_sect + part->nr_sects)
+		if (part && sector_in_part(part, sector)) {
+			rcu_assign_pointer(ptbl->last_lookup, part);
 			return part;
+		}
 	}
 	return &disk->part0;
 }
@@ -888,8 +900,11 @@ static void disk_replace_part_tbl(struct gendisk *disk,
 	struct disk_part_tbl *old_ptbl = disk->part_tbl;
 
 	rcu_assign_pointer(disk->part_tbl, new_ptbl);
-	if (old_ptbl)
+
+	if (old_ptbl) {
+		rcu_assign_pointer(old_ptbl->last_lookup, NULL);
 		call_rcu(&old_ptbl->rcu_head, disk_free_ptbl_rcu_cb);
+	}
 }
 
 /**
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 3df7742ce246..16948eaecae3 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -126,6 +126,7 @@ struct blk_scsi_cmd_filter {
 struct disk_part_tbl {
 	struct rcu_head rcu_head;
 	int len;
+	struct hd_struct *last_lookup;
 	struct hd_struct *part[];
 };
 
-- 
cgit v1.2.3


From b3a6ffe16b5cc48abe7db8d04882dc45280eb693 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Fri, 12 Dec 2008 09:51:16 +0100
Subject: Get rid of CONFIG_LSF

We have two seperate config entries for large devices/files. One
is CONFIG_LBD that guards just the devices, the other is CONFIG_LSF
that handles large files. This doesn't make a lot of sense, you typically
want both or none. So get rid of CONFIG_LSF and change CONFIG_LBD wording
to indicate that it covers both.

Acked-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/Kconfig         | 23 +++++------------------
 fs/ext4/super.c       |  8 ++++----
 include/linux/types.h | 11 +++--------
 3 files changed, 12 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/block/Kconfig b/block/Kconfig
index 290b219fad9c..ac0956f77785 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -24,21 +24,17 @@ menuconfig BLOCK
 if BLOCK
 
 config LBD
-	bool "Support for Large Block Devices"
+	bool "Support for large block devices and files"
 	depends on !64BIT
 	help
-	  Enable block devices of size 2TB and larger.
+	  Enable block devices or files of size 2TB and larger.
 
 	  This option is required to support the full capacity of large
 	  (2TB+) block devices, including RAID, disk, Network Block Device,
 	  Logical Volume Manager (LVM) and loopback.
-
-	  For example, RAID devices are frequently bigger than the capacity
-	  of the largest individual hard drive.
-
-	  This option is not required if you have individual disk drives
-	  which total 2TB+ and you are not aggregating the capacity into
-	  a large block device (e.g. using RAID or LVM).
+	
+	  This option also enables support for single files larger than
+	  2TB.
 
 	  If unsure, say N.
 
@@ -58,15 +54,6 @@ config BLK_DEV_IO_TRACE
 
 	  If unsure, say N.
 
-config LSF
-	bool "Support for Large Single Files"
-	depends on !64BIT
-	help
-	  Say Y here if you want to be able to handle very large files (2TB
-	  and larger), otherwise say N.
-
-	  If unsure, say Y.
-
 config BLK_DEV_BSG
 	bool "Block layer SG support v4 (EXPERIMENTAL)"
 	depends on EXPERIMENTAL
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index e4a241c65dbe..04158ad74dbb 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1721,7 +1721,7 @@ static loff_t ext4_max_size(int blkbits, int has_huge_files)
 	/* small i_blocks in vfs inode? */
 	if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
 		/*
-		 * CONFIG_LSF is not enabled implies the inode
+		 * CONFIG_LBD is not enabled implies the inode
 		 * i_block represent total blocks in 512 bytes
 		 * 32 == size of vfs inode i_blocks * 8
 		 */
@@ -1764,7 +1764,7 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
 
 	if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
 		/*
-		 * !has_huge_files or CONFIG_LSF is not enabled
+		 * !has_huge_files or CONFIG_LBD is not enabled
 		 * implies the inode i_block represent total blocks in
 		 * 512 bytes 32 == size of vfs inode i_blocks * 8
 		 */
@@ -2021,13 +2021,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	if (has_huge_files) {
 		/*
 		 * Large file size enabled file system can only be
-		 * mount if kernel is build with CONFIG_LSF
+		 * mount if kernel is build with CONFIG_LBD
 		 */
 		if (sizeof(root->i_blocks) < sizeof(u64) &&
 				!(sb->s_flags & MS_RDONLY)) {
 			printk(KERN_ERR "EXT4-fs: %s: Filesystem with huge "
 					"files cannot be mounted read-write "
-					"without CONFIG_LSF.\n", sb->s_id);
+					"without CONFIG_LBD.\n", sb->s_id);
 			goto failed_mount;
 		}
 	}
diff --git a/include/linux/types.h b/include/linux/types.h
index 1d98330b1f2c..121f349cb7ec 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -135,19 +135,14 @@ typedef		__s64		int64_t;
  *
  * Linux always considers sectors to be 512 bytes long independently
  * of the devices real block size.
+ *
+ * blkcnt_t is the type of the inode's block count.
  */
 #ifdef CONFIG_LBD
 typedef u64 sector_t;
-#else
-typedef unsigned long sector_t;
-#endif
-
-/*
- * The type of the inode's block count.
- */
-#ifdef CONFIG_LSF
 typedef u64 blkcnt_t;
 #else
+typedef unsigned long sector_t;
 typedef unsigned long blkcnt_t;
 #endif
 
-- 
cgit v1.2.3


From f453ba0460742ad027ae0c4c7d61e62817b3e7ef Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Fri, 7 Nov 2008 14:05:41 -0800
Subject: DRM: add mode setting support

Add mode setting support to the DRM layer.

This is a fairly big chunk of work that allows DRM drivers to provide
full output control and configuration capabilities to userspace.  It was
motivated by several factors:
  - the fb layer's APIs aren't suited for anything but simple
    configurations
  - coordination between the fb layer, DRM layer, and various userspace
    drivers is poor to non-existent (radeonfb excepted)
  - user level mode setting drivers makes displaying panic & oops
    messages more difficult
  - suspend/resume of graphics state is possible in many more
    configurations with kernel level support

This commit just adds the core DRM part of the mode setting APIs.
Driver specific commits using these new structure and APIs will follow.

Co-authors: Jesse Barnes <jbarnes@virtuousgeek.org>, Jakob Bornecrantz <jakob@tungstengraphics.com>
Contributors: Alan Hourihane <alanh@tungstengraphics.com>, Maarten Maathuis <madman2003@gmail.com>

Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/Makefile          |    3 +-
 drivers/gpu/drm/drm_crtc.c        | 2497 +++++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/drm_crtc_helper.c |  822 ++++++++++++
 drivers/gpu/drm/drm_drv.c         |   34 +-
 drivers/gpu/drm/drm_edid.c        |  732 +++++++++++
 drivers/gpu/drm/drm_fops.c        |   22 +-
 drivers/gpu/drm/drm_irq.c         |   64 +-
 drivers/gpu/drm/drm_mm.c          |    1 +
 drivers/gpu/drm/drm_modes.c       |  576 +++++++++
 drivers/gpu/drm/drm_stub.c        |   30 +-
 drivers/gpu/drm/drm_sysfs.c       |  329 ++++-
 drivers/video/console/vgacon.c    |   17 +
 include/drm/Kbuild                |    2 +-
 include/drm/drm.h                 |   21 +
 include/drm/drmP.h                |   18 +
 include/drm/drm_crtc.h            |  737 +++++++++++
 include/drm/drm_crtc_helper.h     |  121 ++
 include/drm/drm_edid.h            |  202 +++
 include/drm/drm_mode.h            |  278 +++++
 include/linux/console.h           |    4 +
 20 files changed, 6469 insertions(+), 41 deletions(-)
 create mode 100644 drivers/gpu/drm/drm_crtc.c
 create mode 100644 drivers/gpu/drm/drm_crtc_helper.c
 create mode 100644 drivers/gpu/drm/drm_edid.c
 create mode 100644 drivers/gpu/drm/drm_modes.c
 create mode 100644 include/drm/drm_crtc.h
 create mode 100644 include/drm/drm_crtc_helper.h
 create mode 100644 include/drm/drm_edid.h
 create mode 100644 include/drm/drm_mode.h

(limited to 'include/linux')

diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 74da99495e21..30022c4a5c12 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -9,7 +9,8 @@ drm-y       :=	drm_auth.o drm_bufs.o drm_cache.o \
 		drm_drv.o drm_fops.o drm_gem.o drm_ioctl.o drm_irq.o \
 		drm_lock.o drm_memory.o drm_proc.o drm_stub.o drm_vm.o \
 		drm_agpsupport.o drm_scatter.o ati_pcigart.o drm_pci.o \
-		drm_sysfs.o drm_hashtab.o drm_sman.o drm_mm.o
+		drm_sysfs.o drm_hashtab.o drm_sman.o drm_mm.o \
+		drm_crtc.o drm_crtc_helper.o drm_modes.o drm_edid.o
 
 drm-$(CONFIG_COMPAT) += drm_ioc32.o
 
diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
new file mode 100644
index 000000000000..2e880240477e
--- /dev/null
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -0,0 +1,2497 @@
+/*
+ * Copyright (c) 2006-2008 Intel Corporation
+ * Copyright (c) 2007 Dave Airlie <airlied@linux.ie>
+ * Copyright (c) 2008 Red Hat Inc.
+ *
+ * DRM core CRTC related functions
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that copyright
+ * notice and this permission notice appear in supporting documentation, and
+ * that the name of the copyright holders not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission.  The copyright holders make no representations
+ * about the suitability of this software for any purpose.  It is provided "as
+ * is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ *
+ * Authors:
+ *      Keith Packard
+ *	Eric Anholt <eric@anholt.net>
+ *      Dave Airlie <airlied@linux.ie>
+ *      Jesse Barnes <jesse.barnes@intel.com>
+ */
+#include <linux/list.h>
+#include "drm.h"
+#include "drmP.h"
+#include "drm_crtc.h"
+
+struct drm_prop_enum_list {
+	int type;
+	char *name;
+};
+
+/* Avoid boilerplate.  I'm tired of typing. */
+#define DRM_ENUM_NAME_FN(fnname, list)				\
+	char *fnname(int val)					\
+	{							\
+		int i;						\
+		for (i = 0; i < ARRAY_SIZE(list); i++) {	\
+			if (list[i].type == val)		\
+				return list[i].name;		\
+		}						\
+		return "(unknown)";				\
+	}
+
+/*
+ * Global properties
+ */
+static struct drm_prop_enum_list drm_dpms_enum_list[] =
+{	{ DRM_MODE_DPMS_ON, "On" },
+	{ DRM_MODE_DPMS_STANDBY, "Standby" },
+	{ DRM_MODE_DPMS_SUSPEND, "Suspend" },
+	{ DRM_MODE_DPMS_OFF, "Off" }
+};
+
+DRM_ENUM_NAME_FN(drm_get_dpms_name, drm_dpms_enum_list)
+
+/*
+ * Optional properties
+ */
+static struct drm_prop_enum_list drm_scaling_mode_enum_list[] =
+{
+	{ DRM_MODE_SCALE_NON_GPU, "Non-GPU" },
+	{ DRM_MODE_SCALE_FULLSCREEN, "Fullscreen" },
+	{ DRM_MODE_SCALE_NO_SCALE, "No scale" },
+	{ DRM_MODE_SCALE_ASPECT, "Aspect" },
+};
+
+static struct drm_prop_enum_list drm_dithering_mode_enum_list[] =
+{
+	{ DRM_MODE_DITHERING_OFF, "Off" },
+	{ DRM_MODE_DITHERING_ON, "On" },
+};
+
+/*
+ * Non-global properties, but "required" for certain connectors.
+ */
+static struct drm_prop_enum_list drm_dvi_i_select_enum_list[] =
+{
+	{ DRM_MODE_SUBCONNECTOR_Automatic, "Automatic" }, /* DVI-I and TV-out */
+	{ DRM_MODE_SUBCONNECTOR_DVID,      "DVI-D"     }, /* DVI-I  */
+	{ DRM_MODE_SUBCONNECTOR_DVIA,      "DVI-A"     }, /* DVI-I  */
+};
+
+DRM_ENUM_NAME_FN(drm_get_dvi_i_select_name, drm_dvi_i_select_enum_list)
+
+static struct drm_prop_enum_list drm_dvi_i_subconnector_enum_list[] =
+{
+	{ DRM_MODE_SUBCONNECTOR_Unknown,   "Unknown"   }, /* DVI-I and TV-out */
+	{ DRM_MODE_SUBCONNECTOR_DVID,      "DVI-D"     }, /* DVI-I  */
+	{ DRM_MODE_SUBCONNECTOR_DVIA,      "DVI-A"     }, /* DVI-I  */
+};
+
+DRM_ENUM_NAME_FN(drm_get_dvi_i_subconnector_name,
+		 drm_dvi_i_subconnector_enum_list)
+
+static struct drm_prop_enum_list drm_tv_select_enum_list[] =
+{
+	{ DRM_MODE_SUBCONNECTOR_Automatic, "Automatic" }, /* DVI-I and TV-out */
+	{ DRM_MODE_SUBCONNECTOR_Composite, "Composite" }, /* TV-out */
+	{ DRM_MODE_SUBCONNECTOR_SVIDEO,    "SVIDEO"    }, /* TV-out */
+	{ DRM_MODE_SUBCONNECTOR_Component, "Component" }, /* TV-out */
+};
+
+DRM_ENUM_NAME_FN(drm_get_tv_select_name, drm_tv_select_enum_list)
+
+static struct drm_prop_enum_list drm_tv_subconnector_enum_list[] =
+{
+	{ DRM_MODE_SUBCONNECTOR_Unknown,   "Unknown"   }, /* DVI-I and TV-out */
+	{ DRM_MODE_SUBCONNECTOR_Composite, "Composite" }, /* TV-out */
+	{ DRM_MODE_SUBCONNECTOR_SVIDEO,    "SVIDEO"    }, /* TV-out */
+	{ DRM_MODE_SUBCONNECTOR_Component, "Component" }, /* TV-out */
+};
+
+DRM_ENUM_NAME_FN(drm_get_tv_subconnector_name,
+		 drm_tv_subconnector_enum_list)
+
+struct drm_conn_prop_enum_list {
+	int type;
+	char *name;
+	int count;
+};
+
+/*
+ * Connector and encoder types.
+ */
+static struct drm_conn_prop_enum_list drm_connector_enum_list[] =
+{	{ DRM_MODE_CONNECTOR_Unknown, "Unknown", 0 },
+	{ DRM_MODE_CONNECTOR_VGA, "VGA", 0 },
+	{ DRM_MODE_CONNECTOR_DVII, "DVI-I", 0 },
+	{ DRM_MODE_CONNECTOR_DVID, "DVI-D", 0 },
+	{ DRM_MODE_CONNECTOR_DVIA, "DVI-A", 0 },
+	{ DRM_MODE_CONNECTOR_Composite, "Composite", 0 },
+	{ DRM_MODE_CONNECTOR_SVIDEO, "SVIDEO", 0 },
+	{ DRM_MODE_CONNECTOR_LVDS, "LVDS", 0 },
+	{ DRM_MODE_CONNECTOR_Component, "Component", 0 },
+	{ DRM_MODE_CONNECTOR_9PinDIN, "9-pin DIN", 0 },
+	{ DRM_MODE_CONNECTOR_DisplayPort, "DisplayPort", 0 },
+	{ DRM_MODE_CONNECTOR_HDMIA, "HDMI Type A", 0 },
+	{ DRM_MODE_CONNECTOR_HDMIB, "HDMI Type B", 0 },
+};
+
+static struct drm_prop_enum_list drm_encoder_enum_list[] =
+{	{ DRM_MODE_ENCODER_NONE, "None" },
+	{ DRM_MODE_ENCODER_DAC, "DAC" },
+	{ DRM_MODE_ENCODER_TMDS, "TMDS" },
+	{ DRM_MODE_ENCODER_LVDS, "LVDS" },
+	{ DRM_MODE_ENCODER_TVDAC, "TV" },
+};
+
+char *drm_get_encoder_name(struct drm_encoder *encoder)
+{
+	static char buf[32];
+
+	snprintf(buf, 32, "%s-%d",
+		 drm_encoder_enum_list[encoder->encoder_type].name,
+		 encoder->base.id);
+	return buf;
+}
+
+char *drm_get_connector_name(struct drm_connector *connector)
+{
+	static char buf[32];
+
+	snprintf(buf, 32, "%s-%d",
+		 drm_connector_enum_list[connector->connector_type].name,
+		 connector->connector_type_id);
+	return buf;
+}
+EXPORT_SYMBOL(drm_get_connector_name);
+
+char *drm_get_connector_status_name(enum drm_connector_status status)
+{
+	if (status == connector_status_connected)
+		return "connected";
+	else if (status == connector_status_disconnected)
+		return "disconnected";
+	else
+		return "unknown";
+}
+
+/**
+ * drm_mode_object_get - allocate a new identifier
+ * @dev: DRM device
+ * @ptr: object pointer, used to generate unique ID
+ * @type: object type
+ *
+ * LOCKING:
+ * Caller must hold DRM mode_config lock.
+ *
+ * Create a unique identifier based on @ptr in @dev's identifier space.  Used
+ * for tracking modes, CRTCs and connectors.
+ *
+ * RETURNS:
+ * New unique (relative to other objects in @dev) integer identifier for the
+ * object.
+ */
+static int drm_mode_object_get(struct drm_device *dev,
+			       struct drm_mode_object *obj, uint32_t obj_type)
+{
+	int new_id = 0;
+	int ret;
+
+	WARN(!mutex_is_locked(&dev->mode_config.mutex),
+	     "%s called w/o mode_config lock\n", __FUNCTION__);
+again:
+	if (idr_pre_get(&dev->mode_config.crtc_idr, GFP_KERNEL) == 0) {
+		DRM_ERROR("Ran out memory getting a mode number\n");
+		return -EINVAL;
+	}
+
+	ret = idr_get_new_above(&dev->mode_config.crtc_idr, obj, 1, &new_id);
+	if (ret == -EAGAIN)
+		goto again;
+
+	obj->id = new_id;
+	obj->type = obj_type;
+	return 0;
+}
+
+/**
+ * drm_mode_object_put - free an identifer
+ * @dev: DRM device
+ * @id: ID to free
+ *
+ * LOCKING:
+ * Caller must hold DRM mode_config lock.
+ *
+ * Free @id from @dev's unique identifier pool.
+ */
+static void drm_mode_object_put(struct drm_device *dev,
+				struct drm_mode_object *object)
+{
+	idr_remove(&dev->mode_config.crtc_idr, object->id);
+}
+
+void *drm_mode_object_find(struct drm_device *dev, uint32_t id, uint32_t type)
+{
+	struct drm_mode_object *obj;
+
+	obj = idr_find(&dev->mode_config.crtc_idr, id);
+	if (!obj || (obj->type != type) || (obj->id != id))
+		return NULL;
+
+	return obj;
+}
+EXPORT_SYMBOL(drm_mode_object_find);
+
+/**
+ * drm_crtc_from_fb - find the CRTC structure associated with an fb
+ * @dev: DRM device
+ * @fb: framebuffer in question
+ *
+ * LOCKING:
+ * Caller must hold mode_config lock.
+ *
+ * Find CRTC in the mode_config structure that matches @fb.
+ *
+ * RETURNS:
+ * Pointer to the CRTC or NULL if it wasn't found.
+ */
+struct drm_crtc *drm_crtc_from_fb(struct drm_device *dev,
+				  struct drm_framebuffer *fb)
+{
+	struct drm_crtc *crtc;
+
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+		if (crtc->fb == fb)
+			return crtc;
+	}
+	return NULL;
+}
+
+/**
+ * drm_framebuffer_init - initialize a framebuffer
+ * @dev: DRM device
+ *
+ * LOCKING:
+ * Caller must hold mode config lock.
+ *
+ * Allocates an ID for the framebuffer's parent mode object, sets its mode
+ * functions & device file and adds it to the master fd list.
+ *
+ * RETURNS:
+ * Zero on success, error code on falure.
+ */
+int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb,
+			 const struct drm_framebuffer_funcs *funcs)
+{
+	int ret;
+
+	ret = drm_mode_object_get(dev, &fb->base, DRM_MODE_OBJECT_FB);
+	if (ret) {
+		return ret;
+	}
+
+	fb->dev = dev;
+	fb->funcs = funcs;
+	dev->mode_config.num_fb++;
+	list_add(&fb->head, &dev->mode_config.fb_list);
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_framebuffer_init);
+
+/**
+ * drm_framebuffer_cleanup - remove a framebuffer object
+ * @fb: framebuffer to remove
+ *
+ * LOCKING:
+ * Caller must hold mode config lock.
+ *
+ * Scans all the CRTCs in @dev's mode_config.  If they're using @fb, removes
+ * it, setting it to NULL.
+ */
+void drm_framebuffer_cleanup(struct drm_framebuffer *fb)
+{
+	struct drm_device *dev = fb->dev;
+	struct drm_crtc *crtc;
+
+	/* remove from any CRTC */
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+		if (crtc->fb == fb)
+			crtc->fb = NULL;
+	}
+
+	drm_mode_object_put(dev, &fb->base);
+	list_del(&fb->head);
+	dev->mode_config.num_fb--;
+}
+EXPORT_SYMBOL(drm_framebuffer_cleanup);
+
+/**
+ * drm_crtc_init - Initialise a new CRTC object
+ * @dev: DRM device
+ * @crtc: CRTC object to init
+ * @funcs: callbacks for the new CRTC
+ *
+ * LOCKING:
+ * Caller must hold mode config lock.
+ *
+ * Inits a new object created as base part of an driver crtc object.
+ */
+void drm_crtc_init(struct drm_device *dev, struct drm_crtc *crtc,
+		   const struct drm_crtc_funcs *funcs)
+{
+	crtc->dev = dev;
+	crtc->funcs = funcs;
+
+	mutex_lock(&dev->mode_config.mutex);
+	drm_mode_object_get(dev, &crtc->base, DRM_MODE_OBJECT_CRTC);
+
+	list_add_tail(&crtc->head, &dev->mode_config.crtc_list);
+	dev->mode_config.num_crtc++;
+	mutex_unlock(&dev->mode_config.mutex);
+}
+EXPORT_SYMBOL(drm_crtc_init);
+
+/**
+ * drm_crtc_cleanup - Cleans up the core crtc usage.
+ * @crtc: CRTC to cleanup
+ *
+ * LOCKING:
+ * Caller must hold mode config lock.
+ *
+ * Cleanup @crtc. Removes from drm modesetting space
+ * does NOT free object, caller does that.
+ */
+void drm_crtc_cleanup(struct drm_crtc *crtc)
+{
+	struct drm_device *dev = crtc->dev;
+
+	if (crtc->gamma_store) {
+		kfree(crtc->gamma_store);
+		crtc->gamma_store = NULL;
+	}
+
+	drm_mode_object_put(dev, &crtc->base);
+	list_del(&crtc->head);
+	dev->mode_config.num_crtc--;
+}
+EXPORT_SYMBOL(drm_crtc_cleanup);
+
+/**
+ * drm_mode_probed_add - add a mode to a connector's probed mode list
+ * @connector: connector the new mode
+ * @mode: mode data
+ *
+ * LOCKING:
+ * Caller must hold mode config lock.
+ *
+ * Add @mode to @connector's mode list for later use.
+ */
+void drm_mode_probed_add(struct drm_connector *connector,
+			 struct drm_display_mode *mode)
+{
+	list_add(&mode->head, &connector->probed_modes);
+}
+EXPORT_SYMBOL(drm_mode_probed_add);
+
+/**
+ * drm_mode_remove - remove and free a mode
+ * @connector: connector list to modify
+ * @mode: mode to remove
+ *
+ * LOCKING:
+ * Caller must hold mode config lock.
+ *
+ * Remove @mode from @connector's mode list, then free it.
+ */
+void drm_mode_remove(struct drm_connector *connector,
+		     struct drm_display_mode *mode)
+{
+	list_del(&mode->head);
+	kfree(mode);
+}
+EXPORT_SYMBOL(drm_mode_remove);
+
+/**
+ * drm_connector_init - Init a preallocated connector
+ * @dev: DRM device
+ * @connector: the connector to init
+ * @funcs: callbacks for this connector
+ * @name: user visible name of the connector
+ *
+ * LOCKING:
+ * Caller must hold @dev's mode_config lock.
+ *
+ * Initialises a preallocated connector. Connectors should be
+ * subclassed as part of driver connector objects.
+ */
+void drm_connector_init(struct drm_device *dev,
+		     struct drm_connector *connector,
+		     const struct drm_connector_funcs *funcs,
+		     int connector_type)
+{
+	mutex_lock(&dev->mode_config.mutex);
+
+	connector->dev = dev;
+	connector->funcs = funcs;
+	drm_mode_object_get(dev, &connector->base, DRM_MODE_OBJECT_CONNECTOR);
+	connector->connector_type = connector_type;
+	connector->connector_type_id =
+		++drm_connector_enum_list[connector_type].count; /* TODO */
+	INIT_LIST_HEAD(&connector->user_modes);
+	INIT_LIST_HEAD(&connector->probed_modes);
+	INIT_LIST_HEAD(&connector->modes);
+	connector->edid_blob_ptr = NULL;
+
+	list_add_tail(&connector->head, &dev->mode_config.connector_list);
+	dev->mode_config.num_connector++;
+
+	drm_connector_attach_property(connector,
+				      dev->mode_config.edid_property, 0);
+
+	drm_connector_attach_property(connector,
+				      dev->mode_config.dpms_property, 0);
+
+	mutex_unlock(&dev->mode_config.mutex);
+}
+EXPORT_SYMBOL(drm_connector_init);
+
+/**
+ * drm_connector_cleanup - cleans up an initialised connector
+ * @connector: connector to cleanup
+ *
+ * LOCKING:
+ * Caller must hold @dev's mode_config lock.
+ *
+ * Cleans up the connector but doesn't free the object.
+ */
+void drm_connector_cleanup(struct drm_connector *connector)
+{
+	struct drm_device *dev = connector->dev;
+	struct drm_display_mode *mode, *t;
+
+	list_for_each_entry_safe(mode, t, &connector->probed_modes, head)
+		drm_mode_remove(connector, mode);
+
+	list_for_each_entry_safe(mode, t, &connector->modes, head)
+		drm_mode_remove(connector, mode);
+
+	list_for_each_entry_safe(mode, t, &connector->user_modes, head)
+		drm_mode_remove(connector, mode);
+
+	mutex_lock(&dev->mode_config.mutex);
+	drm_mode_object_put(dev, &connector->base);
+	list_del(&connector->head);
+	mutex_unlock(&dev->mode_config.mutex);
+}
+EXPORT_SYMBOL(drm_connector_cleanup);
+
+void drm_encoder_init(struct drm_device *dev,
+		      struct drm_encoder *encoder,
+		      const struct drm_encoder_funcs *funcs,
+		      int encoder_type)
+{
+	mutex_lock(&dev->mode_config.mutex);
+
+	encoder->dev = dev;
+
+	drm_mode_object_get(dev, &encoder->base, DRM_MODE_OBJECT_ENCODER);
+	encoder->encoder_type = encoder_type;
+	encoder->funcs = funcs;
+
+	list_add_tail(&encoder->head, &dev->mode_config.encoder_list);
+	dev->mode_config.num_encoder++;
+
+	mutex_unlock(&dev->mode_config.mutex);
+}
+EXPORT_SYMBOL(drm_encoder_init);
+
+void drm_encoder_cleanup(struct drm_encoder *encoder)
+{
+	struct drm_device *dev = encoder->dev;
+	mutex_lock(&dev->mode_config.mutex);
+	drm_mode_object_put(dev, &encoder->base);
+	list_del(&encoder->head);
+	mutex_unlock(&dev->mode_config.mutex);
+}
+EXPORT_SYMBOL(drm_encoder_cleanup);
+
+/**
+ * drm_mode_create - create a new display mode
+ * @dev: DRM device
+ *
+ * LOCKING:
+ * Caller must hold DRM mode_config lock.
+ *
+ * Create a new drm_display_mode, give it an ID, and return it.
+ *
+ * RETURNS:
+ * Pointer to new mode on success, NULL on error.
+ */
+struct drm_display_mode *drm_mode_create(struct drm_device *dev)
+{
+	struct drm_display_mode *nmode;
+
+	nmode = kzalloc(sizeof(struct drm_display_mode), GFP_KERNEL);
+	if (!nmode)
+		return NULL;
+
+	drm_mode_object_get(dev, &nmode->base, DRM_MODE_OBJECT_MODE);
+	return nmode;
+}
+EXPORT_SYMBOL(drm_mode_create);
+
+/**
+ * drm_mode_destroy - remove a mode
+ * @dev: DRM device
+ * @mode: mode to remove
+ *
+ * LOCKING:
+ * Caller must hold mode config lock.
+ *
+ * Free @mode's unique identifier, then free it.
+ */
+void drm_mode_destroy(struct drm_device *dev, struct drm_display_mode *mode)
+{
+	drm_mode_object_put(dev, &mode->base);
+
+	kfree(mode);
+}
+EXPORT_SYMBOL(drm_mode_destroy);
+
+static int drm_mode_create_standard_connector_properties(struct drm_device *dev)
+{
+	struct drm_property *edid;
+	struct drm_property *dpms;
+	int i;
+
+	/*
+	 * Standard properties (apply to all connectors)
+	 */
+	edid = drm_property_create(dev, DRM_MODE_PROP_BLOB |
+				   DRM_MODE_PROP_IMMUTABLE,
+				   "EDID", 0);
+	dev->mode_config.edid_property = edid;
+
+	dpms = drm_property_create(dev, DRM_MODE_PROP_ENUM,
+				   "DPMS", ARRAY_SIZE(drm_dpms_enum_list));
+	for (i = 0; i < ARRAY_SIZE(drm_dpms_enum_list); i++)
+		drm_property_add_enum(dpms, i, drm_dpms_enum_list[i].type,
+				      drm_dpms_enum_list[i].name);
+	dev->mode_config.dpms_property = dpms;
+
+	return 0;
+}
+
+/**
+ * drm_mode_create_dvi_i_properties - create DVI-I specific connector properties
+ * @dev: DRM device
+ *
+ * Called by a driver the first time a DVI-I connector is made.
+ */
+int drm_mode_create_dvi_i_properties(struct drm_device *dev)
+{
+	struct drm_property *dvi_i_selector;
+	struct drm_property *dvi_i_subconnector;
+	int i;
+
+	if (dev->mode_config.dvi_i_select_subconnector_property)
+		return 0;
+
+	dvi_i_selector =
+		drm_property_create(dev, DRM_MODE_PROP_ENUM,
+				    "select subconnector",
+				    ARRAY_SIZE(drm_dvi_i_select_enum_list));
+	for (i = 0; i < ARRAY_SIZE(drm_dvi_i_select_enum_list); i++)
+		drm_property_add_enum(dvi_i_selector, i,
+				      drm_dvi_i_select_enum_list[i].type,
+				      drm_dvi_i_select_enum_list[i].name);
+	dev->mode_config.dvi_i_select_subconnector_property = dvi_i_selector;
+
+	dvi_i_subconnector =
+		drm_property_create(dev, DRM_MODE_PROP_ENUM |
+				    DRM_MODE_PROP_IMMUTABLE,
+				    "subconnector",
+				    ARRAY_SIZE(drm_dvi_i_subconnector_enum_list));
+	for (i = 0; i < ARRAY_SIZE(drm_dvi_i_subconnector_enum_list); i++)
+		drm_property_add_enum(dvi_i_subconnector, i,
+				      drm_dvi_i_subconnector_enum_list[i].type,
+				      drm_dvi_i_subconnector_enum_list[i].name);
+	dev->mode_config.dvi_i_subconnector_property = dvi_i_subconnector;
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_mode_create_dvi_i_properties);
+
+/**
+ * drm_create_tv_properties - create TV specific connector properties
+ * @dev: DRM device
+ * @num_modes: number of different TV formats (modes) supported
+ * @modes: array of pointers to strings containing name of each format
+ *
+ * Called by a driver's TV initialization routine, this function creates
+ * the TV specific connector properties for a given device.  Caller is
+ * responsible for allocating a list of format names and passing them to
+ * this routine.
+ */
+int drm_mode_create_tv_properties(struct drm_device *dev, int num_modes,
+				  char *modes[])
+{
+	struct drm_property *tv_selector;
+	struct drm_property *tv_subconnector;
+	int i;
+
+	if (dev->mode_config.tv_select_subconnector_property)
+		return 0;
+
+	/*
+	 * Basic connector properties
+	 */
+	tv_selector = drm_property_create(dev, DRM_MODE_PROP_ENUM,
+					  "select subconnector",
+					  ARRAY_SIZE(drm_tv_select_enum_list));
+	for (i = 0; i < ARRAY_SIZE(drm_tv_select_enum_list); i++)
+		drm_property_add_enum(tv_selector, i,
+				      drm_tv_select_enum_list[i].type,
+				      drm_tv_select_enum_list[i].name);
+	dev->mode_config.tv_select_subconnector_property = tv_selector;
+
+	tv_subconnector =
+		drm_property_create(dev, DRM_MODE_PROP_ENUM |
+				    DRM_MODE_PROP_IMMUTABLE, "subconnector",
+				    ARRAY_SIZE(drm_tv_subconnector_enum_list));
+	for (i = 0; i < ARRAY_SIZE(drm_tv_subconnector_enum_list); i++)
+		drm_property_add_enum(tv_subconnector, i,
+				      drm_tv_subconnector_enum_list[i].type,
+				      drm_tv_subconnector_enum_list[i].name);
+	dev->mode_config.tv_subconnector_property = tv_subconnector;
+
+	/*
+	 * Other, TV specific properties: margins & TV modes.
+	 */
+	dev->mode_config.tv_left_margin_property =
+		drm_property_create(dev, DRM_MODE_PROP_RANGE,
+				    "left margin", 2);
+	dev->mode_config.tv_left_margin_property->values[0] = 0;
+	dev->mode_config.tv_left_margin_property->values[1] = 100;
+
+	dev->mode_config.tv_right_margin_property =
+		drm_property_create(dev, DRM_MODE_PROP_RANGE,
+				    "right margin", 2);
+	dev->mode_config.tv_right_margin_property->values[0] = 0;
+	dev->mode_config.tv_right_margin_property->values[1] = 100;
+
+	dev->mode_config.tv_top_margin_property =
+		drm_property_create(dev, DRM_MODE_PROP_RANGE,
+				    "top margin", 2);
+	dev->mode_config.tv_top_margin_property->values[0] = 0;
+	dev->mode_config.tv_top_margin_property->values[1] = 100;
+
+	dev->mode_config.tv_bottom_margin_property =
+		drm_property_create(dev, DRM_MODE_PROP_RANGE,
+				    "bottom margin", 2);
+	dev->mode_config.tv_bottom_margin_property->values[0] = 0;
+	dev->mode_config.tv_bottom_margin_property->values[1] = 100;
+
+	dev->mode_config.tv_mode_property =
+		drm_property_create(dev, DRM_MODE_PROP_ENUM,
+				    "mode", num_modes);
+	for (i = 0; i < num_modes; i++)
+		drm_property_add_enum(dev->mode_config.tv_mode_property, i,
+				      i, modes[i]);
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_mode_create_tv_properties);
+
+/**
+ * drm_mode_create_scaling_mode_property - create scaling mode property
+ * @dev: DRM device
+ *
+ * Called by a driver the first time it's needed, must be attached to desired
+ * connectors.
+ */
+int drm_mode_create_scaling_mode_property(struct drm_device *dev)
+{
+	struct drm_property *scaling_mode;
+	int i;
+
+	if (dev->mode_config.scaling_mode_property)
+		return 0;
+
+	scaling_mode =
+		drm_property_create(dev, DRM_MODE_PROP_ENUM, "scaling mode",
+				    ARRAY_SIZE(drm_scaling_mode_enum_list));
+	for (i = 0; i < ARRAY_SIZE(drm_scaling_mode_enum_list); i++)
+		drm_property_add_enum(scaling_mode, i,
+				      drm_scaling_mode_enum_list[i].type,
+				      drm_scaling_mode_enum_list[i].name);
+
+	dev->mode_config.scaling_mode_property = scaling_mode;
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_mode_create_scaling_mode_property);
+
+/**
+ * drm_mode_create_dithering_property - create dithering property
+ * @dev: DRM device
+ *
+ * Called by a driver the first time it's needed, must be attached to desired
+ * connectors.
+ */
+int drm_mode_create_dithering_property(struct drm_device *dev)
+{
+	struct drm_property *dithering_mode;
+	int i;
+
+	if (dev->mode_config.dithering_mode_property)
+		return 0;
+
+	dithering_mode =
+		drm_property_create(dev, DRM_MODE_PROP_ENUM, "dithering",
+				    ARRAY_SIZE(drm_dithering_mode_enum_list));
+	for (i = 0; i < ARRAY_SIZE(drm_dithering_mode_enum_list); i++)
+		drm_property_add_enum(dithering_mode, i,
+				      drm_dithering_mode_enum_list[i].type,
+				      drm_dithering_mode_enum_list[i].name);
+	dev->mode_config.dithering_mode_property = dithering_mode;
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_mode_create_dithering_property);
+
+/**
+ * drm_mode_config_init - initialize DRM mode_configuration structure
+ * @dev: DRM device
+ *
+ * LOCKING:
+ * None, should happen single threaded at init time.
+ *
+ * Initialize @dev's mode_config structure, used for tracking the graphics
+ * configuration of @dev.
+ */
+void drm_mode_config_init(struct drm_device *dev)
+{
+	mutex_init(&dev->mode_config.mutex);
+	INIT_LIST_HEAD(&dev->mode_config.fb_list);
+	INIT_LIST_HEAD(&dev->mode_config.fb_kernel_list);
+	INIT_LIST_HEAD(&dev->mode_config.crtc_list);
+	INIT_LIST_HEAD(&dev->mode_config.connector_list);
+	INIT_LIST_HEAD(&dev->mode_config.encoder_list);
+	INIT_LIST_HEAD(&dev->mode_config.property_list);
+	INIT_LIST_HEAD(&dev->mode_config.property_blob_list);
+	idr_init(&dev->mode_config.crtc_idr);
+
+	mutex_lock(&dev->mode_config.mutex);
+	drm_mode_create_standard_connector_properties(dev);
+	mutex_unlock(&dev->mode_config.mutex);
+
+	/* Just to be sure */
+	dev->mode_config.num_fb = 0;
+	dev->mode_config.num_connector = 0;
+	dev->mode_config.num_crtc = 0;
+	dev->mode_config.num_encoder = 0;
+	dev->mode_config.hotplug_counter = 0;
+}
+EXPORT_SYMBOL(drm_mode_config_init);
+
+int drm_mode_group_init(struct drm_device *dev, struct drm_mode_group *group)
+{
+	uint32_t total_objects = 0;
+
+	total_objects += dev->mode_config.num_crtc;
+	total_objects += dev->mode_config.num_connector;
+	total_objects += dev->mode_config.num_encoder;
+
+	if (total_objects == 0)
+		return -EINVAL;
+
+	group->id_list = kzalloc(total_objects * sizeof(uint32_t), GFP_KERNEL);
+	if (!group->id_list)
+		return -ENOMEM;
+
+	group->num_crtcs = 0;
+	group->num_connectors = 0;
+	group->num_encoders = 0;
+	return 0;
+}
+
+int drm_mode_group_init_legacy_group(struct drm_device *dev,
+				     struct drm_mode_group *group)
+{
+	struct drm_crtc *crtc;
+	struct drm_encoder *encoder;
+	struct drm_connector *connector;
+	int ret;
+
+	if ((ret = drm_mode_group_init(dev, group)))
+		return ret;
+
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
+		group->id_list[group->num_crtcs++] = crtc->base.id;
+
+	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head)
+		group->id_list[group->num_crtcs + group->num_encoders++] =
+		encoder->base.id;
+
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head)
+		group->id_list[group->num_crtcs + group->num_encoders +
+			       group->num_connectors++] = connector->base.id;
+
+	return 0;
+}
+
+/**
+ * drm_mode_config_cleanup - free up DRM mode_config info
+ * @dev: DRM device
+ *
+ * LOCKING:
+ * Caller must hold mode config lock.
+ *
+ * Free up all the connectors and CRTCs associated with this DRM device, then
+ * free up the framebuffers and associated buffer objects.
+ *
+ * FIXME: cleanup any dangling user buffer objects too
+ */
+void drm_mode_config_cleanup(struct drm_device *dev)
+{
+	struct drm_connector *connector, *ot;
+	struct drm_crtc *crtc, *ct;
+	struct drm_encoder *encoder, *enct;
+	struct drm_framebuffer *fb, *fbt;
+	struct drm_property *property, *pt;
+
+	list_for_each_entry_safe(encoder, enct, &dev->mode_config.encoder_list,
+				 head) {
+		encoder->funcs->destroy(encoder);
+	}
+
+	list_for_each_entry_safe(connector, ot,
+				 &dev->mode_config.connector_list, head) {
+		connector->funcs->destroy(connector);
+	}
+
+	list_for_each_entry_safe(property, pt, &dev->mode_config.property_list,
+				 head) {
+		drm_property_destroy(dev, property);
+	}
+
+	list_for_each_entry_safe(fb, fbt, &dev->mode_config.fb_list, head) {
+		fb->funcs->destroy(fb);
+	}
+
+	list_for_each_entry_safe(crtc, ct, &dev->mode_config.crtc_list, head) {
+		crtc->funcs->destroy(crtc);
+	}
+
+}
+EXPORT_SYMBOL(drm_mode_config_cleanup);
+
+int drm_mode_hotplug_ioctl(struct drm_device *dev,
+			   void *data, struct drm_file *file_priv)
+{
+	struct drm_mode_hotplug *arg = data;
+
+	arg->counter = dev->mode_config.hotplug_counter;
+
+	return 0;
+}
+
+/**
+ * drm_crtc_convert_to_umode - convert a drm_display_mode into a modeinfo
+ * @out: drm_mode_modeinfo struct to return to the user
+ * @in: drm_display_mode to use
+ *
+ * LOCKING:
+ * None.
+ *
+ * Convert a drm_display_mode into a drm_mode_modeinfo structure to return to
+ * the user.
+ */
+void drm_crtc_convert_to_umode(struct drm_mode_modeinfo *out,
+			       struct drm_display_mode *in)
+{
+	out->clock = in->clock;
+	out->hdisplay = in->hdisplay;
+	out->hsync_start = in->hsync_start;
+	out->hsync_end = in->hsync_end;
+	out->htotal = in->htotal;
+	out->hskew = in->hskew;
+	out->vdisplay = in->vdisplay;
+	out->vsync_start = in->vsync_start;
+	out->vsync_end = in->vsync_end;
+	out->vtotal = in->vtotal;
+	out->vscan = in->vscan;
+	out->vrefresh = in->vrefresh;
+	out->flags = in->flags;
+	out->type = in->type;
+	strncpy(out->name, in->name, DRM_DISPLAY_MODE_LEN);
+	out->name[DRM_DISPLAY_MODE_LEN-1] = 0;
+}
+
+/**
+ * drm_crtc_convert_to_umode - convert a modeinfo into a drm_display_mode
+ * @out: drm_display_mode to return to the user
+ * @in: drm_mode_modeinfo to use
+ *
+ * LOCKING:
+ * None.
+ *
+ * Convert a drm_mode_modeinfo into a drm_display_mode structure to return to
+ * the caller.
+ */
+void drm_crtc_convert_umode(struct drm_display_mode *out,
+			    struct drm_mode_modeinfo *in)
+{
+	out->clock = in->clock;
+	out->hdisplay = in->hdisplay;
+	out->hsync_start = in->hsync_start;
+	out->hsync_end = in->hsync_end;
+	out->htotal = in->htotal;
+	out->hskew = in->hskew;
+	out->vdisplay = in->vdisplay;
+	out->vsync_start = in->vsync_start;
+	out->vsync_end = in->vsync_end;
+	out->vtotal = in->vtotal;
+	out->vscan = in->vscan;
+	out->vrefresh = in->vrefresh;
+	out->flags = in->flags;
+	out->type = in->type;
+	strncpy(out->name, in->name, DRM_DISPLAY_MODE_LEN);
+	out->name[DRM_DISPLAY_MODE_LEN-1] = 0;
+}
+
+/**
+ * drm_mode_getresources - get graphics configuration
+ * @inode: inode from the ioctl
+ * @filp: file * from the ioctl
+ * @cmd: cmd from ioctl
+ * @arg: arg from ioctl
+ *
+ * LOCKING:
+ * Takes mode config lock.
+ *
+ * Construct a set of configuration description structures and return
+ * them to the user, including CRTC, connector and framebuffer configuration.
+ *
+ * Called by the user via ioctl.
+ *
+ * RETURNS:
+ * Zero on success, errno on failure.
+ */
+int drm_mode_getresources(struct drm_device *dev, void *data,
+			  struct drm_file *file_priv)
+{
+	struct drm_mode_card_res *card_res = data;
+	struct list_head *lh;
+	struct drm_framebuffer *fb;
+	struct drm_connector *connector;
+	struct drm_crtc *crtc;
+	struct drm_encoder *encoder;
+	int ret = 0;
+	int connector_count = 0;
+	int crtc_count = 0;
+	int fb_count = 0;
+	int encoder_count = 0;
+	int copied = 0, i;
+	uint32_t __user *fb_id;
+	uint32_t __user *crtc_id;
+	uint32_t __user *connector_id;
+	uint32_t __user *encoder_id;
+	struct drm_mode_group *mode_group;
+
+	mutex_lock(&dev->mode_config.mutex);
+
+	/*
+	 * For the non-control nodes we need to limit the list of resources
+	 * by IDs in the group list for this node
+	 */
+	list_for_each(lh, &file_priv->fbs)
+		fb_count++;
+
+	mode_group = &file_priv->master->minor->mode_group;
+	if (file_priv->master->minor->type == DRM_MINOR_CONTROL) {
+
+		list_for_each(lh, &dev->mode_config.crtc_list)
+			crtc_count++;
+
+		list_for_each(lh, &dev->mode_config.connector_list)
+			connector_count++;
+
+		list_for_each(lh, &dev->mode_config.encoder_list)
+			encoder_count++;
+	} else {
+
+		crtc_count = mode_group->num_crtcs;
+		connector_count = mode_group->num_connectors;
+		encoder_count = mode_group->num_encoders;
+	}
+
+	card_res->max_height = dev->mode_config.max_height;
+	card_res->min_height = dev->mode_config.min_height;
+	card_res->max_width = dev->mode_config.max_width;
+	card_res->min_width = dev->mode_config.min_width;
+
+	/* handle this in 4 parts */
+	/* FBs */
+	if (card_res->count_fbs >= fb_count) {
+		copied = 0;
+		fb_id = (uint32_t __user *)(unsigned long)card_res->fb_id_ptr;
+		list_for_each_entry(fb, &file_priv->fbs, head) {
+			if (put_user(fb->base.id, fb_id + copied)) {
+				ret = -EFAULT;
+				goto out;
+			}
+			copied++;
+		}
+	}
+	card_res->count_fbs = fb_count;
+
+	/* CRTCs */
+	if (card_res->count_crtcs >= crtc_count) {
+		copied = 0;
+		crtc_id = (uint32_t __user *)(unsigned long)card_res->crtc_id_ptr;
+		if (file_priv->master->minor->type == DRM_MINOR_CONTROL) {
+			list_for_each_entry(crtc, &dev->mode_config.crtc_list,
+					    head) {
+				DRM_DEBUG("CRTC ID is %d\n", crtc->base.id);
+				if (put_user(crtc->base.id, crtc_id + copied)) {
+					ret = -EFAULT;
+					goto out;
+				}
+				copied++;
+			}
+		} else {
+			for (i = 0; i < mode_group->num_crtcs; i++) {
+				if (put_user(mode_group->id_list[i],
+					     crtc_id + copied)) {
+					ret = -EFAULT;
+					goto out;
+				}
+				copied++;
+			}
+		}
+	}
+	card_res->count_crtcs = crtc_count;
+
+	/* Encoders */
+	if (card_res->count_encoders >= encoder_count) {
+		copied = 0;
+		encoder_id = (uint32_t __user *)(unsigned long)card_res->encoder_id_ptr;
+		if (file_priv->master->minor->type == DRM_MINOR_CONTROL) {
+			list_for_each_entry(encoder,
+					    &dev->mode_config.encoder_list,
+					    head) {
+				DRM_DEBUG("ENCODER ID is %d\n",
+					  encoder->base.id);
+				if (put_user(encoder->base.id, encoder_id +
+					     copied)) {
+					ret = -EFAULT;
+					goto out;
+				}
+				copied++;
+			}
+		} else {
+			for (i = mode_group->num_crtcs; i < mode_group->num_crtcs + mode_group->num_encoders; i++) {
+				if (put_user(mode_group->id_list[i],
+					     encoder_id + copied)) {
+					ret = -EFAULT;
+					goto out;
+				}
+				copied++;
+			}
+
+		}
+	}
+	card_res->count_encoders = encoder_count;
+
+	/* Connectors */
+	if (card_res->count_connectors >= connector_count) {
+		copied = 0;
+		connector_id = (uint32_t __user *)(unsigned long)card_res->connector_id_ptr;
+		if (file_priv->master->minor->type == DRM_MINOR_CONTROL) {
+			list_for_each_entry(connector,
+					    &dev->mode_config.connector_list,
+					    head) {
+				DRM_DEBUG("CONNECTOR ID is %d\n",
+					  connector->base.id);
+				if (put_user(connector->base.id,
+					     connector_id + copied)) {
+					ret = -EFAULT;
+					goto out;
+				}
+				copied++;
+			}
+		} else {
+			int start = mode_group->num_crtcs +
+				mode_group->num_encoders;
+			for (i = start; i < start + mode_group->num_connectors; i++) {
+				if (put_user(mode_group->id_list[i],
+					     connector_id + copied)) {
+					ret = -EFAULT;
+					goto out;
+				}
+				copied++;
+			}
+		}
+	}
+	card_res->count_connectors = connector_count;
+
+	DRM_DEBUG("Counted %d %d %d\n", card_res->count_crtcs,
+		  card_res->count_connectors, card_res->count_encoders);
+
+out:
+	mutex_unlock(&dev->mode_config.mutex);
+	return ret;
+}
+
+/**
+ * drm_mode_getcrtc - get CRTC configuration
+ * @inode: inode from the ioctl
+ * @filp: file * from the ioctl
+ * @cmd: cmd from ioctl
+ * @arg: arg from ioctl
+ *
+ * LOCKING:
+ * Caller? (FIXME)
+ *
+ * Construct a CRTC configuration structure to return to the user.
+ *
+ * Called by the user via ioctl.
+ *
+ * RETURNS:
+ * Zero on success, errno on failure.
+ */
+int drm_mode_getcrtc(struct drm_device *dev,
+		     void *data, struct drm_file *file_priv)
+{
+	struct drm_mode_crtc *crtc_resp = data;
+	struct drm_crtc *crtc;
+	struct drm_mode_object *obj;
+	int ret = 0;
+
+	mutex_lock(&dev->mode_config.mutex);
+
+	obj = drm_mode_object_find(dev, crtc_resp->crtc_id,
+				   DRM_MODE_OBJECT_CRTC);
+	if (!obj) {
+		ret = -EINVAL;
+		goto out;
+	}
+	crtc = obj_to_crtc(obj);
+
+	crtc_resp->x = crtc->x;
+	crtc_resp->y = crtc->y;
+	crtc_resp->gamma_size = crtc->gamma_size;
+	if (crtc->fb)
+		crtc_resp->fb_id = crtc->fb->base.id;
+	else
+		crtc_resp->fb_id = 0;
+
+	if (crtc->enabled) {
+
+		drm_crtc_convert_to_umode(&crtc_resp->mode, &crtc->mode);
+		crtc_resp->mode_valid = 1;
+
+	} else {
+		crtc_resp->mode_valid = 0;
+	}
+
+out:
+	mutex_unlock(&dev->mode_config.mutex);
+	return ret;
+}
+
+/**
+ * drm_mode_getconnector - get connector configuration
+ * @inode: inode from the ioctl
+ * @filp: file * from the ioctl
+ * @cmd: cmd from ioctl
+ * @arg: arg from ioctl
+ *
+ * LOCKING:
+ * Caller? (FIXME)
+ *
+ * Construct a connector configuration structure to return to the user.
+ *
+ * Called by the user via ioctl.
+ *
+ * RETURNS:
+ * Zero on success, errno on failure.
+ */
+int drm_mode_getconnector(struct drm_device *dev, void *data,
+			  struct drm_file *file_priv)
+{
+	struct drm_mode_get_connector *out_resp = data;
+	struct drm_mode_object *obj;
+	struct drm_connector *connector;
+	struct drm_display_mode *mode;
+	int mode_count = 0;
+	int props_count = 0;
+	int encoders_count = 0;
+	int ret = 0;
+	int copied = 0;
+	int i;
+	struct drm_mode_modeinfo u_mode;
+	struct drm_mode_modeinfo __user *mode_ptr;
+	uint32_t __user *prop_ptr;
+	uint64_t __user *prop_values;
+	uint32_t __user *encoder_ptr;
+
+	memset(&u_mode, 0, sizeof(struct drm_mode_modeinfo));
+
+	DRM_DEBUG("connector id %d:\n", out_resp->connector_id);
+
+	mutex_lock(&dev->mode_config.mutex);
+
+	obj = drm_mode_object_find(dev, out_resp->connector_id,
+				   DRM_MODE_OBJECT_CONNECTOR);
+	if (!obj) {
+		ret = -EINVAL;
+		goto out;
+	}
+	connector = obj_to_connector(obj);
+
+	for (i = 0; i < DRM_CONNECTOR_MAX_PROPERTY; i++) {
+		if (connector->property_ids[i] != 0) {
+			props_count++;
+		}
+	}
+
+	for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) {
+		if (connector->encoder_ids[i] != 0) {
+			encoders_count++;
+		}
+	}
+
+	if (out_resp->count_modes == 0) {
+		connector->funcs->fill_modes(connector,
+					     dev->mode_config.max_width,
+					     dev->mode_config.max_height);
+	}
+
+	/* delayed so we get modes regardless of pre-fill_modes state */
+	list_for_each_entry(mode, &connector->modes, head)
+		mode_count++;
+
+	out_resp->connector_id = connector->base.id;
+	out_resp->connector_type = connector->connector_type;
+	out_resp->connector_type_id = connector->connector_type_id;
+	out_resp->mm_width = connector->display_info.width_mm;
+	out_resp->mm_height = connector->display_info.height_mm;
+	out_resp->subpixel = connector->display_info.subpixel_order;
+	out_resp->connection = connector->status;
+	if (connector->encoder)
+		out_resp->encoder_id = connector->encoder->base.id;
+	else
+		out_resp->encoder_id = 0;
+
+	/*
+	 * This ioctl is called twice, once to determine how much space is
+	 * needed, and the 2nd time to fill it.
+	 */
+	if ((out_resp->count_modes >= mode_count) && mode_count) {
+		copied = 0;
+		mode_ptr = (struct drm_mode_modeinfo *)(unsigned long)out_resp->modes_ptr;
+		list_for_each_entry(mode, &connector->modes, head) {
+			drm_crtc_convert_to_umode(&u_mode, mode);
+			if (copy_to_user(mode_ptr + copied,
+					 &u_mode, sizeof(u_mode))) {
+				ret = -EFAULT;
+				goto out;
+			}
+			copied++;
+		}
+	}
+	out_resp->count_modes = mode_count;
+
+	if ((out_resp->count_props >= props_count) && props_count) {
+		copied = 0;
+		prop_ptr = (uint32_t *)(unsigned long)(out_resp->props_ptr);
+		prop_values = (uint64_t *)(unsigned long)(out_resp->prop_values_ptr);
+		for (i = 0; i < DRM_CONNECTOR_MAX_PROPERTY; i++) {
+			if (connector->property_ids[i] != 0) {
+				if (put_user(connector->property_ids[i],
+					     prop_ptr + copied)) {
+					ret = -EFAULT;
+					goto out;
+				}
+
+				if (put_user(connector->property_values[i],
+					     prop_values + copied)) {
+					ret = -EFAULT;
+					goto out;
+				}
+				copied++;
+			}
+		}
+	}
+	out_resp->count_props = props_count;
+
+	if ((out_resp->count_encoders >= encoders_count) && encoders_count) {
+		copied = 0;
+		encoder_ptr = (uint32_t *)(unsigned long)(out_resp->encoders_ptr);
+		for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) {
+			if (connector->encoder_ids[i] != 0) {
+				if (put_user(connector->encoder_ids[i],
+					     encoder_ptr + copied)) {
+					ret = -EFAULT;
+					goto out;
+				}
+				copied++;
+			}
+		}
+	}
+	out_resp->count_encoders = encoders_count;
+
+out:
+	mutex_unlock(&dev->mode_config.mutex);
+	return ret;
+}
+
+int drm_mode_getencoder(struct drm_device *dev, void *data,
+			struct drm_file *file_priv)
+{
+	struct drm_mode_get_encoder *enc_resp = data;
+	struct drm_mode_object *obj;
+	struct drm_encoder *encoder;
+	int ret = 0;
+
+	mutex_lock(&dev->mode_config.mutex);
+	obj = drm_mode_object_find(dev, enc_resp->encoder_id,
+				   DRM_MODE_OBJECT_ENCODER);
+	if (!obj) {
+		ret = -EINVAL;
+		goto out;
+	}
+	encoder = obj_to_encoder(obj);
+
+	if (encoder->crtc)
+		enc_resp->crtc_id = encoder->crtc->base.id;
+	else
+		enc_resp->crtc_id = 0;
+	enc_resp->encoder_type = encoder->encoder_type;
+	enc_resp->encoder_id = encoder->base.id;
+	enc_resp->possible_crtcs = encoder->possible_crtcs;
+	enc_resp->possible_clones = encoder->possible_clones;
+
+out:
+	mutex_unlock(&dev->mode_config.mutex);
+	return ret;
+}
+
+/**
+ * drm_mode_setcrtc - set CRTC configuration
+ * @inode: inode from the ioctl
+ * @filp: file * from the ioctl
+ * @cmd: cmd from ioctl
+ * @arg: arg from ioctl
+ *
+ * LOCKING:
+ * Caller? (FIXME)
+ *
+ * Build a new CRTC configuration based on user request.
+ *
+ * Called by the user via ioctl.
+ *
+ * RETURNS:
+ * Zero on success, errno on failure.
+ */
+int drm_mode_setcrtc(struct drm_device *dev, void *data,
+		     struct drm_file *file_priv)
+{
+	struct drm_mode_config *config = &dev->mode_config;
+	struct drm_mode_crtc *crtc_req = data;
+	struct drm_mode_object *obj;
+	struct drm_crtc *crtc, *crtcfb;
+	struct drm_connector **connector_set = NULL, *connector;
+	struct drm_framebuffer *fb = NULL;
+	struct drm_display_mode *mode = NULL;
+	struct drm_mode_set set;
+	uint32_t __user *set_connectors_ptr;
+	int ret = 0;
+	int i;
+
+	mutex_lock(&dev->mode_config.mutex);
+	obj = drm_mode_object_find(dev, crtc_req->crtc_id,
+				   DRM_MODE_OBJECT_CRTC);
+	if (!obj) {
+		DRM_DEBUG("Unknown CRTC ID %d\n", crtc_req->crtc_id);
+		ret = -EINVAL;
+		goto out;
+	}
+	crtc = obj_to_crtc(obj);
+
+	if (crtc_req->mode_valid) {
+		/* If we have a mode we need a framebuffer. */
+		/* If we pass -1, set the mode with the currently bound fb */
+		if (crtc_req->fb_id == -1) {
+			list_for_each_entry(crtcfb,
+					    &dev->mode_config.crtc_list, head) {
+				if (crtcfb == crtc) {
+					DRM_DEBUG("Using current fb for setmode\n");
+					fb = crtc->fb;
+				}
+			}
+		} else {
+			obj = drm_mode_object_find(dev, crtc_req->fb_id,
+						   DRM_MODE_OBJECT_FB);
+			if (!obj) {
+				DRM_DEBUG("Unknown FB ID%d\n", crtc_req->fb_id);
+				ret = -EINVAL;
+				goto out;
+			}
+			fb = obj_to_fb(obj);
+		}
+
+		mode = drm_mode_create(dev);
+		drm_crtc_convert_umode(mode, &crtc_req->mode);
+		drm_mode_set_crtcinfo(mode, CRTC_INTERLACE_HALVE_V);
+	}
+
+	if (crtc_req->count_connectors == 0 && mode) {
+		DRM_DEBUG("Count connectors is 0 but mode set\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (crtc_req->count_connectors > 0 && !mode && !fb) {
+		DRM_DEBUG("Count connectors is %d but no mode or fb set\n",
+			  crtc_req->count_connectors);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (crtc_req->count_connectors > 0) {
+		u32 out_id;
+
+		/* Avoid unbounded kernel memory allocation */
+		if (crtc_req->count_connectors > config->num_connector) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		connector_set = kmalloc(crtc_req->count_connectors *
+					sizeof(struct drm_connector *),
+					GFP_KERNEL);
+		if (!connector_set) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		for (i = 0; i < crtc_req->count_connectors; i++) {
+			set_connectors_ptr = (uint32_t *)(unsigned long)crtc_req->set_connectors_ptr;
+			if (get_user(out_id, &set_connectors_ptr[i])) {
+				ret = -EFAULT;
+				goto out;
+			}
+
+			obj = drm_mode_object_find(dev, out_id,
+						   DRM_MODE_OBJECT_CONNECTOR);
+			if (!obj) {
+				DRM_DEBUG("Connector id %d unknown\n", out_id);
+				ret = -EINVAL;
+				goto out;
+			}
+			connector = obj_to_connector(obj);
+
+			connector_set[i] = connector;
+		}
+	}
+
+	set.crtc = crtc;
+	set.x = crtc_req->x;
+	set.y = crtc_req->y;
+	set.mode = mode;
+	set.connectors = connector_set;
+	set.num_connectors = crtc_req->count_connectors;
+	set.fb =fb;
+	ret = crtc->funcs->set_config(&set);
+
+out:
+	kfree(connector_set);
+	mutex_unlock(&dev->mode_config.mutex);
+	return ret;
+}
+
+int drm_mode_cursor_ioctl(struct drm_device *dev,
+			void *data, struct drm_file *file_priv)
+{
+	struct drm_mode_cursor *req = data;
+	struct drm_mode_object *obj;
+	struct drm_crtc *crtc;
+	int ret = 0;
+
+	DRM_DEBUG("\n");
+
+	if (!req->flags) {
+		DRM_ERROR("no operation set\n");
+		return -EINVAL;
+	}
+
+	mutex_lock(&dev->mode_config.mutex);
+	obj = drm_mode_object_find(dev, req->crtc, DRM_MODE_OBJECT_CRTC);
+	if (!obj) {
+		DRM_DEBUG("Unknown CRTC ID %d\n", req->crtc);
+		ret = -EINVAL;
+		goto out;
+	}
+	crtc = obj_to_crtc(obj);
+
+	if (req->flags & DRM_MODE_CURSOR_BO) {
+		if (!crtc->funcs->cursor_set) {
+			DRM_ERROR("crtc does not support cursor\n");
+			ret = -ENXIO;
+			goto out;
+		}
+		/* Turns off the cursor if handle is 0 */
+		ret = crtc->funcs->cursor_set(crtc, file_priv, req->handle,
+					      req->width, req->height);
+	}
+
+	if (req->flags & DRM_MODE_CURSOR_MOVE) {
+		if (crtc->funcs->cursor_move) {
+			ret = crtc->funcs->cursor_move(crtc, req->x, req->y);
+		} else {
+			DRM_ERROR("crtc does not support cursor\n");
+			ret = -EFAULT;
+			goto out;
+		}
+	}
+out:
+	mutex_unlock(&dev->mode_config.mutex);
+	return ret;
+}
+
+/**
+ * drm_mode_addfb - add an FB to the graphics configuration
+ * @inode: inode from the ioctl
+ * @filp: file * from the ioctl
+ * @cmd: cmd from ioctl
+ * @arg: arg from ioctl
+ *
+ * LOCKING:
+ * Takes mode config lock.
+ *
+ * Add a new FB to the specified CRTC, given a user request.
+ *
+ * Called by the user via ioctl.
+ *
+ * RETURNS:
+ * Zero on success, errno on failure.
+ */
+int drm_mode_addfb(struct drm_device *dev,
+		   void *data, struct drm_file *file_priv)
+{
+	struct drm_mode_fb_cmd *r = data;
+	struct drm_mode_config *config = &dev->mode_config;
+	struct drm_framebuffer *fb;
+	int ret = 0;
+
+	if ((config->min_width > r->width) || (r->width > config->max_width)) {
+		DRM_ERROR("mode new framebuffer width not within limits\n");
+		return -EINVAL;
+	}
+	if ((config->min_height > r->height) || (r->height > config->max_height)) {
+		DRM_ERROR("mode new framebuffer height not within limits\n");
+		return -EINVAL;
+	}
+
+	mutex_lock(&dev->mode_config.mutex);
+
+	/* TODO check buffer is sufficently large */
+	/* TODO setup destructor callback */
+
+	fb = dev->mode_config.funcs->fb_create(dev, file_priv, r);
+	if (!fb) {
+		DRM_ERROR("could not create framebuffer\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	r->buffer_id = fb->base.id;
+	list_add(&fb->filp_head, &file_priv->fbs);
+
+out:
+	mutex_unlock(&dev->mode_config.mutex);
+	return ret;
+}
+
+/**
+ * drm_mode_rmfb - remove an FB from the configuration
+ * @inode: inode from the ioctl
+ * @filp: file * from the ioctl
+ * @cmd: cmd from ioctl
+ * @arg: arg from ioctl
+ *
+ * LOCKING:
+ * Takes mode config lock.
+ *
+ * Remove the FB specified by the user.
+ *
+ * Called by the user via ioctl.
+ *
+ * RETURNS:
+ * Zero on success, errno on failure.
+ */
+int drm_mode_rmfb(struct drm_device *dev,
+		   void *data, struct drm_file *file_priv)
+{
+	struct drm_mode_object *obj;
+	struct drm_framebuffer *fb = NULL;
+	struct drm_framebuffer *fbl = NULL;
+	uint32_t *id = data;
+	int ret = 0;
+	int found = 0;
+
+	mutex_lock(&dev->mode_config.mutex);
+	obj = drm_mode_object_find(dev, *id, DRM_MODE_OBJECT_FB);
+	/* TODO check that we realy get a framebuffer back. */
+	if (!obj) {
+		DRM_ERROR("mode invalid framebuffer id\n");
+		ret = -EINVAL;
+		goto out;
+	}
+	fb = obj_to_fb(obj);
+
+	list_for_each_entry(fbl, &file_priv->fbs, filp_head)
+		if (fb == fbl)
+			found = 1;
+
+	if (!found) {
+		DRM_ERROR("tried to remove a fb that we didn't own\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* TODO release all crtc connected to the framebuffer */
+	/* TODO unhock the destructor from the buffer object */
+
+	list_del(&fb->filp_head);
+	fb->funcs->destroy(fb);
+
+out:
+	mutex_unlock(&dev->mode_config.mutex);
+	return ret;
+}
+
+/**
+ * drm_mode_getfb - get FB info
+ * @inode: inode from the ioctl
+ * @filp: file * from the ioctl
+ * @cmd: cmd from ioctl
+ * @arg: arg from ioctl
+ *
+ * LOCKING:
+ * Caller? (FIXME)
+ *
+ * Lookup the FB given its ID and return info about it.
+ *
+ * Called by the user via ioctl.
+ *
+ * RETURNS:
+ * Zero on success, errno on failure.
+ */
+int drm_mode_getfb(struct drm_device *dev,
+		   void *data, struct drm_file *file_priv)
+{
+	struct drm_mode_fb_cmd *r = data;
+	struct drm_mode_object *obj;
+	struct drm_framebuffer *fb;
+	int ret = 0;
+
+	mutex_lock(&dev->mode_config.mutex);
+	obj = drm_mode_object_find(dev, r->buffer_id, DRM_MODE_OBJECT_FB);
+	if (!obj) {
+		DRM_ERROR("invalid framebuffer id\n");
+		ret = -EINVAL;
+		goto out;
+	}
+	fb = obj_to_fb(obj);
+
+	r->height = fb->height;
+	r->width = fb->width;
+	r->depth = fb->depth;
+	r->bpp = fb->bits_per_pixel;
+	r->pitch = fb->pitch;
+	fb->funcs->create_handle(fb, file_priv, &r->handle);
+
+out:
+	mutex_unlock(&dev->mode_config.mutex);
+	return ret;
+}
+
+/**
+ * drm_fb_release - remove and free the FBs on this file
+ * @filp: file * from the ioctl
+ *
+ * LOCKING:
+ * Takes mode config lock.
+ *
+ * Destroy all the FBs associated with @filp.
+ *
+ * Called by the user via ioctl.
+ *
+ * RETURNS:
+ * Zero on success, errno on failure.
+ */
+void drm_fb_release(struct file *filp)
+{
+	struct drm_file *priv = filp->private_data;
+	struct drm_device *dev = priv->minor->dev;
+	struct drm_framebuffer *fb, *tfb;
+
+	mutex_lock(&dev->mode_config.mutex);
+	list_for_each_entry_safe(fb, tfb, &priv->fbs, filp_head) {
+		list_del(&fb->filp_head);
+		fb->funcs->destroy(fb);
+	}
+	mutex_unlock(&dev->mode_config.mutex);
+}
+
+/**
+ * drm_mode_attachmode - add a mode to the user mode list
+ * @dev: DRM device
+ * @connector: connector to add the mode to
+ * @mode: mode to add
+ *
+ * Add @mode to @connector's user mode list.
+ */
+static int drm_mode_attachmode(struct drm_device *dev,
+			       struct drm_connector *connector,
+			       struct drm_display_mode *mode)
+{
+	int ret = 0;
+
+	list_add_tail(&mode->head, &connector->user_modes);
+	return ret;
+}
+
+int drm_mode_attachmode_crtc(struct drm_device *dev, struct drm_crtc *crtc,
+			     struct drm_display_mode *mode)
+{
+	struct drm_connector *connector;
+	int ret = 0;
+	struct drm_display_mode *dup_mode;
+	int need_dup = 0;
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+		if (!connector->encoder)
+			break;
+		if (connector->encoder->crtc == crtc) {
+			if (need_dup)
+				dup_mode = drm_mode_duplicate(dev, mode);
+			else
+				dup_mode = mode;
+			ret = drm_mode_attachmode(dev, connector, dup_mode);
+			if (ret)
+				return ret;
+			need_dup = 1;
+		}
+	}
+	return 0;
+}
+EXPORT_SYMBOL(drm_mode_attachmode_crtc);
+
+static int drm_mode_detachmode(struct drm_device *dev,
+			       struct drm_connector *connector,
+			       struct drm_display_mode *mode)
+{
+	int found = 0;
+	int ret = 0;
+	struct drm_display_mode *match_mode, *t;
+
+	list_for_each_entry_safe(match_mode, t, &connector->user_modes, head) {
+		if (drm_mode_equal(match_mode, mode)) {
+			list_del(&match_mode->head);
+			drm_mode_destroy(dev, match_mode);
+			found = 1;
+			break;
+		}
+	}
+
+	if (!found)
+		ret = -EINVAL;
+
+	return ret;
+}
+
+int drm_mode_detachmode_crtc(struct drm_device *dev, struct drm_display_mode *mode)
+{
+	struct drm_connector *connector;
+
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+		drm_mode_detachmode(dev, connector, mode);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(drm_mode_detachmode_crtc);
+
+/**
+ * drm_fb_attachmode - Attach a user mode to an connector
+ * @inode: inode from the ioctl
+ * @filp: file * from the ioctl
+ * @cmd: cmd from ioctl
+ * @arg: arg from ioctl
+ *
+ * This attaches a user specified mode to an connector.
+ * Called by the user via ioctl.
+ *
+ * RETURNS:
+ * Zero on success, errno on failure.
+ */
+int drm_mode_attachmode_ioctl(struct drm_device *dev,
+			      void *data, struct drm_file *file_priv)
+{
+	struct drm_mode_mode_cmd *mode_cmd = data;
+	struct drm_connector *connector;
+	struct drm_display_mode *mode;
+	struct drm_mode_object *obj;
+	struct drm_mode_modeinfo *umode = &mode_cmd->mode;
+	int ret = 0;
+
+	mutex_lock(&dev->mode_config.mutex);
+
+	obj = drm_mode_object_find(dev, mode_cmd->connector_id, DRM_MODE_OBJECT_CONNECTOR);
+	if (!obj) {
+		ret = -EINVAL;
+		goto out;
+	}
+	connector = obj_to_connector(obj);
+
+	mode = drm_mode_create(dev);
+	if (!mode) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	drm_crtc_convert_umode(mode, umode);
+
+	ret = drm_mode_attachmode(dev, connector, mode);
+out:
+	mutex_unlock(&dev->mode_config.mutex);
+	return ret;
+}
+
+
+/**
+ * drm_fb_detachmode - Detach a user specified mode from an connector
+ * @inode: inode from the ioctl
+ * @filp: file * from the ioctl
+ * @cmd: cmd from ioctl
+ * @arg: arg from ioctl
+ *
+ * Called by the user via ioctl.
+ *
+ * RETURNS:
+ * Zero on success, errno on failure.
+ */
+int drm_mode_detachmode_ioctl(struct drm_device *dev,
+			      void *data, struct drm_file *file_priv)
+{
+	struct drm_mode_object *obj;
+	struct drm_mode_mode_cmd *mode_cmd = data;
+	struct drm_connector *connector;
+	struct drm_display_mode mode;
+	struct drm_mode_modeinfo *umode = &mode_cmd->mode;
+	int ret = 0;
+
+	mutex_lock(&dev->mode_config.mutex);
+
+	obj = drm_mode_object_find(dev, mode_cmd->connector_id, DRM_MODE_OBJECT_CONNECTOR);
+	if (!obj) {
+		ret = -EINVAL;
+		goto out;
+	}
+	connector = obj_to_connector(obj);
+
+	drm_crtc_convert_umode(&mode, umode);
+	ret = drm_mode_detachmode(dev, connector, &mode);
+out:
+	mutex_unlock(&dev->mode_config.mutex);
+	return ret;
+}
+
+struct drm_property *drm_property_create(struct drm_device *dev, int flags,
+					 const char *name, int num_values)
+{
+	struct drm_property *property = NULL;
+
+	property = kzalloc(sizeof(struct drm_property), GFP_KERNEL);
+	if (!property)
+		return NULL;
+
+	if (num_values) {
+		property->values = kzalloc(sizeof(uint64_t)*num_values, GFP_KERNEL);
+		if (!property->values)
+			goto fail;
+	}
+
+	drm_mode_object_get(dev, &property->base, DRM_MODE_OBJECT_PROPERTY);
+	property->flags = flags;
+	property->num_values = num_values;
+	INIT_LIST_HEAD(&property->enum_blob_list);
+
+	if (name)
+		strncpy(property->name, name, DRM_PROP_NAME_LEN);
+
+	list_add_tail(&property->head, &dev->mode_config.property_list);
+	return property;
+fail:
+	kfree(property);
+	return NULL;
+}
+EXPORT_SYMBOL(drm_property_create);
+
+int drm_property_add_enum(struct drm_property *property, int index,
+			  uint64_t value, const char *name)
+{
+	struct drm_property_enum *prop_enum;
+
+	if (!(property->flags & DRM_MODE_PROP_ENUM))
+		return -EINVAL;
+
+	if (!list_empty(&property->enum_blob_list)) {
+		list_for_each_entry(prop_enum, &property->enum_blob_list, head) {
+			if (prop_enum->value == value) {
+				strncpy(prop_enum->name, name, DRM_PROP_NAME_LEN);
+				prop_enum->name[DRM_PROP_NAME_LEN-1] = '\0';
+				return 0;
+			}
+		}
+	}
+
+	prop_enum = kzalloc(sizeof(struct drm_property_enum), GFP_KERNEL);
+	if (!prop_enum)
+		return -ENOMEM;
+
+	strncpy(prop_enum->name, name, DRM_PROP_NAME_LEN);
+	prop_enum->name[DRM_PROP_NAME_LEN-1] = '\0';
+	prop_enum->value = value;
+
+	property->values[index] = value;
+	list_add_tail(&prop_enum->head, &property->enum_blob_list);
+	return 0;
+}
+EXPORT_SYMBOL(drm_property_add_enum);
+
+void drm_property_destroy(struct drm_device *dev, struct drm_property *property)
+{
+	struct drm_property_enum *prop_enum, *pt;
+
+	list_for_each_entry_safe(prop_enum, pt, &property->enum_blob_list, head) {
+		list_del(&prop_enum->head);
+		kfree(prop_enum);
+	}
+
+	if (property->num_values)
+		kfree(property->values);
+	drm_mode_object_put(dev, &property->base);
+	list_del(&property->head);
+	kfree(property);
+}
+EXPORT_SYMBOL(drm_property_destroy);
+
+int drm_connector_attach_property(struct drm_connector *connector,
+			       struct drm_property *property, uint64_t init_val)
+{
+	int i;
+
+	for (i = 0; i < DRM_CONNECTOR_MAX_PROPERTY; i++) {
+		if (connector->property_ids[i] == 0) {
+			connector->property_ids[i] = property->base.id;
+			connector->property_values[i] = init_val;
+			break;
+		}
+	}
+
+	if (i == DRM_CONNECTOR_MAX_PROPERTY)
+		return -EINVAL;
+	return 0;
+}
+EXPORT_SYMBOL(drm_connector_attach_property);
+
+int drm_connector_property_set_value(struct drm_connector *connector,
+				  struct drm_property *property, uint64_t value)
+{
+	int i;
+
+	for (i = 0; i < DRM_CONNECTOR_MAX_PROPERTY; i++) {
+		if (connector->property_ids[i] == property->base.id) {
+			connector->property_values[i] = value;
+			break;
+		}
+	}
+
+	if (i == DRM_CONNECTOR_MAX_PROPERTY)
+		return -EINVAL;
+	return 0;
+}
+EXPORT_SYMBOL(drm_connector_property_set_value);
+
+int drm_connector_property_get_value(struct drm_connector *connector,
+				  struct drm_property *property, uint64_t *val)
+{
+	int i;
+
+	for (i = 0; i < DRM_CONNECTOR_MAX_PROPERTY; i++) {
+		if (connector->property_ids[i] == property->base.id) {
+			*val = connector->property_values[i];
+			break;
+		}
+	}
+
+	if (i == DRM_CONNECTOR_MAX_PROPERTY)
+		return -EINVAL;
+	return 0;
+}
+EXPORT_SYMBOL(drm_connector_property_get_value);
+
+int drm_mode_getproperty_ioctl(struct drm_device *dev,
+			       void *data, struct drm_file *file_priv)
+{
+	struct drm_mode_object *obj;
+	struct drm_mode_get_property *out_resp = data;
+	struct drm_property *property;
+	int enum_count = 0;
+	int blob_count = 0;
+	int value_count = 0;
+	int ret = 0, i;
+	int copied;
+	struct drm_property_enum *prop_enum;
+	struct drm_mode_property_enum __user *enum_ptr;
+	struct drm_property_blob *prop_blob;
+	uint32_t *blob_id_ptr;
+	uint64_t __user *values_ptr;
+	uint32_t __user *blob_length_ptr;
+
+	mutex_lock(&dev->mode_config.mutex);
+	obj = drm_mode_object_find(dev, out_resp->prop_id, DRM_MODE_OBJECT_PROPERTY);
+	if (!obj) {
+		ret = -EINVAL;
+		goto done;
+	}
+	property = obj_to_property(obj);
+
+	if (property->flags & DRM_MODE_PROP_ENUM) {
+		list_for_each_entry(prop_enum, &property->enum_blob_list, head)
+			enum_count++;
+	} else if (property->flags & DRM_MODE_PROP_BLOB) {
+		list_for_each_entry(prop_blob, &property->enum_blob_list, head)
+			blob_count++;
+	}
+
+	value_count = property->num_values;
+
+	strncpy(out_resp->name, property->name, DRM_PROP_NAME_LEN);
+	out_resp->name[DRM_PROP_NAME_LEN-1] = 0;
+	out_resp->flags = property->flags;
+
+	if ((out_resp->count_values >= value_count) && value_count) {
+		values_ptr = (uint64_t *)(unsigned long)out_resp->values_ptr;
+		for (i = 0; i < value_count; i++) {
+			if (copy_to_user(values_ptr + i, &property->values[i], sizeof(uint64_t))) {
+				ret = -EFAULT;
+				goto done;
+			}
+		}
+	}
+	out_resp->count_values = value_count;
+
+	if (property->flags & DRM_MODE_PROP_ENUM) {
+		if ((out_resp->count_enum_blobs >= enum_count) && enum_count) {
+			copied = 0;
+			enum_ptr = (struct drm_mode_property_enum *)(unsigned long)out_resp->enum_blob_ptr;
+			list_for_each_entry(prop_enum, &property->enum_blob_list, head) {
+
+				if (copy_to_user(&enum_ptr[copied].value, &prop_enum->value, sizeof(uint64_t))) {
+					ret = -EFAULT;
+					goto done;
+				}
+
+				if (copy_to_user(&enum_ptr[copied].name,
+						 &prop_enum->name, DRM_PROP_NAME_LEN)) {
+					ret = -EFAULT;
+					goto done;
+				}
+				copied++;
+			}
+		}
+		out_resp->count_enum_blobs = enum_count;
+	}
+
+	if (property->flags & DRM_MODE_PROP_BLOB) {
+		if ((out_resp->count_enum_blobs >= blob_count) && blob_count) {
+			copied = 0;
+			blob_id_ptr = (uint32_t *)(unsigned long)out_resp->enum_blob_ptr;
+			blob_length_ptr = (uint32_t *)(unsigned long)out_resp->values_ptr;
+
+			list_for_each_entry(prop_blob, &property->enum_blob_list, head) {
+				if (put_user(prop_blob->base.id, blob_id_ptr + copied)) {
+					ret = -EFAULT;
+					goto done;
+				}
+
+				if (put_user(prop_blob->length, blob_length_ptr + copied)) {
+					ret = -EFAULT;
+					goto done;
+				}
+
+				copied++;
+			}
+		}
+		out_resp->count_enum_blobs = blob_count;
+	}
+done:
+	mutex_unlock(&dev->mode_config.mutex);
+	return ret;
+}
+
+static struct drm_property_blob *drm_property_create_blob(struct drm_device *dev, int length,
+							  void *data)
+{
+	struct drm_property_blob *blob;
+
+	if (!length || !data)
+		return NULL;
+
+	blob = kzalloc(sizeof(struct drm_property_blob)+length, GFP_KERNEL);
+	if (!blob)
+		return NULL;
+
+	blob->data = (void *)((char *)blob + sizeof(struct drm_property_blob));
+	blob->length = length;
+
+	memcpy(blob->data, data, length);
+
+	drm_mode_object_get(dev, &blob->base, DRM_MODE_OBJECT_BLOB);
+
+	list_add_tail(&blob->head, &dev->mode_config.property_blob_list);
+	return blob;
+}
+
+static void drm_property_destroy_blob(struct drm_device *dev,
+			       struct drm_property_blob *blob)
+{
+	drm_mode_object_put(dev, &blob->base);
+	list_del(&blob->head);
+	kfree(blob);
+}
+
+int drm_mode_getblob_ioctl(struct drm_device *dev,
+			   void *data, struct drm_file *file_priv)
+{
+	struct drm_mode_object *obj;
+	struct drm_mode_get_blob *out_resp = data;
+	struct drm_property_blob *blob;
+	int ret = 0;
+	void *blob_ptr;
+
+	mutex_lock(&dev->mode_config.mutex);
+	obj = drm_mode_object_find(dev, out_resp->blob_id, DRM_MODE_OBJECT_BLOB);
+	if (!obj) {
+		ret = -EINVAL;
+		goto done;
+	}
+	blob = obj_to_blob(obj);
+
+	if (out_resp->length == blob->length) {
+		blob_ptr = (void *)(unsigned long)out_resp->data;
+		if (copy_to_user(blob_ptr, blob->data, blob->length)){
+			ret = -EFAULT;
+			goto done;
+		}
+	}
+	out_resp->length = blob->length;
+
+done:
+	mutex_unlock(&dev->mode_config.mutex);
+	return ret;
+}
+
+int drm_mode_connector_update_edid_property(struct drm_connector *connector,
+					    struct edid *edid)
+{
+	struct drm_device *dev = connector->dev;
+	int ret = 0;
+
+	if (connector->edid_blob_ptr)
+		drm_property_destroy_blob(dev, connector->edid_blob_ptr);
+
+	/* Delete edid, when there is none. */
+	if (!edid) {
+		connector->edid_blob_ptr = NULL;
+		ret = drm_connector_property_set_value(connector, dev->mode_config.edid_property, 0);
+		return ret;
+	}
+
+	connector->edid_blob_ptr = drm_property_create_blob(connector->dev, 128, edid);
+
+	ret = drm_connector_property_set_value(connector,
+					       dev->mode_config.edid_property,
+					       connector->edid_blob_ptr->base.id);
+
+	return ret;
+}
+EXPORT_SYMBOL(drm_mode_connector_update_edid_property);
+
+int drm_mode_connector_property_set_ioctl(struct drm_device *dev,
+				       void *data, struct drm_file *file_priv)
+{
+	struct drm_mode_connector_set_property *out_resp = data;
+	struct drm_mode_object *obj;
+	struct drm_property *property;
+	struct drm_connector *connector;
+	int ret = -EINVAL;
+	int i;
+
+	mutex_lock(&dev->mode_config.mutex);
+
+	obj = drm_mode_object_find(dev, out_resp->connector_id, DRM_MODE_OBJECT_CONNECTOR);
+	if (!obj) {
+		goto out;
+	}
+	connector = obj_to_connector(obj);
+
+	for (i = 0; i < DRM_CONNECTOR_MAX_PROPERTY; i++) {
+		if (connector->property_ids[i] == out_resp->prop_id)
+			break;
+	}
+
+	if (i == DRM_CONNECTOR_MAX_PROPERTY) {
+		goto out;
+	}
+
+	obj = drm_mode_object_find(dev, out_resp->prop_id, DRM_MODE_OBJECT_PROPERTY);
+	if (!obj) {
+		goto out;
+	}
+	property = obj_to_property(obj);
+
+	if (property->flags & DRM_MODE_PROP_IMMUTABLE)
+		goto out;
+
+	if (property->flags & DRM_MODE_PROP_RANGE) {
+		if (out_resp->value < property->values[0])
+			goto out;
+
+		if (out_resp->value > property->values[1])
+			goto out;
+	} else {
+		int found = 0;
+		for (i = 0; i < property->num_values; i++) {
+			if (property->values[i] == out_resp->value) {
+				found = 1;
+				break;
+			}
+		}
+		if (!found) {
+			goto out;
+		}
+	}
+
+	if (connector->funcs->set_property)
+		ret = connector->funcs->set_property(connector, property, out_resp->value);
+
+	/* store the property value if succesful */
+	if (!ret)
+		drm_connector_property_set_value(connector, property, out_resp->value);
+out:
+	mutex_unlock(&dev->mode_config.mutex);
+	return ret;
+}
+
+
+int drm_mode_replacefb(struct drm_device *dev,
+		       void *data, struct drm_file *file_priv)
+{
+	struct drm_mode_fb_cmd *r = data;
+	struct drm_mode_object *obj;
+	struct drm_framebuffer *fb;
+	int found = 0;
+	struct drm_framebuffer *fbl = NULL;
+	int ret = 0;
+
+	/* right replace the current bo attached to this fb with a new bo */
+	mutex_lock(&dev->mode_config.mutex);
+	obj = drm_mode_object_find(dev, r->buffer_id, DRM_MODE_OBJECT_FB);
+	if (!obj) {
+		ret = -EINVAL;
+		goto out;
+	}
+	fb = obj_to_fb(obj);
+
+	list_for_each_entry(fbl, &file_priv->fbs, filp_head)
+		if (fb == fbl)
+			found = 1;
+
+	if (!found) {
+		DRM_ERROR("tried to replace an fb we didn't own\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (dev->mode_config.funcs->resize_fb)
+		ret = dev->mode_config.funcs->resize_fb(dev, file_priv, fb, r);
+	else
+		ret = -EINVAL;
+out:
+	mutex_unlock(&dev->mode_config.mutex);
+	return ret;
+
+}
+
+int drm_mode_connector_attach_encoder(struct drm_connector *connector,
+				      struct drm_encoder *encoder)
+{
+	int i;
+
+	for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) {
+		if (connector->encoder_ids[i] == 0) {
+			connector->encoder_ids[i] = encoder->base.id;
+			return 0;
+		}
+	}
+	return -ENOMEM;
+}
+EXPORT_SYMBOL(drm_mode_connector_attach_encoder);
+
+void drm_mode_connector_detach_encoder(struct drm_connector *connector,
+				    struct drm_encoder *encoder)
+{
+	int i;
+	for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) {
+		if (connector->encoder_ids[i] == encoder->base.id) {
+			connector->encoder_ids[i] = 0;
+			if (connector->encoder == encoder)
+				connector->encoder = NULL;
+			break;
+		}
+	}
+}
+EXPORT_SYMBOL(drm_mode_connector_detach_encoder);
+
+bool drm_mode_crtc_set_gamma_size(struct drm_crtc *crtc,
+				  int gamma_size)
+{
+	crtc->gamma_size = gamma_size;
+
+	crtc->gamma_store = kzalloc(gamma_size * sizeof(uint16_t) * 3, GFP_KERNEL);
+	if (!crtc->gamma_store) {
+		crtc->gamma_size = 0;
+		return false;
+	}
+
+	return true;
+}
+EXPORT_SYMBOL(drm_mode_crtc_set_gamma_size);
+
+int drm_mode_gamma_set_ioctl(struct drm_device *dev,
+			     void *data, struct drm_file *file_priv)
+{
+	struct drm_mode_crtc_lut *crtc_lut = data;
+	struct drm_mode_object *obj;
+	struct drm_crtc *crtc;
+	void *r_base, *g_base, *b_base;
+	int size;
+	int ret = 0;
+
+	mutex_lock(&dev->mode_config.mutex);
+	obj = drm_mode_object_find(dev, crtc_lut->crtc_id, DRM_MODE_OBJECT_CRTC);
+	if (!obj) {
+		ret = -EINVAL;
+		goto out;
+	}
+	crtc = obj_to_crtc(obj);
+
+	/* memcpy into gamma store */
+	if (crtc_lut->gamma_size != crtc->gamma_size) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	size = crtc_lut->gamma_size * (sizeof(uint16_t));
+	r_base = crtc->gamma_store;
+	if (copy_from_user(r_base, (void __user *)(unsigned long)crtc_lut->red, size)) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	g_base = r_base + size;
+	if (copy_from_user(g_base, (void __user *)(unsigned long)crtc_lut->green, size)) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	b_base = g_base + size;
+	if (copy_from_user(b_base, (void __user *)(unsigned long)crtc_lut->blue, size)) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	crtc->funcs->gamma_set(crtc, r_base, g_base, b_base, crtc->gamma_size);
+
+out:
+	mutex_unlock(&dev->mode_config.mutex);
+	return ret;
+
+}
+
+int drm_mode_gamma_get_ioctl(struct drm_device *dev,
+			     void *data, struct drm_file *file_priv)
+{
+	struct drm_mode_crtc_lut *crtc_lut = data;
+	struct drm_mode_object *obj;
+	struct drm_crtc *crtc;
+	void *r_base, *g_base, *b_base;
+	int size;
+	int ret = 0;
+
+	mutex_lock(&dev->mode_config.mutex);
+	obj = drm_mode_object_find(dev, crtc_lut->crtc_id, DRM_MODE_OBJECT_CRTC);
+	if (!obj) {
+		ret = -EINVAL;
+		goto out;
+	}
+	crtc = obj_to_crtc(obj);
+
+	/* memcpy into gamma store */
+	if (crtc_lut->gamma_size != crtc->gamma_size) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	size = crtc_lut->gamma_size * (sizeof(uint16_t));
+	r_base = crtc->gamma_store;
+	if (copy_to_user((void __user *)(unsigned long)crtc_lut->red, r_base, size)) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	g_base = r_base + size;
+	if (copy_to_user((void __user *)(unsigned long)crtc_lut->green, g_base, size)) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	b_base = g_base + size;
+	if (copy_to_user((void __user *)(unsigned long)crtc_lut->blue, b_base, size)) {
+		ret = -EFAULT;
+		goto out;
+	}
+out:
+	mutex_unlock(&dev->mode_config.mutex);
+	return ret;
+}
diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c
new file mode 100644
index 000000000000..887ed33b0694
--- /dev/null
+++ b/drivers/gpu/drm/drm_crtc_helper.c
@@ -0,0 +1,822 @@
+/*
+ * Copyright (c) 2006-2008 Intel Corporation
+ * Copyright (c) 2007 Dave Airlie <airlied@linux.ie>
+ *
+ * DRM core CRTC related functions
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that copyright
+ * notice and this permission notice appear in supporting documentation, and
+ * that the name of the copyright holders not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission.  The copyright holders make no representations
+ * about the suitability of this software for any purpose.  It is provided "as
+ * is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ *
+ * Authors:
+ *      Keith Packard
+ *	Eric Anholt <eric@anholt.net>
+ *      Dave Airlie <airlied@linux.ie>
+ *      Jesse Barnes <jesse.barnes@intel.com>
+ */
+
+#include "drmP.h"
+#include "drm_crtc.h"
+#include "drm_crtc_helper.h"
+
+/*
+ * Detailed mode info for a standard 640x480@60Hz monitor
+ */
+static struct drm_display_mode std_mode[] = {
+	{ DRM_MODE("640x480", DRM_MODE_TYPE_DEFAULT, 25200, 640, 656,
+		   752, 800, 0, 480, 490, 492, 525, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) },
+};
+
+/**
+ * drm_helper_probe_connector_modes - get complete set of display modes
+ * @dev: DRM device
+ * @maxX: max width for modes
+ * @maxY: max height for modes
+ *
+ * LOCKING:
+ * Caller must hold mode config lock.
+ *
+ * Based on @dev's mode_config layout, scan all the connectors and try to detect
+ * modes on them.  Modes will first be added to the connector's probed_modes
+ * list, then culled (based on validity and the @maxX, @maxY parameters) and
+ * put into the normal modes list.
+ *
+ * Intended to be used either at bootup time or when major configuration
+ * changes have occurred.
+ *
+ * FIXME: take into account monitor limits
+ */
+void drm_helper_probe_single_connector_modes(struct drm_connector *connector,
+					     uint32_t maxX, uint32_t maxY)
+{
+	struct drm_device *dev = connector->dev;
+	struct drm_display_mode *mode, *t;
+	struct drm_connector_helper_funcs *connector_funcs =
+		connector->helper_private;
+	int ret;
+
+	DRM_DEBUG("%s\n", drm_get_connector_name(connector));
+	/* set all modes to the unverified state */
+	list_for_each_entry_safe(mode, t, &connector->modes, head)
+		mode->status = MODE_UNVERIFIED;
+
+	connector->status = connector->funcs->detect(connector);
+
+	if (connector->status == connector_status_disconnected) {
+		DRM_DEBUG("%s is disconnected\n",
+			  drm_get_connector_name(connector));
+		/* TODO set EDID to NULL */
+		return;
+	}
+
+	ret = (*connector_funcs->get_modes)(connector);
+
+	if (ret) {
+		drm_mode_connector_list_update(connector);
+	}
+
+	if (maxX && maxY)
+		drm_mode_validate_size(dev, &connector->modes, maxX,
+				       maxY, 0);
+	list_for_each_entry_safe(mode, t, &connector->modes, head) {
+		if (mode->status == MODE_OK)
+			mode->status = connector_funcs->mode_valid(connector,
+								   mode);
+	}
+
+
+	drm_mode_prune_invalid(dev, &connector->modes, true);
+
+	if (list_empty(&connector->modes)) {
+		struct drm_display_mode *stdmode;
+
+		DRM_DEBUG("No valid modes on %s\n",
+			  drm_get_connector_name(connector));
+
+		/* Should we do this here ???
+		 * When no valid EDID modes are available we end up
+		 * here and bailed in the past, now we add a standard
+		 * 640x480@60Hz mode and carry on.
+		 */
+		stdmode = drm_mode_duplicate(dev, &std_mode[0]);
+		drm_mode_probed_add(connector, stdmode);
+		drm_mode_list_concat(&connector->probed_modes,
+				     &connector->modes);
+
+		DRM_DEBUG("Adding standard 640x480 @ 60Hz to %s\n",
+			  drm_get_connector_name(connector));
+	}
+
+	drm_mode_sort(&connector->modes);
+
+	DRM_DEBUG("Probed modes for %s\n", drm_get_connector_name(connector));
+	list_for_each_entry_safe(mode, t, &connector->modes, head) {
+		mode->vrefresh = drm_mode_vrefresh(mode);
+
+		drm_mode_set_crtcinfo(mode, CRTC_INTERLACE_HALVE_V);
+		drm_mode_debug_printmodeline(mode);
+	}
+}
+EXPORT_SYMBOL(drm_helper_probe_single_connector_modes);
+
+void drm_helper_probe_connector_modes(struct drm_device *dev, uint32_t maxX,
+				      uint32_t maxY)
+{
+	struct drm_connector *connector;
+
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+		drm_helper_probe_single_connector_modes(connector, maxX, maxY);
+	}
+}
+EXPORT_SYMBOL(drm_helper_probe_connector_modes);
+
+
+/**
+ * drm_helper_crtc_in_use - check if a given CRTC is in a mode_config
+ * @crtc: CRTC to check
+ *
+ * LOCKING:
+ * Caller must hold mode config lock.
+ *
+ * Walk @crtc's DRM device's mode_config and see if it's in use.
+ *
+ * RETURNS:
+ * True if @crtc is part of the mode_config, false otherwise.
+ */
+bool drm_helper_crtc_in_use(struct drm_crtc *crtc)
+{
+	struct drm_encoder *encoder;
+	struct drm_device *dev = crtc->dev;
+	/* FIXME: Locking around list access? */
+	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head)
+		if (encoder->crtc == crtc)
+			return true;
+	return false;
+}
+EXPORT_SYMBOL(drm_helper_crtc_in_use);
+
+/**
+ * drm_disable_unused_functions - disable unused objects
+ * @dev: DRM device
+ *
+ * LOCKING:
+ * Caller must hold mode config lock.
+ *
+ * If an connector or CRTC isn't part of @dev's mode_config, it can be disabled
+ * by calling its dpms function, which should power it off.
+ */
+void drm_helper_disable_unused_functions(struct drm_device *dev)
+{
+	struct drm_encoder *encoder;
+	struct drm_encoder_helper_funcs *encoder_funcs;
+	struct drm_crtc *crtc;
+
+	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+		encoder_funcs = encoder->helper_private;
+		if (!encoder->crtc)
+			(*encoder_funcs->dpms)(encoder, DRM_MODE_DPMS_OFF);
+	}
+
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+		struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private;
+		crtc->enabled = drm_helper_crtc_in_use(crtc);
+		if (!crtc->enabled) {
+			crtc_funcs->dpms(crtc, DRM_MODE_DPMS_OFF);
+			crtc->fb = NULL;
+		}
+	}
+}
+EXPORT_SYMBOL(drm_helper_disable_unused_functions);
+
+static struct drm_display_mode *drm_has_preferred_mode(struct drm_connector *connector, int width, int height)
+{
+	struct drm_display_mode *mode;
+
+	list_for_each_entry(mode, &connector->modes, head) {
+		if (drm_mode_width(mode) > width ||
+		    drm_mode_height(mode) > height)
+			continue;
+		if (mode->type & DRM_MODE_TYPE_PREFERRED)
+			return mode;
+	}
+	return NULL;
+}
+
+static bool drm_connector_enabled(struct drm_connector *connector, bool strict)
+{
+	bool enable;
+
+	if (strict) {
+		enable = connector->status == connector_status_connected;
+	} else {
+		enable = connector->status != connector_status_disconnected;
+	}
+	return enable;
+}
+
+static void drm_enable_connectors(struct drm_device *dev, bool *enabled)
+{
+	bool any_enabled = false;
+	struct drm_connector *connector;
+	int i = 0;
+
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+		enabled[i] = drm_connector_enabled(connector, true);
+		any_enabled |= enabled[i];
+		i++;
+	}
+
+	if (any_enabled)
+		return;
+
+	i = 0;
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+		enabled[i] = drm_connector_enabled(connector, false);
+		i++;
+	}
+}
+
+static bool drm_target_preferred(struct drm_device *dev,
+				 struct drm_display_mode **modes,
+				 bool *enabled, int width, int height)
+{
+	struct drm_connector *connector;
+	int i = 0;
+
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+
+		if (enabled[i] == false) {
+			i++;
+			continue;
+		}
+
+		modes[i] = drm_has_preferred_mode(connector, width, height);
+		if (!modes[i]) {
+			list_for_each_entry(modes[i], &connector->modes, head)
+				break;
+		}
+		i++;
+	}
+	return true;
+}
+
+static int drm_pick_crtcs(struct drm_device *dev,
+			  struct drm_crtc **best_crtcs,
+			  struct drm_display_mode **modes,
+			  int n, int width, int height)
+{
+	int c, o;
+	struct drm_connector *connector;
+	struct drm_connector_helper_funcs *connector_funcs;
+	struct drm_encoder *encoder;
+	struct drm_crtc *best_crtc;
+	int my_score, best_score, score;
+	struct drm_crtc **crtcs, *crtc;
+
+	if (n == dev->mode_config.num_connector)
+		return 0;
+	c = 0;
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+		if (c == n)
+			break;
+		c++;
+	}
+
+	best_crtcs[n] = NULL;
+	best_crtc = NULL;
+	best_score = drm_pick_crtcs(dev, best_crtcs, modes, n+1, width, height);
+	if (modes[n] == NULL)
+		return best_score;
+
+	crtcs = kmalloc(dev->mode_config.num_connector *
+			sizeof(struct drm_crtc *), GFP_KERNEL);
+	if (!crtcs)
+		return best_score;
+
+	my_score = 1;
+	if (connector->status == connector_status_connected)
+		my_score++;
+	if (drm_has_preferred_mode(connector, width, height))
+		my_score++;
+
+	connector_funcs = connector->helper_private;
+	encoder = connector_funcs->best_encoder(connector);
+	if (!encoder)
+		goto out;
+
+	connector->encoder = encoder;
+
+	/* select a crtc for this connector and then attempt to configure
+	   remaining connectors */
+	c = 0;
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+
+		if ((connector->encoder->possible_crtcs & (1 << c)) == 0) {
+			c++;
+			continue;
+		}
+
+		for (o = 0; o < n; o++)
+			if (best_crtcs[o] == crtc)
+				break;
+
+		if (o < n) {
+			/* ignore cloning for now */
+			c++;
+			continue;
+		}
+
+		crtcs[n] = crtc;
+		memcpy(crtcs, best_crtcs, n * sizeof(struct drm_crtc *));
+		score = my_score + drm_pick_crtcs(dev, crtcs, modes, n + 1,
+						  width, height);
+		if (score > best_score) {
+			best_crtc = crtc;
+			best_score = score;
+			memcpy(best_crtcs, crtcs,
+			       dev->mode_config.num_connector *
+			       sizeof(struct drm_crtc *));
+		}
+		c++;
+	}
+out:
+	kfree(crtcs);
+	return best_score;
+}
+
+static void drm_setup_crtcs(struct drm_device *dev)
+{
+	struct drm_crtc **crtcs;
+	struct drm_display_mode **modes;
+	struct drm_encoder *encoder;
+	struct drm_connector *connector;
+	bool *enabled;
+	int width, height;
+	int i, ret;
+
+	width = dev->mode_config.max_width;
+	height = dev->mode_config.max_height;
+
+	/* clean out all the encoder/crtc combos */
+	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+		encoder->crtc = NULL;
+	}
+
+	crtcs = kcalloc(dev->mode_config.num_connector,
+			sizeof(struct drm_crtc *), GFP_KERNEL);
+	modes = kcalloc(dev->mode_config.num_connector,
+			sizeof(struct drm_display_mode *), GFP_KERNEL);
+	enabled = kcalloc(dev->mode_config.num_connector,
+			  sizeof(bool), GFP_KERNEL);
+
+	drm_enable_connectors(dev, enabled);
+
+	ret = drm_target_preferred(dev, modes, enabled, width, height);
+	if (!ret)
+		DRM_ERROR("Unable to find initial modes\n");
+
+	drm_pick_crtcs(dev, crtcs, modes, 0, width, height);
+
+	i = 0;
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+		struct drm_display_mode *mode = modes[i];
+		struct drm_crtc *crtc = crtcs[i];
+
+		if (connector->encoder == NULL) {
+			i++;
+			continue;
+		}
+
+		if (mode && crtc) {
+			crtc->desired_mode = mode;
+			connector->encoder->crtc = crtc;
+		} else
+			connector->encoder->crtc = NULL;
+		i++;
+	}
+
+	kfree(crtcs);
+	kfree(modes);
+	kfree(enabled);
+}
+/**
+ * drm_crtc_set_mode - set a mode
+ * @crtc: CRTC to program
+ * @mode: mode to use
+ * @x: width of mode
+ * @y: height of mode
+ *
+ * LOCKING:
+ * Caller must hold mode config lock.
+ *
+ * Try to set @mode on @crtc.  Give @crtc and its associated connectors a chance
+ * to fixup or reject the mode prior to trying to set it.
+ *
+ * RETURNS:
+ * True if the mode was set successfully, or false otherwise.
+ */
+bool drm_crtc_helper_set_mode(struct drm_crtc *crtc,
+			      struct drm_display_mode *mode,
+			      int x, int y)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_display_mode *adjusted_mode, saved_mode;
+	struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private;
+	struct drm_encoder_helper_funcs *encoder_funcs;
+	int saved_x, saved_y;
+	struct drm_encoder *encoder;
+	bool ret = true;
+
+	adjusted_mode = drm_mode_duplicate(dev, mode);
+
+	crtc->enabled = drm_helper_crtc_in_use(crtc);
+
+	if (!crtc->enabled)
+		return true;
+
+	saved_mode = crtc->mode;
+	saved_x = crtc->x;
+	saved_y = crtc->y;
+
+	/* Update crtc values up front so the driver can rely on them for mode
+	 * setting.
+	 */
+	crtc->mode = *mode;
+	crtc->x = x;
+	crtc->y = y;
+
+	if (drm_mode_equal(&saved_mode, &crtc->mode)) {
+		if (saved_x != crtc->x || saved_y != crtc->y) {
+			crtc_funcs->mode_set_base(crtc, crtc->x, crtc->y);
+			goto done;
+		}
+	}
+
+	/* Pass our mode to the connectors and the CRTC to give them a chance to
+	 * adjust it according to limitations or connector properties, and also
+	 * a chance to reject the mode entirely.
+	 */
+	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+
+		if (encoder->crtc != crtc)
+			continue;
+		encoder_funcs = encoder->helper_private;
+		if (!(ret = encoder_funcs->mode_fixup(encoder, mode,
+						      adjusted_mode))) {
+			goto done;
+		}
+	}
+
+	if (!(ret = crtc_funcs->mode_fixup(crtc, mode, adjusted_mode))) {
+		goto done;
+	}
+
+	/* Prepare the encoders and CRTCs before setting the mode. */
+	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+
+		if (encoder->crtc != crtc)
+			continue;
+		encoder_funcs = encoder->helper_private;
+		/* Disable the encoders as the first thing we do. */
+		encoder_funcs->prepare(encoder);
+	}
+
+	crtc_funcs->prepare(crtc);
+
+	/* Set up the DPLL and any encoders state that needs to adjust or depend
+	 * on the DPLL.
+	 */
+	crtc_funcs->mode_set(crtc, mode, adjusted_mode, x, y);
+
+	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+
+		if (encoder->crtc != crtc)
+			continue;
+
+		DRM_INFO("%s: set mode %s %x\n", drm_get_encoder_name(encoder),
+			 mode->name, mode->base.id);
+		encoder_funcs = encoder->helper_private;
+		encoder_funcs->mode_set(encoder, mode, adjusted_mode);
+	}
+
+	/* Now enable the clocks, plane, pipe, and connectors that we set up. */
+	crtc_funcs->commit(crtc);
+
+	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+
+		if (encoder->crtc != crtc)
+			continue;
+
+		encoder_funcs = encoder->helper_private;
+		encoder_funcs->commit(encoder);
+
+	}
+
+	/* XXX free adjustedmode */
+	drm_mode_destroy(dev, adjusted_mode);
+	/* FIXME: add subpixel order */
+done:
+	if (!ret) {
+		crtc->mode = saved_mode;
+		crtc->x = saved_x;
+		crtc->y = saved_y;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(drm_crtc_helper_set_mode);
+
+
+/**
+ * drm_crtc_helper_set_config - set a new config from userspace
+ * @crtc: CRTC to setup
+ * @crtc_info: user provided configuration
+ * @new_mode: new mode to set
+ * @connector_set: set of connectors for the new config
+ * @fb: new framebuffer
+ *
+ * LOCKING:
+ * Caller must hold mode config lock.
+ *
+ * Setup a new configuration, provided by the user in @crtc_info, and enable
+ * it.
+ *
+ * RETURNS:
+ * Zero. (FIXME)
+ */
+int drm_crtc_helper_set_config(struct drm_mode_set *set)
+{
+	struct drm_device *dev;
+	struct drm_crtc **save_crtcs, *new_crtc;
+	struct drm_encoder **save_encoders, *new_encoder;
+	bool save_enabled;
+	bool changed = false;
+	bool flip_or_move = false;
+	struct drm_connector *connector;
+	int count = 0, ro, fail = 0;
+	struct drm_crtc_helper_funcs *crtc_funcs;
+	int ret = 0;
+
+	DRM_DEBUG("\n");
+
+	if (!set)
+		return -EINVAL;
+
+	if (!set->crtc)
+		return -EINVAL;
+
+	if (!set->crtc->helper_private)
+		return -EINVAL;
+
+	crtc_funcs = set->crtc->helper_private;
+
+	DRM_DEBUG("crtc: %p %d fb: %p connectors: %p num_connectors: %d (x, y) (%i, %i)\n",
+		  set->crtc, set->crtc->base.id, set->fb, set->connectors,
+		  (int)set->num_connectors, set->x, set->y);
+
+	dev = set->crtc->dev;
+
+	/* save previous config */
+	save_enabled = set->crtc->enabled;
+
+	/* this is meant to be num_connector not num_crtc */
+	save_crtcs = kzalloc(dev->mode_config.num_connector *
+			     sizeof(struct drm_crtc *), GFP_KERNEL);
+	if (!save_crtcs)
+		return -ENOMEM;
+
+	save_encoders = kzalloc(dev->mode_config.num_connector *
+				sizeof(struct drm_encoders *), GFP_KERNEL);
+	if (!save_encoders) {
+		kfree(save_crtcs);
+		return -ENOMEM;
+	}
+
+	/* We should be able to check here if the fb has the same properties
+	 * and then just flip_or_move it */
+	if (set->crtc->fb != set->fb) {
+		/* if we have no fb then its a change not a flip */
+		if (set->crtc->fb == NULL)
+			changed = true;
+		else
+			flip_or_move = true;
+	}
+
+	if (set->x != set->crtc->x || set->y != set->crtc->y)
+		flip_or_move = true;
+
+	if (set->mode && !drm_mode_equal(set->mode, &set->crtc->mode)) {
+		DRM_DEBUG("modes are different\n");
+		drm_mode_debug_printmodeline(&set->crtc->mode);
+		drm_mode_debug_printmodeline(set->mode);
+		changed = true;
+	}
+
+	/* a) traverse passed in connector list and get encoders for them */
+	count = 0;
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+		struct drm_connector_helper_funcs *connector_funcs =
+			connector->helper_private;
+		save_encoders[count++] = connector->encoder;
+		new_encoder = connector->encoder;
+		for (ro = 0; ro < set->num_connectors; ro++) {
+			if (set->connectors[ro] == connector) {
+				new_encoder = connector_funcs->best_encoder(connector);
+				/* if we can't get an encoder for a connector
+				   we are setting now - then fail */
+				if (new_encoder == NULL)
+					/* don't break so fail path works correct */
+					fail = 1;
+				break;
+			}
+		}
+
+		if (new_encoder != connector->encoder) {
+			changed = true;
+			connector->encoder = new_encoder;
+		}
+	}
+
+	if (fail) {
+		ret = -EINVAL;
+		goto fail_no_encoder;
+	}
+
+	count = 0;
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+		if (!connector->encoder)
+			continue;
+
+		save_crtcs[count++] = connector->encoder->crtc;
+
+		if (connector->encoder->crtc == set->crtc)
+			new_crtc = NULL;
+		else
+			new_crtc = connector->encoder->crtc;
+
+		for (ro = 0; ro < set->num_connectors; ro++) {
+			if (set->connectors[ro] == connector)
+				new_crtc = set->crtc;
+		}
+		if (new_crtc != connector->encoder->crtc) {
+			changed = true;
+			connector->encoder->crtc = new_crtc;
+		}
+	}
+
+	/* mode_set_base is not a required function */
+	if (flip_or_move && !crtc_funcs->mode_set_base)
+		changed = true;
+
+	if (changed) {
+		set->crtc->fb = set->fb;
+		set->crtc->enabled = (set->mode != NULL);
+		if (set->mode != NULL) {
+			DRM_DEBUG("attempting to set mode from userspace\n");
+			drm_mode_debug_printmodeline(set->mode);
+			if (!drm_crtc_helper_set_mode(set->crtc, set->mode,
+						      set->x, set->y)) {
+				ret = -EINVAL;
+				goto fail_set_mode;
+			}
+			/* TODO are these needed? */
+			set->crtc->desired_x = set->x;
+			set->crtc->desired_y = set->y;
+			set->crtc->desired_mode = set->mode;
+		}
+		drm_helper_disable_unused_functions(dev);
+	} else if (flip_or_move) {
+		if (set->crtc->fb != set->fb)
+			set->crtc->fb = set->fb;
+		crtc_funcs->mode_set_base(set->crtc, set->x, set->y);
+	}
+
+	kfree(save_encoders);
+	kfree(save_crtcs);
+	return 0;
+
+fail_set_mode:
+	set->crtc->enabled = save_enabled;
+	count = 0;
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head)
+		connector->encoder->crtc = save_crtcs[count++];
+fail_no_encoder:
+	kfree(save_crtcs);
+	count = 0;
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+		connector->encoder = save_encoders[count++];
+	}
+	kfree(save_encoders);
+	return ret;
+}
+EXPORT_SYMBOL(drm_crtc_helper_set_config);
+
+bool drm_helper_plugged_event(struct drm_device *dev)
+{
+	DRM_DEBUG("\n");
+
+	drm_helper_probe_connector_modes(dev, dev->mode_config.max_width,
+					 dev->mode_config.max_height);
+
+	drm_setup_crtcs(dev);
+
+	/* alert the driver fb layer */
+	dev->mode_config.funcs->fb_changed(dev);
+
+	/* FIXME: send hotplug event */
+	return true;
+}
+/**
+ * drm_initial_config - setup a sane initial connector configuration
+ * @dev: DRM device
+ * @can_grow: this configuration is growable
+ *
+ * LOCKING:
+ * Called at init time, must take mode config lock.
+ *
+ * Scan the CRTCs and connectors and try to put together an initial setup.
+ * At the moment, this is a cloned configuration across all heads with
+ * a new framebuffer object as the backing store.
+ *
+ * RETURNS:
+ * Zero if everything went ok, nonzero otherwise.
+ */
+bool drm_helper_initial_config(struct drm_device *dev, bool can_grow)
+{
+	int ret = false;
+
+	drm_helper_plugged_event(dev);
+	return ret;
+}
+EXPORT_SYMBOL(drm_helper_initial_config);
+
+/**
+ * drm_hotplug_stage_two
+ * @dev DRM device
+ * @connector hotpluged connector
+ *
+ * LOCKING.
+ * Caller must hold mode config lock, function might grab struct lock.
+ *
+ * Stage two of a hotplug.
+ *
+ * RETURNS:
+ * Zero on success, errno on failure.
+ */
+int drm_helper_hotplug_stage_two(struct drm_device *dev)
+{
+	dev->mode_config.hotplug_counter++;
+
+	drm_helper_plugged_event(dev);
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_helper_hotplug_stage_two);
+
+int drm_helper_mode_fill_fb_struct(struct drm_framebuffer *fb,
+				   struct drm_mode_fb_cmd *mode_cmd)
+{
+	fb->width = mode_cmd->width;
+	fb->height = mode_cmd->height;
+	fb->pitch = mode_cmd->pitch;
+	fb->bits_per_pixel = mode_cmd->bpp;
+	fb->depth = mode_cmd->depth;
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_helper_mode_fill_fb_struct);
+
+int drm_helper_resume_force_mode(struct drm_device *dev)
+{
+	struct drm_crtc *crtc;
+	int ret;
+
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+
+		if (!crtc->enabled)
+			continue;
+
+		ret = drm_crtc_helper_set_mode(crtc, &crtc->mode, crtc->x,
+					       crtc->y);
+
+		if (ret == false)
+			DRM_ERROR("failed to set mode on crtc %p\n", crtc);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(drm_helper_resume_force_mode);
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 98a781375f60..0b9f3164a3b2 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -126,6 +126,26 @@ static struct drm_ioctl_desc drm_ioctls[] = {
 	DRM_IOCTL_DEF(DRM_IOCTL_GEM_CLOSE, drm_gem_close_ioctl, 0),
 	DRM_IOCTL_DEF(DRM_IOCTL_GEM_FLINK, drm_gem_flink_ioctl, DRM_AUTH),
 	DRM_IOCTL_DEF(DRM_IOCTL_GEM_OPEN, drm_gem_open_ioctl, DRM_AUTH),
+
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETRESOURCES, drm_mode_getresources, DRM_MASTER|DRM_CONTROL_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCRTC, drm_mode_getcrtc, DRM_MASTER|DRM_CONTROL_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCONNECTOR, drm_mode_getconnector, DRM_MASTER|DRM_CONTROL_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETCRTC, drm_mode_setcrtc, DRM_MASTER|DRM_CONTROL_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_CURSOR, drm_mode_cursor_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB, drm_mode_addfb, DRM_MASTER|DRM_CONTROL_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_RMFB, drm_mode_rmfb, DRM_MASTER|DRM_CONTROL_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETFB, drm_mode_getfb, DRM_MASTER|DRM_CONTROL_ALLOW),
+
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETPROPERTY, drm_mode_connector_property_set_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPBLOB, drm_mode_getblob_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_ATTACHMODE, drm_mode_attachmode_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_DETACHMODE, drm_mode_detachmode_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPERTY, drm_mode_getproperty_ioctl, DRM_MASTER | DRM_CONTROL_ALLOW),
+
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_REPLACEFB, drm_mode_replacefb, DRM_MASTER|DRM_ROOT_ONLY|DRM_CONTROL_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETENCODER, drm_mode_getencoder, DRM_MASTER|DRM_CONTROL_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETGAMMA, drm_mode_gamma_get_ioctl, DRM_MASTER),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETGAMMA, drm_mode_gamma_set_ioctl, DRM_MASTER),
 };
 
 #define DRM_CORE_IOCTL_COUNT	ARRAY_SIZE( drm_ioctls )
@@ -150,7 +170,7 @@ int drm_lastclose(struct drm_device * dev)
 		dev->driver->lastclose(dev);
 	DRM_DEBUG("driver lastclose completed\n");
 
-	if (dev->irq_enabled)
+	if (dev->irq_enabled && !drm_core_check_feature(dev, DRIVER_MODESET))
 		drm_irq_uninstall(dev);
 
 	mutex_lock(&dev->struct_mutex);
@@ -160,7 +180,8 @@ int drm_lastclose(struct drm_device * dev)
 	del_timer(&dev->timer);
 
 	/* Clear AGP information */
-	if (drm_core_has_AGP(dev) && dev->agp) {
+	if (drm_core_has_AGP(dev) && dev->agp &&
+	    !drm_core_check_feature(dev, DRIVER_MODESET)) {
 		struct drm_agp_mem *entry, *tempe;
 
 		/* Remove AGP resources, but leave dev->agp
@@ -179,7 +200,8 @@ int drm_lastclose(struct drm_device * dev)
 		dev->agp->acquired = 0;
 		dev->agp->enabled = 0;
 	}
-	if (drm_core_check_feature(dev, DRIVER_SG) && dev->sg) {
+	if (drm_core_check_feature(dev, DRIVER_SG) && dev->sg &&
+	    !drm_core_check_feature(dev, DRIVER_MODESET)) {
 		drm_sg_cleanup(dev->sg);
 		dev->sg = NULL;
 	}
@@ -206,7 +228,8 @@ int drm_lastclose(struct drm_device * dev)
 	}
 	dev->queue_count = 0;
 
-	if (drm_core_check_feature(dev, DRIVER_HAVE_DMA))
+	if (drm_core_check_feature(dev, DRIVER_HAVE_DMA) &&
+	    !drm_core_check_feature(dev, DRIVER_MODESET))
 		drm_dma_takedown(dev);
 
 	dev->dev_mapping = NULL;
@@ -307,6 +330,9 @@ static void drm_cleanup(struct drm_device * dev)
 	drm_ht_remove(&dev->map_hash);
 	drm_ctxbitmap_cleanup(dev);
 
+	if (drm_core_check_feature(dev, DRIVER_MODESET))
+		drm_put_minor(&dev->control);
+
 	if (driver->driver_features & DRIVER_GEM)
 		drm_gem_destroy(dev);
 
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
new file mode 100644
index 000000000000..681753e57dc7
--- /dev/null
+++ b/drivers/gpu/drm/drm_edid.c
@@ -0,0 +1,732 @@
+/*
+ * Copyright (c) 2006 Luc Verhaegen (quirks list)
+ * Copyright (c) 2007-2008 Intel Corporation
+ *   Jesse Barnes <jesse.barnes@intel.com>
+ *
+ * DDC probing routines (drm_ddc_read & drm_do_probe_ddc_edid) originally from
+ * FB layer.
+ *   Copyright (C) 2006 Dennis Munsie <dmunsie@cecropia.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#include <linux/kernel.h>
+#include <linux/i2c.h>
+#include <linux/i2c-algo-bit.h>
+#include "drmP.h"
+#include "drm_edid.h"
+
+/*
+ * TODO:
+ *   - support EDID 1.4 (incl. CE blocks)
+ */
+
+/*
+ * EDID blocks out in the wild have a variety of bugs, try to collect
+ * them here (note that userspace may work around broken monitors first,
+ * but fixes should make their way here so that the kernel "just works"
+ * on as many displays as possible).
+ */
+
+/* First detailed mode wrong, use largest 60Hz mode */
+#define EDID_QUIRK_PREFER_LARGE_60		(1 << 0)
+/* Reported 135MHz pixel clock is too high, needs adjustment */
+#define EDID_QUIRK_135_CLOCK_TOO_HIGH		(1 << 1)
+/* Prefer the largest mode at 75 Hz */
+#define EDID_QUIRK_PREFER_LARGE_75		(1 << 2)
+/* Detail timing is in cm not mm */
+#define EDID_QUIRK_DETAILED_IN_CM		(1 << 3)
+/* Detailed timing descriptors have bogus size values, so just take the
+ * maximum size and use that.
+ */
+#define EDID_QUIRK_DETAILED_USE_MAXIMUM_SIZE	(1 << 4)
+/* Monitor forgot to set the first detailed is preferred bit. */
+#define EDID_QUIRK_FIRST_DETAILED_PREFERRED	(1 << 5)
+/* use +hsync +vsync for detailed mode */
+#define EDID_QUIRK_DETAILED_SYNC_PP		(1 << 6)
+
+static struct edid_quirk {
+	char *vendor;
+	int product_id;
+	u32 quirks;
+} edid_quirk_list[] = {
+	/* Acer AL1706 */
+	{ "ACR", 44358, EDID_QUIRK_PREFER_LARGE_60 },
+	/* Acer F51 */
+	{ "API", 0x7602, EDID_QUIRK_PREFER_LARGE_60 },
+	/* Unknown Acer */
+	{ "ACR", 2423, EDID_QUIRK_FIRST_DETAILED_PREFERRED },
+
+	/* Belinea 10 15 55 */
+	{ "MAX", 1516, EDID_QUIRK_PREFER_LARGE_60 },
+	{ "MAX", 0x77e, EDID_QUIRK_PREFER_LARGE_60 },
+
+	/* Envision Peripherals, Inc. EN-7100e */
+	{ "EPI", 59264, EDID_QUIRK_135_CLOCK_TOO_HIGH },
+
+	/* Funai Electronics PM36B */
+	{ "FCM", 13600, EDID_QUIRK_PREFER_LARGE_75 |
+	  EDID_QUIRK_DETAILED_IN_CM },
+
+	/* LG Philips LCD LP154W01-A5 */
+	{ "LPL", 0, EDID_QUIRK_DETAILED_USE_MAXIMUM_SIZE },
+	{ "LPL", 0x2a00, EDID_QUIRK_DETAILED_USE_MAXIMUM_SIZE },
+
+	/* Philips 107p5 CRT */
+	{ "PHL", 57364, EDID_QUIRK_FIRST_DETAILED_PREFERRED },
+
+	/* Proview AY765C */
+	{ "PTS", 765, EDID_QUIRK_FIRST_DETAILED_PREFERRED },
+
+	/* Samsung SyncMaster 205BW.  Note: irony */
+	{ "SAM", 541, EDID_QUIRK_DETAILED_SYNC_PP },
+	/* Samsung SyncMaster 22[5-6]BW */
+	{ "SAM", 596, EDID_QUIRK_PREFER_LARGE_60 },
+	{ "SAM", 638, EDID_QUIRK_PREFER_LARGE_60 },
+};
+
+
+/* Valid EDID header has these bytes */
+static u8 edid_header[] = { 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
+
+/**
+ * edid_is_valid - sanity check EDID data
+ * @edid: EDID data
+ *
+ * Sanity check the EDID block by looking at the header, the version number
+ * and the checksum.  Return 0 if the EDID doesn't check out, or 1 if it's
+ * valid.
+ */
+static bool edid_is_valid(struct edid *edid)
+{
+	int i;
+	u8 csum = 0;
+	u8 *raw_edid = (u8 *)edid;
+
+	if (memcmp(edid->header, edid_header, sizeof(edid_header)))
+		goto bad;
+	if (edid->version != 1) {
+		DRM_ERROR("EDID has major version %d, instead of 1\n", edid->version);
+		goto bad;
+	}
+	if (edid->revision <= 0 || edid->revision > 3) {
+		DRM_ERROR("EDID has minor version %d, which is not between 0-3\n", edid->revision);
+		goto bad;
+	}
+
+	for (i = 0; i < EDID_LENGTH; i++)
+		csum += raw_edid[i];
+	if (csum) {
+		DRM_ERROR("EDID checksum is invalid, remainder is %d\n", csum);
+		goto bad;
+	}
+
+	return 1;
+
+bad:
+	if (raw_edid) {
+		DRM_ERROR("Raw EDID:\n");
+		print_hex_dump_bytes(KERN_ERR, DUMP_PREFIX_NONE, raw_edid, EDID_LENGTH);
+		printk("\n");
+	}
+	return 0;
+}
+
+/**
+ * edid_vendor - match a string against EDID's obfuscated vendor field
+ * @edid: EDID to match
+ * @vendor: vendor string
+ *
+ * Returns true if @vendor is in @edid, false otherwise
+ */
+static bool edid_vendor(struct edid *edid, char *vendor)
+{
+	char edid_vendor[3];
+
+	edid_vendor[0] = ((edid->mfg_id[0] & 0x7c) >> 2) + '@';
+	edid_vendor[1] = (((edid->mfg_id[0] & 0x3) << 3) |
+			  ((edid->mfg_id[1] & 0xe0) >> 5)) + '@';
+	edid_vendor[2] = (edid->mfg_id[2] & 0x1f) + '@';
+
+	return !strncmp(edid_vendor, vendor, 3);
+}
+
+/**
+ * edid_get_quirks - return quirk flags for a given EDID
+ * @edid: EDID to process
+ *
+ * This tells subsequent routines what fixes they need to apply.
+ */
+static u32 edid_get_quirks(struct edid *edid)
+{
+	struct edid_quirk *quirk;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(edid_quirk_list); i++) {
+		quirk = &edid_quirk_list[i];
+
+		if (edid_vendor(edid, quirk->vendor) &&
+		    (EDID_PRODUCT_ID(edid) == quirk->product_id))
+			return quirk->quirks;
+	}
+
+	return 0;
+}
+
+#define MODE_SIZE(m) ((m)->hdisplay * (m)->vdisplay)
+#define MODE_REFRESH_DIFF(m,r) (abs((m)->vrefresh - target_refresh))
+
+
+/**
+ * edid_fixup_preferred - set preferred modes based on quirk list
+ * @connector: has mode list to fix up
+ * @quirks: quirks list
+ *
+ * Walk the mode list for @connector, clearing the preferred status
+ * on existing modes and setting it anew for the right mode ala @quirks.
+ */
+static void edid_fixup_preferred(struct drm_connector *connector,
+				 u32 quirks)
+{
+	struct drm_display_mode *t, *cur_mode, *preferred_mode;
+	int target_refresh;
+
+	if (list_empty(&connector->probed_modes))
+		return;
+
+	if (quirks & EDID_QUIRK_PREFER_LARGE_60)
+		target_refresh = 60;
+	if (quirks & EDID_QUIRK_PREFER_LARGE_75)
+		target_refresh = 75;
+
+	preferred_mode = list_first_entry(&connector->probed_modes,
+					  struct drm_display_mode, head);
+
+	list_for_each_entry_safe(cur_mode, t, &connector->probed_modes, head) {
+		cur_mode->type &= ~DRM_MODE_TYPE_PREFERRED;
+
+		if (cur_mode == preferred_mode)
+			continue;
+
+		/* Largest mode is preferred */
+		if (MODE_SIZE(cur_mode) > MODE_SIZE(preferred_mode))
+			preferred_mode = cur_mode;
+
+		/* At a given size, try to get closest to target refresh */
+		if ((MODE_SIZE(cur_mode) == MODE_SIZE(preferred_mode)) &&
+		    MODE_REFRESH_DIFF(cur_mode, target_refresh) <
+		    MODE_REFRESH_DIFF(preferred_mode, target_refresh)) {
+			preferred_mode = cur_mode;
+		}
+	}
+
+	preferred_mode->type |= DRM_MODE_TYPE_PREFERRED;
+}
+
+/**
+ * drm_mode_std - convert standard mode info (width, height, refresh) into mode
+ * @t: standard timing params
+ *
+ * Take the standard timing params (in this case width, aspect, and refresh)
+ * and convert them into a real mode using CVT.
+ *
+ * Punts for now, but should eventually use the FB layer's CVT based mode
+ * generation code.
+ */
+struct drm_display_mode *drm_mode_std(struct drm_device *dev,
+				      struct std_timing *t)
+{
+	struct drm_display_mode *mode;
+	int hsize = t->hsize * 8 + 248, vsize;
+
+	mode = drm_mode_create(dev);
+	if (!mode)
+		return NULL;
+
+	if (t->aspect_ratio == 0)
+		vsize = (hsize * 10) / 16;
+	else if (t->aspect_ratio == 1)
+		vsize = (hsize * 3) / 4;
+	else if (t->aspect_ratio == 2)
+		vsize = (hsize * 4) / 5;
+	else
+		vsize = (hsize * 9) / 16;
+
+	drm_mode_set_name(mode);
+
+	return mode;
+}
+
+/**
+ * drm_mode_detailed - create a new mode from an EDID detailed timing section
+ * @dev: DRM device (needed to create new mode)
+ * @edid: EDID block
+ * @timing: EDID detailed timing info
+ * @quirks: quirks to apply
+ *
+ * An EDID detailed timing block contains enough info for us to create and
+ * return a new struct drm_display_mode.
+ */
+static struct drm_display_mode *drm_mode_detailed(struct drm_device *dev,
+						  struct edid *edid,
+						  struct detailed_timing *timing,
+						  u32 quirks)
+{
+	struct drm_display_mode *mode;
+	struct detailed_pixel_timing *pt = &timing->data.pixel_data;
+
+	if (pt->stereo) {
+		printk(KERN_WARNING "stereo mode not supported\n");
+		return NULL;
+	}
+	if (!pt->separate_sync) {
+		printk(KERN_WARNING "integrated sync not supported\n");
+		return NULL;
+	}
+
+	mode = drm_mode_create(dev);
+	if (!mode)
+		return NULL;
+
+	mode->type = DRM_MODE_TYPE_DRIVER;
+
+	if (quirks & EDID_QUIRK_135_CLOCK_TOO_HIGH)
+		timing->pixel_clock = 1088;
+
+	mode->clock = timing->pixel_clock * 10;
+
+	mode->hdisplay = (pt->hactive_hi << 8) | pt->hactive_lo;
+	mode->hsync_start = mode->hdisplay + ((pt->hsync_offset_hi << 8) |
+					      pt->hsync_offset_lo);
+	mode->hsync_end = mode->hsync_start +
+		((pt->hsync_pulse_width_hi << 8) |
+		 pt->hsync_pulse_width_lo);
+	mode->htotal = mode->hdisplay + ((pt->hblank_hi << 8) | pt->hblank_lo);
+
+	mode->vdisplay = (pt->vactive_hi << 8) | pt->vactive_lo;
+	mode->vsync_start = mode->vdisplay + ((pt->vsync_offset_hi << 8) |
+					      pt->vsync_offset_lo);
+	mode->vsync_end = mode->vsync_start +
+		((pt->vsync_pulse_width_hi << 8) |
+		 pt->vsync_pulse_width_lo);
+	mode->vtotal = mode->vdisplay + ((pt->vblank_hi << 8) | pt->vblank_lo);
+
+	drm_mode_set_name(mode);
+
+	if (pt->interlaced)
+		mode->flags |= DRM_MODE_FLAG_INTERLACE;
+
+	if (quirks & EDID_QUIRK_DETAILED_SYNC_PP) {
+		pt->hsync_positive = 1;
+		pt->vsync_positive = 1;
+	}
+
+	mode->flags |= pt->hsync_positive ? DRM_MODE_FLAG_PHSYNC : DRM_MODE_FLAG_NHSYNC;
+	mode->flags |= pt->vsync_positive ? DRM_MODE_FLAG_PVSYNC : DRM_MODE_FLAG_NVSYNC;
+
+	mode->width_mm = pt->width_mm_lo | (pt->width_mm_hi << 8);
+	mode->height_mm = pt->height_mm_lo | (pt->height_mm_hi << 8);
+
+	if (quirks & EDID_QUIRK_DETAILED_IN_CM) {
+		mode->width_mm *= 10;
+		mode->height_mm *= 10;
+	}
+
+	if (quirks & EDID_QUIRK_DETAILED_USE_MAXIMUM_SIZE) {
+		mode->width_mm = edid->width_cm * 10;
+		mode->height_mm = edid->height_cm * 10;
+	}
+
+	return mode;
+}
+
+/*
+ * Detailed mode info for the EDID "established modes" data to use.
+ */
+static struct drm_display_mode edid_est_modes[] = {
+	{ DRM_MODE("800x600", DRM_MODE_TYPE_DRIVER, 40000, 800, 840,
+		   968, 1056, 0, 600, 601, 605, 628, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, /* 800x600@60Hz */
+	{ DRM_MODE("800x600", DRM_MODE_TYPE_DRIVER, 36000, 800, 824,
+		   896, 1024, 0, 600, 601, 603,  625, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, /* 800x600@56Hz */
+	{ DRM_MODE("640x480", DRM_MODE_TYPE_DRIVER, 31500, 640, 656,
+		   720, 840, 0, 480, 481, 484, 500, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) }, /* 640x480@75Hz */
+	{ DRM_MODE("640x480", DRM_MODE_TYPE_DRIVER, 31500, 640, 664,
+		   704,  832, 0, 480, 489, 491, 520, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) }, /* 640x480@72Hz */
+	{ DRM_MODE("640x480", DRM_MODE_TYPE_DRIVER, 30240, 640, 704,
+		   768,  864, 0, 480, 483, 486, 525, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) }, /* 640x480@67Hz */
+	{ DRM_MODE("640x480", DRM_MODE_TYPE_DRIVER, 25200, 640, 656,
+		   752, 800, 0, 480, 490, 492, 525, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) }, /* 640x480@60Hz */
+	{ DRM_MODE("720x400", DRM_MODE_TYPE_DRIVER, 35500, 720, 738,
+		   846, 900, 0, 400, 421, 423,  449, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) }, /* 720x400@88Hz */
+	{ DRM_MODE("720x400", DRM_MODE_TYPE_DRIVER, 28320, 720, 738,
+		   846,  900, 0, 400, 412, 414, 449, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }, /* 720x400@70Hz */
+	{ DRM_MODE("1280x1024", DRM_MODE_TYPE_DRIVER, 135000, 1280, 1296,
+		   1440, 1688, 0, 1024, 1025, 1028, 1066, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, /* 1280x1024@75Hz */
+	{ DRM_MODE("1024x768", DRM_MODE_TYPE_DRIVER, 78800, 1024, 1040,
+		   1136, 1312, 0,  768, 769, 772, 800, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, /* 1024x768@75Hz */
+	{ DRM_MODE("1024x768", DRM_MODE_TYPE_DRIVER, 75000, 1024, 1048,
+		   1184, 1328, 0,  768, 771, 777, 806, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) }, /* 1024x768@70Hz */
+	{ DRM_MODE("1024x768", DRM_MODE_TYPE_DRIVER, 65000, 1024, 1048,
+		   1184, 1344, 0,  768, 771, 777, 806, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) }, /* 1024x768@60Hz */
+	{ DRM_MODE("1024x768", DRM_MODE_TYPE_DRIVER,44900, 1024, 1032,
+		   1208, 1264, 0, 768, 768, 776, 817, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC | DRM_MODE_FLAG_INTERLACE) }, /* 1024x768@43Hz */
+	{ DRM_MODE("832x624", DRM_MODE_TYPE_DRIVER, 57284, 832, 864,
+		   928, 1152, 0, 624, 625, 628, 667, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) }, /* 832x624@75Hz */
+	{ DRM_MODE("800x600", DRM_MODE_TYPE_DRIVER, 49500, 800, 816,
+		   896, 1056, 0, 600, 601, 604,  625, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, /* 800x600@75Hz */
+	{ DRM_MODE("800x600", DRM_MODE_TYPE_DRIVER, 50000, 800, 856,
+		   976, 1040, 0, 600, 637, 643, 666, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, /* 800x600@72Hz */
+	{ DRM_MODE("1152x864", DRM_MODE_TYPE_DRIVER, 108000, 1152, 1216,
+		   1344, 1600, 0,  864, 865, 868, 900, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, /* 1152x864@75Hz */
+};
+
+#define EDID_EST_TIMINGS 16
+#define EDID_STD_TIMINGS 8
+#define EDID_DETAILED_TIMINGS 4
+
+/**
+ * add_established_modes - get est. modes from EDID and add them
+ * @edid: EDID block to scan
+ *
+ * Each EDID block contains a bitmap of the supported "established modes" list
+ * (defined above).  Tease them out and add them to the global modes list.
+ */
+static int add_established_modes(struct drm_connector *connector, struct edid *edid)
+{
+	struct drm_device *dev = connector->dev;
+	unsigned long est_bits = edid->established_timings.t1 |
+		(edid->established_timings.t2 << 8) |
+		((edid->established_timings.mfg_rsvd & 0x80) << 9);
+	int i, modes = 0;
+
+	for (i = 0; i <= EDID_EST_TIMINGS; i++)
+		if (est_bits & (1<<i)) {
+			struct drm_display_mode *newmode;
+			newmode = drm_mode_duplicate(dev, &edid_est_modes[i]);
+			if (newmode) {
+				drm_mode_probed_add(connector, newmode);
+				modes++;
+			}
+		}
+
+	return modes;
+}
+
+/**
+ * add_standard_modes - get std. modes from EDID and add them
+ * @edid: EDID block to scan
+ *
+ * Standard modes can be calculated using the CVT standard.  Grab them from
+ * @edid, calculate them, and add them to the list.
+ */
+static int add_standard_modes(struct drm_connector *connector, struct edid *edid)
+{
+	struct drm_device *dev = connector->dev;
+	int i, modes = 0;
+
+	for (i = 0; i < EDID_STD_TIMINGS; i++) {
+		struct std_timing *t = &edid->standard_timings[i];
+		struct drm_display_mode *newmode;
+
+		/* If std timings bytes are 1, 1 it's empty */
+		if (t->hsize == 1 && (t->aspect_ratio | t->vfreq) == 1)
+			continue;
+
+		newmode = drm_mode_std(dev, &edid->standard_timings[i]);
+		if (newmode) {
+			drm_mode_probed_add(connector, newmode);
+			modes++;
+		}
+	}
+
+	return modes;
+}
+
+/**
+ * add_detailed_modes - get detailed mode info from EDID data
+ * @connector: attached connector
+ * @edid: EDID block to scan
+ * @quirks: quirks to apply
+ *
+ * Some of the detailed timing sections may contain mode information.  Grab
+ * it and add it to the list.
+ */
+static int add_detailed_info(struct drm_connector *connector,
+			     struct edid *edid, u32 quirks)
+{
+	struct drm_device *dev = connector->dev;
+	int i, j, modes = 0;
+
+	for (i = 0; i < EDID_DETAILED_TIMINGS; i++) {
+		struct detailed_timing *timing = &edid->detailed_timings[i];
+		struct detailed_non_pixel *data = &timing->data.other_data;
+		struct drm_display_mode *newmode;
+
+		/* EDID up to and including 1.2 may put monitor info here */
+		if (edid->version == 1 && edid->revision < 3)
+			continue;
+
+		/* Detailed mode timing */
+		if (timing->pixel_clock) {
+			newmode = drm_mode_detailed(dev, edid, timing, quirks);
+			if (!newmode)
+				continue;
+
+			/* First detailed mode is preferred */
+			if (i == 0 && edid->preferred_timing)
+				newmode->type |= DRM_MODE_TYPE_PREFERRED;
+			drm_mode_probed_add(connector, newmode);
+
+			modes++;
+			continue;
+		}
+
+		/* Other timing or info */
+		switch (data->type) {
+		case EDID_DETAIL_MONITOR_SERIAL:
+			break;
+		case EDID_DETAIL_MONITOR_STRING:
+			break;
+		case EDID_DETAIL_MONITOR_RANGE:
+			/* Get monitor range data */
+			break;
+		case EDID_DETAIL_MONITOR_NAME:
+			break;
+		case EDID_DETAIL_MONITOR_CPDATA:
+			break;
+		case EDID_DETAIL_STD_MODES:
+			/* Five modes per detailed section */
+			for (j = 0; j < 5; i++) {
+				struct std_timing *std;
+				struct drm_display_mode *newmode;
+
+				std = &data->data.timings[j];
+				newmode = drm_mode_std(dev, std);
+				if (newmode) {
+					drm_mode_probed_add(connector, newmode);
+					modes++;
+				}
+			}
+			break;
+		default:
+			break;
+		}
+	}
+
+	return modes;
+}
+
+#define DDC_ADDR 0x50
+
+unsigned char *drm_do_probe_ddc_edid(struct i2c_adapter *adapter)
+{
+	unsigned char start = 0x0;
+	unsigned char *buf = kmalloc(EDID_LENGTH, GFP_KERNEL);
+	struct i2c_msg msgs[] = {
+		{
+			.addr	= DDC_ADDR,
+			.flags	= 0,
+			.len	= 1,
+			.buf	= &start,
+		}, {
+			.addr	= DDC_ADDR,
+			.flags	= I2C_M_RD,
+			.len	= EDID_LENGTH,
+			.buf	= buf,
+		}
+	};
+
+	if (!buf) {
+		dev_warn(&adapter->dev, "unable to allocate memory for EDID "
+			 "block.\n");
+		return NULL;
+	}
+
+	if (i2c_transfer(adapter, msgs, 2) == 2)
+		return buf;
+
+	dev_info(&adapter->dev, "unable to read EDID block.\n");
+	kfree(buf);
+	return NULL;
+}
+EXPORT_SYMBOL(drm_do_probe_ddc_edid);
+
+static unsigned char *drm_ddc_read(struct i2c_adapter *adapter)
+{
+	struct i2c_algo_bit_data *algo_data = adapter->algo_data;
+	unsigned char *edid = NULL;
+	int i, j;
+
+	algo_data->setscl(algo_data->data, 1);
+
+	for (i = 0; i < 1; i++) {
+		/* For some old monitors we need the
+		 * following process to initialize/stop DDC
+		 */
+		algo_data->setsda(algo_data->data, 1);
+		msleep(13);
+
+		algo_data->setscl(algo_data->data, 1);
+		for (j = 0; j < 5; j++) {
+			msleep(10);
+			if (algo_data->getscl(algo_data->data))
+				break;
+		}
+		if (j == 5)
+			continue;
+
+		algo_data->setsda(algo_data->data, 0);
+		msleep(15);
+		algo_data->setscl(algo_data->data, 0);
+		msleep(15);
+		algo_data->setsda(algo_data->data, 1);
+		msleep(15);
+
+		/* Do the real work */
+		edid = drm_do_probe_ddc_edid(adapter);
+		algo_data->setsda(algo_data->data, 0);
+		algo_data->setscl(algo_data->data, 0);
+		msleep(15);
+
+		algo_data->setscl(algo_data->data, 1);
+		for (j = 0; j < 10; j++) {
+			msleep(10);
+			if (algo_data->getscl(algo_data->data))
+				break;
+		}
+
+		algo_data->setsda(algo_data->data, 1);
+		msleep(15);
+		algo_data->setscl(algo_data->data, 0);
+		algo_data->setsda(algo_data->data, 0);
+		if (edid)
+			break;
+	}
+	/* Release the DDC lines when done or the Apple Cinema HD display
+	 * will switch off
+	 */
+	algo_data->setsda(algo_data->data, 1);
+	algo_data->setscl(algo_data->data, 1);
+
+	return edid;
+}
+
+/**
+ * drm_get_edid - get EDID data, if available
+ * @connector: connector we're probing
+ * @adapter: i2c adapter to use for DDC
+ *
+ * Poke the given connector's i2c channel to grab EDID data if possible.
+ *
+ * Return edid data or NULL if we couldn't find any.
+ */
+struct edid *drm_get_edid(struct drm_connector *connector,
+			  struct i2c_adapter *adapter)
+{
+	struct edid *edid;
+
+	edid = (struct edid *)drm_ddc_read(adapter);
+	if (!edid) {
+		dev_warn(&connector->dev->pdev->dev, "%s: no EDID data\n",
+			 drm_get_connector_name(connector));
+		return NULL;
+	}
+	if (!edid_is_valid(edid)) {
+		dev_warn(&connector->dev->pdev->dev, "%s: EDID invalid.\n",
+			 drm_get_connector_name(connector));
+		kfree(edid);
+		return NULL;
+	}
+
+	connector->display_info.raw_edid = (char *)edid;
+
+	return edid;
+}
+EXPORT_SYMBOL(drm_get_edid);
+
+/**
+ * drm_add_edid_modes - add modes from EDID data, if available
+ * @connector: connector we're probing
+ * @edid: edid data
+ *
+ * Add the specified modes to the connector's mode list.
+ *
+ * Return number of modes added or 0 if we couldn't find any.
+ */
+int drm_add_edid_modes(struct drm_connector *connector, struct edid *edid)
+{
+	int num_modes = 0;
+	u32 quirks;
+
+	if (edid == NULL) {
+		return 0;
+	}
+	if (!edid_is_valid(edid)) {
+		dev_warn(&connector->dev->pdev->dev, "%s: EDID invalid.\n",
+			 drm_get_connector_name(connector));
+		return 0;
+	}
+
+	quirks = edid_get_quirks(edid);
+
+	num_modes += add_established_modes(connector, edid);
+	num_modes += add_standard_modes(connector, edid);
+	num_modes += add_detailed_info(connector, edid, quirks);
+
+	if (quirks & (EDID_QUIRK_PREFER_LARGE_60 | EDID_QUIRK_PREFER_LARGE_75))
+		edid_fixup_preferred(connector, quirks);
+
+	connector->display_info.serration_vsync = edid->serration_vsync;
+	connector->display_info.sync_on_green = edid->sync_on_green;
+	connector->display_info.composite_sync = edid->composite_sync;
+	connector->display_info.separate_syncs = edid->separate_syncs;
+	connector->display_info.blank_to_black = edid->blank_to_black;
+	connector->display_info.video_level = edid->video_level;
+	connector->display_info.digital = edid->digital;
+	connector->display_info.width_mm = edid->width_cm * 10;
+	connector->display_info.height_mm = edid->height_cm * 10;
+	connector->display_info.gamma = edid->gamma;
+	connector->display_info.gtf_supported = edid->default_gtf;
+	connector->display_info.standard_color = edid->standard_color;
+	connector->display_info.display_type = edid->display_type;
+	connector->display_info.active_off_supported = edid->pm_active_off;
+	connector->display_info.suspend_supported = edid->pm_suspend;
+	connector->display_info.standby_supported = edid->pm_standby;
+	connector->display_info.gamma = edid->gamma;
+
+	return num_modes;
+}
+EXPORT_SYMBOL(drm_add_edid_modes);
diff --git a/drivers/gpu/drm/drm_fops.c b/drivers/gpu/drm/drm_fops.c
index 3a6c439652a5..3733e36d135e 100644
--- a/drivers/gpu/drm/drm_fops.c
+++ b/drivers/gpu/drm/drm_fops.c
@@ -35,7 +35,6 @@
  */
 
 #include "drmP.h"
-#include "drm_sarea.h"
 #include <linux/poll.h>
 #include <linux/smp_lock.h>
 
@@ -55,10 +54,12 @@ static int drm_setup(struct drm_device * dev)
 
 	atomic_set(&dev->ioctl_count, 0);
 	atomic_set(&dev->vma_count, 0);
-	dev->buf_use = 0;
-	atomic_set(&dev->buf_alloc, 0);
 
-	if (drm_core_check_feature(dev, DRIVER_HAVE_DMA)) {
+	if (drm_core_check_feature(dev, DRIVER_HAVE_DMA) &&
+	    !drm_core_check_feature(dev, DRIVER_MODESET)) {
+		dev->buf_use = 0;
+		atomic_set(&dev->buf_alloc, 0);
+
 		i = drm_dma_setup(dev);
 		if (i < 0)
 			return i;
@@ -138,14 +139,14 @@ int drm_open(struct inode *inode, struct file *filp)
 		}
 		spin_unlock(&dev->count_lock);
 	}
-
 out:
 	mutex_lock(&dev->struct_mutex);
-	if (dev->dev_mapping == NULL)
-		dev->dev_mapping = inode->i_mapping;
-	else if (dev->dev_mapping != inode->i_mapping)
-		WARN(1, "dev->dev_mapping not inode mapping (%p expected %p)\n",
-		     dev->dev_mapping, inode->i_mapping);
+	if (minor->type == DRM_MINOR_LEGACY) {
+		BUG_ON((dev->dev_mapping != NULL) &&
+			(dev->dev_mapping != inode->i_mapping));
+		if (dev->dev_mapping == NULL)
+			dev->dev_mapping = inode->i_mapping;
+	}
 	mutex_unlock(&dev->struct_mutex);
 
 	return retcode;
@@ -251,6 +252,7 @@ static int drm_open_helper(struct inode *inode, struct file *filp,
 	priv->lock_count = 0;
 
 	INIT_LIST_HEAD(&priv->lhead);
+	INIT_LIST_HEAD(&priv->fbs);
 
 	if (dev->driver->driver_features & DRIVER_GEM)
 		drm_gem_open(dev, priv);
diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index 1e787f894b3c..1608f8dbfda0 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -305,6 +305,8 @@ int drm_control(struct drm_device *dev, void *data,
 	case DRM_INST_HANDLER:
 		if (!drm_core_check_feature(dev, DRIVER_HAVE_IRQ))
 			return 0;
+		if (drm_core_check_feature(dev, DRIVER_MODESET))
+			return 0;
 		if (dev->if_version < DRM_IF_VERSION(1, 2) &&
 		    ctl->irq != dev->pdev->irq)
 			return -EINVAL;
@@ -312,6 +314,8 @@ int drm_control(struct drm_device *dev, void *data,
 	case DRM_UNINST_HANDLER:
 		if (!drm_core_check_feature(dev, DRIVER_HAVE_IRQ))
 			return 0;
+		if (drm_core_check_feature(dev, DRIVER_MODESET))
+			return 0;
 		return drm_irq_uninstall(dev);
 	default:
 		return -EINVAL;
@@ -426,6 +430,45 @@ void drm_vblank_put(struct drm_device *dev, int crtc)
 }
 EXPORT_SYMBOL(drm_vblank_put);
 
+/**
+ * drm_vblank_pre_modeset - account for vblanks across mode sets
+ * @dev: DRM device
+ * @crtc: CRTC in question
+ * @post: post or pre mode set?
+ *
+ * Account for vblank events across mode setting events, which will likely
+ * reset the hardware frame counter.
+ */
+void drm_vblank_pre_modeset(struct drm_device *dev, int crtc)
+{
+	/*
+	 * To avoid all the problems that might happen if interrupts
+	 * were enabled/disabled around or between these calls, we just
+	 * have the kernel take a reference on the CRTC (just once though
+	 * to avoid corrupting the count if multiple, mismatch calls occur),
+	 * so that interrupts remain enabled in the interim.
+	 */
+	if (!dev->vblank_inmodeset[crtc]) {
+		dev->vblank_inmodeset[crtc] = 1;
+		drm_vblank_get(dev, crtc);
+	}
+}
+EXPORT_SYMBOL(drm_vblank_pre_modeset);
+
+void drm_vblank_post_modeset(struct drm_device *dev, int crtc)
+{
+	unsigned long irqflags;
+
+	if (dev->vblank_inmodeset[crtc]) {
+		spin_lock_irqsave(&dev->vbl_lock, irqflags);
+		dev->vblank_disable_allowed = 1;
+		dev->vblank_inmodeset[crtc] = 0;
+		spin_unlock_irqrestore(&dev->vbl_lock, irqflags);
+		drm_vblank_put(dev, crtc);
+	}
+}
+EXPORT_SYMBOL(drm_vblank_post_modeset);
+
 /**
  * drm_modeset_ctl - handle vblank event counter changes across mode switch
  * @DRM_IOCTL_ARGS: standard ioctl arguments
@@ -441,7 +484,6 @@ int drm_modeset_ctl(struct drm_device *dev, void *data,
 		    struct drm_file *file_priv)
 {
 	struct drm_modeset_ctl *modeset = data;
-	unsigned long irqflags;
 	int crtc, ret = 0;
 
 	/* If drm_vblank_init() hasn't been called yet, just no-op */
@@ -454,28 +496,12 @@ int drm_modeset_ctl(struct drm_device *dev, void *data,
 		goto out;
 	}
 
-	/*
-	 * To avoid all the problems that might happen if interrupts
-	 * were enabled/disabled around or between these calls, we just
-	 * have the kernel take a reference on the CRTC (just once though
-	 * to avoid corrupting the count if multiple, mismatch calls occur),
-	 * so that interrupts remain enabled in the interim.
-	 */
 	switch (modeset->cmd) {
 	case _DRM_PRE_MODESET:
-		if (!dev->vblank_inmodeset[crtc]) {
-			dev->vblank_inmodeset[crtc] = 1;
-			drm_vblank_get(dev, crtc);
-		}
+		drm_vblank_pre_modeset(dev, crtc);
 		break;
 	case _DRM_POST_MODESET:
-		if (dev->vblank_inmodeset[crtc]) {
-			spin_lock_irqsave(&dev->vbl_lock, irqflags);
-			dev->vblank_disable_allowed = 1;
-			dev->vblank_inmodeset[crtc] = 0;
-			spin_unlock_irqrestore(&dev->vbl_lock, irqflags);
-			drm_vblank_put(dev, crtc);
-		}
+		drm_vblank_post_modeset(dev, crtc);
 		break;
 	default:
 		ret = -EINVAL;
diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 217ad7dc7076..367c590ffbba 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -296,3 +296,4 @@ void drm_mm_takedown(struct drm_mm * mm)
 
 	drm_free(entry, sizeof(*entry), DRM_MEM_MM);
 }
+EXPORT_SYMBOL(drm_mm_takedown);
diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c
new file mode 100644
index 000000000000..c9b80fdd4630
--- /dev/null
+++ b/drivers/gpu/drm/drm_modes.c
@@ -0,0 +1,576 @@
+/*
+ * The list_sort function is (presumably) licensed under the GPL (see the
+ * top level "COPYING" file for details).
+ *
+ * The remainder of this file is:
+ *
+ * Copyright © 1997-2003 by The XFree86 Project, Inc.
+ * Copyright © 2007 Dave Airlie
+ * Copyright © 2007-2008 Intel Corporation
+ *   Jesse Barnes <jesse.barnes@intel.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Except as contained in this notice, the name of the copyright holder(s)
+ * and author(s) shall not be used in advertising or otherwise to promote
+ * the sale, use or other dealings in this Software without prior written
+ * authorization from the copyright holder(s) and author(s).
+ */
+
+#include <linux/list.h>
+#include "drmP.h"
+#include "drm.h"
+#include "drm_crtc.h"
+
+/**
+ * drm_mode_debug_printmodeline - debug print a mode
+ * @dev: DRM device
+ * @mode: mode to print
+ *
+ * LOCKING:
+ * None.
+ *
+ * Describe @mode using DRM_DEBUG.
+ */
+void drm_mode_debug_printmodeline(struct drm_display_mode *mode)
+{
+	DRM_DEBUG("Modeline %d:\"%s\" %d %d %d %d %d %d %d %d %d %d 0x%x 0x%x\n",
+		  mode->base.id, mode->name, mode->vrefresh, mode->clock,
+		  mode->hdisplay, mode->hsync_start,
+		  mode->hsync_end, mode->htotal,
+		  mode->vdisplay, mode->vsync_start,
+		  mode->vsync_end, mode->vtotal, mode->type, mode->flags);
+}
+EXPORT_SYMBOL(drm_mode_debug_printmodeline);
+
+/**
+ * drm_mode_set_name - set the name on a mode
+ * @mode: name will be set in this mode
+ *
+ * LOCKING:
+ * None.
+ *
+ * Set the name of @mode to a standard format.
+ */
+void drm_mode_set_name(struct drm_display_mode *mode)
+{
+	snprintf(mode->name, DRM_DISPLAY_MODE_LEN, "%dx%d", mode->hdisplay,
+		 mode->vdisplay);
+}
+EXPORT_SYMBOL(drm_mode_set_name);
+
+/**
+ * drm_mode_list_concat - move modes from one list to another
+ * @head: source list
+ * @new: dst list
+ *
+ * LOCKING:
+ * Caller must ensure both lists are locked.
+ *
+ * Move all the modes from @head to @new.
+ */
+void drm_mode_list_concat(struct list_head *head, struct list_head *new)
+{
+
+	struct list_head *entry, *tmp;
+
+	list_for_each_safe(entry, tmp, head) {
+		list_move_tail(entry, new);
+	}
+}
+EXPORT_SYMBOL(drm_mode_list_concat);
+
+/**
+ * drm_mode_width - get the width of a mode
+ * @mode: mode
+ *
+ * LOCKING:
+ * None.
+ *
+ * Return @mode's width (hdisplay) value.
+ *
+ * FIXME: is this needed?
+ *
+ * RETURNS:
+ * @mode->hdisplay
+ */
+int drm_mode_width(struct drm_display_mode *mode)
+{
+	return mode->hdisplay;
+
+}
+EXPORT_SYMBOL(drm_mode_width);
+
+/**
+ * drm_mode_height - get the height of a mode
+ * @mode: mode
+ *
+ * LOCKING:
+ * None.
+ *
+ * Return @mode's height (vdisplay) value.
+ *
+ * FIXME: is this needed?
+ *
+ * RETURNS:
+ * @mode->vdisplay
+ */
+int drm_mode_height(struct drm_display_mode *mode)
+{
+	return mode->vdisplay;
+}
+EXPORT_SYMBOL(drm_mode_height);
+
+/**
+ * drm_mode_vrefresh - get the vrefresh of a mode
+ * @mode: mode
+ *
+ * LOCKING:
+ * None.
+ *
+ * Return @mode's vrefresh rate or calculate it if necessary.
+ *
+ * FIXME: why is this needed?  shouldn't vrefresh be set already?
+ *
+ * RETURNS:
+ * Vertical refresh rate of @mode x 1000. For precision reasons.
+ */
+int drm_mode_vrefresh(struct drm_display_mode *mode)
+{
+	int refresh = 0;
+	unsigned int calc_val;
+
+	if (mode->vrefresh > 0)
+		refresh = mode->vrefresh;
+	else if (mode->htotal > 0 && mode->vtotal > 0) {
+		/* work out vrefresh the value will be x1000 */
+		calc_val = (mode->clock * 1000);
+
+		calc_val /= mode->htotal;
+		calc_val *= 1000;
+		calc_val /= mode->vtotal;
+
+		refresh = calc_val;
+		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+			refresh *= 2;
+		if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
+			refresh /= 2;
+		if (mode->vscan > 1)
+			refresh /= mode->vscan;
+	}
+	return refresh;
+}
+EXPORT_SYMBOL(drm_mode_vrefresh);
+
+/**
+ * drm_mode_set_crtcinfo - set CRTC modesetting parameters
+ * @p: mode
+ * @adjust_flags: unused? (FIXME)
+ *
+ * LOCKING:
+ * None.
+ *
+ * Setup the CRTC modesetting parameters for @p, adjusting if necessary.
+ */
+void drm_mode_set_crtcinfo(struct drm_display_mode *p, int adjust_flags)
+{
+	if ((p == NULL) || ((p->type & DRM_MODE_TYPE_CRTC_C) == DRM_MODE_TYPE_BUILTIN))
+		return;
+
+	p->crtc_hdisplay = p->hdisplay;
+	p->crtc_hsync_start = p->hsync_start;
+	p->crtc_hsync_end = p->hsync_end;
+	p->crtc_htotal = p->htotal;
+	p->crtc_hskew = p->hskew;
+	p->crtc_vdisplay = p->vdisplay;
+	p->crtc_vsync_start = p->vsync_start;
+	p->crtc_vsync_end = p->vsync_end;
+	p->crtc_vtotal = p->vtotal;
+
+	if (p->flags & DRM_MODE_FLAG_INTERLACE) {
+		if (adjust_flags & CRTC_INTERLACE_HALVE_V) {
+			p->crtc_vdisplay /= 2;
+			p->crtc_vsync_start /= 2;
+			p->crtc_vsync_end /= 2;
+			p->crtc_vtotal /= 2;
+		}
+
+		p->crtc_vtotal |= 1;
+	}
+
+	if (p->flags & DRM_MODE_FLAG_DBLSCAN) {
+		p->crtc_vdisplay *= 2;
+		p->crtc_vsync_start *= 2;
+		p->crtc_vsync_end *= 2;
+		p->crtc_vtotal *= 2;
+	}
+
+	if (p->vscan > 1) {
+		p->crtc_vdisplay *= p->vscan;
+		p->crtc_vsync_start *= p->vscan;
+		p->crtc_vsync_end *= p->vscan;
+		p->crtc_vtotal *= p->vscan;
+	}
+
+	p->crtc_vblank_start = min(p->crtc_vsync_start, p->crtc_vdisplay);
+	p->crtc_vblank_end = max(p->crtc_vsync_end, p->crtc_vtotal);
+	p->crtc_hblank_start = min(p->crtc_hsync_start, p->crtc_hdisplay);
+	p->crtc_hblank_end = max(p->crtc_hsync_end, p->crtc_htotal);
+
+	p->crtc_hadjusted = false;
+	p->crtc_vadjusted = false;
+}
+EXPORT_SYMBOL(drm_mode_set_crtcinfo);
+
+
+/**
+ * drm_mode_duplicate - allocate and duplicate an existing mode
+ * @m: mode to duplicate
+ *
+ * LOCKING:
+ * None.
+ *
+ * Just allocate a new mode, copy the existing mode into it, and return
+ * a pointer to it.  Used to create new instances of established modes.
+ */
+struct drm_display_mode *drm_mode_duplicate(struct drm_device *dev,
+					    struct drm_display_mode *mode)
+{
+	struct drm_display_mode *nmode;
+	int new_id;
+
+	nmode = drm_mode_create(dev);
+	if (!nmode)
+		return NULL;
+
+	new_id = nmode->base.id;
+	*nmode = *mode;
+	nmode->base.id = new_id;
+	INIT_LIST_HEAD(&nmode->head);
+	return nmode;
+}
+EXPORT_SYMBOL(drm_mode_duplicate);
+
+/**
+ * drm_mode_equal - test modes for equality
+ * @mode1: first mode
+ * @mode2: second mode
+ *
+ * LOCKING:
+ * None.
+ *
+ * Check to see if @mode1 and @mode2 are equivalent.
+ *
+ * RETURNS:
+ * True if the modes are equal, false otherwise.
+ */
+bool drm_mode_equal(struct drm_display_mode *mode1, struct drm_display_mode *mode2)
+{
+	/* do clock check convert to PICOS so fb modes get matched
+	 * the same */
+	if (mode1->clock && mode2->clock) {
+		if (KHZ2PICOS(mode1->clock) != KHZ2PICOS(mode2->clock))
+			return false;
+	} else if (mode1->clock != mode2->clock)
+		return false;
+
+	if (mode1->hdisplay == mode2->hdisplay &&
+	    mode1->hsync_start == mode2->hsync_start &&
+	    mode1->hsync_end == mode2->hsync_end &&
+	    mode1->htotal == mode2->htotal &&
+	    mode1->hskew == mode2->hskew &&
+	    mode1->vdisplay == mode2->vdisplay &&
+	    mode1->vsync_start == mode2->vsync_start &&
+	    mode1->vsync_end == mode2->vsync_end &&
+	    mode1->vtotal == mode2->vtotal &&
+	    mode1->vscan == mode2->vscan &&
+	    mode1->flags == mode2->flags)
+		return true;
+
+	return false;
+}
+EXPORT_SYMBOL(drm_mode_equal);
+
+/**
+ * drm_mode_validate_size - make sure modes adhere to size constraints
+ * @dev: DRM device
+ * @mode_list: list of modes to check
+ * @maxX: maximum width
+ * @maxY: maximum height
+ * @maxPitch: max pitch
+ *
+ * LOCKING:
+ * Caller must hold a lock protecting @mode_list.
+ *
+ * The DRM device (@dev) has size and pitch limits.  Here we validate the
+ * modes we probed for @dev against those limits and set their status as
+ * necessary.
+ */
+void drm_mode_validate_size(struct drm_device *dev,
+			    struct list_head *mode_list,
+			    int maxX, int maxY, int maxPitch)
+{
+	struct drm_display_mode *mode;
+
+	list_for_each_entry(mode, mode_list, head) {
+		if (maxPitch > 0 && mode->hdisplay > maxPitch)
+			mode->status = MODE_BAD_WIDTH;
+
+		if (maxX > 0 && mode->hdisplay > maxX)
+			mode->status = MODE_VIRTUAL_X;
+
+		if (maxY > 0 && mode->vdisplay > maxY)
+			mode->status = MODE_VIRTUAL_Y;
+	}
+}
+EXPORT_SYMBOL(drm_mode_validate_size);
+
+/**
+ * drm_mode_validate_clocks - validate modes against clock limits
+ * @dev: DRM device
+ * @mode_list: list of modes to check
+ * @min: minimum clock rate array
+ * @max: maximum clock rate array
+ * @n_ranges: number of clock ranges (size of arrays)
+ *
+ * LOCKING:
+ * Caller must hold a lock protecting @mode_list.
+ *
+ * Some code may need to check a mode list against the clock limits of the
+ * device in question.  This function walks the mode list, testing to make
+ * sure each mode falls within a given range (defined by @min and @max
+ * arrays) and sets @mode->status as needed.
+ */
+void drm_mode_validate_clocks(struct drm_device *dev,
+			      struct list_head *mode_list,
+			      int *min, int *max, int n_ranges)
+{
+	struct drm_display_mode *mode;
+	int i;
+
+	list_for_each_entry(mode, mode_list, head) {
+		bool good = false;
+		for (i = 0; i < n_ranges; i++) {
+			if (mode->clock >= min[i] && mode->clock <= max[i]) {
+				good = true;
+				break;
+			}
+		}
+		if (!good)
+			mode->status = MODE_CLOCK_RANGE;
+	}
+}
+EXPORT_SYMBOL(drm_mode_validate_clocks);
+
+/**
+ * drm_mode_prune_invalid - remove invalid modes from mode list
+ * @dev: DRM device
+ * @mode_list: list of modes to check
+ * @verbose: be verbose about it
+ *
+ * LOCKING:
+ * Caller must hold a lock protecting @mode_list.
+ *
+ * Once mode list generation is complete, a caller can use this routine to
+ * remove invalid modes from a mode list.  If any of the modes have a
+ * status other than %MODE_OK, they are removed from @mode_list and freed.
+ */
+void drm_mode_prune_invalid(struct drm_device *dev,
+			    struct list_head *mode_list, bool verbose)
+{
+	struct drm_display_mode *mode, *t;
+
+	list_for_each_entry_safe(mode, t, mode_list, head) {
+		if (mode->status != MODE_OK) {
+			list_del(&mode->head);
+			if (verbose) {
+				drm_mode_debug_printmodeline(mode);
+				DRM_DEBUG("Not using %s mode %d\n", mode->name, mode->status);
+			}
+			drm_mode_destroy(dev, mode);
+		}
+	}
+}
+EXPORT_SYMBOL(drm_mode_prune_invalid);
+
+/**
+ * drm_mode_compare - compare modes for favorability
+ * @lh_a: list_head for first mode
+ * @lh_b: list_head for second mode
+ *
+ * LOCKING:
+ * None.
+ *
+ * Compare two modes, given by @lh_a and @lh_b, returning a value indicating
+ * which is better.
+ *
+ * RETURNS:
+ * Negative if @lh_a is better than @lh_b, zero if they're equivalent, or
+ * positive if @lh_b is better than @lh_a.
+ */
+static int drm_mode_compare(struct list_head *lh_a, struct list_head *lh_b)
+{
+	struct drm_display_mode *a = list_entry(lh_a, struct drm_display_mode, head);
+	struct drm_display_mode *b = list_entry(lh_b, struct drm_display_mode, head);
+	int diff;
+
+	diff = ((b->type & DRM_MODE_TYPE_PREFERRED) != 0) -
+		((a->type & DRM_MODE_TYPE_PREFERRED) != 0);
+	if (diff)
+		return diff;
+	diff = b->hdisplay * b->vdisplay - a->hdisplay * a->vdisplay;
+	if (diff)
+		return diff;
+	diff = b->clock - a->clock;
+	return diff;
+}
+
+/* FIXME: what we don't have a list sort function? */
+/* list sort from Mark J Roberts (mjr@znex.org) */
+void list_sort(struct list_head *head,
+	       int (*cmp)(struct list_head *a, struct list_head *b))
+{
+	struct list_head *p, *q, *e, *list, *tail, *oldhead;
+	int insize, nmerges, psize, qsize, i;
+
+	list = head->next;
+	list_del(head);
+	insize = 1;
+	for (;;) {
+		p = oldhead = list;
+		list = tail = NULL;
+		nmerges = 0;
+
+		while (p) {
+			nmerges++;
+			q = p;
+			psize = 0;
+			for (i = 0; i < insize; i++) {
+				psize++;
+				q = q->next == oldhead ? NULL : q->next;
+				if (!q)
+					break;
+			}
+
+			qsize = insize;
+			while (psize > 0 || (qsize > 0 && q)) {
+				if (!psize) {
+					e = q;
+					q = q->next;
+					qsize--;
+					if (q == oldhead)
+						q = NULL;
+				} else if (!qsize || !q) {
+					e = p;
+					p = p->next;
+					psize--;
+					if (p == oldhead)
+						p = NULL;
+				} else if (cmp(p, q) <= 0) {
+					e = p;
+					p = p->next;
+					psize--;
+					if (p == oldhead)
+						p = NULL;
+				} else {
+					e = q;
+					q = q->next;
+					qsize--;
+					if (q == oldhead)
+						q = NULL;
+				}
+				if (tail)
+					tail->next = e;
+				else
+					list = e;
+				e->prev = tail;
+				tail = e;
+			}
+			p = q;
+		}
+
+		tail->next = list;
+		list->prev = tail;
+
+		if (nmerges <= 1)
+			break;
+
+		insize *= 2;
+	}
+
+	head->next = list;
+	head->prev = list->prev;
+	list->prev->next = head;
+	list->prev = head;
+}
+
+/**
+ * drm_mode_sort - sort mode list
+ * @mode_list: list to sort
+ *
+ * LOCKING:
+ * Caller must hold a lock protecting @mode_list.
+ *
+ * Sort @mode_list by favorability, putting good modes first.
+ */
+void drm_mode_sort(struct list_head *mode_list)
+{
+	list_sort(mode_list, drm_mode_compare);
+}
+EXPORT_SYMBOL(drm_mode_sort);
+
+/**
+ * drm_mode_connector_list_update - update the mode list for the connector
+ * @connector: the connector to update
+ *
+ * LOCKING:
+ * Caller must hold a lock protecting @mode_list.
+ *
+ * This moves the modes from the @connector probed_modes list
+ * to the actual mode list. It compares the probed mode against the current
+ * list and only adds different modes. All modes unverified after this point
+ * will be removed by the prune invalid modes.
+ */
+void drm_mode_connector_list_update(struct drm_connector *connector)
+{
+	struct drm_display_mode *mode;
+	struct drm_display_mode *pmode, *pt;
+	int found_it;
+
+	list_for_each_entry_safe(pmode, pt, &connector->probed_modes,
+				 head) {
+		found_it = 0;
+		/* go through current modes checking for the new probed mode */
+		list_for_each_entry(mode, &connector->modes, head) {
+			if (drm_mode_equal(pmode, mode)) {
+				found_it = 1;
+				/* if equal delete the probed mode */
+				mode->status = pmode->status;
+				list_del(&pmode->head);
+				drm_mode_destroy(connector->dev, pmode);
+				break;
+			}
+		}
+
+		if (!found_it) {
+			list_move_tail(&pmode->head, &connector->modes);
+		}
+	}
+}
+EXPORT_SYMBOL(drm_mode_connector_list_update);
diff --git a/drivers/gpu/drm/drm_stub.c b/drivers/gpu/drm/drm_stub.c
index ea7f9e5d47fa..5ca132afa4f2 100644
--- a/drivers/gpu/drm/drm_stub.c
+++ b/drivers/gpu/drm/drm_stub.c
@@ -57,6 +57,14 @@ static int drm_minor_get_id(struct drm_device *dev, int type)
 	int ret;
 	int base = 0, limit = 63;
 
+	if (type == DRM_MINOR_CONTROL) {
+                base += 64;
+                limit = base + 127;
+        } else if (type == DRM_MINOR_RENDER) {
+                base += 128;
+                limit = base + 255;
+        }
+
 again:
 	if (idr_pre_get(&drm_minors_idr, GFP_KERNEL) == 0) {
 		DRM_ERROR("Out of memory expanding drawable idr\n");
@@ -362,12 +370,28 @@ int drm_get_dev(struct pci_dev *pdev, const struct pci_device_id *ent,
 		printk(KERN_ERR "DRM: Fill_in_dev failed.\n");
 		goto err_g2;
 	}
+
+	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
+		ret = drm_get_minor(dev, &dev->control, DRM_MINOR_CONTROL);
+		if (ret)
+			goto err_g2;
+	}
+
 	if ((ret = drm_get_minor(dev, &dev->primary, DRM_MINOR_LEGACY)))
-		goto err_g2;
+		goto err_g3;
 
-	if (dev->driver->load)
-		if ((ret = dev->driver->load(dev, ent->driver_data)))
+	if (dev->driver->load) {
+		ret = dev->driver->load(dev, ent->driver_data);
+		if (ret)
 			goto err_g3;
+	}
+
+        /* setup the grouping for the legacy output */
+	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
+		ret = drm_mode_group_init_legacy_group(dev, &dev->primary->mode_group);
+		if (ret)
+			goto err_g3;
+	}
 
 	list_add_tail(&dev->driver_item, &driver->device_list);
 
diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c
index 1611b9bcbe7f..65d72d094c81 100644
--- a/drivers/gpu/drm/drm_sysfs.c
+++ b/drivers/gpu/drm/drm_sysfs.c
@@ -20,6 +20,7 @@
 #include "drmP.h"
 
 #define to_drm_minor(d) container_of(d, struct drm_minor, kdev)
+#define to_drm_connector(d) container_of(d, struct drm_connector, kdev)
 
 /**
  * drm_sysfs_suspend - DRM class suspend hook
@@ -34,7 +35,7 @@ static int drm_sysfs_suspend(struct device *dev, pm_message_t state)
 	struct drm_minor *drm_minor = to_drm_minor(dev);
 	struct drm_device *drm_dev = drm_minor->dev;
 
-	if (drm_dev->driver->suspend)
+	if (drm_minor->type == DRM_MINOR_LEGACY && drm_dev->driver->suspend)
 		return drm_dev->driver->suspend(drm_dev, state);
 
 	return 0;
@@ -52,7 +53,7 @@ static int drm_sysfs_resume(struct device *dev)
 	struct drm_minor *drm_minor = to_drm_minor(dev);
 	struct drm_device *drm_dev = drm_minor->dev;
 
-	if (drm_dev->driver->resume)
+	if (drm_minor->type == DRM_MINOR_LEGACY && drm_dev->driver->resume)
 		return drm_dev->driver->resume(drm_dev);
 
 	return 0;
@@ -144,6 +145,323 @@ static void drm_sysfs_device_release(struct device *dev)
 	return;
 }
 
+/*
+ * Connector properties
+ */
+static ssize_t status_show(struct device *device,
+			   struct device_attribute *attr,
+			   char *buf)
+{
+	struct drm_connector *connector = to_drm_connector(device);
+	enum drm_connector_status status;
+
+	status = connector->funcs->detect(connector);
+	return snprintf(buf, PAGE_SIZE, "%s",
+			drm_get_connector_status_name(status));
+}
+
+static ssize_t dpms_show(struct device *device,
+			   struct device_attribute *attr,
+			   char *buf)
+{
+	struct drm_connector *connector = to_drm_connector(device);
+	struct drm_device *dev = connector->dev;
+	uint64_t dpms_status;
+	int ret;
+
+	ret = drm_connector_property_get_value(connector,
+					    dev->mode_config.dpms_property,
+					    &dpms_status);
+	if (ret)
+		return 0;
+
+	return snprintf(buf, PAGE_SIZE, "%s",
+			drm_get_dpms_name((int)dpms_status));
+}
+
+static ssize_t enabled_show(struct device *device,
+			    struct device_attribute *attr,
+			   char *buf)
+{
+	struct drm_connector *connector = to_drm_connector(device);
+
+	return snprintf(buf, PAGE_SIZE, connector->encoder ? "enabled" :
+			"disabled");
+}
+
+static ssize_t edid_show(struct kobject *kobj, struct bin_attribute *attr,
+			 char *buf, loff_t off, size_t count)
+{
+	struct device *connector_dev = container_of(kobj, struct device, kobj);
+	struct drm_connector *connector = to_drm_connector(connector_dev);
+	unsigned char *edid;
+	size_t size;
+
+	if (!connector->edid_blob_ptr)
+		return 0;
+
+	edid = connector->edid_blob_ptr->data;
+	size = connector->edid_blob_ptr->length;
+	if (!edid)
+		return 0;
+
+	if (off >= size)
+		return 0;
+
+	if (off + count > size)
+		count = size - off;
+	memcpy(buf, edid + off, count);
+
+	return count;
+}
+
+static ssize_t modes_show(struct device *device,
+			   struct device_attribute *attr,
+			   char *buf)
+{
+	struct drm_connector *connector = to_drm_connector(device);
+	struct drm_display_mode *mode;
+	int written = 0;
+
+	list_for_each_entry(mode, &connector->modes, head) {
+		written += snprintf(buf + written, PAGE_SIZE - written, "%s\n",
+				    mode->name);
+	}
+
+	return written;
+}
+
+static ssize_t subconnector_show(struct device *device,
+			   struct device_attribute *attr,
+			   char *buf)
+{
+	struct drm_connector *connector = to_drm_connector(device);
+	struct drm_device *dev = connector->dev;
+	struct drm_property *prop = NULL;
+	uint64_t subconnector;
+	int is_tv = 0;
+	int ret;
+
+	switch (connector->connector_type) {
+		case DRM_MODE_CONNECTOR_DVII:
+			prop = dev->mode_config.dvi_i_subconnector_property;
+			break;
+		case DRM_MODE_CONNECTOR_Composite:
+		case DRM_MODE_CONNECTOR_SVIDEO:
+		case DRM_MODE_CONNECTOR_Component:
+			prop = dev->mode_config.tv_subconnector_property;
+			is_tv = 1;
+			break;
+		default:
+			DRM_ERROR("Wrong connector type for this property\n");
+			return 0;
+	}
+
+	if (!prop) {
+		DRM_ERROR("Unable to find subconnector property\n");
+		return 0;
+	}
+
+	ret = drm_connector_property_get_value(connector, prop, &subconnector);
+	if (ret)
+		return 0;
+
+	return snprintf(buf, PAGE_SIZE, "%s", is_tv ?
+			drm_get_tv_subconnector_name((int)subconnector) :
+			drm_get_dvi_i_subconnector_name((int)subconnector));
+}
+
+static ssize_t select_subconnector_show(struct device *device,
+			   struct device_attribute *attr,
+			   char *buf)
+{
+	struct drm_connector *connector = to_drm_connector(device);
+	struct drm_device *dev = connector->dev;
+	struct drm_property *prop = NULL;
+	uint64_t subconnector;
+	int is_tv = 0;
+	int ret;
+
+	switch (connector->connector_type) {
+		case DRM_MODE_CONNECTOR_DVII:
+			prop = dev->mode_config.dvi_i_select_subconnector_property;
+			break;
+		case DRM_MODE_CONNECTOR_Composite:
+		case DRM_MODE_CONNECTOR_SVIDEO:
+		case DRM_MODE_CONNECTOR_Component:
+			prop = dev->mode_config.tv_select_subconnector_property;
+			is_tv = 1;
+			break;
+		default:
+			DRM_ERROR("Wrong connector type for this property\n");
+			return 0;
+	}
+
+	if (!prop) {
+		DRM_ERROR("Unable to find select subconnector property\n");
+		return 0;
+	}
+
+	ret = drm_connector_property_get_value(connector, prop, &subconnector);
+	if (ret)
+		return 0;
+
+	return snprintf(buf, PAGE_SIZE, "%s", is_tv ?
+			drm_get_tv_select_name((int)subconnector) :
+			drm_get_dvi_i_select_name((int)subconnector));
+}
+
+static struct device_attribute connector_attrs[] = {
+	__ATTR_RO(status),
+	__ATTR_RO(enabled),
+	__ATTR_RO(dpms),
+	__ATTR_RO(modes),
+};
+
+/* These attributes are for both DVI-I connectors and all types of tv-out. */
+static struct device_attribute connector_attrs_opt1[] = {
+	__ATTR_RO(subconnector),
+	__ATTR_RO(select_subconnector),
+};
+
+static struct bin_attribute edid_attr = {
+	.attr.name = "edid",
+	.size = 128,
+	.read = edid_show,
+};
+
+/**
+ * drm_sysfs_connector_add - add an connector to sysfs
+ * @connector: connector to add
+ *
+ * Create an connector device in sysfs, along with its associated connector
+ * properties (so far, connection status, dpms, mode list & edid) and
+ * generate a hotplug event so userspace knows there's a new connector
+ * available.
+ *
+ * Note:
+ * This routine should only be called *once* for each DRM minor registered.
+ * A second call for an already registered device will trigger the BUG_ON
+ * below.
+ */
+int drm_sysfs_connector_add(struct drm_connector *connector)
+{
+	struct drm_device *dev = connector->dev;
+	int ret = 0, i, j;
+
+	/* We shouldn't get called more than once for the same connector */
+	BUG_ON(device_is_registered(&connector->kdev));
+
+	connector->kdev.parent = &dev->primary->kdev;
+	connector->kdev.class = drm_class;
+	connector->kdev.release = drm_sysfs_device_release;
+
+	DRM_DEBUG("adding \"%s\" to sysfs\n",
+		  drm_get_connector_name(connector));
+
+	snprintf(connector->kdev.bus_id, BUS_ID_SIZE, "card%d-%s",
+		 dev->primary->index, drm_get_connector_name(connector));
+	ret = device_register(&connector->kdev);
+
+	if (ret) {
+		DRM_ERROR("failed to register connector device: %d\n", ret);
+		goto out;
+	}
+
+	/* Standard attributes */
+
+	for (i = 0; i < ARRAY_SIZE(connector_attrs); i++) {
+		ret = device_create_file(&connector->kdev, &connector_attrs[i]);
+		if (ret)
+			goto err_out_files;
+	}
+
+	/* Optional attributes */
+	/*
+	 * In the long run it maybe a good idea to make one set of
+	 * optionals per connector type.
+	 */
+	switch (connector->connector_type) {
+		case DRM_MODE_CONNECTOR_DVII:
+		case DRM_MODE_CONNECTOR_Composite:
+		case DRM_MODE_CONNECTOR_SVIDEO:
+		case DRM_MODE_CONNECTOR_Component:
+			for (i = 0; i < ARRAY_SIZE(connector_attrs_opt1); i++) {
+				ret = device_create_file(&connector->kdev, &connector_attrs_opt1[i]);
+				if (ret)
+					goto err_out_files;
+			}
+			break;
+		default:
+			break;
+	}
+
+	ret = sysfs_create_bin_file(&connector->kdev.kobj, &edid_attr);
+	if (ret)
+		goto err_out_files;
+
+	/* Let userspace know we have a new connector */
+	drm_sysfs_hotplug_event(dev);
+
+	return 0;
+
+err_out_files:
+	if (i > 0)
+		for (j = 0; j < i; j++)
+			device_remove_file(&connector->kdev,
+					   &connector_attrs[i]);
+	device_unregister(&connector->kdev);
+
+out:
+	return ret;
+}
+EXPORT_SYMBOL(drm_sysfs_connector_add);
+
+/**
+ * drm_sysfs_connector_remove - remove an connector device from sysfs
+ * @connector: connector to remove
+ *
+ * Remove @connector and its associated attributes from sysfs.  Note that
+ * the device model core will take care of sending the "remove" uevent
+ * at this time, so we don't need to do it.
+ *
+ * Note:
+ * This routine should only be called if the connector was previously
+ * successfully registered.  If @connector hasn't been registered yet,
+ * you'll likely see a panic somewhere deep in sysfs code when called.
+ */
+void drm_sysfs_connector_remove(struct drm_connector *connector)
+{
+	int i;
+
+	DRM_DEBUG("removing \"%s\" from sysfs\n",
+		  drm_get_connector_name(connector));
+
+	for (i = 0; i < ARRAY_SIZE(connector_attrs); i++)
+		device_remove_file(&connector->kdev, &connector_attrs[i]);
+	sysfs_remove_bin_file(&connector->kdev.kobj, &edid_attr);
+	device_unregister(&connector->kdev);
+}
+EXPORT_SYMBOL(drm_sysfs_connector_remove);
+
+/**
+ * drm_sysfs_hotplug_event - generate a DRM uevent
+ * @dev: DRM device
+ *
+ * Send a uevent for the DRM device specified by @dev.  Currently we only
+ * set HOTPLUG=1 in the uevent environment, but this could be expanded to
+ * deal with other types of events.
+ */
+void drm_sysfs_hotplug_event(struct drm_device *dev)
+{
+	char *event_string = "HOTPLUG=1";
+	char *envp[] = { event_string, NULL };
+
+	DRM_DEBUG("generating hotplug event\n");
+
+	kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, envp);
+}
+
 /**
  * drm_sysfs_device_add - adds a class device to sysfs for a character driver
  * @dev: DRM device to be added
@@ -163,7 +481,12 @@ int drm_sysfs_device_add(struct drm_minor *minor)
 	minor->kdev.class = drm_class;
 	minor->kdev.release = drm_sysfs_device_release;
 	minor->kdev.devt = minor->device;
-	minor_str = "card%d";
+	if (minor->type == DRM_MINOR_CONTROL)
+		minor_str = "controlD%d";
+        else if (minor->type == DRM_MINOR_RENDER)
+                minor_str = "renderD%d";
+        else
+                minor_str = "card%d";
 
 	snprintf(minor->kdev.bus_id, BUS_ID_SIZE, minor_str, minor->index);
 
diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c
index 448d209a0bf2..e6210725b9ab 100644
--- a/drivers/video/console/vgacon.c
+++ b/drivers/video/console/vgacon.c
@@ -112,6 +112,23 @@ static int 		vga_video_font_height;
 static int 		vga_scan_lines		__read_mostly;
 static unsigned int 	vga_rolled_over;
 
+int vgacon_text_mode_force = 0;
+
+bool vgacon_text_force(void)
+{
+	return vgacon_text_mode_force ? true : false;
+}
+EXPORT_SYMBOL(vgacon_text_force);
+
+static int __init text_mode(char *str)
+{
+	vgacon_text_mode_force = 1;
+	return 1;
+}
+
+/* force text mode - used by kernel modesetting */
+__setup("nomodeset", text_mode);
+
 static int __init no_scroll(char *str)
 {
 	/*
diff --git a/include/drm/Kbuild b/include/drm/Kbuild
index 82b6983b7fbb..b940fdfa3b25 100644
--- a/include/drm/Kbuild
+++ b/include/drm/Kbuild
@@ -1,4 +1,4 @@
-unifdef-y += drm.h drm_sarea.h
+unifdef-y += drm.h drm_sarea.h drm_mode.h
 unifdef-y += i810_drm.h
 unifdef-y += i830_drm.h
 unifdef-y += i915_drm.h
diff --git a/include/drm/drm.h b/include/drm/drm.h
index 3a66252456ba..76ce6fe300b7 100644
--- a/include/drm/drm.h
+++ b/include/drm/drm.h
@@ -599,6 +599,8 @@ struct drm_gem_open {
 	uint64_t size;
 };
 
+#include "drm_mode.h"
+
 #define DRM_IOCTL_BASE			'd'
 #define DRM_IO(nr)			_IO(DRM_IOCTL_BASE,nr)
 #define DRM_IOR(nr,type)		_IOR(DRM_IOCTL_BASE,nr,type)
@@ -668,6 +670,25 @@ struct drm_gem_open {
 
 #define DRM_IOCTL_UPDATE_DRAW		DRM_IOW(0x3f, struct drm_update_draw)
 
+#define DRM_IOCTL_MODE_GETRESOURCES	DRM_IOWR(0xA0, struct drm_mode_card_res)
+#define DRM_IOCTL_MODE_GETCRTC		DRM_IOWR(0xA1, struct drm_mode_crtc)
+#define DRM_IOCTL_MODE_SETCRTC		DRM_IOWR(0xA2, struct drm_mode_crtc)
+#define DRM_IOCTL_MODE_CURSOR		DRM_IOWR(0xA3, struct drm_mode_cursor)
+#define DRM_IOCTL_MODE_GETGAMMA		DRM_IOWR(0xA4, struct drm_mode_crtc_lut)
+#define DRM_IOCTL_MODE_SETGAMMA		DRM_IOWR(0xA5, struct drm_mode_crtc_lut)
+#define DRM_IOCTL_MODE_GETENCODER	DRM_IOWR(0xA6, struct drm_mode_get_encoder)
+#define DRM_IOCTL_MODE_GETCONNECTOR	DRM_IOWR(0xA7, struct drm_mode_get_connector)
+#define DRM_IOCTL_MODE_ATTACHMODE	DRM_IOWR(0xA8, struct drm_mode_mode_cmd)
+#define DRM_IOCTL_MODE_DETACHMODE	DRM_IOWR(0xA9, struct drm_mode_mode_cmd)
+
+#define DRM_IOCTL_MODE_GETPROPERTY	DRM_IOWR(0xAA, struct drm_mode_get_property)
+#define DRM_IOCTL_MODE_SETPROPERTY	DRM_IOWR(0xAB, struct drm_mode_connector_set_property)
+#define DRM_IOCTL_MODE_GETPROPBLOB	DRM_IOWR(0xAC, struct drm_mode_get_blob)
+#define DRM_IOCTL_MODE_GETFB		DRM_IOWR(0xAD, struct drm_mode_fb_cmd)
+#define DRM_IOCTL_MODE_ADDFB		DRM_IOWR(0xAE, struct drm_mode_fb_cmd)
+#define DRM_IOCTL_MODE_RMFB		DRM_IOWR(0xAF, unsigned int)
+#define DRM_IOCTL_MODE_REPLACEFB	DRM_IOWR(0xB0, struct drm_mode_fb_cmd)
+
 /**
  * Device specific ioctls should only be in their respective headers
  * The device specific ioctl range is from 0x40 to 0x99.
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index ae42a6a5c24e..7802c80f2b23 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -105,6 +105,7 @@ struct drm_device;
 #define DRIVER_FB_DMA      0x400
 #define DRIVER_IRQ_VBL2    0x800
 #define DRIVER_GEM         0x1000
+#define DRIVER_MODESET     0x2000
 
 /***********************************************************************/
 /** \name Begin the DRM... */
@@ -276,6 +277,7 @@ typedef int drm_ioctl_compat_t(struct file *filp, unsigned int cmd,
 #define DRM_AUTH	0x1
 #define	DRM_MASTER	0x2
 #define DRM_ROOT_ONLY	0x4
+#define DRM_CONTROL_ALLOW 0x8
 
 struct drm_ioctl_desc {
 	unsigned int cmd;
@@ -398,6 +400,7 @@ struct drm_file {
 	int is_master; /* this file private is a master for a minor */
 	struct drm_master *master; /* master this node is currently associated with
 				      N.B. not always minor->master */
+	struct list_head fbs;
 };
 
 /** Wait queue */
@@ -629,6 +632,8 @@ struct drm_gem_object {
 	void *driver_private;
 };
 
+#include "drm_crtc.h"
+
 /* per-master structure */
 struct drm_master {
 
@@ -792,6 +797,8 @@ struct drm_driver {
 
 #define DRM_MINOR_UNASSIGNED 0
 #define DRM_MINOR_LEGACY 1
+#define DRM_MINOR_CONTROL 2
+#define DRM_MINOR_RENDER 3
 
 /**
  * DRM minor structure. This structure represents a drm minor number.
@@ -805,6 +812,7 @@ struct drm_minor {
 	struct proc_dir_entry *dev_root;  /**< proc directory entry */
 	struct drm_master *master; /* currently active master for this node */
 	struct list_head master_list;
+	struct drm_mode_group mode_group;
 };
 
 /**
@@ -855,6 +863,7 @@ struct drm_device {
 	struct idr ctx_idr;
 
 	struct list_head vmalist;	/**< List of vmas (for debugging) */
+
 	/*@} */
 
 	/** \name DMA queues (contexts) */
@@ -933,6 +942,7 @@ struct drm_device {
 	struct drm_driver *driver;
 	drm_local_map_t *agp_buffer_map;
 	unsigned int agp_buffer_token;
+	struct drm_minor *control;		/**< Control node for card */
 	struct drm_minor *primary;		/**< render type primary screen head */
 
 	/** \name Drawable information */
@@ -941,6 +951,8 @@ struct drm_device {
 	struct idr drw_idr;
 	/*@} */
 
+        struct drm_mode_config mode_config;	/**< Current mode config */
+
 	/** \name GEM information */
 	/*@{ */
 	spinlock_t object_name_lock;
@@ -1201,6 +1213,8 @@ extern int drm_vblank_get(struct drm_device *dev, int crtc);
 extern void drm_vblank_put(struct drm_device *dev, int crtc);
 extern void drm_vblank_cleanup(struct drm_device *dev);
 /* Modesetting support */
+extern void drm_vblank_pre_modeset(struct drm_device *dev, int crtc);
+extern void drm_vblank_post_modeset(struct drm_device *dev, int crtc);
 extern int drm_modeset_ctl(struct drm_device *dev, void *data,
 			   struct drm_file *file_priv);
 
@@ -1286,7 +1300,11 @@ struct drm_sysfs_class;
 extern struct class *drm_sysfs_create(struct module *owner, char *name);
 extern void drm_sysfs_destroy(void);
 extern int drm_sysfs_device_add(struct drm_minor *minor);
+extern void drm_sysfs_hotplug_event(struct drm_device *dev);
 extern void drm_sysfs_device_remove(struct drm_minor *minor);
+extern char *drm_get_connector_status_name(enum drm_connector_status status);
+extern int drm_sysfs_connector_add(struct drm_connector *connector);
+extern void drm_sysfs_connector_remove(struct drm_connector *connector);
 
 /*
  * Basic memory manager support (drm_mm.c)
diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
new file mode 100644
index 000000000000..08a884bea446
--- /dev/null
+++ b/include/drm/drm_crtc.h
@@ -0,0 +1,737 @@
+/*
+ * Copyright © 2006 Keith Packard
+ * Copyright © 2007-2008 Dave Airlie
+ * Copyright © 2007-2008 Intel Corporation
+ *   Jesse Barnes <jesse.barnes@intel.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef __DRM_CRTC_H__
+#define __DRM_CRTC_H__
+
+#include <linux/i2c.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/idr.h>
+
+#include <linux/fb.h>
+
+struct drm_device;
+struct drm_mode_set;
+struct drm_framebuffer;
+
+
+#define DRM_MODE_OBJECT_CRTC 0xcccccccc
+#define DRM_MODE_OBJECT_CONNECTOR 0xc0c0c0c0
+#define DRM_MODE_OBJECT_ENCODER 0xe0e0e0e0
+#define DRM_MODE_OBJECT_MODE 0xdededede
+#define DRM_MODE_OBJECT_PROPERTY 0xb0b0b0b0
+#define DRM_MODE_OBJECT_FB 0xfbfbfbfb
+#define DRM_MODE_OBJECT_BLOB 0xbbbbbbbb
+
+struct drm_mode_object {
+	uint32_t id;
+	uint32_t type;
+};
+
+/*
+ * Note on terminology:  here, for brevity and convenience, we refer to connector
+ * control chips as 'CRTCs'.  They can control any type of connector, VGA, LVDS,
+ * DVI, etc.  And 'screen' refers to the whole of the visible display, which
+ * may span multiple monitors (and therefore multiple CRTC and connector
+ * structures).
+ */
+
+enum drm_mode_status {
+    MODE_OK	= 0,	/* Mode OK */
+    MODE_HSYNC,		/* hsync out of range */
+    MODE_VSYNC,		/* vsync out of range */
+    MODE_H_ILLEGAL,	/* mode has illegal horizontal timings */
+    MODE_V_ILLEGAL,	/* mode has illegal horizontal timings */
+    MODE_BAD_WIDTH,	/* requires an unsupported linepitch */
+    MODE_NOMODE,	/* no mode with a maching name */
+    MODE_NO_INTERLACE,	/* interlaced mode not supported */
+    MODE_NO_DBLESCAN,	/* doublescan mode not supported */
+    MODE_NO_VSCAN,	/* multiscan mode not supported */
+    MODE_MEM,		/* insufficient video memory */
+    MODE_VIRTUAL_X,	/* mode width too large for specified virtual size */
+    MODE_VIRTUAL_Y,	/* mode height too large for specified virtual size */
+    MODE_MEM_VIRT,	/* insufficient video memory given virtual size */
+    MODE_NOCLOCK,	/* no fixed clock available */
+    MODE_CLOCK_HIGH,	/* clock required is too high */
+    MODE_CLOCK_LOW,	/* clock required is too low */
+    MODE_CLOCK_RANGE,	/* clock/mode isn't in a ClockRange */
+    MODE_BAD_HVALUE,	/* horizontal timing was out of range */
+    MODE_BAD_VVALUE,	/* vertical timing was out of range */
+    MODE_BAD_VSCAN,	/* VScan value out of range */
+    MODE_HSYNC_NARROW,	/* horizontal sync too narrow */
+    MODE_HSYNC_WIDE,	/* horizontal sync too wide */
+    MODE_HBLANK_NARROW,	/* horizontal blanking too narrow */
+    MODE_HBLANK_WIDE,	/* horizontal blanking too wide */
+    MODE_VSYNC_NARROW,	/* vertical sync too narrow */
+    MODE_VSYNC_WIDE,	/* vertical sync too wide */
+    MODE_VBLANK_NARROW,	/* vertical blanking too narrow */
+    MODE_VBLANK_WIDE,	/* vertical blanking too wide */
+    MODE_PANEL,         /* exceeds panel dimensions */
+    MODE_INTERLACE_WIDTH, /* width too large for interlaced mode */
+    MODE_ONE_WIDTH,     /* only one width is supported */
+    MODE_ONE_HEIGHT,    /* only one height is supported */
+    MODE_ONE_SIZE,      /* only one resolution is supported */
+    MODE_NO_REDUCED,    /* monitor doesn't accept reduced blanking */
+    MODE_UNVERIFIED = -3, /* mode needs to reverified */
+    MODE_BAD = -2,	/* unspecified reason */
+    MODE_ERROR	= -1	/* error condition */
+};
+
+#define DRM_MODE_TYPE_CLOCK_CRTC_C (DRM_MODE_TYPE_CLOCK_C | \
+				    DRM_MODE_TYPE_CRTC_C)
+
+#define DRM_MODE(nm, t, c, hd, hss, hse, ht, hsk, vd, vss, vse, vt, vs, f) \
+	.name = nm, .status = 0, .type = (t), .clock = (c), \
+	.hdisplay = (hd), .hsync_start = (hss), .hsync_end = (hse), \
+	.htotal = (ht), .hskew = (hsk), .vdisplay = (vd), \
+	.vsync_start = (vss), .vsync_end = (vse), .vtotal = (vt), \
+	.vscan = (vs), .flags = (f), .vrefresh = 0
+
+#define CRTC_INTERLACE_HALVE_V 0x1 /* halve V values for interlacing */
+
+struct drm_display_mode {
+	/* Header */
+	struct list_head head;
+	struct drm_mode_object base;
+
+	char name[DRM_DISPLAY_MODE_LEN];
+
+	int connector_count;
+	enum drm_mode_status status;
+	int type;
+
+	/* Proposed mode values */
+	int clock;
+	int hdisplay;
+	int hsync_start;
+	int hsync_end;
+	int htotal;
+	int hskew;
+	int vdisplay;
+	int vsync_start;
+	int vsync_end;
+	int vtotal;
+	int vscan;
+	unsigned int flags;
+
+	/* Addressable image size (may be 0 for projectors, etc.) */
+	int width_mm;
+	int height_mm;
+
+	/* Actual mode we give to hw */
+	int clock_index;
+	int synth_clock;
+	int crtc_hdisplay;
+	int crtc_hblank_start;
+	int crtc_hblank_end;
+	int crtc_hsync_start;
+	int crtc_hsync_end;
+	int crtc_htotal;
+	int crtc_hskew;
+	int crtc_vdisplay;
+	int crtc_vblank_start;
+	int crtc_vblank_end;
+	int crtc_vsync_start;
+	int crtc_vsync_end;
+	int crtc_vtotal;
+	int crtc_hadjusted;
+	int crtc_vadjusted;
+
+	/* Driver private mode info */
+	int private_size;
+	int *private;
+	int private_flags;
+
+	int vrefresh;
+	float hsync;
+};
+
+enum drm_connector_status {
+	connector_status_connected = 1,
+	connector_status_disconnected = 2,
+	connector_status_unknown = 3,
+};
+
+enum subpixel_order {
+	SubPixelUnknown = 0,
+	SubPixelHorizontalRGB,
+	SubPixelHorizontalBGR,
+	SubPixelVerticalRGB,
+	SubPixelVerticalBGR,
+	SubPixelNone,
+};
+
+
+/*
+ * Describes a given display (e.g. CRT or flat panel) and its limitations.
+ */
+struct drm_display_info {
+	char name[DRM_DISPLAY_INFO_LEN];
+	/* Input info */
+	bool serration_vsync;
+	bool sync_on_green;
+	bool composite_sync;
+	bool separate_syncs;
+	bool blank_to_black;
+	unsigned char video_level;
+	bool digital;
+	/* Physical size */
+        unsigned int width_mm;
+	unsigned int height_mm;
+
+	/* Display parameters */
+	unsigned char gamma; /* FIXME: storage format */
+	bool gtf_supported;
+	bool standard_color;
+	enum {
+		monochrome = 0,
+		rgb,
+		other,
+		unknown,
+	} display_type;
+	bool active_off_supported;
+	bool suspend_supported;
+	bool standby_supported;
+
+	/* Color info FIXME: storage format */
+	unsigned short redx, redy;
+	unsigned short greenx, greeny;
+	unsigned short bluex, bluey;
+	unsigned short whitex, whitey;
+
+	/* Clock limits FIXME: storage format */
+	unsigned int min_vfreq, max_vfreq;
+	unsigned int min_hfreq, max_hfreq;
+	unsigned int pixel_clock;
+
+	/* White point indices FIXME: storage format */
+	unsigned int wpx1, wpy1;
+	unsigned int wpgamma1;
+	unsigned int wpx2, wpy2;
+	unsigned int wpgamma2;
+
+	enum subpixel_order subpixel_order;
+
+	char *raw_edid; /* if any */
+};
+
+struct drm_framebuffer_funcs {
+	void (*destroy)(struct drm_framebuffer *framebuffer);
+	int (*create_handle)(struct drm_framebuffer *fb,
+			     struct drm_file *file_priv,
+			     unsigned int *handle);
+};
+
+struct drm_framebuffer {
+	struct drm_device *dev;
+	struct list_head head;
+	struct drm_mode_object base;
+	const struct drm_framebuffer_funcs *funcs;
+	unsigned int pitch;
+	unsigned int width;
+	unsigned int height;
+	/* depth can be 15 or 16 */
+	unsigned int depth;
+	int bits_per_pixel;
+	int flags;
+	void *fbdev;
+	u32 pseudo_palette[17];
+	struct list_head filp_head;
+};
+
+struct drm_property_blob {
+	struct drm_mode_object base;
+	struct list_head head;
+	unsigned int length;
+	void *data;
+};
+
+struct drm_property_enum {
+	uint64_t value;
+	struct list_head head;
+	char name[DRM_PROP_NAME_LEN];
+};
+
+struct drm_property {
+	struct list_head head;
+	struct drm_mode_object base;
+	uint32_t flags;
+	char name[DRM_PROP_NAME_LEN];
+	uint32_t num_values;
+	uint64_t *values;
+
+	struct list_head enum_blob_list;
+};
+
+struct drm_crtc;
+struct drm_connector;
+struct drm_encoder;
+
+/**
+ * drm_crtc_funcs - control CRTCs for a given device
+ * @dpms: control display power levels
+ * @save: save CRTC state
+ * @resore: restore CRTC state
+ * @lock: lock the CRTC
+ * @unlock: unlock the CRTC
+ * @shadow_allocate: allocate shadow pixmap
+ * @shadow_create: create shadow pixmap for rotation support
+ * @shadow_destroy: free shadow pixmap
+ * @mode_fixup: fixup proposed mode
+ * @mode_set: set the desired mode on the CRTC
+ * @gamma_set: specify color ramp for CRTC
+ * @destroy: deinit and free object.
+ *
+ * The drm_crtc_funcs structure is the central CRTC management structure
+ * in the DRM.  Each CRTC controls one or more connectors (note that the name
+ * CRTC is simply historical, a CRTC may control LVDS, VGA, DVI, TV out, etc.
+ * connectors, not just CRTs).
+ *
+ * Each driver is responsible for filling out this structure at startup time,
+ * in addition to providing other modesetting features, like i2c and DDC
+ * bus accessors.
+ */
+struct drm_crtc_funcs {
+	/* Save CRTC state */
+	void (*save)(struct drm_crtc *crtc); /* suspend? */
+	/* Restore CRTC state */
+	void (*restore)(struct drm_crtc *crtc); /* resume? */
+
+	/* cursor controls */
+	int (*cursor_set)(struct drm_crtc *crtc, struct drm_file *file_priv,
+			  uint32_t handle, uint32_t width, uint32_t height);
+	int (*cursor_move)(struct drm_crtc *crtc, int x, int y);
+
+	/* Set gamma on the CRTC */
+	void (*gamma_set)(struct drm_crtc *crtc, u16 *r, u16 *g, u16 *b,
+			  uint32_t size);
+	/* Object destroy routine */
+	void (*destroy)(struct drm_crtc *crtc);
+
+	int (*set_config)(struct drm_mode_set *set);
+};
+
+/**
+ * drm_crtc - central CRTC control structure
+ * @enabled: is this CRTC enabled?
+ * @x: x position on screen
+ * @y: y position on screen
+ * @desired_mode: new desired mode
+ * @desired_x: desired x for desired_mode
+ * @desired_y: desired y for desired_mode
+ * @funcs: CRTC control functions
+ *
+ * Each CRTC may have one or more connectors associated with it.  This structure
+ * allows the CRTC to be controlled.
+ */
+struct drm_crtc {
+	struct drm_device *dev;
+	struct list_head head;
+
+	struct drm_mode_object base;
+
+	/* framebuffer the connector is currently bound to */
+	struct drm_framebuffer *fb;
+
+	bool enabled;
+
+	struct drm_display_mode mode;
+
+	int x, y;
+	struct drm_display_mode *desired_mode;
+	int desired_x, desired_y;
+	const struct drm_crtc_funcs *funcs;
+
+	/* CRTC gamma size for reporting to userspace */
+	uint32_t gamma_size;
+	uint16_t *gamma_store;
+
+	/* if you are using the helper */
+	void *helper_private;
+};
+
+
+/**
+ * drm_connector_funcs - control connectors on a given device
+ * @dpms: set power state (see drm_crtc_funcs above)
+ * @save: save connector state
+ * @restore: restore connector state
+ * @mode_valid: is this mode valid on the given connector?
+ * @mode_fixup: try to fixup proposed mode for this connector
+ * @mode_set: set this mode
+ * @detect: is this connector active?
+ * @get_modes: get mode list for this connector
+ * @set_property: property for this connector may need update
+ * @destroy: make object go away
+ *
+ * Each CRTC may have one or more connectors attached to it.  The functions
+ * below allow the core DRM code to control connectors, enumerate available modes,
+ * etc.
+ */
+struct drm_connector_funcs {
+	void (*dpms)(struct drm_connector *connector, int mode);
+	void (*save)(struct drm_connector *connector);
+	void (*restore)(struct drm_connector *connector);
+	enum drm_connector_status (*detect)(struct drm_connector *connector);
+	void (*fill_modes)(struct drm_connector *connector, uint32_t max_width, uint32_t max_height);
+	int (*set_property)(struct drm_connector *connector, struct drm_property *property,
+			     uint64_t val);
+	void (*destroy)(struct drm_connector *connector);
+};
+
+struct drm_encoder_funcs {
+	void (*destroy)(struct drm_encoder *encoder);
+};
+
+#define DRM_CONNECTOR_MAX_UMODES 16
+#define DRM_CONNECTOR_MAX_PROPERTY 16
+#define DRM_CONNECTOR_LEN 32
+#define DRM_CONNECTOR_MAX_ENCODER 2
+
+/**
+ * drm_encoder - central DRM encoder structure
+ */
+struct drm_encoder {
+	struct drm_device *dev;
+	struct list_head head;
+
+	struct drm_mode_object base;
+	int encoder_type;
+	uint32_t possible_crtcs;
+	uint32_t possible_clones;
+
+	struct drm_crtc *crtc;
+	const struct drm_encoder_funcs *funcs;
+	void *helper_private;
+};
+
+/**
+ * drm_connector - central DRM connector control structure
+ * @crtc: CRTC this connector is currently connected to, NULL if none
+ * @interlace_allowed: can this connector handle interlaced modes?
+ * @doublescan_allowed: can this connector handle doublescan?
+ * @available_modes: modes available on this connector (from get_modes() + user)
+ * @initial_x: initial x position for this connector
+ * @initial_y: initial y position for this connector
+ * @status: connector connected?
+ * @funcs: connector control functions
+ *
+ * Each connector may be connected to one or more CRTCs, or may be clonable by
+ * another connector if they can share a CRTC.  Each connector also has a specific
+ * position in the broader display (referred to as a 'screen' though it could
+ * span multiple monitors).
+ */
+struct drm_connector {
+	struct drm_device *dev;
+	struct device kdev;
+	struct device_attribute *attr;
+	struct list_head head;
+
+	struct drm_mode_object base;
+
+	int connector_type;
+	int connector_type_id;
+	bool interlace_allowed;
+	bool doublescan_allowed;
+	struct list_head modes; /* list of modes on this connector */
+
+	int initial_x, initial_y;
+	enum drm_connector_status status;
+
+	/* these are modes added by probing with DDC or the BIOS */
+	struct list_head probed_modes;
+
+	struct drm_display_info display_info;
+	const struct drm_connector_funcs *funcs;
+
+	struct list_head user_modes;
+	struct drm_property_blob *edid_blob_ptr;
+	u32 property_ids[DRM_CONNECTOR_MAX_PROPERTY];
+	uint64_t property_values[DRM_CONNECTOR_MAX_PROPERTY];
+
+	void *helper_private;
+
+	uint32_t encoder_ids[DRM_CONNECTOR_MAX_ENCODER];
+	uint32_t force_encoder_id;
+	struct drm_encoder *encoder; /* currently active encoder */
+};
+
+/**
+ * struct drm_mode_set
+ *
+ * Represents a single crtc the connectors that it drives with what mode
+ * and from which framebuffer it scans out from.
+ *
+ * This is used to set modes.
+ */
+struct drm_mode_set {
+	struct list_head head;
+
+	struct drm_framebuffer *fb;
+	struct drm_crtc *crtc;
+	struct drm_display_mode *mode;
+
+	uint32_t x;
+	uint32_t y;
+
+	struct drm_connector **connectors;
+	size_t num_connectors;
+};
+
+/**
+ * struct drm_mode_config_funcs - configure CRTCs for a given screen layout
+ * @resize: adjust CRTCs as necessary for the proposed layout
+ *
+ * Currently only a resize hook is available.  DRM will call back into the
+ * driver with a new screen width and height.  If the driver can't support
+ * the proposed size, it can return false.  Otherwise it should adjust
+ * the CRTC<->connector mappings as needed and update its view of the screen.
+ */
+struct drm_mode_config_funcs {
+	int (*resize_fb)(struct drm_device *dev, struct drm_file *file_priv, struct drm_framebuffer *fb, struct drm_mode_fb_cmd *mode_cmd);
+	struct drm_framebuffer *(*fb_create)(struct drm_device *dev, struct drm_file *file_priv, struct drm_mode_fb_cmd *mode_cmd);
+	int (*fb_changed)(struct drm_device *dev);
+};
+
+struct drm_mode_group {
+	uint32_t num_crtcs;
+	uint32_t num_encoders;
+	uint32_t num_connectors;
+
+	/* list of object IDs for this group */
+	uint32_t *id_list;
+};
+
+/**
+ * drm_mode_config - Mode configuration control structure
+ *
+ */
+struct drm_mode_config {
+	struct mutex mutex; /* protects configuration and IDR */
+	struct idr crtc_idr; /* use this idr for all IDs, fb, crtc, connector, modes - just makes life easier */
+	/* this is limited to one for now */
+	int num_fb;
+	struct list_head fb_list;
+	int num_connector;
+	struct list_head connector_list;
+	int num_encoder;
+	struct list_head encoder_list;
+
+	int num_crtc;
+	struct list_head crtc_list;
+
+	struct list_head property_list;
+
+	/* in-kernel framebuffers - hung of filp_head in drm_framebuffer */
+	struct list_head fb_kernel_list;
+
+	int min_width, min_height;
+	int max_width, max_height;
+	struct drm_mode_config_funcs *funcs;
+	unsigned long fb_base;
+
+	/* pointers to standard properties */
+	struct list_head property_blob_list;
+	struct drm_property *edid_property;
+	struct drm_property *dpms_property;
+
+	/* DVI-I properties */
+	struct drm_property *dvi_i_subconnector_property;
+	struct drm_property *dvi_i_select_subconnector_property;
+
+	/* TV properties */
+	struct drm_property *tv_subconnector_property;
+	struct drm_property *tv_select_subconnector_property;
+	struct drm_property *tv_mode_property;
+	struct drm_property *tv_left_margin_property;
+	struct drm_property *tv_right_margin_property;
+	struct drm_property *tv_top_margin_property;
+	struct drm_property *tv_bottom_margin_property;
+
+	/* Optional properties */
+	struct drm_property *scaling_mode_property;
+	struct drm_property *dithering_mode_property;
+
+	/* hotplug */
+	uint32_t hotplug_counter;
+};
+
+#define obj_to_crtc(x) container_of(x, struct drm_crtc, base)
+#define obj_to_connector(x) container_of(x, struct drm_connector, base)
+#define obj_to_encoder(x) container_of(x, struct drm_encoder, base)
+#define obj_to_mode(x) container_of(x, struct drm_display_mode, base)
+#define obj_to_fb(x) container_of(x, struct drm_framebuffer, base)
+#define obj_to_property(x) container_of(x, struct drm_property, base)
+#define obj_to_blob(x) container_of(x, struct drm_property_blob, base)
+
+
+extern void drm_crtc_init(struct drm_device *dev,
+			  struct drm_crtc *crtc,
+			  const struct drm_crtc_funcs *funcs);
+extern void drm_crtc_cleanup(struct drm_crtc *crtc);
+
+extern void drm_connector_init(struct drm_device *dev,
+			    struct drm_connector *connector,
+			    const struct drm_connector_funcs *funcs,
+			    int connector_type);
+
+extern void drm_connector_cleanup(struct drm_connector *connector);
+
+extern void drm_encoder_init(struct drm_device *dev,
+			     struct drm_encoder *encoder,
+			     const struct drm_encoder_funcs *funcs,
+			     int encoder_type);
+
+extern void drm_encoder_cleanup(struct drm_encoder *encoder);
+
+extern char *drm_get_connector_name(struct drm_connector *connector);
+extern char *drm_get_dpms_name(int val);
+extern char *drm_get_dvi_i_subconnector_name(int val);
+extern char *drm_get_dvi_i_select_name(int val);
+extern char *drm_get_tv_subconnector_name(int val);
+extern char *drm_get_tv_select_name(int val);
+extern void drm_fb_release(struct file *filp);
+extern int drm_mode_group_init_legacy_group(struct drm_device *dev, struct drm_mode_group *group);
+extern struct edid *drm_get_edid(struct drm_connector *connector,
+				 struct i2c_adapter *adapter);
+extern unsigned char *drm_do_probe_ddc_edid(struct i2c_adapter *adapter);
+extern int drm_add_edid_modes(struct drm_connector *connector, struct edid *edid);
+extern void drm_mode_probed_add(struct drm_connector *connector, struct drm_display_mode *mode);
+extern void drm_mode_remove(struct drm_connector *connector, struct drm_display_mode *mode);
+extern struct drm_display_mode *drm_mode_duplicate(struct drm_device *dev,
+						   struct drm_display_mode *mode);
+extern void drm_mode_debug_printmodeline(struct drm_display_mode *mode);
+extern void drm_mode_config_init(struct drm_device *dev);
+extern void drm_mode_config_cleanup(struct drm_device *dev);
+extern void drm_mode_set_name(struct drm_display_mode *mode);
+extern bool drm_mode_equal(struct drm_display_mode *mode1, struct drm_display_mode *mode2);
+extern int drm_mode_width(struct drm_display_mode *mode);
+extern int drm_mode_height(struct drm_display_mode *mode);
+
+/* for us by fb module */
+extern int drm_mode_attachmode_crtc(struct drm_device *dev,
+				    struct drm_crtc *crtc,
+				    struct drm_display_mode *mode);
+extern int drm_mode_detachmode_crtc(struct drm_device *dev, struct drm_display_mode *mode);
+
+extern struct drm_display_mode *drm_mode_create(struct drm_device *dev);
+extern void drm_mode_destroy(struct drm_device *dev, struct drm_display_mode *mode);
+extern void drm_mode_list_concat(struct list_head *head,
+				 struct list_head *new);
+extern void drm_mode_validate_size(struct drm_device *dev,
+				   struct list_head *mode_list,
+				   int maxX, int maxY, int maxPitch);
+extern void drm_mode_prune_invalid(struct drm_device *dev,
+				   struct list_head *mode_list, bool verbose);
+extern void drm_mode_sort(struct list_head *mode_list);
+extern int drm_mode_vrefresh(struct drm_display_mode *mode);
+extern void drm_mode_set_crtcinfo(struct drm_display_mode *p,
+				  int adjust_flags);
+extern void drm_mode_connector_list_update(struct drm_connector *connector);
+extern int drm_mode_connector_update_edid_property(struct drm_connector *connector,
+						struct edid *edid);
+extern int drm_connector_property_set_value(struct drm_connector *connector,
+					 struct drm_property *property,
+					 uint64_t value);
+extern int drm_connector_property_get_value(struct drm_connector *connector,
+					 struct drm_property *property,
+					 uint64_t *value);
+extern struct drm_display_mode *drm_crtc_mode_create(struct drm_device *dev);
+extern void drm_framebuffer_set_object(struct drm_device *dev,
+				       unsigned long handle);
+extern int drm_framebuffer_init(struct drm_device *dev,
+				struct drm_framebuffer *fb,
+				const struct drm_framebuffer_funcs *funcs);
+extern void drm_framebuffer_cleanup(struct drm_framebuffer *fb);
+extern int drmfb_probe(struct drm_device *dev, struct drm_crtc *crtc);
+extern int drmfb_remove(struct drm_device *dev, struct drm_framebuffer *fb);
+extern void drm_crtc_probe_connector_modes(struct drm_device *dev, int maxX, int maxY);
+extern bool drm_crtc_in_use(struct drm_crtc *crtc);
+
+extern int drm_connector_attach_property(struct drm_connector *connector,
+				      struct drm_property *property, uint64_t init_val);
+extern struct drm_property *drm_property_create(struct drm_device *dev, int flags,
+						const char *name, int num_values);
+extern void drm_property_destroy(struct drm_device *dev, struct drm_property *property);
+extern int drm_property_add_enum(struct drm_property *property, int index,
+				 uint64_t value, const char *name);
+extern int drm_mode_create_dvi_i_properties(struct drm_device *dev);
+extern int drm_mode_create_tv_properties(struct drm_device *dev, int num_formats,
+				     char *formats[]);
+extern int drm_mode_create_scaling_mode_property(struct drm_device *dev);
+extern int drm_mode_create_dithering_property(struct drm_device *dev);
+extern char *drm_get_encoder_name(struct drm_encoder *encoder);
+
+extern int drm_mode_connector_attach_encoder(struct drm_connector *connector,
+					     struct drm_encoder *encoder);
+extern void drm_mode_connector_detach_encoder(struct drm_connector *connector,
+					   struct drm_encoder *encoder);
+extern bool drm_mode_crtc_set_gamma_size(struct drm_crtc *crtc,
+					 int gamma_size);
+extern void *drm_mode_object_find(struct drm_device *dev, uint32_t id, uint32_t type);
+/* IOCTLs */
+extern int drm_mode_getresources(struct drm_device *dev,
+				 void *data, struct drm_file *file_priv);
+
+extern int drm_mode_getcrtc(struct drm_device *dev,
+			    void *data, struct drm_file *file_priv);
+extern int drm_mode_getconnector(struct drm_device *dev,
+			      void *data, struct drm_file *file_priv);
+extern int drm_mode_setcrtc(struct drm_device *dev,
+			    void *data, struct drm_file *file_priv);
+extern int drm_mode_cursor_ioctl(struct drm_device *dev,
+				void *data, struct drm_file *file_priv);
+extern int drm_mode_addfb(struct drm_device *dev,
+			  void *data, struct drm_file *file_priv);
+extern int drm_mode_rmfb(struct drm_device *dev,
+			 void *data, struct drm_file *file_priv);
+extern int drm_mode_getfb(struct drm_device *dev,
+			  void *data, struct drm_file *file_priv);
+extern int drm_mode_addmode_ioctl(struct drm_device *dev,
+				  void *data, struct drm_file *file_priv);
+extern int drm_mode_rmmode_ioctl(struct drm_device *dev,
+				 void *data, struct drm_file *file_priv);
+extern int drm_mode_attachmode_ioctl(struct drm_device *dev,
+				     void *data, struct drm_file *file_priv);
+extern int drm_mode_detachmode_ioctl(struct drm_device *dev,
+				     void *data, struct drm_file *file_priv);
+
+extern int drm_mode_getproperty_ioctl(struct drm_device *dev,
+				      void *data, struct drm_file *file_priv);
+extern int drm_mode_getblob_ioctl(struct drm_device *dev,
+				  void *data, struct drm_file *file_priv);
+extern int drm_mode_connector_property_set_ioctl(struct drm_device *dev,
+					      void *data, struct drm_file *file_priv);
+extern int drm_mode_hotplug_ioctl(struct drm_device *dev,
+				  void *data, struct drm_file *file_priv);
+extern int drm_mode_replacefb(struct drm_device *dev,
+			      void *data, struct drm_file *file_priv);
+extern int drm_mode_getencoder(struct drm_device *dev,
+			       void *data, struct drm_file *file_priv);
+extern int drm_mode_gamma_get_ioctl(struct drm_device *dev,
+				    void *data, struct drm_file *file_priv);
+extern int drm_mode_gamma_set_ioctl(struct drm_device *dev,
+				    void *data, struct drm_file *file_priv);
+#endif /* __DRM_CRTC_H__ */
diff --git a/include/drm/drm_crtc_helper.h b/include/drm/drm_crtc_helper.h
new file mode 100644
index 000000000000..a341828d1d15
--- /dev/null
+++ b/include/drm/drm_crtc_helper.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright © 2006 Keith Packard
+ * Copyright © 2007-2008 Dave Airlie
+ * Copyright © 2007-2008 Intel Corporation
+ *   Jesse Barnes <jesse.barnes@intel.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * The DRM mode setting helper functions are common code for drivers to use if
+ * they wish.  Drivers are not forced to use this code in their
+ * implementations but it would be useful if they code they do use at least
+ * provides a consistent interface and operation to userspace
+ */
+
+#ifndef __DRM_CRTC_HELPER_H__
+#define __DRM_CRTC_HELPER_H__
+
+#include <linux/i2c.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/idr.h>
+
+#include <linux/fb.h>
+
+struct drm_crtc_helper_funcs {
+	/*
+	 * Control power levels on the CRTC.  If the mode passed in is
+	 * unsupported, the provider must use the next lowest power level.
+	 */
+	void (*dpms)(struct drm_crtc *crtc, int mode);
+	void (*prepare)(struct drm_crtc *crtc);
+	void (*commit)(struct drm_crtc *crtc);
+
+	/* Provider can fixup or change mode timings before modeset occurs */
+	bool (*mode_fixup)(struct drm_crtc *crtc,
+			   struct drm_display_mode *mode,
+			   struct drm_display_mode *adjusted_mode);
+	/* Actually set the mode */
+	void (*mode_set)(struct drm_crtc *crtc, struct drm_display_mode *mode,
+			 struct drm_display_mode *adjusted_mode, int x, int y);
+
+	/* Move the crtc on the current fb to the given position *optional* */
+	void (*mode_set_base)(struct drm_crtc *crtc, int x, int y);
+};
+
+struct drm_encoder_helper_funcs {
+	void (*dpms)(struct drm_encoder *encoder, int mode);
+	void (*save)(struct drm_encoder *encoder);
+	void (*restore)(struct drm_encoder *encoder);
+
+	bool (*mode_fixup)(struct drm_encoder *encoder,
+			   struct drm_display_mode *mode,
+			   struct drm_display_mode *adjusted_mode);
+	void (*prepare)(struct drm_encoder *encoder);
+	void (*commit)(struct drm_encoder *encoder);
+	void (*mode_set)(struct drm_encoder *encoder,
+			 struct drm_display_mode *mode,
+			 struct drm_display_mode *adjusted_mode);
+	/* detect for DAC style encoders */
+	enum drm_connector_status (*detect)(struct drm_encoder *encoder,
+					    struct drm_connector *connector);
+};
+
+struct drm_connector_helper_funcs {
+	int (*get_modes)(struct drm_connector *connector);
+	int (*mode_valid)(struct drm_connector *connector,
+			  struct drm_display_mode *mode);
+	struct drm_encoder *(*best_encoder)(struct drm_connector *connector);
+};
+
+extern void drm_helper_probe_single_connector_modes(struct drm_connector *connector, uint32_t maxX, uint32_t maxY);
+extern void drm_helper_disable_unused_functions(struct drm_device *dev);
+extern int drm_helper_hotplug_stage_two(struct drm_device *dev);
+extern bool drm_helper_initial_config(struct drm_device *dev, bool can_grow);
+extern int drm_crtc_helper_set_config(struct drm_mode_set *set);
+extern bool drm_crtc_helper_set_mode(struct drm_crtc *crtc,
+				     struct drm_display_mode *mode,
+				     int x, int y);
+extern bool drm_helper_crtc_in_use(struct drm_crtc *crtc);
+
+extern int drm_helper_mode_fill_fb_struct(struct drm_framebuffer *fb,
+					  struct drm_mode_fb_cmd *mode_cmd);
+
+static inline void drm_crtc_helper_add(struct drm_crtc *crtc,
+				       const struct drm_crtc_helper_funcs *funcs)
+{
+	crtc->helper_private = (void *)funcs;
+}
+
+static inline void drm_encoder_helper_add(struct drm_encoder *encoder,
+					  const struct drm_encoder_helper_funcs *funcs)
+{
+	encoder->helper_private = (void *)funcs;
+}
+
+static inline void drm_connector_helper_add(struct drm_connector *connector,
+					    const struct drm_connector_helper_funcs *funcs)
+{
+	connector->helper_private = (void *)funcs;
+}
+
+extern int drm_helper_resume_force_mode(struct drm_device *dev);
+#endif
diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h
new file mode 100644
index 000000000000..c707c15f5164
--- /dev/null
+++ b/include/drm/drm_edid.h
@@ -0,0 +1,202 @@
+/*
+ * Copyright © 2007-2008 Intel Corporation
+ *   Jesse Barnes <jesse.barnes@intel.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef __DRM_EDID_H__
+#define __DRM_EDID_H__
+
+#include <linux/types.h>
+
+#define EDID_LENGTH 128
+#define DDC_ADDR 0x50
+
+#ifdef BIG_ENDIAN
+#error "EDID structure is little endian, need big endian versions"
+#else
+
+struct est_timings {
+	u8 t1;
+	u8 t2;
+	u8 mfg_rsvd;
+} __attribute__((packed));
+
+struct std_timing {
+	u8 hsize; /* need to multiply by 8 then add 248 */
+	u8 vfreq:6; /* need to add 60 */
+	u8 aspect_ratio:2; /* 00=16:10, 01=4:3, 10=5:4, 11=16:9 */
+} __attribute__((packed));
+
+/* If detailed data is pixel timing */
+struct detailed_pixel_timing {
+	u8 hactive_lo;
+	u8 hblank_lo;
+	u8 hblank_hi:4;
+	u8 hactive_hi:4;
+	u8 vactive_lo;
+	u8 vblank_lo;
+	u8 vblank_hi:4;
+	u8 vactive_hi:4;
+	u8 hsync_offset_lo;
+	u8 hsync_pulse_width_lo;
+	u8 vsync_pulse_width_lo:4;
+	u8 vsync_offset_lo:4;
+	u8 hsync_pulse_width_hi:2;
+	u8 hsync_offset_hi:2;
+	u8 vsync_pulse_width_hi:2;
+	u8 vsync_offset_hi:2;
+	u8 width_mm_lo;
+	u8 height_mm_lo;
+	u8 height_mm_hi:4;
+	u8 width_mm_hi:4;
+	u8 hborder;
+	u8 vborder;
+	u8 unknown0:1;
+	u8 vsync_positive:1;
+	u8 hsync_positive:1;
+	u8 separate_sync:2;
+	u8 stereo:1;
+	u8 unknown6:1;
+	u8 interlaced:1;
+} __attribute__((packed));
+
+/* If it's not pixel timing, it'll be one of the below */
+struct detailed_data_string {
+	u8 str[13];
+} __attribute__((packed));
+
+struct detailed_data_monitor_range {
+	u8 min_vfreq;
+	u8 max_vfreq;
+	u8 min_hfreq_khz;
+	u8 max_hfreq_khz;
+	u8 pixel_clock_mhz; /* need to multiply by 10 */
+	u16 sec_gtf_toggle; /* A000=use above, 20=use below */ /* FIXME: byte order */
+	u8 hfreq_start_khz; /* need to multiply by 2 */
+	u8 c; /* need to divide by 2 */
+	u16 m; /* FIXME: byte order */
+	u8 k;
+	u8 j; /* need to divide by 2 */
+} __attribute__((packed));
+
+struct detailed_data_wpindex {
+	u8 white_y_lo:2;
+	u8 white_x_lo:2;
+	u8 pad:4;
+	u8 white_x_hi;
+	u8 white_y_hi;
+	u8 gamma; /* need to divide by 100 then add 1 */
+} __attribute__((packed));
+
+struct detailed_data_color_point {
+	u8 windex1;
+	u8 wpindex1[3];
+	u8 windex2;
+	u8 wpindex2[3];
+} __attribute__((packed));
+
+struct detailed_non_pixel {
+	u8 pad1;
+	u8 type; /* ff=serial, fe=string, fd=monitor range, fc=monitor name
+		    fb=color point data, fa=standard timing data,
+		    f9=undefined, f8=mfg. reserved */
+	u8 pad2;
+	union {
+		struct detailed_data_string str;
+		struct detailed_data_monitor_range range;
+		struct detailed_data_wpindex color;
+		struct std_timing timings[5];
+	} data;
+} __attribute__((packed));
+
+#define EDID_DETAIL_STD_MODES 0xfa
+#define EDID_DETAIL_MONITOR_CPDATA 0xfb
+#define EDID_DETAIL_MONITOR_NAME 0xfc
+#define EDID_DETAIL_MONITOR_RANGE 0xfd
+#define EDID_DETAIL_MONITOR_STRING 0xfe
+#define EDID_DETAIL_MONITOR_SERIAL 0xff
+
+struct detailed_timing {
+	u16 pixel_clock; /* need to multiply by 10 KHz */ /* FIXME: byte order */
+	union {
+		struct detailed_pixel_timing pixel_data;
+		struct detailed_non_pixel other_data;
+	} data;
+} __attribute__((packed));
+
+struct edid {
+	u8 header[8];
+	/* Vendor & product info */
+	u8 mfg_id[2];
+	u8 prod_code[2];
+	u32 serial; /* FIXME: byte order */
+	u8 mfg_week;
+	u8 mfg_year;
+	/* EDID version */
+	u8 version;
+	u8 revision;
+	/* Display info: */
+	/*   input definition */
+	u8 serration_vsync:1;
+	u8 sync_on_green:1;
+	u8 composite_sync:1;
+	u8 separate_syncs:1;
+	u8 blank_to_black:1;
+	u8 video_level:2;
+	u8 digital:1; /* bits below must be zero if set */
+	u8 width_cm;
+	u8 height_cm;
+	u8 gamma;
+	/*   feature support */
+	u8 default_gtf:1;
+	u8 preferred_timing:1;
+	u8 standard_color:1;
+	u8 display_type:2; /* 00=mono, 01=rgb, 10=non-rgb, 11=unknown */
+	u8 pm_active_off:1;
+	u8 pm_suspend:1;
+	u8 pm_standby:1;
+	/* Color characteristics */
+	u8 red_green_lo;
+	u8 black_white_lo;
+	u8 red_x;
+	u8 red_y;
+	u8 green_x;
+	u8 green_y;
+	u8 blue_x;
+	u8 blue_y;
+	u8 white_x;
+	u8 white_y;
+	/* Est. timings and mfg rsvd timings*/
+	struct est_timings established_timings;
+	/* Standard timings 1-8*/
+	struct std_timing standard_timings[8];
+	/* Detailing timings 1-4 */
+	struct detailed_timing detailed_timings[4];
+	/* Number of 128 byte ext. blocks */
+	u8 extensions;
+	/* Checksum */
+	u8 checksum;
+} __attribute__((packed));
+
+#endif /* little endian structs */
+
+#define EDID_PRODUCT_ID(e) ((e)->prod_code[0] | ((e)->prod_code[1] << 8))
+
+#endif /* __DRM_EDID_H__ */
diff --git a/include/drm/drm_mode.h b/include/drm/drm_mode.h
new file mode 100644
index 000000000000..d2e791920aba
--- /dev/null
+++ b/include/drm/drm_mode.h
@@ -0,0 +1,278 @@
+/*
+ * Copyright (c) 2007 Dave Airlie <airlied@linux.ie>
+ * Copyright (c) 2007 Jakob Bornecrantz <wallbraker@gmail.com>
+ * Copyright (c) 2008 Red Hat Inc.
+ * Copyright (c) 2007-2008 Tungsten Graphics, Inc., Cedar Park, TX., USA
+ * Copyright (c) 2007-2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef _DRM_MODE_H
+#define _DRM_MODE_H
+
+#if !defined(__KERNEL__) && !defined(_KERNEL)
+#include <stdint.h>
+#else
+#include <linux/kernel.h>
+#endif
+
+#define DRM_DISPLAY_INFO_LEN 32
+#define DRM_CONNECTOR_NAME_LEN 32
+#define DRM_DISPLAY_MODE_LEN 32
+#define DRM_PROP_NAME_LEN 32
+
+#define DRM_MODE_TYPE_BUILTIN	(1<<0)
+#define DRM_MODE_TYPE_CLOCK_C	((1<<1) | DRM_MODE_TYPE_BUILTIN)
+#define DRM_MODE_TYPE_CRTC_C	((1<<2) | DRM_MODE_TYPE_BUILTIN)
+#define DRM_MODE_TYPE_PREFERRED	(1<<3)
+#define DRM_MODE_TYPE_DEFAULT	(1<<4)
+#define DRM_MODE_TYPE_USERDEF	(1<<5)
+#define DRM_MODE_TYPE_DRIVER	(1<<6)
+
+/* Video mode flags */
+/* bit compatible with the xorg definitions. */
+#define DRM_MODE_FLAG_PHSYNC	(1<<0)
+#define DRM_MODE_FLAG_NHSYNC	(1<<1)
+#define DRM_MODE_FLAG_PVSYNC	(1<<2)
+#define DRM_MODE_FLAG_NVSYNC	(1<<3)
+#define DRM_MODE_FLAG_INTERLACE	(1<<4)
+#define DRM_MODE_FLAG_DBLSCAN	(1<<5)
+#define DRM_MODE_FLAG_CSYNC	(1<<6)
+#define DRM_MODE_FLAG_PCSYNC	(1<<7)
+#define DRM_MODE_FLAG_NCSYNC	(1<<8)
+#define DRM_MODE_FLAG_HSKEW	(1<<9) /* hskew provided */
+#define DRM_MODE_FLAG_BCAST	(1<<10)
+#define DRM_MODE_FLAG_PIXMUX	(1<<11)
+#define DRM_MODE_FLAG_DBLCLK	(1<<12)
+#define DRM_MODE_FLAG_CLKDIV2	(1<<13)
+
+/* DPMS flags */
+/* bit compatible with the xorg definitions. */
+#define DRM_MODE_DPMS_ON 0
+#define DRM_MODE_DPMS_STANDBY 1
+#define DRM_MODE_DPMS_SUSPEND 2
+#define DRM_MODE_DPMS_OFF 3
+
+/* Scaling mode options */
+#define DRM_MODE_SCALE_NON_GPU 0
+#define DRM_MODE_SCALE_FULLSCREEN 1
+#define DRM_MODE_SCALE_NO_SCALE 2
+#define DRM_MODE_SCALE_ASPECT 3
+
+/* Dithering mode options */
+#define DRM_MODE_DITHERING_OFF 0
+#define DRM_MODE_DITHERING_ON 1
+
+struct drm_mode_modeinfo {
+	unsigned int clock;
+	unsigned short hdisplay, hsync_start, hsync_end, htotal, hskew;
+	unsigned short vdisplay, vsync_start, vsync_end, vtotal, vscan;
+
+	unsigned int vrefresh; /* vertical refresh * 1000 */
+
+	unsigned int flags;
+	unsigned int type;
+	char name[DRM_DISPLAY_MODE_LEN];
+};
+
+struct drm_mode_card_res {
+	uint64_t fb_id_ptr;
+	uint64_t crtc_id_ptr;
+	uint64_t connector_id_ptr;
+	uint64_t encoder_id_ptr;
+	int count_fbs;
+	int count_crtcs;
+	int count_connectors;
+	int count_encoders;
+	int min_width, max_width;
+	int min_height, max_height;
+};
+
+struct drm_mode_crtc {
+	uint64_t set_connectors_ptr;
+	int count_connectors;
+
+	unsigned int crtc_id; /**< Id */
+	unsigned int fb_id; /**< Id of framebuffer */
+
+	int x, y; /**< Position on the frameuffer */
+
+	uint32_t gamma_size;
+	int mode_valid;
+	struct drm_mode_modeinfo mode;
+};
+
+#define DRM_MODE_ENCODER_NONE 0
+#define DRM_MODE_ENCODER_DAC  1
+#define DRM_MODE_ENCODER_TMDS 2
+#define DRM_MODE_ENCODER_LVDS 3
+#define DRM_MODE_ENCODER_TVDAC 4
+
+struct drm_mode_get_encoder {
+	unsigned int encoder_id;
+	unsigned int encoder_type;
+
+	unsigned int crtc_id; /**< Id of crtc */
+
+	uint32_t possible_crtcs;
+	uint32_t possible_clones;
+};
+
+/* This is for connectors with multiple signal types. */
+/* Try to match DRM_MODE_CONNECTOR_X as closely as possible. */
+#define DRM_MODE_SUBCONNECTOR_Automatic 0
+#define DRM_MODE_SUBCONNECTOR_Unknown 0
+#define DRM_MODE_SUBCONNECTOR_DVID 3
+#define DRM_MODE_SUBCONNECTOR_DVIA 4
+#define DRM_MODE_SUBCONNECTOR_Composite 5
+#define DRM_MODE_SUBCONNECTOR_SVIDEO 6
+#define DRM_MODE_SUBCONNECTOR_Component 8
+
+#define DRM_MODE_CONNECTOR_Unknown 0
+#define DRM_MODE_CONNECTOR_VGA 1
+#define DRM_MODE_CONNECTOR_DVII 2
+#define DRM_MODE_CONNECTOR_DVID 3
+#define DRM_MODE_CONNECTOR_DVIA 4
+#define DRM_MODE_CONNECTOR_Composite 5
+#define DRM_MODE_CONNECTOR_SVIDEO 6
+#define DRM_MODE_CONNECTOR_LVDS 7
+#define DRM_MODE_CONNECTOR_Component 8
+#define DRM_MODE_CONNECTOR_9PinDIN 9
+#define DRM_MODE_CONNECTOR_DisplayPort 10
+#define DRM_MODE_CONNECTOR_HDMIA 11
+#define DRM_MODE_CONNECTOR_HDMIB 12
+
+struct drm_mode_get_connector {
+
+	uint64_t encoders_ptr;
+	uint64_t modes_ptr;
+	uint64_t props_ptr;
+	uint64_t prop_values_ptr;
+
+	int count_modes;
+	int count_props;
+	int count_encoders;
+
+	unsigned int encoder_id; /**< Current Encoder */
+	unsigned int connector_id; /**< Id */
+	unsigned int connector_type;
+	unsigned int connector_type_id;
+
+	unsigned int connection;
+	unsigned int mm_width, mm_height; /**< HxW in millimeters */
+	unsigned int subpixel;
+};
+
+#define DRM_MODE_PROP_PENDING (1<<0)
+#define DRM_MODE_PROP_RANGE (1<<1)
+#define DRM_MODE_PROP_IMMUTABLE (1<<2)
+#define DRM_MODE_PROP_ENUM (1<<3) /* enumerated type with text strings */
+#define DRM_MODE_PROP_BLOB (1<<4)
+
+struct drm_mode_property_enum {
+	uint64_t value;
+	unsigned char name[DRM_PROP_NAME_LEN];
+};
+
+struct drm_mode_get_property {
+	uint64_t values_ptr; /* values and blob lengths */
+	uint64_t enum_blob_ptr; /* enum and blob id ptrs */
+
+	unsigned int prop_id;
+	unsigned int flags;
+	unsigned char name[DRM_PROP_NAME_LEN];
+
+	int count_values;
+	int count_enum_blobs;
+};
+
+struct drm_mode_connector_set_property {
+	uint64_t value;
+	unsigned int prop_id;
+	unsigned int connector_id;
+};
+
+struct drm_mode_get_blob {
+	uint32_t blob_id;
+	uint32_t length;
+	uint64_t data;
+};
+
+struct drm_mode_fb_cmd {
+	unsigned int buffer_id;
+	unsigned int width, height;
+	unsigned int pitch;
+	unsigned int bpp;
+	unsigned int depth;
+
+	unsigned int handle;
+};
+
+struct drm_mode_mode_cmd {
+	unsigned int connector_id;
+	struct drm_mode_modeinfo mode;
+};
+
+#define DRM_MODE_CURSOR_BO   0x01
+#define DRM_MODE_CURSOR_MOVE 0x02
+
+/*
+ * depending on the value in flags diffrent members are used.
+ *
+ * CURSOR_BO uses
+ *    crtc
+ *    width
+ *    height
+ *    handle - if 0 turns the cursor of
+ *
+ * CURSOR_MOVE uses
+ *    crtc
+ *    x
+ *    y
+ */
+struct drm_mode_cursor {
+	unsigned int flags;
+	unsigned int crtc;
+	int x;
+	int y;
+	uint32_t width;
+	uint32_t height;
+	unsigned int handle;
+};
+
+/*
+ * oh so ugly hotplug
+ */
+struct drm_mode_hotplug {
+	uint32_t counter;
+};
+
+struct drm_mode_crtc_lut {
+
+	uint32_t crtc_id;
+	uint32_t gamma_size;
+
+	/* pointers to arrays */
+	uint64_t red;
+	uint64_t green;
+	uint64_t blue;
+};
+
+#endif
diff --git a/include/linux/console.h b/include/linux/console.h
index 248e6e3b9b73..a67a90cf8268 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -153,4 +153,8 @@ void vcs_remove_sysfs(struct tty_struct *tty);
 #define VESA_HSYNC_SUSPEND      2
 #define VESA_POWERDOWN          3
 
+#ifdef CONFIG_VGA_CONSOLE
+extern bool vgacon_text_force(void);
+#endif
+
 #endif /* _LINUX_CONSOLE_H */
-- 
cgit v1.2.3


From 773ff60e841461cb1f9374a713ffcda029b8c317 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Tue, 23 Dec 2008 19:37:01 +0900
Subject: SLUB: failslab support

Currently fault-injection capability for SLAB allocator is only
available to SLAB. This patch makes it available to SLUB, too.

[penberg@cs.helsinki.fi: unify slab and slub implementations]
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Matt Mackall <mpm@selenic.com>
Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
---
 include/linux/fault-inject.h |  9 ++++++
 lib/Kconfig.debug            |  1 +
 mm/Makefile                  |  1 +
 mm/failslab.c                | 59 ++++++++++++++++++++++++++++++++++
 mm/slab.c                    | 75 +++-----------------------------------------
 mm/slub.c                    |  4 +++
 6 files changed, 79 insertions(+), 70 deletions(-)
 create mode 100644 mm/failslab.c

(limited to 'include/linux')

diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
index 32368c4f0326..06ca9b21dad2 100644
--- a/include/linux/fault-inject.h
+++ b/include/linux/fault-inject.h
@@ -81,4 +81,13 @@ static inline void cleanup_fault_attr_dentries(struct fault_attr *attr)
 
 #endif /* CONFIG_FAULT_INJECTION */
 
+#ifdef CONFIG_FAILSLAB
+extern bool should_failslab(size_t size, gfp_t gfpflags);
+#else
+static inline bool should_failslab(size_t size, gfp_t gfpflags)
+{
+	return false;
+}
+#endif /* CONFIG_FAILSLAB */
+
 #endif /* _LINUX_FAULT_INJECT_H */
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index b0f239e443bc..af65ae7f0549 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -699,6 +699,7 @@ config FAULT_INJECTION
 config FAILSLAB
 	bool "Fault-injection capability for kmalloc"
 	depends on FAULT_INJECTION
+	depends on SLAB || SLUB
 	help
 	  Provide fault-injection capability for kmalloc.
 
diff --git a/mm/Makefile b/mm/Makefile
index c06b45a1ff5f..51c27709cc7c 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -28,6 +28,7 @@ obj-$(CONFIG_SLOB) += slob.o
 obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
 obj-$(CONFIG_SLAB) += slab.o
 obj-$(CONFIG_SLUB) += slub.o
+obj-$(CONFIG_FAILSLAB) += failslab.o
 obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
 obj-$(CONFIG_FS_XIP) += filemap_xip.o
 obj-$(CONFIG_MIGRATION) += migrate.o
diff --git a/mm/failslab.c b/mm/failslab.c
new file mode 100644
index 000000000000..7c6ea6493f80
--- /dev/null
+++ b/mm/failslab.c
@@ -0,0 +1,59 @@
+#include <linux/fault-inject.h>
+
+static struct {
+	struct fault_attr attr;
+	u32 ignore_gfp_wait;
+#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
+	struct dentry *ignore_gfp_wait_file;
+#endif
+} failslab = {
+	.attr = FAULT_ATTR_INITIALIZER,
+	.ignore_gfp_wait = 1,
+};
+
+bool should_failslab(size_t size, gfp_t gfpflags)
+{
+	if (gfpflags & __GFP_NOFAIL)
+		return false;
+
+        if (failslab.ignore_gfp_wait && (gfpflags & __GFP_WAIT))
+		return false;
+
+	return should_fail(&failslab.attr, size);
+}
+
+static int __init setup_failslab(char *str)
+{
+	return setup_fault_attr(&failslab.attr, str);
+}
+__setup("failslab=", setup_failslab);
+
+#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
+
+static int __init failslab_debugfs_init(void)
+{
+	mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
+	struct dentry *dir;
+	int err;
+
+	err = init_fault_attr_dentries(&failslab.attr, "failslab");
+	if (err)
+		return err;
+	dir = failslab.attr.dentries.dir;
+
+	failslab.ignore_gfp_wait_file =
+		debugfs_create_bool("ignore-gfp-wait", mode, dir,
+				      &failslab.ignore_gfp_wait);
+
+	if (!failslab.ignore_gfp_wait_file) {
+		err = -ENOMEM;
+		debugfs_remove(failslab.ignore_gfp_wait_file);
+		cleanup_fault_attr_dentries(&failslab.attr);
+	}
+
+	return err;
+}
+
+late_initcall(failslab_debugfs_init);
+
+#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
diff --git a/mm/slab.c b/mm/slab.c
index 09187517f9dc..c347dd8480cc 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3106,79 +3106,14 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
 #define cache_alloc_debugcheck_after(a,b,objp,d) (objp)
 #endif
 
-#ifdef CONFIG_FAILSLAB
-
-static struct failslab_attr {
-
-	struct fault_attr attr;
-
-	u32 ignore_gfp_wait;
-#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
-	struct dentry *ignore_gfp_wait_file;
-#endif
-
-} failslab = {
-	.attr = FAULT_ATTR_INITIALIZER,
-	.ignore_gfp_wait = 1,
-};
-
-static int __init setup_failslab(char *str)
-{
-	return setup_fault_attr(&failslab.attr, str);
-}
-__setup("failslab=", setup_failslab);
-
-static int should_failslab(struct kmem_cache *cachep, gfp_t flags)
+static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags)
 {
 	if (cachep == &cache_cache)
-		return 0;
-	if (flags & __GFP_NOFAIL)
-		return 0;
-	if (failslab.ignore_gfp_wait && (flags & __GFP_WAIT))
-		return 0;
+		return false;
 
-	return should_fail(&failslab.attr, obj_size(cachep));
+	return should_failslab(obj_size(cachep), flags);
 }
 
-#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
-
-static int __init failslab_debugfs(void)
-{
-	mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
-	struct dentry *dir;
-	int err;
-
-	err = init_fault_attr_dentries(&failslab.attr, "failslab");
-	if (err)
-		return err;
-	dir = failslab.attr.dentries.dir;
-
-	failslab.ignore_gfp_wait_file =
-		debugfs_create_bool("ignore-gfp-wait", mode, dir,
-				      &failslab.ignore_gfp_wait);
-
-	if (!failslab.ignore_gfp_wait_file) {
-		err = -ENOMEM;
-		debugfs_remove(failslab.ignore_gfp_wait_file);
-		cleanup_fault_attr_dentries(&failslab.attr);
-	}
-
-	return err;
-}
-
-late_initcall(failslab_debugfs);
-
-#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
-
-#else /* CONFIG_FAILSLAB */
-
-static inline int should_failslab(struct kmem_cache *cachep, gfp_t flags)
-{
-	return 0;
-}
-
-#endif /* CONFIG_FAILSLAB */
-
 static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
 {
 	void *objp;
@@ -3381,7 +3316,7 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
 	unsigned long save_flags;
 	void *ptr;
 
-	if (should_failslab(cachep, flags))
+	if (slab_should_failslab(cachep, flags))
 		return NULL;
 
 	cache_alloc_debugcheck_before(cachep, flags);
@@ -3457,7 +3392,7 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
 	unsigned long save_flags;
 	void *objp;
 
-	if (should_failslab(cachep, flags))
+	if (slab_should_failslab(cachep, flags))
 		return NULL;
 
 	cache_alloc_debugcheck_before(cachep, flags);
diff --git a/mm/slub.c b/mm/slub.c
index a2cd47d89e0a..640fde7e354c 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -24,6 +24,7 @@
 #include <linux/kallsyms.h>
 #include <linux/memory.h>
 #include <linux/math64.h>
+#include <linux/fault-inject.h>
 
 /*
  * Lock order:
@@ -1591,6 +1592,9 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 	unsigned long flags;
 	unsigned int objsize;
 
+	if (should_failslab(s->objsize, gfpflags))
+		return NULL;
+
 	local_irq_save(flags);
 	c = get_cpu_slab(s, smp_processor_id());
 	objsize = c->objsize;
-- 
cgit v1.2.3


From dfcd3610289132a762b7dc0eaf33998262cd9e20 Mon Sep 17 00:00:00 2001
From: Pascal Terjan <pterjan@mandriva.com>
Date: Tue, 25 Nov 2008 15:08:19 +0100
Subject: slab: Fix comment on #endif

This #endif in slab.h is described as closing the inner block while it's for
the big CONFIG_NUMA one. That makes reading the code a bit harder.

This trivial patch fixes the comment.

Signed-off-by: Pascal Terjan <pterjan@mandriva.com>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
---
 include/linux/slab.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 000da12b5cf0..9d8ca14be3c4 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -285,7 +285,7 @@ extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, void *);
 #define kmalloc_node_track_caller(size, flags, node) \
 	kmalloc_track_caller(size, flags)
 
-#endif /* DEBUG_SLAB */
+#endif /* CONFIG_NUMA */
 
 /*
  * Shortcuts
-- 
cgit v1.2.3


From 43a256322ac1fc105c181b3cade3b9bfc0b63ca1 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Sun, 28 Dec 2008 16:01:13 -0800
Subject: sparseirq: move __weak symbols into separate compilation unit

GCC has a bug with __weak alias functions: if the functions are in
the same compilation unit as their call site, GCC can decide to
inline them - and thus rob the linker of the opportunity to override
the weak alias with the real thing.

So move all the IRQ handling related __weak symbols to kernel/irq/chip.c.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/interrupt.h |  6 ++++++
 include/linux/irq.h       |  3 ---
 init/main.c               | 10 ----------
 kernel/irq/manage.c       |  9 ---------
 kernel/softirq.c          | 20 ++++++++++++++++++++
 5 files changed, 26 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 777f89e00b4a..d9a370325ae2 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -467,4 +467,10 @@ static inline void init_irq_proc(void)
 
 int show_interrupts(struct seq_file *p, void *v);
 
+struct irq_desc;
+
+extern int early_irq_init(void);
+extern int arch_early_irq_init(void);
+extern int arch_init_chip_data(struct irq_desc *desc, int cpu);
+
 #endif
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 0e40af4bac40..d64a6d49bdef 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -193,9 +193,6 @@ struct irq_desc {
 	const char		*name;
 } ____cacheline_internodealigned_in_smp;
 
-extern int early_irq_init(void);
-extern int arch_early_irq_init(void);
-extern int arch_init_chip_data(struct irq_desc *desc, int cpu);
 extern void arch_init_copy_chip_data(struct irq_desc *old_desc,
 					struct irq_desc *desc, int cpu);
 extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc);
diff --git a/init/main.c b/init/main.c
index c314aa15370e..2c183abbf61c 100644
--- a/init/main.c
+++ b/init/main.c
@@ -539,16 +539,6 @@ void __init __weak thread_info_cache_init(void)
 {
 }
 
-int __init __weak arch_early_irq_init(void)
-{
-	return 0;
-}
-
-int __init __weak early_irq_init(void)
-{
-	return arch_early_irq_init();
-}
-
 asmlinkage void __init start_kernel(void)
 {
 	char * command_line;
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index c2741b02ad38..46953a06f4a8 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -261,15 +261,6 @@ void enable_irq(unsigned int irq)
 }
 EXPORT_SYMBOL(enable_irq);
 
-/*
- * [ Not in kernel/irq/handle.c, so that GCC does not
- *   inline the __weak alias: ]
- */
-int __weak arch_init_chip_data(struct irq_desc *desc, int cpu)
-{
-	return 0;
-}
-
 static int set_irq_wake_real(unsigned int irq, unsigned int on)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index e7c69a720d69..daf46358d2dd 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -797,3 +797,23 @@ int on_each_cpu(void (*func) (void *info), void *info, int wait)
 }
 EXPORT_SYMBOL(on_each_cpu);
 #endif
+
+/*
+ * [ These __weak aliases are kept in a separate compilation unit, so that
+ *   GCC does not inline them incorrectly. ]
+ */
+
+int __init __weak early_irq_init(void)
+{
+	return 0;
+}
+
+int __init __weak arch_early_irq_init(void)
+{
+	return 0;
+}
+
+int __weak arch_init_chip_data(struct irq_desc *desc, int cpu)
+{
+	return 0;
+}
-- 
cgit v1.2.3


From d61c72e52b98411d1cfef1fdb3f5a8bb070f8966 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Wed, 10 Dec 2008 14:07:21 +0100
Subject: DMI: add dmi_match

Add a wrapper for testing system_info which will handle also NULL
system infos.

This will be used by the ata PIIX driver.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Cc: Alexandru Romanescu <a_romanescu@yahoo.co.uk>
Cc: Tejun Heo <tj@kernel.org>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/firmware/dmi_scan.c | 16 ++++++++++++++++
 include/linux/dmi.h         |  3 +++
 2 files changed, 19 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index 4a597d8c2f70..78b989d202a3 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -582,3 +582,19 @@ int dmi_walk(void (*decode)(const struct dmi_header *))
 	return 0;
 }
 EXPORT_SYMBOL_GPL(dmi_walk);
+
+/**
+ * dmi_match - compare a string to the dmi field (if exists)
+ *
+ * Returns true if the requested field equals to the str (including NULL).
+ */
+bool dmi_match(enum dmi_field f, const char *str)
+{
+	const char *info = dmi_get_system_info(f);
+
+	if (info == NULL || str == NULL)
+		return info == str;
+
+	return !strcmp(info, str);
+}
+EXPORT_SYMBOL_GPL(dmi_match);
diff --git a/include/linux/dmi.h b/include/linux/dmi.h
index 2bfda178f274..34161907b2f8 100644
--- a/include/linux/dmi.h
+++ b/include/linux/dmi.h
@@ -47,6 +47,7 @@ extern int dmi_name_in_vendors(const char *str);
 extern int dmi_name_in_serial(const char *str);
 extern int dmi_available;
 extern int dmi_walk(void (*decode)(const struct dmi_header *));
+extern bool dmi_match(enum dmi_field f, const char *str);
 
 #else
 
@@ -61,6 +62,8 @@ static inline int dmi_name_in_serial(const char *s) { return 0; }
 #define dmi_available 0
 static inline int dmi_walk(void (*decode)(const struct dmi_header *))
 	{ return -1; }
+static inline bool dmi_match(enum dmi_field f, const char *str)
+	{ return false; }
 
 #endif
 
-- 
cgit v1.2.3


From ea319518ba3de282c13ae1cf4bf2215c5e03e67e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 26 Dec 2008 15:08:55 +0100
Subject: locking, percpu counters: introduce separate lock classes

Impact: fix lockdep false positives

Classify percpu_counter instances similar to regular lock objects --
that is, per instantiation site.

The networking code has increased its use of percpu_counters, which
leads to false positives if they are treated as a single class.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/percpu_counter.h | 14 ++++++++++----
 lib/percpu_counter.c           | 18 ++++--------------
 lib/proportions.c              |  6 +++---
 mm/backing-dev.c               |  2 +-
 4 files changed, 18 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index 9007ccdfc112..96bdde36599f 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -30,8 +30,16 @@ struct percpu_counter {
 #define FBC_BATCH	(NR_CPUS*4)
 #endif
 
-int percpu_counter_init(struct percpu_counter *fbc, s64 amount);
-int percpu_counter_init_irq(struct percpu_counter *fbc, s64 amount);
+int __percpu_counter_init(struct percpu_counter *fbc, s64 amount,
+			  struct lock_class_key *key);
+
+#define percpu_counter_init(fbc, value)					\
+	({								\
+		static struct lock_class_key __key;			\
+									\
+		__percpu_counter_init(fbc, value, &__key);		\
+	})
+
 void percpu_counter_destroy(struct percpu_counter *fbc);
 void percpu_counter_set(struct percpu_counter *fbc, s64 amount);
 void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch);
@@ -85,8 +93,6 @@ static inline int percpu_counter_init(struct percpu_counter *fbc, s64 amount)
 	return 0;
 }
 
-#define percpu_counter_init_irq percpu_counter_init
-
 static inline void percpu_counter_destroy(struct percpu_counter *fbc)
 {
 }
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index a8663890a88c..c7fe2e4e8ed1 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -71,11 +71,11 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc)
 }
 EXPORT_SYMBOL(__percpu_counter_sum);
 
-static struct lock_class_key percpu_counter_irqsafe;
-
-int percpu_counter_init(struct percpu_counter *fbc, s64 amount)
+int __percpu_counter_init(struct percpu_counter *fbc, s64 amount,
+			  struct lock_class_key *key)
 {
 	spin_lock_init(&fbc->lock);
+	lockdep_set_class(&fbc->lock, key);
 	fbc->count = amount;
 	fbc->counters = alloc_percpu(s32);
 	if (!fbc->counters)
@@ -87,17 +87,7 @@ int percpu_counter_init(struct percpu_counter *fbc, s64 amount)
 #endif
 	return 0;
 }
-EXPORT_SYMBOL(percpu_counter_init);
-
-int percpu_counter_init_irq(struct percpu_counter *fbc, s64 amount)
-{
-	int err;
-
-	err = percpu_counter_init(fbc, amount);
-	if (!err)
-		lockdep_set_class(&fbc->lock, &percpu_counter_irqsafe);
-	return err;
-}
+EXPORT_SYMBOL(__percpu_counter_init);
 
 void percpu_counter_destroy(struct percpu_counter *fbc)
 {
diff --git a/lib/proportions.c b/lib/proportions.c
index 4f387a643d72..7367f2b727d0 100644
--- a/lib/proportions.c
+++ b/lib/proportions.c
@@ -83,11 +83,11 @@ int prop_descriptor_init(struct prop_descriptor *pd, int shift)
 	pd->index = 0;
 	pd->pg[0].shift = shift;
 	mutex_init(&pd->mutex);
-	err = percpu_counter_init_irq(&pd->pg[0].events, 0);
+	err = percpu_counter_init(&pd->pg[0].events, 0);
 	if (err)
 		goto out;
 
-	err = percpu_counter_init_irq(&pd->pg[1].events, 0);
+	err = percpu_counter_init(&pd->pg[1].events, 0);
 	if (err)
 		percpu_counter_destroy(&pd->pg[0].events);
 
@@ -191,7 +191,7 @@ int prop_local_init_percpu(struct prop_local_percpu *pl)
 	spin_lock_init(&pl->lock);
 	pl->shift = 0;
 	pl->period = 0;
-	return percpu_counter_init_irq(&pl->events, 0);
+	return percpu_counter_init(&pl->events, 0);
 }
 
 void prop_local_destroy_percpu(struct prop_local_percpu *pl)
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index f2e574dbc300..f3b125857827 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -220,7 +220,7 @@ int bdi_init(struct backing_dev_info *bdi)
 	bdi->max_prop_frac = PROP_FRAC_BASE;
 
 	for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
-		err = percpu_counter_init_irq(&bdi->bdi_stat[i], 0);
+		err = percpu_counter_init(&bdi->bdi_stat[i], 0);
 		if (err)
 			goto err;
 	}
-- 
cgit v1.2.3


From 34a4c5eb421dab6fe8381aa12c990f9d6f645b17 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Mon, 29 Dec 2008 04:00:23 -0800
Subject: Input: map_to_7segment.h - convert to __inline__ for userspace

Use __inline__ rather than inline for map_to_seg7() since it is exported
to userspace.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 include/linux/map_to_7segment.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/map_to_7segment.h b/include/linux/map_to_7segment.h
index 7df8432c4402..12d62a54d470 100644
--- a/include/linux/map_to_7segment.h
+++ b/include/linux/map_to_7segment.h
@@ -75,7 +75,7 @@ struct seg7_conversion_map {
 	unsigned char	table[128];
 };
 
-static inline int map_to_seg7(struct seg7_conversion_map *map, int c)
+static __inline__ int map_to_seg7(struct seg7_conversion_map *map, int c)
 {
 	return c >= 0 && c < sizeof(map->table) ? map->table[c] : -EINVAL;
 }
-- 
cgit v1.2.3


From 2a2ca6a96194c4744a2adeefbc09ce881f3c5abe Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Mon, 29 Dec 2008 20:27:31 +0100
Subject: ide: replace the global ide_lock spinlock by per-hwgroup spinlocks
 (v2)

Now that (almost) all host drivers have been fixed not to abuse ide_lock
and core code usage of ide_lock has been sanitized we may safely replace
ide_lock by per-hwgroup locks.

This patch is partially based on earlier patch from Ravikiran G Thirumalai.

While at it:
- don't use deprecated HWIF() and HWGROUP() macros
- update locking documentation in ide.h

v2:
Add missing spin_lock_init(&hwgroup->lock).  (Noticed by Elias Oltmanns)

Cc: Vaibhav V. Nivargi <vaibhav.nivargi@gmail.com>
Cc: Alok N. Kataria <alokk@calsoftinc.com>
Cc: Shai Fultheim <shai@scalex86.org>
Signed-off-by: Ravikiran Thirumalai <kiran@scalex86.org>
Cc: Elias Oltmanns <eo@nebensachen.de>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-io.c    | 33 ++++++++++++++++-----------------
 drivers/ide/ide-iops.c  | 48 +++++++++++++++++++++++-------------------------
 drivers/ide/ide-park.c  | 16 ++++++++--------
 drivers/ide/ide-probe.c | 26 +++++++++++++++-----------
 drivers/ide/ide.c       |  8 +++-----
 drivers/ide/umc8672.c   | 11 +++++++----
 drivers/scsi/ide-scsi.c | 32 +++++++++++++++++++++-----------
 include/linux/ide.h     | 12 +++++++-----
 8 files changed, 100 insertions(+), 86 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 02059e96e6cd..72d0d702d5da 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -907,7 +907,7 @@ repeat:
 
 /*
  * Issue a new request to a drive from hwgroup
- * Caller must have already done spin_lock_irqsave(&ide_lock, ..);
+ * Caller must have already done spin_lock_irqsave(&hwgroup->lock, ..);
  *
  * A hwgroup is a serialized group of IDE interfaces.  Usually there is
  * exactly one hwif (interface) per hwgroup, but buggy controllers (eg. CMD640)
@@ -919,7 +919,7 @@ repeat:
  * possibly along with many other devices.  This is especially common in
  * PCI-based systems with off-board IDE controller cards.
  *
- * The IDE driver uses the single global ide_lock spinlock to protect
+ * The IDE driver uses a per-hwgroup spinlock to protect
  * access to the request queues, and to protect the hwgroup->busy flag.
  *
  * The first thread into the driver for a particular hwgroup sets the
@@ -935,7 +935,7 @@ repeat:
  * will start the next request from the queue.  If no more work remains,
  * the driver will clear the hwgroup->busy flag and exit.
  *
- * The ide_lock (spinlock) is used to protect all access to the
+ * The per-hwgroup spinlock is used to protect all access to the
  * hwgroup->busy flag, but is otherwise not needed for most processing in
  * the driver.  This makes the driver much more friendlier to shared IRQs
  * than previous designs, while remaining 100% (?) SMP safe and capable.
@@ -948,7 +948,7 @@ static void ide_do_request (ide_hwgroup_t *hwgroup, int masked_irq)
 	ide_startstop_t	startstop;
 	int             loops = 0;
 
-	/* caller must own ide_lock */
+	/* caller must own hwgroup->lock */
 	BUG_ON(!irqs_disabled());
 
 	while (!hwgroup->busy) {
@@ -1070,11 +1070,11 @@ static void ide_do_request (ide_hwgroup_t *hwgroup, int masked_irq)
 		 */
 		if (masked_irq != IDE_NO_IRQ && hwif->irq != masked_irq)
 			disable_irq_nosync(hwif->irq);
-		spin_unlock(&ide_lock);
+		spin_unlock(&hwgroup->lock);
 		local_irq_enable_in_hardirq();
 			/* allow other IRQs while we start this request */
 		startstop = start_request(drive, rq);
-		spin_lock_irq(&ide_lock);
+		spin_lock_irq(&hwgroup->lock);
 		if (masked_irq != IDE_NO_IRQ && hwif->irq != masked_irq)
 			enable_irq(hwif->irq);
 		if (startstop == ide_stopped)
@@ -1172,7 +1172,7 @@ void ide_timer_expiry (unsigned long data)
 	unsigned long	flags;
 	unsigned long	wait = -1;
 
-	spin_lock_irqsave(&ide_lock, flags);
+	spin_lock_irqsave(&hwgroup->lock, flags);
 
 	if (((handler = hwgroup->handler) == NULL) ||
 	    (hwgroup->req_gen != hwgroup->req_gen_timer)) {
@@ -1205,7 +1205,7 @@ void ide_timer_expiry (unsigned long data)
 					hwgroup->timer.expires  = jiffies + wait;
 					hwgroup->req_gen_timer = hwgroup->req_gen;
 					add_timer(&hwgroup->timer);
-					spin_unlock_irqrestore(&ide_lock, flags);
+					spin_unlock_irqrestore(&hwgroup->lock, flags);
 					return;
 				}
 			}
@@ -1215,7 +1215,7 @@ void ide_timer_expiry (unsigned long data)
 			 * the handler() function, which means we need to
 			 * globally mask the specific IRQ:
 			 */
-			spin_unlock(&ide_lock);
+			spin_unlock(&hwgroup->lock);
 			hwif  = HWIF(drive);
 			/* disable_irq_nosync ?? */
 			disable_irq(hwif->irq);
@@ -1239,14 +1239,14 @@ void ide_timer_expiry (unsigned long data)
 						  hwif->tp_ops->read_status(hwif));
 			}
 			drive->service_time = jiffies - drive->service_start;
-			spin_lock_irq(&ide_lock);
+			spin_lock_irq(&hwgroup->lock);
 			enable_irq(hwif->irq);
 			if (startstop == ide_stopped)
 				hwgroup->busy = 0;
 		}
 	}
 	ide_do_request(hwgroup, IDE_NO_IRQ);
-	spin_unlock_irqrestore(&ide_lock, flags);
+	spin_unlock_irqrestore(&hwgroup->lock, flags);
 }
 
 /**
@@ -1339,14 +1339,13 @@ irqreturn_t ide_intr (int irq, void *dev_id)
 {
 	unsigned long flags;
 	ide_hwgroup_t *hwgroup = (ide_hwgroup_t *)dev_id;
-	ide_hwif_t *hwif;
+	ide_hwif_t *hwif = hwgroup->hwif;
 	ide_drive_t *drive;
 	ide_handler_t *handler;
 	ide_startstop_t startstop;
 	irqreturn_t irq_ret = IRQ_NONE;
 
-	spin_lock_irqsave(&ide_lock, flags);
-	hwif = hwgroup->hwif;
+	spin_lock_irqsave(&hwgroup->lock, flags);
 
 	if (!ide_ack_intr(hwif))
 		goto out;
@@ -1416,7 +1415,7 @@ irqreturn_t ide_intr (int irq, void *dev_id)
 	hwgroup->handler = NULL;
 	hwgroup->req_gen++;
 	del_timer(&hwgroup->timer);
-	spin_unlock(&ide_lock);
+	spin_unlock(&hwgroup->lock);
 
 	if (hwif->port_ops && hwif->port_ops->clear_irq)
 		hwif->port_ops->clear_irq(drive);
@@ -1427,7 +1426,7 @@ irqreturn_t ide_intr (int irq, void *dev_id)
 	/* service this interrupt, may set handler for next interrupt */
 	startstop = handler(drive);
 
-	spin_lock_irq(&ide_lock);
+	spin_lock_irq(&hwgroup->lock);
 	/*
 	 * Note that handler() may have set things up for another
 	 * interrupt to occur soon, but it cannot happen until
@@ -1448,7 +1447,7 @@ irqreturn_t ide_intr (int irq, void *dev_id)
 out_handled:
 	irq_ret = IRQ_HANDLED;
 out:
-	spin_unlock_irqrestore(&ide_lock, flags);
+	spin_unlock_irqrestore(&hwgroup->lock, flags);
 	return irq_ret;
 }
 
diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c
index c41c3b9b6f02..ad8bd6539283 100644
--- a/drivers/ide/ide-iops.c
+++ b/drivers/ide/ide-iops.c
@@ -835,10 +835,12 @@ static void __ide_set_handler (ide_drive_t *drive, ide_handler_t *handler,
 void ide_set_handler (ide_drive_t *drive, ide_handler_t *handler,
 		      unsigned int timeout, ide_expiry_t *expiry)
 {
+	ide_hwgroup_t *hwgroup = drive->hwif->hwgroup;
 	unsigned long flags;
-	spin_lock_irqsave(&ide_lock, flags);
+
+	spin_lock_irqsave(&hwgroup->lock, flags);
 	__ide_set_handler(drive, handler, timeout, expiry);
-	spin_unlock_irqrestore(&ide_lock, flags);
+	spin_unlock_irqrestore(&hwgroup->lock, flags);
 }
 
 EXPORT_SYMBOL(ide_set_handler);
@@ -860,10 +862,11 @@ EXPORT_SYMBOL(ide_set_handler);
 void ide_execute_command(ide_drive_t *drive, u8 cmd, ide_handler_t *handler,
 			 unsigned timeout, ide_expiry_t *expiry)
 {
+	ide_hwif_t *hwif = drive->hwif;
+	ide_hwgroup_t *hwgroup = hwif->hwgroup;
 	unsigned long flags;
-	ide_hwif_t *hwif = HWIF(drive);
 
-	spin_lock_irqsave(&ide_lock, flags);
+	spin_lock_irqsave(&hwgroup->lock, flags);
 	__ide_set_handler(drive, handler, timeout, expiry);
 	hwif->tp_ops->exec_command(hwif, cmd);
 	/*
@@ -873,19 +876,20 @@ void ide_execute_command(ide_drive_t *drive, u8 cmd, ide_handler_t *handler,
 	 * FIXME: we could skip this delay with care on non shared devices
 	 */
 	ndelay(400);
-	spin_unlock_irqrestore(&ide_lock, flags);
+	spin_unlock_irqrestore(&hwgroup->lock, flags);
 }
 EXPORT_SYMBOL(ide_execute_command);
 
 void ide_execute_pkt_cmd(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = drive->hwif;
+	ide_hwgroup_t *hwgroup = hwif->hwgroup;
 	unsigned long flags;
 
-	spin_lock_irqsave(&ide_lock, flags);
+	spin_lock_irqsave(&hwgroup->lock, flags);
 	hwif->tp_ops->exec_command(hwif, ATA_CMD_PACKET);
 	ndelay(400);
-	spin_unlock_irqrestore(&ide_lock, flags);
+	spin_unlock_irqrestore(&hwgroup->lock, flags);
 }
 EXPORT_SYMBOL_GPL(ide_execute_pkt_cmd);
 
@@ -1076,22 +1080,16 @@ static void pre_reset(ide_drive_t *drive)
  */
 static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi)
 {
-	unsigned int unit;
-	unsigned long flags, timeout;
-	ide_hwif_t *hwif;
-	ide_hwgroup_t *hwgroup;
-	struct ide_io_ports *io_ports;
-	const struct ide_tp_ops *tp_ops;
+	ide_hwif_t *hwif = drive->hwif;
+	ide_hwgroup_t *hwgroup = hwif->hwgroup;
+	struct ide_io_ports *io_ports = &hwif->io_ports;
+	const struct ide_tp_ops *tp_ops = hwif->tp_ops;
 	const struct ide_port_ops *port_ops;
+	unsigned long flags, timeout;
+	unsigned int unit;
 	DEFINE_WAIT(wait);
 
-	spin_lock_irqsave(&ide_lock, flags);
-	hwif = HWIF(drive);
-	hwgroup = HWGROUP(drive);
-
-	io_ports = &hwif->io_ports;
-
-	tp_ops = hwif->tp_ops;
+	spin_lock_irqsave(&hwgroup->lock, flags);
 
 	/* We must not reset with running handlers */
 	BUG_ON(hwgroup->handler != NULL);
@@ -1106,7 +1104,7 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi)
 		hwgroup->poll_timeout = jiffies + WAIT_WORSTCASE;
 		hwgroup->polling = 1;
 		__ide_set_handler(drive, &atapi_reset_pollfunc, HZ/20, NULL);
-		spin_unlock_irqrestore(&ide_lock, flags);
+		spin_unlock_irqrestore(&hwgroup->lock, flags);
 		return ide_started;
 	}
 
@@ -1129,9 +1127,9 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi)
 		if (time_before_eq(timeout, now))
 			break;
 
-		spin_unlock_irqrestore(&ide_lock, flags);
+		spin_unlock_irqrestore(&hwgroup->lock, flags);
 		timeout = schedule_timeout_uninterruptible(timeout - now);
-		spin_lock_irqsave(&ide_lock, flags);
+		spin_lock_irqsave(&hwgroup->lock, flags);
 	} while (timeout);
 	finish_wait(&ide_park_wq, &wait);
 
@@ -1143,7 +1141,7 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi)
 		pre_reset(&hwif->drives[unit]);
 
 	if (io_ports->ctl_addr == 0) {
-		spin_unlock_irqrestore(&ide_lock, flags);
+		spin_unlock_irqrestore(&hwgroup->lock, flags);
 		ide_complete_drive_reset(drive, -ENXIO);
 		return ide_stopped;
 	}
@@ -1179,7 +1177,7 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi)
 	if (port_ops && port_ops->resetproc)
 		port_ops->resetproc(drive);
 
-	spin_unlock_irqrestore(&ide_lock, flags);
+	spin_unlock_irqrestore(&hwgroup->lock, flags);
 	return ide_started;
 }
 
diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c
index 03b00e57e93f..63d01c55f865 100644
--- a/drivers/ide/ide-park.c
+++ b/drivers/ide/ide-park.c
@@ -7,17 +7,16 @@ DECLARE_WAIT_QUEUE_HEAD(ide_park_wq);
 
 static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
 {
+	ide_hwgroup_t *hwgroup = drive->hwif->hwgroup;
 	struct request_queue *q = drive->queue;
 	struct request *rq;
 	int rc;
 
 	timeout += jiffies;
-	spin_lock_irq(&ide_lock);
+	spin_lock_irq(&hwgroup->lock);
 	if (drive->dev_flags & IDE_DFLAG_PARKED) {
-		ide_hwgroup_t *hwgroup = drive->hwif->hwgroup;
-		int reset_timer;
+		int reset_timer = time_before(timeout, drive->sleep);
 
-		reset_timer = time_before(timeout, drive->sleep);
 		drive->sleep = timeout;
 		wake_up_all(&ide_park_wq);
 		if (reset_timer && hwgroup->sleeping &&
@@ -26,10 +25,10 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
 			hwgroup->busy = 0;
 			blk_start_queueing(q);
 		}
-		spin_unlock_irq(&ide_lock);
+		spin_unlock_irq(&hwgroup->lock);
 		return;
 	}
-	spin_unlock_irq(&ide_lock);
+	spin_unlock_irq(&hwgroup->lock);
 
 	rq = blk_get_request(q, READ, __GFP_WAIT);
 	rq->cmd[0] = REQ_PARK_HEADS;
@@ -62,20 +61,21 @@ ssize_t ide_park_show(struct device *dev, struct device_attribute *attr,
 		      char *buf)
 {
 	ide_drive_t *drive = to_ide_device(dev);
+	ide_hwgroup_t *hwgroup = drive->hwif->hwgroup;
 	unsigned long now;
 	unsigned int msecs;
 
 	if (drive->dev_flags & IDE_DFLAG_NO_UNLOAD)
 		return -EOPNOTSUPP;
 
-	spin_lock_irq(&ide_lock);
+	spin_lock_irq(&hwgroup->lock);
 	now = jiffies;
 	if (drive->dev_flags & IDE_DFLAG_PARKED &&
 	    time_after(drive->sleep, now))
 		msecs = jiffies_to_msecs(drive->sleep - now);
 	else
 		msecs = 0;
-	spin_unlock_irq(&ide_lock);
+	spin_unlock_irq(&hwgroup->lock);
 
 	return snprintf(buf, 20, "%u\n", msecs);
 }
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index c55bdbd22314..504bc9480e93 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -906,7 +906,8 @@ static int ide_init_queue(ide_drive_t *drive)
 	 *	do not.
 	 */
 
-	q = blk_init_queue_node(do_ide_request, &ide_lock, hwif_to_node(hwif));
+	q = blk_init_queue_node(do_ide_request, &hwif->hwgroup->lock,
+				hwif_to_node(hwif));
 	if (!q)
 		return 1;
 
@@ -947,7 +948,7 @@ static void ide_add_drive_to_hwgroup(ide_drive_t *drive)
 {
 	ide_hwgroup_t *hwgroup = drive->hwif->hwgroup;
 
-	spin_lock_irq(&ide_lock);
+	spin_lock_irq(&hwgroup->lock);
 	if (!hwgroup->drive) {
 		/* first drive for hwgroup. */
 		drive->next = drive;
@@ -957,7 +958,7 @@ static void ide_add_drive_to_hwgroup(ide_drive_t *drive)
 		drive->next = hwgroup->drive->next;
 		hwgroup->drive->next = drive;
 	}
-	spin_unlock_irq(&ide_lock);
+	spin_unlock_irq(&hwgroup->lock);
 }
 
 /*
@@ -1002,7 +1003,7 @@ void ide_remove_port_from_hwgroup(ide_hwif_t *hwif)
 
 	ide_ports[hwif->index] = NULL;
 
-	spin_lock_irq(&ide_lock);
+	spin_lock_irq(&hwgroup->lock);
 	/*
 	 * Remove us from the hwgroup, and free
 	 * the hwgroup if we were the only member
@@ -1030,7 +1031,7 @@ void ide_remove_port_from_hwgroup(ide_hwif_t *hwif)
 		}
 		BUG_ON(hwgroup->hwif == hwif);
 	}
-	spin_unlock_irq(&ide_lock);
+	spin_unlock_irq(&hwgroup->lock);
 }
 
 /*
@@ -1092,17 +1093,19 @@ static int init_irq (ide_hwif_t *hwif)
 		 * linked list, the first entry is the hwif that owns
 		 * hwgroup->handler - do not change that.
 		 */
-		spin_lock_irq(&ide_lock);
+		spin_lock_irq(&hwgroup->lock);
 		hwif->next = hwgroup->hwif->next;
 		hwgroup->hwif->next = hwif;
 		BUG_ON(hwif->next == hwif);
-		spin_unlock_irq(&ide_lock);
+		spin_unlock_irq(&hwgroup->lock);
 	} else {
 		hwgroup = kmalloc_node(sizeof(*hwgroup), GFP_KERNEL|__GFP_ZERO,
 				       hwif_to_node(hwif));
 		if (hwgroup == NULL)
 			goto out_up;
 
+		spin_lock_init(&hwgroup->lock);
+
 		hwif->hwgroup = hwgroup;
 		hwgroup->hwif = hwif->next = hwif;
 
@@ -1263,20 +1266,21 @@ static void ide_remove_drive_from_hwgroup(ide_drive_t *drive)
 static void drive_release_dev (struct device *dev)
 {
 	ide_drive_t *drive = container_of(dev, ide_drive_t, gendev);
+	ide_hwgroup_t *hwgroup = drive->hwif->hwgroup;
 
 	ide_proc_unregister_device(drive);
 
-	spin_lock_irq(&ide_lock);
+	spin_lock_irq(&hwgroup->lock);
 	ide_remove_drive_from_hwgroup(drive);
 	kfree(drive->id);
 	drive->id = NULL;
 	drive->dev_flags &= ~IDE_DFLAG_PRESENT;
 	/* Messed up locking ... */
-	spin_unlock_irq(&ide_lock);
+	spin_unlock_irq(&hwgroup->lock);
 	blk_cleanup_queue(drive->queue);
-	spin_lock_irq(&ide_lock);
+	spin_lock_irq(&hwgroup->lock);
 	drive->queue = NULL;
-	spin_unlock_irq(&ide_lock);
+	spin_unlock_irq(&hwgroup->lock);
 
 	complete(&drive->gendev_rel_comp);
 }
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index bca6877ee6a5..41d042053548 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -74,9 +74,6 @@ static const u8 ide_hwif_to_major[] = { IDE0_MAJOR, IDE1_MAJOR,
 
 DEFINE_MUTEX(ide_cfg_mtx);
 
-__cacheline_aligned_in_smp DEFINE_SPINLOCK(ide_lock);
-EXPORT_SYMBOL(ide_lock);
-
 static void ide_port_init_devices_data(ide_hwif_t *);
 
 /*
@@ -333,6 +330,7 @@ static int set_pio_mode_abuse(ide_hwif_t *hwif, u8 req_pio)
 static int set_pio_mode(ide_drive_t *drive, int arg)
 {
 	ide_hwif_t *hwif = drive->hwif;
+	ide_hwgroup_t *hwgroup = hwif->hwgroup;
 	const struct ide_port_ops *port_ops = hwif->port_ops;
 
 	if (arg < 0 || arg > 255)
@@ -347,9 +345,9 @@ static int set_pio_mode(ide_drive_t *drive, int arg)
 			unsigned long flags;
 
 			/* take lock for IDE_DFLAG_[NO_]UNMASK/[NO_]IO_32BIT */
-			spin_lock_irqsave(&ide_lock, flags);
+			spin_lock_irqsave(&hwgroup->lock, flags);
 			port_ops->set_pio_mode(drive, arg);
-			spin_unlock_irqrestore(&ide_lock, flags);
+			spin_unlock_irqrestore(&hwgroup->lock, flags);
 		} else
 			port_ops->set_pio_mode(drive, arg);
 	} else {
diff --git a/drivers/ide/umc8672.c b/drivers/ide/umc8672.c
index 1da076e0c917..e29978cf6197 100644
--- a/drivers/ide/umc8672.c
+++ b/drivers/ide/umc8672.c
@@ -107,18 +107,21 @@ static void umc_set_speeds(u8 speeds[])
 static void umc_set_pio_mode(ide_drive_t *drive, const u8 pio)
 {
 	ide_hwif_t *hwif = drive->hwif;
-	unsigned long flags;
+	ide_hwgroup_t *mate_hwgroup = hwif->mate ? hwif->mate->hwgroup : NULL;
+	unsigned long uninitialized_var(flags);
 
 	printk("%s: setting umc8672 to PIO mode%d (speed %d)\n",
 		drive->name, pio, pio_to_umc[pio]);
-	spin_lock_irqsave(&ide_lock, flags);
-	if (hwif->mate && hwif->mate->hwgroup->handler) {
+	if (mate_hwgroup)
+		spin_lock_irqsave(&mate_hwgroup->lock, flags);
+	if (mate_hwgroup && mate_hwgroup->handler) {
 		printk(KERN_ERR "umc8672: other interface is busy: exiting tune_umc()\n");
 	} else {
 		current_speeds[drive->name[2] - 'a'] = pio_to_umc[pio];
 		umc_set_speeds(current_speeds);
 	}
-	spin_unlock_irqrestore(&ide_lock, flags);
+	if (mate_hwgroup)
+		spin_unlock_irqrestore(&mate_hwgroup->lock, flags);
 }
 
 static const struct ide_port_ops umc8672_port_ops = {
diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c
index 2370fd82ebfe..c24140aff8e7 100644
--- a/drivers/scsi/ide-scsi.c
+++ b/drivers/scsi/ide-scsi.c
@@ -578,6 +578,8 @@ static int idescsi_eh_abort (struct scsi_cmnd *cmd)
 {
 	idescsi_scsi_t *scsi  = scsihost_to_idescsi(cmd->device->host);
 	ide_drive_t    *drive = scsi->drive;
+	ide_hwif_t     *hwif;
+	ide_hwgroup_t  *hwgroup;
 	int		busy;
 	int             ret   = FAILED;
 
@@ -594,13 +596,16 @@ static int idescsi_eh_abort (struct scsi_cmnd *cmd)
 		goto no_drive;
 	}
 
-	/* First give it some more time, how much is "right" is hard to say :-( */
+	hwif = drive->hwif;
+	hwgroup = hwif->hwgroup;
 
-	busy = ide_wait_not_busy(HWIF(drive), 100);	/* FIXME - uses mdelay which causes latency? */
+	/* First give it some more time, how much is "right" is hard to say :-(
+	   FIXME - uses mdelay which causes latency? */
+	busy = ide_wait_not_busy(hwif, 100);
 	if (test_bit(IDESCSI_LOG_CMD, &scsi->log))
 		printk (KERN_WARNING "ide-scsi: drive did%s become ready\n", busy?" not":"");
 
-	spin_lock_irq(&ide_lock);
+	spin_lock_irq(&hwgroup->lock);
 
 	/* If there is no pc running we're done (our interrupt took care of it) */
 	pc = drive->pc;
@@ -629,7 +634,7 @@ static int idescsi_eh_abort (struct scsi_cmnd *cmd)
 	}
 
 ide_unlock:
-	spin_unlock_irq(&ide_lock);
+	spin_unlock_irq(&hwgroup->lock);
 no_drive:
 	if (test_bit(IDESCSI_LOG_CMD, &scsi->log))
 		printk (KERN_WARNING "ide-scsi: abort returns %s\n", ret == SUCCESS?"success":"failed");
@@ -642,6 +647,7 @@ static int idescsi_eh_reset (struct scsi_cmnd *cmd)
 	struct request *req;
 	idescsi_scsi_t *scsi  = scsihost_to_idescsi(cmd->device->host);
 	ide_drive_t    *drive = scsi->drive;
+	ide_hwgroup_t  *hwgroup;
 	int             ready = 0;
 	int             ret   = SUCCESS;
 
@@ -658,14 +664,18 @@ static int idescsi_eh_reset (struct scsi_cmnd *cmd)
 		return FAILED;
 	}
 
+	hwgroup = drive->hwif->hwgroup;
+
 	spin_lock_irq(cmd->device->host->host_lock);
-	spin_lock(&ide_lock);
+	spin_lock(&hwgroup->lock);
 
 	pc = drive->pc;
+	if (pc)
+		req = pc->rq;
 
-	if (pc == NULL || (req = pc->rq) != HWGROUP(drive)->rq || !HWGROUP(drive)->handler) {
+	if (pc == NULL || req != hwgroup->rq || hwgroup->handler == NULL) {
 		printk (KERN_WARNING "ide-scsi: No active request in idescsi_eh_reset\n");
-		spin_unlock(&ide_lock);
+		spin_unlock(&hwgroup->lock);
 		spin_unlock_irq(cmd->device->host->host_lock);
 		return FAILED;
 	}
@@ -685,10 +695,10 @@ static int idescsi_eh_reset (struct scsi_cmnd *cmd)
 			BUG();
 	}
 
-	HWGROUP(drive)->rq = NULL;
-	HWGROUP(drive)->handler = NULL;
-	HWGROUP(drive)->busy = 1;		/* will set this to zero when ide reset finished */
-	spin_unlock(&ide_lock);
+	hwgroup->rq = NULL;
+	hwgroup->handler = NULL;
+	hwgroup->busy = 1; /* will set this to zero when ide reset finished */
+	spin_unlock(&hwgroup->lock);
 
 	ide_do_reset(drive);
 
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 010fb26a1579..c871d325cedb 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -909,6 +909,8 @@ typedef struct hwgroup_s {
 
 	int req_gen;
 	int req_gen_timer;
+
+	spinlock_t lock;
 } ide_hwgroup_t;
 
 typedef struct ide_driver_s ide_driver_t;
@@ -1610,13 +1612,13 @@ extern struct mutex ide_cfg_mtx;
 /*
  * Structure locking:
  *
- * ide_cfg_mtx and ide_lock together protect changes to
- * ide_hwif_t->{next,hwgroup}
+ * ide_cfg_mtx and hwgroup->lock together protect changes to
+ * ide_hwif_t->next
  * ide_drive_t->next
  *
- * ide_hwgroup_t->busy: ide_lock
- * ide_hwgroup_t->hwif: ide_lock
- * ide_hwif_t->mate: constant, no locking
+ * ide_hwgroup_t->busy: hwgroup->lock
+ * ide_hwgroup_t->hwif: hwgroup->lock
+ * ide_hwif_t->{hwgroup,mate}: constant, no locking
  * ide_drive_t->hwif: constant, no locking
  */
 
-- 
cgit v1.2.3


From 27c01c2db05c3cf8824975e50403cd4fd9356dca Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Mon, 29 Dec 2008 20:27:32 +0100
Subject: ide-cd: remove obsolete seek optimization

It doesn't make much sense nowadays and is problematic on some drives.

Cc: Borislav Petkov <petkovbb@googlemail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-cd.c | 89 ++++------------------------------------------------
 drivers/ide/ide-cd.h |  2 --
 include/linux/ide.h  |  4 ---
 3 files changed, 6 insertions(+), 89 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 692fd4570df6..ea35e94d0980 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -774,52 +774,6 @@ static ide_startstop_t cdrom_start_rw_cont(ide_drive_t *drive)
 	return cdrom_transfer_packet_command(drive, rq, cdrom_newpc_intr);
 }
 
-#define IDECD_SEEK_THRESHOLD	(1000)			/* 1000 blocks */
-#define IDECD_SEEK_TIMER	(5 * WAIT_MIN_SLEEP)	/* 100 ms */
-#define IDECD_SEEK_TIMEOUT	(2 * WAIT_CMD)		/* 20 sec */
-
-static ide_startstop_t cdrom_seek_intr(ide_drive_t *drive)
-{
-	struct cdrom_info *info = drive->driver_data;
-	int stat;
-	static int retry = 10;
-
-	ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__);
-
-	if (cdrom_decode_status(drive, 0, &stat))
-		return ide_stopped;
-
-	drive->atapi_flags |= IDE_AFLAG_SEEKING;
-
-	if (retry && time_after(jiffies, info->start_seek + IDECD_SEEK_TIMER)) {
-		if (--retry == 0)
-			drive->dev_flags &= ~IDE_DFLAG_DSC_OVERLAP;
-	}
-	return ide_stopped;
-}
-
-static void ide_cd_prepare_seek_request(ide_drive_t *drive, struct request *rq)
-{
-	sector_t frame = rq->sector;
-
-	ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__);
-
-	sector_div(frame, queue_hardsect_size(drive->queue) >> SECTOR_BITS);
-
-	memset(rq->cmd, 0, BLK_MAX_CDB);
-	rq->cmd[0] = GPCMD_SEEK;
-	put_unaligned(cpu_to_be32(frame), (unsigned int *) &rq->cmd[2]);
-
-	rq->timeout = ATAPI_WAIT_PC;
-}
-
-static ide_startstop_t cdrom_start_seek_continuation(ide_drive_t *drive)
-{
-	struct request *rq = drive->hwif->hwgroup->rq;
-
-	return cdrom_transfer_packet_command(drive, rq, &cdrom_seek_intr);
-}
-
 /*
  * Fix up a possibly partially-processed request so that we can start it over
  * entirely, or even put it back on the request queue.
@@ -1260,7 +1214,6 @@ static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq)
 static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
 					sector_t block)
 {
-	struct cdrom_info *info = drive->driver_data;
 	ide_handler_t *fn;
 	int xferlen;
 
@@ -1270,44 +1223,14 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
 		      (unsigned long long)block);
 
 	if (blk_fs_request(rq)) {
-		if (drive->atapi_flags & IDE_AFLAG_SEEKING) {
-			ide_hwif_t *hwif = drive->hwif;
-			unsigned long elapsed = jiffies - info->start_seek;
-			int stat = hwif->tp_ops->read_status(hwif);
-
-			if ((stat & ATA_DSC) != ATA_DSC) {
-				if (elapsed < IDECD_SEEK_TIMEOUT) {
-					ide_stall_queue(drive,
-							IDECD_SEEK_TIMER);
-					return ide_stopped;
-				}
-				printk(KERN_ERR PFX "%s: DSC timeout\n",
-						drive->name);
-			}
-			drive->atapi_flags &= ~IDE_AFLAG_SEEKING;
-		}
-		if (rq_data_dir(rq) == READ &&
-		    IDE_LARGE_SEEK(info->last_block, block,
-			    IDECD_SEEK_THRESHOLD) &&
-		    (drive->dev_flags & IDE_DFLAG_DSC_OVERLAP)) {
-			xferlen = 0;
-			fn = cdrom_start_seek_continuation;
-
-			drive->dma = 0;
-			info->start_seek = jiffies;
-
-			ide_cd_prepare_seek_request(drive, rq);
-		} else {
-			xferlen = 32768;
-			fn = cdrom_start_rw_cont;
+		xferlen = 32768;
+		fn = cdrom_start_rw_cont;
 
-			if (cdrom_start_rw(drive, rq) == ide_stopped)
-				return ide_stopped;
+		if (cdrom_start_rw(drive, rq) == ide_stopped)
+			return ide_stopped;
 
-			if (ide_cd_prepare_rw_request(drive, rq) == ide_stopped)
-				return ide_stopped;
-		}
-		info->last_block = block;
+		if (ide_cd_prepare_rw_request(drive, rq) == ide_stopped)
+			return ide_stopped;
 	} else if (blk_sense_request(rq) || blk_pc_request(rq) ||
 		   rq->cmd_type == REQ_TYPE_ATA_PC) {
 		xferlen = rq->data_len;
diff --git a/drivers/ide/ide-cd.h b/drivers/ide/ide-cd.h
index 5882b9a9ea8b..d5ce3362dbd1 100644
--- a/drivers/ide/ide-cd.h
+++ b/drivers/ide/ide-cd.h
@@ -88,8 +88,6 @@ struct cdrom_info {
 	struct request_sense sense_data;
 
 	struct request request_sense_request;
-	unsigned long last_block;
-	unsigned long start_seek;
 
 	u8 max_speed;		/* Max speed of the drive. */
 	u8 current_speed;	/* Current speed of the drive. */
diff --git a/include/linux/ide.h b/include/linux/ide.h
index c871d325cedb..0b8af0535d85 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -122,8 +122,6 @@ struct ide_io_ports {
 #define MAX_DRIVES	2	/* per interface; 2 assumed by lots of code */
 #define SECTOR_SIZE	512
 
-#define IDE_LARGE_SEEK(b1,b2,t)	(((b1) > (b2) + (t)) || ((b2) > (b1) + (t)))
-
 /*
  * Timeouts for various operations:
  */
@@ -496,8 +494,6 @@ enum {
 	 * when more than one interrupt is needed.
 	 */
 	IDE_AFLAG_LIMIT_NFRAMES		= (1 << 7),
-	/* Seeking in progress. */
-	IDE_AFLAG_SEEKING		= (1 << 8),
 	/* Saved TOC information is current. */
 	IDE_AFLAG_TOC_VALID		= (1 << 9),
 	/* We think that the drive door is locked. */
-- 
cgit v1.2.3


From 6b5cde3629701258004b94cde75dd1089b556b02 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Mon, 29 Dec 2008 20:27:32 +0100
Subject: cmd64x: set IDE_HFLAG_SERIALIZE explictly for CMD646

* Set IDE_HFLAG_SERIALIZE explictly for CMD646.

* Remove no longer needed ide_cmd646 chipset type (which has
  a nice side-effect of fixing handling of unexpected IRQs).

Cc: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/cmd64x.c    | 4 ++--
 drivers/ide/ide-probe.c | 2 +-
 drivers/ide/ide-proc.c  | 1 -
 include/linux/ide.h     | 2 +-
 4 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/cmd64x.c b/drivers/ide/cmd64x.c
index 935385c77e06..3623bf013bcf 100644
--- a/drivers/ide/cmd64x.c
+++ b/drivers/ide/cmd64x.c
@@ -424,10 +424,10 @@ static const struct ide_port_info cmd64x_chipsets[] __devinitdata = {
 		.name		= DRV_NAME,
 		.init_chipset	= init_chipset_cmd64x,
 		.enablebits	= {{0x51,0x04,0x04}, {0x51,0x08,0x08}},
-		.chipset	= ide_cmd646,
 		.port_ops	= &cmd64x_port_ops,
 		.dma_ops	= &cmd648_dma_ops,
-		.host_flags	= IDE_HFLAG_ABUSE_PREFETCH,
+		.host_flags	= IDE_HFLAG_SERIALIZE |
+				  IDE_HFLAG_ABUSE_PREFETCH,
 		.pio_mask	= ATA_PIO5,
 		.mwdma_mask	= ATA_MWDMA2,
 		.udma_mask	= ATA_UDMA2,
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index b0f9ededfad7..a0eb72e2a026 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -1125,7 +1125,7 @@ static int init_irq (ide_hwif_t *hwif)
 		sa = IRQF_SHARED;
 #endif /* __mc68000__ */
 
-		if (hwif->chipset == ide_pci || hwif->chipset == ide_cmd646)
+		if (hwif->chipset == ide_pci)
 			sa = IRQF_SHARED;
 
 		if (io_ports->ctl_addr)
diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c
index dd899e1f3841..e41669eec2c2 100644
--- a/drivers/ide/ide-proc.c
+++ b/drivers/ide/ide-proc.c
@@ -48,7 +48,6 @@ static int proc_ide_read_imodel
 	case ide_ht6560b:	name = "ht6560b";	break;
 	case ide_rz1000:	name = "rz1000";	break;
 	case ide_trm290:	name = "trm290";	break;
-	case ide_cmd646:	name = "cmd646";	break;
 	case ide_cy82c693:	name = "cy82c693";	break;
 	case ide_4drives:	name = "4drives";	break;
 	case ide_pmac:		name = "mac-io";	break;
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 0b8af0535d85..150e42311ee0 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -171,7 +171,7 @@ enum {		ide_unknown,	ide_generic,	ide_pci,
 		ide_cmd640,	ide_dtc2278,	ide_ali14xx,
 		ide_qd65xx,	ide_umc8672,	ide_ht6560b,
 		ide_rz1000,	ide_trm290,
-		ide_cmd646,	ide_cy82c693,	ide_4drives,
+		ide_cy82c693,	ide_4drives,
 		ide_pmac,	ide_acorn,
 		ide_au1xxx,	ide_palm3710
 };
-- 
cgit v1.2.3


From f58c1ab8deebc2360cef998f169a6727c288210f Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Mon, 29 Dec 2008 20:27:33 +0100
Subject: ide: always set nIEN on idle devices

* Set nIEN for previous port/device in ide_do_request()
  also if port uses a non-shared IRQ.

* Remove no longer needed ide_hwif_t.sharing_irq.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-io.c    | 2 +-
 drivers/ide/ide-probe.c | 4 +---
 include/linux/ide.h     | 1 -
 3 files changed, 2 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index ca51460af756..f776bd475010 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -1003,7 +1003,7 @@ static void ide_do_request (ide_hwgroup_t *hwgroup, int masked_irq)
 		}
 	again:
 		hwif = HWIF(drive);
-		if (hwgroup->hwif->sharing_irq && hwif != hwgroup->hwif) {
+		if (hwif != hwgroup->hwif) {
 			/*
 			 * set nIEN for previous hwif, drives in the
 			 * quirk_list may not like intr setups/cleanups
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index a0eb72e2a026..81f61e8ea97f 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -1060,7 +1060,6 @@ static int init_irq (ide_hwif_t *hwif)
 
 		if (h && h->hwgroup) {  /* scan only initialized ports */
 			if (hwif->irq == h->irq) {
-				hwif->sharing_irq = h->sharing_irq = 1;
 				if (hwif->chipset != ide_pci ||
 				    h->chipset != ide_pci) {
 					save_match(hwif, h, &match);
@@ -1152,8 +1151,7 @@ static int init_irq (ide_hwif_t *hwif)
 		io_ports->data_addr, hwif->irq);
 #endif /* __mc68000__ */
 	if (match)
-		printk(KERN_CONT " (%sed with %s)",
-			hwif->sharing_irq ? "shar" : "serializ", match->name);
+		printk(KERN_CONT " (serialized with %s)", match->name);
 	printk(KERN_CONT "\n");
 
 	mutex_unlock(&ide_cfg_mtx);
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 150e42311ee0..1d28006aec68 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -842,7 +842,6 @@ typedef struct hwif_s {
 
 	unsigned	present    : 1;	/* this interface exists */
 	unsigned	serialized : 1;	/* serialized all channel operation */
-	unsigned	sharing_irq: 1;	/* 1 = sharing irq with another hwif */
 	unsigned	sg_mapped  : 1;	/* sg_table and sg_nents are ready */
 
 	struct device		gendev;
-- 
cgit v1.2.3


From 7f1ac8c4b9dadc55ec656b368f5f470f2cbe3083 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Mon, 29 Dec 2008 20:27:33 +0100
Subject: rz1000: apply chipset quirks early (v2)

* Use pci_name(dev) instead of hwif->name in init_hwif_rz1000().

* init_hwif_rz1000() -> rz1000_init_chipset().  Update rz1000_init_one()
  to use rz1000_init_chipset() and add now required rz1000_remove().

* Remove superfluous ide_rz1000 chipset type.

v2:
* unsigned int rz1000_init_chipset() -> int rz1000_disable_readahead()
  per Sergei's suggestion.

Cc: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-proc.c |  1 -
 drivers/ide/rz1000.c   | 36 +++++++++++++++++++++++++-----------
 include/linux/ide.h    |  3 +--
 3 files changed, 26 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c
index e41669eec2c2..c2e6b8927bdc 100644
--- a/drivers/ide/ide-proc.c
+++ b/drivers/ide/ide-proc.c
@@ -46,7 +46,6 @@ static int proc_ide_read_imodel
 	case ide_qd65xx:	name = "qd65xx";	break;
 	case ide_umc8672:	name = "umc8672";	break;
 	case ide_ht6560b:	name = "ht6560b";	break;
-	case ide_rz1000:	name = "rz1000";	break;
 	case ide_trm290:	name = "trm290";	break;
 	case ide_cy82c693:	name = "cy82c693";	break;
 	case ide_4drives:	name = "4drives";	break;
diff --git a/drivers/ide/rz1000.c b/drivers/ide/rz1000.c
index 7daf0135cbac..a6414a884eb1 100644
--- a/drivers/ide/rz1000.c
+++ b/drivers/ide/rz1000.c
@@ -22,34 +22,48 @@
 
 #define DRV_NAME "rz1000"
 
-static void __devinit init_hwif_rz1000 (ide_hwif_t *hwif)
+static int __devinit rz1000_disable_readahead(struct pci_dev *dev)
 {
-	struct pci_dev *dev = to_pci_dev(hwif->dev);
 	u16 reg;
 
 	if (!pci_read_config_word (dev, 0x40, &reg) &&
 	    !pci_write_config_word(dev, 0x40, reg & 0xdfff)) {
 		printk(KERN_INFO "%s: disabled chipset read-ahead "
-			"(buggy RZ1000/RZ1001)\n", hwif->name);
+			"(buggy RZ1000/RZ1001)\n", pci_name(dev));
+		return 0;
 	} else {
-		if (hwif->mate)
-			hwif->mate->serialized = hwif->serialized = 1;
-		hwif->host_flags |= IDE_HFLAG_NO_UNMASK_IRQS;
 		printk(KERN_INFO "%s: serialized, disabled unmasking "
-			"(buggy RZ1000/RZ1001)\n", hwif->name);
+			"(buggy RZ1000/RZ1001)\n", pci_name(dev));
+		return 1;
 	}
 }
 
 static const struct ide_port_info rz1000_chipset __devinitdata = {
 	.name		= DRV_NAME,
-	.init_hwif	= init_hwif_rz1000,
-	.chipset	= ide_rz1000,
 	.host_flags	= IDE_HFLAG_NO_DMA,
 };
 
 static int __devinit rz1000_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	return ide_pci_init_one(dev, &rz1000_chipset, NULL);
+	struct ide_port_info d = rz1000_chipset;
+	int rc;
+
+	rc = pci_enable_device(dev);
+	if (rc)
+		return rc;
+
+	if (rz1000_disable_readahead(dev)) {
+		d.host_flags |= IDE_HFLAG_SERIALIZE;
+		d.host_flags |= IDE_HFLAG_NO_UNMASK_IRQS;
+	}
+
+	return ide_pci_init_one(dev, &d, NULL);
+}
+
+static void rz1000_remove(struct pci_dev *dev)
+{
+	ide_pci_remove(dev);
+	pci_disable_device(dev);
 }
 
 static const struct pci_device_id rz1000_pci_tbl[] = {
@@ -63,7 +77,7 @@ static struct pci_driver rz1000_pci_driver = {
 	.name		= "RZ1000_IDE",
 	.id_table	= rz1000_pci_tbl,
 	.probe		= rz1000_init_one,
-	.remove		= ide_pci_remove,
+	.remove		= rz1000_remove,
 };
 
 static int __init rz1000_ide_init(void)
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 1d28006aec68..fc1a966c7b7d 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -170,8 +170,7 @@ typedef int (ide_ack_intr_t)(struct hwif_s *);
 enum {		ide_unknown,	ide_generic,	ide_pci,
 		ide_cmd640,	ide_dtc2278,	ide_ali14xx,
 		ide_qd65xx,	ide_umc8672,	ide_ht6560b,
-		ide_rz1000,	ide_trm290,
-		ide_cy82c693,	ide_4drives,
+		ide_trm290,	ide_cy82c693,	ide_4drives,
 		ide_pmac,	ide_acorn,
 		ide_au1xxx,	ide_palm3710
 };
-- 
cgit v1.2.3


From 6b4924962c49655494d2c8e9d3faab0e349a3062 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Mon, 29 Dec 2008 20:27:34 +0100
Subject: ide: add ->max_sectors field to struct ide_port_info

* Add ->max_sectors field to struct ide_port_info to allow host drivers
  to specify value used for hwif->rqsize (if smaller than the default).

* Convert pdc202xx_old to use ->max_sectors and remove no longer needed
  IDE_HFLAG_RQSIZE_256 flag.

There should be no functional changes caused by this patch.

Acked-by: Sergei Shtyltov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-probe.c    | 4 ++--
 drivers/ide/pdc202xx_old.c | 9 +++++----
 include/linux/ide.h        | 5 +++--
 3 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 81f61e8ea97f..f76c22c45086 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -1442,8 +1442,8 @@ static void ide_init_port(ide_hwif_t *hwif, unsigned int port,
 			hwif->mate->serialized = hwif->serialized = 1;
 	}
 
-	if (d->host_flags & IDE_HFLAG_RQSIZE_256)
-		hwif->rqsize = 256;
+	if (d->max_sectors)
+		hwif->rqsize = d->max_sectors;
 
 	/* call chipset specific routine for each enabled port */
 	if (d->init_hwif)
diff --git a/drivers/ide/pdc202xx_old.c b/drivers/ide/pdc202xx_old.c
index 799557c25eef..624e62e5cc9a 100644
--- a/drivers/ide/pdc202xx_old.c
+++ b/drivers/ide/pdc202xx_old.c
@@ -350,16 +350,17 @@ static const struct ide_dma_ops pdc2026x_dma_ops = {
 	.dma_timeout		= pdc202xx_dma_timeout,
 };
 
-#define DECLARE_PDC2026X_DEV(udma, extra_flags) \
+#define DECLARE_PDC2026X_DEV(udma, sectors) \
 	{ \
 		.name		= DRV_NAME, \
 		.init_chipset	= init_chipset_pdc202xx, \
 		.port_ops	= &pdc2026x_port_ops, \
 		.dma_ops	= &pdc2026x_dma_ops, \
-		.host_flags	= IDE_HFLAGS_PDC202XX | extra_flags, \
+		.host_flags	= IDE_HFLAGS_PDC202XX, \
 		.pio_mask	= ATA_PIO4, \
 		.mwdma_mask	= ATA_MWDMA2, \
 		.udma_mask	= udma, \
+		.max_sectors	= sectors, \
 	}
 
 static const struct ide_port_info pdc202xx_chipsets[] __devinitdata = {
@@ -376,8 +377,8 @@ static const struct ide_port_info pdc202xx_chipsets[] __devinitdata = {
 
 	/* 1: PDC2026{2,3} */
 	DECLARE_PDC2026X_DEV(ATA_UDMA4, 0),
-	/* 2: PDC2026{5,7} */
-	DECLARE_PDC2026X_DEV(ATA_UDMA5, IDE_HFLAG_RQSIZE_256),
+	/* 2: PDC2026{5,7}: UDMA5, limit LBA48 requests to 256 sectors */
+	DECLARE_PDC2026X_DEV(ATA_UDMA5, 256),
 };
 
 /**
diff --git a/include/linux/ide.h b/include/linux/ide.h
index fc1a966c7b7d..2574dda4a3e7 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1372,8 +1372,6 @@ enum {
 	IDE_HFLAG_LEGACY_IRQS		= (1 << 21),
 	/* force use of legacy IRQs */
 	IDE_HFLAG_FORCE_LEGACY_IRQS	= (1 << 22),
-	/* limit LBA48 requests to 256 sectors */
-	IDE_HFLAG_RQSIZE_256		= (1 << 23),
 	/* use 32-bit I/O ops */
 	IDE_HFLAG_IO_32BIT		= (1 << 24),
 	/* unmask IRQs */
@@ -1411,6 +1409,9 @@ struct ide_port_info {
 
 	ide_pci_enablebit_t	enablebits[2];
 	hwif_chipset_t		chipset;
+
+	u16			max_sectors;	/* if < than the default one */
+
 	u32			host_flags;
 	u8			pio_mask;
 	u8			swdma_mask;
-- 
cgit v1.2.3


From 1f66019bdf902cb59adf959e462bcd3f4c01f683 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Mon, 29 Dec 2008 20:27:34 +0100
Subject: trm290: add IDE_HFLAG_TRM290 host flag

* Add IDE_HFLAG_TRM290 host flag and use it in ide_build_dmatable().

* Remove no longer needed ide_trm290 chipset type.

Acked-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-dma-sff.c | 2 +-
 drivers/ide/ide-proc.c    | 1 -
 drivers/ide/trm290.c      | 4 ++--
 include/linux/ide.h       | 4 +++-
 4 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/ide-dma-sff.c b/drivers/ide/ide-dma-sff.c
index cac431f0df17..1f2a5f56f81c 100644
--- a/drivers/ide/ide-dma-sff.c
+++ b/drivers/ide/ide-dma-sff.c
@@ -98,10 +98,10 @@ int ide_build_dmatable(ide_drive_t *drive, struct request *rq)
 {
 	ide_hwif_t *hwif = drive->hwif;
 	__le32 *table = (__le32 *)hwif->dmatable_cpu;
-	unsigned int is_trm290	= (hwif->chipset == ide_trm290) ? 1 : 0;
 	unsigned int count = 0;
 	int i;
 	struct scatterlist *sg;
+	u8 is_trm290 = !!(hwif->host_flags & IDE_HFLAG_TRM290);
 
 	hwif->sg_nents = ide_build_sglist(drive, rq);
 	if (hwif->sg_nents == 0)
diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c
index c2e6b8927bdc..066d2317545b 100644
--- a/drivers/ide/ide-proc.c
+++ b/drivers/ide/ide-proc.c
@@ -46,7 +46,6 @@ static int proc_ide_read_imodel
 	case ide_qd65xx:	name = "qd65xx";	break;
 	case ide_umc8672:	name = "umc8672";	break;
 	case ide_ht6560b:	name = "ht6560b";	break;
-	case ide_trm290:	name = "trm290";	break;
 	case ide_cy82c693:	name = "cy82c693";	break;
 	case ide_4drives:	name = "4drives";	break;
 	case ide_pmac:		name = "mac-io";	break;
diff --git a/drivers/ide/trm290.c b/drivers/ide/trm290.c
index 75ea61526566..2a5ea90cf8b8 100644
--- a/drivers/ide/trm290.c
+++ b/drivers/ide/trm290.c
@@ -328,10 +328,10 @@ static struct ide_dma_ops trm290_dma_ops = {
 static const struct ide_port_info trm290_chipset __devinitdata = {
 	.name		= DRV_NAME,
 	.init_hwif	= init_hwif_trm290,
-	.chipset	= ide_trm290,
 	.port_ops	= &trm290_port_ops,
 	.dma_ops	= &trm290_dma_ops,
-	.host_flags	= IDE_HFLAG_NO_ATAPI_DMA |
+	.host_flags	= IDE_HFLAG_TRM290 |
+			  IDE_HFLAG_NO_ATAPI_DMA |
 #if 0 /* play it safe for now */
 			  IDE_HFLAG_TRUST_BIOS_FOR_DMA |
 #endif
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 2574dda4a3e7..f62d35a5fb71 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -170,7 +170,7 @@ typedef int (ide_ack_intr_t)(struct hwif_s *);
 enum {		ide_unknown,	ide_generic,	ide_pci,
 		ide_cmd640,	ide_dtc2278,	ide_ali14xx,
 		ide_qd65xx,	ide_umc8672,	ide_ht6560b,
-		ide_trm290,	ide_cy82c693,	ide_4drives,
+		ide_cy82c693,	ide_4drives,
 		ide_pmac,	ide_acorn,
 		ide_au1xxx,	ide_palm3710
 };
@@ -1372,6 +1372,8 @@ enum {
 	IDE_HFLAG_LEGACY_IRQS		= (1 << 21),
 	/* force use of legacy IRQs */
 	IDE_HFLAG_FORCE_LEGACY_IRQS	= (1 << 22),
+	/* host is TRM290 */
+	IDE_HFLAG_TRM290		= (1 << 23),
 	/* use 32-bit I/O ops */
 	IDE_HFLAG_IO_32BIT		= (1 << 24),
 	/* unmask IRQs */
-- 
cgit v1.2.3


From b7876a6fb6e9bf6cbcf7b40cf034aa4138d7978f Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Mon, 29 Dec 2008 20:27:34 +0100
Subject: cy82c693: remove superfluous ide_cy82c693 chipset type

Since CY82C693 doesn't require serialization we may as well
use the default ide_pci chipset type.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/cy82c693.c | 1 -
 drivers/ide/ide-proc.c | 1 -
 include/linux/ide.h    | 3 +--
 3 files changed, 1 insertion(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/cy82c693.c b/drivers/ide/cy82c693.c
index 5297f07d2933..d37baf8ecc5f 100644
--- a/drivers/ide/cy82c693.c
+++ b/drivers/ide/cy82c693.c
@@ -292,7 +292,6 @@ static const struct ide_port_info cy82c693_chipset __devinitdata = {
 	.name		= DRV_NAME,
 	.init_iops	= init_iops_cy82c693,
 	.port_ops	= &cy82c693_port_ops,
-	.chipset	= ide_cy82c693,
 	.host_flags	= IDE_HFLAG_SINGLE,
 	.pio_mask	= ATA_PIO4,
 	.swdma_mask	= ATA_SWDMA2,
diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c
index 066d2317545b..a14e2938e4f3 100644
--- a/drivers/ide/ide-proc.c
+++ b/drivers/ide/ide-proc.c
@@ -46,7 +46,6 @@ static int proc_ide_read_imodel
 	case ide_qd65xx:	name = "qd65xx";	break;
 	case ide_umc8672:	name = "umc8672";	break;
 	case ide_ht6560b:	name = "ht6560b";	break;
-	case ide_cy82c693:	name = "cy82c693";	break;
 	case ide_4drives:	name = "4drives";	break;
 	case ide_pmac:		name = "mac-io";	break;
 	case ide_au1xxx:	name = "au1xxx";	break;
diff --git a/include/linux/ide.h b/include/linux/ide.h
index f62d35a5fb71..f89d6d69a386 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -170,8 +170,7 @@ typedef int (ide_ack_intr_t)(struct hwif_s *);
 enum {		ide_unknown,	ide_generic,	ide_pci,
 		ide_cmd640,	ide_dtc2278,	ide_ali14xx,
 		ide_qd65xx,	ide_umc8672,	ide_ht6560b,
-		ide_cy82c693,	ide_4drives,
-		ide_pmac,	ide_acorn,
+		ide_4drives,	ide_pmac,	ide_acorn,
 		ide_au1xxx,	ide_palm3710
 };
 
-- 
cgit v1.2.3


From 702c026be87ef8374ae58122969a4b0b081ce6f2 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Mon, 29 Dec 2008 20:27:36 +0100
Subject: ide: rework handling of serialized ports (v2)

* hpt366: set IDE_HFLAG_SERIALIZE in ->host_flags if needed
  in init_hwif_hpt366().  Remove HPT_SERIALIZE_IO while at it.

* Set IDE_HFLAG_SERIALIZE in ->host_flags if needed in
  ide_init_port().

* Convert init_irq() to use IDE_HFLAG_SERIALIZE together with
  hwif->host to find out ports which need to be serialized.

* Remove no longer needed save_match() and ide_hwif_t.serialized.

v2:
* Set host's ->host_flags field instead of port's copy.

This patch should fix the incorrect grouping of port(s) from
host(s) that need serialization with port(s) that happen to use
the same IRQ(s) but are from the host(s) that don't need it.

Cc: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/hpt366.c    |  8 +-------
 drivers/ide/ide-probe.c | 50 +++++--------------------------------------------
 include/linux/ide.h     |  1 -
 3 files changed, 6 insertions(+), 53 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/hpt366.c b/drivers/ide/hpt366.c
index f5afd46ed51c..b18e10d99d2e 100644
--- a/drivers/ide/hpt366.c
+++ b/drivers/ide/hpt366.c
@@ -135,7 +135,6 @@
 /* various tuning parameters */
 #define HPT_RESET_STATE_ENGINE
 #undef	HPT_DELAY_INTERRUPT
-#define HPT_SERIALIZE_IO	0
 
 static const char *quirk_drives[] = {
 	"QUANTUM FIREBALLlct08 08",
@@ -1288,7 +1287,6 @@ static u8 hpt3xx_cable_detect(ide_hwif_t *hwif)
 static void __devinit init_hwif_hpt366(ide_hwif_t *hwif)
 {
 	struct hpt_info *info	= hpt3xx_get_info(hwif->dev);
-	int serialize		= HPT_SERIALIZE_IO;
 	u8  chip_type		= info->chip_type;
 
 	/* Cache the channel's MISC. control registers' offset */
@@ -1305,13 +1303,9 @@ static void __devinit init_hwif_hpt366(ide_hwif_t *hwif)
 		 * Clock is shared between the channels,
 		 * so we'll have to serialize them... :-(
 		 */
-		serialize = 1;
+		hwif->host->host_flags |= IDE_HFLAG_SERIALIZE;
 		hwif->rw_disk = &hpt3xxn_rw_disk;
 	}
-
-	/* Serialize access to this device if needed */
-	if (serialize && hwif->mate)
-		hwif->serialized = hwif->mate->serialized = 1;
 }
 
 static int __devinit init_dma_hpt366(ide_hwif_t *hwif,
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index f76c22c45086..c1cd1af2646b 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -863,31 +863,6 @@ static void ide_port_tune_devices(ide_hwif_t *hwif)
 	}
 }
 
-/*
- * save_match() is used to simplify logic in init_irq() below.
- *
- * A loophole here is that we may not know about a particular
- * hwif's irq until after that hwif is actually probed/initialized..
- * This could be a problem for the case where an hwif is on a
- * dual interface that requires serialization (eg. cmd640) and another
- * hwif using one of the same irqs is initialized beforehand.
- *
- * This routine detects and reports such situations, but does not fix them.
- */
-static void save_match(ide_hwif_t *hwif, ide_hwif_t *new, ide_hwif_t **match)
-{
-	ide_hwif_t *m = *match;
-
-	if (m && m->hwgroup && m->hwgroup != new->hwgroup) {
-		if (!new->hwgroup)
-			return;
-		printk(KERN_WARNING "%s: potential IRQ problem with %s and %s\n",
-			hwif->name, new->name, m->name);
-	}
-	if (!m || m->irq != hwif->irq) /* don't undo a prior perfect match */
-		*match = new;
-}
-
 /*
  * init request queue
  */
@@ -1052,26 +1027,13 @@ static int init_irq (ide_hwif_t *hwif)
 	mutex_lock(&ide_cfg_mtx);
 	hwif->hwgroup = NULL;
 
-	/*
-	 * Group up with any other hwifs that share our irq(s).
-	 */
 	for (index = 0; index < MAX_HWIFS; index++) {
 		ide_hwif_t *h = ide_ports[index];
 
 		if (h && h->hwgroup) {  /* scan only initialized ports */
-			if (hwif->irq == h->irq) {
-				if (hwif->chipset != ide_pci ||
-				    h->chipset != ide_pci) {
-					save_match(hwif, h, &match);
-				}
-			}
-			if (hwif->serialized) {
-				if (hwif->mate && hwif->mate->irq == h->irq)
-					save_match(hwif, h, &match);
-			}
-			if (h->serialized) {
-				if (h->mate && hwif->irq == h->mate->irq)
-					save_match(hwif, h, &match);
+			if (hwif->host->host_flags & IDE_HFLAG_SERIALIZE) {
+				if (hwif->host == h->host)
+					match = h;
 			}
 		}
 	}
@@ -1437,10 +1399,8 @@ static void ide_init_port(ide_hwif_t *hwif, unsigned int port,
 	}
 
 	if ((d->host_flags & IDE_HFLAG_SERIALIZE) ||
-	    ((d->host_flags & IDE_HFLAG_SERIALIZE_DMA) && hwif->dma_base)) {
-		if (hwif->mate)
-			hwif->mate->serialized = hwif->serialized = 1;
-	}
+	    ((d->host_flags & IDE_HFLAG_SERIALIZE_DMA) && hwif->dma_base))
+		hwif->host->host_flags |= IDE_HFLAG_SERIALIZE;
 
 	if (d->max_sectors)
 		hwif->rqsize = d->max_sectors;
diff --git a/include/linux/ide.h b/include/linux/ide.h
index f89d6d69a386..9b89cab6d493 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -839,7 +839,6 @@ typedef struct hwif_s {
 	unsigned	extra_ports;	/* number of extra dma ports */
 
 	unsigned	present    : 1;	/* this interface exists */
-	unsigned	serialized : 1;	/* serialized all channel operation */
 	unsigned	sg_mapped  : 1;	/* sg_table and sg_nents are ready */
 
 	struct device		gendev;
-- 
cgit v1.2.3


From e2984c628c924442132304ae662da433f41c05c9 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Mon, 29 Dec 2008 20:27:37 +0100
Subject: ide: move Power Management support to ide-pm.c

There should be no functional changes caused by this patch.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/Makefile |   2 +-
 drivers/ide/ide-io.c | 159 ----------------------------------
 drivers/ide/ide-pm.c | 235 +++++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/ide/ide.c    |  74 ----------------
 include/linux/ide.h  |   8 ++
 5 files changed, 244 insertions(+), 234 deletions(-)
 create mode 100644 drivers/ide/ide-pm.c

(limited to 'include/linux')

diff --git a/drivers/ide/Makefile b/drivers/ide/Makefile
index 7818d402b188..bdc7b81bc044 100644
--- a/drivers/ide/Makefile
+++ b/drivers/ide/Makefile
@@ -5,7 +5,7 @@
 EXTRA_CFLAGS				+= -Idrivers/ide
 
 ide-core-y += ide.o ide-ioctls.o ide-io.o ide-iops.o ide-lib.o ide-probe.o \
-	      ide-taskfile.o ide-park.o ide-pio-blacklist.o
+	      ide-taskfile.o ide-pm.o ide-park.o ide-pio-blacklist.o
 
 # core IDE code
 ide-core-$(CONFIG_IDE_TIMINGS)		+= ide-timings.o
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 26d3f3cacc8a..ecacc008fdaf 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -120,98 +120,6 @@ int ide_end_request (ide_drive_t *drive, int uptodate, int nr_sectors)
 }
 EXPORT_SYMBOL(ide_end_request);
 
-static void ide_complete_power_step(ide_drive_t *drive, struct request *rq)
-{
-	struct request_pm_state *pm = rq->data;
-
-#ifdef DEBUG_PM
-	printk(KERN_INFO "%s: complete_power_step(step: %d)\n",
-		drive->name, pm->pm_step);
-#endif
-	if (drive->media != ide_disk)
-		return;
-
-	switch (pm->pm_step) {
-	case IDE_PM_FLUSH_CACHE:	/* Suspend step 1 (flush cache) */
-		if (pm->pm_state == PM_EVENT_FREEZE)
-			pm->pm_step = IDE_PM_COMPLETED;
-		else
-			pm->pm_step = IDE_PM_STANDBY;
-		break;
-	case IDE_PM_STANDBY:		/* Suspend step 2 (standby) */
-		pm->pm_step = IDE_PM_COMPLETED;
-		break;
-	case IDE_PM_RESTORE_PIO:	/* Resume step 1 (restore PIO) */
-		pm->pm_step = IDE_PM_IDLE;
-		break;
-	case IDE_PM_IDLE:		/* Resume step 2 (idle)*/
-		pm->pm_step = IDE_PM_RESTORE_DMA;
-		break;
-	}
-}
-
-static ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq)
-{
-	struct request_pm_state *pm = rq->data;
-	ide_task_t *args = rq->special;
-
-	memset(args, 0, sizeof(*args));
-
-	switch (pm->pm_step) {
-	case IDE_PM_FLUSH_CACHE:	/* Suspend step 1 (flush cache) */
-		if (drive->media != ide_disk)
-			break;
-		/* Not supported? Switch to next step now. */
-		if (ata_id_flush_enabled(drive->id) == 0 ||
-		    (drive->dev_flags & IDE_DFLAG_WCACHE) == 0) {
-			ide_complete_power_step(drive, rq);
-			return ide_stopped;
-		}
-		if (ata_id_flush_ext_enabled(drive->id))
-			args->tf.command = ATA_CMD_FLUSH_EXT;
-		else
-			args->tf.command = ATA_CMD_FLUSH;
-		goto out_do_tf;
-	case IDE_PM_STANDBY:		/* Suspend step 2 (standby) */
-		args->tf.command = ATA_CMD_STANDBYNOW1;
-		goto out_do_tf;
-	case IDE_PM_RESTORE_PIO:	/* Resume step 1 (restore PIO) */
-		ide_set_max_pio(drive);
-		/*
-		 * skip IDE_PM_IDLE for ATAPI devices
-		 */
-		if (drive->media != ide_disk)
-			pm->pm_step = IDE_PM_RESTORE_DMA;
-		else
-			ide_complete_power_step(drive, rq);
-		return ide_stopped;
-	case IDE_PM_IDLE:		/* Resume step 2 (idle) */
-		args->tf.command = ATA_CMD_IDLEIMMEDIATE;
-		goto out_do_tf;
-	case IDE_PM_RESTORE_DMA:	/* Resume step 3 (restore DMA) */
-		/*
-		 * Right now, all we do is call ide_set_dma(drive),
-		 * we could be smarter and check for current xfer_speed
-		 * in struct drive etc...
-		 */
-		if (drive->hwif->dma_ops == NULL)
-			break;
-		/*
-		 * TODO: respect IDE_DFLAG_USING_DMA
-		 */
-		ide_set_dma(drive);
-		break;
-	}
-
-	pm->pm_step = IDE_PM_COMPLETED;
-	return ide_stopped;
-
-out_do_tf:
-	args->tf_flags	 = IDE_TFLAG_TF | IDE_TFLAG_DEVICE;
-	args->data_phase = TASKFILE_NO_DATA;
-	return do_rw_taskfile(drive, args);
-}
-
 /**
  *	ide_end_dequeued_request	-	complete an IDE I/O
  *	@drive: IDE device for the I/O
@@ -236,39 +144,6 @@ int ide_end_dequeued_request(ide_drive_t *drive, struct request *rq,
 }
 EXPORT_SYMBOL_GPL(ide_end_dequeued_request);
 
-
-/**
- *	ide_complete_pm_request - end the current Power Management request
- *	@drive: target drive
- *	@rq: request
- *
- *	This function cleans up the current PM request and stops the queue
- *	if necessary.
- */
-static void ide_complete_pm_request (ide_drive_t *drive, struct request *rq)
-{
-	struct request_queue *q = drive->queue;
-	unsigned long flags;
-
-#ifdef DEBUG_PM
-	printk("%s: completing PM request, %s\n", drive->name,
-	       blk_pm_suspend_request(rq) ? "suspend" : "resume");
-#endif
-	spin_lock_irqsave(q->queue_lock, flags);
-	if (blk_pm_suspend_request(rq)) {
-		blk_stop_queue(q);
-	} else {
-		drive->dev_flags &= ~IDE_DFLAG_BLOCKED;
-		blk_start_queue(q);
-	}
-	spin_unlock_irqrestore(q->queue_lock, flags);
-
-	drive->hwif->hwgroup->rq = NULL;
-
-	if (blk_end_request(rq, 0, 0))
-		BUG();
-}
-
 /**
  *	ide_end_drive_cmd	-	end an explicit drive command
  *	@drive: command 
@@ -697,40 +572,6 @@ static ide_startstop_t ide_special_rq(ide_drive_t *drive, struct request *rq)
 	}
 }
 
-static void ide_check_pm_state(ide_drive_t *drive, struct request *rq)
-{
-	struct request_pm_state *pm = rq->data;
-
-	if (blk_pm_suspend_request(rq) &&
-	    pm->pm_step == IDE_PM_START_SUSPEND)
-		/* Mark drive blocked when starting the suspend sequence. */
-		drive->dev_flags |= IDE_DFLAG_BLOCKED;
-	else if (blk_pm_resume_request(rq) &&
-		 pm->pm_step == IDE_PM_START_RESUME) {
-		/* 
-		 * The first thing we do on wakeup is to wait for BSY bit to
-		 * go away (with a looong timeout) as a drive on this hwif may
-		 * just be POSTing itself.
-		 * We do that before even selecting as the "other" device on
-		 * the bus may be broken enough to walk on our toes at this
-		 * point.
-		 */
-		ide_hwif_t *hwif = drive->hwif;
-		int rc;
-#ifdef DEBUG_PM
-		printk("%s: Wakeup request inited, waiting for !BSY...\n", drive->name);
-#endif
-		rc = ide_wait_not_busy(hwif, 35000);
-		if (rc)
-			printk(KERN_WARNING "%s: bus not ready on wakeup\n", drive->name);
-		SELECT_DRIVE(drive);
-		hwif->tp_ops->set_irq(hwif, 1);
-		rc = ide_wait_not_busy(hwif, 100000);
-		if (rc)
-			printk(KERN_WARNING "%s: drive not ready on wakeup\n", drive->name);
-	}
-}
-
 /**
  *	start_request	-	start of I/O and command issuing for IDE
  *
diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c
new file mode 100644
index 000000000000..8282c6086e6a
--- /dev/null
+++ b/drivers/ide/ide-pm.c
@@ -0,0 +1,235 @@
+#include <linux/kernel.h>
+#include <linux/ide.h>
+#include <linux/hdreg.h>
+
+int generic_ide_suspend(struct device *dev, pm_message_t mesg)
+{
+	ide_drive_t *drive = dev->driver_data, *pair = ide_get_pair_dev(drive);
+	ide_hwif_t *hwif = HWIF(drive);
+	struct request *rq;
+	struct request_pm_state rqpm;
+	ide_task_t args;
+	int ret;
+
+	/* call ACPI _GTM only once */
+	if ((drive->dn & 1) == 0 || pair == NULL)
+		ide_acpi_get_timing(hwif);
+
+	memset(&rqpm, 0, sizeof(rqpm));
+	memset(&args, 0, sizeof(args));
+	rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
+	rq->cmd_type = REQ_TYPE_PM_SUSPEND;
+	rq->special = &args;
+	rq->data = &rqpm;
+	rqpm.pm_step = IDE_PM_START_SUSPEND;
+	if (mesg.event == PM_EVENT_PRETHAW)
+		mesg.event = PM_EVENT_FREEZE;
+	rqpm.pm_state = mesg.event;
+
+	ret = blk_execute_rq(drive->queue, NULL, rq, 0);
+	blk_put_request(rq);
+
+	/* call ACPI _PS3 only after both devices are suspended */
+	if (ret == 0 && ((drive->dn & 1) || pair == NULL))
+		ide_acpi_set_state(hwif, 0);
+
+	return ret;
+}
+
+int generic_ide_resume(struct device *dev)
+{
+	ide_drive_t *drive = dev->driver_data, *pair = ide_get_pair_dev(drive);
+	ide_hwif_t *hwif = HWIF(drive);
+	struct request *rq;
+	struct request_pm_state rqpm;
+	ide_task_t args;
+	int err;
+
+	/* call ACPI _PS0 / _STM only once */
+	if ((drive->dn & 1) == 0 || pair == NULL) {
+		ide_acpi_set_state(hwif, 1);
+		ide_acpi_push_timing(hwif);
+	}
+
+	ide_acpi_exec_tfs(drive);
+
+	memset(&rqpm, 0, sizeof(rqpm));
+	memset(&args, 0, sizeof(args));
+	rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
+	rq->cmd_type = REQ_TYPE_PM_RESUME;
+	rq->cmd_flags |= REQ_PREEMPT;
+	rq->special = &args;
+	rq->data = &rqpm;
+	rqpm.pm_step = IDE_PM_START_RESUME;
+	rqpm.pm_state = PM_EVENT_ON;
+
+	err = blk_execute_rq(drive->queue, NULL, rq, 1);
+	blk_put_request(rq);
+
+	if (err == 0 && dev->driver) {
+		ide_driver_t *drv = to_ide_driver(dev->driver);
+
+		if (drv->resume)
+			drv->resume(drive);
+	}
+
+	return err;
+}
+
+void ide_complete_power_step(ide_drive_t *drive, struct request *rq)
+{
+	struct request_pm_state *pm = rq->data;
+
+#ifdef DEBUG_PM
+	printk(KERN_INFO "%s: complete_power_step(step: %d)\n",
+		drive->name, pm->pm_step);
+#endif
+	if (drive->media != ide_disk)
+		return;
+
+	switch (pm->pm_step) {
+	case IDE_PM_FLUSH_CACHE:	/* Suspend step 1 (flush cache) */
+		if (pm->pm_state == PM_EVENT_FREEZE)
+			pm->pm_step = IDE_PM_COMPLETED;
+		else
+			pm->pm_step = IDE_PM_STANDBY;
+		break;
+	case IDE_PM_STANDBY:		/* Suspend step 2 (standby) */
+		pm->pm_step = IDE_PM_COMPLETED;
+		break;
+	case IDE_PM_RESTORE_PIO:	/* Resume step 1 (restore PIO) */
+		pm->pm_step = IDE_PM_IDLE;
+		break;
+	case IDE_PM_IDLE:		/* Resume step 2 (idle)*/
+		pm->pm_step = IDE_PM_RESTORE_DMA;
+		break;
+	}
+}
+
+ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq)
+{
+	struct request_pm_state *pm = rq->data;
+	ide_task_t *args = rq->special;
+
+	memset(args, 0, sizeof(*args));
+
+	switch (pm->pm_step) {
+	case IDE_PM_FLUSH_CACHE:	/* Suspend step 1 (flush cache) */
+		if (drive->media != ide_disk)
+			break;
+		/* Not supported? Switch to next step now. */
+		if (ata_id_flush_enabled(drive->id) == 0 ||
+		    (drive->dev_flags & IDE_DFLAG_WCACHE) == 0) {
+			ide_complete_power_step(drive, rq);
+			return ide_stopped;
+		}
+		if (ata_id_flush_ext_enabled(drive->id))
+			args->tf.command = ATA_CMD_FLUSH_EXT;
+		else
+			args->tf.command = ATA_CMD_FLUSH;
+		goto out_do_tf;
+	case IDE_PM_STANDBY:		/* Suspend step 2 (standby) */
+		args->tf.command = ATA_CMD_STANDBYNOW1;
+		goto out_do_tf;
+	case IDE_PM_RESTORE_PIO:	/* Resume step 1 (restore PIO) */
+		ide_set_max_pio(drive);
+		/*
+		 * skip IDE_PM_IDLE for ATAPI devices
+		 */
+		if (drive->media != ide_disk)
+			pm->pm_step = IDE_PM_RESTORE_DMA;
+		else
+			ide_complete_power_step(drive, rq);
+		return ide_stopped;
+	case IDE_PM_IDLE:		/* Resume step 2 (idle) */
+		args->tf.command = ATA_CMD_IDLEIMMEDIATE;
+		goto out_do_tf;
+	case IDE_PM_RESTORE_DMA:	/* Resume step 3 (restore DMA) */
+		/*
+		 * Right now, all we do is call ide_set_dma(drive),
+		 * we could be smarter and check for current xfer_speed
+		 * in struct drive etc...
+		 */
+		if (drive->hwif->dma_ops == NULL)
+			break;
+		/*
+		 * TODO: respect IDE_DFLAG_USING_DMA
+		 */
+		ide_set_dma(drive);
+		break;
+	}
+
+	pm->pm_step = IDE_PM_COMPLETED;
+	return ide_stopped;
+
+out_do_tf:
+	args->tf_flags	 = IDE_TFLAG_TF | IDE_TFLAG_DEVICE;
+	args->data_phase = TASKFILE_NO_DATA;
+	return do_rw_taskfile(drive, args);
+}
+
+/**
+ *	ide_complete_pm_request - end the current Power Management request
+ *	@drive: target drive
+ *	@rq: request
+ *
+ *	This function cleans up the current PM request and stops the queue
+ *	if necessary.
+ */
+void ide_complete_pm_request(ide_drive_t *drive, struct request *rq)
+{
+	struct request_queue *q = drive->queue;
+	unsigned long flags;
+
+#ifdef DEBUG_PM
+	printk("%s: completing PM request, %s\n", drive->name,
+	       blk_pm_suspend_request(rq) ? "suspend" : "resume");
+#endif
+	spin_lock_irqsave(q->queue_lock, flags);
+	if (blk_pm_suspend_request(rq)) {
+		blk_stop_queue(q);
+	} else {
+		drive->dev_flags &= ~IDE_DFLAG_BLOCKED;
+		blk_start_queue(q);
+	}
+	spin_unlock_irqrestore(q->queue_lock, flags);
+
+	drive->hwif->hwgroup->rq = NULL;
+
+	if (blk_end_request(rq, 0, 0))
+		BUG();
+}
+
+void ide_check_pm_state(ide_drive_t *drive, struct request *rq)
+{
+	struct request_pm_state *pm = rq->data;
+
+	if (blk_pm_suspend_request(rq) &&
+	    pm->pm_step == IDE_PM_START_SUSPEND)
+		/* Mark drive blocked when starting the suspend sequence. */
+		drive->dev_flags |= IDE_DFLAG_BLOCKED;
+	else if (blk_pm_resume_request(rq) &&
+		 pm->pm_step == IDE_PM_START_RESUME) {
+		/*
+		 * The first thing we do on wakeup is to wait for BSY bit to
+		 * go away (with a looong timeout) as a drive on this hwif may
+		 * just be POSTing itself.
+		 * We do that before even selecting as the "other" device on
+		 * the bus may be broken enough to walk on our toes at this
+		 * point.
+		 */
+		ide_hwif_t *hwif = drive->hwif;
+		int rc;
+#ifdef DEBUG_PM
+		printk("%s: Wakeup request inited, waiting for !BSY...\n", drive->name);
+#endif
+		rc = ide_wait_not_busy(hwif, 35000);
+		if (rc)
+			printk(KERN_WARNING "%s: bus not ready on wakeup\n", drive->name);
+		SELECT_DRIVE(drive);
+		hwif->tp_ops->set_irq(hwif, 1);
+		rc = ide_wait_not_busy(hwif, 100000);
+		if (rc)
+			printk(KERN_WARNING "%s: drive not ready on wakeup\n", drive->name);
+	}
+}
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index 41d042053548..f0f09f702e9c 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -388,80 +388,6 @@ ide_ext_devset_rw_sync(unmaskirq, unmaskirq);
 ide_ext_devset_rw_sync(using_dma, using_dma);
 __IDE_DEVSET(pio_mode, DS_SYNC, NULL, set_pio_mode);
 
-static int generic_ide_suspend(struct device *dev, pm_message_t mesg)
-{
-	ide_drive_t *drive = dev->driver_data, *pair = ide_get_pair_dev(drive);
-	ide_hwif_t *hwif = HWIF(drive);
-	struct request *rq;
-	struct request_pm_state rqpm;
-	ide_task_t args;
-	int ret;
-
-	/* call ACPI _GTM only once */
-	if ((drive->dn & 1) == 0 || pair == NULL)
-		ide_acpi_get_timing(hwif);
-
-	memset(&rqpm, 0, sizeof(rqpm));
-	memset(&args, 0, sizeof(args));
-	rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
-	rq->cmd_type = REQ_TYPE_PM_SUSPEND;
-	rq->special = &args;
-	rq->data = &rqpm;
-	rqpm.pm_step = IDE_PM_START_SUSPEND;
-	if (mesg.event == PM_EVENT_PRETHAW)
-		mesg.event = PM_EVENT_FREEZE;
-	rqpm.pm_state = mesg.event;
-
-	ret = blk_execute_rq(drive->queue, NULL, rq, 0);
-	blk_put_request(rq);
-
-	/* call ACPI _PS3 only after both devices are suspended */
-	if (ret == 0 && ((drive->dn & 1) || pair == NULL))
-		ide_acpi_set_state(hwif, 0);
-
-	return ret;
-}
-
-static int generic_ide_resume(struct device *dev)
-{
-	ide_drive_t *drive = dev->driver_data, *pair = ide_get_pair_dev(drive);
-	ide_hwif_t *hwif = HWIF(drive);
-	struct request *rq;
-	struct request_pm_state rqpm;
-	ide_task_t args;
-	int err;
-
-	/* call ACPI _PS0 / _STM only once */
-	if ((drive->dn & 1) == 0 || pair == NULL) {
-		ide_acpi_set_state(hwif, 1);
-		ide_acpi_push_timing(hwif);
-	}
-
-	ide_acpi_exec_tfs(drive);
-
-	memset(&rqpm, 0, sizeof(rqpm));
-	memset(&args, 0, sizeof(args));
-	rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
-	rq->cmd_type = REQ_TYPE_PM_RESUME;
-	rq->cmd_flags |= REQ_PREEMPT;
-	rq->special = &args;
-	rq->data = &rqpm;
-	rqpm.pm_step = IDE_PM_START_RESUME;
-	rqpm.pm_state = PM_EVENT_ON;
-
-	err = blk_execute_rq(drive->queue, NULL, rq, 1);
-	blk_put_request(rq);
-
-	if (err == 0 && dev->driver) {
-		ide_driver_t *drv = to_ide_driver(dev->driver);
-
-		if (drv->resume)
-			drv->resume(drive);
-	}
-
-	return err;
-}
-
 /**
  * ide_device_get	-	get an additional reference to a ide_drive_t
  * @drive:	device to get a reference to
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 9b89cab6d493..e99c56de7f56 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1116,6 +1116,14 @@ enum {
 	IDE_PM_COMPLETED,
 };
 
+int generic_ide_suspend(struct device *, pm_message_t);
+int generic_ide_resume(struct device *);
+
+void ide_complete_power_step(ide_drive_t *, struct request *);
+ide_startstop_t ide_start_power_step(ide_drive_t *, struct request *);
+void ide_complete_pm_request(ide_drive_t *, struct request *);
+void ide_check_pm_state(ide_drive_t *, struct request *);
+
 /*
  * Subdrivers support.
  *
-- 
cgit v1.2.3


From 69acdf1e5a9146ec6667f6c4b439acd38c18f5ea Mon Sep 17 00:00:00 2001
From: Robert Jarzmik <robert.jarzmik@free.fr>
Date: Tue, 4 Nov 2008 21:59:37 -0300
Subject: V4L/DVB (9530): Add new pixel format VYUY 16 bits wide.

There were already 3 YUV formats defined :
 - YUYV
 - YVYU
 - UYVY
The only left combination is VYUY, which is added in this
patch.

Signed-off-by: Robert Jarzmik <robert.jarzmik@free.fr>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/linux/videodev2.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index 4669d7e72e75..ec311d4616cd 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -293,6 +293,7 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_YVU420  v4l2_fourcc('Y', 'V', '1', '2') /* 12  YVU 4:2:0     */
 #define V4L2_PIX_FMT_YUYV    v4l2_fourcc('Y', 'U', 'Y', 'V') /* 16  YUV 4:2:2     */
 #define V4L2_PIX_FMT_UYVY    v4l2_fourcc('U', 'Y', 'V', 'Y') /* 16  YUV 4:2:2     */
+#define V4L2_PIX_FMT_VYUY    v4l2_fourcc('V', 'Y', 'U', 'Y') /* 16  YUV 4:2:2     */
 #define V4L2_PIX_FMT_YUV422P v4l2_fourcc('4', '2', '2', 'P') /* 16  YVU422 planar */
 #define V4L2_PIX_FMT_YUV411P v4l2_fourcc('4', '1', '1', 'P') /* 16  YVU411 planar */
 #define V4L2_PIX_FMT_Y41P    v4l2_fourcc('Y', '4', '1', 'P') /* 12  YUV 4:1:1     */
-- 
cgit v1.2.3


From 278d1ed65e25d80af7c3a112d707b3f70516ddb4 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 30 Dec 2008 09:05:12 +1030
Subject: cpumask: make CONFIG_NR_CPUS always valid.

Impact: cleanup

Currently we have NR_CPUS, which is 1 on UP, and CONFIG_NR_CPUS on
SMP.  If we make CONFIG_NR_CPUS always valid (and always 1 on !SMP),
we can skip the middleman.

This also allows us to find and check all the unaudited NR_CPUS usage
as we prepare for v. large NR_CPUS.

To avoid breaking every arch, we cheat and do this for the moment
in the header if the arch doesn't.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
---
 include/linux/threads.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/threads.h b/include/linux/threads.h
index 38d1a5d6568e..052b12bec8bd 100644
--- a/include/linux/threads.h
+++ b/include/linux/threads.h
@@ -8,17 +8,17 @@
  */
 
 /*
- * Maximum supported processors that can run under SMP.  This value is
- * set via configure setting.  The maximum is equal to the size of the
- * bitmasks used on that platform, i.e. 32 or 64.  Setting this smaller
- * saves quite a bit of memory.
+ * Maximum supported processors.  Setting this smaller saves quite a
+ * bit of memory.  Use nr_cpu_ids instead of this except for static bitmaps.
  */
-#ifdef CONFIG_SMP
-#define NR_CPUS		CONFIG_NR_CPUS
-#else
-#define NR_CPUS		1
+#ifndef CONFIG_NR_CPUS
+/* FIXME: This should be fixed in the arch's Kconfig */
+#define CONFIG_NR_CPUS	1
 #endif
 
+/* Places which use this should consider cpumask_var_t. */
+#define NR_CPUS		CONFIG_NR_CPUS
+
 #define MIN_THREADS_LEFT_FOR_ROOT 4
 
 /*
-- 
cgit v1.2.3


From 4b0bc0bca83f3fb7cf920e2ec80684c15d2269c0 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 30 Dec 2008 09:05:13 +1030
Subject: bitmap: test for constant as well as small size for inline versions

Impact: reduce text size

bitmap_zero et al have a fastpath for nbits <= BITS_PER_LONG, but this
should really only apply where the nbits is known at compile time.

This only saves about 1200 bytes on an allyesconfig kernel, but with
cpumasks going variable that number will increase.

   text		data	bss	dec		hex	filename
35327852        5035607 6782976 47146435        2cf65c3 vmlinux-before
35326640        5035607 6782976 47145223        2cf6107 vmlinux-after

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/bitmap.h | 35 +++++++++++++++++++----------------
 1 file changed, 19 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index a08c33a26ca9..2878811c6134 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -137,9 +137,12 @@ extern void bitmap_copy_le(void *dst, const unsigned long *src, int nbits);
 		(1UL<<((nbits) % BITS_PER_LONG))-1 : ~0UL		\
 )
 
+#define small_const_nbits(nbits) \
+	(__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG)
+
 static inline void bitmap_zero(unsigned long *dst, int nbits)
 {
-	if (nbits <= BITS_PER_LONG)
+	if (small_const_nbits(nbits))
 		*dst = 0UL;
 	else {
 		int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
@@ -150,7 +153,7 @@ static inline void bitmap_zero(unsigned long *dst, int nbits)
 static inline void bitmap_fill(unsigned long *dst, int nbits)
 {
 	size_t nlongs = BITS_TO_LONGS(nbits);
-	if (nlongs > 1) {
+	if (!small_const_nbits(nbits)) {
 		int len = (nlongs - 1) * sizeof(unsigned long);
 		memset(dst, 0xff,  len);
 	}
@@ -160,7 +163,7 @@ static inline void bitmap_fill(unsigned long *dst, int nbits)
 static inline void bitmap_copy(unsigned long *dst, const unsigned long *src,
 			int nbits)
 {
-	if (nbits <= BITS_PER_LONG)
+	if (small_const_nbits(nbits))
 		*dst = *src;
 	else {
 		int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
@@ -171,7 +174,7 @@ static inline void bitmap_copy(unsigned long *dst, const unsigned long *src,
 static inline void bitmap_and(unsigned long *dst, const unsigned long *src1,
 			const unsigned long *src2, int nbits)
 {
-	if (nbits <= BITS_PER_LONG)
+	if (small_const_nbits(nbits))
 		*dst = *src1 & *src2;
 	else
 		__bitmap_and(dst, src1, src2, nbits);
@@ -180,7 +183,7 @@ static inline void bitmap_and(unsigned long *dst, const unsigned long *src1,
 static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
 			const unsigned long *src2, int nbits)
 {
-	if (nbits <= BITS_PER_LONG)
+	if (small_const_nbits(nbits))
 		*dst = *src1 | *src2;
 	else
 		__bitmap_or(dst, src1, src2, nbits);
@@ -189,7 +192,7 @@ static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
 static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1,
 			const unsigned long *src2, int nbits)
 {
-	if (nbits <= BITS_PER_LONG)
+	if (small_const_nbits(nbits))
 		*dst = *src1 ^ *src2;
 	else
 		__bitmap_xor(dst, src1, src2, nbits);
@@ -198,7 +201,7 @@ static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1,
 static inline void bitmap_andnot(unsigned long *dst, const unsigned long *src1,
 			const unsigned long *src2, int nbits)
 {
-	if (nbits <= BITS_PER_LONG)
+	if (small_const_nbits(nbits))
 		*dst = *src1 & ~(*src2);
 	else
 		__bitmap_andnot(dst, src1, src2, nbits);
@@ -207,7 +210,7 @@ static inline void bitmap_andnot(unsigned long *dst, const unsigned long *src1,
 static inline void bitmap_complement(unsigned long *dst, const unsigned long *src,
 			int nbits)
 {
-	if (nbits <= BITS_PER_LONG)
+	if (small_const_nbits(nbits))
 		*dst = ~(*src) & BITMAP_LAST_WORD_MASK(nbits);
 	else
 		__bitmap_complement(dst, src, nbits);
@@ -216,7 +219,7 @@ static inline void bitmap_complement(unsigned long *dst, const unsigned long *sr
 static inline int bitmap_equal(const unsigned long *src1,
 			const unsigned long *src2, int nbits)
 {
-	if (nbits <= BITS_PER_LONG)
+	if (small_const_nbits(nbits))
 		return ! ((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits));
 	else
 		return __bitmap_equal(src1, src2, nbits);
@@ -225,7 +228,7 @@ static inline int bitmap_equal(const unsigned long *src1,
 static inline int bitmap_intersects(const unsigned long *src1,
 			const unsigned long *src2, int nbits)
 {
-	if (nbits <= BITS_PER_LONG)
+	if (small_const_nbits(nbits))
 		return ((*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0;
 	else
 		return __bitmap_intersects(src1, src2, nbits);
@@ -234,7 +237,7 @@ static inline int bitmap_intersects(const unsigned long *src1,
 static inline int bitmap_subset(const unsigned long *src1,
 			const unsigned long *src2, int nbits)
 {
-	if (nbits <= BITS_PER_LONG)
+	if (small_const_nbits(nbits))
 		return ! ((*src1 & ~(*src2)) & BITMAP_LAST_WORD_MASK(nbits));
 	else
 		return __bitmap_subset(src1, src2, nbits);
@@ -242,7 +245,7 @@ static inline int bitmap_subset(const unsigned long *src1,
 
 static inline int bitmap_empty(const unsigned long *src, int nbits)
 {
-	if (nbits <= BITS_PER_LONG)
+	if (small_const_nbits(nbits))
 		return ! (*src & BITMAP_LAST_WORD_MASK(nbits));
 	else
 		return __bitmap_empty(src, nbits);
@@ -250,7 +253,7 @@ static inline int bitmap_empty(const unsigned long *src, int nbits)
 
 static inline int bitmap_full(const unsigned long *src, int nbits)
 {
-	if (nbits <= BITS_PER_LONG)
+	if (small_const_nbits(nbits))
 		return ! (~(*src) & BITMAP_LAST_WORD_MASK(nbits));
 	else
 		return __bitmap_full(src, nbits);
@@ -258,7 +261,7 @@ static inline int bitmap_full(const unsigned long *src, int nbits)
 
 static inline int bitmap_weight(const unsigned long *src, int nbits)
 {
-	if (nbits <= BITS_PER_LONG)
+	if (small_const_nbits(nbits))
 		return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits));
 	return __bitmap_weight(src, nbits);
 }
@@ -266,7 +269,7 @@ static inline int bitmap_weight(const unsigned long *src, int nbits)
 static inline void bitmap_shift_right(unsigned long *dst,
 			const unsigned long *src, int n, int nbits)
 {
-	if (nbits <= BITS_PER_LONG)
+	if (small_const_nbits(nbits))
 		*dst = *src >> n;
 	else
 		__bitmap_shift_right(dst, src, n, nbits);
@@ -275,7 +278,7 @@ static inline void bitmap_shift_right(unsigned long *dst,
 static inline void bitmap_shift_left(unsigned long *dst,
 			const unsigned long *src, int n, int nbits)
 {
-	if (nbits <= BITS_PER_LONG)
+	if (small_const_nbits(nbits))
 		*dst = (*src << n) & BITMAP_LAST_WORD_MASK(nbits);
 	else
 		__bitmap_shift_left(dst, src, n, nbits);
-- 
cgit v1.2.3


From cb78a0ce69fad2026825f957e24e2d9cda1ec9f1 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 30 Dec 2008 09:05:14 +1030
Subject: bitmap: fix seq_bitmap and seq_cpumask to take const pointer

Impact: cleanup

seq_bitmap just calls bitmap_scnprintf on the bits: that arg can be const.
Similarly, seq_cpumask just calls seq_bitmap.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 fs/seq_file.c            | 3 ++-
 include/linux/seq_file.h | 5 +++--
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/seq_file.c b/fs/seq_file.c
index 16c211558c22..c99358a52176 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -462,7 +462,8 @@ int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc)
 	return -1;
 }
 
-int seq_bitmap(struct seq_file *m, unsigned long *bits, unsigned int nr_bits)
+int seq_bitmap(struct seq_file *m, const unsigned long *bits,
+				   unsigned int nr_bits)
 {
 	if (m->count < m->size) {
 		int len = bitmap_scnprintf(m->buf + m->count,
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index b3dfa72f13b9..952e0187ba16 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -50,8 +50,9 @@ int seq_path(struct seq_file *, struct path *, char *);
 int seq_dentry(struct seq_file *, struct dentry *, char *);
 int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
 		  char *esc);
-int seq_bitmap(struct seq_file *m, unsigned long *bits, unsigned int nr_bits);
-static inline int seq_cpumask(struct seq_file *m, cpumask_t *mask)
+int seq_bitmap(struct seq_file *m, const unsigned long *bits,
+				   unsigned int nr_bits);
+static inline int seq_cpumask(struct seq_file *m, const struct cpumask *mask)
 {
 	return seq_bitmap(m, mask->bits, NR_CPUS);
 }
-- 
cgit v1.2.3


From b3199c025d1646e25e7d1d640dd605db251dccf8 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 30 Dec 2008 09:05:14 +1030
Subject: cpumask: switch over to cpu_online/possible/active/present_mask: core

Impact: cleanup

This implements the obsolescent cpu_online_map in terms of
cpu_online_mask, rather than the other way around.  Same for the other
maps.

The documentation comments are also updated to refer to _mask rather
than _map.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
---
 include/linux/cpumask.h | 75 +++++++++++++++++++------------------------------
 kernel/cpu.c            | 49 +++++++++++++++-----------------
 2 files changed, 52 insertions(+), 72 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index b5ad19a6f43f..db2341beca45 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -416,65 +416,54 @@ int __next_cpu_nr(int n, const cpumask_t *srcp);
 
 /*
  * The following particular system cpumasks and operations manage
- * possible, present, active and online cpus.  Each of them is a fixed size
- * bitmap of size NR_CPUS.
+ * possible, present, active and online cpus.
  *
- *  #ifdef CONFIG_HOTPLUG_CPU
- *     cpu_possible_map - has bit 'cpu' set iff cpu is populatable
- *     cpu_present_map  - has bit 'cpu' set iff cpu is populated
- *     cpu_online_map   - has bit 'cpu' set iff cpu available to scheduler
- *     cpu_active_map   - has bit 'cpu' set iff cpu available to migration
- *  #else
- *     cpu_possible_map - has bit 'cpu' set iff cpu is populated
- *     cpu_present_map  - copy of cpu_possible_map
- *     cpu_online_map   - has bit 'cpu' set iff cpu available to scheduler
- *  #endif
+ *     cpu_possible_mask- has bit 'cpu' set iff cpu is populatable
+ *     cpu_present_mask - has bit 'cpu' set iff cpu is populated
+ *     cpu_online_mask  - has bit 'cpu' set iff cpu available to scheduler
+ *     cpu_active_mask  - has bit 'cpu' set iff cpu available to migration
  *
- *  In either case, NR_CPUS is fixed at compile time, as the static
- *  size of these bitmaps.  The cpu_possible_map is fixed at boot
- *  time, as the set of CPU id's that it is possible might ever
- *  be plugged in at anytime during the life of that system boot.
- *  The cpu_present_map is dynamic(*), representing which CPUs
- *  are currently plugged in.  And cpu_online_map is the dynamic
- *  subset of cpu_present_map, indicating those CPUs available
- *  for scheduling.
+ *  If !CONFIG_HOTPLUG_CPU, present == possible, and active == online.
  *
- *  If HOTPLUG is enabled, then cpu_possible_map is forced to have
+ *  The cpu_possible_mask is fixed at boot time, as the set of CPU id's
+ *  that it is possible might ever be plugged in at anytime during the
+ *  life of that system boot.  The cpu_present_mask is dynamic(*),
+ *  representing which CPUs are currently plugged in.  And
+ *  cpu_online_mask is the dynamic subset of cpu_present_mask,
+ *  indicating those CPUs available for scheduling.
+ *
+ *  If HOTPLUG is enabled, then cpu_possible_mask is forced to have
  *  all NR_CPUS bits set, otherwise it is just the set of CPUs that
  *  ACPI reports present at boot.
  *
- *  If HOTPLUG is enabled, then cpu_present_map varies dynamically,
+ *  If HOTPLUG is enabled, then cpu_present_mask varies dynamically,
  *  depending on what ACPI reports as currently plugged in, otherwise
- *  cpu_present_map is just a copy of cpu_possible_map.
+ *  cpu_present_mask is just a copy of cpu_possible_mask.
  *
- *  (*) Well, cpu_present_map is dynamic in the hotplug case.  If not
- *      hotplug, it's a copy of cpu_possible_map, hence fixed at boot.
+ *  (*) Well, cpu_present_mask is dynamic in the hotplug case.  If not
+ *      hotplug, it's a copy of cpu_possible_mask, hence fixed at boot.
  *
  * Subtleties:
  * 1) UP arch's (NR_CPUS == 1, CONFIG_SMP not defined) hardcode
  *    assumption that their single CPU is online.  The UP
- *    cpu_{online,possible,present}_maps are placebos.  Changing them
+ *    cpu_{online,possible,present}_masks are placebos.  Changing them
  *    will have no useful affect on the following num_*_cpus()
  *    and cpu_*() macros in the UP case.  This ugliness is a UP
  *    optimization - don't waste any instructions or memory references
  *    asking if you're online or how many CPUs there are if there is
  *    only one CPU.
- * 2) Most SMP arch's #define some of these maps to be some
- *    other map specific to that arch.  Therefore, the following
- *    must be #define macros, not inlines.  To see why, examine
- *    the assembly code produced by the following.  Note that
- *    set1() writes phys_x_map, but set2() writes x_map:
- *        int x_map, phys_x_map;
- *        #define set1(a) x_map = a
- *        inline void set2(int a) { x_map = a; }
- *        #define x_map phys_x_map
- *        main(){ set1(3); set2(5); }
  */
 
-extern cpumask_t cpu_possible_map;
-extern cpumask_t cpu_online_map;
-extern cpumask_t cpu_present_map;
-extern cpumask_t cpu_active_map;
+extern const struct cpumask *const cpu_possible_mask;
+extern const struct cpumask *const cpu_online_mask;
+extern const struct cpumask *const cpu_present_mask;
+extern const struct cpumask *const cpu_active_mask;
+
+/* These strip const, as traditionally they weren't const. */
+#define cpu_possible_map	(*(cpumask_t *)cpu_possible_mask)
+#define cpu_online_map		(*(cpumask_t *)cpu_online_mask)
+#define cpu_present_map		(*(cpumask_t *)cpu_present_mask)
+#define cpu_active_map		(*(cpumask_t *)cpu_active_mask)
 
 #if NR_CPUS > 1
 #define num_online_cpus()	cpus_weight_nr(cpu_online_map)
@@ -1058,12 +1047,6 @@ static inline void free_bootmem_cpumask_var(cpumask_var_t mask)
 }
 #endif /* CONFIG_CPUMASK_OFFSTACK */
 
-/* The pointer versions of the maps, these will become the primary versions. */
-#define cpu_possible_mask ((const struct cpumask *)&cpu_possible_map)
-#define cpu_online_mask ((const struct cpumask *)&cpu_online_map)
-#define cpu_present_mask ((const struct cpumask *)&cpu_present_map)
-#define cpu_active_mask ((const struct cpumask *)&cpu_active_map)
-
 /* It's common to want to use cpu_all_mask in struct member initializers,
  * so it has to refer to an address rather than a pointer. */
 extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS);
diff --git a/kernel/cpu.c b/kernel/cpu.c
index bae131a1211b..3ddc509b19c5 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -15,30 +15,8 @@
 #include <linux/stop_machine.h>
 #include <linux/mutex.h>
 
-/*
- * Represents all cpu's present in the system
- * In systems capable of hotplug, this map could dynamically grow
- * as new cpu's are detected in the system via any platform specific
- * method, such as ACPI for e.g.
- */
-cpumask_t cpu_present_map __read_mostly;
-EXPORT_SYMBOL(cpu_present_map);
-
-/*
- * Represents all cpu's that are currently online.
- */
-cpumask_t cpu_online_map __read_mostly;
-EXPORT_SYMBOL(cpu_online_map);
-
-#ifdef CONFIG_INIT_ALL_POSSIBLE
-cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL;
-#else
-cpumask_t cpu_possible_map __read_mostly;
-#endif
-EXPORT_SYMBOL(cpu_possible_map);
-
 #ifdef CONFIG_SMP
-/* Serializes the updates to cpu_online_map, cpu_present_map */
+/* Serializes the updates to cpu_online_mask, cpu_present_mask */
 static DEFINE_MUTEX(cpu_add_remove_lock);
 
 static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain);
@@ -65,8 +43,6 @@ void __init cpu_hotplug_init(void)
 	cpu_hotplug.refcount = 0;
 }
 
-cpumask_t cpu_active_map;
-
 #ifdef CONFIG_HOTPLUG_CPU
 
 void get_online_cpus(void)
@@ -97,7 +73,7 @@ EXPORT_SYMBOL_GPL(put_online_cpus);
 
 /*
  * The following two API's must be used when attempting
- * to serialize the updates to cpu_online_map, cpu_present_map.
+ * to serialize the updates to cpu_online_mask, cpu_present_mask.
  */
 void cpu_maps_update_begin(void)
 {
@@ -503,3 +479,24 @@ EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
 
 const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
 EXPORT_SYMBOL(cpu_all_bits);
+
+#ifdef CONFIG_INIT_ALL_POSSIBLE
+static DECLARE_BITMAP(cpu_possible_bits, CONFIG_NR_CPUS) __read_mostly
+	= CPU_BITS_ALL;
+#else
+static DECLARE_BITMAP(cpu_possible_bits, CONFIG_NR_CPUS) __read_mostly;
+#endif
+const struct cpumask *const cpu_possible_mask = to_cpumask(cpu_possible_bits);
+EXPORT_SYMBOL(cpu_possible_mask);
+
+static DECLARE_BITMAP(cpu_online_bits, CONFIG_NR_CPUS) __read_mostly;
+const struct cpumask *const cpu_online_mask = to_cpumask(cpu_online_bits);
+EXPORT_SYMBOL(cpu_online_mask);
+
+static DECLARE_BITMAP(cpu_present_bits, CONFIG_NR_CPUS) __read_mostly;
+const struct cpumask *const cpu_present_mask = to_cpumask(cpu_present_bits);
+EXPORT_SYMBOL(cpu_present_mask);
+
+static DECLARE_BITMAP(cpu_active_bits, CONFIG_NR_CPUS) __read_mostly;
+const struct cpumask *const cpu_active_mask = to_cpumask(cpu_active_bits);
+EXPORT_SYMBOL(cpu_active_mask);
-- 
cgit v1.2.3


From ae7a47e72e1a0b5e2b46d1596bc2c22942a73023 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 30 Dec 2008 09:05:15 +1030
Subject: cpumask: make cpumask.h eat its own dogfood.

Changes:
1) cpumask_t to struct cpumask,
2) cpus_weight_nr to cpumask_weight,
3) cpu_isset to cpumask_test_cpu,
4) ->bits to cpumask_bits()
5) cpu_*_map to cpu_*_mask.
6) for_each_cpu_mask_nr to for_each_cpu

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/cpumask.h | 75 +++++++++++++++++++++++++------------------------
 1 file changed, 38 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index db2341beca45..e62a67156c53 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -268,6 +268,25 @@ static inline void __cpus_shift_left(cpumask_t *dstp,
 	bitmap_shift_left(dstp->bits, srcp->bits, n, nbits);
 }
 
+/**
+ * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask *
+ * @bitmap: the bitmap
+ *
+ * There are a few places where cpumask_var_t isn't appropriate and
+ * static cpumasks must be used (eg. very early boot), yet we don't
+ * expose the definition of 'struct cpumask'.
+ *
+ * This does the conversion, and can be used as a constant initializer.
+ */
+#define to_cpumask(bitmap)						\
+	((struct cpumask *)(1 ? (bitmap)				\
+			    : (void *)sizeof(__check_is_bitmap(bitmap))))
+
+static inline int __check_is_bitmap(const unsigned long *bitmap)
+{
+	return 1;
+}
+
 /*
  * Special-case data structure for "single bit set only" constant CPU masks.
  *
@@ -278,11 +297,11 @@ static inline void __cpus_shift_left(cpumask_t *dstp,
 extern const unsigned long
 	cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)];
 
-static inline const cpumask_t *get_cpu_mask(unsigned int cpu)
+static inline const struct cpumask *get_cpu_mask(unsigned int cpu)
 {
 	const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG];
 	p -= cpu / BITS_PER_LONG;
-	return (const cpumask_t *)p;
+	return to_cpumask(p);
 }
 
 /*
@@ -466,13 +485,13 @@ extern const struct cpumask *const cpu_active_mask;
 #define cpu_active_map		(*(cpumask_t *)cpu_active_mask)
 
 #if NR_CPUS > 1
-#define num_online_cpus()	cpus_weight_nr(cpu_online_map)
-#define num_possible_cpus()	cpus_weight_nr(cpu_possible_map)
-#define num_present_cpus()	cpus_weight_nr(cpu_present_map)
-#define cpu_online(cpu)		cpu_isset((cpu), cpu_online_map)
-#define cpu_possible(cpu)	cpu_isset((cpu), cpu_possible_map)
-#define cpu_present(cpu)	cpu_isset((cpu), cpu_present_map)
-#define cpu_active(cpu)		cpu_isset((cpu), cpu_active_map)
+#define num_online_cpus()	cpumask_weight(cpu_online_mask)
+#define num_possible_cpus()	cpumask_weight(cpu_possible_mask)
+#define num_present_cpus()	cpumask_weight(cpu_present_mask)
+#define cpu_online(cpu)		cpumask_test_cpu((cpu), cpu_online_mask)
+#define cpu_possible(cpu)	cpumask_test_cpu((cpu), cpu_possible_mask)
+#define cpu_present(cpu)	cpumask_test_cpu((cpu), cpu_present_mask)
+#define cpu_active(cpu)		cpumask_test_cpu((cpu), cpu_active_mask)
 #else
 #define num_online_cpus()	1
 #define num_possible_cpus()	1
@@ -485,10 +504,6 @@ extern const struct cpumask *const cpu_active_mask;
 
 #define cpu_is_offline(cpu)	unlikely(!cpu_online(cpu))
 
-#define for_each_possible_cpu(cpu) for_each_cpu_mask_nr((cpu), cpu_possible_map)
-#define for_each_online_cpu(cpu)   for_each_cpu_mask_nr((cpu), cpu_online_map)
-#define for_each_present_cpu(cpu)  for_each_cpu_mask_nr((cpu), cpu_present_map)
-
 /* These are the new versions of the cpumask operators: passed by pointer.
  * The older versions will be implemented in terms of these, then deleted. */
 #define cpumask_bits(maskp) ((maskp)->bits)
@@ -676,7 +691,7 @@ static inline void cpumask_clear_cpu(int cpu, struct cpumask *dstp)
  * No static inline type checking - see Subtlety (1) above.
  */
 #define cpumask_test_cpu(cpu, cpumask) \
-	test_bit(cpumask_check(cpu), (cpumask)->bits)
+	test_bit(cpumask_check(cpu), cpumask_bits((cpumask)))
 
 /**
  * cpumask_test_and_set_cpu - atomically test and set a cpu in a cpumask
@@ -919,7 +934,7 @@ static inline void cpumask_copy(struct cpumask *dstp,
 static inline int cpumask_scnprintf(char *buf, int len,
 				    const struct cpumask *srcp)
 {
-	return bitmap_scnprintf(buf, len, srcp->bits, nr_cpumask_bits);
+	return bitmap_scnprintf(buf, len, cpumask_bits(srcp), nr_cpumask_bits);
 }
 
 /**
@@ -933,7 +948,7 @@ static inline int cpumask_scnprintf(char *buf, int len,
 static inline int cpumask_parse_user(const char __user *buf, int len,
 				     struct cpumask *dstp)
 {
-	return bitmap_parse_user(buf, len, dstp->bits, nr_cpumask_bits);
+	return bitmap_parse_user(buf, len, cpumask_bits(dstp), nr_cpumask_bits);
 }
 
 /**
@@ -948,7 +963,8 @@ static inline int cpumask_parse_user(const char __user *buf, int len,
 static inline int cpulist_scnprintf(char *buf, int len,
 				    const struct cpumask *srcp)
 {
-	return bitmap_scnlistprintf(buf, len, srcp->bits, nr_cpumask_bits);
+	return bitmap_scnlistprintf(buf, len, cpumask_bits(srcp),
+				    nr_cpumask_bits);
 }
 
 /**
@@ -961,26 +977,7 @@ static inline int cpulist_scnprintf(char *buf, int len,
  */
 static inline int cpulist_parse(const char *buf, struct cpumask *dstp)
 {
-	return bitmap_parselist(buf, dstp->bits, nr_cpumask_bits);
-}
-
-/**
- * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask *
- * @bitmap: the bitmap
- *
- * There are a few places where cpumask_var_t isn't appropriate and
- * static cpumasks must be used (eg. very early boot), yet we don't
- * expose the definition of 'struct cpumask'.
- *
- * This does the conversion, and can be used as a constant initializer.
- */
-#define to_cpumask(bitmap)						\
-	((struct cpumask *)(1 ? (bitmap)				\
-			    : (void *)sizeof(__check_is_bitmap(bitmap))))
-
-static inline int __check_is_bitmap(const unsigned long *bitmap)
-{
-	return 1;
+	return bitmap_parselist(buf, cpumask_bits(dstp), nr_cpumask_bits);
 }
 
 /**
@@ -1055,6 +1052,10 @@ extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS);
 /* First bits of cpu_bit_bitmap are in fact unset. */
 #define cpu_none_mask to_cpumask(cpu_bit_bitmap[0])
 
+#define for_each_possible_cpu(cpu) for_each_cpu((cpu), cpu_possible_mask)
+#define for_each_online_cpu(cpu)   for_each_cpu((cpu), cpu_online_mask)
+#define for_each_present_cpu(cpu)  for_each_cpu((cpu), cpu_present_mask)
+
 /* Wrappers for arch boot code to manipulate normally-constant masks */
 static inline void set_cpu_possible(unsigned int cpu, bool possible)
 {
-- 
cgit v1.2.3


From 3fa41520696fec2815e2d88fbcccdda77ba4d693 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 30 Dec 2008 09:05:16 +1030
Subject: cpumask: make set_cpu_*/init_cpu_* out-of-line

They're only for use in boot/cpu hotplug code anyway, and this avoids
the use of deprecated cpu_*_map.

Stephen Rothwell points out that gcc 4.2.4 (on powerpc at least)
didn't like the cast away of const anyway:

  include/linux/cpumask.h: In function 'set_cpu_possible':
  include/linux/cpumask.h:1052: warning: passing argument 2 of 'cpumask_set_cpu' discards qualifiers from pointer target type

So this kills two birds with one stone.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/cpumask.h | 53 +++++++------------------------------------------
 kernel/cpu.c            | 47 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index e62a67156c53..7c178a6baae3 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -1057,50 +1057,11 @@ extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS);
 #define for_each_present_cpu(cpu)  for_each_cpu((cpu), cpu_present_mask)
 
 /* Wrappers for arch boot code to manipulate normally-constant masks */
-static inline void set_cpu_possible(unsigned int cpu, bool possible)
-{
-	if (possible)
-		cpumask_set_cpu(cpu, &cpu_possible_map);
-	else
-		cpumask_clear_cpu(cpu, &cpu_possible_map);
-}
-
-static inline void set_cpu_present(unsigned int cpu, bool present)
-{
-	if (present)
-		cpumask_set_cpu(cpu, &cpu_present_map);
-	else
-		cpumask_clear_cpu(cpu, &cpu_present_map);
-}
-
-static inline void set_cpu_online(unsigned int cpu, bool online)
-{
-	if (online)
-		cpumask_set_cpu(cpu, &cpu_online_map);
-	else
-		cpumask_clear_cpu(cpu, &cpu_online_map);
-}
-
-static inline void set_cpu_active(unsigned int cpu, bool active)
-{
-	if (active)
-		cpumask_set_cpu(cpu, &cpu_active_map);
-	else
-		cpumask_clear_cpu(cpu, &cpu_active_map);
-}
-
-static inline void init_cpu_present(const struct cpumask *src)
-{
-	cpumask_copy(&cpu_present_map, src);
-}
-
-static inline void init_cpu_possible(const struct cpumask *src)
-{
-	cpumask_copy(&cpu_possible_map, src);
-}
-
-static inline void init_cpu_online(const struct cpumask *src)
-{
-	cpumask_copy(&cpu_online_map, src);
-}
+void set_cpu_possible(unsigned int cpu, bool possible);
+void set_cpu_present(unsigned int cpu, bool present);
+void set_cpu_online(unsigned int cpu, bool online);
+void set_cpu_active(unsigned int cpu, bool active);
+void init_cpu_present(const struct cpumask *src);
+void init_cpu_possible(const struct cpumask *src);
+void init_cpu_online(const struct cpumask *src);
 #endif /* __LINUX_CPUMASK_H */
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 3ddc509b19c5..2c9f78f3a2fc 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -500,3 +500,50 @@ EXPORT_SYMBOL(cpu_present_mask);
 static DECLARE_BITMAP(cpu_active_bits, CONFIG_NR_CPUS) __read_mostly;
 const struct cpumask *const cpu_active_mask = to_cpumask(cpu_active_bits);
 EXPORT_SYMBOL(cpu_active_mask);
+
+void set_cpu_possible(unsigned int cpu, bool possible)
+{
+	if (possible)
+		cpumask_set_cpu(cpu, to_cpumask(cpu_possible_bits));
+	else
+		cpumask_clear_cpu(cpu, to_cpumask(cpu_possible_bits));
+}
+
+void set_cpu_present(unsigned int cpu, bool present)
+{
+	if (present)
+		cpumask_set_cpu(cpu, to_cpumask(cpu_present_bits));
+	else
+		cpumask_clear_cpu(cpu, to_cpumask(cpu_present_bits));
+}
+
+void set_cpu_online(unsigned int cpu, bool online)
+{
+	if (online)
+		cpumask_set_cpu(cpu, to_cpumask(cpu_online_bits));
+	else
+		cpumask_clear_cpu(cpu, to_cpumask(cpu_online_bits));
+}
+
+void set_cpu_active(unsigned int cpu, bool active)
+{
+	if (active)
+		cpumask_set_cpu(cpu, to_cpumask(cpu_active_bits));
+	else
+		cpumask_clear_cpu(cpu, to_cpumask(cpu_active_bits));
+}
+
+void init_cpu_present(const struct cpumask *src)
+{
+	cpumask_copy(to_cpumask(cpu_present_bits), src);
+}
+
+void init_cpu_possible(const struct cpumask *src)
+{
+	cpumask_copy(to_cpumask(cpu_possible_bits), src);
+}
+
+void init_cpu_online(const struct cpumask *src)
+{
+	cpumask_copy(to_cpumask(cpu_online_bits), src);
+}
-- 
cgit v1.2.3


From 54b11e6d57a10aa9d0009efd93873e17bffd5d30 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 30 Dec 2008 09:05:16 +1030
Subject: cpumask: smp_call_function_many()

Impact: Implementation change to remove cpumask_t from stack.

Actually change smp_call_function_mask() to smp_call_function_many().
We avoid cpumasks on the stack in this version.

(S390 has its own version, but that's going away apparently).

We have to do some dancing to figure out if 0 or 1 other cpus are in
the mask supplied and the online mask without allocating a tmp
cpumask.  It's still fairly cheap.

We allocate the cpumask at the end of the call_function_data
structure: if allocation fails we fallback to smp_call_function_single
rather than using the baroque quiescing code (which needs a cpumask on
stack).

(Thanks to Hiroshi Shimamoto for spotting several bugs in previous versions!)

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Cc: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Cc: npiggin@suse.de
Cc: axboe@kernel.dk
---
 include/linux/smp.h |  15 +++---
 kernel/smp.c        | 139 ++++++++++++++++++----------------------------------
 2 files changed, 57 insertions(+), 97 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/smp.h b/include/linux/smp.h
index 2f85f3b04bc4..b82466968101 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -67,15 +67,16 @@ extern void smp_cpus_done(unsigned int max_cpus);
  * Call a function on all other processors
  */
 int smp_call_function(void(*func)(void *info), void *info, int wait);
-/* Deprecated: use smp_call_function_many() which uses a cpumask ptr. */
-int smp_call_function_mask(cpumask_t mask, void(*func)(void *info), void *info,
-				int wait);
+void smp_call_function_many(const struct cpumask *mask,
+			    void (*func)(void *info), void *info, bool wait);
 
-static inline void smp_call_function_many(const struct cpumask *mask,
-					  void (*func)(void *info), void *info,
-					  int wait)
+/* Deprecated: Use smp_call_function_many which takes a pointer to the mask. */
+static inline int
+smp_call_function_mask(cpumask_t mask, void(*func)(void *info), void *info,
+		       int wait)
 {
-	smp_call_function_mask(*mask, func, info, wait);
+	smp_call_function_many(&mask, func, info, wait);
+	return 0;
 }
 
 int smp_call_function_single(int cpuid, void (*func) (void *info), void *info,
diff --git a/kernel/smp.c b/kernel/smp.c
index 75c8dde58c55..9f0eafed1399 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -24,8 +24,8 @@ struct call_function_data {
 	struct call_single_data csd;
 	spinlock_t lock;
 	unsigned int refs;
-	cpumask_t cpumask;
 	struct rcu_head rcu_head;
+	unsigned long cpumask_bits[];
 };
 
 struct call_single_queue {
@@ -110,13 +110,13 @@ void generic_smp_call_function_interrupt(void)
 	list_for_each_entry_rcu(data, &call_function_queue, csd.list) {
 		int refs;
 
-		if (!cpu_isset(cpu, data->cpumask))
+		if (!cpumask_test_cpu(cpu, to_cpumask(data->cpumask_bits)))
 			continue;
 
 		data->csd.func(data->csd.info);
 
 		spin_lock(&data->lock);
-		cpu_clear(cpu, data->cpumask);
+		cpumask_clear_cpu(cpu, to_cpumask(data->cpumask_bits));
 		WARN_ON(data->refs == 0);
 		data->refs--;
 		refs = data->refs;
@@ -266,51 +266,13 @@ void __smp_call_function_single(int cpu, struct call_single_data *data)
 	generic_exec_single(cpu, data);
 }
 
-/* Dummy function */
-static void quiesce_dummy(void *unused)
-{
-}
-
-/*
- * Ensure stack based data used in call function mask is safe to free.
- *
- * This is needed by smp_call_function_mask when using on-stack data, because
- * a single call function queue is shared by all CPUs, and any CPU may pick up
- * the data item on the queue at any time before it is deleted. So we need to
- * ensure that all CPUs have transitioned through a quiescent state after
- * this call.
- *
- * This is a very slow function, implemented by sending synchronous IPIs to
- * all possible CPUs. For this reason, we have to alloc data rather than use
- * stack based data even in the case of synchronous calls. The stack based
- * data is then just used for deadlock/oom fallback which will be very rare.
- *
- * If a faster scheme can be made, we could go back to preferring stack based
- * data -- the data allocation/free is non-zero cost.
- */
-static void smp_call_function_mask_quiesce_stack(cpumask_t mask)
-{
-	struct call_single_data data;
-	int cpu;
-
-	data.func = quiesce_dummy;
-	data.info = NULL;
-
-	for_each_cpu_mask(cpu, mask) {
-		data.flags = CSD_FLAG_WAIT;
-		generic_exec_single(cpu, &data);
-	}
-}
-
 /**
- * smp_call_function_mask(): Run a function on a set of other CPUs.
- * @mask: The set of cpus to run on.
+ * smp_call_function_many(): Run a function on a set of other CPUs.
+ * @mask: The set of cpus to run on (only runs on online subset).
  * @func: The function to run. This must be fast and non-blocking.
  * @info: An arbitrary pointer to pass to the function.
  * @wait: If true, wait (atomically) until function has completed on other CPUs.
  *
- * Returns 0 on success, else a negative status code.
- *
  * If @wait is true, then returns once @func has returned. Note that @wait
  * will be implicitly turned on in case of allocation failures, since
  * we fall back to on-stack allocation.
@@ -319,53 +281,57 @@ static void smp_call_function_mask_quiesce_stack(cpumask_t mask)
  * hardware interrupt handler or from a bottom half handler. Preemption
  * must be disabled when calling this function.
  */
-int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
-			   int wait)
+void smp_call_function_many(const struct cpumask *mask,
+			    void (*func)(void *), void *info,
+			    bool wait)
 {
-	struct call_function_data d;
-	struct call_function_data *data = NULL;
-	cpumask_t allbutself;
+	struct call_function_data *data;
 	unsigned long flags;
-	int cpu, num_cpus;
-	int slowpath = 0;
+	int cpu, next_cpu;
 
 	/* Can deadlock when called with interrupts disabled */
 	WARN_ON(irqs_disabled());
 
-	cpu = smp_processor_id();
-	allbutself = cpu_online_map;
-	cpu_clear(cpu, allbutself);
-	cpus_and(mask, mask, allbutself);
-	num_cpus = cpus_weight(mask);
-
-	/*
-	 * If zero CPUs, return. If just a single CPU, turn this request
-	 * into a targetted single call instead since it's faster.
-	 */
-	if (!num_cpus)
-		return 0;
-	else if (num_cpus == 1) {
-		cpu = first_cpu(mask);
-		return smp_call_function_single(cpu, func, info, wait);
+	/* So, what's a CPU they want?  Ignoring this one. */
+	cpu = cpumask_first_and(mask, cpu_online_mask);
+	if (cpu == smp_processor_id())
+		cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
+	/* No online cpus?  We're done. */
+	if (cpu >= nr_cpu_ids)
+		return;
+
+	/* Do we have another CPU which isn't us? */
+	next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
+	if (next_cpu == smp_processor_id())
+		next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask);
+
+	/* Fastpath: do that cpu by itself. */
+	if (next_cpu >= nr_cpu_ids) {
+		smp_call_function_single(cpu, func, info, wait);
+		return;
 	}
 
-	data = kmalloc(sizeof(*data), GFP_ATOMIC);
-	if (data) {
-		data->csd.flags = CSD_FLAG_ALLOC;
-		if (wait)
-			data->csd.flags |= CSD_FLAG_WAIT;
-	} else {
-		data = &d;
-		data->csd.flags = CSD_FLAG_WAIT;
-		wait = 1;
-		slowpath = 1;
+	data = kmalloc(sizeof(*data) + cpumask_size(), GFP_ATOMIC);
+	if (unlikely(!data)) {
+		/* Slow path. */
+		for_each_online_cpu(cpu) {
+			if (cpu == smp_processor_id())
+				continue;
+			if (cpumask_test_cpu(cpu, mask))
+				smp_call_function_single(cpu, func, info, wait);
+		}
+		return;
 	}
 
 	spin_lock_init(&data->lock);
+	data->csd.flags = CSD_FLAG_ALLOC;
+	if (wait)
+		data->csd.flags |= CSD_FLAG_WAIT;
 	data->csd.func = func;
 	data->csd.info = info;
-	data->refs = num_cpus;
-	data->cpumask = mask;
+	cpumask_and(to_cpumask(data->cpumask_bits), mask, cpu_online_mask);
+	cpumask_clear_cpu(smp_processor_id(), to_cpumask(data->cpumask_bits));
+	data->refs = cpumask_weight(to_cpumask(data->cpumask_bits));
 
 	spin_lock_irqsave(&call_function_lock, flags);
 	list_add_tail_rcu(&data->csd.list, &call_function_queue);
@@ -377,18 +343,13 @@ int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
 	smp_mb();
 
 	/* Send a message to all CPUs in the map */
-	arch_send_call_function_ipi(mask);
+	arch_send_call_function_ipi(*to_cpumask(data->cpumask_bits));
 
 	/* optionally wait for the CPUs to complete */
-	if (wait) {
+	if (wait)
 		csd_flag_wait(&data->csd);
-		if (unlikely(slowpath))
-			smp_call_function_mask_quiesce_stack(mask);
-	}
-
-	return 0;
 }
-EXPORT_SYMBOL(smp_call_function_mask);
+EXPORT_SYMBOL(smp_call_function_many);
 
 /**
  * smp_call_function(): Run a function on all other CPUs.
@@ -396,7 +357,7 @@ EXPORT_SYMBOL(smp_call_function_mask);
  * @info: An arbitrary pointer to pass to the function.
  * @wait: If true, wait (atomically) until function has completed on other CPUs.
  *
- * Returns 0 on success, else a negative status code.
+ * Returns 0.
  *
  * If @wait is true, then returns once @func has returned; otherwise
  * it returns just before the target cpu calls @func. In case of allocation
@@ -407,12 +368,10 @@ EXPORT_SYMBOL(smp_call_function_mask);
  */
 int smp_call_function(void (*func)(void *), void *info, int wait)
 {
-	int ret;
-
 	preempt_disable();
-	ret = smp_call_function_mask(cpu_online_map, func, info, wait);
+	smp_call_function_many(cpu_online_mask, func, info, wait);
 	preempt_enable();
-	return ret;
+	return 0;
 }
 EXPORT_SYMBOL(smp_call_function);
 
-- 
cgit v1.2.3


From e12f0102ac81d660c9f801d0a0e10ccf4537a9de Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 30 Dec 2008 09:05:19 +1030
Subject: cpumask: Use nr_cpu_ids in seq_cpumask

Impact: cleanup, futureproof

nr_cpu_ids is the (badly named) runtime limit on possible CPU numbers;
ie. the variable version of NR_CPUS.

With the new cpumask operators, only bits less than this are defined.
So we should use it everywhere, rather than NR_CPUS.  Eventually this
will make it possible to allocate cpumasks of the minimal length at runtime.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/seq_file.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index 952e0187ba16..40ea5058c2ec 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -54,7 +54,7 @@ int seq_bitmap(struct seq_file *m, const unsigned long *bits,
 				   unsigned int nr_bits);
 static inline int seq_cpumask(struct seq_file *m, const struct cpumask *mask)
 {
-	return seq_bitmap(m, mask->bits, NR_CPUS);
+	return seq_bitmap(m, mask->bits, nr_cpu_ids);
 }
 
 static inline int seq_nodemask(struct seq_file *m, nodemask_t *mask)
-- 
cgit v1.2.3


From 480daab42c4dd74b3c07031ddf9031251c530c77 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 30 Dec 2008 09:25:56 -0600
Subject: virtio: Don't use PAGE_SIZE in virtio_pci.c

The virtio PCI devices don't depend on the guest page size.  This matters
now PowerPC virtio is gaining ground (they like 64k pages).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/virtio/virtio_pci.c | 2 +-
 include/linux/virtio_pci.h  | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index e37d686edfe0..c2cd69ba0a8c 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -245,7 +245,7 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index,
 	}
 
 	/* activate the queue */
-	iowrite32(virt_to_phys(info->queue) >> PAGE_SHIFT,
+	iowrite32(virt_to_phys(info->queue) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT,
 		  vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
 
 	/* create the vring */
diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h
index cdef35742932..e13d7ebcf576 100644
--- a/include/linux/virtio_pci.h
+++ b/include/linux/virtio_pci.h
@@ -53,4 +53,8 @@
 
 /* Virtio ABI version, this must match exactly */
 #define VIRTIO_PCI_ABI_VERSION		0
+
+/* How many bits to shift physical queue address written to QUEUE_PFN.
+ * 12 is historical, and due to x86 page size. */
+#define VIRTIO_PCI_QUEUE_ADDR_SHIFT	12
 #endif
-- 
cgit v1.2.3


From 5f0d1d7f2286c8a02dab69f5f0bd51681fab161e Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 30 Dec 2008 09:25:57 -0600
Subject: virtio: rename 'pagesize' arg to vring_init/vring_size

It's really the alignment desired for consumer/producer separation;
historically this x86 pagesize, but with PowerPC it'll still be x86
pagesize.  And in theory lguest could choose a different value.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/virtio_ring.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
index c4a598fb3826..01bf3124e312 100644
--- a/include/linux/virtio_ring.h
+++ b/include/linux/virtio_ring.h
@@ -83,7 +83,7 @@ struct vring {
  *	__u16 avail_idx;
  *	__u16 available[num];
  *
- *	// Padding to the next page boundary.
+ *	// Padding to the next align boundary.
  *	char pad[];
  *
  *	// A ring of used descriptor heads with free-running index.
@@ -93,19 +93,19 @@ struct vring {
  * };
  */
 static inline void vring_init(struct vring *vr, unsigned int num, void *p,
-			      unsigned long pagesize)
+			      unsigned long align)
 {
 	vr->num = num;
 	vr->desc = p;
 	vr->avail = p + num*sizeof(struct vring_desc);
-	vr->used = (void *)(((unsigned long)&vr->avail->ring[num] + pagesize-1)
-			    & ~(pagesize - 1));
+	vr->used = (void *)(((unsigned long)&vr->avail->ring[num] + align-1)
+			    & ~(align - 1));
 }
 
-static inline unsigned vring_size(unsigned int num, unsigned long pagesize)
+static inline unsigned vring_size(unsigned int num, unsigned long align)
 {
 	return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (2 + num)
-		 + pagesize - 1) & ~(pagesize - 1))
+		 + align - 1) & ~(align - 1))
 		+ sizeof(__u16) * 2 + sizeof(struct vring_used_elem) * num;
 }
 
-- 
cgit v1.2.3


From 498af14783935af487d17dbee4ac451783cbc2b7 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 30 Dec 2008 09:25:57 -0600
Subject: virtio: Don't use PAGE_SIZE for vring alignment in virtio_pci.

That doesn't work for non-4k guests which are now appearing.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/virtio/virtio_pci.c | 4 ++--
 include/linux/virtio_pci.h  | 4 ++++
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index c2cd69ba0a8c..f28643f3a4e8 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -237,7 +237,7 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index,
 	info->queue_index = index;
 	info->num = num;
 
-	size = PAGE_ALIGN(vring_size(num, PAGE_SIZE));
+	size = PAGE_ALIGN(vring_size(num, VIRTIO_PCI_VRING_ALIGN));
 	info->queue = alloc_pages_exact(size, GFP_KERNEL|__GFP_ZERO);
 	if (info->queue == NULL) {
 		err = -ENOMEM;
@@ -290,7 +290,7 @@ static void vp_del_vq(struct virtqueue *vq)
 	iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
 	iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
 
-	size = PAGE_ALIGN(vring_size(info->num, PAGE_SIZE));
+	size = PAGE_ALIGN(vring_size(info->num, VIRTIO_PCI_VRING_ALIGN));
 	free_pages_exact(info->queue, size);
 	kfree(info);
 }
diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h
index e13d7ebcf576..cd0fd5d181a6 100644
--- a/include/linux/virtio_pci.h
+++ b/include/linux/virtio_pci.h
@@ -57,4 +57,8 @@
 /* How many bits to shift physical queue address written to QUEUE_PFN.
  * 12 is historical, and due to x86 page size. */
 #define VIRTIO_PCI_QUEUE_ADDR_SHIFT	12
+
+/* The alignment to use between consumer and producer parts of vring.
+ * x86 pagesize again. */
+#define VIRTIO_PCI_VRING_ALIGN		4096
 #endif
-- 
cgit v1.2.3


From 2966af73e70dee461c256b5eb877b2ff757f8c82 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 30 Dec 2008 09:25:58 -0600
Subject: virtio: use LGUEST_VRING_ALIGN instead of relying on pagesize

This doesn't really matter, since lguest is i386 only at the moment,
but we could actually choose a different value.  (lguest doesn't have
a guarenteed ABI).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 Documentation/lguest/lguest.c   | 6 +++---
 drivers/lguest/lguest_device.c  | 2 +-
 include/linux/lguest_launcher.h | 4 ++++
 3 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index 804520633fcf..aa2574ca94c7 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -1030,7 +1030,7 @@ static void update_device_status(struct device *dev)
 		/* Zero out the virtqueues. */
 		for (vq = dev->vq; vq; vq = vq->next) {
 			memset(vq->vring.desc, 0,
-			       vring_size(vq->config.num, getpagesize()));
+			       vring_size(vq->config.num, LGUEST_VRING_ALIGN));
 			lg_last_avail(vq) = 0;
 		}
 	} else if (dev->desc->status & VIRTIO_CONFIG_S_FAILED) {
@@ -1211,7 +1211,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs,
 	void *p;
 
 	/* First we need some memory for this virtqueue. */
-	pages = (vring_size(num_descs, getpagesize()) + getpagesize() - 1)
+	pages = (vring_size(num_descs, LGUEST_VRING_ALIGN) + getpagesize() - 1)
 		/ getpagesize();
 	p = get_pages(pages);
 
@@ -1228,7 +1228,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs,
 	vq->config.pfn = to_guest_phys(p) / getpagesize();
 
 	/* Initialize the vring. */
-	vring_init(&vq->vring, num_descs, p, getpagesize());
+	vring_init(&vq->vring, num_descs, p, LGUEST_VRING_ALIGN);
 
 	/* Append virtqueue to this device's descriptor.  We use
 	 * device_config() to get the end of the device's current virtqueues;
diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c
index a661bbdae3d6..f062dc55c573 100644
--- a/drivers/lguest/lguest_device.c
+++ b/drivers/lguest/lguest_device.c
@@ -250,7 +250,7 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev,
 	/* Figure out how many pages the ring will take, and map that memory */
 	lvq->pages = lguest_map((unsigned long)lvq->config.pfn << PAGE_SHIFT,
 				DIV_ROUND_UP(vring_size(lvq->config.num,
-							PAGE_SIZE),
+							LGUEST_VRING_ALIGN),
 					     PAGE_SIZE));
 	if (!lvq->pages) {
 		err = -ENOMEM;
diff --git a/include/linux/lguest_launcher.h b/include/linux/lguest_launcher.h
index e7217dc58f39..bd0eba760522 100644
--- a/include/linux/lguest_launcher.h
+++ b/include/linux/lguest_launcher.h
@@ -59,4 +59,8 @@ enum lguest_req
 	LHREQ_IRQ, /* + irq */
 	LHREQ_BREAK, /* + on/off flag (on blocks until someone does off) */
 };
+
+/* The alignment to use between consumer and producer parts of vring.
+ * x86 pagesize for historical reasons. */
+#define LGUEST_VRING_ALIGN	4096
 #endif /* _LINUX_LGUEST_LAUNCHER */
-- 
cgit v1.2.3


From 87c7d57c17ade5024d95b6ca0da249da49b0672a Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 30 Dec 2008 09:26:03 -0600
Subject: virtio: hand virtio ring alignment as argument to vring_new_virtqueue

This allows each virtio user to hand in the alignment appropriate to
their virtio_ring structures.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 drivers/lguest/lguest_device.c | 4 ++--
 drivers/s390/kvm/kvm_virtio.c  | 3 ++-
 drivers/virtio/virtio_pci.c    | 4 ++--
 drivers/virtio/virtio_ring.c   | 3 ++-
 include/linux/virtio_ring.h    | 1 +
 5 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c
index f062dc55c573..b02f6bcb64c4 100644
--- a/drivers/lguest/lguest_device.c
+++ b/drivers/lguest/lguest_device.c
@@ -259,8 +259,8 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev,
 
 	/* OK, tell virtio_ring.c to set up a virtqueue now we know its size
 	 * and we've got a pointer to its pages. */
-	vq = vring_new_virtqueue(lvq->config.num, vdev, lvq->pages,
-				 lg_notify, callback);
+	vq = vring_new_virtqueue(lvq->config.num, LGUEST_VRING_ALIGN,
+				 vdev, lvq->pages, lg_notify, callback);
 	if (!vq) {
 		err = -ENOMEM;
 		goto unmap;
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
index f5a2dbe75575..4e8354aa8576 100644
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -193,7 +193,8 @@ static struct virtqueue *kvm_find_vq(struct virtio_device *vdev,
 	if (err)
 		goto out;
 
-	vq = vring_new_virtqueue(config->num, vdev, (void *) config->address,
+	vq = vring_new_virtqueue(config->num, KVM_S390_VIRTIO_RING_ALIGN,
+				 vdev, (void *) config->address,
 				 kvm_notify, callback);
 	if (!vq) {
 		err = -ENOMEM;
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index f28643f3a4e8..7462a51e820b 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -249,8 +249,8 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index,
 		  vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
 
 	/* create the vring */
-	vq = vring_new_virtqueue(info->num, vdev, info->queue,
-				 vp_notify, callback);
+	vq = vring_new_virtqueue(info->num, VIRTIO_PCI_VRING_ALIGN,
+				 vdev, info->queue, vp_notify, callback);
 	if (!vq) {
 		err = -ENOMEM;
 		goto out_activate_queue;
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 6eb5303fed11..5777196bf6c9 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -274,6 +274,7 @@ static struct virtqueue_ops vring_vq_ops = {
 };
 
 struct virtqueue *vring_new_virtqueue(unsigned int num,
+				      unsigned int vring_align,
 				      struct virtio_device *vdev,
 				      void *pages,
 				      void (*notify)(struct virtqueue *),
@@ -292,7 +293,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int num,
 	if (!vq)
 		return NULL;
 
-	vring_init(&vq->vring, num, pages, PAGE_SIZE);
+	vring_init(&vq->vring, num, pages, vring_align);
 	vq->vq.callback = callback;
 	vq->vq.vdev = vdev;
 	vq->vq.vq_ops = &vring_vq_ops;
diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
index 01bf3124e312..71e03722fb59 100644
--- a/include/linux/virtio_ring.h
+++ b/include/linux/virtio_ring.h
@@ -115,6 +115,7 @@ struct virtio_device;
 struct virtqueue;
 
 struct virtqueue *vring_new_virtqueue(unsigned int num,
+				      unsigned int vring_align,
 				      struct virtio_device *vdev,
 				      void *pages,
 				      void (*notify)(struct virtqueue *vq),
-- 
cgit v1.2.3


From 1b4aa2faeca1b9922033daf2475b6fc13b0ffea6 Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Thu, 13 Nov 2008 15:48:33 -0600
Subject: virtio: avoid implicit use of Linux page size in balloon interface

Make the balloon interface always use 4K pages, and convert Linux pfns if
necessary. This patch assumes that Linux's PAGE_SHIFT will never be less than
12.

Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (modified)
---
 drivers/virtio/virtio_balloon.c | 13 +++++++++++--
 include/linux/virtio_balloon.h  |  3 +++
 2 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 62eab43152d2..59268266b79a 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -56,6 +56,15 @@ static struct virtio_device_id id_table[] = {
 	{ 0 },
 };
 
+static u32 page_to_balloon_pfn(struct page *page)
+{
+	unsigned long pfn = page_to_pfn(page);
+
+	BUILD_BUG_ON(PAGE_SHIFT < VIRTIO_BALLOON_PFN_SHIFT);
+	/* Convert pfn from Linux page size to balloon page size. */
+	return pfn >> (PAGE_SHIFT - VIRTIO_BALLOON_PFN_SHIFT);
+}
+
 static void balloon_ack(struct virtqueue *vq)
 {
 	struct virtio_balloon *vb;
@@ -99,7 +108,7 @@ static void fill_balloon(struct virtio_balloon *vb, size_t num)
 			msleep(200);
 			break;
 		}
-		vb->pfns[vb->num_pfns] = page_to_pfn(page);
+		vb->pfns[vb->num_pfns] = page_to_balloon_pfn(page);
 		totalram_pages--;
 		vb->num_pages++;
 		list_add(&page->lru, &vb->pages);
@@ -132,7 +141,7 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num)
 	for (vb->num_pfns = 0; vb->num_pfns < num; vb->num_pfns++) {
 		page = list_first_entry(&vb->pages, struct page, lru);
 		list_del(&page->lru);
-		vb->pfns[vb->num_pfns] = page_to_pfn(page);
+		vb->pfns[vb->num_pfns] = page_to_balloon_pfn(page);
 		vb->num_pages--;
 	}
 
diff --git a/include/linux/virtio_balloon.h b/include/linux/virtio_balloon.h
index c30c7bfbf39b..8726ff77763e 100644
--- a/include/linux/virtio_balloon.h
+++ b/include/linux/virtio_balloon.h
@@ -10,6 +10,9 @@
 /* The feature bitmap for virtio balloon */
 #define VIRTIO_BALLOON_F_MUST_TELL_HOST	0 /* Tell before reclaiming pages */
 
+/* Size of a PFN in the balloon interface. */
+#define VIRTIO_BALLOON_PFN_SHIFT 12
+
 struct virtio_balloon_config
 {
 	/* Number of pages host wants Guest to give up. */
-- 
cgit v1.2.3


From c29834584ea4eafccf2f62a0b8a32e64f792044c Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Tue, 25 Nov 2008 13:36:26 +0100
Subject: virtio_console: support console resizing

this patch uses the new hvc callback hvc_resize to set the window size
which allows to change the tty size of hvc_console via a hvc_resize
function.

I have added a new feature bit VIRTIO_CONSOLE_F_SIZE. The driver will
change the window size on tty open and via the config_changed callback
of the transport. Currently lguest and kvm_s390 have not implemented this
callback, but the callback can be implemented at a later point in time.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/char/hvc_console.c     |  1 +
 drivers/char/virtio_console.c  | 30 +++++++++++++++++++++++++++++-
 include/linux/virtio_console.h | 11 +++++++++++
 3 files changed, 41 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c
index fb57f67bb427..0587b66d6fc7 100644
--- a/drivers/char/hvc_console.c
+++ b/drivers/char/hvc_console.c
@@ -695,6 +695,7 @@ void hvc_resize(struct hvc_struct *hp, struct winsize ws)
 	hp->ws = ws;
 	schedule_work(&hp->tty_resize);
 }
+EXPORT_SYMBOL_GPL(hvc_resize);
 
 /*
  * This kthread is either polling or interrupt driven.  This is determined by
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 3fb0d2c88ba5..ff6f5a4b58fb 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -137,13 +137,34 @@ int __init virtio_cons_early_init(int (*put_chars)(u32, const char *, int))
 	return hvc_instantiate(0, 0, &virtio_cons);
 }
 
+/*
+ * virtio console configuration. This supports:
+ * - console resize
+ */
+static void virtcons_apply_config(struct virtio_device *dev)
+{
+	struct winsize ws;
+
+	if (virtio_has_feature(dev, VIRTIO_CONSOLE_F_SIZE)) {
+		dev->config->get(dev,
+				 offsetof(struct virtio_console_config, cols),
+				 &ws.ws_col, sizeof(u16));
+		dev->config->get(dev,
+				 offsetof(struct virtio_console_config, rows),
+				 &ws.ws_row, sizeof(u16));
+		hvc_resize(hvc, ws);
+	}
+}
+
 /*
  * we support only one console, the hvc struct is a global var
- * There is no need to do anything
+ * We set the configuration at this point, since we now have a tty
  */
 static int notifier_add_vio(struct hvc_struct *hp, int data)
 {
 	hp->irq_requested = 1;
+	virtcons_apply_config(vdev);
+
 	return 0;
 }
 
@@ -234,11 +255,18 @@ static struct virtio_device_id id_table[] = {
 	{ 0 },
 };
 
+static unsigned int features[] = {
+	VIRTIO_CONSOLE_F_SIZE,
+};
+
 static struct virtio_driver virtio_console = {
+	.feature_table = features,
+	.feature_table_size = ARRAY_SIZE(features),
 	.driver.name =	KBUILD_MODNAME,
 	.driver.owner =	THIS_MODULE,
 	.id_table =	id_table,
 	.probe =	virtcons_probe,
+	.config_changed = virtcons_apply_config,
 };
 
 static int __init init(void)
diff --git a/include/linux/virtio_console.h b/include/linux/virtio_console.h
index 19a0da0dba41..7615ffcdd555 100644
--- a/include/linux/virtio_console.h
+++ b/include/linux/virtio_console.h
@@ -7,6 +7,17 @@
 /* The ID for virtio console */
 #define VIRTIO_ID_CONSOLE	3
 
+/* Feature bits */
+#define VIRTIO_CONSOLE_F_SIZE	0	/* Does host provide console size? */
+
+struct virtio_console_config {
+	/* colums of the screens */
+	__u16 cols;
+	/* rows of the screens */
+	__u16 rows;
+} __attribute__((packed));
+
+
 #ifdef __KERNEL__
 int __init virtio_cons_early_init(int (*put_chars)(u32, const char *, int));
 #endif /* __KERNEL__ */
-- 
cgit v1.2.3


From 58a24566449892dda409b9ad92c2e56c76c5670c Mon Sep 17 00:00:00 2001
From: Matias Zabaljauregui <zabaljauregui@gmail.com>
Date: Mon, 29 Sep 2008 01:40:07 -0300
Subject: lguest: move the initial guest page table creation code to the host

This patch moves the initial guest page table creation code to the host,
so the launcher keeps working with PAE enabled configs.

Signed-off-by: Matias Zabaljauregui <zabaljauregui@gmail.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 Documentation/lguest/lguest.c   | 60 ++++------------------------------
 arch/x86/lguest/i386_head.S     | 15 ---------
 drivers/lguest/lg.h             |  2 +-
 drivers/lguest/lguest_user.c    | 13 +++-----
 drivers/lguest/page_tables.c    | 72 +++++++++++++++++++++++++++++++++++++++--
 include/linux/lguest_launcher.h |  2 +-
 6 files changed, 83 insertions(+), 81 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index aa2574ca94c7..f2dbbf3bdeab 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -481,51 +481,6 @@ static unsigned long load_initrd(const char *name, unsigned long mem)
 	/* We return the initrd size. */
 	return len;
 }
-
-/* Once we know how much memory we have we can construct simple linear page
- * tables which set virtual == physical which will get the Guest far enough
- * into the boot to create its own.
- *
- * We lay them out of the way, just below the initrd (which is why we need to
- * know its size here). */
-static unsigned long setup_pagetables(unsigned long mem,
-				      unsigned long initrd_size)
-{
-	unsigned long *pgdir, *linear;
-	unsigned int mapped_pages, i, linear_pages;
-	unsigned int ptes_per_page = getpagesize()/sizeof(void *);
-
-	mapped_pages = mem/getpagesize();
-
-	/* Each PTE page can map ptes_per_page pages: how many do we need? */
-	linear_pages = (mapped_pages + ptes_per_page-1)/ptes_per_page;
-
-	/* We put the toplevel page directory page at the top of memory. */
-	pgdir = from_guest_phys(mem) - initrd_size - getpagesize();
-
-	/* Now we use the next linear_pages pages as pte pages */
-	linear = (void *)pgdir - linear_pages*getpagesize();
-
-	/* Linear mapping is easy: put every page's address into the mapping in
-	 * order.  PAGE_PRESENT contains the flags Present, Writable and
-	 * Executable. */
-	for (i = 0; i < mapped_pages; i++)
-		linear[i] = ((i * getpagesize()) | PAGE_PRESENT);
-
-	/* The top level points to the linear page table pages above. */
-	for (i = 0; i < mapped_pages; i += ptes_per_page) {
-		pgdir[i/ptes_per_page]
-			= ((to_guest_phys(linear) + i*sizeof(void *))
-			   | PAGE_PRESENT);
-	}
-
-	verbose("Linear mapping of %u pages in %u pte pages at %#lx\n",
-		mapped_pages, linear_pages, to_guest_phys(linear));
-
-	/* We return the top level (guest-physical) address: the kernel needs
-	 * to know where it is. */
-	return to_guest_phys(pgdir);
-}
 /*:*/
 
 /* Simple routine to roll all the commandline arguments together with spaces
@@ -548,13 +503,13 @@ static void concat(char *dst, char *args[])
 
 /*L:185 This is where we actually tell the kernel to initialize the Guest.  We
  * saw the arguments it expects when we looked at initialize() in lguest_user.c:
- * the base of Guest "physical" memory, the top physical page to allow, the
- * top level pagetable and the entry point for the Guest. */
-static int tell_kernel(unsigned long pgdir, unsigned long start)
+ * the base of Guest "physical" memory, the top physical page to allow and the
+ * entry point for the Guest. */
+static int tell_kernel(unsigned long start)
 {
 	unsigned long args[] = { LHREQ_INITIALIZE,
 				 (unsigned long)guest_base,
-				 guest_limit / getpagesize(), pgdir, start };
+				 guest_limit / getpagesize(), start };
 	int fd;
 
 	verbose("Guest: %p - %p (%#lx)\n",
@@ -1941,7 +1896,7 @@ int main(int argc, char *argv[])
 {
 	/* Memory, top-level pagetable, code startpoint and size of the
 	 * (optional) initrd. */
-	unsigned long mem = 0, pgdir, start, initrd_size = 0;
+	unsigned long mem = 0, start, initrd_size = 0;
 	/* Two temporaries and the /dev/lguest file descriptor. */
 	int i, c, lguest_fd;
 	/* The boot information for the Guest. */
@@ -2040,9 +1995,6 @@ int main(int argc, char *argv[])
 		boot->hdr.type_of_loader = 0xFF;
 	}
 
-	/* Set up the initial linear pagetables, starting below the initrd. */
-	pgdir = setup_pagetables(mem, initrd_size);
-
 	/* The Linux boot header contains an "E820" memory map: ours is a
 	 * simple, single region. */
 	boot->e820_entries = 1;
@@ -2064,7 +2016,7 @@ int main(int argc, char *argv[])
 
 	/* We tell the kernel to initialize the Guest: this returns the open
 	 * /dev/lguest file descriptor. */
-	lguest_fd = tell_kernel(pgdir, start);
+	lguest_fd = tell_kernel(start);
 
 	/* We clone off a thread, which wakes the Launcher whenever one of the
 	 * input file descriptors needs attention.  We call this the Waker, and
diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S
index 5c7cef34c9e7..10b9bd35a8ff 100644
--- a/arch/x86/lguest/i386_head.S
+++ b/arch/x86/lguest/i386_head.S
@@ -30,21 +30,6 @@ ENTRY(lguest_entry)
 	movl $lguest_data - __PAGE_OFFSET, %edx
 	int $LGUEST_TRAP_ENTRY
 
-	/* The Host put the toplevel pagetable in lguest_data.pgdir.  The movsl
-	 * instruction uses %esi implicitly as the source for the copy we're
-	 * about to do. */
-	movl lguest_data - __PAGE_OFFSET + LGUEST_DATA_pgdir, %esi
-
-	/* Copy first 32 entries of page directory to __PAGE_OFFSET entries.
-	 * This means the first 128M of kernel memory will be mapped at
-	 * PAGE_OFFSET where the kernel expects to run.  This will get it far
-	 * enough through boot to switch to its own pagetables. */
-	movl $32, %ecx
-	movl %esi, %edi
-	addl $((__PAGE_OFFSET >> 22) * 4), %edi
-	rep
-	movsl
-
 	/* Set up the initial stack so we can run C code. */
 	movl $(init_thread_union+THREAD_SIZE),%esp
 
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h
index 5faefeaf6790..f2c641e0bdde 100644
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
@@ -164,7 +164,7 @@ void copy_gdt(const struct lg_cpu *cpu, struct desc_struct *gdt);
 void copy_gdt_tls(const struct lg_cpu *cpu, struct desc_struct *gdt);
 
 /* page_tables.c: */
-int init_guest_pagetable(struct lguest *lg, unsigned long pgtable);
+int init_guest_pagetable(struct lguest *lg);
 void free_guest_pagetable(struct lguest *lg);
 void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable);
 void guest_set_pmd(struct lguest *lg, unsigned long gpgdir, u32 i);
diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c
index e73a000473cc..34bc017b8b3c 100644
--- a/drivers/lguest/lguest_user.c
+++ b/drivers/lguest/lguest_user.c
@@ -146,7 +146,7 @@ static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip)
 	return 0;
 }
 
-/*L:020 The initialization write supplies 4 pointer sized (32 or 64 bit)
+/*L:020 The initialization write supplies 3 pointer sized (32 or 64 bit)
  * values (in addition to the LHREQ_INITIALIZE value).  These are:
  *
  * base: The start of the Guest-physical memory inside the Launcher memory.
@@ -155,9 +155,6 @@ static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip)
  * allowed to access.  The Guest memory lives inside the Launcher, so it sets
  * this to ensure the Guest can only reach its own memory.
  *
- * pgdir: The (Guest-physical) address of the top of the initial Guest
- * pagetables (which are set up by the Launcher).
- *
  * start: The first instruction to execute ("eip" in x86-speak).
  */
 static int initialize(struct file *file, const unsigned long __user *input)
@@ -166,7 +163,7 @@ static int initialize(struct file *file, const unsigned long __user *input)
 	 * Guest. */
 	struct lguest *lg;
 	int err;
-	unsigned long args[4];
+	unsigned long args[3];
 
 	/* We grab the Big Lguest lock, which protects against multiple
 	 * simultaneous initializations. */
@@ -192,14 +189,14 @@ static int initialize(struct file *file, const unsigned long __user *input)
 	lg->mem_base = (void __user *)args[0];
 	lg->pfn_limit = args[1];
 
-	/* This is the first cpu (cpu 0) and it will start booting at args[3] */
-	err = lg_cpu_start(&lg->cpus[0], 0, args[3]);
+	/* This is the first cpu (cpu 0) and it will start booting at args[2] */
+	err = lg_cpu_start(&lg->cpus[0], 0, args[2]);
 	if (err)
 		goto release_guest;
 
 	/* Initialize the Guest's shadow page tables, using the toplevel
 	 * address the Launcher gave us.  This allocates memory, so can fail. */
-	err = init_guest_pagetable(lg, args[2]);
+	err = init_guest_pagetable(lg);
 	if (err)
 		goto free_regs;
 
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
index 81d0c6053447..576a8318221c 100644
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -14,6 +14,7 @@
 #include <linux/percpu.h>
 #include <asm/tlbflush.h>
 #include <asm/uaccess.h>
+#include <asm/bootparam.h>
 #include "lg.h"
 
 /*M:008 We hold reference to pages, which prevents them from being swapped.
@@ -581,15 +582,82 @@ void guest_set_pmd(struct lguest *lg, unsigned long gpgdir, u32 idx)
 		release_pgd(lg, lg->pgdirs[pgdir].pgdir + idx);
 }
 
+/* Once we know how much memory we have we can construct simple identity
+ * (which set virtual == physical) and linear mappings
+ * which will get the Guest far enough into the boot to create its own.
+ *
+ * We lay them out of the way, just below the initrd (which is why we need to
+ * know its size here). */
+static unsigned long setup_pagetables(struct lguest *lg,
+				      unsigned long mem,
+				      unsigned long initrd_size)
+{
+	pgd_t __user *pgdir;
+	pte_t __user *linear;
+	unsigned int mapped_pages, i, linear_pages, phys_linear;
+	unsigned long mem_base = (unsigned long)lg->mem_base;
+
+	/* We have mapped_pages frames to map, so we need
+	 * linear_pages page tables to map them. */
+	mapped_pages = mem / PAGE_SIZE;
+	linear_pages = (mapped_pages + PTRS_PER_PTE - 1) / PTRS_PER_PTE;
+
+	/* We put the toplevel page directory page at the top of memory. */
+	pgdir = (pgd_t *)(mem + mem_base - initrd_size - PAGE_SIZE);
+
+	/* Now we use the next linear_pages pages as pte pages */
+	linear = (void *)pgdir - linear_pages * PAGE_SIZE;
+
+	/* Linear mapping is easy: put every page's address into the
+	 * mapping in order. */
+	for (i = 0; i < mapped_pages; i++) {
+		pte_t pte;
+		pte = pfn_pte(i, __pgprot(_PAGE_PRESENT|_PAGE_RW|_PAGE_USER));
+		if (copy_to_user(&linear[i], &pte, sizeof(pte)) != 0)
+			return -EFAULT;
+	}
+
+	/* The top level points to the linear page table pages above.
+	 * We setup the identity and linear mappings here. */
+	phys_linear = (unsigned long)linear - mem_base;
+	for (i = 0; i < mapped_pages; i += PTRS_PER_PTE) {
+		pgd_t pgd;
+		pgd = __pgd((phys_linear + i * sizeof(pte_t)) |
+			    (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER));
+
+		if (copy_to_user(&pgdir[i / PTRS_PER_PTE], &pgd, sizeof(pgd))
+		    || copy_to_user(&pgdir[pgd_index(PAGE_OFFSET)
+					   + i / PTRS_PER_PTE],
+				    &pgd, sizeof(pgd)))
+			return -EFAULT;
+	}
+
+	/* We return the top level (guest-physical) address: remember where
+	 * this is. */
+	return (unsigned long)pgdir - mem_base;
+}
+
 /*H:500 (vii) Setting up the page tables initially.
  *
  * When a Guest is first created, the Launcher tells us where the toplevel of
  * its first page table is.  We set some things up here: */
-int init_guest_pagetable(struct lguest *lg, unsigned long pgtable)
+int init_guest_pagetable(struct lguest *lg)
 {
+	u64 mem;
+	u32 initrd_size;
+	struct boot_params __user *boot = (struct boot_params *)lg->mem_base;
+
+	/* Get the Guest memory size and the ramdisk size from the boot header
+	 * located at lg->mem_base (Guest address 0). */
+	if (copy_from_user(&mem, &boot->e820_map[0].size, sizeof(mem))
+	    || get_user(initrd_size, &boot->hdr.ramdisk_size))
+		return -EFAULT;
+
 	/* We start on the first shadow page table, and give it a blank PGD
 	 * page. */
-	lg->pgdirs[0].gpgdir = pgtable;
+	lg->pgdirs[0].gpgdir = setup_pagetables(lg, mem, initrd_size);
+	if (IS_ERR_VALUE(lg->pgdirs[0].gpgdir))
+		return lg->pgdirs[0].gpgdir;
 	lg->pgdirs[0].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL);
 	if (!lg->pgdirs[0].pgdir)
 		return -ENOMEM;
diff --git a/include/linux/lguest_launcher.h b/include/linux/lguest_launcher.h
index bd0eba760522..a53407a4165c 100644
--- a/include/linux/lguest_launcher.h
+++ b/include/linux/lguest_launcher.h
@@ -54,7 +54,7 @@ struct lguest_vqconfig {
 /* Write command first word is a request. */
 enum lguest_req
 {
-	LHREQ_INITIALIZE, /* + base, pfnlimit, pgdir, start */
+	LHREQ_INITIALIZE, /* + base, pfnlimit, start */
 	LHREQ_GETDMA, /* No longer used */
 	LHREQ_IRQ, /* + irq */
 	LHREQ_BREAK, /* + on/off flag (on blocks until someone does off) */
-- 
cgit v1.2.3


From 47fea2adfc9e16846bc57c2f64ff233b354fef39 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@infradead.org>
Date: Mon, 29 Dec 2008 23:39:17 +0530
Subject: sched: sched.c declare variables before they get used

Impact: cleanup, avoid sparse warnings

In linux/sched.h moved out sysctl_sched_latency, sysctl_sched_min_granularity,
sysctl_sched_wakeup_granularity, sysctl_sched_shares_ratelimit and
sysctl_sched_shares_thresh from #ifdef CONFIG_SCHED_DEBUG as these variables
are common for both.

Fixes these sparse warnings:
  kernel/sched.c:825:14: warning: symbol 'sysctl_sched_shares_ratelimit' was not declared. Should it be static?
  kernel/sched.c:832:14: warning: symbol 'sysctl_sched_shares_thresh' was not declared. Should it be static?
  kernel/sched_fair.c:37:14: warning: symbol 'sysctl_sched_latency' was not declared. Should it be static?
  kernel/sched_fair.c:43:14: warning: symbol 'sysctl_sched_min_granularity' was not declared. Should it be static?
  kernel/sched_fair.c:72:14: warning: symbol 'sysctl_sched_wakeup_granularity' was not declared. Should it be static?

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8395e715809d..01d9fd268eb0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1651,16 +1651,16 @@ extern void wake_up_idle_cpu(int cpu);
 static inline void wake_up_idle_cpu(int cpu) { }
 #endif
 
-#ifdef CONFIG_SCHED_DEBUG
 extern unsigned int sysctl_sched_latency;
 extern unsigned int sysctl_sched_min_granularity;
 extern unsigned int sysctl_sched_wakeup_granularity;
+extern unsigned int sysctl_sched_shares_ratelimit;
+extern unsigned int sysctl_sched_shares_thresh;
+#ifdef CONFIG_SCHED_DEBUG
 extern unsigned int sysctl_sched_child_runs_first;
 extern unsigned int sysctl_sched_features;
 extern unsigned int sysctl_sched_migration_cost;
 extern unsigned int sysctl_sched_nr_migrate;
-extern unsigned int sysctl_sched_shares_ratelimit;
-extern unsigned int sysctl_sched_shares_thresh;
 
 int sched_nr_latency_handler(struct ctl_table *table, int write,
 		struct file *file, void __user *buffer, size_t *length,
-- 
cgit v1.2.3


From 0877258d98154565def498833ccb208234c85084 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@skynet.be>
Date: Sun, 14 Dec 2008 16:21:16 -0300
Subject: V4L/DVB (9897): v4l2: Add camera zoom controls

The zoom controls move the zoom lens group to a an absolute position, as a
relative displacement or at a given speed until reaching physical device
limits. Positive values move the zoom lens group towards the telephoto
direction, negative values towards the wide-angle direction.

Signed-off-by: Laurent Pinchart <laurent.pinchart@skynet.be>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/linux/videodev2.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index ec311d4616cd..e450a92a622b 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -1118,6 +1118,10 @@ enum  v4l2_exposure_auto_type {
 #define V4L2_CID_FOCUS_RELATIVE			(V4L2_CID_CAMERA_CLASS_BASE+11)
 #define V4L2_CID_FOCUS_AUTO			(V4L2_CID_CAMERA_CLASS_BASE+12)
 
+#define V4L2_CID_ZOOM_ABSOLUTE			(V4L2_CID_CAMERA_CLASS_BASE+13)
+#define V4L2_CID_ZOOM_RELATIVE			(V4L2_CID_CAMERA_CLASS_BASE+14)
+#define V4L2_CID_ZOOM_CONTINUOUS		(V4L2_CID_CAMERA_CLASS_BASE+15)
+
 /*
  *	T U N I N G
  */
-- 
cgit v1.2.3


From 046425f8c4ac431db00c09a6d9fba16560b8e5b9 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@skynet.be>
Date: Sun, 14 Dec 2008 16:22:05 -0300
Subject: V4L/DVB (9898): v4l2: Add privacy control

The privacy control prevents video from being acquired by the camera. A true
value indicates that no image can be captured. Devices that implement the
privacy control must support read access and may support write access.

Signed-off-by: Laurent Pinchart <laurent.pinchart@skynet.be>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/linux/videodev2.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index e450a92a622b..6e6743bd0f92 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -1122,6 +1122,8 @@ enum  v4l2_exposure_auto_type {
 #define V4L2_CID_ZOOM_RELATIVE			(V4L2_CID_CAMERA_CLASS_BASE+14)
 #define V4L2_CID_ZOOM_CONTINUOUS		(V4L2_CID_CAMERA_CLASS_BASE+15)
 
+#define V4L2_CID_PRIVACY			(V4L2_CID_CAMERA_CLASS_BASE+16)
+
 /*
  *	T U N I N G
  */
-- 
cgit v1.2.3


From 92f45badbbaccdbc1be25085292a1e258948e221 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Sun, 21 Dec 2008 10:35:25 -0300
Subject: V4L/DVB (9932): v4l2-compat32: fix 32-64 compatibility module

Added all missing v4l1/2 ioctls and fix several broken conversions.
Partially based on work done by Cody Pisto <cpisto@gmail.com>.

Tested-by: Brandon Jenkins <bcjenkins@tvwhere.com>
Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/v4l2-compat-ioctl32.c | 781 ++++++++++++++++++------------
 include/linux/videodev2.h                 |   2 +
 2 files changed, 467 insertions(+), 316 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/media/video/v4l2-compat-ioctl32.c b/drivers/media/video/v4l2-compat-ioctl32.c
index 9b50b43b76a2..5084773d4f10 100644
--- a/drivers/media/video/v4l2-compat-ioctl32.c
+++ b/drivers/media/video/v4l2-compat-ioctl32.c
@@ -7,12 +7,14 @@
  * Copyright (C) 2001,2002  Andi Kleen, SuSE Labs
  * Copyright (C) 2003       Pavel Machek (pavel@suse.cz)
  * Copyright (C) 2005       Philippe De Muyter (phdm@macqel.be)
+ * Copyright (C) 2008       Hans Verkuil <hverkuil@xs4all.nl>
  *
  * These routines maintain argument size conversion between 32bit and 64bit
  * ioctls.
  */
 
 #include <linux/compat.h>
+#define __OLD_VIDIOC_ /* To allow fixing old calls*/
 #include <linux/videodev.h>
 #include <linux/videodev2.h>
 #include <linux/module.h>
@@ -58,7 +60,6 @@ static int put_video_tuner32(struct video_tuner *kp, struct video_tuner32 __user
 	return 0;
 }
 
-
 struct video_buffer32 {
 	compat_caddr_t base;
 	compat_int_t height, width, depth, bytesperline;
@@ -99,7 +100,7 @@ static int put_video_buffer32(struct video_buffer *kp, struct video_buffer32 __u
 }
 
 struct video_clip32 {
-	s32 x, y, width, height;	/* Its really s32 in videodev.h */
+	s32 x, y, width, height;	/* It's really s32 in videodev.h */
 	compat_caddr_t next;
 };
 
@@ -108,25 +109,72 @@ struct video_window32 {
 	compat_caddr_t clips;
 	compat_int_t clipcount;
 };
-#endif
 
-static int native_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+static int get_video_window32(struct video_window *kp, struct video_window32 __user *up)
 {
-	int ret = -ENOIOCTLCMD;
+	struct video_clip __user *uclips;
+	struct video_clip __user *kclips;
+	compat_caddr_t p;
+	int nclips;
 
-	if (file->f_op->unlocked_ioctl)
-		ret = file->f_op->unlocked_ioctl(file, cmd, arg);
-	else if (file->f_op->ioctl) {
-		lock_kernel();
-		ret = file->f_op->ioctl(file->f_path.dentry->d_inode, file, cmd, arg);
-		unlock_kernel();
+	if (!access_ok(VERIFY_READ, up, sizeof(struct video_window32)))
+		return -EFAULT;
+
+	if (get_user(nclips, &up->clipcount))
+		return -EFAULT;
+
+	if (!access_ok(VERIFY_READ, up, sizeof(struct video_window32)) ||
+	    get_user(kp->x, &up->x) ||
+	    get_user(kp->y, &up->y) ||
+	    get_user(kp->width, &up->width) ||
+	    get_user(kp->height, &up->height) ||
+	    get_user(kp->chromakey, &up->chromakey) ||
+	    get_user(kp->flags, &up->flags) ||
+	    get_user(kp->clipcount, &up->clipcount))
+		return -EFAULT;
+
+	nclips = kp->clipcount;
+	kp->clips = NULL;
+
+	if (nclips == 0)
+		return 0;
+	if (get_user(p, &up->clips))
+		return -EFAULT;
+	uclips = compat_ptr(p);
+
+	/* If nclips < 0, then it is a clipping bitmap of size
+	   VIDEO_CLIPMAP_SIZE */
+	if (nclips < 0) {
+		if (!access_ok(VERIFY_READ, uclips, VIDEO_CLIPMAP_SIZE))
+			return -EFAULT;
+		kp->clips = compat_alloc_user_space(VIDEO_CLIPMAP_SIZE);
+		if (copy_in_user(kp->clips, uclips, VIDEO_CLIPMAP_SIZE))
+			return -EFAULT;
+		return 0;
 	}
 
-	return ret;
-}
+	/* Otherwise it is an array of video_clip structs. */
+	if (!access_ok(VERIFY_READ, uclips, nclips * sizeof(struct video_clip)))
+		return -EFAULT;
 
+	kp->clips = compat_alloc_user_space(nclips * sizeof(struct video_clip));
+	kclips = kp->clips;
+	while (nclips--) {
+		int err;
+
+		err = copy_in_user(&kclips->x, &uclips->x, sizeof(kclips->x));
+		err |= copy_in_user(&kclips->y, &uclips->y, sizeof(kclips->y));
+		err |= copy_in_user(&kclips->width, &uclips->width, sizeof(kclips->width));
+		err |= copy_in_user(&kclips->height, &uclips->height, sizeof(kclips->height));
+		kclips->next = NULL;
+		if (err)
+			return -EFAULT;
+		kclips++;
+		uclips++;
+	}
+	return 0;
+}
 
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
 /* You get back everything except the clips... */
 static int put_video_window32(struct video_window *kp, struct video_window32 __user *up)
 {
@@ -141,8 +189,55 @@ static int put_video_window32(struct video_window *kp, struct video_window32 __u
 			return -EFAULT;
 	return 0;
 }
+
+struct video_code32 {
+	char		loadwhat[16];	/* name or tag of file being passed */
+	compat_int_t	datasize;
+	unsigned char	*data;
+};
+
+static int get_microcode32(struct video_code *kp, struct video_code32 __user *up)
+{
+	if (!access_ok(VERIFY_READ, up, sizeof(struct video_code32)) ||
+		copy_from_user(kp->loadwhat, up->loadwhat, sizeof(up->loadwhat)) ||
+		get_user(kp->datasize, &up->datasize) ||
+		copy_from_user(kp->data, up->data, up->datasize))
+			return -EFAULT;
+	return 0;
+}
+
+#define VIDIOCGTUNER32		_IOWR('v', 4, struct video_tuner32)
+#define VIDIOCSTUNER32		_IOW('v', 5, struct video_tuner32)
+#define VIDIOCGWIN32		_IOR('v', 9, struct video_window32)
+#define VIDIOCSWIN32		_IOW('v', 10, struct video_window32)
+#define VIDIOCGFBUF32		_IOR('v', 11, struct video_buffer32)
+#define VIDIOCSFBUF32		_IOW('v', 12, struct video_buffer32)
+#define VIDIOCGFREQ32		_IOR('v', 14, u32)
+#define VIDIOCSFREQ32		_IOW('v', 15, u32)
+#define VIDIOCSMICROCODE32	_IOW('v', 27, struct video_code32)
+
+#define VIDIOCCAPTURE32		_IOW('v', 8, s32)
+#define VIDIOCSYNC32		_IOW('v', 18, s32)
+#define VIDIOCSWRITEMODE32	_IOW('v', 25, s32)
+
 #endif
 
+static int native_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	int ret = -ENOIOCTLCMD;
+
+	if (file->f_op->unlocked_ioctl)
+		ret = file->f_op->unlocked_ioctl(file, cmd, arg);
+	else if (file->f_op->ioctl) {
+		lock_kernel();
+		ret = file->f_op->ioctl(file->f_path.dentry->d_inode, file, cmd, arg);
+		unlock_kernel();
+	}
+
+	return ret;
+}
+
+
 struct v4l2_clip32 {
 	struct v4l2_rect        c;
 	compat_caddr_t 		next;
@@ -229,12 +324,27 @@ static inline int put_v4l2_vbi_format(struct v4l2_vbi_format *kp, struct v4l2_vb
 	return 0;
 }
 
+static inline int get_v4l2_sliced_vbi_format(struct v4l2_sliced_vbi_format *kp, struct v4l2_sliced_vbi_format __user *up)
+{
+	if (copy_from_user(kp, up, sizeof(struct v4l2_sliced_vbi_format)))
+		return -EFAULT;
+	return 0;
+}
+
+static inline int put_v4l2_sliced_vbi_format(struct v4l2_sliced_vbi_format *kp, struct v4l2_sliced_vbi_format __user *up)
+{
+	if (copy_to_user(up, kp, sizeof(struct v4l2_sliced_vbi_format)))
+		return -EFAULT;
+	return 0;
+}
+
 struct v4l2_format32 {
 	enum v4l2_buf_type type;
 	union {
-		struct v4l2_pix_format	pix;  /* V4L2_BUF_TYPE_VIDEO_CAPTURE */
-		struct v4l2_window32	win;  /* V4L2_BUF_TYPE_VIDEO_OVERLAY */
-		struct v4l2_vbi_format	vbi;  /* V4L2_BUF_TYPE_VBI_CAPTURE */
+		struct v4l2_pix_format	pix;
+		struct v4l2_window32	win;
+		struct v4l2_vbi_format	vbi;
+		struct v4l2_sliced_vbi_format	sliced;
 		__u8	raw_data[200];        /* user-defined */
 	} fmt;
 };
@@ -246,15 +356,27 @@ static int get_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __user
 			return -EFAULT;
 	switch (kp->type) {
 	case V4L2_BUF_TYPE_VIDEO_CAPTURE:
+	case V4L2_BUF_TYPE_VIDEO_OUTPUT:
 		return get_v4l2_pix_format(&kp->fmt.pix, &up->fmt.pix);
 	case V4L2_BUF_TYPE_VIDEO_OVERLAY:
+	case V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY:
 		return get_v4l2_window32(&kp->fmt.win, &up->fmt.win);
 	case V4L2_BUF_TYPE_VBI_CAPTURE:
+	case V4L2_BUF_TYPE_VBI_OUTPUT:
 		return get_v4l2_vbi_format(&kp->fmt.vbi, &up->fmt.vbi);
+	case V4L2_BUF_TYPE_SLICED_VBI_CAPTURE:
+	case V4L2_BUF_TYPE_SLICED_VBI_OUTPUT:
+		return get_v4l2_sliced_vbi_format(&kp->fmt.sliced, &up->fmt.sliced);
+	case V4L2_BUF_TYPE_PRIVATE:
+		if (copy_from_user(kp, up, sizeof(kp->fmt.raw_data)))
+			return -EFAULT;
+		return 0;
+	case 0:
+		return -EINVAL;
 	default:
-		printk(KERN_INFO "compat_ioctl: unexpected VIDIOC_FMT type %d\n",
+		printk(KERN_INFO "compat_ioctl32: unexpected VIDIOC_FMT type %d\n",
 								kp->type);
-		return -ENXIO;
+		return -EINVAL;
 	}
 }
 
@@ -265,31 +387,30 @@ static int put_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __user
 		return -EFAULT;
 	switch (kp->type) {
 	case V4L2_BUF_TYPE_VIDEO_CAPTURE:
+	case V4L2_BUF_TYPE_VIDEO_OUTPUT:
 		return put_v4l2_pix_format(&kp->fmt.pix, &up->fmt.pix);
 	case V4L2_BUF_TYPE_VIDEO_OVERLAY:
+	case V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY:
 		return put_v4l2_window32(&kp->fmt.win, &up->fmt.win);
 	case V4L2_BUF_TYPE_VBI_CAPTURE:
+	case V4L2_BUF_TYPE_VBI_OUTPUT:
 		return put_v4l2_vbi_format(&kp->fmt.vbi, &up->fmt.vbi);
+	case V4L2_BUF_TYPE_SLICED_VBI_CAPTURE:
+	case V4L2_BUF_TYPE_SLICED_VBI_OUTPUT:
+		return put_v4l2_sliced_vbi_format(&kp->fmt.sliced, &up->fmt.sliced);
+	case V4L2_BUF_TYPE_PRIVATE:
+		if (copy_to_user(up, kp, sizeof(up->fmt.raw_data)))
+			return -EFAULT;
+		return 0;
+	case 0:
+		return -EINVAL;
 	default:
-		return -ENXIO;
+		printk(KERN_INFO "compat_ioctl32: unexpected VIDIOC_FMT type %d\n",
+								kp->type);
+		return -EINVAL;
 	}
 }
 
-static inline int get_v4l2_standard(struct v4l2_standard *kp, struct v4l2_standard __user *up)
-{
-	if (copy_from_user(kp, up, sizeof(struct v4l2_standard)))
-		return -EFAULT;
-	return 0;
-
-}
-
-static inline int put_v4l2_standard(struct v4l2_standard *kp, struct v4l2_standard __user *up)
-{
-	if (copy_to_user(up, kp, sizeof(struct v4l2_standard)))
-		return -EFAULT;
-	return 0;
-}
-
 struct v4l2_standard32 {
 	__u32		     index;
 	__u32		     id[2]; /* __u64 would get the alignment wrong */
@@ -321,21 +442,6 @@ static int put_v4l2_standard32(struct v4l2_standard *kp, struct v4l2_standard32
 	return 0;
 }
 
-static inline int get_v4l2_tuner(struct v4l2_tuner *kp, struct v4l2_tuner __user *up)
-{
-	if (copy_from_user(kp, up, sizeof(struct v4l2_tuner)))
-		return -EFAULT;
-	return 0;
-
-}
-
-static inline int put_v4l2_tuner(struct v4l2_tuner *kp, struct v4l2_tuner __user *up)
-{
-	if (copy_to_user(up, kp, sizeof(struct v4l2_tuner)))
-		return -EFAULT;
-	return 0;
-}
-
 struct v4l2_buffer32 {
 	__u32			index;
 	enum v4l2_buf_type      type;
@@ -459,149 +565,143 @@ static int put_v4l2_framebuffer32(struct v4l2_framebuffer *kp, struct v4l2_frame
 	return 0;
 }
 
-static inline int get_v4l2_input32(struct v4l2_input *kp, struct v4l2_input __user *up)
-{
-	if (copy_from_user(kp, up, sizeof(struct v4l2_input) - 4))
-		return -EFAULT;
-	return 0;
-}
-
-static inline int put_v4l2_input32(struct v4l2_input *kp, struct v4l2_input __user *up)
+struct v4l2_input32 {
+	__u32	     index;		/*  Which input */
+	__u8	     name[32];		/*  Label */
+	__u32	     type;		/*  Type of input */
+	__u32	     audioset;		/*  Associated audios (bitfield) */
+	__u32        tuner;             /*  Associated tuner */
+	v4l2_std_id  std;
+	__u32	     status;
+	__u32	     reserved[4];
+} __attribute__ ((packed));
+
+/* The 64-bit v4l2_input struct has extra padding at the end of the struct.
+   Otherwise it is identical to the 32-bit version. */
+static inline int get_v4l2_input32(struct v4l2_input *kp, struct v4l2_input32 __user *up)
 {
-	if (copy_to_user(up, kp, sizeof(struct v4l2_input) - 4))
+	if (copy_from_user(kp, up, sizeof(struct v4l2_input32)))
 		return -EFAULT;
 	return 0;
 }
 
-static inline int get_v4l2_input(struct v4l2_input *kp, struct v4l2_input __user *up)
+static inline int put_v4l2_input32(struct v4l2_input *kp, struct v4l2_input32 __user *up)
 {
-	if (copy_from_user(kp, up, sizeof(struct v4l2_input)))
+	if (copy_to_user(up, kp, sizeof(struct v4l2_input32)))
 		return -EFAULT;
 	return 0;
 }
 
-static inline int put_v4l2_input(struct v4l2_input *kp, struct v4l2_input __user *up)
-{
-	if (copy_to_user(up, kp, sizeof(struct v4l2_input)))
-		return -EFAULT;
-	return 0;
-}
-
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-struct video_code32 {
-	char		loadwhat[16];	/* name or tag of file being passed */
-	compat_int_t	datasize;
-	unsigned char	*data;
+struct v4l2_ext_controls32 {
+       __u32 ctrl_class;
+       __u32 count;
+       __u32 error_idx;
+       __u32 reserved[2];
+       compat_caddr_t controls; /* actually struct v4l2_ext_control32 * */
 };
 
-static inline int microcode32(struct video_code *kp, struct video_code32 __user *up)
+static int get_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext_controls32 __user *up)
 {
-	if (!access_ok(VERIFY_READ, up, sizeof(struct video_code32)) ||
-		copy_from_user(kp->loadwhat, up->loadwhat, sizeof(up->loadwhat)) ||
-		get_user(kp->datasize, &up->datasize) ||
-		copy_from_user(kp->data, up->data, up->datasize))
+	struct v4l2_ext_control __user *ucontrols;
+	struct v4l2_ext_control __user *kcontrols;
+	int n;
+	compat_caddr_t p;
+
+	if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_ext_controls32)) ||
+		get_user(kp->ctrl_class, &up->ctrl_class) ||
+		get_user(kp->count, &up->count) ||
+		get_user(kp->error_idx, &up->error_idx) ||
+		copy_from_user(kp->reserved, up->reserved, sizeof(kp->reserved)))
+			return -EFAULT;
+	n = kp->count;
+	if (n == 0) {
+		kp->controls = NULL;
+		return 0;
+	}
+	if (get_user(p, &up->controls))
+		return -EFAULT;
+	ucontrols = compat_ptr(p);
+	if (!access_ok(VERIFY_READ, ucontrols, n * sizeof(struct v4l2_ext_control)))
+		return -EFAULT;
+	kcontrols = compat_alloc_user_space(n * sizeof(struct v4l2_ext_control));
+	kp->controls = kcontrols;
+	while (--n >= 0) {
+		if (copy_in_user(&kcontrols->id, &ucontrols->id, sizeof(__u32)))
+			return -EFAULT;
+		if (copy_in_user(&kcontrols->reserved2, &ucontrols->reserved2, sizeof(ucontrols->reserved2)))
 			return -EFAULT;
+		/* Note: if the void * part of the union ever becomes relevant
+		   then we need to know the type of the control in order to do
+		   the right thing here. Luckily, that is not yet an issue. */
+		if (copy_in_user(&kcontrols->value, &ucontrols->value, sizeof(ucontrols->value)))
+			return -EFAULT;
+		ucontrols++;
+		kcontrols++;
+	}
 	return 0;
 }
 
-#define VIDIOCGTUNER32		_IOWR('v', 4, struct video_tuner32)
-#define VIDIOCSTUNER32		_IOW('v', 5, struct video_tuner32)
-#define VIDIOCGWIN32		_IOR('v', 9, struct video_window32)
-#define VIDIOCSWIN32		_IOW('v', 10, struct video_window32)
-#define VIDIOCGFBUF32		_IOR('v', 11, struct video_buffer32)
-#define VIDIOCSFBUF32		_IOW('v', 12, struct video_buffer32)
-#define VIDIOCGFREQ32		_IOR('v', 14, u32)
-#define VIDIOCSFREQ32		_IOW('v', 15, u32)
-#define VIDIOCSMICROCODE32	_IOW('v', 27, struct video_code32)
-
-#endif
-
-/* VIDIOC_ENUMINPUT32 is VIDIOC_ENUMINPUT minus 4 bytes of padding alignement */
-#define VIDIOC_ENUMINPUT32	(VIDIOC_ENUMINPUT - _IOC(0, 0, 0, 4))
-#define VIDIOC_G_FMT32		_IOWR ('V',  4, struct v4l2_format32)
-#define VIDIOC_S_FMT32		_IOWR ('V',  5, struct v4l2_format32)
-#define VIDIOC_QUERYBUF32	_IOWR ('V',  9, struct v4l2_buffer32)
-#define VIDIOC_G_FBUF32		_IOR  ('V', 10, struct v4l2_framebuffer32)
-#define VIDIOC_S_FBUF32		_IOW  ('V', 11, struct v4l2_framebuffer32)
-/* VIDIOC_OVERLAY is now _IOW, but was _IOWR */
-#define VIDIOC_OVERLAY32	_IOWR ('V', 14, compat_int_t)
-#define VIDIOC_QBUF32		_IOWR ('V', 15, struct v4l2_buffer32)
-#define VIDIOC_DQBUF32		_IOWR ('V', 17, struct v4l2_buffer32)
-#define VIDIOC_STREAMON32	_IOW  ('V', 18, compat_int_t)
-#define VIDIOC_STREAMOFF32	_IOW  ('V', 19, compat_int_t)
-#define VIDIOC_ENUMSTD32	_IOWR ('V', 25, struct v4l2_standard32)
-/* VIDIOC_S_CTRL is now _IOWR, but was _IOW */
-#define VIDIOC_S_CTRL32		_IOW  ('V', 28, struct v4l2_control)
-#define VIDIOC_G_INPUT32	_IOR  ('V', 38, compat_int_t)
-#define VIDIOC_S_INPUT32	_IOWR ('V', 39, compat_int_t)
-#define VIDIOC_TRY_FMT32      	_IOWR ('V', 64, struct v4l2_format32)
-
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-enum {
-	MaxClips = (~0U-sizeof(struct video_window))/sizeof(struct video_clip)
-};
-
-static int do_set_window(struct file *file, unsigned int cmd, unsigned long arg)
+static int put_v4l2_ext_controls32(struct v4l2_ext_controls *kp, struct v4l2_ext_controls32 __user *up)
 {
-	struct video_window32 __user *up = compat_ptr(arg);
-	struct video_window __user *vw;
-	struct video_clip __user *p;
-	int nclips;
-	u32 n;
-
-	if (!access_ok(VERIFY_READ, up, sizeof(struct video_window32)))
-		return -EFAULT;
+	struct v4l2_ext_control __user *ucontrols;
+	struct v4l2_ext_control __user *kcontrols = kp->controls;
+	int n = kp->count;
+	compat_caddr_t p;
+
+	if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_ext_controls32)) ||
+		put_user(kp->ctrl_class, &up->ctrl_class) ||
+		put_user(kp->count, &up->count) ||
+		put_user(kp->error_idx, &up->error_idx) ||
+		copy_to_user(up->reserved, kp->reserved, sizeof(up->reserved)))
+			return -EFAULT;
+	if (!kp->count)
+		return 0;
 
-	if (get_user(nclips, &up->clipcount))
+	if (get_user(p, &up->controls))
 		return -EFAULT;
-
-	/* Peculiar interface... */
-	if (nclips < 0)
-		nclips = VIDEO_CLIPMAP_SIZE;
-
-	if (nclips > MaxClips)
-		return -ENOMEM;
-
-	vw = compat_alloc_user_space(sizeof(struct video_window) +
-				    nclips * sizeof(struct video_clip));
-
-	p = nclips ? (struct video_clip __user *)(vw + 1) : NULL;
-
-	if (get_user(n, &up->x) || put_user(n, &vw->x) ||
-	    get_user(n, &up->y) || put_user(n, &vw->y) ||
-	    get_user(n, &up->width) || put_user(n, &vw->width) ||
-	    get_user(n, &up->height) || put_user(n, &vw->height) ||
-	    get_user(n, &up->chromakey) || put_user(n, &vw->chromakey) ||
-	    get_user(n, &up->flags) || put_user(n, &vw->flags) ||
-	    get_user(n, &up->clipcount) || put_user(n, &vw->clipcount) ||
-	    get_user(n, &up->clips) || put_user(p, &vw->clips))
+	ucontrols = compat_ptr(p);
+	if (!access_ok(VERIFY_WRITE, ucontrols, n * sizeof(struct v4l2_ext_control)))
 		return -EFAULT;
 
-	if (nclips) {
-		struct video_clip32 __user *u = compat_ptr(n);
-		int i;
-		if (!u)
-			return -EINVAL;
-		for (i = 0; i < nclips; i++, u++, p++) {
-			s32 v;
-			if (!access_ok(VERIFY_READ, u, sizeof(struct video_clip32)) ||
-			    !access_ok(VERIFY_WRITE, p, sizeof(struct video_clip32)) ||
-			    get_user(v, &u->x) ||
-			    put_user(v, &p->x) ||
-			    get_user(v, &u->y) ||
-			    put_user(v, &p->y) ||
-			    get_user(v, &u->width) ||
-			    put_user(v, &p->width) ||
-			    get_user(v, &u->height) ||
-			    put_user(v, &p->height) ||
-			    put_user(NULL, &p->next))
-				return -EFAULT;
-		}
+	while (--n >= 0) {
+		if (copy_in_user(&ucontrols->id, &kcontrols->id, sizeof(__u32)))
+			return -EFAULT;
+		if (copy_in_user(&ucontrols->reserved2, &kcontrols->reserved2,
+					sizeof(ucontrols->reserved2)))
+			return -EFAULT;
+		/* Note: if the void * part of the union ever becomes relevant
+		   then we need to know the type of the control in order to do
+		   the right thing here. Luckily, that is not yet an issue. */
+		if (copy_in_user(&ucontrols->value, &kcontrols->value, sizeof(ucontrols->value)))
+			return -EFAULT;
+		ucontrols++;
+		kcontrols++;
 	}
-
-	return native_ioctl(file, VIDIOCSWIN, (unsigned long)vw);
+	return 0;
 }
-#endif
+
+#define VIDIOC_G_FMT32		_IOWR('V',  4, struct v4l2_format32)
+#define VIDIOC_S_FMT32		_IOWR('V',  5, struct v4l2_format32)
+#define VIDIOC_QUERYBUF32	_IOWR('V',  9, struct v4l2_buffer32)
+#define VIDIOC_G_FBUF32		_IOR ('V', 10, struct v4l2_framebuffer32)
+#define VIDIOC_S_FBUF32		_IOW ('V', 11, struct v4l2_framebuffer32)
+#define VIDIOC_QBUF32		_IOWR('V', 15, struct v4l2_buffer32)
+#define VIDIOC_DQBUF32		_IOWR('V', 17, struct v4l2_buffer32)
+#define VIDIOC_ENUMSTD32	_IOWR('V', 25, struct v4l2_standard32)
+#define VIDIOC_ENUMINPUT32	_IOWR('V', 26, struct v4l2_input32)
+#define VIDIOC_TRY_FMT32      	_IOWR('V', 64, struct v4l2_format32)
+#define VIDIOC_G_EXT_CTRLS32    _IOWR('V', 71, struct v4l2_ext_controls32)
+#define VIDIOC_S_EXT_CTRLS32    _IOWR('V', 72, struct v4l2_ext_controls32)
+#define VIDIOC_TRY_EXT_CTRLS32  _IOWR('V', 73, struct v4l2_ext_controls32)
+
+#define VIDIOC_OVERLAY32	_IOW ('V', 14, s32)
+#define VIDIOC_OVERLAY32_OLD	_IOWR('V', 14, s32)
+#define VIDIOC_STREAMON32	_IOW ('V', 18, s32)
+#define VIDIOC_STREAMOFF32	_IOW ('V', 19, s32)
+#define VIDIOC_G_INPUT32	_IOR ('V', 38, s32)
+#define VIDIOC_S_INPUT32	_IOWR('V', 39, s32)
+#define VIDIOC_G_OUTPUT32	_IOR ('V', 46, s32)
+#define VIDIOC_S_OUTPUT32	_IOWR('V', 47, s32)
 
 static int do_video_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
@@ -616,45 +716,51 @@ static int do_video_ioctl(struct file *file, unsigned int cmd, unsigned long arg
 		struct v4l2_format v2f;
 		struct v4l2_buffer v2b;
 		struct v4l2_framebuffer v2fb;
-		struct v4l2_standard v2s;
 		struct v4l2_input v2i;
-		struct v4l2_tuner v2t;
+		struct v4l2_standard v2s;
+		struct v4l2_ext_controls v2ecs;
 		unsigned long vx;
+		int vi;
 	} karg;
 	void __user *up = compat_ptr(arg);
 	int compatible_arg = 1;
 	int err = 0;
-	int realcmd = cmd;
 
 	/* First, convert the command. */
 	switch (cmd) {
 #ifdef CONFIG_VIDEO_V4L1_COMPAT
-	case VIDIOCGTUNER32: realcmd = cmd = VIDIOCGTUNER; break;
-	case VIDIOCSTUNER32: realcmd = cmd = VIDIOCSTUNER; break;
-	case VIDIOCGWIN32: realcmd = cmd = VIDIOCGWIN; break;
-	case VIDIOCGFBUF32: realcmd = cmd = VIDIOCGFBUF; break;
-	case VIDIOCSFBUF32: realcmd = cmd = VIDIOCSFBUF; break;
-	case VIDIOCGFREQ32: realcmd = cmd = VIDIOCGFREQ; break;
-	case VIDIOCSFREQ32: realcmd = cmd = VIDIOCSFREQ; break;
-	case VIDIOCSMICROCODE32: realcmd = cmd = VIDIOCSMICROCODE; break;
+	case VIDIOCGTUNER32: cmd = VIDIOCGTUNER; break;
+	case VIDIOCSTUNER32: cmd = VIDIOCSTUNER; break;
+	case VIDIOCGWIN32: cmd = VIDIOCGWIN; break;
+	case VIDIOCSWIN32: cmd = VIDIOCSWIN; break;
+	case VIDIOCGFBUF32: cmd = VIDIOCGFBUF; break;
+	case VIDIOCSFBUF32: cmd = VIDIOCSFBUF; break;
+	case VIDIOCGFREQ32: cmd = VIDIOCGFREQ; break;
+	case VIDIOCSFREQ32: cmd = VIDIOCSFREQ; break;
+	case VIDIOCSMICROCODE32: cmd = VIDIOCSMICROCODE; break;
 #endif
-	case VIDIOC_G_FMT32: realcmd = cmd = VIDIOC_G_FMT; break;
-	case VIDIOC_S_FMT32: realcmd = cmd = VIDIOC_S_FMT; break;
-	case VIDIOC_QUERYBUF32: realcmd = cmd = VIDIOC_QUERYBUF; break;
-	case VIDIOC_QBUF32: realcmd = cmd = VIDIOC_QBUF; break;
-	case VIDIOC_DQBUF32: realcmd = cmd = VIDIOC_DQBUF; break;
-	case VIDIOC_STREAMON32: realcmd = cmd = VIDIOC_STREAMON; break;
-	case VIDIOC_STREAMOFF32: realcmd = cmd = VIDIOC_STREAMOFF; break;
-	case VIDIOC_G_FBUF32: realcmd = cmd = VIDIOC_G_FBUF; break;
-	case VIDIOC_S_FBUF32: realcmd = cmd = VIDIOC_S_FBUF; break;
-	case VIDIOC_OVERLAY32: realcmd = cmd = VIDIOC_OVERLAY; break;
-	case VIDIOC_ENUMSTD32: realcmd = VIDIOC_ENUMSTD; break;
-	case VIDIOC_ENUMINPUT32: realcmd = VIDIOC_ENUMINPUT; break;
-	case VIDIOC_S_CTRL32: realcmd = cmd = VIDIOC_S_CTRL; break;
-	case VIDIOC_G_INPUT32: realcmd = cmd = VIDIOC_G_INPUT; break;
-	case VIDIOC_S_INPUT32: realcmd = cmd = VIDIOC_S_INPUT; break;
-	case VIDIOC_TRY_FMT32: realcmd = cmd = VIDIOC_TRY_FMT; break;
-	};
+	case VIDIOC_G_FMT32: cmd = VIDIOC_G_FMT; break;
+	case VIDIOC_S_FMT32: cmd = VIDIOC_S_FMT; break;
+	case VIDIOC_QUERYBUF32: cmd = VIDIOC_QUERYBUF; break;
+	case VIDIOC_G_FBUF32: cmd = VIDIOC_G_FBUF; break;
+	case VIDIOC_S_FBUF32: cmd = VIDIOC_S_FBUF; break;
+	case VIDIOC_QBUF32: cmd = VIDIOC_QBUF; break;
+	case VIDIOC_DQBUF32: cmd = VIDIOC_DQBUF; break;
+	case VIDIOC_ENUMSTD32: cmd = VIDIOC_ENUMSTD; break;
+	case VIDIOC_ENUMINPUT32: cmd = VIDIOC_ENUMINPUT; break;
+	case VIDIOC_TRY_FMT32: cmd = VIDIOC_TRY_FMT; break;
+	case VIDIOC_G_EXT_CTRLS32: cmd = VIDIOC_G_EXT_CTRLS; break;
+	case VIDIOC_S_EXT_CTRLS32: cmd = VIDIOC_S_EXT_CTRLS; break;
+	case VIDIOC_TRY_EXT_CTRLS32: cmd = VIDIOC_TRY_EXT_CTRLS; break;
+	case VIDIOC_OVERLAY32: cmd = VIDIOC_OVERLAY; break;
+	case VIDIOC_OVERLAY32_OLD: cmd = VIDIOC_OVERLAY; break;
+	case VIDIOC_STREAMON32: cmd = VIDIOC_STREAMON; break;
+	case VIDIOC_STREAMOFF32: cmd = VIDIOC_STREAMOFF; break;
+	case VIDIOC_G_INPUT32: cmd = VIDIOC_G_INPUT; break;
+	case VIDIOC_S_INPUT32: cmd = VIDIOC_S_INPUT; break;
+	case VIDIOC_G_OUTPUT32: cmd = VIDIOC_G_OUTPUT; break;
+	case VIDIOC_S_OUTPUT32: cmd = VIDIOC_S_OUTPUT; break;
+	}
 
 	switch (cmd) {
 #ifdef CONFIG_VIDEO_V4L1_COMPAT
@@ -662,7 +768,6 @@ static int do_video_ioctl(struct file *file, unsigned int cmd, unsigned long arg
 	case VIDIOCGTUNER:
 		err = get_video_tuner32(&karg.vt, up);
 		compatible_arg = 0;
-
 		break;
 
 	case VIDIOCSFBUF:
@@ -670,19 +775,42 @@ static int do_video_ioctl(struct file *file, unsigned int cmd, unsigned long arg
 		compatible_arg = 0;
 		break;
 
+	case VIDIOCSWIN:
+		err = get_video_window32(&karg.vw, up);
+		compatible_arg = 0;
+		break;
+
+	case VIDIOCGWIN:
+	case VIDIOCGFBUF:
+	case VIDIOCGFREQ:
+		compatible_arg = 0;
+		break;
+
+	case VIDIOCSMICROCODE:
+		err = get_microcode32(&karg.vc, up);
+		compatible_arg = 0;
+		break;
 
 	case VIDIOCSFREQ:
+		err = get_user(karg.vx, (u32 __user *)up);
+		compatible_arg = 0;
+		break;
+
+	case VIDIOCCAPTURE:
+	case VIDIOCSYNC:
+	case VIDIOCSWRITEMODE:
 #endif
-	case VIDIOC_S_INPUT:
 	case VIDIOC_OVERLAY:
 	case VIDIOC_STREAMON:
 	case VIDIOC_STREAMOFF:
-		err = get_user(karg.vx, (u32 __user *)up);
-		compatible_arg = 1;
+	case VIDIOC_S_INPUT:
+	case VIDIOC_S_OUTPUT:
+		err = get_user(karg.vi, (s32 __user *)up);
+		compatible_arg = 0;
 		break;
 
-	case VIDIOC_S_FBUF:
-		err = get_v4l2_framebuffer32(&karg.v2fb, up);
+	case VIDIOC_G_INPUT:
+	case VIDIOC_G_OUTPUT:
 		compatible_arg = 0;
 		break;
 
@@ -700,122 +828,108 @@ static int do_video_ioctl(struct file *file, unsigned int cmd, unsigned long arg
 		compatible_arg = 0;
 		break;
 
-	case VIDIOC_ENUMSTD:
-		err = get_v4l2_standard(&karg.v2s, up);
+	case VIDIOC_S_FBUF:
+		err = get_v4l2_framebuffer32(&karg.v2fb, up);
 		compatible_arg = 0;
 		break;
 
-	case VIDIOC_ENUMSTD32:
-		err = get_v4l2_standard32(&karg.v2s, up);
+	case VIDIOC_G_FBUF:
 		compatible_arg = 0;
 		break;
 
-	case VIDIOC_ENUMINPUT:
-		err = get_v4l2_input(&karg.v2i, up);
+	case VIDIOC_ENUMSTD:
+		err = get_v4l2_standard32(&karg.v2s, up);
 		compatible_arg = 0;
 		break;
 
-	case VIDIOC_ENUMINPUT32:
+	case VIDIOC_ENUMINPUT:
 		err = get_v4l2_input32(&karg.v2i, up);
 		compatible_arg = 0;
 		break;
 
-	case VIDIOC_G_TUNER:
-	case VIDIOC_S_TUNER:
-		err = get_v4l2_tuner(&karg.v2t, up);
+	case VIDIOC_G_EXT_CTRLS:
+	case VIDIOC_S_EXT_CTRLS:
+	case VIDIOC_TRY_EXT_CTRLS:
+		err = get_v4l2_ext_controls32(&karg.v2ecs, up);
 		compatible_arg = 0;
 		break;
-
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-	case VIDIOCGWIN:
-	case VIDIOCGFBUF:
-	case VIDIOCGFREQ:
-#endif
-	case VIDIOC_G_FBUF:
-	case VIDIOC_G_INPUT:
-		compatible_arg = 0;
-		break;
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-	case VIDIOCSMICROCODE:
-		err = microcode32(&karg.vc, up);
-		compatible_arg = 0;
-		break;
-#endif
-	};
+	}
 	if (err)
-		goto out;
+		return err;
 
 	if (compatible_arg)
-		err = native_ioctl(file, realcmd, (unsigned long)up);
+		err = native_ioctl(file, cmd, (unsigned long)up);
 	else {
 		mm_segment_t old_fs = get_fs();
 
 		set_fs(KERNEL_DS);
-		err = native_ioctl(file, realcmd, (unsigned long)&karg);
+		err = native_ioctl(file, cmd, (unsigned long)&karg);
 		set_fs(old_fs);
 	}
-	if (err == 0) {
-		switch (cmd) {
+
+	/* Special case: even after an error we need to put the
+	   results back for these ioctls since the error_idx will
+	   contain information on which control failed. */
+	switch (cmd) {
+	case VIDIOC_G_EXT_CTRLS:
+	case VIDIOC_S_EXT_CTRLS:
+	case VIDIOC_TRY_EXT_CTRLS:
+		if (put_v4l2_ext_controls32(&karg.v2ecs, up))
+			err = -EFAULT;
+		break;
+	}
+	if (err)
+		return err;
+
+	switch (cmd) {
 #ifdef CONFIG_VIDEO_V4L1_COMPAT
-		case VIDIOCGTUNER:
-			err = put_video_tuner32(&karg.vt, up);
-			break;
+	case VIDIOCGTUNER:
+		err = put_video_tuner32(&karg.vt, up);
+		break;
 
-		case VIDIOCGWIN:
-			err = put_video_window32(&karg.vw, up);
-			break;
+	case VIDIOCGWIN:
+		err = put_video_window32(&karg.vw, up);
+		break;
 
-		case VIDIOCGFBUF:
-			err = put_video_buffer32(&karg.vb, up);
-			break;
+	case VIDIOCGFBUF:
+		err = put_video_buffer32(&karg.vb, up);
+		break;
 
+	case VIDIOCGFREQ:
+		err = put_user(((u32)karg.vx), (u32 __user *)up);
+		break;
 #endif
-		case VIDIOC_G_FBUF:
-			err = put_v4l2_framebuffer32(&karg.v2fb, up);
-			break;
-
-		case VIDIOC_G_FMT:
-		case VIDIOC_S_FMT:
-		case VIDIOC_TRY_FMT:
-			err = put_v4l2_format32(&karg.v2f, up);
-			break;
-
-		case VIDIOC_QUERYBUF:
-		case VIDIOC_QBUF:
-		case VIDIOC_DQBUF:
-			err = put_v4l2_buffer32(&karg.v2b, up);
-			break;
-
-		case VIDIOC_ENUMSTD:
-			err = put_v4l2_standard(&karg.v2s, up);
-			break;
-
-		case VIDIOC_ENUMSTD32:
-			err = put_v4l2_standard32(&karg.v2s, up);
-			break;
-
-		case VIDIOC_G_TUNER:
-		case VIDIOC_S_TUNER:
-			err = put_v4l2_tuner(&karg.v2t, up);
-			break;
-
-		case VIDIOC_ENUMINPUT:
-			err = put_v4l2_input(&karg.v2i, up);
-			break;
-
-		case VIDIOC_ENUMINPUT32:
-			err = put_v4l2_input32(&karg.v2i, up);
-			break;
+	case VIDIOC_S_INPUT:
+	case VIDIOC_S_OUTPUT:
+	case VIDIOC_G_INPUT:
+	case VIDIOC_G_OUTPUT:
+		err = put_user(((s32)karg.vi), (s32 __user *)up);
+		break;
 
-#ifdef CONFIG_VIDEO_V4L1_COMPAT
-		case VIDIOCGFREQ:
-#endif
-		case VIDIOC_G_INPUT:
-			err = put_user(((u32)karg.vx), (u32 __user *)up);
-			break;
-		};
+	case VIDIOC_G_FBUF:
+		err = put_v4l2_framebuffer32(&karg.v2fb, up);
+		break;
+
+	case VIDIOC_G_FMT:
+	case VIDIOC_S_FMT:
+	case VIDIOC_TRY_FMT:
+		err = put_v4l2_format32(&karg.v2f, up);
+		break;
+
+	case VIDIOC_QUERYBUF:
+	case VIDIOC_QBUF:
+	case VIDIOC_DQBUF:
+		err = put_v4l2_buffer32(&karg.v2b, up);
+		break;
+
+	case VIDIOC_ENUMSTD:
+		err = put_v4l2_standard32(&karg.v2s, up);
+		break;
+
+	case VIDIOC_ENUMINPUT:
+		err = put_v4l2_input32(&karg.v2i, up);
+		break;
 	}
-out:
 	return err;
 }
 
@@ -828,26 +942,48 @@ long v4l_compat_ioctl32(struct file *file, unsigned int cmd, unsigned long arg)
 
 	switch (cmd) {
 #ifdef CONFIG_VIDEO_V4L1_COMPAT
-	case VIDIOCSWIN32:
-		ret = do_set_window(file, cmd, arg);
-		break;
+	case VIDIOCGCAP:
+	case VIDIOCGCHAN:
+	case VIDIOCSCHAN:
 	case VIDIOCGTUNER32:
 	case VIDIOCSTUNER32:
+	case VIDIOCGPICT:
+	case VIDIOCSPICT:
+	case VIDIOCCAPTURE32:
 	case VIDIOCGWIN32:
+	case VIDIOCSWIN32:
 	case VIDIOCGFBUF32:
 	case VIDIOCSFBUF32:
+	case VIDIOCKEY:
 	case VIDIOCGFREQ32:
 	case VIDIOCSFREQ32:
 	case VIDIOCGAUDIO:
 	case VIDIOCSAUDIO:
+	case VIDIOCSYNC32:
+	case VIDIOCMCAPTURE:
+	case VIDIOCGMBUF:
+	case VIDIOCGUNIT:
+	case VIDIOCGCAPTURE:
+	case VIDIOCSCAPTURE:
+	case VIDIOCSPLAYMODE:
+	case VIDIOCSWRITEMODE32:
+	case VIDIOCGPLAYINFO:
+	case VIDIOCSMICROCODE32:
 	case VIDIOCGVBIFMT:
 	case VIDIOCSVBIFMT:
+#endif
+#ifdef __OLD_VIDIOC_
+	case VIDIOC_OVERLAY32_OLD:
+	case VIDIOC_S_PARM_OLD:
+	case VIDIOC_S_CTRL_OLD:
+	case VIDIOC_G_AUDIO_OLD:
+	case VIDIOC_G_AUDOUT_OLD:
+	case VIDIOC_CROPCAP_OLD:
 #endif
 	case VIDIOC_QUERYCAP:
+	case VIDIOC_RESERVED:
 	case VIDIOC_ENUM_FMT:
 	case VIDIOC_G_FMT32:
-	case VIDIOC_CROPCAP:
-	case VIDIOC_S_CROP:
 	case VIDIOC_S_FMT32:
 	case VIDIOC_REQBUFS:
 	case VIDIOC_QUERYBUF32:
@@ -862,43 +998,56 @@ long v4l_compat_ioctl32(struct file *file, unsigned int cmd, unsigned long arg)
 	case VIDIOC_S_PARM:
 	case VIDIOC_G_STD:
 	case VIDIOC_S_STD:
-	case VIDIOC_G_TUNER:
-	case VIDIOC_S_TUNER:
-	case VIDIOC_ENUMSTD:
 	case VIDIOC_ENUMSTD32:
-	case VIDIOC_ENUMINPUT:
 	case VIDIOC_ENUMINPUT32:
 	case VIDIOC_G_CTRL:
 	case VIDIOC_S_CTRL:
-	case VIDIOC_S_CTRL32:
-	case VIDIOC_S_FREQUENCY:
-	case VIDIOC_G_FREQUENCY:
+	case VIDIOC_G_TUNER:
+	case VIDIOC_S_TUNER:
+	case VIDIOC_G_AUDIO:
+	case VIDIOC_S_AUDIO:
 	case VIDIOC_QUERYCTRL:
+	case VIDIOC_QUERYMENU:
 	case VIDIOC_G_INPUT32:
 	case VIDIOC_S_INPUT32:
+	case VIDIOC_G_OUTPUT32:
+	case VIDIOC_S_OUTPUT32:
+	case VIDIOC_ENUMOUTPUT:
+	case VIDIOC_G_AUDOUT:
+	case VIDIOC_S_AUDOUT:
+	case VIDIOC_G_MODULATOR:
+	case VIDIOC_S_MODULATOR:
+	case VIDIOC_S_FREQUENCY:
+	case VIDIOC_G_FREQUENCY:
+	case VIDIOC_CROPCAP:
+	case VIDIOC_G_CROP:
+	case VIDIOC_S_CROP:
+	case VIDIOC_G_JPEGCOMP:
+	case VIDIOC_S_JPEGCOMP:
+	case VIDIOC_QUERYSTD:
 	case VIDIOC_TRY_FMT32:
-	case VIDIOC_S_HW_FREQ_SEEK:
+	case VIDIOC_ENUMAUDIO:
+	case VIDIOC_ENUMAUDOUT:
+	case VIDIOC_G_PRIORITY:
+	case VIDIOC_S_PRIORITY:
+	case VIDIOC_G_SLICED_VBI_CAP:
+	case VIDIOC_LOG_STATUS:
+	case VIDIOC_G_EXT_CTRLS32:
+	case VIDIOC_S_EXT_CTRLS32:
+	case VIDIOC_TRY_EXT_CTRLS32:
 	case VIDIOC_ENUM_FRAMESIZES:
 	case VIDIOC_ENUM_FRAMEINTERVALS:
+	case VIDIOC_G_ENC_INDEX:
+	case VIDIOC_ENCODER_CMD:
+	case VIDIOC_TRY_ENCODER_CMD:
+	case VIDIOC_DBG_S_REGISTER:
+	case VIDIOC_DBG_G_REGISTER:
+	case VIDIOC_G_CHIP_IDENT:
+	case VIDIOC_S_HW_FREQ_SEEK:
 		ret = do_video_ioctl(file, cmd, arg);
 		break;
 
 #ifdef CONFIG_VIDEO_V4L1_COMPAT
-	/* Little v, the video4linux ioctls (conflict?) */
-	case VIDIOCGCAP:
-	case VIDIOCGCHAN:
-	case VIDIOCSCHAN:
-	case VIDIOCGPICT:
-	case VIDIOCSPICT:
-	case VIDIOCCAPTURE:
-	case VIDIOCKEY:
-	case VIDIOCSYNC:
-	case VIDIOCMCAPTURE:
-	case VIDIOCGMBUF:
-	case VIDIOCGUNIT:
-	case VIDIOCGCAPTURE:
-	case VIDIOCSCAPTURE:
-
 	/* BTTV specific... */
 	case _IOW('v',  BASE_VIDIOCPRIVATE+0, char [256]):
 	case _IOR('v',  BASE_VIDIOCPRIVATE+1, char [256]):
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index 6e6743bd0f92..e2cfd6add78c 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -1465,6 +1465,8 @@ struct v4l2_chip_ident {
 #define VIDIOC_G_CHIP_IDENT     _IOWR('V', 81, struct v4l2_chip_ident)
 #endif
 #define VIDIOC_S_HW_FREQ_SEEK	 _IOW('V', 82, struct v4l2_hw_freq_seek)
+/* Reminder: when adding new ioctls please add support for them to
+   drivers/media/video/v4l2-compat-ioctl32.c as well! */
 
 #ifdef __OLD_VIDIOC_
 /* for compatibility, will go away some day */
-- 
cgit v1.2.3


From 4b00eb25340c1a9b9eedaf0bc5b0f0d18eddb028 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Fri, 19 Dec 2008 11:17:56 -0300
Subject: V4L/DVB (9944): videodev2.h: fix typo.

The comment said CX2584X instead of CX2341X.

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/linux/videodev2.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index e2cfd6add78c..754c8d9685a4 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -1051,7 +1051,7 @@ enum v4l2_mpeg_video_bitrate_mode {
 #define V4L2_CID_MPEG_VIDEO_MUTE 		(V4L2_CID_MPEG_BASE+210)
 #define V4L2_CID_MPEG_VIDEO_MUTE_YUV 		(V4L2_CID_MPEG_BASE+211)
 
-/*  MPEG-class control IDs specific to the CX2584x driver as defined by V4L2 */
+/*  MPEG-class control IDs specific to the CX2341x driver as defined by V4L2 */
 #define V4L2_CID_MPEG_CX2341X_BASE 				(V4L2_CTRL_CLASS_MPEG | 0x1000)
 #define V4L2_CID_MPEG_CX2341X_VIDEO_SPATIAL_FILTER_MODE 	(V4L2_CID_MPEG_CX2341X_BASE+0)
 enum v4l2_mpeg_cx2341x_video_spatial_filter_mode {
-- 
cgit v1.2.3


From 531c98e71805b32e9ea35a218119100bbd2b7615 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Mon, 22 Dec 2008 13:18:27 -0300
Subject: V4L/DVB (9953): em28xx: Add suport for debugging AC97 anciliary chips

The em28xx driver can be coupled to an anciliary AC97 chip. This patch
allows read/write AC97 registers directly.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/em28xx/em28xx-core.c  |  4 ++--
 drivers/media/video/em28xx/em28xx-video.c | 19 +++++++++++++++++++
 drivers/media/video/em28xx/em28xx.h       |  3 +++
 include/linux/videodev2.h                 |  1 +
 4 files changed, 25 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/media/video/em28xx/em28xx-core.c b/drivers/media/video/em28xx/em28xx-core.c
index c647dcb20a57..5964998daf2e 100644
--- a/drivers/media/video/em28xx/em28xx-core.c
+++ b/drivers/media/video/em28xx/em28xx-core.c
@@ -259,7 +259,7 @@ static int em28xx_is_ac97_ready(struct em28xx *dev)
  * em28xx_read_ac97()
  * write a 16 bit value to the specified AC97 address (LSB first!)
  */
-static int em28xx_read_ac97(struct em28xx *dev, u8 reg)
+int em28xx_read_ac97(struct em28xx *dev, u8 reg)
 {
 	int ret;
 	u8 addr = (reg & 0x7f) | 0x80;
@@ -285,7 +285,7 @@ static int em28xx_read_ac97(struct em28xx *dev, u8 reg)
  * em28xx_write_ac97()
  * write a 16 bit value to the specified AC97 address (LSB first!)
  */
-static int em28xx_write_ac97(struct em28xx *dev, u8 reg, u16 val)
+int em28xx_write_ac97(struct em28xx *dev, u8 reg, u16 val)
 {
 	int ret;
 	u8 addr = reg & 0x7f;
diff --git a/drivers/media/video/em28xx/em28xx-video.c b/drivers/media/video/em28xx/em28xx-video.c
index 4701b6589b10..4ea4920c927a 100644
--- a/drivers/media/video/em28xx/em28xx-video.c
+++ b/drivers/media/video/em28xx/em28xx-video.c
@@ -1221,6 +1221,17 @@ static int vidioc_g_register(struct file *file, void *priv,
 	struct em28xx         *dev = fh->dev;
 	int ret;
 
+	if (reg->match_type == V4L2_CHIP_MATCH_AC97) {
+		mutex_lock(&dev->lock);
+		ret = em28xx_read_ac97(dev, reg->reg);
+		mutex_unlock(&dev->lock);
+		if (ret < 0)
+			return ret;
+
+		reg->val = ret;
+		return 0;
+	}
+
 	if (!v4l2_chip_match_host(reg->match_type, reg->match_chip))
 		return -EINVAL;
 
@@ -1256,6 +1267,14 @@ static int vidioc_s_register(struct file *file, void *priv,
 	__le64 buf;
 	int    rc;
 
+	if (reg->match_type == V4L2_CHIP_MATCH_AC97) {
+		mutex_lock(&dev->lock);
+		rc = em28xx_write_ac97(dev, reg->reg, reg->val);
+		mutex_unlock(&dev->lock);
+
+		return rc;
+	}
+
 	buf = cpu_to_le64(reg->val);
 
 	mutex_lock(&dev->lock);
diff --git a/drivers/media/video/em28xx/em28xx.h b/drivers/media/video/em28xx/em28xx.h
index 80d1d55d80f9..100f27819cdc 100644
--- a/drivers/media/video/em28xx/em28xx.h
+++ b/drivers/media/video/em28xx/em28xx.h
@@ -564,6 +564,9 @@ int em28xx_write_regs_req(struct em28xx *dev, u8 req, u16 reg, char *buf,
 int em28xx_write_regs(struct em28xx *dev, u16 reg, char *buf, int len);
 int em28xx_write_reg(struct em28xx *dev, u16 reg, u8 val);
 
+int em28xx_read_ac97(struct em28xx *dev, u8 reg);
+int em28xx_write_ac97(struct em28xx *dev, u8 reg, u16 val);
+
 int em28xx_audio_analog_set(struct em28xx *dev);
 int em28xx_audio_setup(struct em28xx *dev);
 
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index 754c8d9685a4..e31144d22237 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -1376,6 +1376,7 @@ struct v4l2_streamparm {
 #define V4L2_CHIP_MATCH_HOST       0  /* Match against chip ID on host (0 for the host) */
 #define V4L2_CHIP_MATCH_I2C_DRIVER 1  /* Match against I2C driver ID */
 #define V4L2_CHIP_MATCH_I2C_ADDR   2  /* Match against I2C 7-bit address */
+#define V4L2_CHIP_MATCH_AC97       3  /* Match against anciliary AC97 chip */
 
 struct v4l2_register {
 	__u32 match_type; /* Match type */
-- 
cgit v1.2.3


From 91962fa713bd8bf47434b02ac661fdc201365fa5 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Thu, 18 Dec 2008 11:45:00 -0300
Subject: V4L/DVB (10078): video: add NV16 and NV61 pixel formats

This patch adds support for NV16 and NV61 pixel formats.

These pixel formats use two planes; one for 8-bit Y values and
one for interleaved 8-bit U and V values. NV16/NV61 formats are
very similar to NV12/NV21 with the exception that NV16/NV61 are
using the same number of lines for both planes. The difference
between NV16 and NV61 is the U and V byte order.

The fourcc values are extrapolated from the NV12/NV21 case.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/linux/videodev2.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index e31144d22237..1f126e30766c 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -305,6 +305,8 @@ struct v4l2_pix_format {
 /* two planes -- one Y, one Cr + Cb interleaved  */
 #define V4L2_PIX_FMT_NV12    v4l2_fourcc('N', 'V', '1', '2') /* 12  Y/CbCr 4:2:0  */
 #define V4L2_PIX_FMT_NV21    v4l2_fourcc('N', 'V', '2', '1') /* 12  Y/CrCb 4:2:0  */
+#define V4L2_PIX_FMT_NV16    v4l2_fourcc('N', 'V', '1', '6') /* 16  Y/CbCr 4:2:2  */
+#define V4L2_PIX_FMT_NV61    v4l2_fourcc('N', 'V', '6', '1') /* 16  Y/CrCb 4:2:2  */
 
 /*  The following formats are not defined in the V4L2 specification */
 #define V4L2_PIX_FMT_YUV410  v4l2_fourcc('Y', 'U', 'V', '9') /*  9  YUV 4:1:0     */
-- 
cgit v1.2.3


From 1c5745aa380efb6417b5681104b007c8612fb496 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 22 Dec 2008 23:05:28 +0100
Subject: sched_clock: prevent scd->clock from moving backwards, take #2

Redo:

  5b7dba4: sched_clock: prevent scd->clock from moving backwards

which had to be reverted due to s2ram hangs:

  ca7e716: Revert "sched_clock: prevent scd->clock from moving backwards"

... this time with resume restoring GTOD later in the sequence
taken into account as well.

The "timekeeping_suspended" flag is not very nice but we cannot call into
GTOD before it has been properly resumed and the scheduler will run very
early in the resume sequence.

Cc: <stable@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/time.h      | 1 +
 kernel/sched_clock.c      | 5 ++++-
 kernel/time/timekeeping.c | 7 +++++--
 3 files changed, 10 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/time.h b/include/linux/time.h
index ce321ac5c8f8..fbbd2a1c92ba 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -105,6 +105,7 @@ extern unsigned long read_persistent_clock(void);
 extern int update_persistent_clock(struct timespec now);
 extern int no_sync_cmos_clock __read_mostly;
 void timekeeping_init(void);
+extern int timekeeping_suspended;
 
 unsigned long get_seconds(void);
 struct timespec current_kernel_time(void);
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
index e8ab096ddfe3..a0b0852414cc 100644
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@ -124,7 +124,7 @@ static u64 __update_sched_clock(struct sched_clock_data *scd, u64 now)
 
 	clock = scd->tick_gtod + delta;
 	min_clock = wrap_max(scd->tick_gtod, scd->clock);
-	max_clock = scd->tick_gtod + TICK_NSEC;
+	max_clock = wrap_max(scd->clock, scd->tick_gtod + TICK_NSEC);
 
 	clock = wrap_max(clock, min_clock);
 	clock = wrap_min(clock, max_clock);
@@ -227,6 +227,9 @@ EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
  */
 void sched_clock_idle_wakeup_event(u64 delta_ns)
 {
+	if (timekeeping_suspended)
+		return;
+
 	sched_clock_tick();
 	touch_softlockup_watchdog();
 }
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index fa05e88aa76f..900f1b6598d1 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -46,6 +46,9 @@ struct timespec xtime __attribute__ ((aligned (16)));
 struct timespec wall_to_monotonic __attribute__ ((aligned (16)));
 static unsigned long total_sleep_time;		/* seconds */
 
+/* flag for if timekeeping is suspended */
+int __read_mostly timekeeping_suspended;
+
 static struct timespec xtime_cache __attribute__ ((aligned (16)));
 void update_xtime_cache(u64 nsec)
 {
@@ -92,6 +95,8 @@ void getnstimeofday(struct timespec *ts)
 	unsigned long seq;
 	s64 nsecs;
 
+	WARN_ON(timekeeping_suspended);
+
 	do {
 		seq = read_seqbegin(&xtime_lock);
 
@@ -299,8 +304,6 @@ void __init timekeeping_init(void)
 	write_sequnlock_irqrestore(&xtime_lock, flags);
 }
 
-/* flag for if timekeeping is suspended */
-static int timekeeping_suspended;
 /* time in seconds when suspend began */
 static unsigned long timekeeping_suspend_time;
 
-- 
cgit v1.2.3


From 457533a7d3402d1d91fbc125c8bd1bd16dcd3cd4 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 31 Dec 2008 15:11:37 +0100
Subject: [PATCH] fix scaled & unscaled cputime accounting

The utimescaled / stimescaled fields in the task structure and the
global cpustat should be set on all architectures. On s390 the calls
to account_user_time_scaled and account_system_time_scaled never have
been added. In addition system time that is accounted as guest time
to the user time of a process is accounted to the scaled system time
instead of the scaled user time.
To fix the bugs and to prevent future forgetfulness this patch merges
account_system_time_scaled into account_system_time and
account_user_time_scaled into account_user_time.

Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Chris Wright <chrisw@sous-sol.org>
Cc: Michael Neuling <mikey@neuling.org>
Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/ia64/kernel/time.c     | 12 ++++--------
 arch/powerpc/kernel/time.c  |  7 ++-----
 arch/s390/kernel/vtime.c    | 10 +++++-----
 include/linux/kernel_stat.h |  6 ++----
 kernel/sched.c              | 41 ++++++++++++++++-------------------------
 kernel/time/tick-sched.c    |  5 +++--
 kernel/timer.c              | 12 +++++-------
 7 files changed, 37 insertions(+), 56 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 65c10a42c88f..4ee367817049 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -93,13 +93,11 @@ void ia64_account_on_switch(struct task_struct *prev, struct task_struct *next)
 	now = ia64_get_itc();
 
 	delta_stime = cycle_to_cputime(pi->ac_stime + (now - pi->ac_stamp));
-	account_system_time(prev, 0, delta_stime);
-	account_system_time_scaled(prev, delta_stime);
+	account_system_time(prev, 0, delta_stime, delta_stime);
 
 	if (pi->ac_utime) {
 		delta_utime = cycle_to_cputime(pi->ac_utime);
-		account_user_time(prev, delta_utime);
-		account_user_time_scaled(prev, delta_utime);
+		account_user_time(prev, delta_utime, delta_utime);
 	}
 
 	pi->ac_stamp = ni->ac_stamp = now;
@@ -122,8 +120,7 @@ void account_system_vtime(struct task_struct *tsk)
 	now = ia64_get_itc();
 
 	delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp));
-	account_system_time(tsk, 0, delta_stime);
-	account_system_time_scaled(tsk, delta_stime);
+	account_system_time(tsk, 0, delta_stime, delta_stime);
 	ti->ac_stime = 0;
 
 	ti->ac_stamp = now;
@@ -143,8 +140,7 @@ void account_process_tick(struct task_struct *p, int user_tick)
 
 	if (ti->ac_utime) {
 		delta_utime = cycle_to_cputime(ti->ac_utime);
-		account_user_time(p, delta_utime);
-		account_user_time_scaled(p, delta_utime);
+		account_user_time(p, delta_utime, delta_utime);
 		ti->ac_utime = 0;
 	}
 }
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index e1f3a5140429..92650ccad2e1 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -256,8 +256,7 @@ void account_system_vtime(struct task_struct *tsk)
 		delta += sys_time;
 		get_paca()->system_time = 0;
 	}
-	account_system_time(tsk, 0, delta);
-	account_system_time_scaled(tsk, deltascaled);
+	account_system_time(tsk, 0, delta, deltascaled);
 	per_cpu(cputime_last_delta, smp_processor_id()) = delta;
 	per_cpu(cputime_scaled_last_delta, smp_processor_id()) = deltascaled;
 	local_irq_restore(flags);
@@ -275,10 +274,8 @@ void account_process_tick(struct task_struct *tsk, int user_tick)
 
 	utime = get_paca()->user_time;
 	get_paca()->user_time = 0;
-	account_user_time(tsk, utime);
-
 	utimescaled = cputime_to_scaled(utime);
-	account_user_time_scaled(tsk, utimescaled);
+	account_user_time(tsk, utime, utimescaled);
 }
 
 /*
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 75a6e62ea973..07283aea2e56 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -50,12 +50,12 @@ void account_process_tick(struct task_struct *tsk, int user_tick)
 	rcu_user_flag = cputime != 0;
 	S390_lowcore.user_timer -= cputime << 12;
 	S390_lowcore.steal_clock -= cputime << 12;
-	account_user_time(tsk, cputime);
+	account_user_time(tsk, cputime, cputime);
 
 	cputime =  S390_lowcore.system_timer >> 12;
 	S390_lowcore.system_timer -= cputime << 12;
 	S390_lowcore.steal_clock -= cputime << 12;
-	account_system_time(tsk, HARDIRQ_OFFSET, cputime);
+	account_system_time(tsk, HARDIRQ_OFFSET, cputime, cputime);
 
 	cputime = S390_lowcore.steal_clock;
 	if ((__s64) cputime > 0) {
@@ -82,12 +82,12 @@ void account_vtime(struct task_struct *tsk)
 	cputime = S390_lowcore.user_timer >> 12;
 	S390_lowcore.user_timer -= cputime << 12;
 	S390_lowcore.steal_clock -= cputime << 12;
-	account_user_time(tsk, cputime);
+	account_user_time(tsk, cputime, cputime);
 
 	cputime =  S390_lowcore.system_timer >> 12;
 	S390_lowcore.system_timer -= cputime << 12;
 	S390_lowcore.steal_clock -= cputime << 12;
-	account_system_time(tsk, 0, cputime);
+	account_system_time(tsk, 0, cputime, cputime);
 }
 
 /*
@@ -107,7 +107,7 @@ void account_system_vtime(struct task_struct *tsk)
 	cputime =  S390_lowcore.system_timer >> 12;
 	S390_lowcore.system_timer -= cputime << 12;
 	S390_lowcore.steal_clock -= cputime << 12;
-	account_system_time(tsk, 0, cputime);
+	account_system_time(tsk, 0, cputime, cputime);
 }
 EXPORT_SYMBOL_GPL(account_system_vtime);
 
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 4ee4b3d2316f..c78a459662a6 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -79,10 +79,8 @@ static inline unsigned int kstat_irqs(unsigned int irq)
 }
 
 extern unsigned long long task_delta_exec(struct task_struct *);
-extern void account_user_time(struct task_struct *, cputime_t);
-extern void account_user_time_scaled(struct task_struct *, cputime_t);
-extern void account_system_time(struct task_struct *, int, cputime_t);
-extern void account_system_time_scaled(struct task_struct *, cputime_t);
+extern void account_user_time(struct task_struct *, cputime_t, cputime_t);
+extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t);
 extern void account_steal_time(struct task_struct *, cputime_t);
 
 #endif /* _LINUX_KERNEL_STAT_H */
diff --git a/kernel/sched.c b/kernel/sched.c
index fff1c4a20b65..5b03679ff712 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4080,13 +4080,17 @@ unsigned long long task_delta_exec(struct task_struct *p)
  * Account user cpu time to a process.
  * @p: the process that the cpu time gets accounted to
  * @cputime: the cpu time spent in user space since the last update
+ * @cputime_scaled: cputime scaled by cpu frequency
  */
-void account_user_time(struct task_struct *p, cputime_t cputime)
+void account_user_time(struct task_struct *p, cputime_t cputime,
+		       cputime_t cputime_scaled)
 {
 	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
 	cputime64_t tmp;
 
+	/* Add user time to process. */
 	p->utime = cputime_add(p->utime, cputime);
+	p->utimescaled = cputime_add(p->utimescaled, cputime_scaled);
 	account_group_user_time(p, cputime);
 
 	/* Add user time to cpustat. */
@@ -4103,51 +4107,49 @@ void account_user_time(struct task_struct *p, cputime_t cputime)
  * Account guest cpu time to a process.
  * @p: the process that the cpu time gets accounted to
  * @cputime: the cpu time spent in virtual machine since the last update
+ * @cputime_scaled: cputime scaled by cpu frequency
  */
-static void account_guest_time(struct task_struct *p, cputime_t cputime)
+static void account_guest_time(struct task_struct *p, cputime_t cputime,
+			       cputime_t cputime_scaled)
 {
 	cputime64_t tmp;
 	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
 
 	tmp = cputime_to_cputime64(cputime);
 
+	/* Add guest time to process. */
 	p->utime = cputime_add(p->utime, cputime);
+	p->utimescaled = cputime_add(p->utimescaled, cputime_scaled);
 	account_group_user_time(p, cputime);
 	p->gtime = cputime_add(p->gtime, cputime);
 
+	/* Add guest time to cpustat. */
 	cpustat->user = cputime64_add(cpustat->user, tmp);
 	cpustat->guest = cputime64_add(cpustat->guest, tmp);
 }
 
-/*
- * Account scaled user cpu time to a process.
- * @p: the process that the cpu time gets accounted to
- * @cputime: the cpu time spent in user space since the last update
- */
-void account_user_time_scaled(struct task_struct *p, cputime_t cputime)
-{
-	p->utimescaled = cputime_add(p->utimescaled, cputime);
-}
-
 /*
  * Account system cpu time to a process.
  * @p: the process that the cpu time gets accounted to
  * @hardirq_offset: the offset to subtract from hardirq_count()
  * @cputime: the cpu time spent in kernel space since the last update
+ * @cputime_scaled: cputime scaled by cpu frequency
  */
 void account_system_time(struct task_struct *p, int hardirq_offset,
-			 cputime_t cputime)
+			 cputime_t cputime, cputime_t cputime_scaled)
 {
 	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
 	struct rq *rq = this_rq();
 	cputime64_t tmp;
 
 	if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
-		account_guest_time(p, cputime);
+		account_guest_time(p, cputime, cputime_scaled);
 		return;
 	}
 
+	/* Add system time to process. */
 	p->stime = cputime_add(p->stime, cputime);
+	p->stimescaled = cputime_add(p->stimescaled, cputime_scaled);
 	account_group_system_time(p, cputime);
 
 	/* Add system time to cpustat. */
@@ -4166,17 +4168,6 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
 	acct_update_integrals(p);
 }
 
-/*
- * Account scaled system cpu time to a process.
- * @p: the process that the cpu time gets accounted to
- * @hardirq_offset: the offset to subtract from hardirq_count()
- * @cputime: the cpu time spent in kernel space since the last update
- */
-void account_system_time_scaled(struct task_struct *p, cputime_t cputime)
-{
-	p->stimescaled = cputime_add(p->stimescaled, cputime);
-}
-
 /*
  * Account for involuntary wait time.
  * @p: the process from which the cpu time has been stolen
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 8f3fc2582d38..1f2fce2479fe 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -420,6 +420,7 @@ void tick_nohz_restart_sched_tick(void)
 	int cpu = smp_processor_id();
 	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
 	unsigned long ticks;
+	cputime_t cputime;
 	ktime_t now;
 
 	local_irq_disable();
@@ -452,8 +453,8 @@ void tick_nohz_restart_sched_tick(void)
 	 */
 	if (ticks && ticks < LONG_MAX) {
 		add_preempt_count(HARDIRQ_OFFSET);
-		account_system_time(current, HARDIRQ_OFFSET,
-				    jiffies_to_cputime(ticks));
+		cputime = jiffies_to_cputime(ticks);
+		account_system_time(current, HARDIRQ_OFFSET, cputime, cputime);
 		sub_preempt_count(HARDIRQ_OFFSET);
 	}
 
diff --git a/kernel/timer.c b/kernel/timer.c
index 566257d1dc10..b5efb528aa1d 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1023,13 +1023,11 @@ void account_process_tick(struct task_struct *p, int user_tick)
 {
 	cputime_t one_jiffy = jiffies_to_cputime(1);
 
-	if (user_tick) {
-		account_user_time(p, one_jiffy);
-		account_user_time_scaled(p, cputime_to_scaled(one_jiffy));
-	} else {
-		account_system_time(p, HARDIRQ_OFFSET, one_jiffy);
-		account_system_time_scaled(p, cputime_to_scaled(one_jiffy));
-	}
+	if (user_tick)
+		account_user_time(p, one_jiffy, cputime_to_scaled(one_jiffy));
+	else
+		account_system_time(p, HARDIRQ_OFFSET, one_jiffy,
+				    cputime_to_scaled(one_jiffy));
 }
 #endif
 
-- 
cgit v1.2.3


From 79741dd35713ff4f6fd0eafd59fa94e8a4ba922d Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 31 Dec 2008 15:11:38 +0100
Subject: [PATCH] idle cputime accounting

The cpu time spent by the idle process actually doing something is
currently accounted as idle time. This is plain wrong, the architectures
that support VIRT_CPU_ACCOUNTING=y can do better: distinguish between the
time spent doing nothing and the time spent by idle doing work. The first
is accounted with account_idle_time and the second with account_system_time.
The architectures that use the account_xxx_time interface directly and not
the account_xxx_ticks interface now need to do the check for the idle
process in their arch code. In particular to improve the system vs true
idle time accounting the arch code needs to measure the true idle time
instead of just testing for the idle process.
To improve the tick based accounting as well we would need an architecture
primitive that can tell us if the pt_regs of the interrupted context
points to the magic instruction that halts the cpu.

In addition idle time is no more added to the stime of the idle process.
This field now contains the system time of the idle process as it should
be. On systems without VIRT_CPU_ACCOUNTING this will always be zero as
every tick that occurs while idle is running will be accounted as idle
time.

This patch contains the necessary common code changes to be able to
distinguish idle system time and true idle time. The architectures with
support for VIRT_CPU_ACCOUNTING need some changes to exploit this.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/ia64/kernel/time.c       | 10 ++++--
 arch/powerpc/kernel/process.c |  1 +
 arch/powerpc/kernel/time.c    | 13 +++++--
 arch/s390/kernel/vtime.c      | 20 ++++++++---
 arch/x86/xen/time.c           | 10 +++---
 include/linux/kernel_stat.h   |  7 +++-
 include/linux/sched.h         |  1 -
 kernel/sched.c                | 80 ++++++++++++++++++++++++++++++++++---------
 kernel/time/tick-sched.c      | 13 ++++---
 kernel/timer.c                | 13 -------
 10 files changed, 114 insertions(+), 54 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 4ee367817049..f0ebb342409d 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -93,7 +93,10 @@ void ia64_account_on_switch(struct task_struct *prev, struct task_struct *next)
 	now = ia64_get_itc();
 
 	delta_stime = cycle_to_cputime(pi->ac_stime + (now - pi->ac_stamp));
-	account_system_time(prev, 0, delta_stime, delta_stime);
+	if (idle_task(smp_processor_id()) != prev)
+		account_system_time(prev, 0, delta_stime, delta_stime);
+	else
+		account_idle_time(delta_stime);
 
 	if (pi->ac_utime) {
 		delta_utime = cycle_to_cputime(pi->ac_utime);
@@ -120,7 +123,10 @@ void account_system_vtime(struct task_struct *tsk)
 	now = ia64_get_itc();
 
 	delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp));
-	account_system_time(tsk, 0, delta_stime, delta_stime);
+	if (irq_count() || idle_task(smp_processor_id()) != tsk)
+		account_system_time(tsk, 0, delta_stime, delta_stime);
+	else
+		account_idle_time(delta_stime);
 	ti->ac_stime = 0;
 
 	ti->ac_stamp = now;
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 51b201ddf9a1..fb7049c054c0 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -33,6 +33,7 @@
 #include <linux/mqueue.h>
 #include <linux/hardirq.h>
 #include <linux/utsname.h>
+#include <linux/kernel_stat.h>
 
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 92650ccad2e1..3be355c1cfa7 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -256,7 +256,10 @@ void account_system_vtime(struct task_struct *tsk)
 		delta += sys_time;
 		get_paca()->system_time = 0;
 	}
-	account_system_time(tsk, 0, delta, deltascaled);
+	if (in_irq() || idle_task(smp_processor_id()) != tsk)
+		account_system_time(tsk, 0, delta, deltascaled);
+	else
+		account_idle_time(delta);
 	per_cpu(cputime_last_delta, smp_processor_id()) = delta;
 	per_cpu(cputime_scaled_last_delta, smp_processor_id()) = deltascaled;
 	local_irq_restore(flags);
@@ -335,8 +338,12 @@ void calculate_steal_time(void)
 	tb = mftb();
 	purr = mfspr(SPRN_PURR);
 	stolen = (tb - pme->tb) - (purr - pme->purr);
-	if (stolen > 0)
-		account_steal_time(current, stolen);
+	if (stolen > 0) {
+		if (idle_task(smp_processor_id()) != current)
+			account_steal_time(stolen);
+		else
+			account_idle_time(stolen);
+	}
 	pme->tb = tb;
 	pme->purr = purr;
 }
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 07283aea2e56..4a4a34caec55 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -55,13 +55,19 @@ void account_process_tick(struct task_struct *tsk, int user_tick)
 	cputime =  S390_lowcore.system_timer >> 12;
 	S390_lowcore.system_timer -= cputime << 12;
 	S390_lowcore.steal_clock -= cputime << 12;
-	account_system_time(tsk, HARDIRQ_OFFSET, cputime, cputime);
+	if (idle_task(smp_processor_id()) != current)
+		account_system_time(tsk, HARDIRQ_OFFSET, cputime, cputime);
+	else
+		account_idle_time(cputime);
 
 	cputime = S390_lowcore.steal_clock;
 	if ((__s64) cputime > 0) {
 		cputime >>= 12;
 		S390_lowcore.steal_clock -= cputime << 12;
-		account_steal_time(tsk, cputime);
+		if (idle_task(smp_processor_id()) != current)
+			account_steal_time(cputime);
+		else
+			account_idle_time(cputime);
 	}
 }
 
@@ -87,7 +93,10 @@ void account_vtime(struct task_struct *tsk)
 	cputime =  S390_lowcore.system_timer >> 12;
 	S390_lowcore.system_timer -= cputime << 12;
 	S390_lowcore.steal_clock -= cputime << 12;
-	account_system_time(tsk, 0, cputime, cputime);
+	if (idle_task(smp_processor_id()) != current)
+		account_system_time(tsk, 0, cputime, cputime);
+	else
+		account_idle_time(cputime);
 }
 
 /*
@@ -107,7 +116,10 @@ void account_system_vtime(struct task_struct *tsk)
 	cputime =  S390_lowcore.system_timer >> 12;
 	S390_lowcore.system_timer -= cputime << 12;
 	S390_lowcore.steal_clock -= cputime << 12;
-	account_system_time(tsk, 0, cputime, cputime);
+	if (in_irq() || idle_task(smp_processor_id()) != current)
+		account_system_time(tsk, 0, cputime, cputime);
+	else
+		account_idle_time(cputime);
 }
 EXPORT_SYMBOL_GPL(account_system_vtime);
 
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index c9f7cda48ed7..732e52dc991a 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -132,8 +132,7 @@ static void do_stolen_accounting(void)
 	*snap = state;
 
 	/* Add the appropriate number of ticks of stolen time,
-	   including any left-overs from last time.  Passing NULL to
-	   account_steal_time accounts the time as stolen. */
+	   including any left-overs from last time. */
 	stolen = runnable + offline + __get_cpu_var(residual_stolen);
 
 	if (stolen < 0)
@@ -141,11 +140,10 @@ static void do_stolen_accounting(void)
 
 	ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen);
 	__get_cpu_var(residual_stolen) = stolen;
-	account_steal_time(NULL, ticks);
+	account_steal_ticks(ticks);
 
 	/* Add the appropriate number of ticks of blocked time,
-	   including any left-overs from last time.  Passing idle to
-	   account_steal_time accounts the time as idle/wait. */
+	   including any left-overs from last time. */
 	blocked += __get_cpu_var(residual_blocked);
 
 	if (blocked < 0)
@@ -153,7 +151,7 @@ static void do_stolen_accounting(void)
 
 	ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked);
 	__get_cpu_var(residual_blocked) = blocked;
-	account_steal_time(idle_task(smp_processor_id()), ticks);
+	account_idle_ticks(ticks);
 }
 
 /*
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index c78a459662a6..570d20413119 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -81,6 +81,11 @@ static inline unsigned int kstat_irqs(unsigned int irq)
 extern unsigned long long task_delta_exec(struct task_struct *);
 extern void account_user_time(struct task_struct *, cputime_t, cputime_t);
 extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t);
-extern void account_steal_time(struct task_struct *, cputime_t);
+extern void account_steal_time(cputime_t);
+extern void account_idle_time(cputime_t);
+
+extern void account_process_tick(struct task_struct *, int user);
+extern void account_steal_ticks(unsigned long ticks);
+extern void account_idle_ticks(unsigned long ticks);
 
 #endif /* _LINUX_KERNEL_STAT_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8395e715809d..b475d4db8053 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -284,7 +284,6 @@ long io_schedule_timeout(long timeout);
 
 extern void cpu_init (void);
 extern void trap_init(void);
-extern void account_process_tick(struct task_struct *task, int user);
 extern void update_process_times(int user);
 extern void scheduler_tick(void);
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 5b03679ff712..635eaffe1e4c 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4139,7 +4139,6 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
 			 cputime_t cputime, cputime_t cputime_scaled)
 {
 	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
-	struct rq *rq = this_rq();
 	cputime64_t tmp;
 
 	if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
@@ -4158,37 +4157,84 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
 		cpustat->irq = cputime64_add(cpustat->irq, tmp);
 	else if (softirq_count())
 		cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
-	else if (p != rq->idle)
-		cpustat->system = cputime64_add(cpustat->system, tmp);
-	else if (atomic_read(&rq->nr_iowait) > 0)
-		cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
 	else
-		cpustat->idle = cputime64_add(cpustat->idle, tmp);
+		cpustat->system = cputime64_add(cpustat->system, tmp);
+
 	/* Account for system time used */
 	acct_update_integrals(p);
 }
 
 /*
  * Account for involuntary wait time.
- * @p: the process from which the cpu time has been stolen
  * @steal: the cpu time spent in involuntary wait
  */
-void account_steal_time(struct task_struct *p, cputime_t steal)
+void account_steal_time(cputime_t cputime)
+{
+	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
+	cputime64_t cputime64 = cputime_to_cputime64(cputime);
+
+	cpustat->steal = cputime64_add(cpustat->steal, cputime64);
+}
+
+/*
+ * Account for idle time.
+ * @cputime: the cpu time spent in idle wait
+ */
+void account_idle_time(cputime_t cputime)
 {
 	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
-	cputime64_t tmp = cputime_to_cputime64(steal);
+	cputime64_t cputime64 = cputime_to_cputime64(cputime);
 	struct rq *rq = this_rq();
 
-	if (p == rq->idle) {
-		p->stime = cputime_add(p->stime, steal);
-		if (atomic_read(&rq->nr_iowait) > 0)
-			cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
-		else
-			cpustat->idle = cputime64_add(cpustat->idle, tmp);
-	} else
-		cpustat->steal = cputime64_add(cpustat->steal, tmp);
+	if (atomic_read(&rq->nr_iowait) > 0)
+		cpustat->iowait = cputime64_add(cpustat->iowait, cputime64);
+	else
+		cpustat->idle = cputime64_add(cpustat->idle, cputime64);
+}
+
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+
+/*
+ * Account a single tick of cpu time.
+ * @p: the process that the cpu time gets accounted to
+ * @user_tick: indicates if the tick is a user or a system tick
+ */
+void account_process_tick(struct task_struct *p, int user_tick)
+{
+	cputime_t one_jiffy = jiffies_to_cputime(1);
+	cputime_t one_jiffy_scaled = cputime_to_scaled(one_jiffy);
+	struct rq *rq = this_rq();
+
+	if (user_tick)
+		account_user_time(p, one_jiffy, one_jiffy_scaled);
+	else if (p != rq->idle)
+		account_system_time(p, HARDIRQ_OFFSET, one_jiffy,
+				    one_jiffy_scaled);
+	else
+		account_idle_time(one_jiffy);
+}
+
+/*
+ * Account multiple ticks of steal time.
+ * @p: the process from which the cpu time has been stolen
+ * @ticks: number of stolen ticks
+ */
+void account_steal_ticks(unsigned long ticks)
+{
+	account_steal_time(jiffies_to_cputime(ticks));
+}
+
+/*
+ * Account multiple ticks of idle time.
+ * @ticks: number of stolen ticks
+ */
+void account_idle_ticks(unsigned long ticks)
+{
+	account_idle_time(jiffies_to_cputime(ticks));
 }
 
+#endif
+
 /*
  * Use precise platform statistics if available:
  */
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 1f2fce2479fe..611fa4c0baab 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -419,8 +419,9 @@ void tick_nohz_restart_sched_tick(void)
 {
 	int cpu = smp_processor_id();
 	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
 	unsigned long ticks;
-	cputime_t cputime;
+#endif
 	ktime_t now;
 
 	local_irq_disable();
@@ -442,6 +443,7 @@ void tick_nohz_restart_sched_tick(void)
 	tick_do_update_jiffies64(now);
 	cpu_clear(cpu, nohz_cpu_mask);
 
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
 	/*
 	 * We stopped the tick in idle. Update process times would miss the
 	 * time we slept as update_process_times does only a 1 tick
@@ -451,12 +453,9 @@ void tick_nohz_restart_sched_tick(void)
 	/*
 	 * We might be one off. Do not randomly account a huge number of ticks!
 	 */
-	if (ticks && ticks < LONG_MAX) {
-		add_preempt_count(HARDIRQ_OFFSET);
-		cputime = jiffies_to_cputime(ticks);
-		account_system_time(current, HARDIRQ_OFFSET, cputime, cputime);
-		sub_preempt_count(HARDIRQ_OFFSET);
-	}
+	if (ticks && ticks < LONG_MAX)
+		account_idle_ticks(ticks);
+#endif
 
 	touch_softlockup_watchdog();
 	/*
diff --git a/kernel/timer.c b/kernel/timer.c
index b5efb528aa1d..dee3f641a7a7 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1018,19 +1018,6 @@ unsigned long get_next_timer_interrupt(unsigned long now)
 }
 #endif
 
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
-void account_process_tick(struct task_struct *p, int user_tick)
-{
-	cputime_t one_jiffy = jiffies_to_cputime(1);
-
-	if (user_tick)
-		account_user_time(p, one_jiffy, cputime_to_scaled(one_jiffy));
-	else
-		account_system_time(p, HARDIRQ_OFFSET, one_jiffy,
-				    cputime_to_scaled(one_jiffy));
-}
-#endif
-
 /*
  * Called from the timer interrupt handler to charge one tick to the current
  * process.  user_tick is 1 if the tick is user time, 0 for system.
-- 
cgit v1.2.3


From c4abb7c9cde24b7351a47328ef866e6a2bbb1ad0 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Fri, 26 Sep 2008 09:30:55 +0200
Subject: KVM: x86: Support for user space injected NMIs

Introduces the KVM_NMI IOCTL to the generic x86 part of KVM for
injecting NMIs from user space and also extends the statistic report
accordingly.

Based on the original patch by Sheng Yang.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Sheng Yang <sheng.yang@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/x86.c              | 46 +++++++++++++++++++++++++++++++++++++++--
 include/linux/kvm.h             | 11 ++++++++--
 3 files changed, 55 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index bfbbdea869bf..a40fa8478920 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -398,6 +398,7 @@ struct kvm_vcpu_stat {
 	u32 halt_exits;
 	u32 halt_wakeup;
 	u32 request_irq_exits;
+	u32 request_nmi_exits;
 	u32 irq_exits;
 	u32 host_state_reload;
 	u32 efer_reload;
@@ -406,6 +407,7 @@ struct kvm_vcpu_stat {
 	u32 insn_emulation_fail;
 	u32 hypercalls;
 	u32 irq_injections;
+	u32 nmi_injections;
 };
 
 struct descriptor_table {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1fa9a6db633d..07971451b947 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -86,6 +86,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
 	{ "hypercalls", VCPU_STAT(hypercalls) },
 	{ "request_irq", VCPU_STAT(request_irq_exits) },
+	{ "request_nmi", VCPU_STAT(request_nmi_exits) },
 	{ "irq_exits", VCPU_STAT(irq_exits) },
 	{ "host_state_reload", VCPU_STAT(host_state_reload) },
 	{ "efer_reload", VCPU_STAT(efer_reload) },
@@ -93,6 +94,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "insn_emulation", VCPU_STAT(insn_emulation) },
 	{ "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
 	{ "irq_injections", VCPU_STAT(irq_injections) },
+	{ "nmi_injections", VCPU_STAT(nmi_injections) },
 	{ "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
 	{ "mmu_pte_write", VM_STAT(mmu_pte_write) },
 	{ "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
@@ -1318,6 +1320,15 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
+{
+	vcpu_load(vcpu);
+	kvm_inject_nmi(vcpu);
+	vcpu_put(vcpu);
+
+	return 0;
+}
+
 static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
 					   struct kvm_tpr_access_ctl *tac)
 {
@@ -1377,6 +1388,13 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		r = 0;
 		break;
 	}
+	case KVM_NMI: {
+		r = kvm_vcpu_ioctl_nmi(vcpu);
+		if (r)
+			goto out;
+		r = 0;
+		break;
+	}
 	case KVM_SET_CPUID: {
 		struct kvm_cpuid __user *cpuid_arg = argp;
 		struct kvm_cpuid cpuid;
@@ -2812,18 +2830,37 @@ static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
 		(kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF));
 }
 
+/*
+ * Check if userspace requested a NMI window, and that the NMI window
+ * is open.
+ *
+ * No need to exit to userspace if we already have a NMI queued.
+ */
+static int dm_request_for_nmi_injection(struct kvm_vcpu *vcpu,
+					struct kvm_run *kvm_run)
+{
+	return (!vcpu->arch.nmi_pending &&
+		kvm_run->request_nmi_window &&
+		vcpu->arch.nmi_window_open);
+}
+
 static void post_kvm_run_save(struct kvm_vcpu *vcpu,
 			      struct kvm_run *kvm_run)
 {
 	kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
 	kvm_run->cr8 = kvm_get_cr8(vcpu);
 	kvm_run->apic_base = kvm_get_apic_base(vcpu);
-	if (irqchip_in_kernel(vcpu->kvm))
+	if (irqchip_in_kernel(vcpu->kvm)) {
 		kvm_run->ready_for_interrupt_injection = 1;
-	else
+		kvm_run->ready_for_nmi_injection = 1;
+	} else {
 		kvm_run->ready_for_interrupt_injection =
 					(vcpu->arch.interrupt_window_open &&
 					 vcpu->arch.irq_summary == 0);
+		kvm_run->ready_for_nmi_injection =
+					(vcpu->arch.nmi_window_open &&
+					 vcpu->arch.nmi_pending == 0);
+	}
 }
 
 static void vapic_enter(struct kvm_vcpu *vcpu)
@@ -2999,6 +3036,11 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		}
 
 		if (r > 0) {
+			if (dm_request_for_nmi_injection(vcpu, kvm_run)) {
+				r = -EINTR;
+				kvm_run->exit_reason = KVM_EXIT_NMI;
+				++vcpu->stat.request_nmi_exits;
+			}
 			if (dm_request_for_irq_injection(vcpu, kvm_run)) {
 				r = -EINTR;
 				kvm_run->exit_reason = KVM_EXIT_INTR;
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index f18b86fa8655..44fd7fa0af2b 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -83,18 +83,22 @@ struct kvm_irqchip {
 #define KVM_EXIT_S390_SIEIC       13
 #define KVM_EXIT_S390_RESET       14
 #define KVM_EXIT_DCR              15
+#define KVM_EXIT_NMI              16
+#define KVM_EXIT_NMI_WINDOW_OPEN  17
 
 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
 struct kvm_run {
 	/* in */
 	__u8 request_interrupt_window;
-	__u8 padding1[7];
+	__u8 request_nmi_window;
+	__u8 padding1[6];
 
 	/* out */
 	__u32 exit_reason;
 	__u8 ready_for_interrupt_injection;
 	__u8 if_flag;
-	__u8 padding2[2];
+	__u8 ready_for_nmi_injection;
+	__u8 padding2;
 
 	/* in (pre_kvm_run), out (post_kvm_run) */
 	__u64 cr8;
@@ -387,6 +391,7 @@ struct kvm_trace_rec {
 #define KVM_CAP_DEVICE_ASSIGNMENT 17
 #endif
 #define KVM_CAP_IOMMU 18
+#define KVM_CAP_NMI 19
 
 /*
  * ioctls for VM fds
@@ -458,6 +463,8 @@ struct kvm_trace_rec {
 #define KVM_S390_INITIAL_RESET    _IO(KVMIO,  0x97)
 #define KVM_GET_MP_STATE          _IOR(KVMIO,  0x98, struct kvm_mp_state)
 #define KVM_SET_MP_STATE          _IOW(KVMIO,  0x99, struct kvm_mp_state)
+/* Available with KVM_CAP_NMI */
+#define KVM_NMI                   _IO(KVMIO,  0x9a)
 
 #define KVM_TRC_INJ_VIRQ         (KVM_TRC_HANDLER + 0x02)
 #define KVM_TRC_REDELIVER_EVT    (KVM_TRC_HANDLER + 0x03)
-- 
cgit v1.2.3


From e19e30effac03f5a005a8e42ed941a2a5dc62654 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Mon, 20 Oct 2008 16:07:10 +0800
Subject: KVM: IRQ ACK notifier should be used with in-kernel irqchip

Also remove unnecessary parameter of unregister irq ack notifier.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 include/linux/kvm_host.h | 3 +--
 virt/kvm/irq_comm.c      | 8 ++++++--
 virt/kvm/kvm_main.c      | 2 +-
 3 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index bb92be2153bc..3a0fb77d1f6a 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -316,8 +316,7 @@ void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
 				   struct kvm_irq_ack_notifier *kian);
-void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
-				     struct kvm_irq_ack_notifier *kian);
+void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian);
 int kvm_request_irq_source_id(struct kvm *kvm);
 void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
 
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 55ad76ee2d09..9fbbdea3d1d5 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -58,12 +58,16 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi)
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
 				   struct kvm_irq_ack_notifier *kian)
 {
+	/* Must be called with in-kernel IRQ chip, otherwise it's nonsense */
+	ASSERT(irqchip_in_kernel(kvm));
+	ASSERT(kian);
 	hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list);
 }
 
-void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
-				     struct kvm_irq_ack_notifier *kian)
+void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian)
 {
+	if (!kian)
+		return;
 	hlist_del(&kian->link);
 }
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a87f45edfae8..4f43abe198e4 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -143,7 +143,7 @@ static void kvm_free_assigned_device(struct kvm *kvm,
 	if (irqchip_in_kernel(kvm) && assigned_dev->irq_requested)
 		free_irq(assigned_dev->host_irq, (void *)assigned_dev);
 
-	kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier);
+	kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier);
 	kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
 
 	if (cancel_work_sync(&assigned_dev->interrupt_work))
-- 
cgit v1.2.3


From 4f906c19ae29397409bedabf7bbe5cb42ad90332 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Mon, 24 Nov 2008 14:32:51 +0800
Subject: KVM: Replace irq_requested with more generic irq_requested_type

Separate guest irq type and host irq type, for we can support guest using INTx
with host using MSI (but not opposite combination).

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 include/linux/kvm_host.h | 4 +++-
 virt/kvm/kvm_main.c      | 9 +++++----
 2 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 3a0fb77d1f6a..c3d4b96a08fa 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -307,7 +307,9 @@ struct kvm_assigned_dev_kernel {
 	int host_devfn;
 	int host_irq;
 	int guest_irq;
-	int irq_requested;
+#define KVM_ASSIGNED_DEV_GUEST_INTX	(1 << 0)
+#define KVM_ASSIGNED_DEV_HOST_INTX	(1 << 8)
+	unsigned long irq_requested_type;
 	int irq_source_id;
 	struct pci_dev *dev;
 	struct kvm *kvm;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index ef2f03cf42c0..638de47e167d 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -140,7 +140,7 @@ static void kvm_free_assigned_device(struct kvm *kvm,
 				     struct kvm_assigned_dev_kernel
 				     *assigned_dev)
 {
-	if (irqchip_in_kernel(kvm) && assigned_dev->irq_requested)
+	if (irqchip_in_kernel(kvm) && assigned_dev->irq_requested_type)
 		free_irq(assigned_dev->host_irq, (void *)assigned_dev);
 
 	kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier);
@@ -180,7 +180,7 @@ static int assigned_device_update_intx(struct kvm *kvm,
 			struct kvm_assigned_dev_kernel *adev,
 			struct kvm_assigned_irq *airq)
 {
-	if (adev->irq_requested) {
+	if (adev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_INTX) {
 		adev->guest_irq = airq->guest_irq;
 		adev->ack_notifier.gsi = airq->guest_irq;
 		return 0;
@@ -207,7 +207,8 @@ static int assigned_device_update_intx(struct kvm *kvm,
 			return -EIO;
 	}
 
-	adev->irq_requested = true;
+	adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_INTX |
+				   KVM_ASSIGNED_DEV_HOST_INTX;
 	return 0;
 }
 
@@ -227,7 +228,7 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
 		return -EINVAL;
 	}
 
-	if (!match->irq_requested) {
+	if (!match->irq_requested_type) {
 		INIT_WORK(&match->interrupt_work,
 				kvm_assigned_dev_interrupt_work_handler);
 		if (irqchip_in_kernel(kvm)) {
-- 
cgit v1.2.3


From 0937c48d075ddd59ae2c12a6fa8308b9c7a63753 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Mon, 24 Nov 2008 14:32:53 +0800
Subject: KVM: Add fields for MSI device assignment

Prepared for kvm_arch_assigned_device_msi_dispatch().

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 include/linux/kvm.h      | 7 +++++++
 include/linux/kvm_host.h | 4 ++++
 2 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 44fd7fa0af2b..bb283c388a24 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -507,10 +507,17 @@ struct kvm_assigned_irq {
 	__u32 guest_irq;
 	__u32 flags;
 	union {
+		struct {
+			__u32 addr_lo;
+			__u32 addr_hi;
+			__u32 data;
+		} guest_msi;
 		__u32 reserved[12];
 	};
 };
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
 
+#define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI	(1 << 0)
+
 #endif
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index c3d4b96a08fa..8091a4d90ddf 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -16,6 +16,7 @@
 #include <linux/mm.h>
 #include <linux/preempt.h>
 #include <linux/marker.h>
+#include <linux/msi.h>
 #include <asm/signal.h>
 
 #include <linux/kvm.h>
@@ -307,8 +308,11 @@ struct kvm_assigned_dev_kernel {
 	int host_devfn;
 	int host_irq;
 	int guest_irq;
+	struct msi_msg guest_msi;
 #define KVM_ASSIGNED_DEV_GUEST_INTX	(1 << 0)
+#define KVM_ASSIGNED_DEV_GUEST_MSI	(1 << 1)
 #define KVM_ASSIGNED_DEV_HOST_INTX	(1 << 8)
+#define KVM_ASSIGNED_DEV_HOST_MSI	(1 << 9)
 	unsigned long irq_requested_type;
 	int irq_source_id;
 	struct pci_dev *dev;
-- 
cgit v1.2.3


From 6b9cc7fd469869bed38831c5adac3f59dc25eaf5 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Mon, 24 Nov 2008 14:32:56 +0800
Subject: KVM: Enable MSI for device assignment

We enable guest MSI and host MSI support in this patch. The userspace want to
enable MSI should set KVM_DEV_IRQ_ASSIGN_ENABLE_MSI in the assigned_irq's flag.
Function would return -ENOTTY if can't enable MSI, userspace shouldn't set MSI
Enable bit when KVM_ASSIGN_IRQ return -ENOTTY with
KVM_DEV_IRQ_ASSIGN_ENABLE_MSI.

Userspace can tell the support of MSI device from #ifdef KVM_CAP_DEVICE_MSI.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 include/linux/kvm.h |  3 ++
 virt/kvm/kvm_main.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 78 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index bb283c388a24..0997e6f5490c 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -392,6 +392,9 @@ struct kvm_trace_rec {
 #endif
 #define KVM_CAP_IOMMU 18
 #define KVM_CAP_NMI 19
+#if defined(CONFIG_X86)
+#define KVM_CAP_DEVICE_MSI 20
+#endif
 
 /*
  * ioctls for VM fds
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 228c1d18a614..bf36ae9ae7df 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -159,9 +159,15 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
 	 * finer-grained lock, update this
 	 */
 	mutex_lock(&assigned_dev->kvm->lock);
-	kvm_set_irq(assigned_dev->kvm,
-		    assigned_dev->irq_source_id,
-		    assigned_dev->guest_irq, 1);
+	if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_INTX)
+		kvm_set_irq(assigned_dev->kvm,
+			    assigned_dev->irq_source_id,
+			    assigned_dev->guest_irq, 1);
+	else if (assigned_dev->irq_requested_type &
+				KVM_ASSIGNED_DEV_GUEST_MSI) {
+		assigned_device_msi_dispatch(assigned_dev);
+		enable_irq(assigned_dev->host_irq);
+	}
 	mutex_unlock(&assigned_dev->kvm->lock);
 	kvm_put_kvm(assigned_dev->kvm);
 }
@@ -197,6 +203,8 @@ static void kvm_free_assigned_device(struct kvm *kvm,
 {
 	if (irqchip_in_kernel(kvm) && assigned_dev->irq_requested_type)
 		free_irq(assigned_dev->host_irq, (void *)assigned_dev);
+	if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)
+		pci_disable_msi(assigned_dev->dev);
 
 	kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier);
 	kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
@@ -242,6 +250,11 @@ static int assigned_device_update_intx(struct kvm *kvm,
 		return 0;
 
 	if (irqchip_in_kernel(kvm)) {
+		if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) {
+			free_irq(adev->host_irq, (void *)kvm);
+			pci_disable_msi(adev->dev);
+		}
+
 		if (!capable(CAP_SYS_RAWIO))
 			return -EPERM;
 
@@ -265,6 +278,41 @@ static int assigned_device_update_intx(struct kvm *kvm,
 	return 0;
 }
 
+#ifdef CONFIG_X86
+static int assigned_device_update_msi(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *adev,
+			struct kvm_assigned_irq *airq)
+{
+	int r;
+
+	/* x86 don't care upper address of guest msi message addr */
+	adev->guest_msi.address_lo = airq->guest_msi.addr_lo;
+	adev->guest_msi.data = airq->guest_msi.data;
+	adev->ack_notifier.gsi = -1;
+
+	if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)
+		return 0;
+
+	if (irqchip_in_kernel(kvm)) {
+		if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_INTX)
+			free_irq(adev->host_irq, (void *)adev);
+
+		r = pci_enable_msi(adev->dev);
+		if (r)
+			return r;
+
+		adev->host_irq = adev->dev->irq;
+		if (request_irq(adev->host_irq, kvm_assigned_dev_intr, 0,
+				"kvm_assigned_msi_device", (void *)adev))
+			return -EIO;
+	}
+
+	adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_MSI |
+				   KVM_ASSIGNED_DEV_HOST_MSI;
+	return 0;
+}
+#endif
+
 static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
 				   struct kvm_assigned_irq
 				   *assigned_irq)
@@ -301,9 +349,30 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
 		}
 	}
 
-	r = assigned_device_update_intx(kvm, match, assigned_irq);
-	if (r)
-		goto out_release;
+	if (assigned_irq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI) {
+#ifdef CONFIG_X86
+		r = assigned_device_update_msi(kvm, match, assigned_irq);
+		if (r) {
+			printk(KERN_WARNING "kvm: failed to enable "
+					"MSI device!\n");
+			goto out_release;
+		}
+#else
+		r = -ENOTTY;
+#endif
+	} else if (assigned_irq->host_irq == 0 && match->dev->irq == 0) {
+		/* Host device IRQ 0 means don't support INTx */
+		printk(KERN_WARNING "kvm: wait device to enable MSI!\n");
+		r = 0;
+	} else {
+		/* Non-sharing INTx mode */
+		r = assigned_device_update_intx(kvm, match, assigned_irq);
+		if (r) {
+			printk(KERN_WARNING "kvm: failed to enable "
+					"INTx device!\n");
+			goto out_release;
+		}
+	}
 
 	mutex_unlock(&kvm->lock);
 	return r;
-- 
cgit v1.2.3


From 1a811b6167089bcdb84284f2dc9fd0b4d0f1899d Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Mon, 8 Dec 2008 18:25:27 +0200
Subject: KVM: Advertise the bug in memory region destruction as fixed

Userspace might need to act differently.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 include/linux/kvm.h |  2 ++
 virt/kvm/kvm_main.c | 13 ++++++++++++-
 2 files changed, 14 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 0997e6f5490c..48807767e726 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -395,6 +395,8 @@ struct kvm_trace_rec {
 #if defined(CONFIG_X86)
 #define KVM_CAP_DEVICE_MSI 20
 #endif
+/* Bug in KVM_SET_USER_MEMORY_REGION fixed: */
+#define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21
 
 /*
  * ioctls for VM fds
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index e7644b90667e..e066eb125e55 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1905,6 +1905,17 @@ static int kvm_dev_ioctl_create_vm(void)
 	return fd;
 }
 
+static long kvm_dev_ioctl_check_extension_generic(long arg)
+{
+	switch (arg) {
+	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
+		return 1;
+	default:
+		break;
+	}
+	return kvm_dev_ioctl_check_extension(arg);
+}
+
 static long kvm_dev_ioctl(struct file *filp,
 			  unsigned int ioctl, unsigned long arg)
 {
@@ -1924,7 +1935,7 @@ static long kvm_dev_ioctl(struct file *filp,
 		r = kvm_dev_ioctl_create_vm();
 		break;
 	case KVM_CHECK_EXTENSION:
-		r = kvm_dev_ioctl_check_extension(arg);
+		r = kvm_dev_ioctl_check_extension_generic(arg);
 		break;
 	case KVM_GET_VCPU_MMAP_SIZE:
 		r = -EINVAL;
-- 
cgit v1.2.3


From defaf1587c5d7dff828f6f11c8941e5bcef00f50 Mon Sep 17 00:00:00 2001
From: Mark McLoughlin <markmc@redhat.com>
Date: Tue, 2 Dec 2008 12:16:33 +0000
Subject: KVM: fix handling of ACK from shared guest IRQ

If an assigned device shares a guest irq with an emulated
device then we currently interpret an ack generated by the
emulated device as originating from the assigned device
leading to e.g. "Unbalanced enable for IRQ 4347" from the
enable_irq() in kvm_assigned_dev_ack_irq().

The fix is fairly simple - don't enable the physical device
irq unless it was previously disabled.

Of course, this can still lead to a situation where a
non-assigned device ACK can cause the physical device irq to
be reenabled before the device was serviced. However, being
level sensitive, the interrupt will merely be regenerated.

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 include/linux/kvm_host.h |  1 +
 virt/kvm/kvm_main.c      | 15 ++++++++++++++-
 2 files changed, 15 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8091a4d90ddf..eafabd5c66b2 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -307,6 +307,7 @@ struct kvm_assigned_dev_kernel {
 	int host_busnr;
 	int host_devfn;
 	int host_irq;
+	bool host_irq_disabled;
 	int guest_irq;
 	struct msi_msg guest_msi;
 #define KVM_ASSIGNED_DEV_GUEST_INTX	(1 << 0)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index eb70ca6c7145..fc6127cbea1f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -170,6 +170,7 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
 				KVM_ASSIGNED_DEV_GUEST_MSI) {
 		assigned_device_msi_dispatch(assigned_dev);
 		enable_irq(assigned_dev->host_irq);
+		assigned_dev->host_irq_disabled = false;
 	}
 	mutex_unlock(&assigned_dev->kvm->lock);
 	kvm_put_kvm(assigned_dev->kvm);
@@ -181,8 +182,12 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
 		(struct kvm_assigned_dev_kernel *) dev_id;
 
 	kvm_get_kvm(assigned_dev->kvm);
+
 	schedule_work(&assigned_dev->interrupt_work);
+
 	disable_irq_nosync(irq);
+	assigned_dev->host_irq_disabled = true;
+
 	return IRQ_HANDLED;
 }
 
@@ -196,8 +201,16 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
 
 	dev = container_of(kian, struct kvm_assigned_dev_kernel,
 			   ack_notifier);
+
 	kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0);
-	enable_irq(dev->host_irq);
+
+	/* The guest irq may be shared so this ack may be
+	 * from another device.
+	 */
+	if (dev->host_irq_disabled) {
+		enable_irq(dev->host_irq);
+		dev->host_irq_disabled = false;
+	}
 }
 
 static void kvm_free_assigned_irq(struct kvm *kvm,
-- 
cgit v1.2.3


From 4531220b71f0399e71cda0c4cf749e7281a7416a Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Thu, 11 Dec 2008 16:54:54 +0100
Subject: KVM: x86: Rework user space NMI injection as KVM_CAP_USER_NMI

There is no point in doing the ready_for_nmi_injection/
request_nmi_window dance with user space. First, we don't do this for
in-kernel irqchip anyway, while the code path is the same as for user
space irqchip mode. And second, there is nothing to loose if a pending
NMI is overwritten by another one (in contrast to IRQs where we have to
save the number). Actually, there is even the risk of raising spurious
NMIs this way because the reason for the held-back NMI might already be
handled while processing the first one.

Therefore this patch creates a simplified user space NMI injection
interface, exporting it under KVM_CAP_USER_NMI and dropping the old
KVM_CAP_NMI capability. And this time we also take care to provide the
interface only on archs supporting NMIs via KVM (right now only x86).

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/vmx.c  | 24 ++----------------------
 arch/x86/kvm/x86.c  | 28 ++--------------------------
 include/linux/kvm.h | 11 +++++------
 3 files changed, 9 insertions(+), 54 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 487e1dcdce33..6259d7467648 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2498,15 +2498,13 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 	}
 	if (vcpu->arch.nmi_injected) {
 		vmx_inject_nmi(vcpu);
-		if (vcpu->arch.nmi_pending || kvm_run->request_nmi_window)
+		if (vcpu->arch.nmi_pending)
 			enable_nmi_window(vcpu);
 		else if (vcpu->arch.irq_summary
 			 || kvm_run->request_interrupt_window)
 			enable_irq_window(vcpu);
 		return;
 	}
-	if (!vcpu->arch.nmi_window_open || kvm_run->request_nmi_window)
-		enable_nmi_window(vcpu);
 
 	if (vcpu->arch.interrupt_window_open) {
 		if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
@@ -3040,14 +3038,6 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
 	++vcpu->stat.nmi_window_exits;
 
-	/*
-	 * If the user space waits to inject a NMI, exit as soon as possible
-	 */
-	if (kvm_run->request_nmi_window && !vcpu->arch.nmi_pending) {
-		kvm_run->exit_reason = KVM_EXIT_NMI_WINDOW_OPEN;
-		return 0;
-	}
-
 	return 1;
 }
 
@@ -3162,7 +3152,7 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 			vmx->soft_vnmi_blocked = 0;
 			vcpu->arch.nmi_window_open = 1;
 		} else if (vmx->vnmi_blocked_time > 1000000000LL &&
-		    (kvm_run->request_nmi_window || vcpu->arch.nmi_pending)) {
+			   vcpu->arch.nmi_pending) {
 			/*
 			 * This CPU don't support us in finding the end of an
 			 * NMI-blocked window if the guest runs with IRQs
@@ -3175,16 +3165,6 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 			vmx->soft_vnmi_blocked = 0;
 			vmx->vcpu.arch.nmi_window_open = 1;
 		}
-
-		/*
-		 * If the user space waits to inject an NNI, exit ASAP
-		 */
-		if (vcpu->arch.nmi_window_open && kvm_run->request_nmi_window
-		    && !vcpu->arch.nmi_pending) {
-			kvm_run->exit_reason = KVM_EXIT_NMI_WINDOW_OPEN;
-			++vcpu->stat.nmi_window_exits;
-			return 0;
-		}
 	}
 
 	if (exit_reason < kvm_vmx_max_exit_handlers
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 10302d3bd415..0e6aa8141dcd 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2887,37 +2887,18 @@ static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
 		(kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF));
 }
 
-/*
- * Check if userspace requested a NMI window, and that the NMI window
- * is open.
- *
- * No need to exit to userspace if we already have a NMI queued.
- */
-static int dm_request_for_nmi_injection(struct kvm_vcpu *vcpu,
-					struct kvm_run *kvm_run)
-{
-	return (!vcpu->arch.nmi_pending &&
-		kvm_run->request_nmi_window &&
-		vcpu->arch.nmi_window_open);
-}
-
 static void post_kvm_run_save(struct kvm_vcpu *vcpu,
 			      struct kvm_run *kvm_run)
 {
 	kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
 	kvm_run->cr8 = kvm_get_cr8(vcpu);
 	kvm_run->apic_base = kvm_get_apic_base(vcpu);
-	if (irqchip_in_kernel(vcpu->kvm)) {
+	if (irqchip_in_kernel(vcpu->kvm))
 		kvm_run->ready_for_interrupt_injection = 1;
-		kvm_run->ready_for_nmi_injection = 1;
-	} else {
+	else
 		kvm_run->ready_for_interrupt_injection =
 					(vcpu->arch.interrupt_window_open &&
 					 vcpu->arch.irq_summary == 0);
-		kvm_run->ready_for_nmi_injection =
-					(vcpu->arch.nmi_window_open &&
-					 vcpu->arch.nmi_pending == 0);
-	}
 }
 
 static void vapic_enter(struct kvm_vcpu *vcpu)
@@ -3093,11 +3074,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		}
 
 		if (r > 0) {
-			if (dm_request_for_nmi_injection(vcpu, kvm_run)) {
-				r = -EINTR;
-				kvm_run->exit_reason = KVM_EXIT_NMI;
-				++vcpu->stat.request_nmi_exits;
-			}
 			if (dm_request_for_irq_injection(vcpu, kvm_run)) {
 				r = -EINTR;
 				kvm_run->exit_reason = KVM_EXIT_INTR;
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 48807767e726..35525ac63337 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -84,21 +84,18 @@ struct kvm_irqchip {
 #define KVM_EXIT_S390_RESET       14
 #define KVM_EXIT_DCR              15
 #define KVM_EXIT_NMI              16
-#define KVM_EXIT_NMI_WINDOW_OPEN  17
 
 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
 struct kvm_run {
 	/* in */
 	__u8 request_interrupt_window;
-	__u8 request_nmi_window;
-	__u8 padding1[6];
+	__u8 padding1[7];
 
 	/* out */
 	__u32 exit_reason;
 	__u8 ready_for_interrupt_injection;
 	__u8 if_flag;
-	__u8 ready_for_nmi_injection;
-	__u8 padding2;
+	__u8 padding2[2];
 
 	/* in (pre_kvm_run), out (post_kvm_run) */
 	__u64 cr8;
@@ -391,12 +388,14 @@ struct kvm_trace_rec {
 #define KVM_CAP_DEVICE_ASSIGNMENT 17
 #endif
 #define KVM_CAP_IOMMU 18
-#define KVM_CAP_NMI 19
 #if defined(CONFIG_X86)
 #define KVM_CAP_DEVICE_MSI 20
 #endif
 /* Bug in KVM_SET_USER_MEMORY_REGION fixed: */
 #define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21
+#if defined(CONFIG_X86)
+#define KVM_CAP_USER_NMI 22
+#endif
 
 /*
  * ioctls for VM fds
-- 
cgit v1.2.3


From b30f8af3358b5c66be223e3a9f3d11b3d02b4a8f Mon Sep 17 00:00:00 2001
From: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Date: Mon, 17 Nov 2008 14:35:21 +0200
Subject: mmc: Add 8-bit bus width support

Signed-off-by: Jarkko Lavinen <jarkko.lavinen@nokia.com>
Signed-off-by: Pierre Ossman <drzeus@drzeus.cx>
---
 drivers/mmc/core/mmc.c   | 18 ++++++++++++++----
 include/linux/mmc/host.h |  2 ++
 2 files changed, 16 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index fdd7c760be8c..c232d11a7ed4 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -434,13 +434,24 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 	 * Activate wide bus (if supported).
 	 */
 	if ((card->csd.mmca_vsn >= CSD_SPEC_VER_4) &&
-		(host->caps & MMC_CAP_4_BIT_DATA)) {
+	    (host->caps & (MMC_CAP_4_BIT_DATA | MMC_CAP_8_BIT_DATA))) {
+		unsigned ext_csd_bit, bus_width;
+
+		if (host->caps & MMC_CAP_8_BIT_DATA) {
+			ext_csd_bit = EXT_CSD_BUS_WIDTH_8;
+			bus_width = MMC_BUS_WIDTH_8;
+		} else {
+			ext_csd_bit = EXT_CSD_BUS_WIDTH_4;
+			bus_width = MMC_BUS_WIDTH_4;
+		}
+
 		err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
-			EXT_CSD_BUS_WIDTH, EXT_CSD_BUS_WIDTH_4);
+				 EXT_CSD_BUS_WIDTH, ext_csd_bit);
+
 		if (err)
 			goto free_card;
 
-		mmc_set_bus_width(card->host, MMC_BUS_WIDTH_4);
+		mmc_set_bus_width(card->host, bus_width);
 	}
 
 	if (!oldcard)
@@ -624,4 +635,3 @@ err:
 
 	return err;
 }
-
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index f842f234e44f..4e457256bd33 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -41,6 +41,7 @@ struct mmc_ios {
 
 #define MMC_BUS_WIDTH_1		0
 #define MMC_BUS_WIDTH_4		2
+#define MMC_BUS_WIDTH_8		3
 
 	unsigned char	timing;			/* timing specification used */
 
@@ -116,6 +117,7 @@ struct mmc_host {
 #define MMC_CAP_SDIO_IRQ	(1 << 3)	/* Can signal pending SDIO IRQs */
 #define MMC_CAP_SPI		(1 << 4)	/* Talks only SPI protocols */
 #define MMC_CAP_NEEDS_POLL	(1 << 5)	/* Needs polling for card-detection */
+#define MMC_CAP_8_BIT_DATA	(1 << 6)	/* Can the host do 8 bit transfers */
 
 	/* host specific block data */
 	unsigned int		max_seg_size;	/* see blk_queue_max_segment_size */
-- 
cgit v1.2.3


From 86e8286a0e48663e1e86a5884b30a6d05de2993a Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Wed, 26 Nov 2008 22:54:17 +0300
Subject: mmc: Add mmc_vddrange_to_ocrmask() helper function

This function sets the OCR mask bits according to provided voltage
ranges. Will be used by the mmc_spi OpenFirmware bindings.

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Signed-off-by: Pierre Ossman <drzeus@drzeus.cx>
---
 drivers/mmc/core/core.c  | 75 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mmc/core.h |  2 ++
 2 files changed, 77 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index f7284b905eb3..5f288aeeb721 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -20,6 +20,7 @@
 #include <linux/err.h>
 #include <linux/leds.h>
 #include <linux/scatterlist.h>
+#include <linux/log2.h>
 
 #include <linux/mmc/card.h>
 #include <linux/mmc/host.h>
@@ -448,6 +449,80 @@ void mmc_set_bus_width(struct mmc_host *host, unsigned int width)
 	mmc_set_ios(host);
 }
 
+/**
+ * mmc_vdd_to_ocrbitnum - Convert a voltage to the OCR bit number
+ * @vdd:	voltage (mV)
+ * @low_bits:	prefer low bits in boundary cases
+ *
+ * This function returns the OCR bit number according to the provided @vdd
+ * value. If conversion is not possible a negative errno value returned.
+ *
+ * Depending on the @low_bits flag the function prefers low or high OCR bits
+ * on boundary voltages. For example,
+ * with @low_bits = true, 3300 mV translates to ilog2(MMC_VDD_32_33);
+ * with @low_bits = false, 3300 mV translates to ilog2(MMC_VDD_33_34);
+ *
+ * Any value in the [1951:1999] range translates to the ilog2(MMC_VDD_20_21).
+ */
+static int mmc_vdd_to_ocrbitnum(int vdd, bool low_bits)
+{
+	const int max_bit = ilog2(MMC_VDD_35_36);
+	int bit;
+
+	if (vdd < 1650 || vdd > 3600)
+		return -EINVAL;
+
+	if (vdd >= 1650 && vdd <= 1950)
+		return ilog2(MMC_VDD_165_195);
+
+	if (low_bits)
+		vdd -= 1;
+
+	/* Base 2000 mV, step 100 mV, bit's base 8. */
+	bit = (vdd - 2000) / 100 + 8;
+	if (bit > max_bit)
+		return max_bit;
+	return bit;
+}
+
+/**
+ * mmc_vddrange_to_ocrmask - Convert a voltage range to the OCR mask
+ * @vdd_min:	minimum voltage value (mV)
+ * @vdd_max:	maximum voltage value (mV)
+ *
+ * This function returns the OCR mask bits according to the provided @vdd_min
+ * and @vdd_max values. If conversion is not possible the function returns 0.
+ *
+ * Notes wrt boundary cases:
+ * This function sets the OCR bits for all boundary voltages, for example
+ * [3300:3400] range is translated to MMC_VDD_32_33 | MMC_VDD_33_34 |
+ * MMC_VDD_34_35 mask.
+ */
+u32 mmc_vddrange_to_ocrmask(int vdd_min, int vdd_max)
+{
+	u32 mask = 0;
+
+	if (vdd_max < vdd_min)
+		return 0;
+
+	/* Prefer high bits for the boundary vdd_max values. */
+	vdd_max = mmc_vdd_to_ocrbitnum(vdd_max, false);
+	if (vdd_max < 0)
+		return 0;
+
+	/* Prefer low bits for the boundary vdd_min values. */
+	vdd_min = mmc_vdd_to_ocrbitnum(vdd_min, true);
+	if (vdd_min < 0)
+		return 0;
+
+	/* Fill the mask, from max bit to min bit. */
+	while (vdd_max >= vdd_min)
+		mask |= 1 << vdd_max--;
+
+	return mask;
+}
+EXPORT_SYMBOL(mmc_vddrange_to_ocrmask);
+
 /*
  * Mask off any voltages we don't support and select
  * the lowest voltage
diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h
index 143cebf0586f..7ac8b500d55c 100644
--- a/include/linux/mmc/core.h
+++ b/include/linux/mmc/core.h
@@ -151,4 +151,6 @@ static inline void mmc_claim_host(struct mmc_host *host)
 	__mmc_claim_host(host, NULL);
 }
 
+extern u32 mmc_vddrange_to_ocrmask(int vdd_min, int vdd_max);
+
 #endif
-- 
cgit v1.2.3


From 9c43df57910bbba540a6cb5c9132302a9ea5f41a Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Tue, 30 Dec 2008 18:15:28 +0300
Subject: mmc_spi: Add support for OpenFirmware bindings

The support is implemented via platform data accessors, new module
(of_mmc_spi) will be created automatically when the driver compiles
on OpenFirmware platforms. Link-time dependency will load the module
automatically.

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Signed-off-by: Pierre Ossman <drzeus@drzeus.cx>
---
 drivers/mmc/host/Makefile     |   3 +
 drivers/mmc/host/mmc_spi.c    |   4 +-
 drivers/mmc/host/of_mmc_spi.c | 149 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/spi/mmc_spi.h   |  15 ++++-
 4 files changed, 169 insertions(+), 2 deletions(-)
 create mode 100644 drivers/mmc/host/of_mmc_spi.c

(limited to 'include/linux')

diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile
index c794cc5ce442..f4853288bbb1 100644
--- a/drivers/mmc/host/Makefile
+++ b/drivers/mmc/host/Makefile
@@ -19,6 +19,9 @@ obj-$(CONFIG_MMC_AT91)		+= at91_mci.o
 obj-$(CONFIG_MMC_ATMELMCI)	+= atmel-mci.o
 obj-$(CONFIG_MMC_TIFM_SD)	+= tifm_sd.o
 obj-$(CONFIG_MMC_SPI)		+= mmc_spi.o
+ifeq ($(CONFIG_OF),y)
+obj-$(CONFIG_MMC_SPI)		+= of_mmc_spi.o
+endif
 obj-$(CONFIG_MMC_S3C)   	+= s3cmci.o
 obj-$(CONFIG_MMC_SDRICOH_CS)	+= sdricoh_cs.o
 obj-$(CONFIG_MMC_TMIO)		+= tmio_mmc.o
diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c
index ad00e1632317..87e211df68ac 100644
--- a/drivers/mmc/host/mmc_spi.c
+++ b/drivers/mmc/host/mmc_spi.c
@@ -1285,7 +1285,7 @@ static int mmc_spi_probe(struct spi_device *spi)
 	/* Platform data is used to hook up things like card sensing
 	 * and power switching gpios.
 	 */
-	host->pdata = spi->dev.platform_data;
+	host->pdata = mmc_spi_get_pdata(spi);
 	if (host->pdata)
 		mmc->ocr_avail = host->pdata->ocr_mask;
 	if (!mmc->ocr_avail) {
@@ -1368,6 +1368,7 @@ fail_glue_init:
 
 fail_nobuf1:
 	mmc_free_host(mmc);
+	mmc_spi_put_pdata(spi);
 	dev_set_drvdata(&spi->dev, NULL);
 
 nomem:
@@ -1402,6 +1403,7 @@ static int __devexit mmc_spi_remove(struct spi_device *spi)
 
 		spi->max_speed_hz = mmc->f_max;
 		mmc_free_host(mmc);
+		mmc_spi_put_pdata(spi);
 		dev_set_drvdata(&spi->dev, NULL);
 	}
 	return 0;
diff --git a/drivers/mmc/host/of_mmc_spi.c b/drivers/mmc/host/of_mmc_spi.c
new file mode 100644
index 000000000000..fb2921f8099d
--- /dev/null
+++ b/drivers/mmc/host/of_mmc_spi.c
@@ -0,0 +1,149 @@
+/*
+ * OpenFirmware bindings for the MMC-over-SPI driver
+ *
+ * Copyright (c) MontaVista Software, Inc. 2008.
+ *
+ * Author: Anton Vorontsov <avorontsov@ru.mvista.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/gpio.h>
+#include <linux/of.h>
+#include <linux/of_gpio.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/mmc_spi.h>
+#include <linux/mmc/core.h>
+#include <linux/mmc/host.h>
+
+enum {
+	CD_GPIO = 0,
+	WP_GPIO,
+	NUM_GPIOS,
+};
+
+struct of_mmc_spi {
+	int gpios[NUM_GPIOS];
+	bool alow_gpios[NUM_GPIOS];
+	struct mmc_spi_platform_data pdata;
+};
+
+static struct of_mmc_spi *to_of_mmc_spi(struct device *dev)
+{
+	return container_of(dev->platform_data, struct of_mmc_spi, pdata);
+}
+
+static int of_mmc_spi_read_gpio(struct device *dev, int gpio_num)
+{
+	struct of_mmc_spi *oms = to_of_mmc_spi(dev);
+	bool active_low = oms->alow_gpios[gpio_num];
+	bool value = gpio_get_value(oms->gpios[gpio_num]);
+
+	return active_low ^ value;
+}
+
+static int of_mmc_spi_get_cd(struct device *dev)
+{
+	return of_mmc_spi_read_gpio(dev, CD_GPIO);
+}
+
+static int of_mmc_spi_get_ro(struct device *dev)
+{
+	return of_mmc_spi_read_gpio(dev, WP_GPIO);
+}
+
+struct mmc_spi_platform_data *mmc_spi_get_pdata(struct spi_device *spi)
+{
+	struct device *dev = &spi->dev;
+	struct device_node *np = dev_archdata_get_node(&dev->archdata);
+	struct of_mmc_spi *oms;
+	const u32 *voltage_ranges;
+	int num_ranges;
+	int i;
+	int ret = -EINVAL;
+
+	if (dev->platform_data || !np)
+		return dev->platform_data;
+
+	oms = kzalloc(sizeof(*oms), GFP_KERNEL);
+	if (!oms)
+		return NULL;
+
+	voltage_ranges = of_get_property(np, "voltage-ranges", &num_ranges);
+	num_ranges = num_ranges / sizeof(*voltage_ranges) / 2;
+	if (!voltage_ranges || !num_ranges) {
+		dev_err(dev, "OF: voltage-ranges unspecified\n");
+		goto err_ocr;
+	}
+
+	for (i = 0; i < num_ranges; i++) {
+		const int j = i * 2;
+		u32 mask;
+
+		mask = mmc_vddrange_to_ocrmask(voltage_ranges[j],
+					       voltage_ranges[j + 1]);
+		if (!mask) {
+			ret = -EINVAL;
+			dev_err(dev, "OF: voltage-range #%d is invalid\n", i);
+			goto err_ocr;
+		}
+		oms->pdata.ocr_mask |= mask;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(oms->gpios); i++) {
+		enum of_gpio_flags gpio_flags;
+
+		oms->gpios[i] = of_get_gpio_flags(np, i, &gpio_flags);
+		if (!gpio_is_valid(oms->gpios[i]))
+			continue;
+
+		ret = gpio_request(oms->gpios[i], dev->bus_id);
+		if (ret < 0) {
+			oms->gpios[i] = -EINVAL;
+			continue;
+		}
+
+		if (gpio_flags & OF_GPIO_ACTIVE_LOW)
+			oms->alow_gpios[i] = true;
+	}
+
+	if (gpio_is_valid(oms->gpios[CD_GPIO]))
+		oms->pdata.get_cd = of_mmc_spi_get_cd;
+	if (gpio_is_valid(oms->gpios[WP_GPIO]))
+		oms->pdata.get_ro = of_mmc_spi_get_ro;
+
+	/* We don't support interrupts yet, let's poll. */
+	oms->pdata.caps |= MMC_CAP_NEEDS_POLL;
+
+	dev->platform_data = &oms->pdata;
+	return dev->platform_data;
+err_ocr:
+	kfree(oms);
+	return NULL;
+}
+EXPORT_SYMBOL(mmc_spi_get_pdata);
+
+void mmc_spi_put_pdata(struct spi_device *spi)
+{
+	struct device *dev = &spi->dev;
+	struct device_node *np = dev_archdata_get_node(&dev->archdata);
+	struct of_mmc_spi *oms = to_of_mmc_spi(dev);
+	int i;
+
+	if (!dev->platform_data || !np)
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(oms->gpios); i++) {
+		if (gpio_is_valid(oms->gpios[i]))
+			gpio_free(oms->gpios[i]);
+	}
+	kfree(oms);
+	dev->platform_data = NULL;
+}
+EXPORT_SYMBOL(mmc_spi_put_pdata);
diff --git a/include/linux/spi/mmc_spi.h b/include/linux/spi/mmc_spi.h
index a3626aedaec9..0f4eb165f254 100644
--- a/include/linux/spi/mmc_spi.h
+++ b/include/linux/spi/mmc_spi.h
@@ -1,9 +1,10 @@
 #ifndef __LINUX_SPI_MMC_SPI_H
 #define __LINUX_SPI_MMC_SPI_H
 
+#include <linux/device.h>
+#include <linux/spi/spi.h>
 #include <linux/interrupt.h>
 
-struct device;
 struct mmc_host;
 
 /* Put this in platform_data of a device being used to manage an MMC/SD
@@ -41,4 +42,16 @@ struct mmc_spi_platform_data {
 	void (*setpower)(struct device *, unsigned int maskval);
 };
 
+#ifdef CONFIG_OF
+extern struct mmc_spi_platform_data *mmc_spi_get_pdata(struct spi_device *spi);
+extern void mmc_spi_put_pdata(struct spi_device *spi);
+#else
+static inline struct mmc_spi_platform_data *
+mmc_spi_get_pdata(struct spi_device *spi)
+{
+	return spi->dev.platform_data;
+}
+static inline void mmc_spi_put_pdata(struct spi_device *spi) {}
+#endif /* CONFIG_OF */
+
 #endif /* __LINUX_SPI_MMC_SPI_H */
-- 
cgit v1.2.3


From be6d3e56a6b9b3a4ee44a0685e39e595073c6f0d Mon Sep 17 00:00:00 2001
From: Kentaro Takeda <takedakn@nttdata.co.jp>
Date: Wed, 17 Dec 2008 13:24:15 +0900
Subject: introduce new LSM hooks where vfsmount is available.

Add new LSM hooks for path-based checks.  Call them on directory-modifying
operations at the points where we still know the vfsmount involved.

Signed-off-by: Kentaro Takeda <takedakn@nttdata.co.jp>
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Toshiharu Harada <haradats@nttdata.co.jp>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c               |  36 +++++++++++++
 fs/open.c                |   5 ++
 include/linux/security.h | 137 +++++++++++++++++++++++++++++++++++++++++++++++
 net/unix/af_unix.c       |   4 ++
 security/Kconfig         |   9 ++++
 security/capability.c    |  57 ++++++++++++++++++++
 security/security.c      |  66 +++++++++++++++++++++++
 7 files changed, 314 insertions(+)

(limited to 'include/linux')

diff --git a/fs/namei.c b/fs/namei.c
index af3783fff1de..ab441af4196b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1556,6 +1556,9 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
 		 * Refuse to truncate files with mandatory locks held on them.
 		 */
 		error = locks_verify_locked(inode);
+		if (!error)
+			error = security_path_truncate(&nd->path, 0,
+					       ATTR_MTIME|ATTR_CTIME|ATTR_OPEN);
 		if (!error) {
 			DQUOT_INIT(inode);
 
@@ -1586,7 +1589,11 @@ static int __open_namei_create(struct nameidata *nd, struct path *path,
 
 	if (!IS_POSIXACL(dir->d_inode))
 		mode &= ~current->fs->umask;
+	error = security_path_mknod(&nd->path, path->dentry, mode, 0);
+	if (error)
+		goto out_unlock;
 	error = vfs_create(dir->d_inode, path->dentry, mode, nd);
+out_unlock:
 	mutex_unlock(&dir->d_inode->i_mutex);
 	dput(nd->path.dentry);
 	nd->path.dentry = path->dentry;
@@ -1999,6 +2006,9 @@ asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode,
 	error = mnt_want_write(nd.path.mnt);
 	if (error)
 		goto out_dput;
+	error = security_path_mknod(&nd.path, dentry, mode, dev);
+	if (error)
+		goto out_drop_write;
 	switch (mode & S_IFMT) {
 		case 0: case S_IFREG:
 			error = vfs_create(nd.path.dentry->d_inode,dentry,mode,&nd);
@@ -2011,6 +2021,7 @@ asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode,
 			error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode,0);
 			break;
 	}
+out_drop_write:
 	mnt_drop_write(nd.path.mnt);
 out_dput:
 	dput(dentry);
@@ -2070,7 +2081,11 @@ asmlinkage long sys_mkdirat(int dfd, const char __user *pathname, int mode)
 	error = mnt_want_write(nd.path.mnt);
 	if (error)
 		goto out_dput;
+	error = security_path_mkdir(&nd.path, dentry, mode);
+	if (error)
+		goto out_drop_write;
 	error = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode);
+out_drop_write:
 	mnt_drop_write(nd.path.mnt);
 out_dput:
 	dput(dentry);
@@ -2180,7 +2195,11 @@ static long do_rmdir(int dfd, const char __user *pathname)
 	error = mnt_want_write(nd.path.mnt);
 	if (error)
 		goto exit3;
+	error = security_path_rmdir(&nd.path, dentry);
+	if (error)
+		goto exit4;
 	error = vfs_rmdir(nd.path.dentry->d_inode, dentry);
+exit4:
 	mnt_drop_write(nd.path.mnt);
 exit3:
 	dput(dentry);
@@ -2265,7 +2284,11 @@ static long do_unlinkat(int dfd, const char __user *pathname)
 		error = mnt_want_write(nd.path.mnt);
 		if (error)
 			goto exit2;
+		error = security_path_unlink(&nd.path, dentry);
+		if (error)
+			goto exit3;
 		error = vfs_unlink(nd.path.dentry->d_inode, dentry);
+exit3:
 		mnt_drop_write(nd.path.mnt);
 	exit2:
 		dput(dentry);
@@ -2346,7 +2369,11 @@ asmlinkage long sys_symlinkat(const char __user *oldname,
 	error = mnt_want_write(nd.path.mnt);
 	if (error)
 		goto out_dput;
+	error = security_path_symlink(&nd.path, dentry, from);
+	if (error)
+		goto out_drop_write;
 	error = vfs_symlink(nd.path.dentry->d_inode, dentry, from);
+out_drop_write:
 	mnt_drop_write(nd.path.mnt);
 out_dput:
 	dput(dentry);
@@ -2443,7 +2470,11 @@ asmlinkage long sys_linkat(int olddfd, const char __user *oldname,
 	error = mnt_want_write(nd.path.mnt);
 	if (error)
 		goto out_dput;
+	error = security_path_link(old_path.dentry, &nd.path, new_dentry);
+	if (error)
+		goto out_drop_write;
 	error = vfs_link(old_path.dentry, nd.path.dentry->d_inode, new_dentry);
+out_drop_write:
 	mnt_drop_write(nd.path.mnt);
 out_dput:
 	dput(new_dentry);
@@ -2679,8 +2710,13 @@ asmlinkage long sys_renameat(int olddfd, const char __user *oldname,
 	error = mnt_want_write(oldnd.path.mnt);
 	if (error)
 		goto exit5;
+	error = security_path_rename(&oldnd.path, old_dentry,
+				     &newnd.path, new_dentry);
+	if (error)
+		goto exit6;
 	error = vfs_rename(old_dir->d_inode, old_dentry,
 				   new_dir->d_inode, new_dentry);
+exit6:
 	mnt_drop_write(oldnd.path.mnt);
 exit5:
 	dput(new_dentry);
diff --git a/fs/open.c b/fs/open.c
index c0a426d5766c..1cd7d40e9991 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -272,6 +272,8 @@ static long do_sys_truncate(const char __user *pathname, loff_t length)
 		goto put_write_and_out;
 
 	error = locks_verify_truncate(inode, NULL, length);
+	if (!error)
+		error = security_path_truncate(&path, length, 0);
 	if (!error) {
 		DQUOT_INIT(inode);
 		error = do_truncate(path.dentry, length, 0, NULL);
@@ -328,6 +330,9 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
 		goto out_putf;
 
 	error = locks_verify_truncate(inode, file, length);
+	if (!error)
+		error = security_path_truncate(&file->f_path, length,
+					       ATTR_MTIME|ATTR_CTIME);
 	if (!error)
 		error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file);
 out_putf:
diff --git a/include/linux/security.h b/include/linux/security.h
index 3416cb85e77b..b92b5e453f64 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -335,17 +335,37 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@dir contains the inode structure of the parent directory of the new link.
  *	@new_dentry contains the dentry structure for the new link.
  *	Return 0 if permission is granted.
+ * @path_link:
+ *	Check permission before creating a new hard link to a file.
+ *	@old_dentry contains the dentry structure for an existing link
+ *	to the file.
+ *	@new_dir contains the path structure of the parent directory of
+ *	the new link.
+ *	@new_dentry contains the dentry structure for the new link.
+ *	Return 0 if permission is granted.
  * @inode_unlink:
  *	Check the permission to remove a hard link to a file.
  *	@dir contains the inode structure of parent directory of the file.
  *	@dentry contains the dentry structure for file to be unlinked.
  *	Return 0 if permission is granted.
+ * @path_unlink:
+ *	Check the permission to remove a hard link to a file.
+ *	@dir contains the path structure of parent directory of the file.
+ *	@dentry contains the dentry structure for file to be unlinked.
+ *	Return 0 if permission is granted.
  * @inode_symlink:
  *	Check the permission to create a symbolic link to a file.
  *	@dir contains the inode structure of parent directory of the symbolic link.
  *	@dentry contains the dentry structure of the symbolic link.
  *	@old_name contains the pathname of file.
  *	Return 0 if permission is granted.
+ * @path_symlink:
+ *	Check the permission to create a symbolic link to a file.
+ *	@dir contains the path structure of parent directory of
+ *	the symbolic link.
+ *	@dentry contains the dentry structure of the symbolic link.
+ *	@old_name contains the pathname of file.
+ *	Return 0 if permission is granted.
  * @inode_mkdir:
  *	Check permissions to create a new directory in the existing directory
  *	associated with inode strcture @dir.
@@ -353,11 +373,25 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@dentry contains the dentry structure of new directory.
  *	@mode contains the mode of new directory.
  *	Return 0 if permission is granted.
+ * @path_mkdir:
+ *	Check permissions to create a new directory in the existing directory
+ *	associated with path strcture @path.
+ *	@dir containst the path structure of parent of the directory
+ *	to be created.
+ *	@dentry contains the dentry structure of new directory.
+ *	@mode contains the mode of new directory.
+ *	Return 0 if permission is granted.
  * @inode_rmdir:
  *	Check the permission to remove a directory.
  *	@dir contains the inode structure of parent of the directory to be removed.
  *	@dentry contains the dentry structure of directory to be removed.
  *	Return 0 if permission is granted.
+ * @path_rmdir:
+ *	Check the permission to remove a directory.
+ *	@dir contains the path structure of parent of the directory to be
+ *	removed.
+ *	@dentry contains the dentry structure of directory to be removed.
+ *	Return 0 if permission is granted.
  * @inode_mknod:
  *	Check permissions when creating a special file (or a socket or a fifo
  *	file created via the mknod system call).  Note that if mknod operation
@@ -368,6 +402,15 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@mode contains the mode of the new file.
  *	@dev contains the device number.
  *	Return 0 if permission is granted.
+ * @path_mknod:
+ *	Check permissions when creating a file. Note that this hook is called
+ *	even if mknod operation is being done for a regular file.
+ *	@dir contains the path structure of parent of the new file.
+ *	@dentry contains the dentry structure of the new file.
+ *	@mode contains the mode of the new file.
+ *	@dev contains the undecoded device number. Use new_decode_dev() to get
+ *	the decoded device number.
+ *	Return 0 if permission is granted.
  * @inode_rename:
  *	Check for permission to rename a file or directory.
  *	@old_dir contains the inode structure for parent of the old link.
@@ -375,6 +418,13 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@new_dir contains the inode structure for parent of the new link.
  *	@new_dentry contains the dentry structure of the new link.
  *	Return 0 if permission is granted.
+ * @path_rename:
+ *	Check for permission to rename a file or directory.
+ *	@old_dir contains the path structure for parent of the old link.
+ *	@old_dentry contains the dentry structure of the old link.
+ *	@new_dir contains the path structure for parent of the new link.
+ *	@new_dentry contains the dentry structure of the new link.
+ *	Return 0 if permission is granted.
  * @inode_readlink:
  *	Check the permission to read the symbolic link.
  *	@dentry contains the dentry structure for the file link.
@@ -403,6 +453,12 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@dentry contains the dentry structure for the file.
  *	@attr is the iattr structure containing the new file attributes.
  *	Return 0 if permission is granted.
+ * @path_truncate:
+ *	Check permission before truncating a file.
+ *	@path contains the path structure for the file.
+ *	@length is the new length of the file.
+ *	@time_attrs is the flags passed to do_truncate().
+ *	Return 0 if permission is granted.
  * @inode_getattr:
  *	Check permission before obtaining file attributes.
  *	@mnt is the vfsmount where the dentry was looked up
@@ -1331,6 +1387,22 @@ struct security_operations {
 				   struct super_block *newsb);
 	int (*sb_parse_opts_str) (char *options, struct security_mnt_opts *opts);
 
+#ifdef CONFIG_SECURITY_PATH
+	int (*path_unlink) (struct path *dir, struct dentry *dentry);
+	int (*path_mkdir) (struct path *dir, struct dentry *dentry, int mode);
+	int (*path_rmdir) (struct path *dir, struct dentry *dentry);
+	int (*path_mknod) (struct path *dir, struct dentry *dentry, int mode,
+			   unsigned int dev);
+	int (*path_truncate) (struct path *path, loff_t length,
+			      unsigned int time_attrs);
+	int (*path_symlink) (struct path *dir, struct dentry *dentry,
+			     const char *old_name);
+	int (*path_link) (struct dentry *old_dentry, struct path *new_dir,
+			  struct dentry *new_dentry);
+	int (*path_rename) (struct path *old_dir, struct dentry *old_dentry,
+			    struct path *new_dir, struct dentry *new_dentry);
+#endif
+
 	int (*inode_alloc_security) (struct inode *inode);
 	void (*inode_free_security) (struct inode *inode);
 	int (*inode_init_security) (struct inode *inode, struct inode *dir,
@@ -2705,6 +2777,71 @@ static inline void security_skb_classify_flow(struct sk_buff *skb, struct flowi
 
 #endif	/* CONFIG_SECURITY_NETWORK_XFRM */
 
+#ifdef CONFIG_SECURITY_PATH
+int security_path_unlink(struct path *dir, struct dentry *dentry);
+int security_path_mkdir(struct path *dir, struct dentry *dentry, int mode);
+int security_path_rmdir(struct path *dir, struct dentry *dentry);
+int security_path_mknod(struct path *dir, struct dentry *dentry, int mode,
+			unsigned int dev);
+int security_path_truncate(struct path *path, loff_t length,
+			   unsigned int time_attrs);
+int security_path_symlink(struct path *dir, struct dentry *dentry,
+			  const char *old_name);
+int security_path_link(struct dentry *old_dentry, struct path *new_dir,
+		       struct dentry *new_dentry);
+int security_path_rename(struct path *old_dir, struct dentry *old_dentry,
+			 struct path *new_dir, struct dentry *new_dentry);
+#else	/* CONFIG_SECURITY_PATH */
+static inline int security_path_unlink(struct path *dir, struct dentry *dentry)
+{
+	return 0;
+}
+
+static inline int security_path_mkdir(struct path *dir, struct dentry *dentry,
+				      int mode)
+{
+	return 0;
+}
+
+static inline int security_path_rmdir(struct path *dir, struct dentry *dentry)
+{
+	return 0;
+}
+
+static inline int security_path_mknod(struct path *dir, struct dentry *dentry,
+				      int mode, unsigned int dev)
+{
+	return 0;
+}
+
+static inline int security_path_truncate(struct path *path, loff_t length,
+					 unsigned int time_attrs)
+{
+	return 0;
+}
+
+static inline int security_path_symlink(struct path *dir, struct dentry *dentry,
+					const char *old_name)
+{
+	return 0;
+}
+
+static inline int security_path_link(struct dentry *old_dentry,
+				     struct path *new_dir,
+				     struct dentry *new_dentry)
+{
+	return 0;
+}
+
+static inline int security_path_rename(struct path *old_dir,
+				       struct dentry *old_dentry,
+				       struct path *new_dir,
+				       struct dentry *new_dentry)
+{
+	return 0;
+}
+#endif	/* CONFIG_SECURITY_PATH */
+
 #ifdef CONFIG_KEYS
 #ifdef CONFIG_SECURITY
 
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index c6250d0055d2..d1b89820ab4f 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -836,7 +836,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 		err = mnt_want_write(nd.path.mnt);
 		if (err)
 			goto out_mknod_dput;
+		err = security_path_mknod(&nd.path, dentry, mode, 0);
+		if (err)
+			goto out_mknod_drop_write;
 		err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0);
+out_mknod_drop_write:
 		mnt_drop_write(nd.path.mnt);
 		if (err)
 			goto out_mknod_dput;
diff --git a/security/Kconfig b/security/Kconfig
index d9f47ce7e207..9438535d7fd0 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -81,6 +81,15 @@ config SECURITY_NETWORK_XFRM
 	  IPSec.
 	  If you are unsure how to answer this question, answer N.
 
+config SECURITY_PATH
+	bool "Security hooks for pathname based access control"
+	depends on SECURITY
+	help
+	  This enables the security hooks for pathname based access control.
+	  If enabled, a security module can use these hooks to
+	  implement pathname based access controls.
+	  If you are unsure how to answer this question, answer N.
+
 config SECURITY_FILE_CAPABILITIES
 	bool "File POSIX Capabilities"
 	default n
diff --git a/security/capability.c b/security/capability.c
index 2dce66fcb992..c545bd1300b5 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -263,6 +263,53 @@ static void cap_inode_getsecid(const struct inode *inode, u32 *secid)
 	*secid = 0;
 }
 
+#ifdef CONFIG_SECURITY_PATH
+static int cap_path_mknod(struct path *dir, struct dentry *dentry, int mode,
+			  unsigned int dev)
+{
+	return 0;
+}
+
+static int cap_path_mkdir(struct path *dir, struct dentry *dentry, int mode)
+{
+	return 0;
+}
+
+static int cap_path_rmdir(struct path *dir, struct dentry *dentry)
+{
+	return 0;
+}
+
+static int cap_path_unlink(struct path *dir, struct dentry *dentry)
+{
+	return 0;
+}
+
+static int cap_path_symlink(struct path *dir, struct dentry *dentry,
+			    const char *old_name)
+{
+	return 0;
+}
+
+static int cap_path_link(struct dentry *old_dentry, struct path *new_dir,
+			 struct dentry *new_dentry)
+{
+	return 0;
+}
+
+static int cap_path_rename(struct path *old_path, struct dentry *old_dentry,
+			   struct path *new_path, struct dentry *new_dentry)
+{
+	return 0;
+}
+
+static int cap_path_truncate(struct path *path, loff_t length,
+			     unsigned int time_attrs)
+{
+	return 0;
+}
+#endif
+
 static int cap_file_permission(struct file *file, int mask)
 {
 	return 0;
@@ -883,6 +930,16 @@ void security_fixup_ops(struct security_operations *ops)
 	set_to_cap_if_null(ops, inode_setsecurity);
 	set_to_cap_if_null(ops, inode_listsecurity);
 	set_to_cap_if_null(ops, inode_getsecid);
+#ifdef CONFIG_SECURITY_PATH
+	set_to_cap_if_null(ops, path_mknod);
+	set_to_cap_if_null(ops, path_mkdir);
+	set_to_cap_if_null(ops, path_rmdir);
+	set_to_cap_if_null(ops, path_unlink);
+	set_to_cap_if_null(ops, path_symlink);
+	set_to_cap_if_null(ops, path_link);
+	set_to_cap_if_null(ops, path_rename);
+	set_to_cap_if_null(ops, path_truncate);
+#endif
 	set_to_cap_if_null(ops, file_permission);
 	set_to_cap_if_null(ops, file_alloc_security);
 	set_to_cap_if_null(ops, file_free_security);
diff --git a/security/security.c b/security/security.c
index d85dbb37c972..678d4d07b852 100644
--- a/security/security.c
+++ b/security/security.c
@@ -355,6 +355,72 @@ int security_inode_init_security(struct inode *inode, struct inode *dir,
 }
 EXPORT_SYMBOL(security_inode_init_security);
 
+#ifdef CONFIG_SECURITY_PATH
+int security_path_mknod(struct path *path, struct dentry *dentry, int mode,
+			unsigned int dev)
+{
+	if (unlikely(IS_PRIVATE(path->dentry->d_inode)))
+		return 0;
+	return security_ops->path_mknod(path, dentry, mode, dev);
+}
+EXPORT_SYMBOL(security_path_mknod);
+
+int security_path_mkdir(struct path *path, struct dentry *dentry, int mode)
+{
+	if (unlikely(IS_PRIVATE(path->dentry->d_inode)))
+		return 0;
+	return security_ops->path_mkdir(path, dentry, mode);
+}
+
+int security_path_rmdir(struct path *path, struct dentry *dentry)
+{
+	if (unlikely(IS_PRIVATE(path->dentry->d_inode)))
+		return 0;
+	return security_ops->path_rmdir(path, dentry);
+}
+
+int security_path_unlink(struct path *path, struct dentry *dentry)
+{
+	if (unlikely(IS_PRIVATE(path->dentry->d_inode)))
+		return 0;
+	return security_ops->path_unlink(path, dentry);
+}
+
+int security_path_symlink(struct path *path, struct dentry *dentry,
+			  const char *old_name)
+{
+	if (unlikely(IS_PRIVATE(path->dentry->d_inode)))
+		return 0;
+	return security_ops->path_symlink(path, dentry, old_name);
+}
+
+int security_path_link(struct dentry *old_dentry, struct path *new_dir,
+		       struct dentry *new_dentry)
+{
+	if (unlikely(IS_PRIVATE(old_dentry->d_inode)))
+		return 0;
+	return security_ops->path_link(old_dentry, new_dir, new_dentry);
+}
+
+int security_path_rename(struct path *old_dir, struct dentry *old_dentry,
+			 struct path *new_dir, struct dentry *new_dentry)
+{
+	if (unlikely(IS_PRIVATE(old_dentry->d_inode) ||
+		     (new_dentry->d_inode && IS_PRIVATE(new_dentry->d_inode))))
+		return 0;
+	return security_ops->path_rename(old_dir, old_dentry, new_dir,
+					 new_dentry);
+}
+
+int security_path_truncate(struct path *path, loff_t length,
+			   unsigned int time_attrs)
+{
+	if (unlikely(IS_PRIVATE(path->dentry->d_inode)))
+		return 0;
+	return security_ops->path_truncate(path, length, time_attrs);
+}
+#endif
+
 int security_inode_create(struct inode *dir, struct dentry *dentry, int mode)
 {
 	if (unlikely(IS_PRIVATE(dir)))
-- 
cgit v1.2.3


From c2452f32786159ed85f0e4b21fec09258f822fc8 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Mon, 1 Dec 2008 09:33:43 +0100
Subject: shrink struct dentry

struct dentry is one of the most critical structures in the kernel. So it's
sad to see it going neglected.

With CONFIG_PROFILING turned on (which is probably the common case at least
for distros and kernel developers), sizeof(struct dcache) == 208 here
(64-bit). This gives 19 objects per slab.

I packed d_mounted into a hole, and took another 4 bytes off the inline
name length to take the padding out from the end of the structure. This
shinks it to 200 bytes. I could have gone the other way and increased the
length to 40, but I'm aiming for a magic number, read on...

I then got rid of the d_cookie pointer. This shrinks it to 192 bytes. Rant:
why was this ever a good idea? The cookie system should increase its hash
size or use a tree or something if lookups are a problem. Also the "fast
dcookie lookups" in oprofile should be moved into the dcookie code -- how
can oprofile possibly care about the dcookie_mutex? It gets dropped after
get_dcookie() returns so it can't be providing any sort of protection.

At 192 bytes, 21 objects fit into a 4K page, saving about 3MB on my system
with ~140 000 entries allocated. 192 is also a multiple of 64, so we get
nice cacheline alignment on 64 and 32 byte line systems -- any given dentry
will now require 3 cachelines to touch all fields wheras previously it
would require 4.

I know the inline name size was chosen quite carefully, however with the
reduction in cacheline footprint, it should actually be just about as fast
to do a name lookup for a 36 character name as it was before the patch (and
faster for other sizes). The memory footprint savings for names which are
<= 32 or > 36 bytes long should more than make up for the memory cost for
33-36 byte names.

Performance is a feature...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/powerpc/oprofile/cell/spu_task_sync.c |  2 +-
 drivers/oprofile/buffer_sync.c             |  2 +-
 fs/dcache.c                                |  4 ----
 fs/dcookies.c                              | 28 +++++++++++++++++++---------
 include/linux/dcache.h                     | 21 ++++++++++++++-------
 5 files changed, 35 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c
index 2949126d28d1..6b793aeda72e 100644
--- a/arch/powerpc/oprofile/cell/spu_task_sync.c
+++ b/arch/powerpc/oprofile/cell/spu_task_sync.c
@@ -297,7 +297,7 @@ static inline unsigned long fast_get_dcookie(struct path *path)
 {
 	unsigned long cookie;
 
-	if (path->dentry->d_cookie)
+	if (path->dentry->d_flags & DCACHE_COOKIE)
 		return (unsigned long)path->dentry;
 	get_dcookie(path, &cookie);
 	return cookie;
diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c
index 737bd9484822..65e8294a9e29 100644
--- a/drivers/oprofile/buffer_sync.c
+++ b/drivers/oprofile/buffer_sync.c
@@ -200,7 +200,7 @@ static inline unsigned long fast_get_dcookie(struct path *path)
 {
 	unsigned long cookie;
 
-	if (path->dentry->d_cookie)
+	if (path->dentry->d_flags & DCACHE_COOKIE)
 		return (unsigned long)path->dentry;
 	get_dcookie(path, &cookie);
 	return cookie;
diff --git a/fs/dcache.c b/fs/dcache.c
index a1d86c7f3e66..fd244c7a7cc0 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -34,7 +34,6 @@
 #include <linux/bootmem.h>
 #include "internal.h"
 
-
 int sysctl_vfs_cache_pressure __read_mostly = 100;
 EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
 
@@ -948,9 +947,6 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
 	dentry->d_op = NULL;
 	dentry->d_fsdata = NULL;
 	dentry->d_mounted = 0;
-#ifdef CONFIG_PROFILING
-	dentry->d_cookie = NULL;
-#endif
 	INIT_HLIST_NODE(&dentry->d_hash);
 	INIT_LIST_HEAD(&dentry->d_lru);
 	INIT_LIST_HEAD(&dentry->d_subdirs);
diff --git a/fs/dcookies.c b/fs/dcookies.c
index 855d4b1d619a..180e9fec4ad8 100644
--- a/fs/dcookies.c
+++ b/fs/dcookies.c
@@ -93,10 +93,15 @@ static struct dcookie_struct *alloc_dcookie(struct path *path)
 {
 	struct dcookie_struct *dcs = kmem_cache_alloc(dcookie_cache,
 							GFP_KERNEL);
+	struct dentry *d;
 	if (!dcs)
 		return NULL;
 
-	path->dentry->d_cookie = dcs;
+	d = path->dentry;
+	spin_lock(&d->d_lock);
+	d->d_flags |= DCACHE_COOKIE;
+	spin_unlock(&d->d_lock);
+
 	dcs->path = *path;
 	path_get(path);
 	hash_dcookie(dcs);
@@ -119,14 +124,14 @@ int get_dcookie(struct path *path, unsigned long *cookie)
 		goto out;
 	}
 
-	dcs = path->dentry->d_cookie;
-
-	if (!dcs)
+	if (path->dentry->d_flags & DCACHE_COOKIE) {
+		dcs = find_dcookie((unsigned long)path->dentry);
+	} else {
 		dcs = alloc_dcookie(path);
-
-	if (!dcs) {
-		err = -ENOMEM;
-		goto out;
+		if (!dcs) {
+			err = -ENOMEM;
+			goto out;
+		}
 	}
 
 	*cookie = dcookie_value(dcs);
@@ -251,7 +256,12 @@ out_kmem:
 
 static void free_dcookie(struct dcookie_struct * dcs)
 {
-	dcs->path.dentry->d_cookie = NULL;
+	struct dentry *d = dcs->path.dentry;
+
+	spin_lock(&d->d_lock);
+	d->d_flags &= ~DCACHE_COOKIE;
+	spin_unlock(&d->d_lock);
+
 	path_put(&dcs->path);
 	kmem_cache_free(dcookie_cache, dcs);
 }
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index a37359d0bad1..c66d22487bf8 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -75,14 +75,22 @@ full_name_hash(const unsigned char *name, unsigned int len)
 	return end_name_hash(hash);
 }
 
-struct dcookie_struct;
-
-#define DNAME_INLINE_LEN_MIN 36
+/*
+ * Try to keep struct dentry aligned on 64 byte cachelines (this will
+ * give reasonable cacheline footprint with larger lines without the
+ * large memory footprint increase).
+ */
+#ifdef CONFIG_64BIT
+#define DNAME_INLINE_LEN_MIN 32 /* 192 bytes */
+#else
+#define DNAME_INLINE_LEN_MIN 40 /* 128 bytes */
+#endif
 
 struct dentry {
 	atomic_t d_count;
 	unsigned int d_flags;		/* protected by d_lock */
 	spinlock_t d_lock;		/* per dentry lock */
+	int d_mounted;
 	struct inode *d_inode;		/* Where the name belongs to - NULL is
 					 * negative */
 	/*
@@ -107,10 +115,7 @@ struct dentry {
 	struct dentry_operations *d_op;
 	struct super_block *d_sb;	/* The root of the dentry tree */
 	void *d_fsdata;			/* fs-specific data */
-#ifdef CONFIG_PROFILING
-	struct dcookie_struct *d_cookie; /* cookie, if any */
-#endif
-	int d_mounted;
+
 	unsigned char d_iname[DNAME_INLINE_LEN_MIN];	/* small names */
 };
 
@@ -177,6 +182,8 @@ d_iput:		no		no		no       yes
 
 #define DCACHE_INOTIFY_PARENT_WATCHED	0x0020 /* Parent inode is watched */
 
+#define DCACHE_COOKIE		0x0040	/* For use by dcookie subsystem */
+
 extern spinlock_t dcache_lock;
 extern seqlock_t rename_lock;
 
-- 
cgit v1.2.3


From dded4f4d5048e64a01cf52eed4d27c8cb2600525 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Mon, 1 Dec 2008 14:34:50 -0800
Subject: include: linux/fs.h: put declarations in __KERNEL__

include/linux/fs.h contains externs for a bunch of variables.  That obviously
belongs under ifdef __KERNEL__.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 001ded4845b4..c5e4c5c74034 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -21,7 +21,6 @@
 
 /* Fixed constants first: */
 #undef NR_OPEN
-extern int sysctl_nr_open;
 #define INR_OPEN 1024		/* Initial setting for nfile rlimits */
 
 #define BLOCK_SIZE_BITS 10
@@ -38,21 +37,13 @@ struct files_stat_struct {
 	int nr_free_files;	/* read only */
 	int max_files;		/* tunable */
 };
-extern struct files_stat_struct files_stat;
-extern int get_max_files(void);
 
 struct inodes_stat_t {
 	int nr_inodes;
 	int nr_unused;
 	int dummy[5];		/* padding for sysctl ABI compatibility */
 };
-extern struct inodes_stat_t inodes_stat;
 
-extern int leases_enable, lease_break_time;
-
-#ifdef CONFIG_DNOTIFY
-extern int dir_notify_enable;
-#endif
 
 #define NR_FILE  8192	/* this can well be larger on a larger system */
 
@@ -330,6 +321,15 @@ extern void __init inode_init(void);
 extern void __init inode_init_early(void);
 extern void __init files_init(unsigned long);
 
+extern struct files_stat_struct files_stat;
+extern int get_max_files(void);
+extern int sysctl_nr_open;
+extern struct inodes_stat_t inodes_stat;
+extern int leases_enable, lease_break_time;
+#ifdef CONFIG_DNOTIFY
+extern int dir_notify_enable;
+#endif
+
 struct buffer_head;
 typedef int (get_block_t)(struct inode *inode, sector_t iblock,
 			struct buffer_head *bh_result, int create);
-- 
cgit v1.2.3


From 035146851cfa2fe24c1d9dc7637bb009ad06b2f7 Mon Sep 17 00:00:00 2001
From: Duane Griffin <duaneg@dghda.com>
Date: Fri, 19 Dec 2008 20:47:11 +0000
Subject: vfs: introduce helper function to safely NUL-terminate symlinks

A number of filesystems were potentially triggering kernel bugs due to
corrupted symlink names on disk. This function helps safely terminate
the names.

Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Duane Griffin <duaneg@dghda.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/namei.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/namei.h b/include/linux/namei.h
index 99eb80306dc5..fc2e03579877 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -94,4 +94,9 @@ static inline char *nd_get_link(struct nameidata *nd)
 	return nd->saved_names[nd->depth];
 }
 
+static inline void nd_terminate_link(void *name, size_t len, size_t maxlen)
+{
+	((char *) name)[min(len, maxlen)] = '\0';
+}
+
 #endif /* _LINUX_NAMEI_H */
-- 
cgit v1.2.3


From 3fb64190aa3c23c10e6e9fd0124ac030115c99bf Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 24 Oct 2008 09:58:10 +0200
Subject: pass a struct path * to may_open

No need for the nameidata in may_open - a struct path is enough.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c         | 14 +++++++-------
 fs/nfsctl.c        |  5 +++--
 include/linux/fs.h |  2 +-
 3 files changed, 11 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namei.c b/fs/namei.c
index d4d0b59ed2cc..5cc0dc95a7a5 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1487,9 +1487,9 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
 	return error;
 }
 
-int may_open(struct nameidata *nd, int acc_mode, int flag)
+int may_open(struct path *path, int acc_mode, int flag)
 {
-	struct dentry *dentry = nd->path.dentry;
+	struct dentry *dentry = path->dentry;
 	struct inode *inode = dentry->d_inode;
 	int error;
 
@@ -1510,13 +1510,13 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
 	if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
 	    	flag &= ~O_TRUNC;
 	} else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) {
-		if (nd->path.mnt->mnt_flags & MNT_NODEV)
+		if (path->mnt->mnt_flags & MNT_NODEV)
 			return -EACCES;
 
 		flag &= ~O_TRUNC;
 	}
 
-	error = vfs_permission(nd, acc_mode);
+	error = inode_permission(inode, acc_mode);
 	if (error)
 		return error;
 	/*
@@ -1551,7 +1551,7 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
 		 */
 		error = locks_verify_locked(inode);
 		if (!error)
-			error = security_path_truncate(&nd->path, 0,
+			error = security_path_truncate(path, 0,
 					       ATTR_MTIME|ATTR_CTIME|ATTR_OPEN);
 		if (!error) {
 			DQUOT_INIT(inode);
@@ -1594,7 +1594,7 @@ out_unlock:
 	if (error)
 		return error;
 	/* Don't check for write permission, don't truncate */
-	return may_open(nd, 0, flag & ~O_TRUNC);
+	return may_open(&nd->path, 0, flag & ~O_TRUNC);
 }
 
 /*
@@ -1780,7 +1780,7 @@ ok:
 		if (error)
 			goto exit;
 	}
-	error = may_open(&nd, acc_mode, flag);
+	error = may_open(&nd.path, acc_mode, flag);
 	if (error) {
 		if (will_write)
 			mnt_drop_write(nd.path.mnt);
diff --git a/fs/nfsctl.c b/fs/nfsctl.c
index b1acbd6ab6fb..b27451909dff 100644
--- a/fs/nfsctl.c
+++ b/fs/nfsctl.c
@@ -38,9 +38,10 @@ static struct file *do_open(char *name, int flags)
 		return ERR_PTR(error);
 
 	if (flags == O_RDWR)
-		error = may_open(&nd,MAY_READ|MAY_WRITE,FMODE_READ|FMODE_WRITE);
+		error = may_open(&nd.path, MAY_READ|MAY_WRITE,
+					   FMODE_READ|FMODE_WRITE);
 	else
-		error = may_open(&nd, MAY_WRITE, FMODE_WRITE);
+		error = may_open(&nd.path, MAY_WRITE, FMODE_WRITE);
 
 	if (!error)
 		return dentry_open(nd.path.dentry, nd.path.mnt, flags,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c5e4c5c74034..3468df5a06e0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1869,7 +1869,7 @@ extern void free_write_pipe(struct file *);
 
 extern struct file *do_filp_open(int dfd, const char *pathname,
 		int open_flag, int mode);
-extern int may_open(struct nameidata *, int, int);
+extern int may_open(struct path *, int, int);
 
 extern int kernel_read(struct file *, unsigned long, char *, unsigned long);
 extern struct file * open_exec(const char *);
-- 
cgit v1.2.3


From cb23beb55100171646e69e248fb45f10db6e99a4 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 24 Oct 2008 09:59:29 +0200
Subject: kill vfs_permission

With all the nameidata removal there's no point anymore for this helper.
Of the three callers left two will go away with the next lookup series
anyway.

Also add proper kerneldoc to inode_permission as this is the main
permission check routine now.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/exec.c          |  5 +++--
 fs/namei.c         | 31 +++++++++++++------------------
 include/linux/fs.h |  1 -
 3 files changed, 16 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/fs/exec.c b/fs/exec.c
index 02d2e120542d..dfbf7009fbe7 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -127,7 +127,8 @@ asmlinkage long sys_uselib(const char __user * library)
 	if (nd.path.mnt->mnt_flags & MNT_NOEXEC)
 		goto exit;
 
-	error = vfs_permission(&nd, MAY_READ | MAY_EXEC | MAY_OPEN);
+	error = inode_permission(nd.path.dentry->d_inode,
+				 MAY_READ | MAY_EXEC | MAY_OPEN);
 	if (error)
 		goto exit;
 
@@ -680,7 +681,7 @@ struct file *open_exec(const char *name)
 	if (nd.path.mnt->mnt_flags & MNT_NOEXEC)
 		goto out_path_put;
 
-	err = vfs_permission(&nd, MAY_EXEC | MAY_OPEN);
+	err = inode_permission(nd.path.dentry->d_inode, MAY_EXEC | MAY_OPEN);
 	if (err)
 		goto out_path_put;
 
diff --git a/fs/namei.c b/fs/namei.c
index 5cc0dc95a7a5..3f88e043d459 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -226,6 +226,16 @@ int generic_permission(struct inode *inode, int mask,
 	return -EACCES;
 }
 
+/**
+ * inode_permission  -  check for access rights to a given inode
+ * @inode:	inode to check permission on
+ * @mask:	right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
+ *
+ * Used to check for read/write/execute permissions on an inode.
+ * We use "fsuid" for this, letting us set arbitrary permissions
+ * for filesystem access without changing the "normal" uids which
+ * are used for other things.
+ */
 int inode_permission(struct inode *inode, int mask)
 {
 	int retval;
@@ -263,21 +273,6 @@ int inode_permission(struct inode *inode, int mask)
 			mask & (MAY_READ|MAY_WRITE|MAY_EXEC|MAY_APPEND));
 }
 
-/**
- * vfs_permission  -  check for access rights to a given path
- * @nd:		lookup result that describes the path
- * @mask:	right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
- *
- * Used to check for read/write/execute permissions on a path.
- * We use "fsuid" for this, letting us set arbitrary permissions
- * for filesystem access without changing the "normal" uids which
- * are used for other things.
- */
-int vfs_permission(struct nameidata *nd, int mask)
-{
-	return inode_permission(nd->path.dentry->d_inode, mask);
-}
-
 /**
  * file_permission  -  check for additional access rights to a given file
  * @file:	file to check access rights for
@@ -288,7 +283,7 @@ int vfs_permission(struct nameidata *nd, int mask)
  *
  * Note:
  *	Do not use this function in new code.  All access checks should
- *	be done using vfs_permission().
+ *	be done using inode_permission().
  */
 int file_permission(struct file *file, int mask)
 {
@@ -853,7 +848,8 @@ static int __link_path_walk(const char *name, struct nameidata *nd)
 		nd->flags |= LOOKUP_CONTINUE;
 		err = exec_permission_lite(inode);
 		if (err == -EAGAIN)
-			err = vfs_permission(nd, MAY_EXEC);
+			err = inode_permission(nd->path.dentry->d_inode,
+					       MAY_EXEC);
  		if (err)
 			break;
 
@@ -2882,7 +2878,6 @@ EXPORT_SYMBOL(path_lookup);
 EXPORT_SYMBOL(kern_path);
 EXPORT_SYMBOL(vfs_path_lookup);
 EXPORT_SYMBOL(inode_permission);
-EXPORT_SYMBOL(vfs_permission);
 EXPORT_SYMBOL(file_permission);
 EXPORT_SYMBOL(unlock_rename);
 EXPORT_SYMBOL(vfs_create);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3468df5a06e0..fd615986a41c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1212,7 +1212,6 @@ extern void unlock_super(struct super_block *);
 /*
  * VFS helper functions..
  */
-extern int vfs_permission(struct nameidata *, int);
 extern int vfs_create(struct inode *, struct dentry *, int, struct nameidata *);
 extern int vfs_mkdir(struct inode *, struct dentry *, int);
 extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t);
-- 
cgit v1.2.3


From 18d8fda7c3c9439be04d7ea2e82da2513b121acb Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 26 Dec 2008 00:35:37 -0500
Subject: take init_fs to saner place

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/alpha/kernel/init_task.c     | 1 -
 arch/arm/kernel/init_task.c       | 1 -
 arch/avr32/kernel/init_task.c     | 1 -
 arch/blackfin/kernel/init_task.c  | 1 -
 arch/cris/kernel/process.c        | 1 -
 arch/frv/kernel/init_task.c       | 1 -
 arch/h8300/kernel/init_task.c     | 1 -
 arch/ia64/kernel/init_task.c      | 1 -
 arch/m32r/kernel/init_task.c      | 1 -
 arch/m68k/kernel/process.c        | 1 -
 arch/m68knommu/kernel/init_task.c | 1 -
 arch/mips/kernel/init_task.c      | 1 -
 arch/mn10300/kernel/init_task.c   | 1 -
 arch/parisc/kernel/init_task.c    | 1 -
 arch/powerpc/kernel/init_task.c   | 1 -
 arch/s390/kernel/init_task.c      | 1 -
 arch/sh/kernel/init_task.c        | 1 -
 arch/sparc/kernel/init_task.c     | 1 -
 arch/um/kernel/init_task.c        | 1 -
 arch/x86/kernel/init_task.c       | 1 -
 arch/xtensa/kernel/init_task.c    | 1 -
 fs/namei.c                        | 7 +++++++
 include/linux/fs_struct.h         | 6 ------
 include/linux/init_task.h         | 1 +
 24 files changed, 8 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/init_task.c b/arch/alpha/kernel/init_task.c
index 1f762189fa64..c2938e574a40 100644
--- a/arch/alpha/kernel/init_task.c
+++ b/arch/alpha/kernel/init_task.c
@@ -8,7 +8,6 @@
 #include <asm/uaccess.h>
 
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/arm/kernel/init_task.c b/arch/arm/kernel/init_task.c
index 0bbf80625395..e859af349467 100644
--- a/arch/arm/kernel/init_task.c
+++ b/arch/arm/kernel/init_task.c
@@ -12,7 +12,6 @@
 
 #include <asm/pgtable.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/avr32/kernel/init_task.c b/arch/avr32/kernel/init_task.c
index 44058469c6ec..993d56ee3cf3 100644
--- a/arch/avr32/kernel/init_task.c
+++ b/arch/avr32/kernel/init_task.c
@@ -13,7 +13,6 @@
 
 #include <asm/pgtable.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/blackfin/kernel/init_task.c b/arch/blackfin/kernel/init_task.c
index 6bdba7b21109..2c228c020978 100644
--- a/arch/blackfin/kernel/init_task.c
+++ b/arch/blackfin/kernel/init_task.c
@@ -33,7 +33,6 @@
 #include <linux/mqueue.h>
 #include <linux/fs.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 
diff --git a/arch/cris/kernel/process.c b/arch/cris/kernel/process.c
index 5933656db5a2..60816e876455 100644
--- a/arch/cris/kernel/process.c
+++ b/arch/cris/kernel/process.c
@@ -37,7 +37,6 @@
  * setup.
  */
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/frv/kernel/init_task.c b/arch/frv/kernel/init_task.c
index e2198815b630..29429a8b7f6a 100644
--- a/arch/frv/kernel/init_task.c
+++ b/arch/frv/kernel/init_task.c
@@ -10,7 +10,6 @@
 #include <asm/pgtable.h>
 
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/h8300/kernel/init_task.c b/arch/h8300/kernel/init_task.c
index 93a4899e46c2..cb5dc552da97 100644
--- a/arch/h8300/kernel/init_task.c
+++ b/arch/h8300/kernel/init_task.c
@@ -12,7 +12,6 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c
index 9d7e1c66faf4..5b0e830c6f33 100644
--- a/arch/ia64/kernel/init_task.c
+++ b/arch/ia64/kernel/init_task.c
@@ -17,7 +17,6 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/m32r/kernel/init_task.c b/arch/m32r/kernel/init_task.c
index 0d658dbb6766..016885c6f260 100644
--- a/arch/m32r/kernel/init_task.c
+++ b/arch/m32r/kernel/init_task.c
@@ -11,7 +11,6 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c
index 3042c2bc8c58..632ce016014d 100644
--- a/arch/m68k/kernel/process.c
+++ b/arch/m68k/kernel/process.c
@@ -40,7 +40,6 @@
  * alignment requirements and potentially different initial
  * setup.
  */
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/m68knommu/kernel/init_task.c b/arch/m68knommu/kernel/init_task.c
index 344c01aede08..fe282de1d596 100644
--- a/arch/m68knommu/kernel/init_task.c
+++ b/arch/m68knommu/kernel/init_task.c
@@ -12,7 +12,6 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/mips/kernel/init_task.c b/arch/mips/kernel/init_task.c
index d72487ad7c15..149cd914526e 100644
--- a/arch/mips/kernel/init_task.c
+++ b/arch/mips/kernel/init_task.c
@@ -9,7 +9,6 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/mn10300/kernel/init_task.c b/arch/mn10300/kernel/init_task.c
index af16f6e5c918..5ac3566f8c98 100644
--- a/arch/mn10300/kernel/init_task.c
+++ b/arch/mn10300/kernel/init_task.c
@@ -18,7 +18,6 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/parisc/kernel/init_task.c b/arch/parisc/kernel/init_task.c
index f5941c086551..1e25a45d64c1 100644
--- a/arch/parisc/kernel/init_task.c
+++ b/arch/parisc/kernel/init_task.c
@@ -34,7 +34,6 @@
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/powerpc/kernel/init_task.c b/arch/powerpc/kernel/init_task.c
index 4c85b8d56478..688b329800bd 100644
--- a/arch/powerpc/kernel/init_task.c
+++ b/arch/powerpc/kernel/init_task.c
@@ -7,7 +7,6 @@
 #include <linux/mqueue.h>
 #include <asm/uaccess.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/s390/kernel/init_task.c b/arch/s390/kernel/init_task.c
index e80716843619..7db95c0b8693 100644
--- a/arch/s390/kernel/init_task.c
+++ b/arch/s390/kernel/init_task.c
@@ -16,7 +16,6 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/sh/kernel/init_task.c b/arch/sh/kernel/init_task.c
index b151a25cb14d..80c35ff71d56 100644
--- a/arch/sh/kernel/init_task.c
+++ b/arch/sh/kernel/init_task.c
@@ -7,7 +7,6 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct pt_regs fake_swapper_regs;
diff --git a/arch/sparc/kernel/init_task.c b/arch/sparc/kernel/init_task.c
index 62126e4cec54..f28cb8278e98 100644
--- a/arch/sparc/kernel/init_task.c
+++ b/arch/sparc/kernel/init_task.c
@@ -8,7 +8,6 @@
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/um/kernel/init_task.c b/arch/um/kernel/init_task.c
index 910eda8fca18..806d381947bf 100644
--- a/arch/um/kernel/init_task.c
+++ b/arch/um/kernel/init_task.c
@@ -10,7 +10,6 @@
 #include "linux/mqueue.h"
 #include "asm/uaccess.h"
 
-static struct fs_struct init_fs = INIT_FS;
 struct mm_struct init_mm = INIT_MM(init_mm);
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c
index d39918076bb4..df3bf269beab 100644
--- a/arch/x86/kernel/init_task.c
+++ b/arch/x86/kernel/init_task.c
@@ -10,7 +10,6 @@
 #include <asm/pgtable.h>
 #include <asm/desc.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/xtensa/kernel/init_task.c b/arch/xtensa/kernel/init_task.c
index 3df469dbe814..e07f5c9fcd35 100644
--- a/arch/xtensa/kernel/init_task.c
+++ b/arch/xtensa/kernel/init_task.c
@@ -21,7 +21,6 @@
 
 #include <asm/uaccess.h>
 
-static struct fs_struct init_fs = INIT_FS;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/fs/namei.c b/fs/namei.c
index 3f88e043d459..e203691b9d12 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2893,3 +2893,10 @@ EXPORT_SYMBOL(vfs_symlink);
 EXPORT_SYMBOL(vfs_unlink);
 EXPORT_SYMBOL(dentry_unhash);
 EXPORT_SYMBOL(generic_readlink);
+
+/* to be mentioned only in INIT_TASK */
+struct fs_struct init_fs = {
+	.count		= ATOMIC_INIT(1),
+	.lock		= RW_LOCK_UNLOCKED,
+	.umask		= 0022,
+};
diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h
index 9e5a06e78d02..a97c053d3a9a 100644
--- a/include/linux/fs_struct.h
+++ b/include/linux/fs_struct.h
@@ -10,12 +10,6 @@ struct fs_struct {
 	struct path root, pwd;
 };
 
-#define INIT_FS {				\
-	.count		= ATOMIC_INIT(1),	\
-	.lock		= RW_LOCK_UNLOCKED,	\
-	.umask		= 0022, \
-}
-
 extern struct kmem_cache *fs_cachep;
 
 extern void exit_fs(struct task_struct *);
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 959f5522d10a..2f3c2d4ef73b 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -12,6 +12,7 @@
 #include <net/net_namespace.h>
 
 extern struct files_struct init_files;
+extern struct fs_struct init_fs;
 
 #define INIT_KIOCTX(name, which_mm) \
 {							\
-- 
cgit v1.2.3


From b6b3fdead251d432f32f2cfce2a893ab8a658110 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Wed, 10 Dec 2008 09:35:45 -0800
Subject: filp_cachep can be static in fs/file_table.c

Instead of creating the "filp" kmem_cache in vfs_caches_init(),
we can do it a litle be later in files_init(), so that filp_cachep
is static to fs/file_table.c

Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c             |  6 ------
 fs/file_table.c         | 10 +++++++++-
 include/linux/fdtable.h |  2 --
 3 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index bdb3f50248a7..e88c23b85a32 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2314,9 +2314,6 @@ static void __init dcache_init(void)
 /* SLAB cache for __getname() consumers */
 struct kmem_cache *names_cachep __read_mostly;
 
-/* SLAB cache for file structures */
-struct kmem_cache *filp_cachep __read_mostly;
-
 EXPORT_SYMBOL(d_genocide);
 
 void __init vfs_caches_init_early(void)
@@ -2338,9 +2335,6 @@ void __init vfs_caches_init(unsigned long mempages)
 	names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
 			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
 
-	filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
-
 	dcache_init();
 	inode_init();
 	files_init(mempages);
diff --git a/fs/file_table.c b/fs/file_table.c
index 0fbcacc3ea75..bbeeac6efa1a 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -32,6 +32,9 @@ struct files_stat_struct files_stat = {
 /* public. Not pretty! */
 __cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock);
 
+/* SLAB cache for file structures */
+static struct kmem_cache *filp_cachep __read_mostly;
+
 static struct percpu_counter nr_files __cacheline_aligned_in_smp;
 
 static inline void file_free_rcu(struct rcu_head *head)
@@ -397,7 +400,12 @@ too_bad:
 void __init files_init(unsigned long mempages)
 { 
 	int n; 
-	/* One file with associated inode and dcache is very roughly 1K. 
+
+	filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
+			SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
+
+	/*
+	 * One file with associated inode and dcache is very roughly 1K.
 	 * Per default don't use more than 10% of our memory for files. 
 	 */ 
 
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index 4aab6f12cfab..09d6c5bbdddd 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -57,8 +57,6 @@ struct files_struct {
 
 #define files_fdtable(files) (rcu_dereference((files)->fdt))
 
-extern struct kmem_cache *filp_cachep;
-
 struct file_operations;
 struct vfsmount;
 struct dentry;
-- 
cgit v1.2.3


From 6badd79bd002788aaec27b50a74ab69ef65ab8ee Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 26 Dec 2008 00:57:40 -0500
Subject: kill ->dir_notify()

Remove the hopelessly misguided ->dir_notify().  The only instance (cifs)
has been broken by design from the very beginning; the objects it creates
are never destroyed, keep references to struct file they can outlive, nothing
that could possibly evict them exists on close(2) path *and* no locking
whatsoever is done to prevent races with close(), should the previous, er,
deficiencies someday be dealt with.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 Documentation/filesystems/Locking |   2 -
 Documentation/filesystems/vfs.txt |   3 -
 fs/bad_inode.c                    |   6 --
 fs/cifs/Makefile                  |   2 +-
 fs/cifs/cifsfs.c                  |   7 ---
 fs/cifs/cifsfs.h                  |   1 -
 fs/cifs/fcntl.c                   | 118 --------------------------------------
 fs/dnotify.c                      |   3 -
 include/linux/fs.h                |   1 -
 9 files changed, 1 insertion(+), 142 deletions(-)
 delete mode 100644 fs/cifs/fcntl.c

(limited to 'include/linux')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 23d2f4460deb..ccec55394380 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -394,7 +394,6 @@ prototypes:
 	unsigned long (*get_unmapped_area)(struct file *, unsigned long,
 			unsigned long, unsigned long, unsigned long);
 	int (*check_flags)(int);
-	int (*dir_notify)(struct file *, unsigned long);
 };
 
 locking rules:
@@ -424,7 +423,6 @@ sendfile:		no
 sendpage:		no
 get_unmapped_area:	no
 check_flags:		no
-dir_notify:		no
 
 ->llseek() locking has moved from llseek to the individual llseek
 implementations.  If your fs is not using generic_file_llseek, you
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 041cb771d505..ef19afa186a9 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -733,7 +733,6 @@ struct file_operations {
 	ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
 	unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
 	int (*check_flags)(int);
-	int (*dir_notify)(struct file *filp, unsigned long arg);
 	int (*flock) (struct file *, int, struct file_lock *);
 	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, size_t, unsigned int);
 	ssize_t (*splice_read)(struct file *, struct pipe_inode_info *, size_t, unsigned int);
@@ -800,8 +799,6 @@ otherwise noted.
 
   check_flags: called by the fcntl(2) system call for F_SETFL command
 
-  dir_notify: called by the fcntl(2) system call for F_NOTIFY command
-
   flock: called by the flock(2) system call
 
   splice_write: called by the VFS to splice data from a pipe to a file. This
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 5f1538c03b1b..a05287a23f62 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -132,11 +132,6 @@ static int bad_file_check_flags(int flags)
 	return -EIO;
 }
 
-static int bad_file_dir_notify(struct file *file, unsigned long arg)
-{
-	return -EIO;
-}
-
 static int bad_file_flock(struct file *filp, int cmd, struct file_lock *fl)
 {
 	return -EIO;
@@ -179,7 +174,6 @@ static const struct file_operations bad_file_ops =
 	.sendpage	= bad_file_sendpage,
 	.get_unmapped_area = bad_file_get_unmapped_area,
 	.check_flags	= bad_file_check_flags,
-	.dir_notify	= bad_file_dir_notify,
 	.flock		= bad_file_flock,
 	.splice_write	= bad_file_splice_write,
 	.splice_read	= bad_file_splice_read,
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index 6ba43fb346fb..9948c0030e86 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -5,7 +5,7 @@ obj-$(CONFIG_CIFS) += cifs.o
 
 cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \
 	  link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o \
-	  md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o fcntl.o \
+	  md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o \
 	  readdir.o ioctl.o sess.o export.o cifsacl.o
 
 cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 0005a194a75c..13ea53251dcf 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -747,7 +747,6 @@ const struct file_operations cifs_file_ops = {
 #endif /* CONFIG_CIFS_POSIX */
 
 #ifdef CONFIG_CIFS_EXPERIMENTAL
-	.dir_notify = cifs_dir_notify,
 	.setlease = cifs_setlease,
 #endif /* CONFIG_CIFS_EXPERIMENTAL */
 };
@@ -768,7 +767,6 @@ const struct file_operations cifs_file_direct_ops = {
 #endif /* CONFIG_CIFS_POSIX */
 	.llseek = cifs_llseek,
 #ifdef CONFIG_CIFS_EXPERIMENTAL
-	.dir_notify = cifs_dir_notify,
 	.setlease = cifs_setlease,
 #endif /* CONFIG_CIFS_EXPERIMENTAL */
 };
@@ -789,7 +787,6 @@ const struct file_operations cifs_file_nobrl_ops = {
 #endif /* CONFIG_CIFS_POSIX */
 
 #ifdef CONFIG_CIFS_EXPERIMENTAL
-	.dir_notify = cifs_dir_notify,
 	.setlease = cifs_setlease,
 #endif /* CONFIG_CIFS_EXPERIMENTAL */
 };
@@ -809,7 +806,6 @@ const struct file_operations cifs_file_direct_nobrl_ops = {
 #endif /* CONFIG_CIFS_POSIX */
 	.llseek = cifs_llseek,
 #ifdef CONFIG_CIFS_EXPERIMENTAL
-	.dir_notify = cifs_dir_notify,
 	.setlease = cifs_setlease,
 #endif /* CONFIG_CIFS_EXPERIMENTAL */
 };
@@ -818,9 +814,6 @@ const struct file_operations cifs_dir_ops = {
 	.readdir = cifs_readdir,
 	.release = cifs_closedir,
 	.read    = generic_read_dir,
-#ifdef CONFIG_CIFS_EXPERIMENTAL
-	.dir_notify = cifs_dir_notify,
-#endif /* CONFIG_CIFS_EXPERIMENTAL */
 	.unlocked_ioctl  = cifs_ioctl,
 	.llseek = generic_file_llseek,
 };
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 2ce04c73d74e..7ac481841f87 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -76,7 +76,6 @@ extern int cifs_file_mmap(struct file * , struct vm_area_struct *);
 extern const struct file_operations cifs_dir_ops;
 extern int cifs_dir_open(struct inode *inode, struct file *file);
 extern int cifs_readdir(struct file *file, void *direntry, filldir_t filldir);
-extern int cifs_dir_notify(struct file *, unsigned long arg);
 
 /* Functions related to dir entries */
 extern struct dentry_operations cifs_dentry_ops;
diff --git a/fs/cifs/fcntl.c b/fs/cifs/fcntl.c
deleted file mode 100644
index 5a57581eb4b2..000000000000
--- a/fs/cifs/fcntl.c
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- *   fs/cifs/fcntl.c
- *
- *   vfs operations that deal with the file control API
- *
- *   Copyright (C) International Business Machines  Corp., 2003,2004
- *   Author(s): Steve French (sfrench@us.ibm.com)
- *
- *   This library is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU Lesser General Public License as published
- *   by the Free Software Foundation; either version 2.1 of the License, or
- *   (at your option) any later version.
- *
- *   This library is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
- *   the GNU Lesser General Public License for more details.
- *
- *   You should have received a copy of the GNU Lesser General Public License
- *   along with this library; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-#include <linux/fs.h>
-#include <linux/stat.h>
-#include <linux/fcntl.h>
-#include "cifsglob.h"
-#include "cifsproto.h"
-#include "cifs_unicode.h"
-#include "cifs_debug.h"
-#include "cifsfs.h"
-
-static __u32 convert_to_cifs_notify_flags(unsigned long fcntl_notify_flags)
-{
-	__u32 cifs_ntfy_flags = 0;
-
-	/* No way on Linux VFS to ask to monitor xattr
-	changes (and no stream support either */
-	if (fcntl_notify_flags & DN_ACCESS)
-		cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_ACCESS;
-	if (fcntl_notify_flags & DN_MODIFY) {
-		/* What does this mean on directories? */
-		cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_WRITE |
-			FILE_NOTIFY_CHANGE_SIZE;
-	}
-	if (fcntl_notify_flags & DN_CREATE) {
-		cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_CREATION |
-			FILE_NOTIFY_CHANGE_LAST_WRITE;
-	}
-	if (fcntl_notify_flags & DN_DELETE)
-		cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_WRITE;
-	if (fcntl_notify_flags & DN_RENAME) {
-		/* BB review this - checking various server behaviors */
-		cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_DIR_NAME |
-			FILE_NOTIFY_CHANGE_FILE_NAME;
-	}
-	if (fcntl_notify_flags & DN_ATTRIB) {
-		cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_SECURITY |
-			FILE_NOTIFY_CHANGE_ATTRIBUTES;
-	}
-/*	if (fcntl_notify_flags & DN_MULTISHOT) {
-		cifs_ntfy_flags |= ;
-	} */ /* BB fixme - not sure how to handle this with CIFS yet */
-
-	return cifs_ntfy_flags;
-}
-
-int cifs_dir_notify(struct file *file, unsigned long arg)
-{
-	int xid;
-	int rc = -EINVAL;
-	int oplock = 0;
-	struct cifs_sb_info *cifs_sb;
-	struct cifsTconInfo *pTcon;
-	char *full_path = NULL;
-	__u32 filter = FILE_NOTIFY_CHANGE_NAME | FILE_NOTIFY_CHANGE_ATTRIBUTES;
-	__u16 netfid;
-
-	if (experimEnabled == 0)
-		return 0;
-
-	xid = GetXid();
-	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
-	pTcon = cifs_sb->tcon;
-
-	full_path = build_path_from_dentry(file->f_path.dentry);
-
-	if (full_path == NULL) {
-		rc = -ENOMEM;
-	} else {
-		cFYI(1, ("dir notify on file %s Arg 0x%lx", full_path, arg));
-		rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN,
-			GENERIC_READ | SYNCHRONIZE, 0 /* create options */,
-			&netfid, &oplock, NULL, cifs_sb->local_nls,
-			cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
-		/* BB fixme - add this handle to a notify handle list */
-		if (rc) {
-			cFYI(1, ("Could not open directory for notify"));
-		} else {
-			filter = convert_to_cifs_notify_flags(arg);
-			if (filter != 0) {
-				rc = CIFSSMBNotify(xid, pTcon,
-					0 /* no subdirs */, netfid,
-					filter, file, arg & DN_MULTISHOT,
-					cifs_sb->local_nls);
-			} else {
-				rc = -EINVAL;
-			}
-			/* BB add code to close file eventually (at unmount
-			it would close automatically but may be a way
-			to do it easily when inode freed or when
-			notify info is cleared/changed */
-			cFYI(1, ("notify rc %d", rc));
-		}
-	}
-
-	FreeXid(xid);
-	return rc;
-}
diff --git a/fs/dnotify.c b/fs/dnotify.c
index 676073b8dda5..b0aa2cde80bd 100644
--- a/fs/dnotify.c
+++ b/fs/dnotify.c
@@ -115,9 +115,6 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
 	dn->dn_next = inode->i_dnotify;
 	inode->i_dnotify = dn;
 	spin_unlock(&inode->i_lock);
-
-	if (filp->f_op && filp->f_op->dir_notify)
-		return filp->f_op->dir_notify(filp, arg);
 	return 0;
 
 out_free:
diff --git a/include/linux/fs.h b/include/linux/fs.h
index fd615986a41c..be16ce01fb1b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1309,7 +1309,6 @@ struct file_operations {
 	ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
 	unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
 	int (*check_flags)(int);
-	int (*dir_notify)(struct file *filp, unsigned long arg);
 	int (*flock) (struct file *, int, struct file_lock *);
 	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
 	ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
-- 
cgit v1.2.3


From 261bca86ed4f7f391d1938167624e78da61dcc6b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 30 Dec 2008 01:48:21 -0500
Subject: nfsd/create race fixes, infrastructure

new helpers - insert_inode_locked() and insert_inode_locked4().
Hash new inode, making sure that there's no such inode in icache
already.  If there is and it does not end up unhashed (as would
happen if we have nfsd trying to resolve a bogus fhandle), fail.
Otherwise insert our inode into hash and succeed.

In either case have i_state set to new+locked; cleanup ends up
being simpler with such calling conventions.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/inode.c         | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/fs.h |  2 ++
 2 files changed, 61 insertions(+)

(limited to 'include/linux')

diff --git a/fs/inode.c b/fs/inode.c
index 098a2443196f..7de1cda92489 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1032,6 +1032,65 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
 
 EXPORT_SYMBOL(iget_locked);
 
+int insert_inode_locked(struct inode *inode)
+{
+	struct super_block *sb = inode->i_sb;
+	ino_t ino = inode->i_ino;
+	struct hlist_head *head = inode_hashtable + hash(sb, ino);
+	struct inode *old;
+
+	inode->i_state |= I_LOCK|I_NEW;
+	while (1) {
+		spin_lock(&inode_lock);
+		old = find_inode_fast(sb, head, ino);
+		if (likely(!old)) {
+			hlist_add_head(&inode->i_hash, head);
+			spin_unlock(&inode_lock);
+			return 0;
+		}
+		__iget(old);
+		spin_unlock(&inode_lock);
+		wait_on_inode(old);
+		if (unlikely(!hlist_unhashed(&old->i_hash))) {
+			iput(old);
+			return -EBUSY;
+		}
+		iput(old);
+	}
+}
+
+EXPORT_SYMBOL(insert_inode_locked);
+
+int insert_inode_locked4(struct inode *inode, unsigned long hashval,
+		int (*test)(struct inode *, void *), void *data)
+{
+	struct super_block *sb = inode->i_sb;
+	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
+	struct inode *old;
+
+	inode->i_state |= I_LOCK|I_NEW;
+
+	while (1) {
+		spin_lock(&inode_lock);
+		old = find_inode(sb, head, test, data);
+		if (likely(!old)) {
+			hlist_add_head(&inode->i_hash, head);
+			spin_unlock(&inode_lock);
+			return 0;
+		}
+		__iget(old);
+		spin_unlock(&inode_lock);
+		wait_on_inode(old);
+		if (unlikely(!hlist_unhashed(&old->i_hash))) {
+			iput(old);
+			return -EBUSY;
+		}
+		iput(old);
+	}
+}
+
+EXPORT_SYMBOL(insert_inode_locked4);
+
 /**
  *	__insert_inode_hash - hash an inode
  *	@inode: unhashed inode
diff --git a/include/linux/fs.h b/include/linux/fs.h
index be16ce01fb1b..e2170ee21e18 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1902,6 +1902,8 @@ extern struct inode *ilookup(struct super_block *sb, unsigned long ino);
 
 extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *);
 extern struct inode * iget_locked(struct super_block *, unsigned long);
+extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *);
+extern int insert_inode_locked(struct inode *);
 extern void unlock_new_inode(struct inode *);
 
 extern void __iget(struct inode * inode);
-- 
cgit v1.2.3


From ab53d472e785e51fdfc08fc1d66252c1153e6c0f Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 1 Jan 2009 10:12:19 +1030
Subject: bitmap: find_last_bit()

Impact: New API

As the name suggests.  For the moment everyone uses the generic one.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/bitops.h | 13 ++++++++++++-
 lib/Kconfig            |  4 ++++
 lib/Makefile           |  1 +
 lib/find_last_bit.c    | 45 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 62 insertions(+), 1 deletion(-)
 create mode 100644 lib/find_last_bit.c

(limited to 'include/linux')

diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 024f2b027244..61829139795a 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -134,9 +134,20 @@ extern unsigned long find_first_bit(const unsigned long *addr,
  */
 extern unsigned long find_first_zero_bit(const unsigned long *addr,
 					 unsigned long size);
-
 #endif /* CONFIG_GENERIC_FIND_FIRST_BIT */
 
+#ifdef CONFIG_GENERIC_FIND_LAST_BIT
+/**
+ * find_last_bit - find the last set bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum size to search
+ *
+ * Returns the bit number of the first set bit, or size.
+ */
+extern unsigned long find_last_bit(const unsigned long *addr,
+				   unsigned long size);
+#endif /* CONFIG_GENERIC_FIND_LAST_BIT */
+
 #ifdef CONFIG_GENERIC_FIND_NEXT_BIT
 
 /**
diff --git a/lib/Kconfig b/lib/Kconfig
index 2ba43c4a5b07..fc5f5ee50bc2 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -13,6 +13,10 @@ config GENERIC_FIND_FIRST_BIT
 config GENERIC_FIND_NEXT_BIT
 	bool
 
+config GENERIC_FIND_LAST_BIT
+	bool
+	default y
+
 config CRC_CCITT
 	tristate "CRC-CCITT functions"
 	help
diff --git a/lib/Makefile b/lib/Makefile
index 80fe8a3ec12a..32b0e64ded27 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -37,6 +37,7 @@ lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
 lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
 lib-$(CONFIG_GENERIC_FIND_FIRST_BIT) += find_next_bit.o
 lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o
+lib-$(CONFIG_GENERIC_FIND_LAST_BIT) += find_last_bit.o
 obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
 obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o
 obj-$(CONFIG_PLIST) += plist.o
diff --git a/lib/find_last_bit.c b/lib/find_last_bit.c
new file mode 100644
index 000000000000..5d202e36bdd8
--- /dev/null
+++ b/lib/find_last_bit.c
@@ -0,0 +1,45 @@
+/* find_last_bit.c: fallback find next bit implementation
+ *
+ * Copyright (C) 2008 IBM Corporation
+ * Written by Rusty Russell <rusty@rustcorp.com.au>
+ * (Inspired by David Howell's find_next_bit implementation)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/bitops.h>
+#include <linux/module.h>
+#include <asm/types.h>
+#include <asm/byteorder.h>
+
+unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
+{
+	unsigned long words;
+	unsigned long tmp;
+
+	/* Start at final word. */
+	words = size / BITS_PER_LONG;
+
+	/* Partial final word? */
+	if (size & (BITS_PER_LONG-1)) {
+		tmp = (addr[words] & (~0UL >> (BITS_PER_LONG
+					 - (size & (BITS_PER_LONG-1)))));
+		if (tmp)
+			goto found;
+	}
+
+	while (words) {
+		tmp = addr[--words];
+		if (tmp) {
+found:
+			return words * BITS_PER_LONG + __fls(tmp);
+		}
+	}
+
+	/* Not found */
+	return size;
+}
+EXPORT_SYMBOL(find_last_bit);
-- 
cgit v1.2.3


From 6b954823c24f04ed026a8517f6bab5abda279db8 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 1 Jan 2009 10:12:25 +1030
Subject: cpumask: convert kernel time functions

Impact: Use new APIs

Convert kernel/time functions to use struct cpumask *.

Note the ugly bitmap declarations in tick-broadcast.c.  These should
be cpumask_var_t, but there was no obvious initialization function to
put the alloc_cpumask_var() calls in.  This was safe.

(Eventually 'struct cpumask' will be undefined for CONFIG_CPUMASK_OFFSTACK,
so we use a bitmap here to show we really mean it).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
---
 include/linux/tick.h         |   4 +-
 kernel/time/clocksource.c    |   6 +--
 kernel/time/tick-broadcast.c | 113 ++++++++++++++++++++++---------------------
 kernel/time/tick-common.c    |   6 +--
 4 files changed, 66 insertions(+), 63 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tick.h b/include/linux/tick.h
index b6ec8189ac0c..469b82d88b3b 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -84,10 +84,10 @@ static inline void tick_cancel_sched_timer(int cpu) { }
 
 # ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
 extern struct tick_device *tick_get_broadcast_device(void);
-extern cpumask_t *tick_get_broadcast_mask(void);
+extern struct cpumask *tick_get_broadcast_mask(void);
 
 #  ifdef CONFIG_TICK_ONESHOT
-extern cpumask_t *tick_get_broadcast_oneshot_mask(void);
+extern struct cpumask *tick_get_broadcast_oneshot_mask(void);
 #  endif
 
 # endif /* BROADCAST */
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 9ed2eec97526..32141b15d63e 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -148,7 +148,7 @@ static void clocksource_watchdog(unsigned long data)
 		int next_cpu = next_cpu_nr(raw_smp_processor_id(), cpu_online_map);
 
 		if (next_cpu >= nr_cpu_ids)
-			next_cpu = first_cpu(cpu_online_map);
+			next_cpu = cpumask_first(cpu_online_mask);
 		watchdog_timer.expires += WATCHDOG_INTERVAL;
 		add_timer_on(&watchdog_timer, next_cpu);
 	}
@@ -173,7 +173,7 @@ static void clocksource_check_watchdog(struct clocksource *cs)
 			watchdog_last = watchdog->read();
 			watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
 			add_timer_on(&watchdog_timer,
-				     first_cpu(cpu_online_map));
+				     cpumask_first(cpu_online_mask));
 		}
 	} else {
 		if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
@@ -195,7 +195,7 @@ static void clocksource_check_watchdog(struct clocksource *cs)
 				watchdog_timer.expires =
 					jiffies + WATCHDOG_INTERVAL;
 				add_timer_on(&watchdog_timer,
-					     first_cpu(cpu_online_map));
+					     cpumask_first(cpu_online_mask));
 			}
 		}
 	}
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 9590af2327be..356fac57a182 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -28,7 +28,9 @@
  */
 
 struct tick_device tick_broadcast_device;
-static cpumask_t tick_broadcast_mask;
+/* FIXME: Use cpumask_var_t. */
+static DECLARE_BITMAP(tick_broadcast_mask, NR_CPUS);
+static DECLARE_BITMAP(tmpmask, NR_CPUS);
 static DEFINE_SPINLOCK(tick_broadcast_lock);
 static int tick_broadcast_force;
 
@@ -46,9 +48,9 @@ struct tick_device *tick_get_broadcast_device(void)
 	return &tick_broadcast_device;
 }
 
-cpumask_t *tick_get_broadcast_mask(void)
+struct cpumask *tick_get_broadcast_mask(void)
 {
-	return &tick_broadcast_mask;
+	return to_cpumask(tick_broadcast_mask);
 }
 
 /*
@@ -72,7 +74,7 @@ int tick_check_broadcast_device(struct clock_event_device *dev)
 
 	clockevents_exchange_device(NULL, dev);
 	tick_broadcast_device.evtdev = dev;
-	if (!cpus_empty(tick_broadcast_mask))
+	if (!cpumask_empty(tick_get_broadcast_mask()))
 		tick_broadcast_start_periodic(dev);
 	return 1;
 }
@@ -104,7 +106,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
 	 */
 	if (!tick_device_is_functional(dev)) {
 		dev->event_handler = tick_handle_periodic;
-		cpu_set(cpu, tick_broadcast_mask);
+		cpumask_set_cpu(cpu, tick_get_broadcast_mask());
 		tick_broadcast_start_periodic(tick_broadcast_device.evtdev);
 		ret = 1;
 	} else {
@@ -116,7 +118,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
 		if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) {
 			int cpu = smp_processor_id();
 
-			cpu_clear(cpu, tick_broadcast_mask);
+			cpumask_clear_cpu(cpu, tick_get_broadcast_mask());
 			tick_broadcast_clear_oneshot(cpu);
 		}
 	}
@@ -125,9 +127,9 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
 }
 
 /*
- * Broadcast the event to the cpus, which are set in the mask
+ * Broadcast the event to the cpus, which are set in the mask (mangled).
  */
-static void tick_do_broadcast(cpumask_t mask)
+static void tick_do_broadcast(struct cpumask *mask)
 {
 	int cpu = smp_processor_id();
 	struct tick_device *td;
@@ -135,22 +137,21 @@ static void tick_do_broadcast(cpumask_t mask)
 	/*
 	 * Check, if the current cpu is in the mask
 	 */
-	if (cpu_isset(cpu, mask)) {
-		cpu_clear(cpu, mask);
+	if (cpumask_test_cpu(cpu, mask)) {
+		cpumask_clear_cpu(cpu, mask);
 		td = &per_cpu(tick_cpu_device, cpu);
 		td->evtdev->event_handler(td->evtdev);
 	}
 
-	if (!cpus_empty(mask)) {
+	if (!cpumask_empty(mask)) {
 		/*
 		 * It might be necessary to actually check whether the devices
 		 * have different broadcast functions. For now, just use the
 		 * one of the first device. This works as long as we have this
 		 * misfeature only on x86 (lapic)
 		 */
-		cpu = first_cpu(mask);
-		td = &per_cpu(tick_cpu_device, cpu);
-		td->evtdev->broadcast(&mask);
+		td = &per_cpu(tick_cpu_device, cpumask_first(mask));
+		td->evtdev->broadcast(mask);
 	}
 }
 
@@ -160,12 +161,11 @@ static void tick_do_broadcast(cpumask_t mask)
  */
 static void tick_do_periodic_broadcast(void)
 {
-	cpumask_t mask;
-
 	spin_lock(&tick_broadcast_lock);
 
-	cpus_and(mask, cpu_online_map, tick_broadcast_mask);
-	tick_do_broadcast(mask);
+	cpumask_and(to_cpumask(tmpmask),
+		    cpu_online_mask, tick_get_broadcast_mask());
+	tick_do_broadcast(to_cpumask(tmpmask));
 
 	spin_unlock(&tick_broadcast_lock);
 }
@@ -228,13 +228,13 @@ static void tick_do_broadcast_on_off(void *why)
 	if (!tick_device_is_functional(dev))
 		goto out;
 
-	bc_stopped = cpus_empty(tick_broadcast_mask);
+	bc_stopped = cpumask_empty(tick_get_broadcast_mask());
 
 	switch (*reason) {
 	case CLOCK_EVT_NOTIFY_BROADCAST_ON:
 	case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
-		if (!cpu_isset(cpu, tick_broadcast_mask)) {
-			cpu_set(cpu, tick_broadcast_mask);
+		if (!cpumask_test_cpu(cpu, tick_get_broadcast_mask())) {
+			cpumask_set_cpu(cpu, tick_get_broadcast_mask());
 			if (tick_broadcast_device.mode ==
 			    TICKDEV_MODE_PERIODIC)
 				clockevents_shutdown(dev);
@@ -244,8 +244,8 @@ static void tick_do_broadcast_on_off(void *why)
 		break;
 	case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
 		if (!tick_broadcast_force &&
-		    cpu_isset(cpu, tick_broadcast_mask)) {
-			cpu_clear(cpu, tick_broadcast_mask);
+		    cpumask_test_cpu(cpu, tick_get_broadcast_mask())) {
+			cpumask_clear_cpu(cpu, tick_get_broadcast_mask());
 			if (tick_broadcast_device.mode ==
 			    TICKDEV_MODE_PERIODIC)
 				tick_setup_periodic(dev, 0);
@@ -253,7 +253,7 @@ static void tick_do_broadcast_on_off(void *why)
 		break;
 	}
 
-	if (cpus_empty(tick_broadcast_mask)) {
+	if (cpumask_empty(tick_get_broadcast_mask())) {
 		if (!bc_stopped)
 			clockevents_shutdown(bc);
 	} else if (bc_stopped) {
@@ -272,7 +272,7 @@ out:
  */
 void tick_broadcast_on_off(unsigned long reason, int *oncpu)
 {
-	if (!cpu_isset(*oncpu, cpu_online_map))
+	if (!cpumask_test_cpu(*oncpu, cpu_online_mask))
 		printk(KERN_ERR "tick-broadcast: ignoring broadcast for "
 		       "offline CPU #%d\n", *oncpu);
 	else
@@ -303,10 +303,10 @@ void tick_shutdown_broadcast(unsigned int *cpup)
 	spin_lock_irqsave(&tick_broadcast_lock, flags);
 
 	bc = tick_broadcast_device.evtdev;
-	cpu_clear(cpu, tick_broadcast_mask);
+	cpumask_clear_cpu(cpu, tick_get_broadcast_mask());
 
 	if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
-		if (bc && cpus_empty(tick_broadcast_mask))
+		if (bc && cpumask_empty(tick_get_broadcast_mask()))
 			clockevents_shutdown(bc);
 	}
 
@@ -342,10 +342,10 @@ int tick_resume_broadcast(void)
 
 		switch (tick_broadcast_device.mode) {
 		case TICKDEV_MODE_PERIODIC:
-			if(!cpus_empty(tick_broadcast_mask))
+			if (!cpumask_empty(tick_get_broadcast_mask()))
 				tick_broadcast_start_periodic(bc);
-			broadcast = cpu_isset(smp_processor_id(),
-					      tick_broadcast_mask);
+			broadcast = cpumask_test_cpu(smp_processor_id(),
+						     tick_get_broadcast_mask());
 			break;
 		case TICKDEV_MODE_ONESHOT:
 			broadcast = tick_resume_broadcast_oneshot(bc);
@@ -360,14 +360,15 @@ int tick_resume_broadcast(void)
 
 #ifdef CONFIG_TICK_ONESHOT
 
-static cpumask_t tick_broadcast_oneshot_mask;
+/* FIXME: use cpumask_var_t. */
+static DECLARE_BITMAP(tick_broadcast_oneshot_mask, NR_CPUS);
 
 /*
- * Debugging: see timer_list.c
+ * Exposed for debugging: see timer_list.c
  */
-cpumask_t *tick_get_broadcast_oneshot_mask(void)
+struct cpumask *tick_get_broadcast_oneshot_mask(void)
 {
-	return &tick_broadcast_oneshot_mask;
+	return to_cpumask(tick_broadcast_oneshot_mask);
 }
 
 static int tick_broadcast_set_event(ktime_t expires, int force)
@@ -389,7 +390,7 @@ int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
  */
 void tick_check_oneshot_broadcast(int cpu)
 {
-	if (cpu_isset(cpu, tick_broadcast_oneshot_mask)) {
+	if (cpumask_test_cpu(cpu, to_cpumask(tick_broadcast_oneshot_mask))) {
 		struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
 
 		clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_ONESHOT);
@@ -402,7 +403,6 @@ void tick_check_oneshot_broadcast(int cpu)
 static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
 {
 	struct tick_device *td;
-	cpumask_t mask;
 	ktime_t now, next_event;
 	int cpu;
 
@@ -410,13 +410,13 @@ static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
 again:
 	dev->next_event.tv64 = KTIME_MAX;
 	next_event.tv64 = KTIME_MAX;
-	mask = CPU_MASK_NONE;
+	cpumask_clear(to_cpumask(tmpmask));
 	now = ktime_get();
 	/* Find all expired events */
-	for_each_cpu_mask_nr(cpu, tick_broadcast_oneshot_mask) {
+	for_each_cpu(cpu, tick_get_broadcast_oneshot_mask()) {
 		td = &per_cpu(tick_cpu_device, cpu);
 		if (td->evtdev->next_event.tv64 <= now.tv64)
-			cpu_set(cpu, mask);
+			cpumask_set_cpu(cpu, to_cpumask(tmpmask));
 		else if (td->evtdev->next_event.tv64 < next_event.tv64)
 			next_event.tv64 = td->evtdev->next_event.tv64;
 	}
@@ -424,7 +424,7 @@ again:
 	/*
 	 * Wakeup the cpus which have an expired event.
 	 */
-	tick_do_broadcast(mask);
+	tick_do_broadcast(to_cpumask(tmpmask));
 
 	/*
 	 * Two reasons for reprogram:
@@ -476,15 +476,16 @@ void tick_broadcast_oneshot_control(unsigned long reason)
 		goto out;
 
 	if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) {
-		if (!cpu_isset(cpu, tick_broadcast_oneshot_mask)) {
-			cpu_set(cpu, tick_broadcast_oneshot_mask);
+		if (!cpumask_test_cpu(cpu, tick_get_broadcast_oneshot_mask())) {
+			cpumask_set_cpu(cpu, tick_get_broadcast_oneshot_mask());
 			clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
 			if (dev->next_event.tv64 < bc->next_event.tv64)
 				tick_broadcast_set_event(dev->next_event, 1);
 		}
 	} else {
-		if (cpu_isset(cpu, tick_broadcast_oneshot_mask)) {
-			cpu_clear(cpu, tick_broadcast_oneshot_mask);
+		if (cpumask_test_cpu(cpu, tick_get_broadcast_oneshot_mask())) {
+			cpumask_clear_cpu(cpu,
+					  tick_get_broadcast_oneshot_mask());
 			clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
 			if (dev->next_event.tv64 != KTIME_MAX)
 				tick_program_event(dev->next_event, 1);
@@ -502,10 +503,11 @@ out:
  */
 static void tick_broadcast_clear_oneshot(int cpu)
 {
-	cpu_clear(cpu, tick_broadcast_oneshot_mask);
+	cpumask_clear_cpu(cpu, tick_get_broadcast_oneshot_mask());
 }
 
-static void tick_broadcast_init_next_event(cpumask_t *mask, ktime_t expires)
+static void tick_broadcast_init_next_event(struct cpumask *mask,
+					   ktime_t expires)
 {
 	struct tick_device *td;
 	int cpu;
@@ -526,7 +528,6 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
 	if (bc->event_handler != tick_handle_oneshot_broadcast) {
 		int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC;
 		int cpu = smp_processor_id();
-		cpumask_t mask;
 
 		bc->event_handler = tick_handle_oneshot_broadcast;
 		clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
@@ -540,13 +541,15 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
 		 * oneshot_mask bits for those and program the
 		 * broadcast device to fire.
 		 */
-		mask = tick_broadcast_mask;
-		cpu_clear(cpu, mask);
-		cpus_or(tick_broadcast_oneshot_mask,
-			tick_broadcast_oneshot_mask, mask);
-
-		if (was_periodic && !cpus_empty(mask)) {
-			tick_broadcast_init_next_event(&mask, tick_next_period);
+		cpumask_copy(to_cpumask(tmpmask), tick_get_broadcast_mask());
+		cpumask_clear_cpu(cpu, to_cpumask(tmpmask));
+		cpumask_or(tick_get_broadcast_oneshot_mask(),
+			   tick_get_broadcast_oneshot_mask(),
+			   to_cpumask(tmpmask));
+
+		if (was_periodic && !cpumask_empty(to_cpumask(tmpmask))) {
+			tick_broadcast_init_next_event(to_cpumask(tmpmask),
+						       tick_next_period);
 			tick_broadcast_set_event(tick_next_period, 1);
 		} else
 			bc->next_event.tv64 = KTIME_MAX;
@@ -585,7 +588,7 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
 	 * Clear the broadcast mask flag for the dead cpu, but do not
 	 * stop the broadcast device!
 	 */
-	cpu_clear(cpu, tick_broadcast_oneshot_mask);
+	cpumask_clear_cpu(cpu, tick_get_broadcast_oneshot_mask());
 
 	spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 }
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index f8372be74122..63e05d423a09 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -254,7 +254,7 @@ static int tick_check_new_device(struct clock_event_device *newdev)
 		curdev = NULL;
 	}
 	clockevents_exchange_device(curdev, newdev);
-	tick_setup_device(td, newdev, cpu, &cpumask_of_cpu(cpu));
+	tick_setup_device(td, newdev, cpu, cpumask_of(cpu));
 	if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
 		tick_oneshot_notify();
 
@@ -299,9 +299,9 @@ static void tick_shutdown(unsigned int *cpup)
 	}
 	/* Transfer the do_timer job away from this cpu */
 	if (*cpup == tick_do_timer_cpu) {
-		int cpu = first_cpu(cpu_online_map);
+		int cpu = cpumask_first(cpu_online_mask);
 
-		tick_do_timer_cpu = (cpu != NR_CPUS) ? cpu :
+		tick_do_timer_cpu = (cpu < nr_cpu_ids) ? cpu :
 			TICK_DO_TIMER_NONE;
 	}
 	spin_unlock_irqrestore(&tick_device_lock, flags);
-- 
cgit v1.2.3


From d036e67b40f52bdd95392390108defbac7e53837 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 1 Jan 2009 10:12:26 +1030
Subject: cpumask: convert kernel/irq

Impact: Reduce stack usage, use new cpumask API.  ALPHA mod!

Main change is that irq_default_affinity becomes a cpumask_var_t, so
treat it as a pointer (this effects alpha).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 arch/alpha/kernel/irq.c   |  3 ++-
 include/linux/interrupt.h |  2 +-
 kernel/irq/manage.c       | 11 +++++++++--
 kernel/irq/proc.c         | 32 +++++++++++++++++++++-----------
 4 files changed, 33 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c
index d0f1620007f7..703731accda6 100644
--- a/arch/alpha/kernel/irq.c
+++ b/arch/alpha/kernel/irq.c
@@ -50,7 +50,8 @@ int irq_select_affinity(unsigned int irq)
 	if (!irq_desc[irq].chip->set_affinity || irq_user_affinity[irq])
 		return 1;
 
-	while (!cpu_possible(cpu) || !cpu_isset(cpu, irq_default_affinity))
+	while (!cpu_possible(cpu) ||
+	       !cpumask_test_cpu(cpu, irq_default_affinity))
 		cpu = (cpu < (NR_CPUS-1) ? cpu + 1 : 0);
 	last_cpu = cpu;
 
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index dfaee6bd265b..91f1ef8e5810 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -109,7 +109,7 @@ extern void enable_irq(unsigned int irq);
 
 #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS)
 
-extern cpumask_t irq_default_affinity;
+extern cpumask_var_t irq_default_affinity;
 
 extern int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask);
 extern int irq_can_set_affinity(unsigned int irq);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 61c4a9b62165..cd0cd8dcb345 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -16,8 +16,15 @@
 #include "internals.h"
 
 #ifdef CONFIG_SMP
+cpumask_var_t irq_default_affinity;
 
-cpumask_t irq_default_affinity = CPU_MASK_ALL;
+static int init_irq_default_affinity(void)
+{
+	alloc_cpumask_var(&irq_default_affinity, GFP_KERNEL);
+	cpumask_setall(irq_default_affinity);
+	return 0;
+}
+core_initcall(init_irq_default_affinity);
 
 /**
  *	synchronize_irq - wait for pending IRQ handlers (on other CPUs)
@@ -127,7 +134,7 @@ int do_irq_select_affinity(unsigned int irq, struct irq_desc *desc)
 			desc->status &= ~IRQ_AFFINITY_SET;
 	}
 
-	cpumask_and(&desc->affinity, cpu_online_mask, &irq_default_affinity);
+	cpumask_and(&desc->affinity, cpu_online_mask, irq_default_affinity);
 set_affinity:
 	desc->chip->set_affinity(irq, &desc->affinity);
 
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index d2c0e5ee53c5..2abd3a7716ed 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -20,7 +20,7 @@ static struct proc_dir_entry *root_irq_dir;
 static int irq_affinity_proc_show(struct seq_file *m, void *v)
 {
 	struct irq_desc *desc = irq_to_desc((long)m->private);
-	cpumask_t *mask = &desc->affinity;
+	const struct cpumask *mask = &desc->affinity;
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	if (desc->status & IRQ_MOVE_PENDING)
@@ -93,7 +93,7 @@ static const struct file_operations irq_affinity_proc_fops = {
 
 static int default_affinity_show(struct seq_file *m, void *v)
 {
-	seq_cpumask(m, &irq_default_affinity);
+	seq_cpumask(m, irq_default_affinity);
 	seq_putc(m, '\n');
 	return 0;
 }
@@ -101,27 +101,37 @@ static int default_affinity_show(struct seq_file *m, void *v)
 static ssize_t default_affinity_write(struct file *file,
 		const char __user *buffer, size_t count, loff_t *ppos)
 {
-	cpumask_t new_value;
+	cpumask_var_t new_value;
 	int err;
 
-	err = cpumask_parse_user(buffer, count, &new_value);
+	if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
+		return -ENOMEM;
+
+	err = cpumask_parse_user(buffer, count, new_value);
 	if (err)
-		return err;
+		goto out;
 
-	if (!is_affinity_mask_valid(new_value))
-		return -EINVAL;
+	if (!is_affinity_mask_valid(new_value)) {
+		err = -EINVAL;
+		goto out;
+	}
 
 	/*
 	 * Do not allow disabling IRQs completely - it's a too easy
 	 * way to make the system unusable accidentally :-) At least
 	 * one online CPU still has to be targeted.
 	 */
-	if (!cpus_intersects(new_value, cpu_online_map))
-		return -EINVAL;
+	if (!cpumask_intersects(new_value, cpu_online_mask)) {
+		err = -EINVAL;
+		goto out;
+	}
 
-	irq_default_affinity = new_value;
+	cpumask_copy(irq_default_affinity, new_value);
+	err = count;
 
-	return count;
+out:
+	free_cpumask_var(new_value);
+	return err;
 }
 
 static int default_affinity_open(struct inode *inode, struct file *file)
-- 
cgit v1.2.3


From bd232f97b30f6bb630efa136a777647545db3039 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 1 Jan 2009 10:12:26 +1030
Subject: cpumask: convert RCU implementations

Impact: use new cpumask API.

rcu_ctrlblk contains a cpumask, and it's highly optimized so I don't want
a cpumask_var_t (ie. a pointer) for the CONFIG_CPUMASK_OFFSTACK case.  It
could use a dangling bitmap, and be allocated in __rcu_init to save memory,
but for the moment we use a bitmap.

(Eventually 'struct cpumask' will be undefined for CONFIG_CPUMASK_OFFSTACK,
so we use a bitmap here to show we really mean it).

We remove on-stack cpumasks, using cpumask_var_t for
rcu_torture_shuffle_tasks() and for_each_cpu_and in force_quiescent_state().

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/rcuclassic.h |  4 ++--
 kernel/rcuclassic.c        | 32 +++++++++++++++++---------------
 kernel/rcupreempt.c        | 19 ++++++++++---------
 kernel/rcutorture.c        | 27 +++++++++++++++------------
 4 files changed, 44 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h
index 301dda829e37..f3f697df1d71 100644
--- a/include/linux/rcuclassic.h
+++ b/include/linux/rcuclassic.h
@@ -59,8 +59,8 @@ struct rcu_ctrlblk {
 	int	signaled;
 
 	spinlock_t	lock	____cacheline_internodealigned_in_smp;
-	cpumask_t	cpumask; /* CPUs that need to switch in order    */
-				 /* for current batch to proceed.        */
+	DECLARE_BITMAP(cpumask, NR_CPUS); /* CPUs that need to switch for */
+					  /* current batch to proceed.     */
 } ____cacheline_internodealigned_in_smp;
 
 /* Is batch a before batch b ? */
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index e503a002f330..0ff9b05706a6 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -63,14 +63,14 @@ static struct rcu_ctrlblk rcu_ctrlblk = {
 	.completed = -300,
 	.pending = -300,
 	.lock = __SPIN_LOCK_UNLOCKED(&rcu_ctrlblk.lock),
-	.cpumask = CPU_MASK_NONE,
+	.cpumask = CPU_BITS_NONE,
 };
 static struct rcu_ctrlblk rcu_bh_ctrlblk = {
 	.cur = -300,
 	.completed = -300,
 	.pending = -300,
 	.lock = __SPIN_LOCK_UNLOCKED(&rcu_bh_ctrlblk.lock),
-	.cpumask = CPU_MASK_NONE,
+	.cpumask = CPU_BITS_NONE,
 };
 
 DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L };
@@ -85,7 +85,6 @@ static void force_quiescent_state(struct rcu_data *rdp,
 			struct rcu_ctrlblk *rcp)
 {
 	int cpu;
-	cpumask_t cpumask;
 	unsigned long flags;
 
 	set_need_resched();
@@ -96,10 +95,10 @@ static void force_quiescent_state(struct rcu_data *rdp,
 		 * Don't send IPI to itself. With irqs disabled,
 		 * rdp->cpu is the current cpu.
 		 *
-		 * cpu_online_map is updated by the _cpu_down()
+		 * cpu_online_mask is updated by the _cpu_down()
 		 * using __stop_machine(). Since we're in irqs disabled
 		 * section, __stop_machine() is not exectuting, hence
-		 * the cpu_online_map is stable.
+		 * the cpu_online_mask is stable.
 		 *
 		 * However,  a cpu might have been offlined _just_ before
 		 * we disabled irqs while entering here.
@@ -107,13 +106,14 @@ static void force_quiescent_state(struct rcu_data *rdp,
 		 * notification, leading to the offlined cpu's bit
 		 * being set in the rcp->cpumask.
 		 *
-		 * Hence cpumask = (rcp->cpumask & cpu_online_map) to prevent
+		 * Hence cpumask = (rcp->cpumask & cpu_online_mask) to prevent
 		 * sending smp_reschedule() to an offlined CPU.
 		 */
-		cpus_and(cpumask, rcp->cpumask, cpu_online_map);
-		cpu_clear(rdp->cpu, cpumask);
-		for_each_cpu_mask_nr(cpu, cpumask)
-			smp_send_reschedule(cpu);
+		for_each_cpu_and(cpu,
+				  to_cpumask(rcp->cpumask), cpu_online_mask) {
+			if (cpu != rdp->cpu)
+				smp_send_reschedule(cpu);
+		}
 	}
 	spin_unlock_irqrestore(&rcp->lock, flags);
 }
@@ -193,7 +193,7 @@ static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
 
 	printk(KERN_ERR "INFO: RCU detected CPU stalls:");
 	for_each_possible_cpu(cpu) {
-		if (cpu_isset(cpu, rcp->cpumask))
+		if (cpumask_test_cpu(cpu, to_cpumask(rcp->cpumask)))
 			printk(" %d", cpu);
 	}
 	printk(" (detected by %d, t=%ld jiffies)\n",
@@ -221,7 +221,8 @@ static void check_cpu_stall(struct rcu_ctrlblk *rcp)
 	long delta;
 
 	delta = jiffies - rcp->jiffies_stall;
-	if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0) {
+	if (cpumask_test_cpu(smp_processor_id(), to_cpumask(rcp->cpumask)) &&
+		delta >= 0) {
 
 		/* We haven't checked in, so go dump stack. */
 		print_cpu_stall(rcp);
@@ -393,7 +394,8 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp)
 		 * unnecessarily.
 		 */
 		smp_mb();
-		cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask);
+		cpumask_andnot(to_cpumask(rcp->cpumask),
+			       cpu_online_mask, &nohz_cpu_mask);
 
 		rcp->signaled = 0;
 	}
@@ -406,8 +408,8 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp)
  */
 static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp)
 {
-	cpu_clear(cpu, rcp->cpumask);
-	if (cpus_empty(rcp->cpumask)) {
+	cpumask_clear_cpu(cpu, to_cpumask(rcp->cpumask));
+	if (cpumask_empty(to_cpumask(rcp->cpumask))) {
 		/* batch completed ! */
 		rcp->completed = rcp->cur;
 		rcu_start_batch(rcp);
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c
index 04982659875a..f9dc8f3720f6 100644
--- a/kernel/rcupreempt.c
+++ b/kernel/rcupreempt.c
@@ -164,7 +164,8 @@ static char *rcu_try_flip_state_names[] =
 	{ "idle", "waitack", "waitzero", "waitmb" };
 #endif /* #ifdef CONFIG_RCU_TRACE */
 
-static cpumask_t rcu_cpu_online_map __read_mostly = CPU_MASK_NONE;
+static DECLARE_BITMAP(rcu_cpu_online_map, NR_CPUS) __read_mostly
+	= CPU_BITS_NONE;
 
 /*
  * Enum and per-CPU flag to determine when each CPU has seen
@@ -758,7 +759,7 @@ rcu_try_flip_idle(void)
 
 	/* Now ask each CPU for acknowledgement of the flip. */
 
-	for_each_cpu_mask_nr(cpu, rcu_cpu_online_map) {
+	for_each_cpu(cpu, to_cpumask(rcu_cpu_online_map)) {
 		per_cpu(rcu_flip_flag, cpu) = rcu_flipped;
 		dyntick_save_progress_counter(cpu);
 	}
@@ -776,7 +777,7 @@ rcu_try_flip_waitack(void)
 	int cpu;
 
 	RCU_TRACE_ME(rcupreempt_trace_try_flip_a1);
-	for_each_cpu_mask_nr(cpu, rcu_cpu_online_map)
+	for_each_cpu(cpu, to_cpumask(rcu_cpu_online_map))
 		if (rcu_try_flip_waitack_needed(cpu) &&
 		    per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) {
 			RCU_TRACE_ME(rcupreempt_trace_try_flip_ae1);
@@ -808,7 +809,7 @@ rcu_try_flip_waitzero(void)
 	/* Check to see if the sum of the "last" counters is zero. */
 
 	RCU_TRACE_ME(rcupreempt_trace_try_flip_z1);
-	for_each_cpu_mask_nr(cpu, rcu_cpu_online_map)
+	for_each_cpu(cpu, to_cpumask(rcu_cpu_online_map))
 		sum += RCU_DATA_CPU(cpu)->rcu_flipctr[lastidx];
 	if (sum != 0) {
 		RCU_TRACE_ME(rcupreempt_trace_try_flip_ze1);
@@ -823,7 +824,7 @@ rcu_try_flip_waitzero(void)
 	smp_mb();  /*  ^^^^^^^^^^^^ */
 
 	/* Call for a memory barrier from each CPU. */
-	for_each_cpu_mask_nr(cpu, rcu_cpu_online_map) {
+	for_each_cpu(cpu, to_cpumask(rcu_cpu_online_map)) {
 		per_cpu(rcu_mb_flag, cpu) = rcu_mb_needed;
 		dyntick_save_progress_counter(cpu);
 	}
@@ -843,7 +844,7 @@ rcu_try_flip_waitmb(void)
 	int cpu;
 
 	RCU_TRACE_ME(rcupreempt_trace_try_flip_m1);
-	for_each_cpu_mask_nr(cpu, rcu_cpu_online_map)
+	for_each_cpu(cpu, to_cpumask(rcu_cpu_online_map))
 		if (rcu_try_flip_waitmb_needed(cpu) &&
 		    per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) {
 			RCU_TRACE_ME(rcupreempt_trace_try_flip_me1);
@@ -1032,7 +1033,7 @@ void rcu_offline_cpu(int cpu)
 	RCU_DATA_CPU(cpu)->rcu_flipctr[0] = 0;
 	RCU_DATA_CPU(cpu)->rcu_flipctr[1] = 0;
 
-	cpu_clear(cpu, rcu_cpu_online_map);
+	cpumask_clear_cpu(cpu, to_cpumask(rcu_cpu_online_map));
 
 	spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
 
@@ -1072,7 +1073,7 @@ void __cpuinit rcu_online_cpu(int cpu)
 	struct rcu_data *rdp;
 
 	spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags);
-	cpu_set(cpu, rcu_cpu_online_map);
+	cpumask_set_cpu(cpu, to_cpumask(rcu_cpu_online_map));
 	spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
 
 	/*
@@ -1430,7 +1431,7 @@ void __init __rcu_init(void)
 	 * We don't need protection against CPU-Hotplug here
 	 * since
 	 * a) If a CPU comes online while we are iterating over the
-	 *    cpu_online_map below, we would only end up making a
+	 *    cpu_online_mask below, we would only end up making a
 	 *    duplicate call to rcu_online_cpu() which sets the corresponding
 	 *    CPU's mask in the rcu_cpu_online_map.
 	 *
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index b31065522104..3245b40952c6 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -868,49 +868,52 @@ static int rcu_idle_cpu;	/* Force all torture tasks off this CPU */
  */
 static void rcu_torture_shuffle_tasks(void)
 {
-	cpumask_t tmp_mask;
+	cpumask_var_t tmp_mask;
 	int i;
 
-	cpus_setall(tmp_mask);
+	if (!alloc_cpumask_var(&tmp_mask, GFP_KERNEL))
+		BUG();
+
+	cpumask_setall(tmp_mask);
 	get_online_cpus();
 
 	/* No point in shuffling if there is only one online CPU (ex: UP) */
-	if (num_online_cpus() == 1) {
-		put_online_cpus();
-		return;
-	}
+	if (num_online_cpus() == 1)
+		goto out;
 
 	if (rcu_idle_cpu != -1)
-		cpu_clear(rcu_idle_cpu, tmp_mask);
+		cpumask_clear_cpu(rcu_idle_cpu, tmp_mask);
 
-	set_cpus_allowed_ptr(current, &tmp_mask);
+	set_cpus_allowed_ptr(current, tmp_mask);
 
 	if (reader_tasks) {
 		for (i = 0; i < nrealreaders; i++)
 			if (reader_tasks[i])
 				set_cpus_allowed_ptr(reader_tasks[i],
-						     &tmp_mask);
+						     tmp_mask);
 	}
 
 	if (fakewriter_tasks) {
 		for (i = 0; i < nfakewriters; i++)
 			if (fakewriter_tasks[i])
 				set_cpus_allowed_ptr(fakewriter_tasks[i],
-						     &tmp_mask);
+						     tmp_mask);
 	}
 
 	if (writer_task)
-		set_cpus_allowed_ptr(writer_task, &tmp_mask);
+		set_cpus_allowed_ptr(writer_task, tmp_mask);
 
 	if (stats_task)
-		set_cpus_allowed_ptr(stats_task, &tmp_mask);
+		set_cpus_allowed_ptr(stats_task, tmp_mask);
 
 	if (rcu_idle_cpu == -1)
 		rcu_idle_cpu = num_online_cpus() - 1;
 	else
 		rcu_idle_cpu--;
 
+out:
 	put_online_cpus();
+	free_cpumask_var(tmp_mask);
 }
 
 /* Shuffle tasks across CPUs, with the intent of allowing each CPU in the
-- 
cgit v1.2.3


From 41c7bb9588904eb060a95bcad47bd3804a1ece25 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 1 Jan 2009 10:12:28 +1030
Subject: cpumask: convert rest of files in kernel/

Impact: Reduce stack usage, use new cpumask API.

Mainly changing cpumask_t to 'struct cpumask' and similar simple API
conversion.  Two conversions worth mentioning:

1) we use cpumask_any_but to avoid a temporary in kernel/softlockup.c,
2) Use cpumask_var_t in taskstats_user_cmd().

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@redhat.com>
---
 include/linux/stop_machine.h |  6 +++---
 kernel/power/poweroff.c      |  2 +-
 kernel/softlockup.c          |  6 ++----
 kernel/stop_machine.c        |  8 ++++----
 kernel/taskstats.c           | 39 ++++++++++++++++++++++++---------------
 5 files changed, 34 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index faf1519b5adc..74d59a641362 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -23,7 +23,7 @@
  *
  * This can be thought of as a very heavy write lock, equivalent to
  * grabbing every spinlock in the kernel. */
-int stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus);
+int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus);
 
 /**
  * __stop_machine: freeze the machine on all CPUs and run this function
@@ -34,11 +34,11 @@ int stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus);
  * Description: This is a special version of the above, which assumes cpus
  * won't come or go while it's being called.  Used by hotplug cpu.
  */
-int __stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus);
+int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus);
 #else
 
 static inline int stop_machine(int (*fn)(void *), void *data,
-			       const cpumask_t *cpus)
+			       const struct cpumask *cpus)
 {
 	int ret;
 	local_irq_disable();
diff --git a/kernel/power/poweroff.c b/kernel/power/poweroff.c
index 72016f051477..97890831e1b5 100644
--- a/kernel/power/poweroff.c
+++ b/kernel/power/poweroff.c
@@ -27,7 +27,7 @@ static DECLARE_WORK(poweroff_work, do_poweroff);
 static void handle_poweroff(int key, struct tty_struct *tty)
 {
 	/* run sysrq poweroff on boot cpu */
-	schedule_work_on(first_cpu(cpu_online_map), &poweroff_work);
+	schedule_work_on(cpumask_first(cpu_online_mask), &poweroff_work);
 }
 
 static struct sysrq_key_op	sysrq_poweroff_op = {
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 492f0c72fec5..d9188c66278a 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -310,10 +310,8 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 	case CPU_DOWN_PREPARE:
 	case CPU_DOWN_PREPARE_FROZEN:
 		if (hotcpu == check_cpu) {
-			cpumask_t temp_cpu_online_map = cpu_online_map;
-
-			cpu_clear(hotcpu, temp_cpu_online_map);
-			check_cpu = cpumask_any(&temp_cpu_online_map);
+			/* Pick any other online cpu. */
+			check_cpu = cpumask_any_but(cpu_online_mask, hotcpu);
 		}
 		break;
 
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 24e8ceacc388..286c41722e8c 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -69,10 +69,10 @@ static void stop_cpu(struct work_struct *unused)
 	int err;
 
 	if (!active_cpus) {
-		if (cpu == first_cpu(cpu_online_map))
+		if (cpu == cpumask_first(cpu_online_mask))
 			smdata = &active;
 	} else {
-		if (cpu_isset(cpu, *active_cpus))
+		if (cpumask_test_cpu(cpu, active_cpus))
 			smdata = &active;
 	}
 	/* Simple state machine */
@@ -109,7 +109,7 @@ static int chill(void *unused)
 	return 0;
 }
 
-int __stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus)
+int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
 {
 	struct work_struct *sm_work;
 	int i, ret;
@@ -142,7 +142,7 @@ int __stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus)
 	return ret;
 }
 
-int stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus)
+int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
 {
 	int ret;
 
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 6d7dc4ec4aa5..888adbcca30c 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -290,18 +290,17 @@ ret:
 	return;
 }
 
-static int add_del_listener(pid_t pid, cpumask_t *maskp, int isadd)
+static int add_del_listener(pid_t pid, const struct cpumask *mask, int isadd)
 {
 	struct listener_list *listeners;
 	struct listener *s, *tmp;
 	unsigned int cpu;
-	cpumask_t mask = *maskp;
 
-	if (!cpus_subset(mask, cpu_possible_map))
+	if (!cpumask_subset(mask, cpu_possible_mask))
 		return -EINVAL;
 
 	if (isadd == REGISTER) {
-		for_each_cpu_mask_nr(cpu, mask) {
+		for_each_cpu(cpu, mask) {
 			s = kmalloc_node(sizeof(struct listener), GFP_KERNEL,
 					 cpu_to_node(cpu));
 			if (!s)
@@ -320,7 +319,7 @@ static int add_del_listener(pid_t pid, cpumask_t *maskp, int isadd)
 
 	/* Deregister or cleanup */
 cleanup:
-	for_each_cpu_mask_nr(cpu, mask) {
+	for_each_cpu(cpu, mask) {
 		listeners = &per_cpu(listener_array, cpu);
 		down_write(&listeners->sem);
 		list_for_each_entry_safe(s, tmp, &listeners->list, list) {
@@ -335,7 +334,7 @@ cleanup:
 	return 0;
 }
 
-static int parse(struct nlattr *na, cpumask_t *mask)
+static int parse(struct nlattr *na, struct cpumask *mask)
 {
 	char *data;
 	int len;
@@ -428,23 +427,33 @@ err:
 
 static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
 {
-	int rc = 0;
+	int rc;
 	struct sk_buff *rep_skb;
 	struct taskstats *stats;
 	size_t size;
-	cpumask_t mask;
+	cpumask_var_t mask;
+
+	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+		return -ENOMEM;
 
-	rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], &mask);
+	rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], mask);
 	if (rc < 0)
-		return rc;
-	if (rc == 0)
-		return add_del_listener(info->snd_pid, &mask, REGISTER);
+		goto free_return_rc;
+	if (rc == 0) {
+		rc = add_del_listener(info->snd_pid, mask, REGISTER);
+		goto free_return_rc;
+	}
 
-	rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], &mask);
+	rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], mask);
 	if (rc < 0)
+		goto free_return_rc;
+	if (rc == 0) {
+		rc = add_del_listener(info->snd_pid, mask, DEREGISTER);
+free_return_rc:
+		free_cpumask_var(mask);
 		return rc;
-	if (rc == 0)
-		return add_del_listener(info->snd_pid, &mask, DEREGISTER);
+	}
+	free_cpumask_var(mask);
 
 	/*
 	 * Size includes space for nested attributes
-- 
cgit v1.2.3


From 8c384cdee3e04d6194a2c2b192b624754f990835 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 1 Jan 2009 10:12:30 +1030
Subject: cpumask: CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS

Impact: new debug CONFIG options

This helps find unconverted code.  It currently breaks compile horribly,
but we never wanted a flag day so that's expected.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/cpumask.h | 11 ++++++++++-
 lib/Kconfig             |  4 ++++
 2 files changed, 14 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 7c178a6baae3..9f315382610b 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -144,6 +144,7 @@
 typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t;
 extern cpumask_t _unused_cpumask_arg_;
 
+#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
 #define cpu_set(cpu, dst) __cpu_set((cpu), &(dst))
 static inline void __cpu_set(int cpu, volatile cpumask_t *dstp)
 {
@@ -267,6 +268,7 @@ static inline void __cpus_shift_left(cpumask_t *dstp,
 {
 	bitmap_shift_left(dstp->bits, srcp->bits, n, nbits);
 }
+#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */
 
 /**
  * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask *
@@ -304,6 +306,7 @@ static inline const struct cpumask *get_cpu_mask(unsigned int cpu)
 	return to_cpumask(p);
 }
 
+#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
 /*
  * In cases where we take the address of the cpumask immediately,
  * gcc optimizes it out (it's a constant) and there's no huge stack
@@ -389,19 +392,22 @@ static inline void __cpus_fold(cpumask_t *dstp, const cpumask_t *origp,
 {
 	bitmap_fold(dstp->bits, origp->bits, sz, nbits);
 }
+#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */
 
 #if NR_CPUS == 1
 
 #define nr_cpu_ids		1
+#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
 #define first_cpu(src)		({ (void)(src); 0; })
 #define next_cpu(n, src)	({ (void)(src); 1; })
 #define any_online_cpu(mask)	0
 #define for_each_cpu_mask(cpu, mask)	\
 	for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
-
+#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */
 #else /* NR_CPUS > 1 */
 
 extern int nr_cpu_ids;
+#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
 int __first_cpu(const cpumask_t *srcp);
 int __next_cpu(int n, const cpumask_t *srcp);
 int __any_online_cpu(const cpumask_t *mask);
@@ -413,8 +419,10 @@ int __any_online_cpu(const cpumask_t *mask);
 	for ((cpu) = -1;				\
 		(cpu) = next_cpu((cpu), (mask)),	\
 		(cpu) < NR_CPUS; )
+#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */
 #endif
 
+#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
 #if NR_CPUS <= 64
 
 #define next_cpu_nr(n, src)		next_cpu(n, src)
@@ -432,6 +440,7 @@ int __next_cpu_nr(int n, const cpumask_t *srcp);
 		(cpu) < nr_cpu_ids; )
 
 #endif /* NR_CPUS > 64 */
+#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */
 
 /*
  * The following particular system cpumasks and operations manage
diff --git a/lib/Kconfig b/lib/Kconfig
index fc5f5ee50bc2..03c2c24b9083 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -170,4 +170,8 @@ config CPUMASK_OFFSTACK
 	  them on the stack.  This is a bit more expensive, but avoids
 	  stack overflow.
 
+config DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
+       bool "Disable obsolete cpumask functions" if DEBUG_PER_CPU_MAPS
+       depends on EXPERIMENTAL && BROKEN
+
 endmenu
-- 
cgit v1.2.3


From ebdab07dad3d3a008e519b0a028e1e1ad5ecaef0 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 2 Jan 2009 16:12:48 +0100
Subject: ide: move sysfs support to ide-sysfs.c

While at it:
- media_string() -> ide_media_string()

There should be no functional changes caused by this patch.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/Makefile    |   2 +-
 drivers/ide/ide-probe.c |  52 --------------------
 drivers/ide/ide-sysfs.c | 125 ++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/ide/ide.c       |  72 +---------------------------
 include/linux/ide.h     |   4 ++
 5 files changed, 132 insertions(+), 123 deletions(-)
 create mode 100644 drivers/ide/ide-sysfs.c

(limited to 'include/linux')

diff --git a/drivers/ide/Makefile b/drivers/ide/Makefile
index 177e3f8523ed..410728992e6a 100644
--- a/drivers/ide/Makefile
+++ b/drivers/ide/Makefile
@@ -5,7 +5,7 @@
 EXTRA_CFLAGS				+= -Idrivers/ide
 
 ide-core-y += ide.o ide-ioctls.o ide-io.o ide-iops.o ide-lib.o ide-probe.o \
-	      ide-taskfile.o ide-pm.o ide-park.o ide-pio-blacklist.o
+	      ide-taskfile.o ide-pm.o ide-park.o ide-pio-blacklist.o ide-sysfs.o
 
 # core IDE code
 ide-core-$(CONFIG_IDE_TIMINGS)		+= ide-timings.o
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 91f5faee7404..f9efd069edc2 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -1420,58 +1420,6 @@ static void ide_port_cable_detect(ide_hwif_t *hwif)
 	}
 }
 
-static ssize_t store_delete_devices(struct device *portdev,
-				    struct device_attribute *attr,
-				    const char *buf, size_t n)
-{
-	ide_hwif_t *hwif = dev_get_drvdata(portdev);
-
-	if (strncmp(buf, "1", n))
-		return -EINVAL;
-
-	ide_port_unregister_devices(hwif);
-
-	return n;
-};
-
-static DEVICE_ATTR(delete_devices, S_IWUSR, NULL, store_delete_devices);
-
-static ssize_t store_scan(struct device *portdev,
-			  struct device_attribute *attr,
-			  const char *buf, size_t n)
-{
-	ide_hwif_t *hwif = dev_get_drvdata(portdev);
-
-	if (strncmp(buf, "1", n))
-		return -EINVAL;
-
-	ide_port_unregister_devices(hwif);
-	ide_port_scan(hwif);
-
-	return n;
-};
-
-static DEVICE_ATTR(scan, S_IWUSR, NULL, store_scan);
-
-static struct device_attribute *ide_port_attrs[] = {
-	&dev_attr_delete_devices,
-	&dev_attr_scan,
-	NULL
-};
-
-static int ide_sysfs_register_port(ide_hwif_t *hwif)
-{
-	int i, uninitialized_var(rc);
-
-	for (i = 0; ide_port_attrs[i]; i++) {
-		rc = device_create_file(hwif->portdev, ide_port_attrs[i]);
-		if (rc)
-			break;
-	}
-
-	return rc;
-}
-
 static unsigned int ide_indexes;
 
 /**
diff --git a/drivers/ide/ide-sysfs.c b/drivers/ide/ide-sysfs.c
new file mode 100644
index 000000000000..883ffacaf45a
--- /dev/null
+++ b/drivers/ide/ide-sysfs.c
@@ -0,0 +1,125 @@
+#include <linux/kernel.h>
+#include <linux/ide.h>
+
+char *ide_media_string(ide_drive_t *drive)
+{
+	switch (drive->media) {
+	case ide_disk:
+		return "disk";
+	case ide_cdrom:
+		return "cdrom";
+	case ide_tape:
+		return "tape";
+	case ide_floppy:
+		return "floppy";
+	case ide_optical:
+		return "optical";
+	default:
+		return "UNKNOWN";
+	}
+}
+
+static ssize_t media_show(struct device *dev, struct device_attribute *attr,
+			  char *buf)
+{
+	ide_drive_t *drive = to_ide_device(dev);
+	return sprintf(buf, "%s\n", ide_media_string(drive));
+}
+
+static ssize_t drivename_show(struct device *dev, struct device_attribute *attr,
+			      char *buf)
+{
+	ide_drive_t *drive = to_ide_device(dev);
+	return sprintf(buf, "%s\n", drive->name);
+}
+
+static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
+			     char *buf)
+{
+	ide_drive_t *drive = to_ide_device(dev);
+	return sprintf(buf, "ide:m-%s\n", ide_media_string(drive));
+}
+
+static ssize_t model_show(struct device *dev, struct device_attribute *attr,
+			  char *buf)
+{
+	ide_drive_t *drive = to_ide_device(dev);
+	return sprintf(buf, "%s\n", (char *)&drive->id[ATA_ID_PROD]);
+}
+
+static ssize_t firmware_show(struct device *dev, struct device_attribute *attr,
+			     char *buf)
+{
+	ide_drive_t *drive = to_ide_device(dev);
+	return sprintf(buf, "%s\n", (char *)&drive->id[ATA_ID_FW_REV]);
+}
+
+static ssize_t serial_show(struct device *dev, struct device_attribute *attr,
+			   char *buf)
+{
+	ide_drive_t *drive = to_ide_device(dev);
+	return sprintf(buf, "%s\n", (char *)&drive->id[ATA_ID_SERNO]);
+}
+
+struct device_attribute ide_dev_attrs[] = {
+	__ATTR_RO(media),
+	__ATTR_RO(drivename),
+	__ATTR_RO(modalias),
+	__ATTR_RO(model),
+	__ATTR_RO(firmware),
+	__ATTR(serial, 0400, serial_show, NULL),
+	__ATTR(unload_heads, 0644, ide_park_show, ide_park_store),
+	__ATTR_NULL
+};
+
+static ssize_t store_delete_devices(struct device *portdev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t n)
+{
+	ide_hwif_t *hwif = dev_get_drvdata(portdev);
+
+	if (strncmp(buf, "1", n))
+		return -EINVAL;
+
+	ide_port_unregister_devices(hwif);
+
+	return n;
+};
+
+static DEVICE_ATTR(delete_devices, S_IWUSR, NULL, store_delete_devices);
+
+static ssize_t store_scan(struct device *portdev,
+			  struct device_attribute *attr,
+			  const char *buf, size_t n)
+{
+	ide_hwif_t *hwif = dev_get_drvdata(portdev);
+
+	if (strncmp(buf, "1", n))
+		return -EINVAL;
+
+	ide_port_unregister_devices(hwif);
+	ide_port_scan(hwif);
+
+	return n;
+};
+
+static DEVICE_ATTR(scan, S_IWUSR, NULL, store_scan);
+
+static struct device_attribute *ide_port_attrs[] = {
+	&dev_attr_delete_devices,
+	&dev_attr_scan,
+	NULL
+};
+
+int ide_sysfs_register_port(ide_hwif_t *hwif)
+{
+	int i, uninitialized_var(rc);
+
+	for (i = 0; ide_port_attrs[i]; i++) {
+		rc = device_create_file(hwif->portdev, ide_port_attrs[i]);
+		if (rc)
+			break;
+	}
+
+	return rc;
+}
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index f0f09f702e9c..46a2d4ca812b 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -440,81 +440,13 @@ static int ide_bus_match(struct device *dev, struct device_driver *drv)
 	return 1;
 }
 
-static char *media_string(ide_drive_t *drive)
-{
-	switch (drive->media) {
-	case ide_disk:
-		return "disk";
-	case ide_cdrom:
-		return "cdrom";
-	case ide_tape:
-		return "tape";
-	case ide_floppy:
-		return "floppy";
-	case ide_optical:
-		return "optical";
-	default:
-		return "UNKNOWN";
-	}
-}
-
-static ssize_t media_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	ide_drive_t *drive = to_ide_device(dev);
-	return sprintf(buf, "%s\n", media_string(drive));
-}
-
-static ssize_t drivename_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	ide_drive_t *drive = to_ide_device(dev);
-	return sprintf(buf, "%s\n", drive->name);
-}
-
-static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	ide_drive_t *drive = to_ide_device(dev);
-	return sprintf(buf, "ide:m-%s\n", media_string(drive));
-}
-
-static ssize_t model_show(struct device *dev, struct device_attribute *attr,
-			  char *buf)
-{
-	ide_drive_t *drive = to_ide_device(dev);
-	return sprintf(buf, "%s\n", (char *)&drive->id[ATA_ID_PROD]);
-}
-
-static ssize_t firmware_show(struct device *dev, struct device_attribute *attr,
-			     char *buf)
-{
-	ide_drive_t *drive = to_ide_device(dev);
-	return sprintf(buf, "%s\n", (char *)&drive->id[ATA_ID_FW_REV]);
-}
-
-static ssize_t serial_show(struct device *dev, struct device_attribute *attr,
-			   char *buf)
-{
-	ide_drive_t *drive = to_ide_device(dev);
-	return sprintf(buf, "%s\n", (char *)&drive->id[ATA_ID_SERNO]);
-}
-
-static struct device_attribute ide_dev_attrs[] = {
-	__ATTR_RO(media),
-	__ATTR_RO(drivename),
-	__ATTR_RO(modalias),
-	__ATTR_RO(model),
-	__ATTR_RO(firmware),
-	__ATTR(serial, 0400, serial_show, NULL),
-	__ATTR(unload_heads, 0644, ide_park_show, ide_park_store),
-	__ATTR_NULL
-};
-
 static int ide_uevent(struct device *dev, struct kobj_uevent_env *env)
 {
 	ide_drive_t *drive = to_ide_device(dev);
 
-	add_uevent_var(env, "MEDIA=%s", media_string(drive));
+	add_uevent_var(env, "MEDIA=%s", ide_media_string(drive));
 	add_uevent_var(env, "DRIVENAME=%s", drive->name);
-	add_uevent_var(env, "MODALIAS=ide:m-%s", media_string(drive));
+	add_uevent_var(env, "MODALIAS=ide:m-%s", ide_media_string(drive));
 	return 0;
 }
 
diff --git a/include/linux/ide.h b/include/linux/ide.h
index e99c56de7f56..62fccaea3110 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1533,6 +1533,7 @@ void ide_unregister_region(struct gendisk *);
 void ide_undecoded_slave(ide_drive_t *);
 
 void ide_port_apply_params(ide_hwif_t *);
+int ide_sysfs_register_port(ide_hwif_t *);
 
 struct ide_host *ide_host_alloc(const struct ide_port_info *, hw_regs_t **);
 void ide_host_free(struct ide_host *);
@@ -1627,6 +1628,9 @@ extern struct mutex ide_cfg_mtx;
 
 #define local_irq_set(flags)	do { local_save_flags((flags)); local_irq_enable_in_hardirq(); } while (0)
 
+char *ide_media_string(ide_drive_t *);
+
+extern struct device_attribute ide_dev_attrs[];
 extern struct bus_type ide_bus_type;
 extern struct class *ide_port_class;
 
-- 
cgit v1.2.3


From 295f00042aaf6b553b5f37348f89bab463d4a469 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 2 Jan 2009 16:12:48 +0100
Subject: ide: don't execute the next queued command from the hard-IRQ context
 (v2)

* Tell the block layer that we are not done handling requests by using
  blk_plug_device() in ide_do_request() (request handling function)
  and ide_timer_expiry() (timeout handler) if the queue is not empty.

* Remove optimization which directly calls ide_do_request() for the next
  queued command from the ide_intr() (IRQ handler) and ide_timer_expiry().

* Remove no longer needed IRQ masking from ide_do_request() - in case of
  IDE ports needing serialization disable_irq_nosync()/enable_irq() was
  used for the (possibly shared) IRQ of the other IDE port.

* Put the misplaced comment in the right place in ide_do_request().

* Drop no longer needed 'int masked_irq' argument from ide_do_request().

* Merge ide_do_request() into do_ide_request().

* Remove no longer needed IDE_NO_IRQ define.

While at it:

* Don't use HWGROUP() macro in do_ide_request().

* Use __func__ in ide_intr().

This patch reduces IRQ hadling latency for IDE and improves the system-wide
handling of shared IRQs (which should result in more timeout resistant and
stable IDE systems).  It also makes it possible to do some further changes
later (i.e. replace some busy-waiting delays with sleeping equivalents).

v2:
Changes per review from Elias Oltmanns:
- fix wrong goto statement in 'if (startstop == ide_stopped)' block
- use spin_unlock_irq()
- don't use obsolete HWIF() macro

Cc: Elias Oltmanns <eo@nebensachen.de>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-io.c | 67 +++++++++++++++++++++++-----------------------------
 include/linux/ide.h  |  7 ------
 2 files changed, 30 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index ecacc008fdaf..23754bc5e595 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -778,8 +778,10 @@ repeat:
  * the driver.  This makes the driver much more friendlier to shared IRQs
  * than previous designs, while remaining 100% (?) SMP safe and capable.
  */
-static void ide_do_request (ide_hwgroup_t *hwgroup, int masked_irq)
+void do_ide_request(struct request_queue *q)
 {
+	ide_drive_t	*orig_drive = q->queuedata;
+	ide_hwgroup_t	*hwgroup = orig_drive->hwif->hwgroup;
 	ide_drive_t	*drive;
 	ide_hwif_t	*hwif;
 	struct request	*rq;
@@ -837,10 +839,14 @@ static void ide_do_request (ide_hwgroup_t *hwgroup, int masked_irq)
 			}
 
 			/* no more work for this hwgroup (for now) */
-			return;
+			goto plug_device;
 		}
-	again:
-		hwif = HWIF(drive);
+
+		if (drive != orig_drive)
+			goto plug_device;
+again:
+		hwif = drive->hwif;
+
 		if (hwif != hwgroup->hwif) {
 			/*
 			 * set nIEN for previous hwif, drives in the
@@ -888,41 +894,26 @@ static void ide_do_request (ide_hwgroup_t *hwgroup, int masked_irq)
 				goto again;
 			/* We clear busy, there should be no pending ATA command at this point. */
 			hwgroup->busy = 0;
-			break;
+			goto plug_device;
 		}
 
 		hwgroup->rq = rq;
 
-		/*
-		 * Some systems have trouble with IDE IRQs arriving while
-		 * the driver is still setting things up.  So, here we disable
-		 * the IRQ used by this interface while the request is being started.
-		 * This may look bad at first, but pretty much the same thing
-		 * happens anyway when any interrupt comes in, IDE or otherwise
-		 *  -- the kernel masks the IRQ while it is being handled.
-		 */
-		if (masked_irq != IDE_NO_IRQ && hwif->irq != masked_irq)
-			disable_irq_nosync(hwif->irq);
-		spin_unlock(&hwgroup->lock);
-		local_irq_enable_in_hardirq();
-			/* allow other IRQs while we start this request */
+		spin_unlock_irq(&hwgroup->lock);
 		startstop = start_request(drive, rq);
 		spin_lock_irq(&hwgroup->lock);
-		if (masked_irq != IDE_NO_IRQ && hwif->irq != masked_irq)
-			enable_irq(hwif->irq);
-		if (startstop == ide_stopped)
+
+		if (startstop == ide_stopped) {
 			hwgroup->busy = 0;
+			if (!elv_queue_empty(orig_drive->queue))
+				blk_plug_device(orig_drive->queue);
+		}
 	}
-}
+	return;
 
-/*
- * Passes the stuff to ide_do_request
- */
-void do_ide_request(struct request_queue *q)
-{
-	ide_drive_t *drive = q->queuedata;
-
-	ide_do_request(HWGROUP(drive), IDE_NO_IRQ);
+plug_device:
+	if (!elv_queue_empty(orig_drive->queue))
+		blk_plug_device(orig_drive->queue);
 }
 
 /*
@@ -1074,11 +1065,13 @@ void ide_timer_expiry (unsigned long data)
 			drive->service_time = jiffies - drive->service_start;
 			spin_lock_irq(&hwgroup->lock);
 			enable_irq(hwif->irq);
-			if (startstop == ide_stopped)
+			if (startstop == ide_stopped) {
 				hwgroup->busy = 0;
+				if (!elv_queue_empty(drive->queue))
+					blk_plug_device(drive->queue);
+			}
 		}
 	}
-	ide_do_request(hwgroup, IDE_NO_IRQ);
 	spin_unlock_irqrestore(&hwgroup->lock, flags);
 }
 
@@ -1271,11 +1264,11 @@ irqreturn_t ide_intr (int irq, void *dev_id)
 	if (startstop == ide_stopped) {
 		if (hwgroup->handler == NULL) {	/* paranoia */
 			hwgroup->busy = 0;
-			ide_do_request(hwgroup, hwif->irq);
-		} else {
-			printk(KERN_ERR "%s: ide_intr: huh? expected NULL handler "
-				"on exit\n", drive->name);
-		}
+			if (!elv_queue_empty(drive->queue))
+				blk_plug_device(drive->queue);
+		} else
+			printk(KERN_ERR "%s: %s: huh? expected NULL handler "
+					"on exit\n", __func__, drive->name);
 	}
 out_handled:
 	irq_ret = IRQ_HANDLED;
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 62fccaea3110..968ca8f60531 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -32,13 +32,6 @@
 # define SUPPORT_VLB_SYNC 1
 #endif
 
-/*
- * Used to indicate "no IRQ", should be a value that cannot be an IRQ
- * number.
- */
- 
-#define IDE_NO_IRQ		(-1)
-
 typedef unsigned char	byte;	/* used everywhere */
 
 /*
-- 
cgit v1.2.3


From 631de3708d595d153e8a510a3608689290f4c0ed Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 2 Jan 2009 16:12:50 +0100
Subject: ide: add ide_[un]lock_hwgroup() helpers

Add ide_[un]lock_hwgroup() inline helpers for obtaining exclusive
access to the given hwgroup and update the core code accordingly.

[ This change besides making code saner results in more efficient
  use of ide_{get,release}_lock(). ]

Cc: Michael Schmitz <schmitz@biophys.uni-duesseldorf.de>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Elias Oltmanns <eo@nebensachen.de>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-io.c   | 32 +++++++++++---------------------
 drivers/ide/ide-park.c |  2 +-
 include/linux/ide.h    | 20 ++++++++++++++++++++
 3 files changed, 32 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index c60512196f61..ab480042757a 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -790,10 +790,7 @@ void do_ide_request(struct request_queue *q)
 	/* caller must own hwgroup->lock */
 	BUG_ON(!irqs_disabled());
 
-	while (!hwgroup->busy) {
-		hwgroup->busy = 1;
-		/* for atari only */
-		ide_get_lock(ide_intr, hwgroup);
+	while (!ide_lock_hwgroup(hwgroup)) {
 		drive = choose_drive(hwgroup);
 		if (drive == NULL) {
 			int sleeping = 0;
@@ -825,17 +822,10 @@ void do_ide_request(struct request_queue *q)
 				hwgroup->sleeping = 1;
 				hwgroup->req_gen_timer = hwgroup->req_gen;
 				mod_timer(&hwgroup->timer, sleep);
-				/* we purposely leave hwgroup->busy==1
+				/* we purposely leave hwgroup locked
 				 * while sleeping */
-			} else {
-				/* Ugly, but how can we sleep for the lock
-				 * otherwise? perhaps from tq_disk?
-				 */
-
-				/* for atari only */
-				ide_release_lock();
-				hwgroup->busy = 0;
-			}
+			} else
+				ide_unlock_hwgroup(hwgroup);
 
 			/* no more work for this hwgroup (for now) */
 			goto plug_device;
@@ -865,7 +855,7 @@ void do_ide_request(struct request_queue *q)
 		 */
 		rq = elv_next_request(drive->queue);
 		if (!rq) {
-			hwgroup->busy = 0;
+			ide_unlock_hwgroup(hwgroup);
 			break;
 		}
 
@@ -885,8 +875,8 @@ void do_ide_request(struct request_queue *q)
 		if ((drive->dev_flags & IDE_DFLAG_BLOCKED) &&
 		    blk_pm_request(rq) == 0 &&
 		    (rq->cmd_flags & REQ_PREEMPT) == 0) {
-			/* We clear busy, there should be no pending ATA command at this point. */
-			hwgroup->busy = 0;
+			/* there should be no pending command at this point */
+			ide_unlock_hwgroup(hwgroup);
 			goto plug_device;
 		}
 
@@ -897,7 +887,7 @@ void do_ide_request(struct request_queue *q)
 		spin_lock_irq(&hwgroup->lock);
 
 		if (startstop == ide_stopped) {
-			hwgroup->busy = 0;
+			ide_unlock_hwgroup(hwgroup);
 			if (!elv_queue_empty(orig_drive->queue))
 				blk_plug_device(orig_drive->queue);
 		}
@@ -1001,7 +991,7 @@ void ide_timer_expiry (unsigned long data)
 		 */
 		if (hwgroup->sleeping) {
 			hwgroup->sleeping = 0;
-			hwgroup->busy = 0;
+			ide_unlock_hwgroup(hwgroup);
 		}
 	} else {
 		ide_drive_t *drive = hwgroup->drive;
@@ -1056,7 +1046,7 @@ void ide_timer_expiry (unsigned long data)
 			spin_lock_irq(&hwgroup->lock);
 			enable_irq(hwif->irq);
 			if (startstop == ide_stopped) {
-				hwgroup->busy = 0;
+				ide_unlock_hwgroup(hwgroup);
 				if (!elv_queue_empty(drive->queue))
 					blk_plug_device(drive->queue);
 			}
@@ -1249,7 +1239,7 @@ irqreturn_t ide_intr (int irq, void *dev_id)
 	drive->service_time = jiffies - drive->service_start;
 	if (startstop == ide_stopped) {
 		if (hwgroup->handler == NULL) {	/* paranoia */
-			hwgroup->busy = 0;
+			ide_unlock_hwgroup(hwgroup);
 			if (!elv_queue_empty(drive->queue))
 				blk_plug_device(drive->queue);
 		} else
diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c
index 63d01c55f865..44c6787f8aeb 100644
--- a/drivers/ide/ide-park.c
+++ b/drivers/ide/ide-park.c
@@ -22,7 +22,7 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
 		if (reset_timer && hwgroup->sleeping &&
 		    del_timer(&hwgroup->timer)) {
 			hwgroup->sleeping = 0;
-			hwgroup->busy = 0;
+			ide_unlock_hwgroup(hwgroup);
 			blk_start_queueing(q);
 		}
 		spin_unlock_irq(&hwgroup->lock);
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 968ca8f60531..f408d6123f14 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1280,6 +1280,26 @@ extern void ide_stall_queue(ide_drive_t *drive, unsigned long timeout);
 
 extern void ide_timer_expiry(unsigned long);
 extern irqreturn_t ide_intr(int irq, void *dev_id);
+
+static inline int ide_lock_hwgroup(ide_hwgroup_t *hwgroup)
+{
+	if (hwgroup->busy)
+		return 1;
+
+	hwgroup->busy = 1;
+	/* for atari only */
+	ide_get_lock(ide_intr, hwgroup);
+
+	return 0;
+}
+
+static inline void ide_unlock_hwgroup(ide_hwgroup_t *hwgroup)
+{
+	/* for atari only */
+	ide_release_lock();
+	hwgroup->busy = 0;
+}
+
 extern void do_ide_request(struct request_queue *);
 
 void ide_init_disk(struct gendisk *, ide_drive_t *);
-- 
cgit v1.2.3


From 201bffa46466b4afdf7d29db8eca3fa5decb39c8 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Fri, 2 Jan 2009 16:12:50 +0100
Subject: ide: use per-device request queue locks (v2)

* Move hack for flush requests from choose_drive() to do_ide_request().

* Add ide_plug_device() helper and convert core IDE code from using
  per-hwgroup lock as a request lock to use the ->queue_lock instead.

* Remove no longer needed:
  - choose_drive() function
  - WAKEUP() macro
  - 'sleeping' flag from ide_hwif_t
  - 'service_{start,time}' fields from ide_drive_t

This patch results in much simpler and more maintainable code
(besides being a scalability improvement).

v2:
* Fixes/improvements based on review from Elias:
  - take as many requests off the queue as possible
  - remove now redundant BUG_ON()

Cc: Elias Oltmanns <eo@nebensachen.de>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-io.c    | 214 +++++++++++++++---------------------------------
 drivers/ide/ide-park.c  |  13 +--
 drivers/ide/ide-probe.c |   3 +-
 include/linux/ide.h     |   4 -
 4 files changed, 77 insertions(+), 157 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index ab480042757a..bb3248abf47d 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -667,85 +667,10 @@ void ide_stall_queue (ide_drive_t *drive, unsigned long timeout)
 	drive->sleep = timeout + jiffies;
 	drive->dev_flags |= IDE_DFLAG_SLEEPING;
 }
-
 EXPORT_SYMBOL(ide_stall_queue);
 
-#define WAKEUP(drive)	((drive)->service_start + 2 * (drive)->service_time)
-
-/**
- *	choose_drive		-	select a drive to service
- *	@hwgroup: hardware group to select on
- *
- *	choose_drive() selects the next drive which will be serviced.
- *	This is necessary because the IDE layer can't issue commands
- *	to both drives on the same cable, unlike SCSI.
- */
- 
-static inline ide_drive_t *choose_drive (ide_hwgroup_t *hwgroup)
-{
-	ide_drive_t *drive, *best;
-
-repeat:	
-	best = NULL;
-	drive = hwgroup->drive;
-
-	/*
-	 * drive is doing pre-flush, ordered write, post-flush sequence. even
-	 * though that is 3 requests, it must be seen as a single transaction.
-	 * we must not preempt this drive until that is complete
-	 */
-	if (blk_queue_flushing(drive->queue)) {
-		/*
-		 * small race where queue could get replugged during
-		 * the 3-request flush cycle, just yank the plug since
-		 * we want it to finish asap
-		 */
-		blk_remove_plug(drive->queue);
-		return drive;
-	}
-
-	do {
-		u8 dev_s = !!(drive->dev_flags & IDE_DFLAG_SLEEPING);
-		u8 best_s = (best && !!(best->dev_flags & IDE_DFLAG_SLEEPING));
-
-		if ((dev_s == 0 || time_after_eq(jiffies, drive->sleep)) &&
-		    !elv_queue_empty(drive->queue)) {
-			if (best == NULL ||
-			    (dev_s && (best_s == 0 || time_before(drive->sleep, best->sleep))) ||
-			    (best_s == 0 && time_before(WAKEUP(drive), WAKEUP(best)))) {
-				if (!blk_queue_plugged(drive->queue))
-					best = drive;
-			}
-		}
-	} while ((drive = drive->next) != hwgroup->drive);
-
-	if (best && (best->dev_flags & IDE_DFLAG_NICE1) &&
-	    (best->dev_flags & IDE_DFLAG_SLEEPING) == 0 &&
-	    best != hwgroup->drive && best->service_time > WAIT_MIN_SLEEP) {
-		long t = (signed long)(WAKEUP(best) - jiffies);
-		if (t >= WAIT_MIN_SLEEP) {
-		/*
-		 * We *may* have some time to spare, but first let's see if
-		 * someone can potentially benefit from our nice mood today..
-		 */
-			drive = best->next;
-			do {
-				if ((drive->dev_flags & IDE_DFLAG_SLEEPING) == 0
-				 && time_before(jiffies - best->service_time, WAKEUP(drive))
-				 && time_before(WAKEUP(drive), jiffies + t))
-				{
-					ide_stall_queue(best, min_t(long, t, 10 * WAIT_MIN_SLEEP));
-					goto repeat;
-				}
-			} while ((drive = drive->next) != best);
-		}
-	}
-	return best;
-}
-
 /*
  * Issue a new request to a drive from hwgroup
- * Caller must have already done spin_lock_irqsave(&hwgroup->lock, ..);
  *
  * A hwgroup is a serialized group of IDE interfaces.  Usually there is
  * exactly one hwif (interface) per hwgroup, but buggy controllers (eg. CMD640)
@@ -757,8 +682,7 @@ repeat:
  * possibly along with many other devices.  This is especially common in
  * PCI-based systems with off-board IDE controller cards.
  *
- * The IDE driver uses a per-hwgroup spinlock to protect
- * access to the request queues, and to protect the hwgroup->busy flag.
+ * The IDE driver uses a per-hwgroup lock to protect the hwgroup->busy flag.
  *
  * The first thread into the driver for a particular hwgroup sets the
  * hwgroup->busy flag to indicate that this hwgroup is now active,
@@ -780,61 +704,38 @@ repeat:
  */
 void do_ide_request(struct request_queue *q)
 {
-	ide_drive_t	*orig_drive = q->queuedata;
-	ide_hwgroup_t	*hwgroup = orig_drive->hwif->hwgroup;
-	ide_drive_t	*drive;
-	ide_hwif_t	*hwif;
+	ide_drive_t	*drive = q->queuedata;
+	ide_hwif_t	*hwif = drive->hwif;
+	ide_hwgroup_t	*hwgroup = hwif->hwgroup;
 	struct request	*rq;
 	ide_startstop_t	startstop;
 
-	/* caller must own hwgroup->lock */
-	BUG_ON(!irqs_disabled());
-
-	while (!ide_lock_hwgroup(hwgroup)) {
-		drive = choose_drive(hwgroup);
-		if (drive == NULL) {
-			int sleeping = 0;
-			unsigned long sleep = 0; /* shut up, gcc */
-			hwgroup->rq = NULL;
-			drive = hwgroup->drive;
-			do {
-				if ((drive->dev_flags & IDE_DFLAG_SLEEPING) &&
-				    (sleeping == 0 ||
-				     time_before(drive->sleep, sleep))) {
-					sleeping = 1;
-					sleep = drive->sleep;
-				}
-			} while ((drive = drive->next) != hwgroup->drive);
-			if (sleeping) {
+	/*
+	 * drive is doing pre-flush, ordered write, post-flush sequence. even
+	 * though that is 3 requests, it must be seen as a single transaction.
+	 * we must not preempt this drive until that is complete
+	 */
+	if (blk_queue_flushing(q))
 		/*
-		 * Take a short snooze, and then wake up this hwgroup again.
-		 * This gives other hwgroups on the same a chance to
-		 * play fairly with us, just in case there are big differences
-		 * in relative throughputs.. don't want to hog the cpu too much.
+		 * small race where queue could get replugged during
+		 * the 3-request flush cycle, just yank the plug since
+		 * we want it to finish asap
 		 */
-				if (time_before(sleep, jiffies + WAIT_MIN_SLEEP))
-					sleep = jiffies + WAIT_MIN_SLEEP;
-#if 1
-				if (timer_pending(&hwgroup->timer))
-					printk(KERN_CRIT "ide_set_handler: timer already active\n");
-#endif
-				/* so that ide_timer_expiry knows what to do */
-				hwgroup->sleeping = 1;
-				hwgroup->req_gen_timer = hwgroup->req_gen;
-				mod_timer(&hwgroup->timer, sleep);
-				/* we purposely leave hwgroup locked
-				 * while sleeping */
-			} else
-				ide_unlock_hwgroup(hwgroup);
+		blk_remove_plug(q);
 
-			/* no more work for this hwgroup (for now) */
-			goto plug_device;
-		}
+	spin_unlock_irq(q->queue_lock);
+	spin_lock_irq(&hwgroup->lock);
 
-		if (drive != orig_drive)
-			goto plug_device;
+	if (!ide_lock_hwgroup(hwgroup)) {
+repeat:
+		hwgroup->rq = NULL;
 
-		hwif = drive->hwif;
+		if (drive->dev_flags & IDE_DFLAG_SLEEPING) {
+			if (time_before(drive->sleep, jiffies)) {
+				ide_unlock_hwgroup(hwgroup);
+				goto plug_device;
+			}
+		}
 
 		if (hwif != hwgroup->hwif) {
 			/*
@@ -847,16 +748,20 @@ void do_ide_request(struct request_queue *q)
 		hwgroup->hwif = hwif;
 		hwgroup->drive = drive;
 		drive->dev_flags &= ~(IDE_DFLAG_SLEEPING | IDE_DFLAG_PARKED);
-		drive->service_start = jiffies;
 
+		spin_unlock_irq(&hwgroup->lock);
+		spin_lock_irq(q->queue_lock);
 		/*
 		 * we know that the queue isn't empty, but this can happen
 		 * if the q->prep_rq_fn() decides to kill a request
 		 */
 		rq = elv_next_request(drive->queue);
+		spin_unlock_irq(q->queue_lock);
+		spin_lock_irq(&hwgroup->lock);
+
 		if (!rq) {
 			ide_unlock_hwgroup(hwgroup);
-			break;
+			goto out;
 		}
 
 		/*
@@ -886,17 +791,21 @@ void do_ide_request(struct request_queue *q)
 		startstop = start_request(drive, rq);
 		spin_lock_irq(&hwgroup->lock);
 
-		if (startstop == ide_stopped) {
-			ide_unlock_hwgroup(hwgroup);
-			if (!elv_queue_empty(orig_drive->queue))
-				blk_plug_device(orig_drive->queue);
-		}
-	}
+		if (startstop == ide_stopped)
+			goto repeat;
+	} else
+		goto plug_device;
+out:
+	spin_unlock_irq(&hwgroup->lock);
+	spin_lock_irq(q->queue_lock);
 	return;
 
 plug_device:
-	if (!elv_queue_empty(orig_drive->queue))
-		blk_plug_device(orig_drive->queue);
+	spin_unlock_irq(&hwgroup->lock);
+	spin_lock_irq(q->queue_lock);
+
+	if (!elv_queue_empty(q))
+		blk_plug_device(q);
 }
 
 /*
@@ -957,6 +866,17 @@ out:
 	return ret;
 }
 
+static void ide_plug_device(ide_drive_t *drive)
+{
+	struct request_queue *q = drive->queue;
+	unsigned long flags;
+
+	spin_lock_irqsave(q->queue_lock, flags);
+	if (!elv_queue_empty(q))
+		blk_plug_device(q);
+	spin_unlock_irqrestore(q->queue_lock, flags);
+}
+
 /**
  *	ide_timer_expiry	-	handle lack of an IDE interrupt
  *	@data: timer callback magic (hwgroup)
@@ -974,10 +894,12 @@ out:
 void ide_timer_expiry (unsigned long data)
 {
 	ide_hwgroup_t	*hwgroup = (ide_hwgroup_t *) data;
+	ide_drive_t	*uninitialized_var(drive);
 	ide_handler_t	*handler;
 	ide_expiry_t	*expiry;
 	unsigned long	flags;
 	unsigned long	wait = -1;
+	int		plug_device = 0;
 
 	spin_lock_irqsave(&hwgroup->lock, flags);
 
@@ -989,12 +911,8 @@ void ide_timer_expiry (unsigned long data)
 		 * or we were "sleeping" to give other devices a chance.
 		 * Either way, we don't really want to complain about anything.
 		 */
-		if (hwgroup->sleeping) {
-			hwgroup->sleeping = 0;
-			ide_unlock_hwgroup(hwgroup);
-		}
 	} else {
-		ide_drive_t *drive = hwgroup->drive;
+		drive = hwgroup->drive;
 		if (!drive) {
 			printk(KERN_ERR "ide_timer_expiry: hwgroup->drive was NULL\n");
 			hwgroup->handler = NULL;
@@ -1042,17 +960,18 @@ void ide_timer_expiry (unsigned long data)
 					ide_error(drive, "irq timeout",
 						  hwif->tp_ops->read_status(hwif));
 			}
-			drive->service_time = jiffies - drive->service_start;
 			spin_lock_irq(&hwgroup->lock);
 			enable_irq(hwif->irq);
 			if (startstop == ide_stopped) {
 				ide_unlock_hwgroup(hwgroup);
-				if (!elv_queue_empty(drive->queue))
-					blk_plug_device(drive->queue);
+				plug_device = 1;
 			}
 		}
 	}
 	spin_unlock_irqrestore(&hwgroup->lock, flags);
+
+	if (plug_device)
+		ide_plug_device(drive);
 }
 
 /**
@@ -1146,10 +1065,11 @@ irqreturn_t ide_intr (int irq, void *dev_id)
 	unsigned long flags;
 	ide_hwgroup_t *hwgroup = (ide_hwgroup_t *)dev_id;
 	ide_hwif_t *hwif = hwgroup->hwif;
-	ide_drive_t *drive;
+	ide_drive_t *uninitialized_var(drive);
 	ide_handler_t *handler;
 	ide_startstop_t startstop;
 	irqreturn_t irq_ret = IRQ_NONE;
+	int plug_device = 0;
 
 	spin_lock_irqsave(&hwgroup->lock, flags);
 
@@ -1236,12 +1156,10 @@ irqreturn_t ide_intr (int irq, void *dev_id)
 	 * same irq as is currently being serviced here, and Linux
 	 * won't allow another of the same (on any CPU) until we return.
 	 */
-	drive->service_time = jiffies - drive->service_start;
 	if (startstop == ide_stopped) {
 		if (hwgroup->handler == NULL) {	/* paranoia */
 			ide_unlock_hwgroup(hwgroup);
-			if (!elv_queue_empty(drive->queue))
-				blk_plug_device(drive->queue);
+			plug_device = 1;
 		} else
 			printk(KERN_ERR "%s: %s: huh? expected NULL handler "
 					"on exit\n", __func__, drive->name);
@@ -1250,6 +1168,10 @@ out_handled:
 	irq_ret = IRQ_HANDLED;
 out:
 	spin_unlock_irqrestore(&hwgroup->lock, flags);
+
+	if (plug_device)
+		ide_plug_device(drive);
+
 	return irq_ret;
 }
 
diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c
index 44c6787f8aeb..678454ac2483 100644
--- a/drivers/ide/ide-park.c
+++ b/drivers/ide/ide-park.c
@@ -16,16 +16,19 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
 	spin_lock_irq(&hwgroup->lock);
 	if (drive->dev_flags & IDE_DFLAG_PARKED) {
 		int reset_timer = time_before(timeout, drive->sleep);
+		int start_queue = 0;
 
 		drive->sleep = timeout;
 		wake_up_all(&ide_park_wq);
-		if (reset_timer && hwgroup->sleeping &&
-		    del_timer(&hwgroup->timer)) {
-			hwgroup->sleeping = 0;
-			ide_unlock_hwgroup(hwgroup);
+		if (reset_timer && del_timer(&hwgroup->timer))
+			start_queue = 1;
+		spin_unlock_irq(&hwgroup->lock);
+
+		if (start_queue) {
+			spin_lock_irq(q->queue_lock);
 			blk_start_queueing(q);
+			spin_unlock_irq(q->queue_lock);
 		}
-		spin_unlock_irq(&hwgroup->lock);
 		return;
 	}
 	spin_unlock_irq(&hwgroup->lock);
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index f9efd069edc2..966b74c15773 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -881,8 +881,7 @@ static int ide_init_queue(ide_drive_t *drive)
 	 *	do not.
 	 */
 
-	q = blk_init_queue_node(do_ide_request, &hwif->hwgroup->lock,
-				hwif_to_node(hwif));
+	q = blk_init_queue_node(do_ide_request, NULL, hwif_to_node(hwif));
 	if (!q)
 		return 1;
 
diff --git a/include/linux/ide.h b/include/linux/ide.h
index f408d6123f14..5f86ad40ee7e 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -603,8 +603,6 @@ struct ide_drive_s {
 	unsigned long dev_flags;
 
 	unsigned long sleep;		/* sleep until this time */
-	unsigned long service_start;	/* time we started last request */
-	unsigned long service_time;	/* service time of last request */
 	unsigned long timeout;		/* max time to wait for irq */
 
 	special_t	special;	/* special action flags */
@@ -872,8 +870,6 @@ typedef struct hwgroup_s {
 
 		/* BOOL: protects all fields below */
 	volatile int busy;
-		/* BOOL: wake us up on timer expiry */
-	unsigned int sleeping	: 1;
 		/* BOOL: polling active & poll_timeout field valid */
 	unsigned int polling	: 1;
 
-- 
cgit v1.2.3


From bf64741fe89280bd81a9e3a1beadec1570861848 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@googlemail.com>
Date: Fri, 2 Jan 2009 16:12:50 +0100
Subject: ide: make IDE_AFLAG_.. numbering continuous again

Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 include/linux/ide.h | 48 ++++++++++++++++++++++++------------------------
 1 file changed, 24 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ide.h b/include/linux/ide.h
index 5f86ad40ee7e..eb4c01f7f253 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -473,53 +473,53 @@ enum {
 
 	/* ide-cd */
 	/* Drive cannot eject the disc. */
-	IDE_AFLAG_NO_EJECT		= (1 << 3),
+	IDE_AFLAG_NO_EJECT		= (1 << 1),
 	/* Drive is a pre ATAPI 1.2 drive. */
-	IDE_AFLAG_PRE_ATAPI12		= (1 << 4),
+	IDE_AFLAG_PRE_ATAPI12		= (1 << 2),
 	/* TOC addresses are in BCD. */
-	IDE_AFLAG_TOCADDR_AS_BCD	= (1 << 5),
+	IDE_AFLAG_TOCADDR_AS_BCD	= (1 << 3),
 	/* TOC track numbers are in BCD. */
-	IDE_AFLAG_TOCTRACKS_AS_BCD	= (1 << 6),
+	IDE_AFLAG_TOCTRACKS_AS_BCD	= (1 << 4),
 	/*
 	 * Drive does not provide data in multiples of SECTOR_SIZE
 	 * when more than one interrupt is needed.
 	 */
-	IDE_AFLAG_LIMIT_NFRAMES		= (1 << 7),
+	IDE_AFLAG_LIMIT_NFRAMES		= (1 << 5),
 	/* Saved TOC information is current. */
-	IDE_AFLAG_TOC_VALID		= (1 << 9),
+	IDE_AFLAG_TOC_VALID		= (1 << 6),
 	/* We think that the drive door is locked. */
-	IDE_AFLAG_DOOR_LOCKED		= (1 << 10),
+	IDE_AFLAG_DOOR_LOCKED		= (1 << 7),
 	/* SET_CD_SPEED command is unsupported. */
-	IDE_AFLAG_NO_SPEED_SELECT	= (1 << 11),
-	IDE_AFLAG_VERTOS_300_SSD	= (1 << 12),
-	IDE_AFLAG_VERTOS_600_ESD	= (1 << 13),
-	IDE_AFLAG_SANYO_3CD		= (1 << 14),
-	IDE_AFLAG_FULL_CAPS_PAGE	= (1 << 15),
-	IDE_AFLAG_PLAY_AUDIO_OK		= (1 << 16),
-	IDE_AFLAG_LE_SPEED_FIELDS	= (1 << 17),
+	IDE_AFLAG_NO_SPEED_SELECT	= (1 << 8),
+	IDE_AFLAG_VERTOS_300_SSD	= (1 << 9),
+	IDE_AFLAG_VERTOS_600_ESD	= (1 << 10),
+	IDE_AFLAG_SANYO_3CD		= (1 << 11),
+	IDE_AFLAG_FULL_CAPS_PAGE	= (1 << 12),
+	IDE_AFLAG_PLAY_AUDIO_OK		= (1 << 13),
+	IDE_AFLAG_LE_SPEED_FIELDS	= (1 << 14),
 
 	/* ide-floppy */
 	/* Avoid commands not supported in Clik drive */
-	IDE_AFLAG_CLIK_DRIVE		= (1 << 19),
+	IDE_AFLAG_CLIK_DRIVE		= (1 << 15),
 	/* Requires BH algorithm for packets */
-	IDE_AFLAG_ZIP_DRIVE		= (1 << 20),
+	IDE_AFLAG_ZIP_DRIVE		= (1 << 16),
 	/* Supports format progress report */
-	IDE_AFLAG_SRFP			= (1 << 22),
+	IDE_AFLAG_SRFP			= (1 << 17),
 
 	/* ide-tape */
-	IDE_AFLAG_IGNORE_DSC		= (1 << 23),
+	IDE_AFLAG_IGNORE_DSC		= (1 << 18),
 	/* 0 When the tape position is unknown */
-	IDE_AFLAG_ADDRESS_VALID		= (1 <<	24),
+	IDE_AFLAG_ADDRESS_VALID		= (1 <<	19),
 	/* Device already opened */
-	IDE_AFLAG_BUSY			= (1 << 25),
+	IDE_AFLAG_BUSY			= (1 << 20),
 	/* Attempt to auto-detect the current user block size */
-	IDE_AFLAG_DETECT_BS		= (1 << 26),
+	IDE_AFLAG_DETECT_BS		= (1 << 21),
 	/* Currently on a filemark */
-	IDE_AFLAG_FILEMARK		= (1 << 27),
+	IDE_AFLAG_FILEMARK		= (1 << 22),
 	/* 0 = no tape is loaded, so we don't rewind after ejecting */
-	IDE_AFLAG_MEDIUM_PRESENT	= (1 << 28),
+	IDE_AFLAG_MEDIUM_PRESENT	= (1 << 23),
 
-	IDE_AFLAG_NO_AUTOCLOSE		= (1 << 29),
+	IDE_AFLAG_NO_AUTOCLOSE		= (1 << 24),
 };
 
 /* device flags */
-- 
cgit v1.2.3


From 392de1d53dd40e2eebee3a0a26aa647a3865ca78 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@googlemail.com>
Date: Fri, 2 Jan 2009 16:12:52 +0100
Subject: ide-atapi: accomodate transfer length calculation for ide-cd

... by factoring it out of ide_cd_do_request() into a helper, as suggested by
Bart.

There should be no functionality change resulting from this patch.

Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
[bart: BLK_DEV_IDECD needs to select IDE_ATAPI now]
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/Kconfig     |  1 +
 drivers/ide/ide-atapi.c | 15 ++++++++++++++-
 drivers/ide/ide-cd.c    |  4 ++--
 include/linux/ide.h     |  2 ++
 4 files changed, 19 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig
index 937945e471df..4ee85fcf9aaf 100644
--- a/drivers/ide/Kconfig
+++ b/drivers/ide/Kconfig
@@ -137,6 +137,7 @@ config BLK_DEV_DELKIN
 
 config BLK_DEV_IDECD
 	tristate "Include IDE/ATAPI CDROM support"
+	select IDE_ATAPI
 	---help---
 	  If you have a CD-ROM drive using the ATAPI protocol, say Y. ATAPI is
 	  a newer protocol used by IDE CD-ROM and TAPE drives, similar to the
diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 74273fdc8827..8884877bd2b5 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -252,6 +252,18 @@ int ide_scsi_expiry(ide_drive_t *drive)
 }
 EXPORT_SYMBOL_GPL(ide_scsi_expiry);
 
+int ide_cd_get_xferlen(struct request *rq)
+{
+	if (blk_fs_request(rq))
+		return 32768;
+	else if (blk_sense_request(rq) || blk_pc_request(rq) ||
+			 rq->cmd_type == REQ_TYPE_ATA_PC)
+		return rq->data_len;
+	else
+		return 0;
+}
+EXPORT_SYMBOL_GPL(ide_cd_get_xferlen);
+
 /*
  * This is the usual interrupt handler which will be called during a packet
  * command.  We will transfer some of the data (as requested by the drive)
@@ -551,7 +563,7 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, unsigned int timeout,
 	struct ide_atapi_pc *pc = drive->pc;
 	ide_hwif_t *hwif = drive->hwif;
 	u32 tf_flags;
-	u16 bcount = 0;
+	u16 bcount;
 	u8 scsi = !!(drive->dev_flags & IDE_DFLAG_SCSI);
 
 	/* We haven't transferred any data yet */
@@ -560,6 +572,7 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, unsigned int timeout,
 
 	if (dev_is_idecd(drive)) {
 		tf_flags = IDE_TFLAG_OUT_NSECT | IDE_TFLAG_OUT_LBAL;
+		bcount = ide_cd_get_xferlen(hwif->hwgroup->rq);
 	} else if (scsi) {
 		tf_flags = 0;
 		bcount = min(pc->req_xfer, 63 * 1024);
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 65e5513758b0..8d3c7714682e 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -1214,8 +1214,9 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
 		      __func__, rq->cmd[0], rq->cmd_type,
 		      (unsigned long long)block);
 
+	xferlen = ide_cd_get_xferlen(rq);
+
 	if (blk_fs_request(rq)) {
-		xferlen = 32768;
 		fn = cdrom_start_rw_cont;
 
 		if (cdrom_start_rw(drive, rq) == ide_stopped)
@@ -1225,7 +1226,6 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
 			return ide_stopped;
 	} else if (blk_sense_request(rq) || blk_pc_request(rq) ||
 		   rq->cmd_type == REQ_TYPE_ATA_PC) {
-		xferlen = rq->data_len;
 		fn = cdrom_do_newpc_cont;
 
 		if (!rq->timeout)
diff --git a/include/linux/ide.h b/include/linux/ide.h
index eb4c01f7f253..e35ff6827897 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1254,6 +1254,8 @@ static inline unsigned long ide_scsi_get_timeout(struct ide_atapi_pc *pc)
 
 int ide_scsi_expiry(ide_drive_t *);
 
+int ide_cd_get_xferlen(struct request *);
+
 ide_startstop_t ide_issue_pc(ide_drive_t *, unsigned int, ide_expiry_t *);
 
 ide_startstop_t do_rw_taskfile(ide_drive_t *, ide_task_t *);
-- 
cgit v1.2.3


From 4cad085efbce8dcc5006b0d1034089758b4fc7ba Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@gmail.com>
Date: Fri, 2 Jan 2009 16:12:53 +0100
Subject: ide-cd: move cdrom_timer_expiry to ide-atapi.c

- cdrom_timer_expiry -> ide_cd_expiry
- remove expiry-arg to ide_issue_pc as it is redundant now
- ide_debug_log -> debug_log

Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-atapi.c  | 41 ++++++++++++++++++++++++++++++++++++++---
 drivers/ide/ide-cd.c     | 38 +++-----------------------------------
 drivers/ide/ide-cd.h     |  4 ----
 drivers/ide/ide-floppy.c |  2 +-
 drivers/ide/ide-tape.c   |  2 +-
 include/linux/ide.h      |  4 +++-
 6 files changed, 46 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 8c5cf68fbd79..c110329ccb13 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -3,6 +3,7 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/cdrom.h>
 #include <linux/delay.h>
 #include <linux/ide.h>
 #include <scsi/scsi.h>
@@ -252,6 +253,38 @@ int ide_scsi_expiry(ide_drive_t *drive)
 }
 EXPORT_SYMBOL_GPL(ide_scsi_expiry);
 
+int ide_cd_expiry(ide_drive_t *drive)
+{
+	struct request *rq = HWGROUP(drive)->rq;
+	unsigned long wait = 0;
+
+	debug_log("%s: rq->cmd[0]: 0x%x\n", __func__, rq->cmd[0]);
+
+	/*
+	 * Some commands are *slow* and normally take a long time to complete.
+	 * Usually we can use the ATAPI "disconnect" to bypass this, but not all
+	 * commands/drives support that. Let ide_timer_expiry keep polling us
+	 * for these.
+	 */
+	switch (rq->cmd[0]) {
+	case GPCMD_BLANK:
+	case GPCMD_FORMAT_UNIT:
+	case GPCMD_RESERVE_RZONE_TRACK:
+	case GPCMD_CLOSE_TRACK:
+	case GPCMD_FLUSH_CACHE:
+		wait = ATAPI_WAIT_PC;
+		break;
+	default:
+		if (!(rq->cmd_flags & REQ_QUIET))
+			printk(KERN_INFO "cmd 0x%x timed out\n",
+					 rq->cmd[0]);
+		wait = 0;
+		break;
+	}
+	return wait;
+}
+EXPORT_SYMBOL_GPL(ide_cd_expiry);
+
 int ide_cd_get_xferlen(struct request *rq)
 {
 	if (blk_fs_request(rq))
@@ -562,11 +595,11 @@ static ide_startstop_t ide_transfer_pc(ide_drive_t *drive)
 	return ide_started;
 }
 
-ide_startstop_t ide_issue_pc(ide_drive_t *drive, unsigned int timeout,
-			     ide_expiry_t *expiry)
+ide_startstop_t ide_issue_pc(ide_drive_t *drive, unsigned int timeout)
 {
 	struct ide_atapi_pc *pc = drive->pc;
 	ide_hwif_t *hwif = drive->hwif;
+	ide_expiry_t *expiry = NULL;
 	u32 tf_flags;
 	u16 bcount;
 	u8 scsi = !!(drive->dev_flags & IDE_DFLAG_SCSI);
@@ -578,9 +611,11 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, unsigned int timeout,
 	if (dev_is_idecd(drive)) {
 		tf_flags = IDE_TFLAG_OUT_NSECT | IDE_TFLAG_OUT_LBAL;
 		bcount = ide_cd_get_xferlen(hwif->hwgroup->rq);
+		expiry = ide_cd_expiry;
 	} else if (scsi) {
 		tf_flags = 0;
 		bcount = min(pc->req_xfer, 63 * 1024);
+		expiry = ide_scsi_expiry;
 	} else {
 		tf_flags = IDE_TFLAG_OUT_DEVICE;
 		bcount = ((drive->media == ide_tape) ?
@@ -613,7 +648,7 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, unsigned int timeout,
 		if (drive->dma)
 			drive->waiting_for_dma = 0;
 		ide_execute_command(drive, ATA_CMD_PACKET, ide_transfer_pc,
-				    timeout, NULL);
+				    timeout, expiry);
 		return ide_started;
 	} else {
 		ide_execute_pkt_cmd(drive);
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 8d3c7714682e..105e4d855e6e 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -511,38 +511,6 @@ end_request:
 	return 1;
 }
 
-static int cdrom_timer_expiry(ide_drive_t *drive)
-{
-	struct request *rq = HWGROUP(drive)->rq;
-	unsigned long wait = 0;
-
-	ide_debug_log(IDE_DBG_RQ, "Call %s: rq->cmd[0]: 0x%x\n", __func__,
-		      rq->cmd[0]);
-
-	/*
-	 * Some commands are *slow* and normally take a long time to complete.
-	 * Usually we can use the ATAPI "disconnect" to bypass this, but not all
-	 * commands/drives support that. Let ide_timer_expiry keep polling us
-	 * for these.
-	 */
-	switch (rq->cmd[0]) {
-	case GPCMD_BLANK:
-	case GPCMD_FORMAT_UNIT:
-	case GPCMD_RESERVE_RZONE_TRACK:
-	case GPCMD_CLOSE_TRACK:
-	case GPCMD_FLUSH_CACHE:
-		wait = ATAPI_WAIT_PC;
-		break;
-	default:
-		if (!(rq->cmd_flags & REQ_QUIET))
-			printk(KERN_INFO PFX "cmd 0x%x timed out\n",
-					 rq->cmd[0]);
-		wait = 0;
-		break;
-	}
-	return wait;
-}
-
 /*
  * Set up the device registers for transferring a packet command on DEV,
  * expecting to later transfer XFERLEN bytes.  HANDLER is the routine
@@ -574,7 +542,7 @@ static ide_startstop_t cdrom_start_packet_command(ide_drive_t *drive,
 
 		/* packet command */
 		ide_execute_command(drive, ATA_CMD_PACKET, handler,
-				    ATAPI_WAIT_PC, cdrom_timer_expiry);
+				    ATAPI_WAIT_PC, ide_cd_expiry);
 		return ide_started;
 	} else {
 		ide_execute_pkt_cmd(drive);
@@ -621,7 +589,7 @@ static ide_startstop_t cdrom_transfer_packet_command(ide_drive_t *drive,
 	}
 
 	/* arm the interrupt handler */
-	ide_set_handler(drive, handler, rq->timeout, cdrom_timer_expiry);
+	ide_set_handler(drive, handler, rq->timeout, ide_cd_expiry);
 
 	/* ATAPI commands get padded out to 12 bytes minimum */
 	cmd_len = COMMAND_SIZE(rq->cmd[0]);
@@ -1088,7 +1056,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
 	} else {
 		timeout = ATAPI_WAIT_PC;
 		if (!blk_fs_request(rq))
-			expiry = cdrom_timer_expiry;
+			expiry = ide_cd_expiry;
 	}
 
 	ide_set_handler(drive, cdrom_newpc_intr, timeout, expiry);
diff --git a/drivers/ide/ide-cd.h b/drivers/ide/ide-cd.h
index 389faa42eaa1..bf676b262181 100644
--- a/drivers/ide/ide-cd.h
+++ b/drivers/ide/ide-cd.h
@@ -16,10 +16,6 @@
 #define ide_debug_log(lvl, fmt, args...) do {} while (0)
 #endif
 
-/*
- * typical timeout for packet command
- */
-#define ATAPI_WAIT_PC		(60 * HZ)
 #define ATAPI_WAIT_WRITE_BUSY	(10 * HZ)
 
 /************************************************************************/
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 1f07f3818938..fdec729d0e49 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -197,7 +197,7 @@ static ide_startstop_t idefloppy_issue_pc(ide_drive_t *drive,
 
 	pc->retries++;
 
-	return ide_issue_pc(drive, WAIT_FLOPPY_CMD, NULL);
+	return ide_issue_pc(drive, WAIT_FLOPPY_CMD);
 }
 
 void ide_floppy_create_read_capacity_cmd(struct ide_atapi_pc *pc)
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index a2d470eb2b55..ac9e29a4991f 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -694,7 +694,7 @@ static ide_startstop_t idetape_issue_pc(ide_drive_t *drive,
 
 	pc->retries++;
 
-	return ide_issue_pc(drive, WAIT_TAPE_CMD, NULL);
+	return ide_issue_pc(drive, WAIT_TAPE_CMD);
 }
 
 /* A mode sense command is used to "sense" tape parameters. */
diff --git a/include/linux/ide.h b/include/linux/ide.h
index e35ff6827897..e20e0b5c1739 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -396,6 +396,7 @@ enum {
  * This is used for several packet commands (not for READ/WRITE commands).
  */
 #define IDE_PC_BUFFER_SIZE	256
+#define ATAPI_WAIT_PC		(60 * HZ)
 
 struct ide_atapi_pc {
 	/* actual packet bytes */
@@ -1253,10 +1254,11 @@ static inline unsigned long ide_scsi_get_timeout(struct ide_atapi_pc *pc)
 }
 
 int ide_scsi_expiry(ide_drive_t *);
+int ide_cd_expiry(ide_drive_t *);
 
 int ide_cd_get_xferlen(struct request *);
 
-ide_startstop_t ide_issue_pc(ide_drive_t *, unsigned int, ide_expiry_t *);
+ide_startstop_t ide_issue_pc(ide_drive_t *, unsigned int);
 
 ide_startstop_t do_rw_taskfile(ide_drive_t *, ide_task_t *);
 
-- 
cgit v1.2.3


From 5d655a03b847fbe5353a8a74bbeb75e18708dca3 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@googlemail.com>
Date: Fri, 2 Jan 2009 16:12:54 +0100
Subject: ide-atapi: remove ide-scsi remnants from ide_pc_intr()

As a result, remove now unused ide_scsi_get_timeout and ide_scsi_expiry.

Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-atapi.c | 68 ++++++++++---------------------------------------
 include/linux/ide.h     |  6 -----
 2 files changed, 13 insertions(+), 61 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index f5bf405c36aa..7a04509bf962 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -240,19 +240,6 @@ void ide_retry_pc(ide_drive_t *drive, struct gendisk *disk)
 }
 EXPORT_SYMBOL_GPL(ide_retry_pc);
 
-int ide_scsi_expiry(ide_drive_t *drive)
-{
-	struct ide_atapi_pc *pc = drive->pc;
-
-	debug_log("%s called for %lu at %lu\n", __func__,
-		  pc->scsi_cmd->serial_number, jiffies);
-
-	pc->flags |= PC_FLAG_TIMEDOUT;
-
-	return 0; /* we do not want the IDE subsystem to retry */
-}
-EXPORT_SYMBOL_GPL(ide_scsi_expiry);
-
 int ide_cd_expiry(ide_drive_t *drive)
 {
 	struct request *rq = HWGROUP(drive)->rq;
@@ -309,21 +296,14 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 	struct request *rq = hwif->hwgroup->rq;
 	const struct ide_tp_ops *tp_ops = hwif->tp_ops;
 	xfer_func_t *xferfunc;
-	ide_expiry_t *expiry;
 	unsigned int timeout, temp;
 	u16 bcount;
-	u8 stat, ireason, scsi = !!(drive->dev_flags & IDE_DFLAG_SCSI), dsc = 0;
+	u8 stat, ireason, dsc = 0;
 
 	debug_log("Enter %s - interrupt handler\n", __func__);
 
-	if (scsi) {
-		timeout = ide_scsi_get_timeout(pc);
-		expiry = ide_scsi_expiry;
-	} else {
-		timeout = (drive->media == ide_floppy) ? WAIT_FLOPPY_CMD
-						       : WAIT_TAPE_CMD;
-		expiry = NULL;
-	}
+	timeout = (drive->media == ide_floppy) ? WAIT_FLOPPY_CMD
+					       : WAIT_TAPE_CMD;
 
 	if (pc->flags & PC_FLAG_TIMEDOUT) {
 		drive->pc_callback(drive, 0);
@@ -335,8 +315,8 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 
 	if (pc->flags & PC_FLAG_DMA_IN_PROGRESS) {
 		if (hwif->dma_ops->dma_end(drive) ||
-		    (drive->media == ide_tape && !scsi && (stat & ATA_ERR))) {
-			if (drive->media == ide_floppy && !scsi)
+		    (drive->media == ide_tape && (stat & ATA_ERR))) {
+			if (drive->media == ide_floppy)
 				printk(KERN_ERR "%s: DMA %s error\n",
 					drive->name, rq_data_dir(pc->rq)
 						     ? "write" : "read");
@@ -358,7 +338,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 
 		local_irq_enable_in_hardirq();
 
-		if (drive->media == ide_tape && !scsi &&
+		if (drive->media == ide_tape &&
 		    (stat & ATA_ERR) && rq->cmd[0] == REQUEST_SENSE)
 			stat &= ~ATA_ERR;
 
@@ -366,11 +346,8 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 			/* Error detected */
 			debug_log("%s: I/O error\n", drive->name);
 
-			if (drive->media != ide_tape || scsi) {
+			if (drive->media != ide_tape)
 				pc->rq->errors++;
-				if (scsi)
-					goto cmd_finished;
-			}
 
 			if (rq->cmd[0] == REQUEST_SENSE) {
 				printk(KERN_ERR "%s: I/O error in request sense"
@@ -386,7 +363,6 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 			/* queued, but not started */
 			return ide_stopped;
 		}
-cmd_finished:
 		pc->error = 0;
 
 		if ((pc->flags & PC_FLAG_WAIT_FOR_DSC) && (stat & ATA_DSC) == 0)
@@ -433,25 +409,8 @@ cmd_finished:
 						"us more data than expected - "
 						"discarding data\n",
 						drive->name);
-				if (scsi)
-					temp = pc->buf_size - pc->xferred;
-				else
-					temp = 0;
-				if (temp) {
-					if (pc->sg)
-						drive->pc_io_buffers(drive, pc,
-								     temp, 0);
-					else
-						tp_ops->input_data(drive, NULL,
-							pc->cur_pos, temp);
-					printk(KERN_ERR "%s: transferred %d of "
-							"%d bytes\n",
-							drive->name,
-							temp, bcount);
-				}
-				pc->xferred += temp;
-				pc->cur_pos += temp;
-				ide_pad_transfer(drive, 0, bcount - temp);
+
+				ide_pad_transfer(drive, 0, bcount);
 				goto next_irq;
 			}
 			debug_log("The device wants to send us more data than "
@@ -461,14 +420,13 @@ cmd_finished:
 	} else
 		xferfunc = tp_ops->output_data;
 
-	if ((drive->media == ide_floppy && !scsi && !pc->buf) ||
-	    (drive->media == ide_tape && !scsi && pc->bh) ||
-	    (scsi && pc->sg)) {
+	if ((drive->media == ide_floppy && !pc->buf) ||
+	    (drive->media == ide_tape && pc->bh)) {
 		int done = drive->pc_io_buffers(drive, pc, bcount,
 				  !!(pc->flags & PC_FLAG_WRITING));
 
 		/* FIXME: don't do partial completions */
-		if (drive->media == ide_floppy && !scsi)
+		if (drive->media == ide_floppy)
 			ide_end_request(drive, 1, done >> 9);
 	} else
 		xferfunc(drive, NULL, pc->cur_pos, bcount);
@@ -481,7 +439,7 @@ cmd_finished:
 		  rq->cmd[0], bcount);
 next_irq:
 	/* And set the interrupt handler again */
-	ide_set_handler(drive, ide_pc_intr, timeout, expiry);
+	ide_set_handler(drive, ide_pc_intr, timeout, NULL);
 	return ide_started;
 }
 
diff --git a/include/linux/ide.h b/include/linux/ide.h
index e20e0b5c1739..257524ee1af2 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1248,12 +1248,6 @@ int ide_set_media_lock(ide_drive_t *, struct gendisk *, int);
 void ide_create_request_sense_cmd(ide_drive_t *, struct ide_atapi_pc *);
 void ide_retry_pc(ide_drive_t *, struct gendisk *);
 
-static inline unsigned long ide_scsi_get_timeout(struct ide_atapi_pc *pc)
-{
-	return max_t(unsigned long, WAIT_CMD, pc->timeout - jiffies);
-}
-
-int ide_scsi_expiry(ide_drive_t *);
 int ide_cd_expiry(ide_drive_t *);
 
 int ide_cd_get_xferlen(struct request *);
-- 
cgit v1.2.3


From 5317464dccd0c03026d60f1e9968de4f9cd23f69 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@googlemail.com>
Date: Fri, 2 Jan 2009 16:12:54 +0100
Subject: ide: remove the last ide-scsi remnants

Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-atapi.c  |  3 +--
 drivers/ide/ide-io.c     |  3 ---
 drivers/ide/ide-ioctls.c |  3 +--
 drivers/ide/ide-probe.c  |  2 --
 include/linux/ide.h      | 28 +++++++++++++---------------
 5 files changed, 15 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 7a04509bf962..d412bd2bd7fd 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -17,8 +17,7 @@
 
 static inline int dev_is_idecd(ide_drive_t *drive)
 {
-	return (drive->media == ide_cdrom || drive->media == ide_optical) &&
-		!(drive->dev_flags & IDE_DFLAG_SCSI);
+	return drive->media == ide_cdrom || drive->media == ide_optical;
 }
 
 /*
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index bb3248abf47d..1c36a8e83d36 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -426,9 +426,6 @@ void ide_map_sg(ide_drive_t *drive, struct request *rq)
 	ide_hwif_t *hwif = drive->hwif;
 	struct scatterlist *sg = hwif->sg_table;
 
-	if (hwif->sg_mapped)	/* needed by ide-scsi */
-		return;
-
 	if (rq->cmd_type != REQ_TYPE_ATA_TASKFILE) {
 		hwif->sg_nents = blk_rq_map_sg(drive->queue, rq, sg);
 	} else {
diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c
index 28232c64c346..1be263eb9c07 100644
--- a/drivers/ide/ide-ioctls.c
+++ b/drivers/ide/ide-ioctls.c
@@ -95,8 +95,7 @@ static int ide_set_nice_ioctl(ide_drive_t *drive, unsigned long arg)
 		return -EPERM;
 
 	if (((arg >> IDE_NICE_DSC_OVERLAP) & 1) &&
-	    (drive->media != ide_tape ||
-	     (drive->dev_flags & IDE_DFLAG_SCSI)))
+	    (drive->media != ide_tape))
 		return -EPERM;
 
 	if ((arg >> IDE_NICE_DSC_OVERLAP) & 1)
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 966b74c15773..c5adb7b9c5b5 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -1141,8 +1141,6 @@ static struct kobject *ata_probe(dev_t dev, int *part, void *data)
 
 	if (drive->media == ide_disk)
 		request_module("ide-disk");
-	if (drive->dev_flags & IDE_DFLAG_SCSI)
-		request_module("ide-scsi");
 	if (drive->media == ide_cdrom || drive->media == ide_optical)
 		request_module("ide-cd");
 	if (drive->media == ide_tape)
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 257524ee1af2..ad57a4492941 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -559,28 +559,26 @@ enum {
 	IDE_DFLAG_NODMA			= (1 << 16),
 	/* powermanagment told us not to do anything, so sleep nicely */
 	IDE_DFLAG_BLOCKED		= (1 << 17),
-	/* ide-scsi emulation */
-	IDE_DFLAG_SCSI			= (1 << 18),
 	/* sleeping & sleep field valid */
-	IDE_DFLAG_SLEEPING		= (1 << 19),
-	IDE_DFLAG_POST_RESET		= (1 << 20),
-	IDE_DFLAG_UDMA33_WARNED		= (1 << 21),
-	IDE_DFLAG_LBA48			= (1 << 22),
+	IDE_DFLAG_SLEEPING		= (1 << 18),
+	IDE_DFLAG_POST_RESET		= (1 << 19),
+	IDE_DFLAG_UDMA33_WARNED		= (1 << 20),
+	IDE_DFLAG_LBA48			= (1 << 21),
 	/* status of write cache */
-	IDE_DFLAG_WCACHE		= (1 << 23),
+	IDE_DFLAG_WCACHE		= (1 << 22),
 	/* used for ignoring ATA_DF */
-	IDE_DFLAG_NOWERR		= (1 << 24),
+	IDE_DFLAG_NOWERR		= (1 << 23),
 	/* retrying in PIO */
-	IDE_DFLAG_DMA_PIO_RETRY		= (1 << 25),
-	IDE_DFLAG_LBA			= (1 << 26),
+	IDE_DFLAG_DMA_PIO_RETRY		= (1 << 24),
+	IDE_DFLAG_LBA			= (1 << 25),
 	/* don't unload heads */
-	IDE_DFLAG_NO_UNLOAD		= (1 << 27),
+	IDE_DFLAG_NO_UNLOAD		= (1 << 26),
 	/* heads unloaded, please don't reset port */
-	IDE_DFLAG_PARKED		= (1 << 28),
-	IDE_DFLAG_MEDIA_CHANGED		= (1 << 29),
+	IDE_DFLAG_PARKED		= (1 << 27),
+	IDE_DFLAG_MEDIA_CHANGED		= (1 << 28),
 	/* write protect */
-	IDE_DFLAG_WP			= (1 << 30),
-	IDE_DFLAG_FORMAT_IN_PROGRESS	= (1 << 31),
+	IDE_DFLAG_WP			= (1 << 29),
+	IDE_DFLAG_FORMAT_IN_PROGRESS	= (1 << 30),
 };
 
 struct ide_drive_s {
-- 
cgit v1.2.3


From 28ad91db77755f1c49d79652de11b28ee2cfbf03 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@googlemail.com>
Date: Fri, 2 Jan 2009 16:12:56 +0100
Subject: ide-atapi: remove timeout arg to ide_issue_pc

There should be no functionality change resulting from this patch.

Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-atapi.c  | 7 ++++++-
 drivers/ide/ide-floppy.c | 2 +-
 drivers/ide/ide-tape.c   | 2 +-
 include/linux/ide.h      | 2 +-
 4 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index fa7a70a3f24c..c470dbb155ca 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -562,11 +562,12 @@ static ide_startstop_t ide_transfer_pc(ide_drive_t *drive)
 	return ide_started;
 }
 
-ide_startstop_t ide_issue_pc(ide_drive_t *drive, unsigned int timeout)
+ide_startstop_t ide_issue_pc(ide_drive_t *drive)
 {
 	struct ide_atapi_pc *pc;
 	ide_hwif_t *hwif = drive->hwif;
 	ide_expiry_t *expiry = NULL;
+	unsigned int timeout;
 	u32 tf_flags;
 	u16 bcount;
 
@@ -574,6 +575,7 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, unsigned int timeout)
 		tf_flags = IDE_TFLAG_OUT_NSECT | IDE_TFLAG_OUT_LBAL;
 		bcount = ide_cd_get_xferlen(hwif->hwgroup->rq);
 		expiry = ide_cd_expiry;
+		timeout = ATAPI_WAIT_PC;
 
 		if (drive->dma)
 			drive->dma = !hwif->dma_ops->dma_setup(drive);
@@ -600,6 +602,9 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, unsigned int timeout)
 
 		if (!drive->dma)
 			pc->flags &= ~PC_FLAG_DMA_OK;
+
+		timeout = (drive->media == ide_floppy) ? WAIT_FLOPPY_CMD
+						       : WAIT_TAPE_CMD;
 	}
 
 	ide_pktcmd_tf_load(drive, tf_flags, bcount, drive->dma);
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index fdec729d0e49..0a48e2dc53a2 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -197,7 +197,7 @@ static ide_startstop_t idefloppy_issue_pc(ide_drive_t *drive,
 
 	pc->retries++;
 
-	return ide_issue_pc(drive, WAIT_FLOPPY_CMD);
+	return ide_issue_pc(drive);
 }
 
 void ide_floppy_create_read_capacity_cmd(struct ide_atapi_pc *pc)
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index ac9e29a4991f..5d2aa22cd6e4 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -694,7 +694,7 @@ static ide_startstop_t idetape_issue_pc(ide_drive_t *drive,
 
 	pc->retries++;
 
-	return ide_issue_pc(drive, WAIT_TAPE_CMD);
+	return ide_issue_pc(drive);
 }
 
 /* A mode sense command is used to "sense" tape parameters. */
diff --git a/include/linux/ide.h b/include/linux/ide.h
index ad57a4492941..db5ef8ae1ab9 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1250,7 +1250,7 @@ int ide_cd_expiry(ide_drive_t *);
 
 int ide_cd_get_xferlen(struct request *);
 
-ide_startstop_t ide_issue_pc(ide_drive_t *, unsigned int);
+ide_startstop_t ide_issue_pc(ide_drive_t *);
 
 ide_startstop_t do_rw_taskfile(ide_drive_t *, ide_task_t *);
 
-- 
cgit v1.2.3


From f153b82121b0366fe0e5f9553545cce237335175 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 2 Jan 2009 09:23:03 -0800
Subject: Sanitize gcc version header includes

 - include the gcc version-dependent header files from the generic gcc
   header file, rather than the other way around (iow: don't make the
   non-gcc header file have to know about gcc versions)

 - don't include compiler-gcc4.h for gcc 5 (for whenever it gets
   released).  That's just confusing and made us do odd things in the
   gcc4 header file (testing that we really had version 4!)

 - generate the name from the __GNUC__ version directly, rather than
   having a mess of #if conditionals.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler-gcc.h  | 5 +++++
 include/linux/compiler-gcc3.h | 3 ---
 include/linux/compiler-gcc4.h | 5 +----
 include/linux/compiler.h      | 8 ++------
 4 files changed, 8 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 5c8351b859f0..af40f8eb86f0 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -61,3 +61,8 @@
 #define  noinline			__attribute__((noinline))
 #define __attribute_const__		__attribute__((__const__))
 #define __maybe_unused			__attribute__((unused))
+
+#define __gcc_header(x) #x
+#define _gcc_header(x) __gcc_header(linux/compiler-gcc##x.h)
+#define gcc_header(x) _gcc_header(x)
+#include gcc_header(__GNUC__)
diff --git a/include/linux/compiler-gcc3.h b/include/linux/compiler-gcc3.h
index e5eb795f78a1..2befe6513ce4 100644
--- a/include/linux/compiler-gcc3.h
+++ b/include/linux/compiler-gcc3.h
@@ -2,9 +2,6 @@
 #error "Please don't include <linux/compiler-gcc3.h> directly, include <linux/compiler.h> instead."
 #endif
 
-/* These definitions are for GCC v3.x.  */
-#include <linux/compiler-gcc.h>
-
 #if __GNUC_MINOR__ >= 3
 # define __used			__attribute__((__used__))
 #else
diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index 974f5b7bb205..aa426214331b 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -2,9 +2,6 @@
 #error "Please don't include <linux/compiler-gcc4.h> directly, include <linux/compiler.h> instead."
 #endif
 
-/* These definitions are for GCC v4.x.  */
-#include <linux/compiler-gcc.h>
-
 #define __used			__attribute__((__used__))
 #define __must_check 		__attribute__((warn_unused_result))
 #define __compiler_offsetof(a,b) __builtin_offsetof(a,b)
@@ -16,7 +13,7 @@
  */
 #define uninitialized_var(x) x = x
 
-#if !(__GNUC__ == 4 && __GNUC_MINOR__ < 3)
+#if __GNUC_MINOR__ >= 3
 /* Mark functions as cold. gcc will assume any path leading to a call
    to them will be unlikely.  This means a lot of manual unlikely()s
    are unnecessary now for any paths leading to the usual suspects
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index ea7c6be354b7..d95da1020f1c 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -36,12 +36,8 @@ extern void __chk_io_ptr(const volatile void __iomem *);
 
 #ifdef __KERNEL__
 
-#if __GNUC__ >= 4
-# include <linux/compiler-gcc4.h>
-#elif __GNUC__ == 3 && __GNUC_MINOR__ >= 2
-# include <linux/compiler-gcc3.h>
-#else
-# error Sorry, your compiler is too old/not recognized.
+#ifdef __GNUC__
+#include <linux/compiler-gcc.h>
 #endif
 
 #define notrace __attribute__((no_instrument_function))
-- 
cgit v1.2.3


From f9d14250071eda9972e4c9cea745a11185952114 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 2 Jan 2009 09:29:43 -0800
Subject: Disallow gcc versions 4.1.{0,1}

These compiler versions are known to miscompile __weak functions and
thus generate kernels that don't necessarily work correctly.  If a weak
function is int he same compilation unit as a caller, gcc may end up
inlining it, and thus binding the weak function too early.

See

    http://gcc.gnu.org/bugzilla/show_bug.cgi?id=27781

for details.

Cc: Adrian Bunk <bunk@kernel.org>
Cc: Helge Deller <deller@gmx.de>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler-gcc4.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index aa426214331b..09992718f9e8 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -2,6 +2,11 @@
 #error "Please don't include <linux/compiler-gcc4.h> directly, include <linux/compiler.h> instead."
 #endif
 
+/* GCC 4.1.[01] miscompiles __weak */
+#if __GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL__ <= 1
+# error Your version of gcc miscompiles the __weak directive
+#endif
+
 #define __used			__attribute__((__used__))
 #define __must_check 		__attribute__((warn_unused_result))
 #define __compiler_offsetof(a,b) __builtin_offsetof(a,b)
-- 
cgit v1.2.3


From a88a69c91256418c5907c2f1f8a0ec0a36f9e6cc Mon Sep 17 00:00:00 2001
From: Joe Peterson <joe@skyrush.com>
Date: Fri, 2 Jan 2009 13:40:53 +0000
Subject: n_tty: Fix loss of echoed characters and remove bkl from n_tty

Fixes the loss of echoed (and other ldisc-generated characters) when
the tty is stopped or when the driver output buffer is full (happens
frequently for input during continuous program output, such as ^C)
and removes the Big Kernel Lock from the N_TTY line discipline.

Adds an "echo buffer" to the N_TTY line discipline that handles all
ldisc-generated output (including echoed characters).  Along with the
loss of characters, this also fixes the associated loss of sync between
tty output and the ldisc state when characters cannot be immediately
written to the tty driver.

The echo buffer stores (in addition to characters) state operations that need
to be done at the time of character output (like management of the column
position).  This allows echo to cooperate correctly with program output,
since the ldisc state remains consistent with actual characters written.

Since the echo buffer code now isolates the tty column state code
to the process_out* and process_echoes functions, we can remove the
Big Kernel Lock (BKL) and replace it with mutex locks.

Highlights are:

* Handles echo (and other ldisc output) when tty driver buffer is full
  - continuous program output can block echo
* Saves echo when tty is in stopped state (e.g. ^S)
  - (e.g.: ^Q will correctly cause held characters to be released for output)
* Control character pairs (e.g. "^C") are treated atomically and not
  split up by interleaved program output
* Line discipline state is kept consistent with characters sent to
  the tty driver
* Remove the big kernel lock (BKL) from N_TTY line discipline

Signed-off-by: Joe Peterson <joe@skyrush.com>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/n_tty.c  | 736 +++++++++++++++++++++++++++++++++++++++-----------
 drivers/char/tty_io.c |   6 +-
 drivers/char/vt.c     |   2 +-
 include/linux/tty.h   |   6 +
 4 files changed, 594 insertions(+), 156 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/n_tty.c b/drivers/char/n_tty.c
index efbfe9612658..a9bc5764fe75 100644
--- a/drivers/char/n_tty.c
+++ b/drivers/char/n_tty.c
@@ -62,6 +62,17 @@
 #define TTY_THRESHOLD_THROTTLE		128 /* now based on remaining room */
 #define TTY_THRESHOLD_UNTHROTTLE 	128
 
+/*
+ * Special byte codes used in the echo buffer to represent operations
+ * or special handling of characters.  Bytes in the echo buffer that
+ * are not part of such special blocks are treated as normal character
+ * codes.
+ */
+#define ECHO_OP_START 0xff
+#define ECHO_OP_MOVE_BACK_COL 0x80
+#define ECHO_OP_SET_CANON_COL 0x81
+#define ECHO_OP_ERASE_TAB 0x82
+
 static inline unsigned char *alloc_buf(void)
 {
 	gfp_t prio = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL;
@@ -169,6 +180,7 @@ static void check_unthrottle(struct tty_struct *tty)
  *
  *	Locking: tty_read_lock for read fields.
  */
+
 static void reset_buffer_flags(struct tty_struct *tty)
 {
 	unsigned long flags;
@@ -176,6 +188,11 @@ static void reset_buffer_flags(struct tty_struct *tty)
 	spin_lock_irqsave(&tty->read_lock, flags);
 	tty->read_head = tty->read_tail = tty->read_cnt = 0;
 	spin_unlock_irqrestore(&tty->read_lock, flags);
+
+	mutex_lock(&tty->echo_lock);
+	tty->echo_pos = tty->echo_cnt = tty->echo_overrun = 0;
+	mutex_unlock(&tty->echo_lock);
+
 	tty->canon_head = tty->canon_data = tty->erasing = 0;
 	memset(&tty->read_flags, 0, sizeof tty->read_flags);
 	n_tty_set_room(tty);
@@ -266,89 +283,116 @@ static inline int is_continuation(unsigned char c, struct tty_struct *tty)
 }
 
 /**
- *	opost			-	output post processor
+ *	do_output_char			-	output one character
  *	@c: character (or partial unicode symbol)
  *	@tty: terminal device
+ *	@space: space available in tty driver write buffer
  *
- *	Perform OPOST processing.  Returns -1 when the output device is
- *	full and the character must be retried. Note that Linux currently
- *	ignores TABDLY, CRDLY, VTDLY, FFDLY and NLDLY. They simply aren't
- *	relevant in the world today. If you ever need them, add them here.
+ *	This is a helper function that handles one output character
+ *	(including special characters like TAB, CR, LF, etc.),
+ *	putting the results in the tty driver's write buffer.
+ *
+ *	Note that Linux currently ignores TABDLY, CRDLY, VTDLY, FFDLY
+ *	and NLDLY.  They simply aren't relevant in the world today.
+ *	If you ever need them, add them here.
  *
- *	Called from both the receive and transmit sides and can be called
- *	re-entrantly. Relies on lock_kernel() for tty->column state.
+ *	Returns the number of bytes of buffer space used or -1 if
+ *	no space left.
+ *
+ *	Locking: should be called under the output_lock to protect
+ *		 the column state and space left in the buffer
  */
 
-static int opost(unsigned char c, struct tty_struct *tty)
+static int do_output_char(unsigned char c, struct tty_struct *tty, int space)
 {
-	int	space, spaces;
+	int	spaces;
 
-	space = tty_write_room(tty);
 	if (!space)
 		return -1;
-
-	lock_kernel();
-	if (O_OPOST(tty)) {
-		switch (c) {
-		case '\n':
-			if (O_ONLRET(tty))
-				tty->column = 0;
-			if (O_ONLCR(tty)) {
-				if (space < 2) {
-					unlock_kernel();
-					return -1;
-				}
-				tty_put_char(tty, '\r');
-				tty->column = 0;
-			}
-			tty->canon_column = tty->column;
-			break;
-		case '\r':
-			if (O_ONOCR(tty) && tty->column == 0) {
-				unlock_kernel();
-				return 0;
-			}
-			if (O_OCRNL(tty)) {
-				c = '\n';
-				if (O_ONLRET(tty))
-					tty->canon_column = tty->column = 0;
-				break;
-			}
+	
+	switch (c) {
+	case '\n':
+		if (O_ONLRET(tty))
+			tty->column = 0;
+		if (O_ONLCR(tty)) {
+			if (space < 2)
+				return -1;
 			tty->canon_column = tty->column = 0;
+			tty_put_char(tty, '\r');
+			tty_put_char(tty, c);
+			return 2;
+		}
+		tty->canon_column = tty->column;
+		break;
+	case '\r':
+		if (O_ONOCR(tty) && tty->column == 0)
+			return 0;
+		if (O_OCRNL(tty)) {
+			c = '\n';
+			if (O_ONLRET(tty))
+				tty->canon_column = tty->column = 0;
 			break;
-		case '\t':
-			spaces = 8 - (tty->column & 7);
-			if (O_TABDLY(tty) == XTABS) {
-				if (space < spaces) {
-					unlock_kernel();
-					return -1;
-				}
-				tty->column += spaces;
-				tty->ops->write(tty, "        ", spaces);
-				unlock_kernel();
-				return 0;
-			}
+		}
+		tty->canon_column = tty->column = 0;
+		break;
+	case '\t':
+		spaces = 8 - (tty->column & 7);
+		if (O_TABDLY(tty) == XTABS) {
+			if (space < spaces)
+				return -1;
 			tty->column += spaces;
-			break;
-		case '\b':
-			if (tty->column > 0)
-				tty->column--;
-			break;
-		default:
-			if (O_OLCUC(tty))
-				c = toupper(c);
-			if (!iscntrl(c) && !is_continuation(c, tty))
-				tty->column++;
-			break;
+			tty->ops->write(tty, "        ", spaces);
+			return spaces;
 		}
+		tty->column += spaces;
+		break;
+	case '\b':
+		if (tty->column > 0)
+			tty->column--;
+		break;
+	default:
+		if (O_OLCUC(tty))
+			c = toupper(c);
+		if (!iscntrl(c) && !is_continuation(c, tty))
+			tty->column++;
+		break;
 	}
+
 	tty_put_char(tty, c);
-	unlock_kernel();
-	return 0;
+	return 1;
+}
+
+/**
+ *	process_output			-	output post processor
+ *	@c: character (or partial unicode symbol)
+ *	@tty: terminal device
+ *
+ *	Perform OPOST processing.  Returns -1 when the output device is
+ *	full and the character must be retried.
+ *
+ *	Locking: output_lock to protect column state and space left
+ *		 (also, this is called from n_tty_write under the
+ *		  tty layer write lock)
+ */
+
+static int process_output(unsigned char c, struct tty_struct *tty)
+{
+	int	space, retval;
+
+	mutex_lock(&tty->output_lock);
+
+	space = tty_write_room(tty);
+	retval = do_output_char(c, tty, space);
+
+	mutex_unlock(&tty->output_lock);
+	if (retval < 0)
+		return -1;
+	else
+		return 0;
 }
 
 /**
- *	opost_block		-	block postprocess
+ *	process_output_block		-	block post processor
  *	@tty: terminal device
  *	@inbuf: user buffer
  *	@nr: number of bytes
@@ -358,24 +402,29 @@ static int opost(unsigned char c, struct tty_struct *tty)
  *	the simple cases normally found and helps to generate blocks of
  *	symbols for the console driver and thus improve performance.
  *
- *	Called from n_tty_write under the tty layer write lock. Relies
- *	on lock_kernel for the tty->column state.
+ *	Locking: output_lock to protect column state and space left
+ *		 (also, this is called from n_tty_write under the
+ *		  tty layer write lock)
  */
 
-static ssize_t opost_block(struct tty_struct *tty,
-		       const unsigned char *buf, unsigned int nr)
+static ssize_t process_output_block(struct tty_struct *tty,
+				    const unsigned char *buf, unsigned int nr)
 {
 	int	space;
 	int 	i;
 	const unsigned char *cp;
 
+	mutex_lock(&tty->output_lock);
+
 	space = tty_write_room(tty);
 	if (!space)
+	{
+		mutex_unlock(&tty->output_lock);
 		return 0;
+	}
 	if (nr > space)
 		nr = space;
 
-	lock_kernel();
 	for (i = 0, cp = buf; i < nr; i++, cp++) {
 		switch (*cp) {
 		case '\n':
@@ -407,46 +456,393 @@ static ssize_t opost_block(struct tty_struct *tty,
 		}
 	}
 break_out:
-	if (tty->ops->flush_chars)
-		tty->ops->flush_chars(tty);
 	i = tty->ops->write(tty, buf, i);
-	unlock_kernel();
+
+	mutex_unlock(&tty->output_lock);
 	return i;
 }
 
+/**
+ *	process_echoes	-	write pending echo characters
+ *	@tty: terminal device
+ *
+ *	Write previously buffered echo (and other ldisc-generated)
+ *	characters to the tty.
+ *
+ *	Characters generated by the ldisc (including echoes) need to
+ *	be buffered because the driver's write buffer can fill during
+ *	heavy program output.  Echoing straight to the driver will
+ *	often fail under these conditions, causing lost characters and
+ *	resulting mismatches of ldisc state information.
+ *
+ *	Since the ldisc state must represent the characters actually sent
+ *	to the driver at the time of the write, operations like certain
+ *	changes in column state are also saved in the buffer and executed
+ *	here.
+ *
+ *	A circular fifo buffer is used so that the most recent characters
+ *	are prioritized.  Also, when control characters are echoed with a
+ *	prefixed "^", the pair is treated atomically and thus not separated.
+ *
+ *	Locking: output_lock to protect column state and space left,
+ *		 echo_lock to protect the echo buffer
+ */
+
+static void process_echoes(struct tty_struct *tty)
+{
+	int	space, nr;
+	unsigned char c;
+	unsigned char *cp, *buf_end;
+
+	if (!tty->echo_cnt)
+		return;
+
+	mutex_lock(&tty->output_lock);
+	mutex_lock(&tty->echo_lock);
+
+	space = tty_write_room(tty);
+
+	buf_end = tty->echo_buf + N_TTY_BUF_SIZE;
+	cp = tty->echo_buf + tty->echo_pos;
+	nr = tty->echo_cnt;
+	while (nr > 0) {
+		c = *cp;
+		if (c == ECHO_OP_START) {
+			unsigned char op;
+			unsigned char *opp;
+			int no_space_left = 0;
+
+			/*
+			 * If the buffer byte is the start of a multi-byte
+			 * operation, get the next byte, which is either the
+			 * op code or a control character value.
+			 */
+			opp = cp + 1;
+			if (opp == buf_end)
+				opp -= N_TTY_BUF_SIZE;
+			op = *opp;
+			
+			switch (op) {
+				unsigned int num_chars, num_bs;
+
+			case ECHO_OP_ERASE_TAB:
+				if (++opp == buf_end)
+					opp -= N_TTY_BUF_SIZE;
+				num_chars = *opp;
+
+				/*
+				 * Determine how many columns to go back
+				 * in order to erase the tab.
+				 * This depends on the number of columns
+				 * used by other characters within the tab
+				 * area.  If this (modulo 8) count is from
+				 * the start of input rather than from a
+				 * previous tab, we offset by canon column.
+				 * Otherwise, tab spacing is normal.
+				 */
+				if (!(num_chars & 0x80))
+					num_chars += tty->canon_column;
+				num_bs = 8 - (num_chars & 7);
+
+				if (num_bs > space) {
+					no_space_left = 1;
+					break;
+				}
+				space -= num_bs;
+				while (num_bs--) {
+					tty_put_char(tty, '\b');
+					if (tty->column > 0)
+						tty->column--;
+				}
+				cp += 3;
+				nr -= 3;
+				break;
+
+			case ECHO_OP_SET_CANON_COL:
+				tty->canon_column = tty->column;
+				cp += 2;
+				nr -= 2;
+				break;
+
+			case ECHO_OP_MOVE_BACK_COL:
+				if (tty->column > 0)
+					tty->column--;
+				cp += 2;
+				nr -= 2;
+				break;
+
+			case ECHO_OP_START:
+				/* This is an escaped echo op start code */
+				if (!space) {
+					no_space_left = 1;
+					break;
+				}
+				tty_put_char(tty, ECHO_OP_START);
+				tty->column++;
+				space--;
+				cp += 2;
+				nr -= 2;
+				break;
+
+			default:
+				if (iscntrl(op)) {
+					if (L_ECHOCTL(tty)) {
+						/*
+						 * Ensure there is enough space
+						 * for the whole ctrl pair.
+						 */
+						if (space < 2) {
+							no_space_left = 1;
+							break;
+						}
+						tty_put_char(tty, '^');
+						tty_put_char(tty, op ^ 0100);
+						tty->column += 2;
+						space -= 2;
+					} else {
+						if (!space) {
+							no_space_left = 1;
+							break;
+						}
+						tty_put_char(tty, op);
+						space--;
+					}
+				}
+				/*
+				 * If above falls through, this was an
+				 * undefined op.
+				 */
+				cp += 2;
+				nr -= 2;
+			}
+
+			if (no_space_left)
+				break;
+		} else {
+			int retval;
+
+			if ((retval = do_output_char(c, tty, space)) < 0)
+				break;
+			space -= retval;
+			cp += 1;
+			nr -= 1;
+		}
+
+		/* When end of circular buffer reached, wrap around */
+		if (cp >= buf_end)
+			cp -= N_TTY_BUF_SIZE;
+	}
+
+	if (nr == 0) {
+		tty->echo_pos = 0;
+		tty->echo_cnt = 0;
+		tty->echo_overrun = 0;
+	} else {
+		int num_processed = tty->echo_cnt - nr;
+		tty->echo_pos += num_processed;
+		tty->echo_pos &= N_TTY_BUF_SIZE - 1;
+		tty->echo_cnt = nr;
+		if (num_processed > 0)
+			tty->echo_overrun = 0;
+	}
+
+	mutex_unlock(&tty->echo_lock);
+	mutex_unlock(&tty->output_lock);
+
+	if (tty->ops->flush_chars)
+		tty->ops->flush_chars(tty);
+}
+
+/**
+ *	add_echo_byte	-	add a byte to the echo buffer
+ *	@c: unicode byte to echo
+ *	@tty: terminal device
+ *
+ *	Add a character or operation byte to the echo buffer.
+ *
+ *	Should be called under the echo lock to protect the echo buffer.
+ */
+
+static void add_echo_byte(unsigned char c, struct tty_struct *tty)
+{
+	int	new_byte_pos;
+
+	if (tty->echo_cnt == N_TTY_BUF_SIZE) {
+		/* Circular buffer is already at capacity */
+		new_byte_pos = tty->echo_pos;
+
+		/*
+		 * Since the buffer start position needs to be advanced,
+		 * be sure to step by a whole operation byte group.
+		 */
+		if (tty->echo_buf[tty->echo_pos] == ECHO_OP_START)
+		{
+			if (tty->echo_buf[(tty->echo_pos + 1) &
+					  (N_TTY_BUF_SIZE - 1)] ==
+						ECHO_OP_ERASE_TAB) {
+				tty->echo_pos += 3;
+				tty->echo_cnt -= 2;
+			} else {
+				tty->echo_pos += 2;
+				tty->echo_cnt -= 1;
+			}
+		} else {
+			tty->echo_pos++;
+		}
+		tty->echo_pos &= N_TTY_BUF_SIZE - 1;
+
+		tty->echo_overrun = 1;
+	} else {
+		new_byte_pos = tty->echo_pos + tty->echo_cnt;
+		new_byte_pos &= N_TTY_BUF_SIZE - 1;
+		tty->echo_cnt++;
+	}
+
+	tty->echo_buf[new_byte_pos] = c;
+}
+
+/**
+ *	echo_move_back_col	-	add operation to move back a column
+ *	@tty: terminal device
+ *
+ *	Add an operation to the echo buffer to move back one column.
+ *
+ *	Locking: echo_lock to protect the echo buffer
+ */
+
+static void echo_move_back_col(struct tty_struct *tty)
+{
+	mutex_lock(&tty->echo_lock);
+
+	add_echo_byte(ECHO_OP_START, tty);
+	add_echo_byte(ECHO_OP_MOVE_BACK_COL, tty);
+
+	mutex_unlock(&tty->echo_lock);
+}
+
+/**
+ *	echo_set_canon_col	-	add operation to set the canon column
+ *	@tty: terminal device
+ *
+ *	Add an operation to the echo buffer to set the canon column
+ *	to the current column.
+ *
+ *	Locking: echo_lock to protect the echo buffer
+ */
+
+static void echo_set_canon_col(struct tty_struct *tty)
+{
+	mutex_lock(&tty->echo_lock);
+
+	add_echo_byte(ECHO_OP_START, tty);
+	add_echo_byte(ECHO_OP_SET_CANON_COL, tty);
+
+	mutex_unlock(&tty->echo_lock);
+}
+
+/**
+ *	echo_erase_tab	-	add operation to erase a tab
+ *	@num_chars: number of character columns already used
+ *	@after_tab: true if num_chars starts after a previous tab
+ *	@tty: terminal device
+ *
+ *	Add an operation to the echo buffer to erase a tab.
+ *
+ *	Called by the eraser function, which knows how many character
+ *	columns have been used since either a previous tab or the start
+ *	of input.  This information will be used later, along with
+ *	canon column (if applicable), to go back the correct number
+ *	of columns.
+ *
+ *	Locking: echo_lock to protect the echo buffer
+ */
+
+static void echo_erase_tab(unsigned int num_chars, int after_tab,
+			   struct tty_struct *tty)
+{
+	mutex_lock(&tty->echo_lock);
+
+	add_echo_byte(ECHO_OP_START, tty);
+	add_echo_byte(ECHO_OP_ERASE_TAB, tty);
+
+	/* We only need to know this modulo 8 (tab spacing) */
+	num_chars &= 7;
+
+	/* Set the high bit as a flag if num_chars is after a previous tab */
+	if (after_tab)
+		num_chars |= 0x80;
+	
+	add_echo_byte(num_chars, tty);
+
+	mutex_unlock(&tty->echo_lock);
+}
+
+/**
+ *	echo_char_raw	-	echo a character raw
+ *	@c: unicode byte to echo
+ *	@tty: terminal device
+ *
+ *	Echo user input back onto the screen. This must be called only when
+ *	L_ECHO(tty) is true. Called from the driver receive_buf path.
+ *
+ *	This variant does not treat control characters specially.
+ *
+ *	Locking: echo_lock to protect the echo buffer
+ */
+
+static void echo_char_raw(unsigned char c, struct tty_struct *tty)
+{
+	mutex_lock(&tty->echo_lock);
+
+	if (c == ECHO_OP_START) {
+		add_echo_byte(ECHO_OP_START, tty);
+		add_echo_byte(ECHO_OP_START, tty);
+	} else {
+		add_echo_byte(c, tty);
+	}
+
+	mutex_unlock(&tty->echo_lock);
+}
 
 /**
- *	echo_char	-	echo characters
+ *	echo_char	-	echo a character
  *	@c: unicode byte to echo
  *	@tty: terminal device
  *
  *	Echo user input back onto the screen. This must be called only when
  *	L_ECHO(tty) is true. Called from the driver receive_buf path.
  *
- *	Relies on BKL for tty column locking
+ *	This variant tags control characters to be possibly echoed as
+ *	as "^X" (where X is the letter representing the control char).
+ *
+ *	Locking: echo_lock to protect the echo buffer
  */
 
 static void echo_char(unsigned char c, struct tty_struct *tty)
 {
-	if (L_ECHOCTL(tty) && iscntrl(c) && c != '\t') {
-		tty_put_char(tty, '^');
-		tty_put_char(tty, c ^ 0100);
-		tty->column += 2;
-	} else
-		opost(c, tty);
+	mutex_lock(&tty->echo_lock);
+
+	if (c == ECHO_OP_START) {
+		add_echo_byte(ECHO_OP_START, tty);
+		add_echo_byte(ECHO_OP_START, tty);
+	} else {
+		if (iscntrl(c) && c != '\t')
+			add_echo_byte(ECHO_OP_START, tty);
+		add_echo_byte(c, tty);
+	}
+
+	mutex_unlock(&tty->echo_lock);
 }
 
 /**
- *	finsh_erasing		-	complete erase
+ *	finish_erasing		-	complete erase
  *	@tty: tty doing the erase
- *
- *	Relies on BKL for tty column locking
  */
+
 static inline void finish_erasing(struct tty_struct *tty)
 {
 	if (tty->erasing) {
-		tty_put_char(tty, '/');
-		tty->column++;
+		echo_char_raw('/', tty);
 		tty->erasing = 0;
 	}
 }
@@ -460,7 +856,7 @@ static inline void finish_erasing(struct tty_struct *tty)
  *	present in the stream from the driver layer. Handles the complexities
  *	of UTF-8 multibyte symbols.
  *
- *	Locking: read_lock for tty buffers, BKL for column/erasing state
+ *	Locking: read_lock for tty buffers
  */
 
 static void eraser(unsigned char c, struct tty_struct *tty)
@@ -471,7 +867,7 @@ static void eraser(unsigned char c, struct tty_struct *tty)
 
 	/* FIXME: locking needed ? */
 	if (tty->read_head == tty->canon_head) {
-		/* opost('\a', tty); */		/* what do you think? */
+		/* echo_char_raw('\a', tty); */ /* what do you think? */
 		return;
 	}
 	if (c == ERASE_CHAR(tty))
@@ -497,7 +893,7 @@ static void eraser(unsigned char c, struct tty_struct *tty)
 			echo_char(KILL_CHAR(tty), tty);
 			/* Add a newline if ECHOK is on and ECHOKE is off. */
 			if (L_ECHOK(tty))
-				opost('\n', tty);
+				echo_char_raw('\n', tty);
 			return;
 		}
 		kill_type = KILL;
@@ -533,67 +929,62 @@ static void eraser(unsigned char c, struct tty_struct *tty)
 		if (L_ECHO(tty)) {
 			if (L_ECHOPRT(tty)) {
 				if (!tty->erasing) {
-					tty_put_char(tty, '\\');
-					tty->column++;
+					echo_char_raw('\\', tty);
 					tty->erasing = 1;
 				}
 				/* if cnt > 1, output a multi-byte character */
 				echo_char(c, tty);
 				while (--cnt > 0) {
 					head = (head+1) & (N_TTY_BUF_SIZE-1);
-					tty_put_char(tty, tty->read_buf[head]);
+					echo_char_raw(tty->read_buf[head], tty);
+					echo_move_back_col(tty);
 				}
 			} else if (kill_type == ERASE && !L_ECHOE(tty)) {
 				echo_char(ERASE_CHAR(tty), tty);
 			} else if (c == '\t') {
-				unsigned int col = tty->canon_column;
-				unsigned long tail = tty->canon_head;
-
-				/* Find the column of the last char. */
-				while (tail != tty->read_head) {
+				unsigned int num_chars = 0;
+				int after_tab = 0;
+				unsigned long tail = tty->read_head;
+
+				/*
+				 * Count the columns used for characters
+				 * since the start of input or after a
+				 * previous tab.
+				 * This info is used to go back the correct
+				 * number of columns.
+				 */
+				while (tail != tty->canon_head) {
+					tail = (tail-1) & (N_TTY_BUF_SIZE-1);
 					c = tty->read_buf[tail];
-					if (c == '\t')
-						col = (col | 7) + 1;
+					if (c == '\t') {
+						after_tab = 1;
+						break;
+					}
 					else if (iscntrl(c)) {
 						if (L_ECHOCTL(tty))
-							col += 2;
-					} else if (!is_continuation(c, tty))
-						col++;
-					tail = (tail+1) & (N_TTY_BUF_SIZE-1);
-				}
-
-				/* should never happen */
-				if (tty->column > 0x80000000)
-					tty->column = 0;
-
-				/* Now backup to that column. */
-				while (tty->column > col) {
-					/* Can't use opost here. */
-					tty_put_char(tty, '\b');
-					if (tty->column > 0)
-						tty->column--;
+							num_chars += 2;
+					} else if (!is_continuation(c, tty)) {
+						num_chars++;
+					}
 				}
+				echo_erase_tab(num_chars, after_tab, tty);
 			} else {
 				if (iscntrl(c) && L_ECHOCTL(tty)) {
-					tty_put_char(tty, '\b');
-					tty_put_char(tty, ' ');
-					tty_put_char(tty, '\b');
-					if (tty->column > 0)
-						tty->column--;
+					echo_char_raw('\b', tty);
+					echo_char_raw(' ', tty);
+					echo_char_raw('\b', tty);
 				}
 				if (!iscntrl(c) || L_ECHOCTL(tty)) {
-					tty_put_char(tty, '\b');
-					tty_put_char(tty, ' ');
-					tty_put_char(tty, '\b');
-					if (tty->column > 0)
-						tty->column--;
+					echo_char_raw('\b', tty);
+					echo_char_raw(' ', tty);
+					echo_char_raw('\b', tty);
 				}
 			}
 		}
 		if (kill_type == ERASE)
 			break;
 	}
-	if (tty->read_head == tty->canon_head)
+	if (tty->read_head == tty->canon_head && L_ECHO(tty))
 		finish_erasing(tty);
 }
 
@@ -724,14 +1115,18 @@ static inline void n_tty_receive_char(struct tty_struct *tty, unsigned char c)
 		c=tolower(c);
 
 	if (tty->stopped && !tty->flow_stopped && I_IXON(tty) &&
-	    ((I_IXANY(tty) && c != START_CHAR(tty) && c != STOP_CHAR(tty)) ||
-	     c == INTR_CHAR(tty) || c == QUIT_CHAR(tty) || c == SUSP_CHAR(tty)))
+	    I_IXANY(tty) && c != START_CHAR(tty) && c != STOP_CHAR(tty) &&
+	    c != INTR_CHAR(tty) && c != QUIT_CHAR(tty) && c != SUSP_CHAR(tty)) {
 		start_tty(tty);
+		process_echoes(tty);
+	}
 
 	if (tty->closing) {
 		if (I_IXON(tty)) {
-			if (c == START_CHAR(tty))
+			if (c == START_CHAR(tty)) {
 				start_tty(tty);
+				process_echoes(tty);
+			}
 			else if (c == STOP_CHAR(tty))
 				stop_tty(tty);
 		}
@@ -745,17 +1140,20 @@ static inline void n_tty_receive_char(struct tty_struct *tty, unsigned char c)
 	 * up.
 	 */
 	if (!test_bit(c, tty->process_char_map) || tty->lnext) {
-		finish_erasing(tty);
 		tty->lnext = 0;
 		if (L_ECHO(tty)) {
+			finish_erasing(tty);
 			if (tty->read_cnt >= N_TTY_BUF_SIZE-1) {
-				tty_put_char(tty, '\a'); /* beep if no space */
+				/* beep if no space */
+				echo_char_raw('\a', tty);
+				process_echoes(tty);
 				return;
 			}
 			/* Record the column of first canon char. */
 			if (tty->canon_head == tty->read_head)
-				tty->canon_column = tty->column;
+				echo_set_canon_col(tty);
 			echo_char(c, tty);
+			process_echoes(tty);
 		}
 		if (I_PARMRK(tty) && c == (unsigned char) '\377')
 			put_tty_queue(c, tty);
@@ -766,6 +1164,7 @@ static inline void n_tty_receive_char(struct tty_struct *tty, unsigned char c)
 	if (I_IXON(tty)) {
 		if (c == START_CHAR(tty)) {
 			start_tty(tty);
+			process_echoes(tty);
 			return;
 		}
 		if (c == STOP_CHAR(tty)) {
@@ -786,7 +1185,6 @@ static inline void n_tty_receive_char(struct tty_struct *tty, unsigned char c)
 		if (c == SUSP_CHAR(tty)) {
 send_signal:
 			/*
-			 * Echo character, and then send the signal.
 			 * Note that we do not use isig() here because we want
 			 * the order to be:
 			 * 1) flush, 2) echo, 3) signal
@@ -795,8 +1193,12 @@ send_signal:
 				n_tty_flush_buffer(tty);
 				tty_driver_flush_buffer(tty);
 			}
-			if (L_ECHO(tty))
+			if (I_IXON(tty))
+				start_tty(tty);
+			if (L_ECHO(tty)) {
 				echo_char(c, tty);
+				process_echoes(tty);
+			}
 			if (tty->pgrp)
 				kill_pgrp(tty->pgrp, signal, 1);
 			return;
@@ -815,6 +1217,7 @@ send_signal:
 		if (c == ERASE_CHAR(tty) || c == KILL_CHAR(tty) ||
 		    (c == WERASE_CHAR(tty) && L_IEXTEN(tty))) {
 			eraser(c, tty);
+			process_echoes(tty);
 			return;
 		}
 		if (c == LNEXT_CHAR(tty) && L_IEXTEN(tty)) {
@@ -822,8 +1225,9 @@ send_signal:
 			if (L_ECHO(tty)) {
 				finish_erasing(tty);
 				if (L_ECHOCTL(tty)) {
-					tty_put_char(tty, '^');
-					tty_put_char(tty, '\b');
+					echo_char_raw('^', tty);
+					echo_char_raw('\b', tty);
+					process_echoes(tty);
 				}
 			}
 			return;
@@ -834,18 +1238,20 @@ send_signal:
 
 			finish_erasing(tty);
 			echo_char(c, tty);
-			opost('\n', tty);
+			echo_char_raw('\n', tty);
 			while (tail != tty->read_head) {
 				echo_char(tty->read_buf[tail], tty);
 				tail = (tail+1) & (N_TTY_BUF_SIZE-1);
 			}
+			process_echoes(tty);
 			return;
 		}
 		if (c == '\n') {
 			if (L_ECHO(tty) || L_ECHONL(tty)) {
 				if (tty->read_cnt >= N_TTY_BUF_SIZE-1)
-					tty_put_char(tty, '\a');
-				opost('\n', tty);
+					echo_char_raw('\a', tty);
+				echo_char_raw('\n', tty);
+				process_echoes(tty);
 			}
 			goto handle_newline;
 		}
@@ -862,11 +1268,12 @@ send_signal:
 			 */
 			if (L_ECHO(tty)) {
 				if (tty->read_cnt >= N_TTY_BUF_SIZE-1)
-					tty_put_char(tty, '\a');
+					echo_char_raw('\a', tty);
 				/* Record the column of first canon char. */
 				if (tty->canon_head == tty->read_head)
-					tty->canon_column = tty->column;
+					echo_set_canon_col(tty);
 				echo_char(c, tty);
+				process_echoes(tty);
 			}
 			/*
 			 * XXX does PARMRK doubling happen for
@@ -889,20 +1296,23 @@ handle_newline:
 		}
 	}
 
-	finish_erasing(tty);
 	if (L_ECHO(tty)) {
+		finish_erasing(tty);
 		if (tty->read_cnt >= N_TTY_BUF_SIZE-1) {
-			tty_put_char(tty, '\a'); /* beep if no space */
+			/* beep if no space */
+			echo_char_raw('\a', tty);
+			process_echoes(tty);
 			return;
 		}
 		if (c == '\n')
-			opost('\n', tty);
+			echo_char_raw('\n', tty);
 		else {
 			/* Record the column of first canon char. */
 			if (tty->canon_head == tty->read_head)
-				tty->canon_column = tty->column;
+				echo_set_canon_col(tty);
 			echo_char(c, tty);
 		}
+		process_echoes(tty);
 	}
 
 	if (I_PARMRK(tty) && c == (unsigned char) '\377')
@@ -923,6 +1333,9 @@ handle_newline:
 
 static void n_tty_write_wakeup(struct tty_struct *tty)
 {
+	/* Write out any echoed characters that are still pending */
+	process_echoes(tty);
+	
 	if (tty->fasync) {
 		set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
 		kill_fasync(&tty->fasync, SIGIO, POLL_OUT);
@@ -1134,6 +1547,10 @@ static void n_tty_close(struct tty_struct *tty)
 		free_buf(tty->read_buf);
 		tty->read_buf = NULL;
 	}
+	if (tty->echo_buf) {
+		free_buf(tty->echo_buf);
+		tty->echo_buf = NULL;
+	}
 }
 
 /**
@@ -1151,13 +1568,19 @@ static int n_tty_open(struct tty_struct *tty)
 	if (!tty)
 		return -EINVAL;
 
-	/* This one is ugly. Currently a malloc failure here can panic */
+	/* These are ugly. Currently a malloc failure here can panic */
 	if (!tty->read_buf) {
 		tty->read_buf = alloc_buf();
 		if (!tty->read_buf)
 			return -ENOMEM;
 	}
+	if (!tty->echo_buf) {
+		tty->echo_buf = alloc_buf();
+		if (!tty->echo_buf)
+			return -ENOMEM;
+	}
 	memset(tty->read_buf, 0, N_TTY_BUF_SIZE);
+	memset(tty->echo_buf, 0, N_TTY_BUF_SIZE);
 	reset_buffer_flags(tty);
 	tty->column = 0;
 	n_tty_set_termios(tty, NULL);
@@ -1487,16 +1910,23 @@ do_it_again:
  *	@buf: userspace buffer pointer
  *	@nr: size of I/O
  *
- *	Write function of the terminal device. This is serialized with
+ *	Write function of the terminal device.  This is serialized with
  *	respect to other write callers but not to termios changes, reads
- *	and other such events. We must be careful with N_TTY as the receive
- *	code will echo characters, thus calling driver write methods.
+ *	and other such events.  Since the receive code will echo characters,
+ *	thus calling driver write methods, the output_lock is used in
+ *	the output processing functions called here as well as in the
+ *	echo processing function to protect the column state and space
+ *	left in the buffer.
  *
  *	This code must be sure never to sleep through a hangup.
+ *
+ *	Locking: output_lock to protect column state and space left
+ *		 (note that the process_output*() functions take this
+ *		  lock themselves)
  */
 
 static ssize_t n_tty_write(struct tty_struct *tty, struct file *file,
-			  const unsigned char *buf, size_t nr)
+			   const unsigned char *buf, size_t nr)
 {
 	const unsigned char *b = buf;
 	DECLARE_WAITQUEUE(wait, current);
@@ -1510,6 +1940,9 @@ static ssize_t n_tty_write(struct tty_struct *tty, struct file *file,
 			return retval;
 	}
 
+	/* Write out any echoed characters that are still pending */
+	process_echoes(tty);
+	
 	add_wait_queue(&tty->write_wait, &wait);
 	while (1) {
 		set_current_state(TASK_INTERRUPTIBLE);
@@ -1523,7 +1956,7 @@ static ssize_t n_tty_write(struct tty_struct *tty, struct file *file,
 		}
 		if (O_OPOST(tty) && !(test_bit(TTY_HW_COOK_OUT, &tty->flags))) {
 			while (nr > 0) {
-				ssize_t num = opost_block(tty, b, nr);
+				ssize_t num = process_output_block(tty, b, nr);
 				if (num < 0) {
 					if (num == -EAGAIN)
 						break;
@@ -1535,7 +1968,7 @@ static ssize_t n_tty_write(struct tty_struct *tty, struct file *file,
 				if (nr == 0)
 					break;
 				c = *b;
-				if (opost(c, tty) < 0)
+				if (process_output(c, tty) < 0)
 					break;
 				b++; nr--;
 			}
@@ -1663,4 +2096,3 @@ struct tty_ldisc_ops tty_ldisc_N_TTY = {
 	.receive_buf     = n_tty_receive_buf,
 	.write_wakeup    = n_tty_write_wakeup
 };
-
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index db15f9ba7c0b..d8d240c8a25a 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -1111,9 +1111,7 @@ void tty_write_message(struct tty_struct *tty, char *msg)
  *		Locks the line discipline as required
  *		Writes to the tty driver are serialized by the atomic_write_lock
  *	and are then processed in chunks to the device. The line discipline
- *	write method will not be involked in parallel for each device
- *		The line discipline write method is called under the big
- *	kernel lock for historical reasons. New code should not rely on this.
+ *	write method will not be invoked in parallel for each device.
  */
 
 static ssize_t tty_write(struct file *file, const char __user *buf,
@@ -2785,6 +2783,8 @@ void initialize_tty_struct(struct tty_struct *tty,
 	INIT_WORK(&tty->hangup_work, do_tty_hangup);
 	mutex_init(&tty->atomic_read_lock);
 	mutex_init(&tty->atomic_write_lock);
+	mutex_init(&tty->output_lock);
+	mutex_init(&tty->echo_lock);
 	spin_lock_init(&tty->read_lock);
 	spin_lock_init(&tty->ctrl_lock);
 	INIT_LIST_HEAD(&tty->tty_files);
diff --git a/drivers/char/vt.c b/drivers/char/vt.c
index 008176edbd64..639e126b2bff 100644
--- a/drivers/char/vt.c
+++ b/drivers/char/vt.c
@@ -2679,7 +2679,7 @@ static int con_write_room(struct tty_struct *tty)
 {
 	if (tty->stopped)
 		return 0;
-	return 4096;		/* No limit, really; we're not buffering */
+	return 32768;		/* No limit, really; we're not buffering */
 }
 
 static int con_chars_in_buffer(struct tty_struct *tty)
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 3f4954c55e53..dfc77ded198a 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -253,6 +253,7 @@ struct tty_struct {
 	unsigned int column;
 	unsigned char lnext:1, erasing:1, raw:1, real_raw:1, icanon:1;
 	unsigned char closing:1;
+	unsigned char echo_overrun:1;
 	unsigned short minimum_to_wake;
 	unsigned long overrun_time;
 	int num_overrun;
@@ -262,11 +263,16 @@ struct tty_struct {
 	int read_tail;
 	int read_cnt;
 	unsigned long read_flags[N_TTY_BUF_SIZE/(8*sizeof(unsigned long))];
+	unsigned char *echo_buf;
+	unsigned int echo_pos;
+	unsigned int echo_cnt;
 	int canon_data;
 	unsigned long canon_head;
 	unsigned int canon_column;
 	struct mutex atomic_read_lock;
 	struct mutex atomic_write_lock;
+	struct mutex output_lock;
+	struct mutex echo_lock;
 	unsigned char *write_buf;
 	int write_cnt;
 	spinlock_t read_lock;
-- 
cgit v1.2.3


From fc6f6238226e6d1248e1967eae2bf556eaf3ac17 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:43:17 +0000
Subject: pty: simplify resize

We have special case logic for resizing pty/tty pairs. We also have a per
driver resize method so for the pty case we should use it.

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/hvc_console.c |  2 +-
 drivers/char/pty.c         | 54 +++++++++++++++++++++++++++++++++++++++++++++-
 drivers/char/tty_io.c      | 31 ++++++++++----------------
 drivers/char/vt.c          | 14 ++++++------
 include/linux/tty.h        |  3 +--
 include/linux/tty_driver.h |  6 ++----
 6 files changed, 74 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c
index 0587b66d6fc7..5a8a4c28c867 100644
--- a/drivers/char/hvc_console.c
+++ b/drivers/char/hvc_console.c
@@ -529,7 +529,7 @@ static void hvc_set_winsz(struct work_struct *work)
 	tty = tty_kref_get(hp->tty);
 	spin_unlock_irqrestore(&hp->lock, hvc_flags);
 
-	tty_do_resize(tty, tty, &ws);
+	tty_do_resize(tty, &ws);
 	tty_kref_put(tty);
 }
 
diff --git a/drivers/char/pty.c b/drivers/char/pty.c
index 6d4582712b1f..b5daaaa9007e 100644
--- a/drivers/char/pty.c
+++ b/drivers/char/pty.c
@@ -230,6 +230,55 @@ static void pty_set_termios(struct tty_struct *tty,
 	tty->termios->c_cflag |= (CS8 | CREAD);
 }
 
+/**
+ *	pty_do_resize		-	resize event
+ *	@tty: tty being resized
+ *	@real_tty: real tty (not the same as tty if using a pty/tty pair)
+ *	@rows: rows (character)
+ *	@cols: cols (character)
+ *
+ *	Update the termios variables and send the neccessary signals to
+ *	peform a terminal resize correctly
+ */
+
+int pty_resize(struct tty_struct *tty,  struct winsize *ws)
+{
+	struct pid *pgrp, *rpgrp;
+	unsigned long flags;
+	struct tty_struct *pty = tty->link;
+
+	/* For a PTY we need to lock the tty side */
+	mutex_lock(&tty->termios_mutex);
+	if (!memcmp(ws, &tty->winsize, sizeof(*ws)))
+		goto done;
+
+	/* Get the PID values and reference them so we can
+	   avoid holding the tty ctrl lock while sending signals.
+	   We need to lock these individually however. */
+
+	spin_lock_irqsave(&tty->ctrl_lock, flags);
+	pgrp = get_pid(tty->pgrp);
+	spin_unlock_irqrestore(&tty->ctrl_lock, flags);
+
+	spin_lock_irqsave(&pty->ctrl_lock, flags);
+	rpgrp = get_pid(pty->pgrp);
+	spin_unlock_irqrestore(&pty->ctrl_lock, flags);
+
+	if (pgrp)
+		kill_pgrp(pgrp, SIGWINCH, 1);
+	if (rpgrp != pgrp && rpgrp)
+		kill_pgrp(rpgrp, SIGWINCH, 1);
+
+	put_pid(pgrp);
+	put_pid(rpgrp);
+
+	tty->winsize = *ws;
+	pty->winsize = *ws;	/* Never used so will go away soon */
+done:
+	mutex_unlock(&tty->termios_mutex);
+	return 0;
+}
+
 static int pty_install(struct tty_driver *driver, struct tty_struct *tty)
 {
 	struct tty_struct *o_tty;
@@ -290,6 +339,7 @@ static const struct tty_operations pty_ops = {
 	.chars_in_buffer = pty_chars_in_buffer,
 	.unthrottle = pty_unthrottle,
 	.set_termios = pty_set_termios,
+	.resize = pty_resize
 };
 
 /* Traditional BSD devices */
@@ -319,6 +369,7 @@ static const struct tty_operations pty_ops_bsd = {
 	.unthrottle = pty_unthrottle,
 	.set_termios = pty_set_termios,
 	.ioctl = pty_bsd_ioctl,
+	.resize = pty_resize
 };
 
 static void __init legacy_pty_init(void)
@@ -561,7 +612,8 @@ static const struct tty_operations ptm_unix98_ops = {
 	.unthrottle = pty_unthrottle,
 	.set_termios = pty_set_termios,
 	.ioctl = pty_unix98_ioctl,
-	.shutdown = pty_unix98_shutdown
+	.shutdown = pty_unix98_shutdown,
+	.resize = pty_resize
 };
 
 static const struct tty_operations pty_unix98_ops = {
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 2a15af65dd11..d33e5ab06177 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -2048,7 +2048,6 @@ static int tiocgwinsz(struct tty_struct *tty, struct winsize __user *arg)
 /**
  *	tty_do_resize		-	resize event
  *	@tty: tty being resized
- *	@real_tty: real tty (not the same as tty if using a pty/tty pair)
  *	@rows: rows (character)
  *	@cols: cols (character)
  *
@@ -2056,41 +2055,34 @@ static int tiocgwinsz(struct tty_struct *tty, struct winsize __user *arg)
  *	peform a terminal resize correctly
  */
 
-int tty_do_resize(struct tty_struct *tty, struct tty_struct *real_tty,
-					struct winsize *ws)
+int tty_do_resize(struct tty_struct *tty, struct winsize *ws)
 {
-	struct pid *pgrp, *rpgrp;
+	struct pid *pgrp;
 	unsigned long flags;
 
-	/* For a PTY we need to lock the tty side */
-	mutex_lock(&real_tty->termios_mutex);
-	if (!memcmp(ws, &real_tty->winsize, sizeof(*ws)))
+	/* Lock the tty */
+	mutex_lock(&tty->termios_mutex);
+	if (!memcmp(ws, &tty->winsize, sizeof(*ws)))
 		goto done;
 	/* Get the PID values and reference them so we can
 	   avoid holding the tty ctrl lock while sending signals */
 	spin_lock_irqsave(&tty->ctrl_lock, flags);
 	pgrp = get_pid(tty->pgrp);
-	rpgrp = get_pid(real_tty->pgrp);
 	spin_unlock_irqrestore(&tty->ctrl_lock, flags);
 
 	if (pgrp)
 		kill_pgrp(pgrp, SIGWINCH, 1);
-	if (rpgrp != pgrp && rpgrp)
-		kill_pgrp(rpgrp, SIGWINCH, 1);
-
 	put_pid(pgrp);
-	put_pid(rpgrp);
 
 	tty->winsize = *ws;
-	real_tty->winsize = *ws;
 done:
-	mutex_unlock(&real_tty->termios_mutex);
+	mutex_unlock(&tty->termios_mutex);
 	return 0;
 }
 
 /**
  *	tiocswinsz		-	implement window size set ioctl
- *	@tty; tty
+ *	@tty; tty side of tty
  *	@arg: user buffer for result
  *
  *	Copies the user idea of the window size to the kernel. Traditionally
@@ -2103,17 +2095,16 @@ done:
  *	then calls into the default method.
  */
 
-static int tiocswinsz(struct tty_struct *tty, struct tty_struct *real_tty,
-	struct winsize __user *arg)
+static int tiocswinsz(struct tty_struct *tty, struct winsize __user *arg)
 {
 	struct winsize tmp_ws;
 	if (copy_from_user(&tmp_ws, arg, sizeof(*arg)))
 		return -EFAULT;
 
 	if (tty->ops->resize)
-		return tty->ops->resize(tty, real_tty, &tmp_ws);
+		return tty->ops->resize(tty, &tmp_ws);
 	else
-		return tty_do_resize(tty, real_tty, &tmp_ws);
+		return tty_do_resize(tty, &tmp_ws);
 }
 
 /**
@@ -2538,7 +2529,7 @@ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case TIOCGWINSZ:
 		return tiocgwinsz(real_tty, p);
 	case TIOCSWINSZ:
-		return tiocswinsz(tty, real_tty, p);
+		return tiocswinsz(real_tty, p);
 	case TIOCCONS:
 		return real_tty != tty ? -EINVAL : tioccons(file);
 	case FIONBIO:
diff --git a/drivers/char/vt.c b/drivers/char/vt.c
index 639e126b2bff..80014213fb53 100644
--- a/drivers/char/vt.c
+++ b/drivers/char/vt.c
@@ -819,8 +819,8 @@ static inline int resize_screen(struct vc_data *vc, int width, int height,
  *	ctrl_lock of the tty IFF a tty is passed.
  */
 
-static int vc_do_resize(struct tty_struct *tty, struct tty_struct *real_tty,
-		struct vc_data *vc, unsigned int cols, unsigned int lines)
+static int vc_do_resize(struct tty_struct *tty, struct vc_data *vc,
+				unsigned int cols, unsigned int lines)
 {
 	unsigned long old_origin, new_origin, new_scr_end, rlth, rrem, err = 0;
 	unsigned int old_cols, old_rows, old_row_size, old_screen_size;
@@ -932,7 +932,7 @@ static int vc_do_resize(struct tty_struct *tty, struct tty_struct *real_tty,
 		ws.ws_row = vc->vc_rows;
 		ws.ws_col = vc->vc_cols;
 		ws.ws_ypixel = vc->vc_scan_lines;
-		tty_do_resize(tty, real_tty, &ws);
+		tty_do_resize(tty, &ws);
 	}
 
 	if (CON_IS_VISIBLE(vc))
@@ -954,13 +954,12 @@ static int vc_do_resize(struct tty_struct *tty, struct tty_struct *real_tty,
 
 int vc_resize(struct vc_data *vc, unsigned int cols, unsigned int rows)
 {
-	return vc_do_resize(vc->vc_tty, vc->vc_tty, vc, cols, rows);
+	return vc_do_resize(vc->vc_tty, vc, cols, rows);
 }
 
 /**
  *	vt_resize		-	resize a VT
  *	@tty: tty to resize
- *	@real_tty: tty if a pty/tty pair
  *	@ws: winsize attributes
  *
  *	Resize a virtual terminal. This is called by the tty layer as we
@@ -971,14 +970,13 @@ int vc_resize(struct vc_data *vc, unsigned int cols, unsigned int rows)
  *	termios_mutex and the tty ctrl_lock in that order.
  */
 
-int vt_resize(struct tty_struct *tty, struct tty_struct *real_tty,
-	struct winsize *ws)
+int vt_resize(struct tty_struct *tty, struct winsize *ws)
 {
 	struct vc_data *vc = tty->driver_data;
 	int ret;
 
 	acquire_console_sem();
-	ret = vc_do_resize(tty, real_tty, vc, ws->ws_col, ws->ws_row);
+	ret = vc_do_resize(tty, vc, ws->ws_col, ws->ws_row);
 	release_console_sem();
 	return ret;
 }
diff --git a/include/linux/tty.h b/include/linux/tty.h
index dfc77ded198a..f88169787a5f 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -360,8 +360,7 @@ extern int tty_write_room(struct tty_struct *tty);
 extern void tty_driver_flush_buffer(struct tty_struct *tty);
 extern void tty_throttle(struct tty_struct *tty);
 extern void tty_unthrottle(struct tty_struct *tty);
-extern int tty_do_resize(struct tty_struct *tty, struct tty_struct *real_tty,
-						struct winsize *ws);
+extern int tty_do_resize(struct tty_struct *tty, struct winsize *ws);
 extern void tty_shutdown(struct tty_struct *tty);
 extern void tty_free_termios(struct tty_struct *tty);
 extern int is_current_pgrp_orphaned(void);
diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h
index 78416b901589..08e088334dba 100644
--- a/include/linux/tty_driver.h
+++ b/include/linux/tty_driver.h
@@ -196,8 +196,7 @@
  *	Optional: If not provided then the write method is called under
  *	the atomic write lock to keep it serialized with the ldisc.
  *
- * int (*resize)(struct tty_struct *tty, struct tty_struct *real_tty,
- *				unsigned int rows, unsigned int cols);
+ * int (*resize)(struct tty_struct *tty, struct winsize *ws)
  *
  *	Called when a termios request is issued which changes the
  *	requested terminal geometry.
@@ -258,8 +257,7 @@ struct tty_operations {
 	int (*tiocmget)(struct tty_struct *tty, struct file *file);
 	int (*tiocmset)(struct tty_struct *tty, struct file *file,
 			unsigned int set, unsigned int clear);
-	int (*resize)(struct tty_struct *tty, struct tty_struct *real_tty,
-				struct winsize *ws);
+	int (*resize)(struct tty_struct *tty, struct winsize *ws);
 	int (*set_termiox)(struct tty_struct *tty, struct termiox *tnew);
 #ifdef CONFIG_CONSOLE_POLL
 	int (*poll_init)(struct tty_driver *driver, int line, char *options);
-- 
cgit v1.2.3


From 975a1a7d887048d4afc9201383e11b7af991866b Mon Sep 17 00:00:00 2001
From: Russell King <rmk+lkml@arm.linux.org.uk>
Date: Fri, 2 Jan 2009 13:44:27 +0000
Subject: And here's a patch (to be applied on top of the last) which prevents

this happening again by making use of 'const'.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/serial/8250_pci.c | 37 +++++++++++++++++++++----------------
 include/linux/8250_pci.h  |  2 +-
 2 files changed, 22 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c
index 057b532ccaad..0b794138f686 100644
--- a/drivers/serial/8250_pci.c
+++ b/drivers/serial/8250_pci.c
@@ -42,7 +42,8 @@ struct pci_serial_quirk {
 	u32	subvendor;
 	u32	subdevice;
 	int	(*init)(struct pci_dev *dev);
-	int	(*setup)(struct serial_private *, struct pciserial_board *,
+	int	(*setup)(struct serial_private *,
+			 const struct pciserial_board *,
 			 struct uart_port *, int);
 	void	(*exit)(struct pci_dev *dev);
 };
@@ -107,7 +108,7 @@ setup_port(struct serial_private *priv, struct uart_port *port,
  * ADDI-DATA GmbH communication cards <info@addi-data.com>
  */
 static int addidata_apci7800_setup(struct serial_private *priv,
-				struct pciserial_board *board,
+				const struct pciserial_board *board,
 				struct uart_port *port, int idx)
 {
 	unsigned int bar = 0, offset = board->first_offset;
@@ -134,7 +135,7 @@ static int addidata_apci7800_setup(struct serial_private *priv,
  * Not that ugly ;) -- HW
  */
 static int
-afavlab_setup(struct serial_private *priv, struct pciserial_board *board,
+afavlab_setup(struct serial_private *priv, const struct pciserial_board *board,
 	      struct uart_port *port, int idx)
 {
 	unsigned int bar, offset = board->first_offset;
@@ -188,8 +189,9 @@ static int pci_hp_diva_init(struct pci_dev *dev)
  * some serial ports are supposed to be hidden on certain models.
  */
 static int
-pci_hp_diva_setup(struct serial_private *priv, struct pciserial_board *board,
-	      struct uart_port *port, int idx)
+pci_hp_diva_setup(struct serial_private *priv,
+		const struct pciserial_board *board,
+		struct uart_port *port, int idx)
 {
 	unsigned int offset = board->first_offset;
 	unsigned int bar = FL_GET_BASE(board->flags);
@@ -306,7 +308,7 @@ static void __devexit pci_plx9050_exit(struct pci_dev *dev)
 
 /* SBS Technologies Inc. PMC-OCTPRO and P-OCTAL cards */
 static int
-sbs_setup(struct serial_private *priv, struct pciserial_board *board,
+sbs_setup(struct serial_private *priv, const struct pciserial_board *board,
 		struct uart_port *port, int idx)
 {
 	unsigned int bar, offset = board->first_offset;
@@ -463,7 +465,7 @@ static int pci_siig_init(struct pci_dev *dev)
 }
 
 static int pci_siig_setup(struct serial_private *priv,
-			  struct pciserial_board *board,
+			  const struct pciserial_board *board,
 			  struct uart_port *port, int idx)
 {
 	unsigned int bar = FL_GET_BASE(board->flags) + idx, offset = 0;
@@ -534,7 +536,8 @@ static int pci_timedia_init(struct pci_dev *dev)
  * Ugh, this is ugly as all hell --- TYT
  */
 static int
-pci_timedia_setup(struct serial_private *priv, struct pciserial_board *board,
+pci_timedia_setup(struct serial_private *priv,
+		  const struct pciserial_board *board,
 		  struct uart_port *port, int idx)
 {
 	unsigned int bar = 0, offset = board->first_offset;
@@ -568,7 +571,7 @@ pci_timedia_setup(struct serial_private *priv, struct pciserial_board *board,
  */
 static int
 titan_400l_800l_setup(struct serial_private *priv,
-		      struct pciserial_board *board,
+		      const struct pciserial_board *board,
 		      struct uart_port *port, int idx)
 {
 	unsigned int bar, offset = board->first_offset;
@@ -770,7 +773,8 @@ static int pci_oxsemi_tornado_init(struct pci_dev *dev)
 }
 
 static int
-pci_default_setup(struct serial_private *priv, struct pciserial_board *board,
+pci_default_setup(struct serial_private *priv,
+		  const struct pciserial_board *board,
 		  struct uart_port *port, int idx)
 {
 	unsigned int bar, offset = board->first_offset, maxnr;
@@ -1099,7 +1103,7 @@ static struct pci_serial_quirk *find_quirk(struct pci_dev *dev)
 }
 
 static inline int get_pci_irq(struct pci_dev *dev,
-				struct pciserial_board *board)
+				const struct pciserial_board *board)
 {
 	if (board->flags & FL_NOIRQ)
 		return 0;
@@ -1894,8 +1898,8 @@ serial_pci_guess_board(struct pci_dev *dev, struct pciserial_board *board)
 }
 
 static inline int
-serial_pci_matches(struct pciserial_board *board,
-		   struct pciserial_board *guessed)
+serial_pci_matches(const struct pciserial_board *board,
+		   const struct pciserial_board *guessed)
 {
 	return
 	    board->num_ports == guessed->num_ports &&
@@ -1906,7 +1910,7 @@ serial_pci_matches(struct pciserial_board *board,
 }
 
 struct serial_private *
-pciserial_init_ports(struct pci_dev *dev, struct pciserial_board *board)
+pciserial_init_ports(struct pci_dev *dev, const struct pciserial_board *board)
 {
 	struct uart_port serial_port;
 	struct serial_private *priv;
@@ -2039,7 +2043,8 @@ static int __devinit
 pciserial_init_one(struct pci_dev *dev, const struct pci_device_id *ent)
 {
 	struct serial_private *priv;
-	struct pciserial_board *board, tmp;
+	const struct pciserial_board *board;
+	struct pciserial_board tmp;
 	int rc;
 
 	if (ent->driver_data >= ARRAY_SIZE(pci_boards)) {
@@ -2066,7 +2071,7 @@ pciserial_init_one(struct pci_dev *dev, const struct pci_device_id *ent)
 		 * We matched one of our class entries.  Try to
 		 * determine the parameters of this board.
 		 */
-		rc = serial_pci_guess_board(dev, board);
+		rc = serial_pci_guess_board(dev, &tmp);
 		if (rc)
 			goto disable;
 	} else {
diff --git a/include/linux/8250_pci.h b/include/linux/8250_pci.h
index 3209dd46ea7d..b24ff086a662 100644
--- a/include/linux/8250_pci.h
+++ b/include/linux/8250_pci.h
@@ -31,7 +31,7 @@ struct pciserial_board {
 struct serial_private;
 
 struct serial_private *
-pciserial_init_ports(struct pci_dev *dev, struct pciserial_board *board);
+pciserial_init_ports(struct pci_dev *dev, const struct pciserial_board *board);
 void pciserial_remove_ports(struct serial_private *priv);
 void pciserial_suspend_ports(struct serial_private *priv);
 void pciserial_resume_ports(struct serial_private *priv);
-- 
cgit v1.2.3


From c9b3976e3fec266be25c5001a70aa0a890b6c476 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:44:56 +0000
Subject: tty: Fix PPP hang under load

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/tty_ldisc.c | 30 +++++++++++++++++++++---------
 include/linux/tty.h      |  1 +
 2 files changed, 22 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/tty_ldisc.c b/drivers/char/tty_ldisc.c
index f307f135cbfb..7a84b406a952 100644
--- a/drivers/char/tty_ldisc.c
+++ b/drivers/char/tty_ldisc.c
@@ -316,8 +316,7 @@ struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *tty)
 {
 	/* wait_event is a macro */
 	wait_event(tty_ldisc_wait, tty_ldisc_try(tty));
-	if (tty->ldisc.refcount == 0)
-		printk(KERN_ERR "tty_ldisc_ref_wait\n");
+	WARN_ON(tty->ldisc.refcount == 0);
 	return &tty->ldisc;
 }
 
@@ -376,15 +375,17 @@ EXPORT_SYMBOL_GPL(tty_ldisc_deref);
  *	@tty: terminal to activate ldisc on
  *
  *	Set the TTY_LDISC flag when the line discipline can be called
- *	again. Do necessary wakeups for existing sleepers.
+ *	again. Do necessary wakeups for existing sleepers. Clear the LDISC
+ *	changing flag to indicate any ldisc change is now over.
  *
- *	Note: nobody should set this bit except via this function. Clearing
- *	directly is allowed.
+ *	Note: nobody should set the TTY_LDISC bit except via this function.
+ *	Clearing directly is allowed.
  */
 
 void tty_ldisc_enable(struct tty_struct *tty)
 {
 	set_bit(TTY_LDISC, &tty->flags);
+	clear_bit(TTY_LDISC_CHANGING, &tty->flags);
 	wake_up(&tty_ldisc_wait);
 }
 
@@ -496,7 +497,14 @@ restart:
 	 *	reference to the line discipline. The TTY_LDISC bit
 	 *	prevents anyone taking a reference once it is clear.
 	 *	We need the lock to avoid racing reference takers.
+	 *
+	 *	We must clear the TTY_LDISC bit here to avoid a livelock
+	 *	with a userspace app continually trying to use the tty in
+	 *	parallel to the change and re-referencing the tty.
 	 */
+	clear_bit(TTY_LDISC, &tty->flags);
+	if (o_tty)
+		clear_bit(TTY_LDISC, &o_tty->flags);
 
 	spin_lock_irqsave(&tty_ldisc_lock, flags);
 	if (tty->ldisc.refcount || (o_tty && o_tty->ldisc.refcount)) {
@@ -528,7 +536,7 @@ restart:
 	 *	If the TTY_LDISC bit is set, then we are racing against
 	 *	another ldisc change
 	 */
-	if (!test_bit(TTY_LDISC, &tty->flags)) {
+	if (test_bit(TTY_LDISC_CHANGING, &tty->flags)) {
 		struct tty_ldisc *ld;
 		spin_unlock_irqrestore(&tty_ldisc_lock, flags);
 		tty_ldisc_put(new_ldisc.ops);
@@ -536,10 +544,14 @@ restart:
 		tty_ldisc_deref(ld);
 		goto restart;
 	}
-
-	clear_bit(TTY_LDISC, &tty->flags);
+	/*
+	 *	This flag is used to avoid two parallel ldisc changes. Once
+	 *	open and close are fine grained locked this may work better
+	 *	as a mutex shared with the open/close/hup paths
+	 */
+	set_bit(TTY_LDISC_CHANGING, &tty->flags);
 	if (o_tty)
-		clear_bit(TTY_LDISC, &o_tty->flags);
+		set_bit(TTY_LDISC_CHANGING, &o_tty->flags);
 	spin_unlock_irqrestore(&tty_ldisc_lock, flags);
 	
 	/*
diff --git a/include/linux/tty.h b/include/linux/tty.h
index f88169787a5f..bbbeaef99626 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -301,6 +301,7 @@ struct tty_struct {
 #define TTY_PUSH 		6	/* n_tty private */
 #define TTY_CLOSING 		7	/* ->close() in progress */
 #define TTY_LDISC 		9	/* Line discipline attached */
+#define TTY_LDISC_CHANGING 	10	/* Line discipline changing */
 #define TTY_HW_COOK_OUT 	14	/* Hardware can do output cooking */
 #define TTY_HW_COOK_IN 		15	/* Hardware can do input cooking */
 #define TTY_PTY_LOCK 		16	/* pty private */
-- 
cgit v1.2.3


From 31f35939d1d9bcfb3099b32c67b896d2792603f9 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:45:05 +0000
Subject: tty_port: Add a port level carrier detect operation

This is the first step to generalising the various pieces of waiting logic
duplicated in all sorts of serial drivers.

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/esp.c             | 61 +++++++++++++++++++++++----------------
 drivers/char/generic_serial.c  | 43 ++++++++++++++--------------
 drivers/char/isicom.c          | 51 +++++++++++++++++++++------------
 drivers/char/istallion.c       | 28 ++++++++++++------
 drivers/char/moxa.c            | 26 +++++++++++++----
 drivers/char/mxser.c           | 54 +++++++++++++++++++++--------------
 drivers/char/rio/rio_linux.c   | 18 ++++++------
 drivers/char/riscom8.c         | 65 ++++++++++++++++++++++++++----------------
 drivers/char/rocket.c          | 40 +++++++++++++++++---------
 drivers/char/ser_a2232.c       | 19 ++++++------
 drivers/char/stallion.c        | 28 +++++++++++++-----
 drivers/char/sx.c              | 27 +++++++++---------
 drivers/char/synclink.c        | 61 ++++++++++++++++++++++++++-------------
 drivers/char/synclink_gt.c     | 48 ++++++++++++++++++++-----------
 drivers/char/synclinkmp.c      | 55 ++++++++++++++++++++++-------------
 drivers/char/tty_port.c        | 17 +++++++++++
 drivers/char/vme_scc.c         | 15 ++++++----
 include/linux/generic_serial.h |  1 -
 include/linux/tty.h            |  9 ++++++
 19 files changed, 427 insertions(+), 239 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/esp.c b/drivers/char/esp.c
index 7f077c0097f6..45ec263ec012 100644
--- a/drivers/char/esp.c
+++ b/drivers/char/esp.c
@@ -2054,6 +2054,15 @@ static void esp_hangup(struct tty_struct *tty)
 	wake_up_interruptible(&info->port.open_wait);
 }
 
+static int esp_carrier_raised(struct tty_port *port)
+{
+	struct esp_struct *info = container_of(port, struct esp_struct, port);
+	serial_out(info, UART_ESI_CMD1, ESI_GET_UART_STAT);
+	if (serial_in(info, UART_ESI_STAT2) & UART_MSR_DCD)
+		return 1;
+	return 0;
+}
+
 /*
  * ------------------------------------------------------------
  * esp_open() and friends
@@ -2066,17 +2075,19 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	int		retval;
 	int		do_clocal = 0;
 	unsigned long	flags;
+	int		cd;
+	struct tty_port *port = &info->port;
 
 	/*
 	 * If the device is in the middle of being closed, then block
 	 * until it's done, and then try again.
 	 */
 	if (tty_hung_up_p(filp) ||
-	    (info->port.flags & ASYNC_CLOSING)) {
-		if (info->port.flags & ASYNC_CLOSING)
-			interruptible_sleep_on(&info->port.close_wait);
+	    (port->flags & ASYNC_CLOSING)) {
+		if (port->flags & ASYNC_CLOSING)
+			interruptible_sleep_on(&port->close_wait);
 #ifdef SERIAL_DO_RESTART
-		if (info->port.flags & ASYNC_HUP_NOTIFY)
+		if (port->flags & ASYNC_HUP_NOTIFY)
 			return -EAGAIN;
 		else
 			return -ERESTARTSYS;
@@ -2091,7 +2102,7 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	 */
 	if ((filp->f_flags & O_NONBLOCK) ||
 	    (tty->flags & (1 << TTY_IO_ERROR))) {
-		info->port.flags |= ASYNC_NORMAL_ACTIVE;
+		port->flags |= ASYNC_NORMAL_ACTIVE;
 		return 0;
 	}
 
@@ -2101,20 +2112,20 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	/*
 	 * Block waiting for the carrier detect and the line to become
 	 * free (i.e., not in use by the callout).  While we are in
-	 * this loop, info->port.count is dropped by one, so that
+	 * this loop, port->count is dropped by one, so that
 	 * rs_close() knows when to free things.  We restore it upon
 	 * exit, either normal or abnormal.
 	 */
 	retval = 0;
-	add_wait_queue(&info->port.open_wait, &wait);
+	add_wait_queue(&port->open_wait, &wait);
 #ifdef SERIAL_DEBUG_OPEN
 	printk(KERN_DEBUG "block_til_ready before block: ttys%d, count = %d\n",
-	       info->line, info->port.count);
+	       info->line, port->count);
 #endif
 	spin_lock_irqsave(&info->lock, flags);
 	if (!tty_hung_up_p(filp))
-		info->port.count--;
-	info->port.blocked_open++;
+		port->count--;
+	port->blocked_open++;
 	while (1) {
 		if ((tty->termios->c_cflag & CBAUD)) {
 			unsigned int scratch;
@@ -2129,9 +2140,9 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 		}
 		set_current_state(TASK_INTERRUPTIBLE);
 		if (tty_hung_up_p(filp) ||
-		    !(info->port.flags & ASYNC_INITIALIZED)) {
+		    !(port->flags & ASYNC_INITIALIZED)) {
 #ifdef SERIAL_DO_RESTART
-			if (info->port.flags & ASYNC_HUP_NOTIFY)
+			if (port->flags & ASYNC_HUP_NOTIFY)
 				retval = -EAGAIN;
 			else
 				retval = -ERESTARTSYS;
@@ -2141,11 +2152,9 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 			break;
 		}
 
-		serial_out(info, UART_ESI_CMD1, ESI_GET_UART_STAT);
-		if (serial_in(info, UART_ESI_STAT2) & UART_MSR_DCD)
-			do_clocal = 1;
+		cd = tty_port_carrier_raised(port);
 
-		if (!(info->port.flags & ASYNC_CLOSING) &&
+		if (!(port->flags & ASYNC_CLOSING) &&
 		    (do_clocal))
 			break;
 		if (signal_pending(current)) {
@@ -2154,25 +2163,25 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 		}
 #ifdef SERIAL_DEBUG_OPEN
 		printk(KERN_DEBUG "block_til_ready blocking: ttys%d, count = %d\n",
-		       info->line, info->port.count);
+		       info->line, port->count);
 #endif
 		spin_unlock_irqrestore(&info->lock, flags);
 		schedule();
 		spin_lock_irqsave(&info->lock, flags);
 	}
 	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&info->port.open_wait, &wait);
+	remove_wait_queue(&port->open_wait, &wait);
 	if (!tty_hung_up_p(filp))
-		info->port.count++;
-	info->port.blocked_open--;
+		port->count++;
+	port->blocked_open--;
 	spin_unlock_irqrestore(&info->lock, flags);
 #ifdef SERIAL_DEBUG_OPEN
 	printk(KERN_DEBUG "block_til_ready after blocking: ttys%d, count = %d\n",
-	       info->line, info->port.count);
+	       info->line, port->count);
 #endif
 	if (retval)
 		return retval;
-	info->port.flags |= ASYNC_NORMAL_ACTIVE;
+	port->flags |= ASYNC_NORMAL_ACTIVE;
 	return 0;
 }
 
@@ -2329,6 +2338,10 @@ static const struct tty_operations esp_ops = {
 	.tiocmset = esp_tiocmset,
 };
 
+static const struct tty_port_operations esp_port_ops = {
+	.esp_carrier_raised,
+};
+
 /*
  * The serial driver boot-time initialization code!
  */
@@ -2415,6 +2428,8 @@ static int __init espserial_init(void)
 	offset = 0;
 
 	do {
+		tty_port_init(&info->port);
+		info->port.ops = &esp_port_ops;
 		info->io_port = esp[i] + offset;
 		info->irq = irq[i];
 		info->line = (i * 8) + (offset / 8);
@@ -2437,8 +2452,6 @@ static int __init espserial_init(void)
 		info->config.flow_off = flow_off;
 		info->config.pio_threshold = pio_threshold;
 		info->next_port = ports;
-		init_waitqueue_head(&info->port.open_wait);
-		init_waitqueue_head(&info->port.close_wait);
 		init_waitqueue_head(&info->delta_msr_wait);
 		init_waitqueue_head(&info->break_wait);
 		ports = info;
diff --git a/drivers/char/generic_serial.c b/drivers/char/generic_serial.c
index c6090f84a2e4..2356994ee010 100644
--- a/drivers/char/generic_serial.c
+++ b/drivers/char/generic_serial.c
@@ -397,7 +397,8 @@ void gs_hangup(struct tty_struct *tty)
 
 int gs_block_til_ready(void *port_, struct file * filp)
 {
-	struct gs_port *port = port_;
+	struct gs_port *gp = port_;
+	struct tty_port *port = &gp->port;
 	DECLARE_WAITQUEUE(wait, current);
 	int    retval;
 	int    do_clocal = 0;
@@ -409,16 +410,16 @@ int gs_block_til_ready(void *port_, struct file * filp)
 
 	if (!port) return 0;
 
-	tty = port->port.tty;
+	tty = port->tty;
 
 	gs_dprintk (GS_DEBUG_BTR, "Entering gs_block_till_ready.\n"); 
 	/*
 	 * If the device is in the middle of being closed, then block
 	 * until it's done, and then try again.
 	 */
-	if (tty_hung_up_p(filp) || port->port.flags & ASYNC_CLOSING) {
-		interruptible_sleep_on(&port->port.close_wait);
-		if (port->port.flags & ASYNC_HUP_NOTIFY)
+	if (tty_hung_up_p(filp) || port->flags & ASYNC_CLOSING) {
+		interruptible_sleep_on(&port->close_wait);
+		if (port->flags & ASYNC_HUP_NOTIFY)
 			return -EAGAIN;
 		else
 			return -ERESTARTSYS;
@@ -432,7 +433,7 @@ int gs_block_til_ready(void *port_, struct file * filp)
 	 */
 	if ((filp->f_flags & O_NONBLOCK) ||
 	    (tty->flags & (1 << TTY_IO_ERROR))) {
-		port->port.flags |= ASYNC_NORMAL_ACTIVE;
+		port->flags |= ASYNC_NORMAL_ACTIVE;
 		return 0;
 	}
 
@@ -444,34 +445,34 @@ int gs_block_til_ready(void *port_, struct file * filp)
 	/*
 	 * Block waiting for the carrier detect and the line to become
 	 * free (i.e., not in use by the callout).  While we are in
-	 * this loop, port->port.count is dropped by one, so that
+	 * this loop, port->count is dropped by one, so that
 	 * rs_close() knows when to free things.  We restore it upon
 	 * exit, either normal or abnormal.
 	 */
 	retval = 0;
 
-	add_wait_queue(&port->port.open_wait, &wait);
+	add_wait_queue(&port->open_wait, &wait);
 
 	gs_dprintk (GS_DEBUG_BTR, "after add waitq.\n"); 
-	spin_lock_irqsave(&port->driver_lock, flags);
+	spin_lock_irqsave(&gp->driver_lock, flags);
 	if (!tty_hung_up_p(filp)) {
-		port->port.count--;
+		port->count--;
 	}
-	spin_unlock_irqrestore(&port->driver_lock, flags);
-	port->port.blocked_open++;
+	spin_unlock_irqrestore(&gp->driver_lock, flags);
+	port->blocked_open++;
 	while (1) {
-		CD = port->rd->get_CD (port);
+		CD = tty_port_carrier_raised(port);
 		gs_dprintk (GS_DEBUG_BTR, "CD is now %d.\n", CD);
 		set_current_state (TASK_INTERRUPTIBLE);
 		if (tty_hung_up_p(filp) ||
-		    !(port->port.flags & ASYNC_INITIALIZED)) {
-			if (port->port.flags & ASYNC_HUP_NOTIFY)
+		    !(port->flags & ASYNC_INITIALIZED)) {
+			if (port->flags & ASYNC_HUP_NOTIFY)
 				retval = -EAGAIN;
 			else
 				retval = -ERESTARTSYS;
 			break;
 		}
-		if (!(port->port.flags & ASYNC_CLOSING) &&
+		if (!(port->flags & ASYNC_CLOSING) &&
 		    (do_clocal || CD))
 			break;
 		gs_dprintk (GS_DEBUG_BTR, "signal_pending is now: %d (%lx)\n", 
@@ -483,17 +484,17 @@ int gs_block_til_ready(void *port_, struct file * filp)
 		schedule();
 	}
 	gs_dprintk (GS_DEBUG_BTR, "Got out of the loop. (%d)\n",
-		    port->port.blocked_open);
+		    port->blocked_open);
 	set_current_state (TASK_RUNNING);
-	remove_wait_queue(&port->port.open_wait, &wait);
+	remove_wait_queue(&port->open_wait, &wait);
 	if (!tty_hung_up_p(filp)) {
-		port->port.count++;
+		port->count++;
 	}
-	port->port.blocked_open--;
+	port->blocked_open--;
 	if (retval)
 		return retval;
 
-	port->port.flags |= ASYNC_NORMAL_ACTIVE;
+	port->flags |= ASYNC_NORMAL_ACTIVE;
 	func_exit ();
 	return 0;
 }			 
diff --git a/drivers/char/isicom.c b/drivers/char/isicom.c
index 04e4549299ba..b3da4858fd4a 100644
--- a/drivers/char/isicom.c
+++ b/drivers/char/isicom.c
@@ -830,20 +830,28 @@ static int isicom_setup_port(struct tty_struct *tty)
 	return 0;
 }
 
+static int isicom_carrier_raised(struct tty_port *port)
+{
+	struct isi_port *ip = container_of(port, struct isi_port, port);
+	return (ip->status & ISI_DCD)?1 : 0;
+}
+
 static int block_til_ready(struct tty_struct *tty, struct file *filp,
-	struct isi_port *port)
+	struct isi_port *ip)
 {
-	struct isi_board *card = port->card;
+	struct isi_board *card = ip->card;
+	struct tty_port *port = &ip->port;
 	int do_clocal = 0, retval;
 	unsigned long flags;
 	DECLARE_WAITQUEUE(wait, current);
+	int cd;
 
 	/* block if port is in the process of being closed */
 
-	if (tty_hung_up_p(filp) || port->port.flags & ASYNC_CLOSING) {
+	if (tty_hung_up_p(filp) || port->flags & ASYNC_CLOSING) {
 		pr_dbg("block_til_ready: close in progress.\n");
-		interruptible_sleep_on(&port->port.close_wait);
-		if (port->port.flags & ASYNC_HUP_NOTIFY)
+		interruptible_sleep_on(&port->close_wait);
+		if (port->flags & ASYNC_HUP_NOTIFY)
 			return -EAGAIN;
 		else
 			return -ERESTARTSYS;
@@ -854,7 +862,7 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	if ((filp->f_flags & O_NONBLOCK) ||
 			(tty->flags & (1 << TTY_IO_ERROR))) {
 		pr_dbg("block_til_ready: non-block mode.\n");
-		port->port.flags |= ASYNC_NORMAL_ACTIVE;
+		port->flags |= ASYNC_NORMAL_ACTIVE;
 		return 0;
 	}
 
@@ -864,29 +872,29 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	/* block waiting for DCD to be asserted, and while
 						callout dev is busy */
 	retval = 0;
-	add_wait_queue(&port->port.open_wait, &wait);
+	add_wait_queue(&port->open_wait, &wait);
 
 	spin_lock_irqsave(&card->card_lock, flags);
 	if (!tty_hung_up_p(filp))
-		port->port.count--;
-	port->port.blocked_open++;
+		port->count--;
+	port->blocked_open++;
 	spin_unlock_irqrestore(&card->card_lock, flags);
 
 	while (1) {
-		raise_dtr_rts(port);
+		raise_dtr_rts(ip);
 
 		set_current_state(TASK_INTERRUPTIBLE);
-		if (tty_hung_up_p(filp) || !(port->port.flags & ASYNC_INITIALIZED)) {
-			if (port->port.flags & ASYNC_HUP_NOTIFY)
+		if (tty_hung_up_p(filp) || !(port->flags & ASYNC_INITIALIZED)) {
+			if (port->flags & ASYNC_HUP_NOTIFY)
 				retval = -EAGAIN;
 			else
 				retval = -ERESTARTSYS;
 			break;
 		}
-		if (!(port->port.flags & ASYNC_CLOSING) &&
-				(do_clocal || (port->status & ISI_DCD))) {
+		cd = tty_port_carrier_raised(port);
+		if (!(port->flags & ASYNC_CLOSING) &&
+				(do_clocal || cd))
 			break;
-		}
 		if (signal_pending(current)) {
 			retval = -ERESTARTSYS;
 			break;
@@ -894,15 +902,15 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 		schedule();
 	}
 	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&port->port.open_wait, &wait);
+	remove_wait_queue(&port->open_wait, &wait);
 	spin_lock_irqsave(&card->card_lock, flags);
 	if (!tty_hung_up_p(filp))
-		port->port.count++;
-	port->port.blocked_open--;
+		port->count++;
+	port->blocked_open--;
 	spin_unlock_irqrestore(&card->card_lock, flags);
 	if (retval)
 		return retval;
-	port->port.flags |= ASYNC_NORMAL_ACTIVE;
+	port->flags |= ASYNC_NORMAL_ACTIVE;
 	return 0;
 }
 
@@ -1452,6 +1460,10 @@ static const struct tty_operations isicom_ops = {
 	.break_ctl		= isicom_send_break,
 };
 
+static const struct tty_port_operations isicom_port_ops = {
+	.carrier_raised		= isicom_carrier_raised,
+};
+
 static int __devinit reset_card(struct pci_dev *pdev,
 	const unsigned int card, unsigned int *signature)
 {
@@ -1794,6 +1806,7 @@ static int __init isicom_init(void)
 		spin_lock_init(&isi_card[idx].card_lock);
 		for (channel = 0; channel < 16; channel++, port++) {
 			tty_port_init(&port->port);
+			port->port.ops = &isicom_port_ops;
 			port->magic = ISICOM_MAGIC;
 			port->card = &isi_card[idx];
 			port->channel = channel;
diff --git a/drivers/char/istallion.c b/drivers/char/istallion.c
index 4b10770fa937..c4682f9e34bb 100644
--- a/drivers/char/istallion.c
+++ b/drivers/char/istallion.c
@@ -151,7 +151,7 @@ static char	*stli_drvversion = "5.6.0";
 static char	*stli_serialname = "ttyE";
 
 static struct tty_driver	*stli_serial;
-
+static const struct tty_port_operations stli_port_ops;
 
 #define	STLI_TXBUFSIZE		4096
 
@@ -1183,6 +1183,12 @@ static int stli_setport(struct tty_struct *tty)
 
 /*****************************************************************************/
 
+static int stli_carrier_raised(struct tty_port *port)
+{
+	struct stliport *portp = container_of(port, struct stliport, port);
+	return (portp->sigs & TIOCM_CD) ? 1 : 0;
+}
+
 /*
  *	Possibly need to wait for carrier (DCD signal) to come high. Say
  *	maybe because if we are clocal then we don't need to wait...
@@ -1193,6 +1199,7 @@ static int stli_waitcarrier(struct tty_struct *tty, struct stlibrd *brdp,
 {
 	unsigned long flags;
 	int rc, doclocal;
+	struct tty_port *port = &portp->port;
 
 	rc = 0;
 	doclocal = 0;
@@ -1203,7 +1210,7 @@ static int stli_waitcarrier(struct tty_struct *tty, struct stlibrd *brdp,
 	spin_lock_irqsave(&stli_lock, flags);
 	portp->openwaitcnt++;
 	if (! tty_hung_up_p(filp))
-		portp->port.count--;
+		port->count--;
 	spin_unlock_irqrestore(&stli_lock, flags);
 
 	for (;;) {
@@ -1212,27 +1219,27 @@ static int stli_waitcarrier(struct tty_struct *tty, struct stlibrd *brdp,
 		    &portp->asig, sizeof(asysigs_t), 0)) < 0)
 			break;
 		if (tty_hung_up_p(filp) ||
-		    ((portp->port.flags & ASYNC_INITIALIZED) == 0)) {
-			if (portp->port.flags & ASYNC_HUP_NOTIFY)
+		    ((port->flags & ASYNC_INITIALIZED) == 0)) {
+			if (port->flags & ASYNC_HUP_NOTIFY)
 				rc = -EBUSY;
 			else
 				rc = -ERESTARTSYS;
 			break;
 		}
-		if (((portp->port.flags & ASYNC_CLOSING) == 0) &&
-		    (doclocal || (portp->sigs & TIOCM_CD))) {
+		if (((port->flags & ASYNC_CLOSING) == 0) &&
+		    (doclocal || tty_port_carrier_raised(port))) {
 			break;
 		}
 		if (signal_pending(current)) {
 			rc = -ERESTARTSYS;
 			break;
 		}
-		interruptible_sleep_on(&portp->port.open_wait);
+		interruptible_sleep_on(&port->open_wait);
 	}
 
 	spin_lock_irqsave(&stli_lock, flags);
 	if (! tty_hung_up_p(filp))
-		portp->port.count++;
+		port->count++;
 	portp->openwaitcnt--;
 	spin_unlock_irqrestore(&stli_lock, flags);
 
@@ -2696,6 +2703,7 @@ static int stli_initports(struct stlibrd *brdp)
 			continue;
 		}
 		tty_port_init(&portp->port);
+		portp->port.ops = &stli_port_ops;
 		portp->magic = STLI_PORTMAGIC;
 		portp->portnr = i;
 		portp->brdnr = brdp->brdnr;
@@ -4518,6 +4526,10 @@ static const struct tty_operations stli_ops = {
 	.tiocmset = stli_tiocmset,
 };
 
+static const struct tty_port_operations stli_port_ops = {
+	.carrier_raised = stli_carrier_raised,
+};
+
 /*****************************************************************************/
 /*
  *	Loadable module initialization stuff.
diff --git a/drivers/char/moxa.c b/drivers/char/moxa.c
index 12d327a2c9ba..8b0da97d5293 100644
--- a/drivers/char/moxa.c
+++ b/drivers/char/moxa.c
@@ -206,6 +206,7 @@ static void moxa_poll(unsigned long);
 static void moxa_set_tty_param(struct tty_struct *, struct ktermios *);
 static void moxa_setup_empty_event(struct tty_struct *);
 static void moxa_shut_down(struct tty_struct *);
+static int moxa_carrier_raised(struct tty_port *);
 /*
  * moxa board interface functions:
  */
@@ -405,6 +406,10 @@ static const struct tty_operations moxa_ops = {
 	.tiocmset = moxa_tiocmset,
 };
 
+static const struct tty_port_operations moxa_port_ops = {
+	.carrier_raised = moxa_carrier_raised,
+};
+
 static struct tty_driver *moxaDriver;
 static DEFINE_TIMER(moxaTimer, moxa_poll, 0, 0);
 static DEFINE_SPINLOCK(moxa_lock);
@@ -826,6 +831,7 @@ static int moxa_init_board(struct moxa_board_conf *brd, struct device *dev)
 
 	for (i = 0, p = brd->ports; i < MAX_PORTS_PER_BOARD; i++, p++) {
 		tty_port_init(&p->port);
+		p->port.ops = &moxa_port_ops;
 		p->type = PORT_16550A;
 		p->cflag = B9600 | CS8 | CREAD | CLOCAL | HUPCL;
 	}
@@ -1115,15 +1121,27 @@ static void moxa_close_port(struct tty_struct *tty)
 	tty_port_tty_set(&ch->port, NULL);
 }
 
+static int moxa_carrier_raised(struct tty_port *port)
+{
+	struct moxa_port *ch = container_of(port, struct moxa_port, port);
+	int dcd;
+
+	spin_lock_bh(&moxa_lock);
+	dcd = ch->DCDState;
+	spin_unlock_bh(&moxa_lock);
+	return dcd;
+}
+
 static int moxa_block_till_ready(struct tty_struct *tty, struct file *filp,
 			    struct moxa_port *ch)
 {
+	struct tty_port *port = &ch->port;
 	DEFINE_WAIT(wait);
 	int retval = 0;
 	u8 dcd;
 
 	while (1) {
-		prepare_to_wait(&ch->port.open_wait, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(&port->open_wait, &wait, TASK_INTERRUPTIBLE);
 		if (tty_hung_up_p(filp)) {
 #ifdef SERIAL_DO_RESTART
 			retval = -ERESTARTSYS;
@@ -1132,9 +1150,7 @@ static int moxa_block_till_ready(struct tty_struct *tty, struct file *filp,
 #endif
 			break;
 		}
-		spin_lock_bh(&moxa_lock);
-		dcd = ch->DCDState;
-		spin_unlock_bh(&moxa_lock);
+		dcd = tty_port_carrier_raised(port);
 		if (dcd)
 			break;
 
@@ -1144,7 +1160,7 @@ static int moxa_block_till_ready(struct tty_struct *tty, struct file *filp,
 		}
 		schedule();
 	}
-	finish_wait(&ch->port.open_wait, &wait);
+	finish_wait(&port->open_wait, &wait);
 
 	return retval;
 }
diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c
index 047766915411..eafbbcf355e7 100644
--- a/drivers/char/mxser.c
+++ b/drivers/char/mxser.c
@@ -541,13 +541,21 @@ static unsigned char mxser_get_msr(int baseaddr, int mode, int port)
 	return status;
 }
 
+static int mxser_carrier_raised(struct tty_port *port)
+{
+	struct mxser_port *mp = container_of(port, struct mxser_port, port);
+	return (inb(mp->ioaddr + UART_MSR) & UART_MSR_DCD)?1:0;
+}
+
 static int mxser_block_til_ready(struct tty_struct *tty, struct file *filp,
-		struct mxser_port *port)
+		struct mxser_port *mp)
 {
 	DECLARE_WAITQUEUE(wait, current);
 	int retval;
 	int do_clocal = 0;
 	unsigned long flags;
+	int cd;
+	struct tty_port *port = &mp->port;
 
 	/*
 	 * If non-blocking mode is set, or the port is not enabled,
@@ -555,7 +563,7 @@ static int mxser_block_til_ready(struct tty_struct *tty, struct file *filp,
 	 */
 	if ((filp->f_flags & O_NONBLOCK) ||
 			test_bit(TTY_IO_ERROR, &tty->flags)) {
-		port->port.flags |= ASYNC_NORMAL_ACTIVE;
+		port->flags |= ASYNC_NORMAL_ACTIVE;
 		return 0;
 	}
 
@@ -565,34 +573,33 @@ static int mxser_block_til_ready(struct tty_struct *tty, struct file *filp,
 	/*
 	 * Block waiting for the carrier detect and the line to become
 	 * free (i.e., not in use by the callout).  While we are in
-	 * this loop, port->port.count is dropped by one, so that
+	 * this loop, port->count is dropped by one, so that
 	 * mxser_close() knows when to free things.  We restore it upon
 	 * exit, either normal or abnormal.
 	 */
 	retval = 0;
-	add_wait_queue(&port->port.open_wait, &wait);
+	add_wait_queue(&port->open_wait, &wait);
 
-	spin_lock_irqsave(&port->slock, flags);
+	spin_lock_irqsave(&mp->slock, flags);
 	if (!tty_hung_up_p(filp))
-		port->port.count--;
-	spin_unlock_irqrestore(&port->slock, flags);
-	port->port.blocked_open++;
+		port->count--;
+	spin_unlock_irqrestore(&mp->slock, flags);
+	port->blocked_open++;
 	while (1) {
-		spin_lock_irqsave(&port->slock, flags);
-		outb(inb(port->ioaddr + UART_MCR) |
-			UART_MCR_DTR | UART_MCR_RTS, port->ioaddr + UART_MCR);
-		spin_unlock_irqrestore(&port->slock, flags);
+		spin_lock_irqsave(&mp->slock, flags);
+		outb(inb(mp->ioaddr + UART_MCR) |
+			UART_MCR_DTR | UART_MCR_RTS, mp->ioaddr + UART_MCR);
+		spin_unlock_irqrestore(&mp->slock, flags);
 		set_current_state(TASK_INTERRUPTIBLE);
-		if (tty_hung_up_p(filp) || !(port->port.flags & ASYNC_INITIALIZED)) {
-			if (port->port.flags & ASYNC_HUP_NOTIFY)
+		if (tty_hung_up_p(filp) || !(port->flags & ASYNC_INITIALIZED)) {
+			if (port->flags & ASYNC_HUP_NOTIFY)
 				retval = -EAGAIN;
 			else
 				retval = -ERESTARTSYS;
 			break;
 		}
-		if (!(port->port.flags & ASYNC_CLOSING) &&
-				(do_clocal ||
-				(inb(port->ioaddr + UART_MSR) & UART_MSR_DCD)))
+		cd = tty_port_carrier_raised(port);
+		if (!(port->flags & ASYNC_CLOSING) && (do_clocal || cd))
 			break;
 		if (signal_pending(current)) {
 			retval = -ERESTARTSYS;
@@ -601,13 +608,13 @@ static int mxser_block_til_ready(struct tty_struct *tty, struct file *filp,
 		schedule();
 	}
 	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&port->port.open_wait, &wait);
+	remove_wait_queue(&port->open_wait, &wait);
 	if (!tty_hung_up_p(filp))
-		port->port.count++;
-	port->port.blocked_open--;
+		port->count++;
+	port->blocked_open--;
 	if (retval)
 		return retval;
-	port->port.flags |= ASYNC_NORMAL_ACTIVE;
+	port->flags |= ASYNC_NORMAL_ACTIVE;
 	return 0;
 }
 
@@ -2449,6 +2456,10 @@ static const struct tty_operations mxser_ops = {
 	.tiocmset = mxser_tiocmset,
 };
 
+struct tty_port_operations mxser_port_ops = {
+	.carrier_raised = mxser_carrier_raised,
+};
+
 /*
  * The MOXA Smartio/Industio serial driver boot-time initialization code!
  */
@@ -2482,6 +2493,7 @@ static int __devinit mxser_initbrd(struct mxser_board *brd,
 	for (i = 0; i < brd->info->nports; i++) {
 		info = &brd->ports[i];
 		tty_port_init(&info->port);
+		info->port.ops = &mxser_port_ops;
 		info->board = brd;
 		info->stop_rx = 0;
 		info->ldisc_stop_rx = 0;
diff --git a/drivers/char/rio/rio_linux.c b/drivers/char/rio/rio_linux.c
index a8f68a3f14dd..ec2afd139472 100644
--- a/drivers/char/rio/rio_linux.c
+++ b/drivers/char/rio/rio_linux.c
@@ -173,7 +173,7 @@ static void rio_disable_tx_interrupts(void *ptr);
 static void rio_enable_tx_interrupts(void *ptr);
 static void rio_disable_rx_interrupts(void *ptr);
 static void rio_enable_rx_interrupts(void *ptr);
-static int rio_get_CD(void *ptr);
+static int rio_carrier_raised(struct tty_port *port);
 static void rio_shutdown_port(void *ptr);
 static int rio_set_real_termios(void *ptr);
 static void rio_hungup(void *ptr);
@@ -224,7 +224,6 @@ static struct real_driver rio_real_driver = {
 	rio_enable_tx_interrupts,
 	rio_disable_rx_interrupts,
 	rio_enable_rx_interrupts,
-	rio_get_CD,
 	rio_shutdown_port,
 	rio_set_real_termios,
 	rio_chars_in_buffer,
@@ -476,9 +475,9 @@ static void rio_enable_rx_interrupts(void *ptr)
 
 
 /* Jeez. Isn't this simple?  */
-static int rio_get_CD(void *ptr)
+static int rio_carrier_raised(struct tty_port *port)
 {
-	struct Port *PortP = ptr;
+	struct Port *PortP = container_of(port, struct Port, gs.port);
 	int rv;
 
 	func_enter();
@@ -806,7 +805,9 @@ static void *ckmalloc(int size)
 	return p;
 }
 
-
+static const struct tty_port_operations rio_port_ops = {
+	.carrier_raised = rio_carrier_raised,
+};
 
 static int rio_init_datastructures(void)
 {
@@ -842,17 +843,14 @@ static int rio_init_datastructures(void)
 			goto free6;
 		}
 		rio_dprintk(RIO_DEBUG_INIT, "initing port %d (%d)\n", i, port->Mapped);
+		tty_port_init(&port->gs.port);
+		port->gs.port.ops = &rio_port_ops;
 		port->PortNum = i;
 		port->gs.magic = RIO_MAGIC;
 		port->gs.close_delay = HZ / 2;
 		port->gs.closing_wait = 30 * HZ;
 		port->gs.rd = &rio_real_driver;
 		spin_lock_init(&port->portSem);
-		/*
-		 * Initializing wait queue
-		 */
-		init_waitqueue_head(&port->gs.port.open_wait);
-		init_waitqueue_head(&port->gs.port.close_wait);
 	}
 #else
 	/* We could postpone initializing them to when they are configured. */
diff --git a/drivers/char/riscom8.c b/drivers/char/riscom8.c
index 2c6c8f33d6b4..6ad1c2aa2a98 100644
--- a/drivers/char/riscom8.c
+++ b/drivers/char/riscom8.c
@@ -857,23 +857,40 @@ static void rc_shutdown_port(struct tty_struct *tty,
 		rc_shutdown_board(bp);
 }
 
+static int carrier_raised(struct tty_port *port)
+{
+	struct riscom_port *p = container_of(port, struct riscom_port, port);
+	struct riscom_board *bp = port_Board(p);
+	unsigned long flags;
+	int CD;
+	
+	spin_lock_irqsave(&riscom_lock, flags);
+	rc_out(bp, CD180_CAR, port_No(p));
+	CD = rc_in(bp, CD180_MSVR) & MSVR_CD;
+	rc_out(bp, CD180_MSVR, MSVR_RTS);
+	bp->DTR &= ~(1u << port_No(p));
+	rc_out(bp, RC_DTR, bp->DTR);
+	spin_unlock_irqrestore(&riscom_lock, flags);
+	return CD;
+}
+
 static int block_til_ready(struct tty_struct *tty, struct file *filp,
-			   struct riscom_port *port)
+			   struct riscom_port *rp)
 {
 	DECLARE_WAITQUEUE(wait, current);
-	struct riscom_board *bp = port_Board(port);
 	int    retval;
 	int    do_clocal = 0;
 	int    CD;
 	unsigned long flags;
+	struct tty_port *port = &rp->port;
 
 	/*
 	 * If the device is in the middle of being closed, then block
 	 * until it's done, and then try again.
 	 */
-	if (tty_hung_up_p(filp) || port->port.flags & ASYNC_CLOSING) {
-		interruptible_sleep_on(&port->port.close_wait);
-		if (port->port.flags & ASYNC_HUP_NOTIFY)
+	if (tty_hung_up_p(filp) || port->flags & ASYNC_CLOSING) {
+		interruptible_sleep_on(&port->close_wait);
+		if (port->flags & ASYNC_HUP_NOTIFY)
 			return -EAGAIN;
 		else
 			return -ERESTARTSYS;
@@ -885,7 +902,7 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	 */
 	if ((filp->f_flags & O_NONBLOCK) ||
 	    (tty->flags & (1 << TTY_IO_ERROR))) {
-		port->port.flags |= ASYNC_NORMAL_ACTIVE;
+		port->flags |= ASYNC_NORMAL_ACTIVE;
 		return 0;
 	}
 
@@ -900,37 +917,29 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	 * exit, either normal or abnormal.
 	 */
 	retval = 0;
-	add_wait_queue(&port->port.open_wait, &wait);
+	add_wait_queue(&port->open_wait, &wait);
 
 	spin_lock_irqsave(&riscom_lock, flags);
 
 	if (!tty_hung_up_p(filp))
-		port->port.count--;
+		port->count--;
 
 	spin_unlock_irqrestore(&riscom_lock, flags);
 
-	port->port.blocked_open++;
+	port->blocked_open++;
 	while (1) {
-		spin_lock_irqsave(&riscom_lock, flags);
-
-		rc_out(bp, CD180_CAR, port_No(port));
-		CD = rc_in(bp, CD180_MSVR) & MSVR_CD;
-		rc_out(bp, CD180_MSVR, MSVR_RTS);
-		bp->DTR &= ~(1u << port_No(port));
-		rc_out(bp, RC_DTR, bp->DTR);
-
-		spin_unlock_irqrestore(&riscom_lock, flags);
 
+		CD = tty_port_carrier_raised(port);
 		set_current_state(TASK_INTERRUPTIBLE);
 		if (tty_hung_up_p(filp) ||
-		    !(port->port.flags & ASYNC_INITIALIZED)) {
-			if (port->port.flags & ASYNC_HUP_NOTIFY)
+		    !(port->flags & ASYNC_INITIALIZED)) {
+			if (port->flags & ASYNC_HUP_NOTIFY)
 				retval = -EAGAIN;
 			else
 				retval = -ERESTARTSYS;
 			break;
 		}
-		if (!(port->port.flags & ASYNC_CLOSING) &&
+		if (!(port->flags & ASYNC_CLOSING) &&
 		    (do_clocal || CD))
 			break;
 		if (signal_pending(current)) {
@@ -940,14 +949,14 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 		schedule();
 	}
 	__set_current_state(TASK_RUNNING);
-	remove_wait_queue(&port->port.open_wait, &wait);
+	remove_wait_queue(&port->open_wait, &wait);
 	if (!tty_hung_up_p(filp))
-		port->port.count++;
-	port->port.blocked_open--;
+		port->count++;
+	port->blocked_open--;
 	if (retval)
 		return retval;
 
-	port->port.flags |= ASYNC_NORMAL_ACTIVE;
+	port->flags |= ASYNC_NORMAL_ACTIVE;
 	return 0;
 }
 
@@ -1510,6 +1519,11 @@ static const struct tty_operations riscom_ops = {
 	.break_ctl = rc_send_break,
 };
 
+static const struct tty_port_operations riscom_port_ops = {
+	.carrier_raised = carrier_raised,
+};
+
+
 static int __init rc_init_drivers(void)
 {
 	int error;
@@ -1541,6 +1555,7 @@ static int __init rc_init_drivers(void)
 	memset(rc_port, 0, sizeof(rc_port));
 	for (i = 0; i < RC_NPORT * RC_NBOARD; i++)  {
 		tty_port_init(&rc_port[i].port);
+		rc_port[i].port.ops = &riscom_port_ops;
 		rc_port[i].magic = RISCOM8_MAGIC;
 	}
 	return 0;
diff --git a/drivers/char/rocket.c b/drivers/char/rocket.c
index 584d791e84a6..4a4110e703a5 100644
--- a/drivers/char/rocket.c
+++ b/drivers/char/rocket.c
@@ -135,6 +135,7 @@ static int rcktpt_type[NUM_BOARDS];
 static int is_PCI[NUM_BOARDS];
 static rocketModel_t rocketModel[NUM_BOARDS];
 static int max_board;
+static const struct tty_port_operations rocket_port_ops;
 
 /*
  * The following arrays define the interrupt bits corresponding to each AIOP.
@@ -649,9 +650,8 @@ static void init_r_port(int board, int aiop, int chan, struct pci_dev *pci_dev)
 	info->board = board;
 	info->aiop = aiop;
 	info->chan = chan;
-	info->port.closing_wait = 3000;
-	info->port.close_delay = 50;
-	init_waitqueue_head(&info->port.open_wait);
+	tty_port_init(&info->port);
+	info->port.ops = &rocket_port_ops;
 	init_completion(&info->close_wait);
 	info->flags &= ~ROCKET_MODE_MASK;
 	switch (pc104[board][line]) {
@@ -864,11 +864,18 @@ static void configure_r_port(struct r_port *info,
 	}
 }
 
+static int carrier_raised(struct tty_port *port)
+{
+	struct r_port *info = container_of(port, struct r_port, port);
+	return (sGetChanStatusLo(&info->channel) & CD_ACT) ? 1 : 0;
+}
+
 /*  info->port.count is considered critical, protected by spinlocks.  */
 static int block_til_ready(struct tty_struct *tty, struct file *filp,
 			   struct r_port *info)
 {
 	DECLARE_WAITQUEUE(wait, current);
+	struct tty_port *port = &info->port;
 	int retval;
 	int do_clocal = 0, extra_count = 0;
 	unsigned long flags;
@@ -898,13 +905,13 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 
 	/*
 	 * Block waiting for the carrier detect and the line to become free.  While we are in
-	 * this loop, info->port.count is dropped by one, so that rp_close() knows when to free things.
+	 * this loop, port->count is dropped by one, so that rp_close() knows when to free things.
          * We restore it upon exit, either normal or abnormal.
 	 */
 	retval = 0;
-	add_wait_queue(&info->port.open_wait, &wait);
+	add_wait_queue(&port->open_wait, &wait);
 #ifdef ROCKET_DEBUG_OPEN
-	printk(KERN_INFO "block_til_ready before block: ttyR%d, count = %d\n", info->line, info->port.count);
+	printk(KERN_INFO "block_til_ready before block: ttyR%d, count = %d\n", info->line, port->count);
 #endif
 	spin_lock_irqsave(&info->slock, flags);
 
@@ -913,10 +920,10 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 #else
 	if (!tty_hung_up_p(filp)) {
 		extra_count = 1;
-		info->port.count--;
+		port->count--;
 	}
 #endif
-	info->port.blocked_open++;
+	port->blocked_open++;
 
 	spin_unlock_irqrestore(&info->slock, flags);
 
@@ -933,7 +940,8 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 				retval = -ERESTARTSYS;
 			break;
 		}
-		if (!(info->flags & ROCKET_CLOSING) && (do_clocal || (sGetChanStatusLo(&info->channel) & CD_ACT)))
+		if (!(info->flags & ROCKET_CLOSING) &&
+			(do_clocal || tty_port_carrier_raised(port)))
 			break;
 		if (signal_pending(current)) {
 			retval = -ERESTARTSYS;
@@ -941,24 +949,24 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 		}
 #ifdef ROCKET_DEBUG_OPEN
 		printk(KERN_INFO "block_til_ready blocking: ttyR%d, count = %d, flags=0x%0x\n",
-		     info->line, info->port.count, info->flags);
+		     info->line, port->count, info->flags);
 #endif
 		schedule();	/*  Don't hold spinlock here, will hang PC */
 	}
 	__set_current_state(TASK_RUNNING);
-	remove_wait_queue(&info->port.open_wait, &wait);
+	remove_wait_queue(&port->open_wait, &wait);
 
 	spin_lock_irqsave(&info->slock, flags);
 
 	if (extra_count)
-		info->port.count++;
-	info->port.blocked_open--;
+		port->count++;
+	port->blocked_open--;
 
 	spin_unlock_irqrestore(&info->slock, flags);
 
 #ifdef ROCKET_DEBUG_OPEN
 	printk(KERN_INFO "block_til_ready after blocking: ttyR%d, count = %d\n",
-	       info->line, info->port.count);
+	       info->line, port->count);
 #endif
 	if (retval)
 		return retval;
@@ -2371,6 +2379,10 @@ static const struct tty_operations rocket_ops = {
 	.tiocmset = rp_tiocmset,
 };
 
+static const struct tty_port_operations rocket_port_ops = {
+	.carrier_raised = carrier_raised,
+};
+
 /*
  * The module "startup" routine; it's run when the module is loaded.
  */
diff --git a/drivers/char/ser_a2232.c b/drivers/char/ser_a2232.c
index 7b0c35207d9b..0c97f34df63a 100644
--- a/drivers/char/ser_a2232.c
+++ b/drivers/char/ser_a2232.c
@@ -122,7 +122,7 @@ static void a2232_disable_tx_interrupts(void *ptr);
 static void a2232_enable_tx_interrupts(void *ptr);
 static void a2232_disable_rx_interrupts(void *ptr);
 static void a2232_enable_rx_interrupts(void *ptr);
-static int  a2232_get_CD(void *ptr);
+static int  a2232_carrier_raised(struct tty_port *port);
 static void a2232_shutdown_port(void *ptr);
 static int  a2232_set_real_termios(void *ptr);
 static int  a2232_chars_in_buffer(void *ptr);
@@ -148,7 +148,6 @@ static struct real_driver a2232_real_driver = {
         a2232_enable_tx_interrupts,
         a2232_disable_rx_interrupts,
         a2232_enable_rx_interrupts,
-        a2232_get_CD,
         a2232_shutdown_port,
         a2232_set_real_termios,
         a2232_chars_in_buffer,
@@ -260,9 +259,10 @@ static void a2232_enable_rx_interrupts(void *ptr)
 	port->disable_rx = 0;
 }
 
-static int  a2232_get_CD(void *ptr)
+static int  a2232_carrier_raised(struct tty_port *port)
 {
-	return ((struct a2232_port *) ptr)->cd_status;
+	struct a2232_port *ap = container_of(port, struct a2232_port, gs.port);
+	return ap->cd_status;
 }
 
 static void a2232_shutdown_port(void *ptr)
@@ -638,6 +638,10 @@ int ch, err, n, p;
 	return IRQ_HANDLED;
 }
 
+static const struct tty_port_operations a2232_port_ops = {
+	.carrier_raised = a2232_carrier_raised,
+};
+
 static void a2232_init_portstructs(void)
 {
 	struct a2232_port *port;
@@ -645,6 +649,8 @@ static void a2232_init_portstructs(void)
 
 	for (i = 0; i < MAX_A2232_BOARDS*NUMLINES; i++) {
 		port = a2232_ports + i;
+		tty_port_init(&port->gs.port);
+		port->gs.port.ops = &a2232_port_ops;
 		port->which_a2232 = i/NUMLINES;
 		port->which_port_on_a2232 = i%NUMLINES;
 		port->disable_rx = port->throttle_input = port->cd_status = 0;
@@ -652,11 +658,6 @@ static void a2232_init_portstructs(void)
 		port->gs.close_delay = HZ/2;
 		port->gs.closing_wait = 30 * HZ;
 		port->gs.rd = &a2232_real_driver;
-#ifdef NEW_WRITE_LOCKING
-		mutex_init(&(port->gs.port_write_mutex));
-#endif
-		init_waitqueue_head(&port->gs.port.open_wait);
-		init_waitqueue_head(&port->gs.port.close_wait);
 	}
 }
 
diff --git a/drivers/char/stallion.c b/drivers/char/stallion.c
index 963b03fb29e5..12aecdaf61ec 100644
--- a/drivers/char/stallion.c
+++ b/drivers/char/stallion.c
@@ -130,6 +130,8 @@ static char		stl_unwanted[SC26198_RXFIFOSIZE];
 static DEFINE_MUTEX(stl_brdslock);
 static struct stlbrd		*stl_brds[STL_MAXBRDS];
 
+static const struct tty_port_operations stl_port_ops;
+
 /*
  *	Per board state flags. Used with the state field of the board struct.
  *	Not really much here!
@@ -786,6 +788,12 @@ static int stl_open(struct tty_struct *tty, struct file *filp)
 
 /*****************************************************************************/
 
+static int stl_carrier_raised(struct tty_port *port)
+{
+	struct stlport *portp = container_of(port, struct stlport, port);
+	return (portp->sigs & TIOCM_CD) ? 1 : 0;
+}
+
 /*
  *	Possibly need to wait for carrier (DCD signal) to come high. Say
  *	maybe because if we are clocal then we don't need to wait...
@@ -796,6 +804,7 @@ static int stl_waitcarrier(struct tty_struct *tty, struct stlport *portp,
 {
 	unsigned long	flags;
 	int		rc, doclocal;
+	struct tty_port *port = &portp->port;
 
 	pr_debug("stl_waitcarrier(portp=%p,filp=%p)\n", portp, filp);
 
@@ -809,32 +818,32 @@ static int stl_waitcarrier(struct tty_struct *tty, struct stlport *portp,
 
 	portp->openwaitcnt++;
 	if (! tty_hung_up_p(filp))
-		portp->port.count--;
+		port->count--;
 
 	for (;;) {
 		/* Takes brd_lock internally */
 		stl_setsignals(portp, 1, 1);
 		if (tty_hung_up_p(filp) ||
-		    ((portp->port.flags & ASYNC_INITIALIZED) == 0)) {
-			if (portp->port.flags & ASYNC_HUP_NOTIFY)
+		    ((port->flags & ASYNC_INITIALIZED) == 0)) {
+			if (port->flags & ASYNC_HUP_NOTIFY)
 				rc = -EBUSY;
 			else
 				rc = -ERESTARTSYS;
 			break;
 		}
-		if (((portp->port.flags & ASYNC_CLOSING) == 0) &&
-		    (doclocal || (portp->sigs & TIOCM_CD)))
+		if (((port->flags & ASYNC_CLOSING) == 0) &&
+		    (doclocal || tty_port_carrier_raised(port)))
 			break;
 		if (signal_pending(current)) {
 			rc = -ERESTARTSYS;
 			break;
 		}
 		/* FIXME */
-		interruptible_sleep_on(&portp->port.open_wait);
+		interruptible_sleep_on(&port->open_wait);
 	}
 
 	if (! tty_hung_up_p(filp))
-		portp->port.count++;
+		port->count++;
 	portp->openwaitcnt--;
 	spin_unlock_irqrestore(&stallion_lock, flags);
 
@@ -1776,6 +1785,7 @@ static int __devinit stl_initports(struct stlbrd *brdp, struct stlpanel *panelp)
 			break;
 		}
 		tty_port_init(&portp->port);
+		portp->port.ops = &stl_port_ops;
 		portp->magic = STL_PORTMAGIC;
 		portp->portnr = i;
 		portp->brdnr = panelp->brdnr;
@@ -2659,6 +2669,10 @@ static const struct tty_operations stl_ops = {
 	.tiocmset = stl_tiocmset,
 };
 
+static const struct tty_port_operations stl_port_ops = {
+	.carrier_raised = stl_carrier_raised,
+};
+
 /*****************************************************************************/
 /*                       CD1400 HARDWARE FUNCTIONS                           */
 /*****************************************************************************/
diff --git a/drivers/char/sx.c b/drivers/char/sx.c
index ba4e86281fbf..a71bc58abe7f 100644
--- a/drivers/char/sx.c
+++ b/drivers/char/sx.c
@@ -279,7 +279,7 @@ static void sx_disable_tx_interrupts(void *ptr);
 static void sx_enable_tx_interrupts(void *ptr);
 static void sx_disable_rx_interrupts(void *ptr);
 static void sx_enable_rx_interrupts(void *ptr);
-static int sx_get_CD(void *ptr);
+static int sx_carrier_raised(struct tty_port *port);
 static void sx_shutdown_port(void *ptr);
 static int sx_set_real_termios(void *ptr);
 static void sx_close(void *ptr);
@@ -360,7 +360,6 @@ static struct real_driver sx_real_driver = {
 	sx_enable_tx_interrupts,
 	sx_disable_rx_interrupts,
 	sx_enable_rx_interrupts,
-	sx_get_CD,
 	sx_shutdown_port,
 	sx_set_real_termios,
 	sx_chars_in_buffer,
@@ -791,7 +790,7 @@ static int sx_getsignals(struct sx_port *port)
 	sx_dprintk(SX_DEBUG_MODEMSIGNALS, "getsignals: %d/%d  (%d/%d) "
 			"%02x/%02x\n",
 			(o_stat & OP_DTR) != 0, (o_stat & OP_RTS) != 0,
-			port->c_dcd, sx_get_CD(port),
+			port->c_dcd, tty_port_carrier_raised(&port->gs.port),
 			sx_read_channel_byte(port, hi_ip),
 			sx_read_channel_byte(port, hi_state));
 
@@ -1190,7 +1189,7 @@ static inline void sx_check_modem_signals(struct sx_port *port)
 
 	hi_state = sx_read_channel_byte(port, hi_state);
 	sx_dprintk(SX_DEBUG_MODEMSIGNALS, "Checking modem signals (%d/%d)\n",
-			port->c_dcd, sx_get_CD(port));
+			port->c_dcd, tty_port_carrier_raised(&port->gs.port));
 
 	if (hi_state & ST_BREAK) {
 		hi_state &= ~ST_BREAK;
@@ -1202,11 +1201,11 @@ static inline void sx_check_modem_signals(struct sx_port *port)
 		hi_state &= ~ST_DCD;
 		sx_dprintk(SX_DEBUG_MODEMSIGNALS, "got a DCD change.\n");
 		sx_write_channel_byte(port, hi_state, hi_state);
-		c_dcd = sx_get_CD(port);
+		c_dcd = tty_port_carrier_raised(&port->gs.port);
 		sx_dprintk(SX_DEBUG_MODEMSIGNALS, "DCD is now %d\n", c_dcd);
 		if (c_dcd != port->c_dcd) {
 			port->c_dcd = c_dcd;
-			if (sx_get_CD(port)) {
+			if (tty_port_carrier_raised(&port->gs.port)) {
 				/* DCD went UP */
 				if ((sx_read_channel_byte(port, hi_hstat) !=
 						HS_IDLE_CLOSED) &&
@@ -1415,13 +1414,10 @@ static void sx_enable_rx_interrupts(void *ptr)
 }
 
 /* Jeez. Isn't this simple? */
-static int sx_get_CD(void *ptr)
+static int sx_carrier_raised(struct tty_port *port)
 {
-	struct sx_port *port = ptr;
-	func_enter2();
-
-	func_exit();
-	return ((sx_read_channel_byte(port, hi_ip) & IP_DCD) != 0);
+	struct sx_port *sp = container_of(port, struct sx_port, gs.port);
+	return ((sx_read_channel_byte(sp, hi_ip) & IP_DCD) != 0);
 }
 
 /* Jeez. Isn't this simple? */
@@ -1536,7 +1532,7 @@ static int sx_open(struct tty_struct *tty, struct file *filp)
 	}
 	/* tty->low_latency = 1; */
 
-	port->c_dcd = sx_get_CD(port);
+	port->c_dcd = sx_carrier_raised(&port->gs.port);
 	sx_dprintk(SX_DEBUG_OPEN, "at open: cd=%d\n", port->c_dcd);
 
 	func_exit();
@@ -2354,6 +2350,10 @@ static const struct tty_operations sx_ops = {
 	.tiocmset = sx_tiocmset,
 };
 
+static const struct tty_port_operations sx_port_ops = {
+	.carrier_raised = sx_carrier_raised,
+};
+
 static int sx_init_drivers(void)
 {
 	int error;
@@ -2410,6 +2410,7 @@ static int sx_init_portstructs(int nboards, int nports)
 		for (j = 0; j < boards[i].nports; j++) {
 			sx_dprintk(SX_DEBUG_INIT, "initing port %d\n", j);
 			tty_port_init(&port->gs.port);
+			port->gs.port.ops = &sx_port_ops;
 			port->gs.magic = SX_MAGIC;
 			port->gs.close_delay = HZ / 2;
 			port->gs.closing_wait = 30 * HZ;
diff --git a/drivers/char/synclink.c b/drivers/char/synclink.c
index 500f5176b6ba..fb2e6b5e0ef1 100644
--- a/drivers/char/synclink.c
+++ b/drivers/char/synclink.c
@@ -3281,6 +3281,23 @@ static void mgsl_hangup(struct tty_struct *tty)
 	
 }	/* end of mgsl_hangup() */
 
+/*
+ * carrier_raised()
+ *
+ *	Return true if carrier is raised
+ */
+
+static int carrier_raised(struct tty_port *port)
+{
+	unsigned long flags;
+	struct mgsl_struct *info = container_of(port, struct mgsl_struct, port);
+	
+	spin_lock_irqsave(&info->irq_spinlock, flags);
+ 	usc_get_serial_signals(info);
+	spin_unlock_irqrestore(&info->irq_spinlock, flags);
+	return (info->serial_signals & SerialSignal_DCD) ? 1 : 0;
+}
+
 /* block_til_ready()
  * 
  * 	Block the current process until the specified port
@@ -3302,6 +3319,8 @@ static int block_til_ready(struct tty_struct *tty, struct file * filp,
 	bool		do_clocal = false;
 	bool		extra_count = false;
 	unsigned long	flags;
+	int		dcd;
+	struct tty_port *port = &info->port;
 	
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):block_til_ready on %s\n",
@@ -3309,7 +3328,7 @@ static int block_til_ready(struct tty_struct *tty, struct file * filp,
 
 	if (filp->f_flags & O_NONBLOCK || tty->flags & (1 << TTY_IO_ERROR)){
 		/* nonblock mode is set or port is not enabled */
-		info->port.flags |= ASYNC_NORMAL_ACTIVE;
+		port->flags |= ASYNC_NORMAL_ACTIVE;
 		return 0;
 	}
 
@@ -3318,25 +3337,25 @@ static int block_til_ready(struct tty_struct *tty, struct file * filp,
 
 	/* Wait for carrier detect and the line to become
 	 * free (i.e., not in use by the callout).  While we are in
-	 * this loop, info->port.count is dropped by one, so that
+	 * this loop, port->count is dropped by one, so that
 	 * mgsl_close() knows when to free things.  We restore it upon
 	 * exit, either normal or abnormal.
 	 */
 	 
 	retval = 0;
-	add_wait_queue(&info->port.open_wait, &wait);
+	add_wait_queue(&port->open_wait, &wait);
 	
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):block_til_ready before block on %s count=%d\n",
-			 __FILE__,__LINE__, tty->driver->name, info->port.count );
+			 __FILE__,__LINE__, tty->driver->name, port->count );
 
 	spin_lock_irqsave(&info->irq_spinlock, flags);
 	if (!tty_hung_up_p(filp)) {
 		extra_count = true;
-		info->port.count--;
+		port->count--;
 	}
 	spin_unlock_irqrestore(&info->irq_spinlock, flags);
-	info->port.blocked_open++;
+	port->blocked_open++;
 	
 	while (1) {
 		if (tty->termios->c_cflag & CBAUD) {
@@ -3348,20 +3367,16 @@ static int block_til_ready(struct tty_struct *tty, struct file * filp,
 		
 		set_current_state(TASK_INTERRUPTIBLE);
 		
-		if (tty_hung_up_p(filp) || !(info->port.flags & ASYNC_INITIALIZED)){
-			retval = (info->port.flags & ASYNC_HUP_NOTIFY) ?
+		if (tty_hung_up_p(filp) || !(port->flags & ASYNC_INITIALIZED)){
+			retval = (port->flags & ASYNC_HUP_NOTIFY) ?
 					-EAGAIN : -ERESTARTSYS;
 			break;
 		}
 		
-		spin_lock_irqsave(&info->irq_spinlock,flags);
-	 	usc_get_serial_signals(info);
-		spin_unlock_irqrestore(&info->irq_spinlock,flags);
+		dcd = tty_port_carrier_raised(&info->port);
 		
- 		if (!(info->port.flags & ASYNC_CLOSING) &&
- 		    (do_clocal || (info->serial_signals & SerialSignal_DCD)) ) {
+ 		if (!(port->flags & ASYNC_CLOSING) && (do_clocal || dcd))
  			break;
-		}
 			
 		if (signal_pending(current)) {
 			retval = -ERESTARTSYS;
@@ -3370,24 +3385,24 @@ static int block_til_ready(struct tty_struct *tty, struct file * filp,
 		
 		if (debug_level >= DEBUG_LEVEL_INFO)
 			printk("%s(%d):block_til_ready blocking on %s count=%d\n",
-				 __FILE__,__LINE__, tty->driver->name, info->port.count );
+				 __FILE__,__LINE__, tty->driver->name, port->count );
 				 
 		schedule();
 	}
 	
 	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&info->port.open_wait, &wait);
+	remove_wait_queue(&port->open_wait, &wait);
 	
 	if (extra_count)
-		info->port.count++;
-	info->port.blocked_open--;
+		port->count++;
+	port->blocked_open--;
 	
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):block_til_ready after blocking on %s count=%d\n",
-			 __FILE__,__LINE__, tty->driver->name, info->port.count );
+			 __FILE__,__LINE__, tty->driver->name, port->count );
 			 
 	if (!retval)
-		info->port.flags |= ASYNC_NORMAL_ACTIVE;
+		port->flags |= ASYNC_NORMAL_ACTIVE;
 		
 	return retval;
 	
@@ -4304,6 +4319,11 @@ static void mgsl_add_device( struct mgsl_struct *info )
 
 }	/* end of mgsl_add_device() */
 
+static const struct tty_port_operations mgsl_port_ops = {
+	.carrier_raised = carrier_raised,
+};
+
+
 /* mgsl_allocate_device()
  * 
  * 	Allocate and initialize a device instance structure
@@ -4322,6 +4342,7 @@ static struct mgsl_struct* mgsl_allocate_device(void)
 		printk("Error can't allocate device instance data\n");
 	} else {
 		tty_port_init(&info->port);
+		info->port.ops = &mgsl_port_ops;
 		info->magic = MGSL_MAGIC;
 		INIT_WORK(&info->task, mgsl_bh_handler);
 		info->max_frame_size = 4096;
diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c
index 08911ed66494..39ccaba8ca37 100644
--- a/drivers/char/synclink_gt.c
+++ b/drivers/char/synclink_gt.c
@@ -3132,6 +3132,17 @@ static int tiocmset(struct tty_struct *tty, struct file *file,
 	return 0;
 }
 
+static int carrier_raised(struct tty_port *port)
+{
+	unsigned long flags;
+	struct slgt_info *info = container_of(port, struct slgt_info, port);
+
+	spin_lock_irqsave(&info->lock,flags);
+ 	get_signals(info);
+	spin_unlock_irqrestore(&info->lock,flags);
+	return (info->signals & SerialSignal_DCD) ? 1 : 0;
+}
+
 /*
  *  block current process until the device is ready to open
  */
@@ -3143,12 +3154,14 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	bool		do_clocal = false;
 	bool		extra_count = false;
 	unsigned long	flags;
+	int		cd;
+	struct tty_port *port = &info->port;
 
 	DBGINFO(("%s block_til_ready\n", tty->driver->name));
 
 	if (filp->f_flags & O_NONBLOCK || tty->flags & (1 << TTY_IO_ERROR)){
 		/* nonblock mode is set or port is not enabled */
-		info->port.flags |= ASYNC_NORMAL_ACTIVE;
+		port->flags |= ASYNC_NORMAL_ACTIVE;
 		return 0;
 	}
 
@@ -3157,21 +3170,21 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 
 	/* Wait for carrier detect and the line to become
 	 * free (i.e., not in use by the callout).  While we are in
-	 * this loop, info->port.count is dropped by one, so that
+	 * this loop, port->count is dropped by one, so that
 	 * close() knows when to free things.  We restore it upon
 	 * exit, either normal or abnormal.
 	 */
 
 	retval = 0;
-	add_wait_queue(&info->port.open_wait, &wait);
+	add_wait_queue(&port->open_wait, &wait);
 
 	spin_lock_irqsave(&info->lock, flags);
 	if (!tty_hung_up_p(filp)) {
 		extra_count = true;
-		info->port.count--;
+		port->count--;
 	}
 	spin_unlock_irqrestore(&info->lock, flags);
-	info->port.blocked_open++;
+	port->blocked_open++;
 
 	while (1) {
 		if ((tty->termios->c_cflag & CBAUD)) {
@@ -3183,20 +3196,16 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 
 		set_current_state(TASK_INTERRUPTIBLE);
 
-		if (tty_hung_up_p(filp) || !(info->port.flags & ASYNC_INITIALIZED)){
-			retval = (info->port.flags & ASYNC_HUP_NOTIFY) ?
+		if (tty_hung_up_p(filp) || !(port->flags & ASYNC_INITIALIZED)){
+			retval = (port->flags & ASYNC_HUP_NOTIFY) ?
 					-EAGAIN : -ERESTARTSYS;
 			break;
 		}
 
-		spin_lock_irqsave(&info->lock,flags);
-	 	get_signals(info);
-		spin_unlock_irqrestore(&info->lock,flags);
+		cd = tty_port_carrier_raised(port);
 
- 		if (!(info->port.flags & ASYNC_CLOSING) &&
- 		    (do_clocal || (info->signals & SerialSignal_DCD)) ) {
+ 		if (!(port->flags & ASYNC_CLOSING) && (do_clocal || cd ))
  			break;
-		}
 
 		if (signal_pending(current)) {
 			retval = -ERESTARTSYS;
@@ -3208,14 +3217,14 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	}
 
 	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&info->port.open_wait, &wait);
+	remove_wait_queue(&port->open_wait, &wait);
 
 	if (extra_count)
-		info->port.count++;
-	info->port.blocked_open--;
+		port->count++;
+	port->blocked_open--;
 
 	if (!retval)
-		info->port.flags |= ASYNC_NORMAL_ACTIVE;
+		port->flags |= ASYNC_NORMAL_ACTIVE;
 
 	DBGINFO(("%s block_til_ready ready, rc=%d\n", tty->driver->name, retval));
 	return retval;
@@ -3444,6 +3453,10 @@ static void add_device(struct slgt_info *info)
 #endif
 }
 
+static const struct tty_port_operations slgt_port_ops = {
+	.carrier_raised = carrier_raised,
+};
+
 /*
  *  allocate device instance structure, return NULL on failure
  */
@@ -3458,6 +3471,7 @@ static struct slgt_info *alloc_dev(int adapter_num, int port_num, struct pci_dev
 			driver_name, adapter_num, port_num));
 	} else {
 		tty_port_init(&info->port);
+		info->port.ops = &slgt_port_ops;
 		info->magic = MGSL_MAGIC;
 		INIT_WORK(&info->task, bh_handler);
 		info->max_frame_size = 4096;
diff --git a/drivers/char/synclinkmp.c b/drivers/char/synclinkmp.c
index 6bdb44f7bec2..fcf1ec77450d 100644
--- a/drivers/char/synclinkmp.c
+++ b/drivers/char/synclinkmp.c
@@ -558,6 +558,7 @@ static void release_resources(SLMP_INFO *info);
 
 static int  startup(SLMP_INFO *info);
 static int  block_til_ready(struct tty_struct *tty, struct file * filp,SLMP_INFO *info);
+static int carrier_raised(struct tty_port *port);
 static void shutdown(SLMP_INFO *info);
 static void program_hw(SLMP_INFO *info);
 static void change_params(SLMP_INFO *info);
@@ -3318,7 +3319,17 @@ static int tiocmset(struct tty_struct *tty, struct file *file,
 	return 0;
 }
 
+static int carrier_raised(struct tty_port *port)
+{
+	SLMP_INFO *info = container_of(port, SLMP_INFO, port);
+	unsigned long flags;
 
+	spin_lock_irqsave(&info->lock,flags);
+ 	get_signals(info);
+	spin_unlock_irqrestore(&info->lock,flags);
+
+	return (info->serial_signals & SerialSignal_DCD) ? 1 : 0;
+}
 
 /* Block the current process until the specified port is ready to open.
  */
@@ -3330,6 +3341,8 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	bool		do_clocal = false;
 	bool		extra_count = false;
 	unsigned long	flags;
+	int		cd;
+	struct tty_port *port = &info->port;
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):%s block_til_ready()\n",
@@ -3338,7 +3351,7 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	if (filp->f_flags & O_NONBLOCK || tty->flags & (1 << TTY_IO_ERROR)){
 		/* nonblock mode is set or port is not enabled */
 		/* just verify that callout device is not active */
-		info->port.flags |= ASYNC_NORMAL_ACTIVE;
+		port->flags |= ASYNC_NORMAL_ACTIVE;
 		return 0;
 	}
 
@@ -3347,25 +3360,25 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 
 	/* Wait for carrier detect and the line to become
 	 * free (i.e., not in use by the callout).  While we are in
-	 * this loop, info->port.count is dropped by one, so that
+	 * this loop, port->count is dropped by one, so that
 	 * close() knows when to free things.  We restore it upon
 	 * exit, either normal or abnormal.
 	 */
 
 	retval = 0;
-	add_wait_queue(&info->port.open_wait, &wait);
+	add_wait_queue(&port->open_wait, &wait);
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):%s block_til_ready() before block, count=%d\n",
-			 __FILE__,__LINE__, tty->driver->name, info->port.count );
+			 __FILE__,__LINE__, tty->driver->name, port->count );
 
 	spin_lock_irqsave(&info->lock, flags);
 	if (!tty_hung_up_p(filp)) {
 		extra_count = true;
-		info->port.count--;
+		port->count--;
 	}
 	spin_unlock_irqrestore(&info->lock, flags);
-	info->port.blocked_open++;
+	port->blocked_open++;
 
 	while (1) {
 		if ((tty->termios->c_cflag & CBAUD)) {
@@ -3377,20 +3390,16 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 
 		set_current_state(TASK_INTERRUPTIBLE);
 
-		if (tty_hung_up_p(filp) || !(info->port.flags & ASYNC_INITIALIZED)){
-			retval = (info->port.flags & ASYNC_HUP_NOTIFY) ?
+		if (tty_hung_up_p(filp) || !(port->flags & ASYNC_INITIALIZED)){
+			retval = (port->flags & ASYNC_HUP_NOTIFY) ?
 					-EAGAIN : -ERESTARTSYS;
 			break;
 		}
 
-		spin_lock_irqsave(&info->lock,flags);
-	 	get_signals(info);
-		spin_unlock_irqrestore(&info->lock,flags);
+		cd = tty_port_carrier_raised(port);
 
- 		if (!(info->port.flags & ASYNC_CLOSING) &&
- 		    (do_clocal || (info->serial_signals & SerialSignal_DCD)) ) {
+ 		if (!(port->flags & ASYNC_CLOSING) && (do_clocal || cd))
  			break;
-		}
 
 		if (signal_pending(current)) {
 			retval = -ERESTARTSYS;
@@ -3399,24 +3408,24 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 
 		if (debug_level >= DEBUG_LEVEL_INFO)
 			printk("%s(%d):%s block_til_ready() count=%d\n",
-				 __FILE__,__LINE__, tty->driver->name, info->port.count );
+				 __FILE__,__LINE__, tty->driver->name, port->count );
 
 		schedule();
 	}
 
 	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&info->port.open_wait, &wait);
+	remove_wait_queue(&port->open_wait, &wait);
 
 	if (extra_count)
-		info->port.count++;
-	info->port.blocked_open--;
+		port->count++;
+	port->blocked_open--;
 
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):%s block_til_ready() after, count=%d\n",
-			 __FILE__,__LINE__, tty->driver->name, info->port.count );
+			 __FILE__,__LINE__, tty->driver->name, port->count );
 
 	if (!retval)
-		info->port.flags |= ASYNC_NORMAL_ACTIVE;
+		port->flags |= ASYNC_NORMAL_ACTIVE;
 
 	return retval;
 }
@@ -3782,6 +3791,10 @@ static void add_device(SLMP_INFO *info)
 #endif
 }
 
+static const struct tty_port_operations port_ops = {
+	.carrier_raised = carrier_raised,
+};
+
 /* Allocate and initialize a device instance structure
  *
  * Return Value:	pointer to SLMP_INFO if success, otherwise NULL
@@ -3798,6 +3811,7 @@ static SLMP_INFO *alloc_dev(int adapter_num, int port_num, struct pci_dev *pdev)
 			__FILE__,__LINE__, adapter_num, port_num);
 	} else {
 		tty_port_init(&info->port);
+		info->port.ops = &port_ops;
 		info->magic = MGSL_MAGIC;
 		INIT_WORK(&info->task, bh_handler);
 		info->max_frame_size = 4096;
@@ -3940,6 +3954,7 @@ static const struct tty_operations ops = {
 	.tiocmset = tiocmset,
 };
 
+
 static void synclinkmp_cleanup(void)
 {
 	int rc;
diff --git a/drivers/char/tty_port.c b/drivers/char/tty_port.c
index c8f8024cb40e..f54e40cbf023 100644
--- a/drivers/char/tty_port.c
+++ b/drivers/char/tty_port.c
@@ -94,3 +94,20 @@ void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty)
 	spin_unlock_irqrestore(&port->lock, flags);
 }
 EXPORT_SYMBOL(tty_port_tty_set);
+
+/**
+ *	tty_port_carrier_raised	-	carrier raised check
+ *	@port: tty port
+ *
+ *	Wrapper for the carrier detect logic. For the moment this is used
+ *	to hide some internal details. This will eventually become entirely
+ *	internal to the tty port.
+ */
+
+int tty_port_carrier_raised(struct tty_port *port)
+{
+	if (port->ops->carrier_raised == NULL)
+		return 1;
+	return port->ops->carrier_raised(port);
+}
+EXPORT_SYMBOL(tty_port_carrier_raised);
diff --git a/drivers/char/vme_scc.c b/drivers/char/vme_scc.c
index 1718b3c481db..d4e1534c0e03 100644
--- a/drivers/char/vme_scc.c
+++ b/drivers/char/vme_scc.c
@@ -69,7 +69,7 @@ static void scc_disable_tx_interrupts(void * ptr);
 static void scc_enable_tx_interrupts(void * ptr);
 static void scc_disable_rx_interrupts(void * ptr);
 static void scc_enable_rx_interrupts(void * ptr);
-static int  scc_get_CD(void * ptr);
+static int  scc_carrier_raised(struct tty_port *port);
 static void scc_shutdown_port(void * ptr);
 static int scc_set_real_termios(void  *ptr);
 static void scc_hungup(void  *ptr);
@@ -100,7 +100,6 @@ static struct real_driver scc_real_driver = {
         scc_enable_tx_interrupts,
         scc_disable_rx_interrupts,
         scc_enable_rx_interrupts,
-        scc_get_CD,
         scc_shutdown_port,
         scc_set_real_termios,
         scc_chars_in_buffer,
@@ -129,6 +128,10 @@ static const struct tty_operations scc_ops = {
 	.break_ctl = scc_break_ctl,
 };
 
+static const struct tty_port_operations scc_port_ops = {
+	.carrier_raised = scc_carrier_raised,
+};
+
 /*----------------------------------------------------------------------------
  * vme_scc_init() and support functions
  *---------------------------------------------------------------------------*/
@@ -176,6 +179,8 @@ static void scc_init_portstructs(void)
 
 	for (i = 0; i < 2; i++) {
 		port = scc_ports + i;
+		tty_port_init(&port->gs.port);
+		port->gs.port.ops = &scc_port_ops;
 		port->gs.magic = SCC_MAGIC;
 		port->gs.close_delay = HZ/2;
 		port->gs.closing_wait = 30 * HZ;
@@ -624,9 +629,9 @@ static void scc_enable_rx_interrupts(void *ptr)
 }
 
 
-static int scc_get_CD(void *ptr)
+static int scc_carrier_raised(struct tty_port *port)
 {
-	struct scc_port *port = ptr;
+	struct scc_port *scc = container_of(port, struct scc_port, gs.port);
 	unsigned channel = port->channel;
 
 	return !!(scc_last_status_reg[channel] & SR_DCD);
@@ -896,7 +901,7 @@ static int scc_open (struct tty_struct * tty, struct file * filp)
 		return retval;
 	}
 
-	port->c_dcd = scc_get_CD (port);
+	port->c_dcd = tty_port_carrier_raised(&port->gs.port);
 
 	scc_enable_rx_interrupts(port);
 
diff --git a/include/linux/generic_serial.h b/include/linux/generic_serial.h
index 4cc913939817..fadff28505bb 100644
--- a/include/linux/generic_serial.h
+++ b/include/linux/generic_serial.h
@@ -21,7 +21,6 @@ struct real_driver {
   void                    (*enable_tx_interrupts) (void *);
   void                    (*disable_rx_interrupts) (void *);
   void                    (*enable_rx_interrupts) (void *);
-  int                     (*get_CD) (void *);
   void                    (*shutdown_port) (void*);
   int                     (*set_real_termios) (void*);
   int                     (*chars_in_buffer) (void*);
diff --git a/include/linux/tty.h b/include/linux/tty.h
index bbbeaef99626..bc7bae78e22f 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -180,8 +180,16 @@ struct signal_struct;
  * until a hangup so don't use the wrong path.
  */
 
+struct tty_port;
+
+struct tty_port_operations {
+	/* Return 1 if the carrier is raised */
+	int (*carrier_raised)(struct tty_port *port);
+};
+	
 struct tty_port {
 	struct tty_struct	*tty;		/* Back pointer */
+	const struct tty_port_operations *ops;	/* Port operations */
 	spinlock_t		lock;		/* Lock protecting tty field */
 	int			blocked_open;	/* Waiting to open */
 	int			count;		/* Usage count */
@@ -427,6 +435,7 @@ extern int tty_port_alloc_xmit_buf(struct tty_port *port);
 extern void tty_port_free_xmit_buf(struct tty_port *port);
 extern struct tty_struct *tty_port_tty_get(struct tty_port *port);
 extern void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty);
+extern int tty_port_carrier_raised(struct tty_port *port);
 
 extern int tty_register_ldisc(int disc, struct tty_ldisc_ops *new_ldisc);
 extern int tty_unregister_ldisc(int disc);
-- 
cgit v1.2.3


From 5d951fb458f847e5485b5251597fbf326000bb3b Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:45:19 +0000
Subject: tty: Pull the dtr raise into tty port

This moves another per device special out of what should be shared open
wait paths into private methods

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/isicom.c      | 13 ++++++++-----
 drivers/char/mxser.c       | 17 +++++++++++++----
 drivers/char/rocket.c      | 14 ++++++++++----
 drivers/char/synclink.c    | 21 +++++++++++++++------
 drivers/char/synclink_gt.c | 21 +++++++++++++++------
 drivers/char/tty_port.c    | 16 ++++++++++++++++
 include/linux/tty.h        |  2 ++
 7 files changed, 79 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/isicom.c b/drivers/char/isicom.c
index b3da4858fd4a..a449449e301c 100644
--- a/drivers/char/isicom.c
+++ b/drivers/char/isicom.c
@@ -328,11 +328,13 @@ static inline void drop_rts(struct isi_port *port)
 }
 
 /* card->lock MUST NOT be held */
-static inline void raise_dtr_rts(struct isi_port *port)
+
+static void isicom_raise_dtr_rts(struct tty_port *port)
 {
-	struct isi_board *card = port->card;
+	struct isi_port *ip = container_of(port, struct isi_port, port);
+	struct isi_board *card = ip->card;
 	unsigned long base = card->base;
-	u16 channel = port->channel;
+	u16 channel = ip->channel;
 
 	if (!lock_card(card))
 		return;
@@ -340,7 +342,7 @@ static inline void raise_dtr_rts(struct isi_port *port)
 	outw(0x8000 | (channel << card->shift_count) | 0x02, base);
 	outw(0x0f04, base);
 	InterruptTheCard(base);
-	port->status |= (ISI_DTR | ISI_RTS);
+	ip->status |= (ISI_DTR | ISI_RTS);
 	unlock_card(card);
 }
 
@@ -881,7 +883,7 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	spin_unlock_irqrestore(&card->card_lock, flags);
 
 	while (1) {
-		raise_dtr_rts(ip);
+		tty_port_raise_dtr_rts(port);
 
 		set_current_state(TASK_INTERRUPTIBLE);
 		if (tty_hung_up_p(filp) || !(port->flags & ASYNC_INITIALIZED)) {
@@ -1462,6 +1464,7 @@ static const struct tty_operations isicom_ops = {
 
 static const struct tty_port_operations isicom_port_ops = {
 	.carrier_raised		= isicom_carrier_raised,
+	.raise_dtr_rts		= isicom_raise_dtr_rts,
 };
 
 static int __devinit reset_card(struct pci_dev *pdev,
diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c
index eafbbcf355e7..ff5ff6188809 100644
--- a/drivers/char/mxser.c
+++ b/drivers/char/mxser.c
@@ -547,6 +547,17 @@ static int mxser_carrier_raised(struct tty_port *port)
 	return (inb(mp->ioaddr + UART_MSR) & UART_MSR_DCD)?1:0;
 }
 
+static void mxser_raise_dtr_rts(struct tty_port *port)
+{
+	struct mxser_port *mp = container_of(port, struct mxser_port, port);
+	unsigned long flags;
+
+	spin_lock_irqsave(&mp->slock, flags);
+	outb(inb(mp->ioaddr + UART_MCR) |
+		UART_MCR_DTR | UART_MCR_RTS, mp->ioaddr + UART_MCR);
+	spin_unlock_irqrestore(&mp->slock, flags);
+}
+
 static int mxser_block_til_ready(struct tty_struct *tty, struct file *filp,
 		struct mxser_port *mp)
 {
@@ -586,10 +597,7 @@ static int mxser_block_til_ready(struct tty_struct *tty, struct file *filp,
 	spin_unlock_irqrestore(&mp->slock, flags);
 	port->blocked_open++;
 	while (1) {
-		spin_lock_irqsave(&mp->slock, flags);
-		outb(inb(mp->ioaddr + UART_MCR) |
-			UART_MCR_DTR | UART_MCR_RTS, mp->ioaddr + UART_MCR);
-		spin_unlock_irqrestore(&mp->slock, flags);
+		tty_port_raise_dtr_rts(port);
 		set_current_state(TASK_INTERRUPTIBLE);
 		if (tty_hung_up_p(filp) || !(port->flags & ASYNC_INITIALIZED)) {
 			if (port->flags & ASYNC_HUP_NOTIFY)
@@ -2458,6 +2466,7 @@ static const struct tty_operations mxser_ops = {
 
 struct tty_port_operations mxser_port_ops = {
 	.carrier_raised = mxser_carrier_raised,
+	.raise_dtr_rts = mxser_raise_dtr_rts,
 };
 
 /*
diff --git a/drivers/char/rocket.c b/drivers/char/rocket.c
index 4a4110e703a5..f4d9c3993488 100644
--- a/drivers/char/rocket.c
+++ b/drivers/char/rocket.c
@@ -870,6 +870,13 @@ static int carrier_raised(struct tty_port *port)
 	return (sGetChanStatusLo(&info->channel) & CD_ACT) ? 1 : 0;
 }
 
+static void raise_dtr_rts(struct tty_port *port)
+{
+	struct r_port *info = container_of(port, struct r_port, port);
+	sSetDTR(&info->channel);
+	sSetRTS(&info->channel);
+}
+
 /*  info->port.count is considered critical, protected by spinlocks.  */
 static int block_til_ready(struct tty_struct *tty, struct file *filp,
 			   struct r_port *info)
@@ -928,10 +935,8 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	spin_unlock_irqrestore(&info->slock, flags);
 
 	while (1) {
-		if (tty->termios->c_cflag & CBAUD) {
-			sSetDTR(&info->channel);
-			sSetRTS(&info->channel);
-		}
+		if (tty->termios->c_cflag & CBAUD)
+			tty_port_raise_dtr_rts(port);
 		set_current_state(TASK_INTERRUPTIBLE);
 		if (tty_hung_up_p(filp) || !(info->flags & ROCKET_INITIALIZED)) {
 			if (info->flags & ROCKET_HUP_NOTIFY)
@@ -2381,6 +2386,7 @@ static const struct tty_operations rocket_ops = {
 
 static const struct tty_port_operations rocket_port_ops = {
 	.carrier_raised = carrier_raised,
+	.raise_dtr_rts = raise_dtr_rts,
 };
 
 /*
diff --git a/drivers/char/synclink.c b/drivers/char/synclink.c
index fb2e6b5e0ef1..ac9f21e18c3f 100644
--- a/drivers/char/synclink.c
+++ b/drivers/char/synclink.c
@@ -3298,6 +3298,18 @@ static int carrier_raised(struct tty_port *port)
 	return (info->serial_signals & SerialSignal_DCD) ? 1 : 0;
 }
 
+static void raise_dtr_rts(struct tty_port *port)
+{
+	struct mgsl_struct *info = container_of(port, struct mgsl_struct, port);
+	unsigned long flags;
+
+	spin_lock_irqsave(&info->irq_spinlock,flags);
+	info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
+ 	usc_set_serial_signals(info);
+	spin_unlock_irqrestore(&info->irq_spinlock,flags);
+}
+
+
 /* block_til_ready()
  * 
  * 	Block the current process until the specified port
@@ -3358,12 +3370,8 @@ static int block_til_ready(struct tty_struct *tty, struct file * filp,
 	port->blocked_open++;
 	
 	while (1) {
-		if (tty->termios->c_cflag & CBAUD) {
-			spin_lock_irqsave(&info->irq_spinlock,flags);
-			info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
-		 	usc_set_serial_signals(info);
-			spin_unlock_irqrestore(&info->irq_spinlock,flags);
-		}
+		if (tty->termios->c_cflag & CBAUD)
+			tty_port_raise_dtr_rts(port);
 		
 		set_current_state(TASK_INTERRUPTIBLE);
 		
@@ -4321,6 +4329,7 @@ static void mgsl_add_device( struct mgsl_struct *info )
 
 static const struct tty_port_operations mgsl_port_ops = {
 	.carrier_raised = carrier_raised,
+	.raise_dtr_rts = raise_dtr_rts,
 };
 
 
diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c
index 39ccaba8ca37..625c9bde3be8 100644
--- a/drivers/char/synclink_gt.c
+++ b/drivers/char/synclink_gt.c
@@ -3143,6 +3143,18 @@ static int carrier_raised(struct tty_port *port)
 	return (info->signals & SerialSignal_DCD) ? 1 : 0;
 }
 
+static void raise_dtr_rts(struct tty_port *port)
+{
+	unsigned long flags;
+	struct slgt_info *info = container_of(port, struct slgt_info, port);
+
+	spin_lock_irqsave(&info->lock,flags);
+	info->signals |= SerialSignal_RTS + SerialSignal_DTR;
+ 	set_signals(info);
+	spin_unlock_irqrestore(&info->lock,flags);
+}
+
+
 /*
  *  block current process until the device is ready to open
  */
@@ -3187,12 +3199,8 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	port->blocked_open++;
 
 	while (1) {
-		if ((tty->termios->c_cflag & CBAUD)) {
-			spin_lock_irqsave(&info->lock,flags);
-			info->signals |= SerialSignal_RTS + SerialSignal_DTR;
-		 	set_signals(info);
-			spin_unlock_irqrestore(&info->lock,flags);
-		}
+		if ((tty->termios->c_cflag & CBAUD))
+			tty_port_raise_dtr_rts(port);
 
 		set_current_state(TASK_INTERRUPTIBLE);
 
@@ -3455,6 +3463,7 @@ static void add_device(struct slgt_info *info)
 
 static const struct tty_port_operations slgt_port_ops = {
 	.carrier_raised = carrier_raised,
+	.raise_dtr_rts = raise_dtr_rts,
 };
 
 /*
diff --git a/drivers/char/tty_port.c b/drivers/char/tty_port.c
index f54e40cbf023..0557b6384747 100644
--- a/drivers/char/tty_port.c
+++ b/drivers/char/tty_port.c
@@ -111,3 +111,19 @@ int tty_port_carrier_raised(struct tty_port *port)
 	return port->ops->carrier_raised(port);
 }
 EXPORT_SYMBOL(tty_port_carrier_raised);
+
+/**
+ *	tty_port_raise_dtr_rts	-	Riase DTR/RTS
+ *	@port: tty port
+ *
+ *	Wrapper for the DTR/RTS raise logic. For the moment this is used
+ *	to hide some internal details. This will eventually become entirely
+ *	internal to the tty port.
+ */
+
+void tty_port_raise_dtr_rts(struct tty_port *port)
+{
+	if (port->ops->raise_dtr_rts)
+		port->ops->raise_dtr_rts(port);
+}
+EXPORT_SYMBOL(tty_port_raise_dtr_rts);
diff --git a/include/linux/tty.h b/include/linux/tty.h
index bc7bae78e22f..5001bbcacff6 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -185,6 +185,7 @@ struct tty_port;
 struct tty_port_operations {
 	/* Return 1 if the carrier is raised */
 	int (*carrier_raised)(struct tty_port *port);
+	void (*raise_dtr_rts)(struct tty_port *port);
 };
 	
 struct tty_port {
@@ -436,6 +437,7 @@ extern void tty_port_free_xmit_buf(struct tty_port *port);
 extern struct tty_struct *tty_port_tty_get(struct tty_port *port);
 extern void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty);
 extern int tty_port_carrier_raised(struct tty_port *port);
+extern void tty_port_raise_dtr_rts(struct tty_port *port);
 
 extern int tty_register_ldisc(int disc, struct tty_ldisc_ops *new_ldisc);
 extern int tty_unregister_ldisc(int disc);
-- 
cgit v1.2.3


From 3e61696bdc2103107674b06d0daf30b76193e922 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:45:26 +0000
Subject: isicom: redo locking to use tty port locks

This helps set the basis for moving block_til_ready into common code. We also
introduce a tty_port_hangup helper as this will also be generally needed.

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/isicom.c     | 35 +++++++++++++++--------------------
 drivers/char/synclinkmp.c | 20 ++++++++++++++------
 drivers/char/tty_port.c   | 24 ++++++++++++++++++++++++
 include/linux/tty.h       |  1 +
 4 files changed, 54 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/isicom.c b/drivers/char/isicom.c
index a449449e301c..db53db91ae4a 100644
--- a/drivers/char/isicom.c
+++ b/drivers/char/isicom.c
@@ -841,7 +841,6 @@ static int isicom_carrier_raised(struct tty_port *port)
 static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	struct isi_port *ip)
 {
-	struct isi_board *card = ip->card;
 	struct tty_port *port = &ip->port;
 	int do_clocal = 0, retval;
 	unsigned long flags;
@@ -876,11 +875,11 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	retval = 0;
 	add_wait_queue(&port->open_wait, &wait);
 
-	spin_lock_irqsave(&card->card_lock, flags);
+	spin_lock_irqsave(&port->lock, flags);
 	if (!tty_hung_up_p(filp))
 		port->count--;
 	port->blocked_open++;
-	spin_unlock_irqrestore(&card->card_lock, flags);
+	spin_unlock_irqrestore(&port->lock, flags);
 
 	while (1) {
 		tty_port_raise_dtr_rts(port);
@@ -905,14 +904,13 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	}
 	set_current_state(TASK_RUNNING);
 	remove_wait_queue(&port->open_wait, &wait);
-	spin_lock_irqsave(&card->card_lock, flags);
+	spin_lock_irqsave(&port->lock, flags);
 	if (!tty_hung_up_p(filp))
 		port->count++;
 	port->blocked_open--;
-	spin_unlock_irqrestore(&card->card_lock, flags);
-	if (retval)
-		return retval;
-	port->flags |= ASYNC_NORMAL_ACTIVE;
+	if (retval == 0)
+		port->flags |= ASYNC_NORMAL_ACTIVE;
+	spin_unlock_irqrestore(&port->lock, flags);
 	return 0;
 }
 
@@ -1034,9 +1032,9 @@ static void isicom_close(struct tty_struct *tty, struct file *filp)
 
 	pr_dbg("Close start!!!.\n");
 
-	spin_lock_irqsave(&card->card_lock, flags);
+	spin_lock_irqsave(&port->port.lock, flags);
 	if (tty_hung_up_p(filp)) {
-		spin_unlock_irqrestore(&card->card_lock, flags);
+		spin_unlock_irqrestore(&port->port.lock, flags);
 		return;
 	}
 
@@ -1054,12 +1052,12 @@ static void isicom_close(struct tty_struct *tty, struct file *filp)
 	}
 
 	if (port->port.count) {
-		spin_unlock_irqrestore(&card->card_lock, flags);
+		spin_unlock_irqrestore(&port->port.lock, flags);
 		return;
 	}
 	port->port.flags |= ASYNC_CLOSING;
 	tty->closing = 1;
-	spin_unlock_irqrestore(&card->card_lock, flags);
+	spin_unlock_irqrestore(&port->port.lock, flags);
 
 	if (port->port.closing_wait != ASYNC_CLOSING_WAIT_NONE)
 		tty_wait_until_sent(tty, port->port.closing_wait);
@@ -1076,22 +1074,22 @@ static void isicom_close(struct tty_struct *tty, struct file *filp)
 	isicom_flush_buffer(tty);
 	tty_ldisc_flush(tty);
 
-	spin_lock_irqsave(&card->card_lock, flags);
+	spin_lock_irqsave(&port->port.lock, flags);
 	tty->closing = 0;
 
 	if (port->port.blocked_open) {
-		spin_unlock_irqrestore(&card->card_lock, flags);
+		spin_unlock_irqrestore(&port->port.lock, flags);
 		if (port->port.close_delay) {
 			pr_dbg("scheduling until time out.\n");
 			msleep_interruptible(
 				jiffies_to_msecs(port->port.close_delay));
 		}
-		spin_lock_irqsave(&card->card_lock, flags);
+		spin_lock_irqsave(&port->port.lock, flags);
 		wake_up_interruptible(&port->port.open_wait);
 	}
 	port->port.flags &= ~(ASYNC_NORMAL_ACTIVE | ASYNC_CLOSING);
 	wake_up_interruptible(&port->port.close_wait);
-	spin_unlock_irqrestore(&card->card_lock, flags);
+	spin_unlock_irqrestore(&port->port.lock, flags);
 }
 
 /* write et all */
@@ -1430,10 +1428,7 @@ static void isicom_hangup(struct tty_struct *tty)
 	isicom_shutdown_port(port);
 	spin_unlock_irqrestore(&port->card->card_lock, flags);
 
-	port->port.count = 0;
-	port->port.flags &= ~ASYNC_NORMAL_ACTIVE;
-	tty_port_tty_set(&port->port, NULL);
-	wake_up_interruptible(&port->port.open_wait);
+	tty_port_hangup(&port->port);
 }
 
 
diff --git a/drivers/char/synclinkmp.c b/drivers/char/synclinkmp.c
index fcf1ec77450d..1f5c21ec4b14 100644
--- a/drivers/char/synclinkmp.c
+++ b/drivers/char/synclinkmp.c
@@ -3331,6 +3331,17 @@ static int carrier_raised(struct tty_port *port)
 	return (info->serial_signals & SerialSignal_DCD) ? 1 : 0;
 }
 
+static void raise_dtr_rts(struct tty_port *port)
+{
+	SLMP_INFO *info = container_of(port, SLMP_INFO, port);
+	unsigned long flags;
+
+	spin_lock_irqsave(&info->lock,flags);
+	info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
+ 	set_signals(info);
+	spin_unlock_irqrestore(&info->lock,flags);
+}
+
 /* Block the current process until the specified port is ready to open.
  */
 static int block_til_ready(struct tty_struct *tty, struct file *filp,
@@ -3381,12 +3392,8 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 	port->blocked_open++;
 
 	while (1) {
-		if ((tty->termios->c_cflag & CBAUD)) {
-			spin_lock_irqsave(&info->lock,flags);
-			info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
-		 	set_signals(info);
-			spin_unlock_irqrestore(&info->lock,flags);
-		}
+		if (tty->termios->c_cflag & CBAUD)
+			tty_port_raise_dtr_rts(port);
 
 		set_current_state(TASK_INTERRUPTIBLE);
 
@@ -3793,6 +3800,7 @@ static void add_device(SLMP_INFO *info)
 
 static const struct tty_port_operations port_ops = {
 	.carrier_raised = carrier_raised,
+	.raise_dtr_rts = raise_dtr_rts,
 };
 
 /* Allocate and initialize a device instance structure
diff --git a/drivers/char/tty_port.c b/drivers/char/tty_port.c
index 0557b6384747..9f418bca4a22 100644
--- a/drivers/char/tty_port.c
+++ b/drivers/char/tty_port.c
@@ -7,6 +7,7 @@
 #include <linux/tty.h>
 #include <linux/tty_driver.h>
 #include <linux/tty_flip.h>
+#include <linux/serial.h>
 #include <linux/timer.h>
 #include <linux/string.h>
 #include <linux/slab.h>
@@ -95,6 +96,29 @@ void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty)
 }
 EXPORT_SYMBOL(tty_port_tty_set);
 
+/**
+ *	tty_port_hangup		-	hangup helper
+ *	@port: tty port
+ *
+ *	Perform port level tty hangup flag and count changes. Drop the tty
+ *	reference.
+ */
+
+void tty_port_hangup(struct tty_port *port)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&port->lock, flags);
+	port->count = 0;
+	port->flags &= ~ASYNC_NORMAL_ACTIVE;
+	if (port->tty)
+		tty_kref_put(port->tty);
+	port->tty = NULL;
+	spin_unlock_irqrestore(&port->lock, flags);
+	wake_up_interruptible(&port->open_wait);
+}
+EXPORT_SYMBOL(tty_port_hangup);
+
 /**
  *	tty_port_carrier_raised	-	carrier raised check
  *	@port: tty port
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 5001bbcacff6..a1a93140e6e4 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -438,6 +438,7 @@ extern struct tty_struct *tty_port_tty_get(struct tty_port *port);
 extern void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty);
 extern int tty_port_carrier_raised(struct tty_port *port);
 extern void tty_port_raise_dtr_rts(struct tty_port *port);
+extern void tty_port_hangup(struct tty_port *port);
 
 extern int tty_register_ldisc(int disc, struct tty_ldisc_ops *new_ldisc);
 extern int tty_unregister_ldisc(int disc);
-- 
cgit v1.2.3


From 36c621d82b956ff6ff72273f848af53e6c581aba Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:46:10 +0000
Subject: tty: Introduce a tty_port generic block_til_ready

Start sucking more commonality out of the drivers into a single piece of
core code.

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/isicom.c   |  79 +-----------------------------------
 drivers/char/mxser.c    |  71 +-------------------------------
 drivers/char/riscom8.c  |  86 +--------------------------------------
 drivers/char/synclink.c |   1 +
 drivers/char/tty_port.c | 105 ++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/char/vme_scc.c  |   6 +--
 include/linux/tty.h     |   2 +
 7 files changed, 115 insertions(+), 235 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/isicom.c b/drivers/char/isicom.c
index db53db91ae4a..bac55cf44243 100644
--- a/drivers/char/isicom.c
+++ b/drivers/char/isicom.c
@@ -838,82 +838,6 @@ static int isicom_carrier_raised(struct tty_port *port)
 	return (ip->status & ISI_DCD)?1 : 0;
 }
 
-static int block_til_ready(struct tty_struct *tty, struct file *filp,
-	struct isi_port *ip)
-{
-	struct tty_port *port = &ip->port;
-	int do_clocal = 0, retval;
-	unsigned long flags;
-	DECLARE_WAITQUEUE(wait, current);
-	int cd;
-
-	/* block if port is in the process of being closed */
-
-	if (tty_hung_up_p(filp) || port->flags & ASYNC_CLOSING) {
-		pr_dbg("block_til_ready: close in progress.\n");
-		interruptible_sleep_on(&port->close_wait);
-		if (port->flags & ASYNC_HUP_NOTIFY)
-			return -EAGAIN;
-		else
-			return -ERESTARTSYS;
-	}
-
-	/* if non-blocking mode is set ... */
-
-	if ((filp->f_flags & O_NONBLOCK) ||
-			(tty->flags & (1 << TTY_IO_ERROR))) {
-		pr_dbg("block_til_ready: non-block mode.\n");
-		port->flags |= ASYNC_NORMAL_ACTIVE;
-		return 0;
-	}
-
-	if (C_CLOCAL(tty))
-		do_clocal = 1;
-
-	/* block waiting for DCD to be asserted, and while
-						callout dev is busy */
-	retval = 0;
-	add_wait_queue(&port->open_wait, &wait);
-
-	spin_lock_irqsave(&port->lock, flags);
-	if (!tty_hung_up_p(filp))
-		port->count--;
-	port->blocked_open++;
-	spin_unlock_irqrestore(&port->lock, flags);
-
-	while (1) {
-		tty_port_raise_dtr_rts(port);
-
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (tty_hung_up_p(filp) || !(port->flags & ASYNC_INITIALIZED)) {
-			if (port->flags & ASYNC_HUP_NOTIFY)
-				retval = -EAGAIN;
-			else
-				retval = -ERESTARTSYS;
-			break;
-		}
-		cd = tty_port_carrier_raised(port);
-		if (!(port->flags & ASYNC_CLOSING) &&
-				(do_clocal || cd))
-			break;
-		if (signal_pending(current)) {
-			retval = -ERESTARTSYS;
-			break;
-		}
-		schedule();
-	}
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&port->open_wait, &wait);
-	spin_lock_irqsave(&port->lock, flags);
-	if (!tty_hung_up_p(filp))
-		port->count++;
-	port->blocked_open--;
-	if (retval == 0)
-		port->flags |= ASYNC_NORMAL_ACTIVE;
-	spin_unlock_irqrestore(&port->lock, flags);
-	return 0;
-}
-
 static int isicom_open(struct tty_struct *tty, struct file *filp)
 {
 	struct isi_port *port;
@@ -940,12 +864,13 @@ static int isicom_open(struct tty_struct *tty, struct file *filp)
 
 	isicom_setup_board(card);
 
+	/* FIXME: locking on port.count etc */
 	port->port.count++;
 	tty->driver_data = port;
 	tty_port_tty_set(&port->port, tty);
 	error = isicom_setup_port(tty);
 	if (error == 0)
-		error = block_til_ready(tty, filp, port);
+		error = tty_port_block_til_ready(&port->port, tty, filp);
 	return error;
 }
 
diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c
index e2471cf1ee95..08ba6eb1a380 100644
--- a/drivers/char/mxser.c
+++ b/drivers/char/mxser.c
@@ -558,75 +558,6 @@ static void mxser_raise_dtr_rts(struct tty_port *port)
 	spin_unlock_irqrestore(&mp->slock, flags);
 }
 
-static int mxser_block_til_ready(struct tty_struct *tty, struct file *filp,
-		struct mxser_port *mp)
-{
-	DECLARE_WAITQUEUE(wait, current);
-	int retval;
-	int do_clocal = 0;
-	unsigned long flags;
-	int cd;
-	struct tty_port *port = &mp->port;
-
-	/*
-	 * If non-blocking mode is set, or the port is not enabled,
-	 * then make the check up front and then exit.
-	 */
-	if ((filp->f_flags & O_NONBLOCK) ||
-			test_bit(TTY_IO_ERROR, &tty->flags)) {
-		port->flags |= ASYNC_NORMAL_ACTIVE;
-		return 0;
-	}
-
-	if (tty->termios->c_cflag & CLOCAL)
-		do_clocal = 1;
-
-	/*
-	 * Block waiting for the carrier detect and the line to become
-	 * free (i.e., not in use by the callout).  While we are in
-	 * this loop, port->count is dropped by one, so that
-	 * mxser_close() knows when to free things.  We restore it upon
-	 * exit, either normal or abnormal.
-	 */
-	retval = 0;
-	add_wait_queue(&port->open_wait, &wait);
-
-	spin_lock_irqsave(&port->lock, flags);
-	if (!tty_hung_up_p(filp))
-		port->count--;
-	port->blocked_open++;
-	spin_unlock_irqrestore(&port->lock, flags);
-	while (1) {
-		tty_port_raise_dtr_rts(port);
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (tty_hung_up_p(filp) || !(port->flags & ASYNC_INITIALIZED)) {
-			if (port->flags & ASYNC_HUP_NOTIFY)
-				retval = -EAGAIN;
-			else
-				retval = -ERESTARTSYS;
-			break;
-		}
-		cd = tty_port_carrier_raised(port);
-		if (!(port->flags & ASYNC_CLOSING) && (do_clocal || cd))
-			break;
-		if (signal_pending(current)) {
-			retval = -ERESTARTSYS;
-			break;
-		}
-		schedule();
-	}
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&port->open_wait, &wait);
-	spin_lock_irqsave(&port->lock, flags);
-	if (!tty_hung_up_p(filp))
-		port->count++;
-	port->blocked_open--;
-	if (retval == 0)
-		port->flags |= ASYNC_NORMAL_ACTIVE;
-	spin_unlock_irqrestore(&port->lock, flags);
-	return 0;
-}
-
 static int mxser_set_baud(struct tty_struct *tty, long newspd)
 {
 	struct mxser_port *info = tty->driver_data;
@@ -1110,7 +1041,7 @@ static int mxser_open(struct tty_struct *tty, struct file *filp)
 	if (retval)
 		return retval;
 
-	retval = mxser_block_til_ready(tty, filp, info);
+	retval = tty_port_block_til_ready(&info->port, tty, filp);
 	if (retval)
 		return retval;
 
diff --git a/drivers/char/riscom8.c b/drivers/char/riscom8.c
index 14662d7aa628..af34c2054a09 100644
--- a/drivers/char/riscom8.c
+++ b/drivers/char/riscom8.c
@@ -874,90 +874,6 @@ static int carrier_raised(struct tty_port *port)
 	return CD;
 }
 
-static int block_til_ready(struct tty_struct *tty, struct file *filp,
-			   struct riscom_port *rp)
-{
-	DECLARE_WAITQUEUE(wait, current);
-	int    retval;
-	int    do_clocal = 0;
-	int    CD;
-	unsigned long flags;
-	struct tty_port *port = &rp->port;
-
-	/*
-	 * If the device is in the middle of being closed, then block
-	 * until it's done, and then try again.
-	 */
-	if (tty_hung_up_p(filp) || port->flags & ASYNC_CLOSING) {
-		interruptible_sleep_on(&port->close_wait);
-		if (port->flags & ASYNC_HUP_NOTIFY)
-			return -EAGAIN;
-		else
-			return -ERESTARTSYS;
-	}
-
-	/*
-	 * If non-blocking mode is set, or the port is not enabled,
-	 * then make the check up front and then exit.
-	 */
-	if ((filp->f_flags & O_NONBLOCK) ||
-	    (tty->flags & (1 << TTY_IO_ERROR))) {
-		port->flags |= ASYNC_NORMAL_ACTIVE;
-		return 0;
-	}
-
-	if (C_CLOCAL(tty))
-		do_clocal = 1;
-
-	/*
-	 * Block waiting for the carrier detect and the line to become
-	 * free (i.e., not in use by the callout).  While we are in
-	 * this loop, info->count is dropped by one, so that
-	 * rs_close() knows when to free things.  We restore it upon
-	 * exit, either normal or abnormal.
-	 */
-	retval = 0;
-	add_wait_queue(&port->open_wait, &wait);
-
-	spin_lock_irqsave(&port->lock, flags);
-	if (!tty_hung_up_p(filp))
-		port->count--;
-	port->blocked_open++;
-	spin_unlock_irqrestore(&port->lock, flags);
-
-	while (1) {
-
-		CD = tty_port_carrier_raised(port);
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (tty_hung_up_p(filp) ||
-		    !(port->flags & ASYNC_INITIALIZED)) {
-			if (port->flags & ASYNC_HUP_NOTIFY)
-				retval = -EAGAIN;
-			else
-				retval = -ERESTARTSYS;
-			break;
-		}
-		if (!(port->flags & ASYNC_CLOSING) &&
-		    (do_clocal || CD))
-			break;
-		if (signal_pending(current)) {
-			retval = -ERESTARTSYS;
-			break;
-		}
-		schedule();
-	}
-	__set_current_state(TASK_RUNNING);
-	remove_wait_queue(&port->open_wait, &wait);
-	spin_lock_irqsave(&port->lock, flags);
-	if (!tty_hung_up_p(filp))
-		port->count++;
-	port->blocked_open--;
-	if (retval == 0)
-		port->flags |= ASYNC_NORMAL_ACTIVE;
-	spin_unlock_irqrestore(&port->lock, flags);
-	return 0;
-}
-
 static int rc_open(struct tty_struct *tty, struct file *filp)
 {
 	int board;
@@ -984,7 +900,7 @@ static int rc_open(struct tty_struct *tty, struct file *filp)
 
 	error = rc_setup_port(bp, port);
 	if (error == 0)
-		error = block_til_ready(tty, filp, port);
+		error = tty_port_block_til_ready(&port->port, tty, filp);
 	return error;
 }
 
diff --git a/drivers/char/synclink.c b/drivers/char/synclink.c
index ac9f21e18c3f..0ded4ed3da3c 100644
--- a/drivers/char/synclink.c
+++ b/drivers/char/synclink.c
@@ -3401,6 +3401,7 @@ static int block_til_ready(struct tty_struct *tty, struct file * filp,
 	set_current_state(TASK_RUNNING);
 	remove_wait_queue(&port->open_wait, &wait);
 	
+	/* FIXME: Racy on hangup during close wait */
 	if (extra_count)
 		port->count++;
 	port->blocked_open--;
diff --git a/drivers/char/tty_port.c b/drivers/char/tty_port.c
index 9f418bca4a22..ff94182b3813 100644
--- a/drivers/char/tty_port.c
+++ b/drivers/char/tty_port.c
@@ -151,3 +151,108 @@ void tty_port_raise_dtr_rts(struct tty_port *port)
 		port->ops->raise_dtr_rts(port);
 }
 EXPORT_SYMBOL(tty_port_raise_dtr_rts);
+
+/**
+ *	tty_port_block_til_ready	-	Waiting logic for tty open
+ *	@port: the tty port being opened
+ *	@tty: the tty device being bound
+ *	@filp: the file pointer of the opener
+ *
+ *	Implement the core POSIX/SuS tty behaviour when opening a tty device.
+ *	Handles:
+ *		- hangup (both before and during)
+ *		- non blocking open
+ *		- rts/dtr/dcd
+ *		- signals
+ *		- port flags and counts
+ *
+ *	The passed tty_port must implement the carrier_raised method if it can
+ *	do carrier detect and the raise_dtr_rts method if it supports software
+ *	management of these lines. Note that the dtr/rts raise is done each
+ *	iteration as a hangup may have previously dropped them while we wait.
+ */
+ 
+int tty_port_block_til_ready(struct tty_port *port,
+				struct tty_struct *tty, struct file *filp)
+{
+	int do_clocal = 0, retval;
+	unsigned long flags;
+	DECLARE_WAITQUEUE(wait, current);
+	int cd;
+
+	/* block if port is in the process of being closed */
+	if (tty_hung_up_p(filp) || port->flags & ASYNC_CLOSING) {
+		interruptible_sleep_on(&port->close_wait);
+		if (port->flags & ASYNC_HUP_NOTIFY)
+			return -EAGAIN;
+		else
+			return -ERESTARTSYS;
+	}
+
+	/* if non-blocking mode is set we can pass directly to open unless
+	   the port has just hung up or is in another error state */
+	if ((filp->f_flags & O_NONBLOCK) ||
+			(tty->flags & (1 << TTY_IO_ERROR))) {
+		port->flags |= ASYNC_NORMAL_ACTIVE;
+		return 0;
+	}
+
+	if (C_CLOCAL(tty))
+		do_clocal = 1;
+
+	/* Block waiting until we can proceed. We may need to wait for the
+	   carrier, but we must also wait for any close that is in progress
+	   before the next open may complete */
+
+	retval = 0;
+	add_wait_queue(&port->open_wait, &wait);
+
+	/* The port lock protects the port counts */
+	spin_lock_irqsave(&port->lock, flags);
+	if (!tty_hung_up_p(filp))
+		port->count--;
+	port->blocked_open++;
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	while (1) {
+		/* Indicate we are open */
+		tty_port_raise_dtr_rts(port);
+
+		set_current_state(TASK_INTERRUPTIBLE);
+		/* Check for a hangup or uninitialised port. Return accordingly */
+		if (tty_hung_up_p(filp) || !(port->flags & ASYNC_INITIALIZED)) {
+			if (port->flags & ASYNC_HUP_NOTIFY)
+				retval = -EAGAIN;
+			else
+				retval = -ERESTARTSYS;
+			break;
+		}
+		/* Probe the carrier. For devices with no carrier detect this
+		   will always return true */
+		cd = tty_port_carrier_raised(port);
+		if (!(port->flags & ASYNC_CLOSING) &&
+				(do_clocal || cd))
+			break;
+		if (signal_pending(current)) {
+			retval = -ERESTARTSYS;
+			break;
+		}
+		schedule();
+	}
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(&port->open_wait, &wait);
+
+	/* Update counts. A parallel hangup will have set count to zero and
+	   we must not mess that up further */
+	spin_lock_irqsave(&port->lock, flags);
+	if (!tty_hung_up_p(filp))
+		port->count++;
+	port->blocked_open--;
+	if (retval == 0)
+		port->flags |= ASYNC_NORMAL_ACTIVE;
+	spin_unlock_irqrestore(&port->lock, flags);
+	return 0;
+	
+}
+EXPORT_SYMBOL(tty_port_block_til_ready);
+
diff --git a/drivers/char/vme_scc.c b/drivers/char/vme_scc.c
index d4e1534c0e03..2d9242a45a0d 100644
--- a/drivers/char/vme_scc.c
+++ b/drivers/char/vme_scc.c
@@ -631,8 +631,8 @@ static void scc_enable_rx_interrupts(void *ptr)
 
 static int scc_carrier_raised(struct tty_port *port)
 {
-	struct scc_port *scc = container_of(port, struct scc_port, gs.port);
-	unsigned channel = port->channel;
+	struct scc_port *sc = container_of(port, struct scc_port, gs.port);
+	unsigned channel = sc->channel;
 
 	return !!(scc_last_status_reg[channel] & SR_DCD);
 }
@@ -643,7 +643,7 @@ static void scc_shutdown_port(void *ptr)
 	struct scc_port *port = ptr;
 
 	port->gs.port.flags &= ~ GS_ACTIVE;
-	if (port->gs.port.tty && port->gs.port.tty->termios->c_cflag & HUPCL) {
+	if (port->gs.port.tty && (port->gs.port.tty->termios->c_cflag & HUPCL)) {
 		scc_setsignals (port, 0, 0);
 	}
 }
diff --git a/include/linux/tty.h b/include/linux/tty.h
index a1a93140e6e4..61a0ab32cf11 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -439,6 +439,8 @@ extern void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty);
 extern int tty_port_carrier_raised(struct tty_port *port);
 extern void tty_port_raise_dtr_rts(struct tty_port *port);
 extern void tty_port_hangup(struct tty_port *port);
+extern int tty_port_block_til_ready(struct tty_port *port,
+				struct tty_struct *tty, struct file *filp);
 
 extern int tty_register_ldisc(int disc, struct tty_ldisc_ops *new_ldisc);
 extern int tty_unregister_ldisc(int disc);
-- 
cgit v1.2.3


From 2a6eadbd5a2ae8f458e421f3614f1ad13c0f9a1c Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:46:18 +0000
Subject: tty: Rework istallion to use the tty port changes

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/istallion.c  | 159 +++++++++++++---------------------------------
 include/linux/istallion.h |   1 -
 2 files changed, 43 insertions(+), 117 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/istallion.c b/drivers/char/istallion.c
index c4682f9e34bb..4c69ab97339a 100644
--- a/drivers/char/istallion.c
+++ b/drivers/char/istallion.c
@@ -626,8 +626,6 @@ static int	stli_hostcmd(struct stlibrd *brdp, struct stliport *portp);
 static int	stli_initopen(struct tty_struct *tty, struct stlibrd *brdp, struct stliport *portp);
 static int	stli_rawopen(struct stlibrd *brdp, struct stliport *portp, unsigned long arg, int wait);
 static int	stli_rawclose(struct stlibrd *brdp, struct stliport *portp, unsigned long arg, int wait);
-static int	stli_waitcarrier(struct tty_struct *tty, struct stlibrd *brdp,
-				struct stliport *portp, struct file *filp);
 static int	stli_setport(struct tty_struct *tty);
 static int	stli_cmdwait(struct stlibrd *brdp, struct stliport *portp, unsigned long cmd, void *arg, int size, int copyback);
 static void	stli_sendcmd(struct stlibrd *brdp, struct stliport *portp, unsigned long cmd, void *arg, int size, int copyback);
@@ -787,6 +785,7 @@ static int stli_open(struct tty_struct *tty, struct file *filp)
 {
 	struct stlibrd *brdp;
 	struct stliport *portp;
+	struct tty_port *port;
 	unsigned int minordev, brdnr, portnr;
 	int rc;
 
@@ -808,30 +807,19 @@ static int stli_open(struct tty_struct *tty, struct file *filp)
 		return -ENODEV;
 	if (portp->devnr < 1)
 		return -ENODEV;
-
-
-/*
- *	Check if this port is in the middle of closing. If so then wait
- *	until it is closed then return error status based on flag settings.
- *	The sleep here does not need interrupt protection since the wakeup
- *	for it is done with the same context.
- */
-	if (portp->port.flags & ASYNC_CLOSING) {
-		interruptible_sleep_on(&portp->port.close_wait);
-		if (portp->port.flags & ASYNC_HUP_NOTIFY)
-			return -EAGAIN;
-		return -ERESTARTSYS;
-	}
+	port = &portp->port;
 
 /*
  *	On the first open of the device setup the port hardware, and
  *	initialize the per port data structure. Since initializing the port
  *	requires several commands to the board we will need to wait for any
  *	other open that is already initializing the port.
+ *
+ *	Review - locking
  */
-	tty_port_tty_set(&portp->port, tty);
+	tty_port_tty_set(port, tty);
 	tty->driver_data = portp;
-	portp->port.count++;
+	port->count++;
 
 	wait_event_interruptible(portp->raw_wait,
 			!test_bit(ST_INITIALIZING, &portp->state));
@@ -841,7 +829,8 @@ static int stli_open(struct tty_struct *tty, struct file *filp)
 	if ((portp->port.flags & ASYNC_INITIALIZED) == 0) {
 		set_bit(ST_INITIALIZING, &portp->state);
 		if ((rc = stli_initopen(tty, brdp, portp)) >= 0) {
-			portp->port.flags |= ASYNC_INITIALIZED;
+			/* Locking */
+			port->flags |= ASYNC_INITIALIZED;
 			clear_bit(TTY_IO_ERROR, &tty->flags);
 		}
 		clear_bit(ST_INITIALIZING, &portp->state);
@@ -849,31 +838,7 @@ static int stli_open(struct tty_struct *tty, struct file *filp)
 		if (rc < 0)
 			return rc;
 	}
-
-/*
- *	Check if this port is in the middle of closing. If so then wait
- *	until it is closed then return error status, based on flag settings.
- *	The sleep here does not need interrupt protection since the wakeup
- *	for it is done with the same context.
- */
-	if (portp->port.flags & ASYNC_CLOSING) {
-		interruptible_sleep_on(&portp->port.close_wait);
-		if (portp->port.flags & ASYNC_HUP_NOTIFY)
-			return -EAGAIN;
-		return -ERESTARTSYS;
-	}
-
-/*
- *	Based on type of open being done check if it can overlap with any
- *	previous opens still in effect. If we are a normal serial device
- *	then also we might have to wait for carrier.
- */
-	if (!(filp->f_flags & O_NONBLOCK)) {
-		if ((rc = stli_waitcarrier(tty, brdp, portp, filp)) != 0)
-			return rc;
-	}
-	portp->port.flags |= ASYNC_NORMAL_ACTIVE;
-	return 0;
+	return tty_port_block_til_ready(&portp->port, tty, filp);
 }
 
 /*****************************************************************************/
@@ -882,25 +847,29 @@ static void stli_close(struct tty_struct *tty, struct file *filp)
 {
 	struct stlibrd *brdp;
 	struct stliport *portp;
+	struct tty_port *port;
 	unsigned long flags;
 
 	portp = tty->driver_data;
 	if (portp == NULL)
 		return;
+	port = &portp->port;
 
-	spin_lock_irqsave(&stli_lock, flags);
+	spin_lock_irqsave(&port->lock, flags);
 	if (tty_hung_up_p(filp)) {
-		spin_unlock_irqrestore(&stli_lock, flags);
+		spin_unlock_irqrestore(&port->lock, flags);
 		return;
 	}
-	if ((tty->count == 1) && (portp->port.count != 1))
-		portp->port.count = 1;
-	if (portp->port.count-- > 1) {
-		spin_unlock_irqrestore(&stli_lock, flags);
+	if (tty->count == 1 && port->count != 1)
+		port->count = 1;
+	if (port->count-- > 1) {
+		spin_unlock_irqrestore(&port->lock, flags);
 		return;
 	}
 
-	portp->port.flags |= ASYNC_CLOSING;
+	port->flags |= ASYNC_CLOSING;
+	tty->closing = 1;
+	spin_unlock_irqrestore(&port->lock, flags);
 
 /*
  *	May want to wait for data to drain before closing. The BUSY flag
@@ -908,15 +877,17 @@ static void stli_close(struct tty_struct *tty, struct file *filp)
  *	updated by messages from the slave - indicating when all chars
  *	really have drained.
  */
+ 	spin_lock_irqsave(&stli_lock, flags);
 	if (tty == stli_txcooktty)
 		stli_flushchars(tty);
-	tty->closing = 1;
 	spin_unlock_irqrestore(&stli_lock, flags);
 
 	if (portp->closing_wait != ASYNC_CLOSING_WAIT_NONE)
 		tty_wait_until_sent(tty, portp->closing_wait);
 
-	portp->port.flags &= ~ASYNC_INITIALIZED;
+	/* FIXME: port locking here needs attending to */
+	port->flags &= ~ASYNC_INITIALIZED;
+
 	brdp = stli_brds[portp->brdnr];
 	stli_rawclose(brdp, portp, 0, 0);
 	if (tty->termios->c_cflag & HUPCL) {
@@ -937,14 +908,14 @@ static void stli_close(struct tty_struct *tty, struct file *filp)
 	tty->closing = 0;
 	tty_port_tty_set(&portp->port, NULL);
 
-	if (portp->openwaitcnt) {
+	if (port->blocked_open) {
 		if (portp->close_delay)
 			msleep_interruptible(jiffies_to_msecs(portp->close_delay));
-		wake_up_interruptible(&portp->port.open_wait);
+		wake_up_interruptible(&port->open_wait);
 	}
 
-	portp->port.flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING);
-	wake_up_interruptible(&portp->port.close_wait);
+	port->flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING);
+	wake_up_interruptible(&port->close_wait);
 }
 
 /*****************************************************************************/
@@ -1189,63 +1160,17 @@ static int stli_carrier_raised(struct tty_port *port)
 	return (portp->sigs & TIOCM_CD) ? 1 : 0;
 }
 
-/*
- *	Possibly need to wait for carrier (DCD signal) to come high. Say
- *	maybe because if we are clocal then we don't need to wait...
- */
-
-static int stli_waitcarrier(struct tty_struct *tty, struct stlibrd *brdp,
-				struct stliport *portp, struct file *filp)
+static void stli_raise_dtr_rts(struct tty_port *port)
 {
-	unsigned long flags;
-	int rc, doclocal;
-	struct tty_port *port = &portp->port;
-
-	rc = 0;
-	doclocal = 0;
-
-	if (tty->termios->c_cflag & CLOCAL)
-		doclocal++;
-
-	spin_lock_irqsave(&stli_lock, flags);
-	portp->openwaitcnt++;
-	if (! tty_hung_up_p(filp))
-		port->count--;
-	spin_unlock_irqrestore(&stli_lock, flags);
-
-	for (;;) {
-		stli_mkasysigs(&portp->asig, 1, 1);
-		if ((rc = stli_cmdwait(brdp, portp, A_SETSIGNALS,
-		    &portp->asig, sizeof(asysigs_t), 0)) < 0)
-			break;
-		if (tty_hung_up_p(filp) ||
-		    ((port->flags & ASYNC_INITIALIZED) == 0)) {
-			if (port->flags & ASYNC_HUP_NOTIFY)
-				rc = -EBUSY;
-			else
-				rc = -ERESTARTSYS;
-			break;
-		}
-		if (((port->flags & ASYNC_CLOSING) == 0) &&
-		    (doclocal || tty_port_carrier_raised(port))) {
-			break;
-		}
-		if (signal_pending(current)) {
-			rc = -ERESTARTSYS;
-			break;
-		}
-		interruptible_sleep_on(&port->open_wait);
-	}
-
-	spin_lock_irqsave(&stli_lock, flags);
-	if (! tty_hung_up_p(filp))
-		port->count++;
-	portp->openwaitcnt--;
-	spin_unlock_irqrestore(&stli_lock, flags);
-
-	return rc;
+	struct stliport *portp = container_of(port, struct stliport, port);
+	struct stlibrd *brdp = stli_brds[portp->brdnr];
+	stli_mkasysigs(&portp->asig, 1, 1);
+	if (stli_cmdwait(brdp, portp, A_SETSIGNALS, &portp->asig,
+		sizeof(asysigs_t), 0) < 0)
+			printk(KERN_WARNING "istallion: dtr raise failed.\n");
 }
 
+
 /*****************************************************************************/
 
 /*
@@ -1828,6 +1753,7 @@ static void stli_hangup(struct tty_struct *tty)
 {
 	struct stliport *portp;
 	struct stlibrd *brdp;
+	struct tty_port *port;
 	unsigned long flags;
 
 	portp = tty->driver_data;
@@ -1838,8 +1764,11 @@ static void stli_hangup(struct tty_struct *tty)
 	brdp = stli_brds[portp->brdnr];
 	if (brdp == NULL)
 		return;
+	port = &portp->port;
 
-	portp->port.flags &= ~ASYNC_INITIALIZED;
+	spin_lock_irqsave(&port->lock, flags);
+	port->flags &= ~ASYNC_INITIALIZED;
+	spin_unlock_irqrestore(&port->lock, flags);
 
 	if (!test_bit(ST_CLOSING, &portp->state))
 		stli_rawclose(brdp, portp, 0, 0);
@@ -1860,12 +1789,9 @@ static void stli_hangup(struct tty_struct *tty)
 	clear_bit(ST_TXBUSY, &portp->state);
 	clear_bit(ST_RXSTOP, &portp->state);
 	set_bit(TTY_IO_ERROR, &tty->flags);
-	tty_port_tty_set(&portp->port, NULL);
-	portp->port.flags &= ~ASYNC_NORMAL_ACTIVE;
-	portp->port.count = 0;
 	spin_unlock_irqrestore(&stli_lock, flags);
 
-	wake_up_interruptible(&portp->port.open_wait);
+	tty_port_hangup(port);
 }
 
 /*****************************************************************************/
@@ -4528,6 +4454,7 @@ static const struct tty_operations stli_ops = {
 
 static const struct tty_port_operations stli_port_ops = {
 	.carrier_raised = stli_carrier_raised,
+	.raise_dtr_rts = stli_raise_dtr_rts,
 };
 
 /*****************************************************************************/
diff --git a/include/linux/istallion.h b/include/linux/istallion.h
index 0d1840723249..053d5aea925c 100644
--- a/include/linux/istallion.h
+++ b/include/linux/istallion.h
@@ -61,7 +61,6 @@ struct stliport {
 	int			custom_divisor;
 	int			close_delay;
 	int			closing_wait;
-	int			openwaitcnt;
 	int			rc;
 	int			argsize;
 	void			*argp;
-- 
cgit v1.2.3


From a6614999e800cf3a134ce93ea46ef837e3c0e76e Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:46:50 +0000
Subject: tty: Introduce some close helpers for ports

Again this is a lot of common code we can unify

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/isicom.c      | 66 ++++++---------------------------------
 drivers/char/istallion.c   | 78 +++++++++++++++++-----------------------------
 drivers/char/mxser.c       | 56 ++++++---------------------------
 drivers/char/riscom8.c     | 49 +++--------------------------
 drivers/char/stallion.c    | 39 +++--------------------
 drivers/char/synclink.c    | 58 ++--------------------------------
 drivers/char/synclink_gt.c | 51 ++----------------------------
 drivers/char/synclinkmp.c  | 58 ++--------------------------------
 drivers/char/tty_port.c    | 58 ++++++++++++++++++++++++++++++++++
 include/linux/istallion.h  |  1 -
 include/linux/tty.h        |  3 ++
 11 files changed, 126 insertions(+), 391 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/isicom.c b/drivers/char/isicom.c
index bac55cf44243..24aa6e88e223 100644
--- a/drivers/char/isicom.c
+++ b/drivers/char/isicom.c
@@ -945,76 +945,30 @@ static void isicom_flush_buffer(struct tty_struct *tty)
 
 static void isicom_close(struct tty_struct *tty, struct file *filp)
 {
-	struct isi_port *port = tty->driver_data;
+	struct isi_port *ip = tty->driver_data;
+	struct tty_port *port = &ip->port;
 	struct isi_board *card;
 	unsigned long flags;
 
-	if (!port)
-		return;
-	card = port->card;
-	if (isicom_paranoia_check(port, tty->name, "isicom_close"))
-		return;
-
-	pr_dbg("Close start!!!.\n");
-
-	spin_lock_irqsave(&port->port.lock, flags);
-	if (tty_hung_up_p(filp)) {
-		spin_unlock_irqrestore(&port->port.lock, flags);
-		return;
-	}
-
-	if (tty->count == 1 && port->port.count != 1) {
-		printk(KERN_WARNING "ISICOM:(0x%lx) isicom_close: bad port "
-			"count tty->count = 1 port count = %d.\n",
-			card->base, port->port.count);
-		port->port.count = 1;
-	}
-	if (--port->port.count < 0) {
-		printk(KERN_WARNING "ISICOM:(0x%lx) isicom_close: bad port "
-			"count for channel%d = %d", card->base, port->channel,
-			port->port.count);
-		port->port.count = 0;
-	}
+	BUG_ON(!ip);
 
-	if (port->port.count) {
-		spin_unlock_irqrestore(&port->port.lock, flags);
+	card = ip->card;
+	if (isicom_paranoia_check(ip, tty->name, "isicom_close"))
 		return;
-	}
-	port->port.flags |= ASYNC_CLOSING;
-	tty->closing = 1;
-	spin_unlock_irqrestore(&port->port.lock, flags);
 
-	if (port->port.closing_wait != ASYNC_CLOSING_WAIT_NONE)
-		tty_wait_until_sent(tty, port->port.closing_wait);
 	/* indicate to the card that no more data can be received
 	   on this port */
 	spin_lock_irqsave(&card->card_lock, flags);
-	if (port->port.flags & ASYNC_INITIALIZED) {
-		card->port_status &= ~(1 << port->channel);
+	if (port->flags & ASYNC_INITIALIZED) {
+		card->port_status &= ~(1 << ip->channel);
 		outw(card->port_status, card->base + 0x02);
 	}
-	isicom_shutdown_port(port);
+	isicom_shutdown_port(ip);
 	spin_unlock_irqrestore(&card->card_lock, flags);
 
 	isicom_flush_buffer(tty);
-	tty_ldisc_flush(tty);
-
-	spin_lock_irqsave(&port->port.lock, flags);
-	tty->closing = 0;
-
-	if (port->port.blocked_open) {
-		spin_unlock_irqrestore(&port->port.lock, flags);
-		if (port->port.close_delay) {
-			pr_dbg("scheduling until time out.\n");
-			msleep_interruptible(
-				jiffies_to_msecs(port->port.close_delay));
-		}
-		spin_lock_irqsave(&port->port.lock, flags);
-		wake_up_interruptible(&port->port.open_wait);
-	}
-	port->port.flags &= ~(ASYNC_NORMAL_ACTIVE | ASYNC_CLOSING);
-	wake_up_interruptible(&port->port.close_wait);
-	spin_unlock_irqrestore(&port->port.lock, flags);
+	
+	tty_port_close_end(port, tty);
 }
 
 /* write et all */
diff --git a/drivers/char/istallion.c b/drivers/char/istallion.c
index 4c69ab97339a..5c3dc6b8411c 100644
--- a/drivers/char/istallion.c
+++ b/drivers/char/istallion.c
@@ -767,7 +767,7 @@ static int stli_parsebrd(struct stlconf *confp, char **argp)
 			break;
 	}
 	if (i == ARRAY_SIZE(stli_brdstr)) {
-		printk("STALLION: unknown board name, %s?\n", argp[0]);
+		printk(KERN_WARNING "istallion: unknown board name, %s?\n", argp[0]);
 		return 0;
 	}
 
@@ -855,21 +855,8 @@ static void stli_close(struct tty_struct *tty, struct file *filp)
 		return;
 	port = &portp->port;
 
-	spin_lock_irqsave(&port->lock, flags);
-	if (tty_hung_up_p(filp)) {
-		spin_unlock_irqrestore(&port->lock, flags);
-		return;
-	}
-	if (tty->count == 1 && port->count != 1)
-		port->count = 1;
-	if (port->count-- > 1) {
-		spin_unlock_irqrestore(&port->lock, flags);
+	if (tty_port_close_start(port, tty, filp) == 0)
 		return;
-	}
-
-	port->flags |= ASYNC_CLOSING;
-	tty->closing = 1;
-	spin_unlock_irqrestore(&port->lock, flags);
 
 /*
  *	May want to wait for data to drain before closing. The BUSY flag
@@ -882,6 +869,8 @@ static void stli_close(struct tty_struct *tty, struct file *filp)
 		stli_flushchars(tty);
 	spin_unlock_irqrestore(&stli_lock, flags);
 
+	/* We end up doing this twice for the moment. This needs looking at
+	   eventually. Note we still use portp->closing_wait as a result */
 	if (portp->closing_wait != ASYNC_CLOSING_WAIT_NONE)
 		tty_wait_until_sent(tty, portp->closing_wait);
 
@@ -905,17 +894,8 @@ static void stli_close(struct tty_struct *tty, struct file *filp)
 	set_bit(ST_DOFLUSHRX, &portp->state);
 	stli_flushbuffer(tty);
 
-	tty->closing = 0;
-	tty_port_tty_set(&portp->port, NULL);
-
-	if (port->blocked_open) {
-		if (portp->close_delay)
-			msleep_interruptible(jiffies_to_msecs(portp->close_delay));
-		wake_up_interruptible(&port->open_wait);
-	}
-
-	port->flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING);
-	wake_up_interruptible(&port->close_wait);
+	tty_port_close_end(port, tty);
+	tty_port_tty_set(port, NULL);
 }
 
 /*****************************************************************************/
@@ -1482,7 +1462,7 @@ static int stli_getserial(struct stliport *portp, struct serial_struct __user *s
 	sio.irq = 0;
 	sio.flags = portp->port.flags;
 	sio.baud_base = portp->baud_base;
-	sio.close_delay = portp->close_delay;
+	sio.close_delay = portp->port.close_delay;
 	sio.closing_wait = portp->closing_wait;
 	sio.custom_divisor = portp->custom_divisor;
 	sio.xmit_fifo_size = 0;
@@ -1514,7 +1494,7 @@ static int stli_setserial(struct tty_struct *tty, struct serial_struct __user *s
 		return -EFAULT;
 	if (!capable(CAP_SYS_ADMIN)) {
 		if ((sio.baud_base != portp->baud_base) ||
-		    (sio.close_delay != portp->close_delay) ||
+		    (sio.close_delay != portp->port.close_delay) ||
 		    ((sio.flags & ~ASYNC_USR_MASK) !=
 		    (portp->port.flags & ~ASYNC_USR_MASK)))
 			return -EPERM;
@@ -1523,7 +1503,7 @@ static int stli_setserial(struct tty_struct *tty, struct serial_struct __user *s
 	portp->port.flags = (portp->port.flags & ~ASYNC_USR_MASK) |
 		(sio.flags & ASYNC_USR_MASK);
 	portp->baud_base = sio.baud_base;
-	portp->close_delay = sio.close_delay;
+	portp->port.close_delay = sio.close_delay;
 	portp->closing_wait = sio.closing_wait;
 	portp->custom_divisor = sio.custom_divisor;
 
@@ -2065,7 +2045,7 @@ static void __stli_sendcmd(struct stlibrd *brdp, struct stliport *portp, unsigne
 	unsigned char __iomem *bits;
 
 	if (test_bit(ST_CMDING, &portp->state)) {
-		printk(KERN_ERR "STALLION: command already busy, cmd=%x!\n",
+		printk(KERN_ERR "istallion: command already busy, cmd=%x!\n",
 				(int) cmd);
 		return;
 	}
@@ -2625,7 +2605,7 @@ static int stli_initports(struct stlibrd *brdp)
 	for (i = 0, panelnr = 0, panelport = 0; (i < brdp->nrports); i++) {
 		portp = kzalloc(sizeof(struct stliport), GFP_KERNEL);
 		if (!portp) {
-			printk("STALLION: failed to allocate port structure\n");
+			printk(KERN_WARNING "istallion: failed to allocate port structure\n");
 			continue;
 		}
 		tty_port_init(&portp->port);
@@ -2635,7 +2615,7 @@ static int stli_initports(struct stlibrd *brdp)
 		portp->brdnr = brdp->brdnr;
 		portp->panelnr = panelnr;
 		portp->baud_base = STL_BAUDBASE;
-		portp->close_delay = STL_CLOSEDELAY;
+		portp->port.close_delay = STL_CLOSEDELAY;
 		portp->closing_wait = 30 * HZ;
 		init_waitqueue_head(&portp->port.open_wait);
 		init_waitqueue_head(&portp->port.close_wait);
@@ -2692,7 +2672,7 @@ static void __iomem *stli_ecpgetmemptr(struct stlibrd *brdp, unsigned long offse
 	unsigned char val;
 
 	if (offset > brdp->memsize) {
-		printk(KERN_ERR "STALLION: shared memory pointer=%x out of "
+		printk(KERN_ERR "istallion: shared memory pointer=%x out of "
 				"range at line=%d(%d), brd=%d\n",
 			(int) offset, line, __LINE__, brdp->brdnr);
 		ptr = NULL;
@@ -2766,7 +2746,7 @@ static void __iomem *stli_ecpeigetmemptr(struct stlibrd *brdp, unsigned long off
 	unsigned char	val;
 
 	if (offset > brdp->memsize) {
-		printk(KERN_ERR "STALLION: shared memory pointer=%x out of "
+		printk(KERN_ERR "istallion: shared memory pointer=%x out of "
 				"range at line=%d(%d), brd=%d\n",
 			(int) offset, line, __LINE__, brdp->brdnr);
 		ptr = NULL;
@@ -2818,7 +2798,7 @@ static void __iomem *stli_ecpmcgetmemptr(struct stlibrd *brdp, unsigned long off
 	unsigned char val;
 
 	if (offset > brdp->memsize) {
-		printk(KERN_ERR "STALLION: shared memory pointer=%x out of "
+		printk(KERN_ERR "istallion: shared memory pointer=%x out of "
 				"range at line=%d(%d), brd=%d\n",
 			(int) offset, line, __LINE__, brdp->brdnr);
 		ptr = NULL;
@@ -2863,7 +2843,7 @@ static void __iomem *stli_ecppcigetmemptr(struct stlibrd *brdp, unsigned long of
 	unsigned char	val;
 
 	if (offset > brdp->memsize) {
-		printk(KERN_ERR "STALLION: shared memory pointer=%x out of "
+		printk(KERN_ERR "istallion: shared memory pointer=%x out of "
 				"range at line=%d(%d), board=%d\n",
 				(int) offset, line, __LINE__, brdp->brdnr);
 		ptr = NULL;
@@ -2928,7 +2908,7 @@ static void __iomem *stli_onbgetmemptr(struct stlibrd *brdp, unsigned long offse
 	void __iomem *ptr;
 
 	if (offset > brdp->memsize) {
-		printk(KERN_ERR "STALLION: shared memory pointer=%x out of "
+		printk(KERN_ERR "istallion: shared memory pointer=%x out of "
 				"range at line=%d(%d), brd=%d\n",
 				(int) offset, line, __LINE__, brdp->brdnr);
 		ptr = NULL;
@@ -2994,7 +2974,7 @@ static void __iomem *stli_onbegetmemptr(struct stlibrd *brdp, unsigned long offs
 	unsigned char val;
 
 	if (offset > brdp->memsize) {
-		printk(KERN_ERR "STALLION: shared memory pointer=%x out of "
+		printk(KERN_ERR "istallion: shared memory pointer=%x out of "
 				"range at line=%d(%d), brd=%d\n",
 			(int) offset, line, __LINE__, brdp->brdnr);
 		ptr = NULL;
@@ -3433,7 +3413,7 @@ static int stli_startbrd(struct stlibrd *brdp)
 #endif
 
 	if (nrdevs < (brdp->nrports + 1)) {
-		printk(KERN_ERR "STALLION: slave failed to allocate memory for "
+		printk(KERN_ERR "istallion: slave failed to allocate memory for "
 				"all devices, devices=%d\n", nrdevs);
 		brdp->nrports = nrdevs - 1;
 	}
@@ -3443,13 +3423,13 @@ static int stli_startbrd(struct stlibrd *brdp)
 	brdp->bitsize = (nrdevs + 7) / 8;
 	memoff = readl(&hdrp->memp);
 	if (memoff > brdp->memsize) {
-		printk(KERN_ERR "STALLION: corrupted shared memory region?\n");
+		printk(KERN_ERR "istallion: corrupted shared memory region?\n");
 		rc = -EIO;
 		goto stli_donestartup;
 	}
 	memp = (cdkmem_t __iomem *) EBRDGETMEMPTR(brdp, memoff);
 	if (readw(&memp->dtype) != TYP_ASYNCTRL) {
-		printk(KERN_ERR "STALLION: no slave control device found\n");
+		printk(KERN_ERR "istallion: no slave control device found\n");
 		goto stli_donestartup;
 	}
 	memp++;
@@ -3534,7 +3514,7 @@ static int __devinit stli_brdinit(struct stlibrd *brdp)
 		retval = stli_initonb(brdp);
 		break;
 	default:
-		printk(KERN_ERR "STALLION: board=%d is unknown board "
+		printk(KERN_ERR "istallion: board=%d is unknown board "
 				"type=%d\n", brdp->brdnr, brdp->brdtype);
 		retval = -ENODEV;
 	}
@@ -3543,7 +3523,7 @@ static int __devinit stli_brdinit(struct stlibrd *brdp)
 		return retval;
 
 	stli_initports(brdp);
-	printk(KERN_INFO "STALLION: %s found, board=%d io=%x mem=%x "
+	printk(KERN_INFO "istallion: %s found, board=%d io=%x mem=%x "
 		"nrpanels=%d nrports=%d\n", stli_brdnames[brdp->brdtype],
 		brdp->brdnr, brdp->iobase, (int) brdp->memaddr,
 		brdp->nrpanels, brdp->nrports);
@@ -3637,7 +3617,7 @@ static int stli_eisamemprobe(struct stlibrd *brdp)
 	if (! foundit) {
 		brdp->memaddr = 0;
 		brdp->membase = NULL;
-		printk(KERN_ERR "STALLION: failed to probe shared memory "
+		printk(KERN_ERR "istallion: failed to probe shared memory "
 				"region for %s in EISA slot=%d\n",
 			stli_brdnames[brdp->brdtype], (brdp->iobase >> 12));
 		return -ENODEV;
@@ -3782,7 +3762,7 @@ static int __devinit stli_pciprobe(struct pci_dev *pdev,
 	mutex_lock(&stli_brdslock);
 	brdnr = stli_getbrdnr();
 	if (brdnr < 0) {
-		printk(KERN_INFO "STALLION: too many boards found, "
+		printk(KERN_INFO "istallion: too many boards found, "
 			"maximum supported %d\n", STL_MAXBRDS);
 		mutex_unlock(&stli_brdslock);
 		retval = -EIO;
@@ -3854,7 +3834,7 @@ static struct stlibrd *stli_allocbrd(void)
 
 	brdp = kzalloc(sizeof(struct stlibrd), GFP_KERNEL);
 	if (!brdp) {
-		printk(KERN_ERR "STALLION: failed to allocate memory "
+		printk(KERN_ERR "istallion: failed to allocate memory "
 				"(size=%Zd)\n", sizeof(struct stlibrd));
 		return NULL;
 	}
@@ -4493,7 +4473,7 @@ static int __init istallion_module_init(void)
 
 	stli_txcookbuf = kmalloc(STLI_TXBUFSIZE, GFP_KERNEL);
 	if (!stli_txcookbuf) {
-		printk(KERN_ERR "STALLION: failed to allocate memory "
+		printk(KERN_ERR "istallion: failed to allocate memory "
 				"(size=%d)\n", STLI_TXBUFSIZE);
 		retval = -ENOMEM;
 		goto err;
@@ -4518,7 +4498,7 @@ static int __init istallion_module_init(void)
 
 	retval = tty_register_driver(stli_serial);
 	if (retval) {
-		printk(KERN_ERR "STALLION: failed to register serial driver\n");
+		printk(KERN_ERR "istallion: failed to register serial driver\n");
 		goto err_ttyput;
 	}
 
@@ -4532,7 +4512,7 @@ static int __init istallion_module_init(void)
  */
 	retval = register_chrdev(STL_SIOMEMMAJOR, "staliomem", &stli_fsiomem);
 	if (retval) {
-		printk(KERN_ERR "STALLION: failed to register serial memory "
+		printk(KERN_ERR "istallion: failed to register serial memory "
 				"device\n");
 		goto err_deinit;
 	}
diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c
index 08ba6eb1a380..402c9f217f83 100644
--- a/drivers/char/mxser.c
+++ b/drivers/char/mxser.c
@@ -1080,57 +1080,26 @@ static void mxser_flush_buffer(struct tty_struct *tty)
 static void mxser_close(struct tty_struct *tty, struct file *filp)
 {
 	struct mxser_port *info = tty->driver_data;
+	struct tty_port *port = &info->port;
 
 	unsigned long timeout;
-	unsigned long flags;
 
 	if (tty->index == MXSER_PORTS)
 		return;
 	if (!info)
 		return;
 
-	spin_lock_irqsave(&info->port.lock, flags);
-
-	if (tty_hung_up_p(filp)) {
-		spin_unlock_irqrestore(&info->port.lock, flags);
-		return;
-	}
-	if ((tty->count == 1) && (info->port.count != 1)) {
-		/*
-		 * Uh, oh.  tty->count is 1, which means that the tty
-		 * structure will be freed.  Info->port.count should always
-		 * be one in these conditions.  If it's greater than
-		 * one, we've got real problems, since it means the
-		 * serial port won't be shutdown.
-		 */
-		printk(KERN_ERR "mxser_close: bad serial port count; "
-			"tty->count is 1, info->port.count is %d\n", info->port.count);
-		info->port.count = 1;
-	}
-	if (--info->port.count < 0) {
-		printk(KERN_ERR "mxser_close: bad serial port count for "
-			"ttys%d: %d\n", tty->index, info->port.count);
-		info->port.count = 0;
-	}
-	if (info->port.count) {
-		spin_unlock_irqrestore(&info->port.lock, flags);
+	if (tty_port_close_start(port, tty, filp) == 0)
 		return;
-	}
-	info->port.flags |= ASYNC_CLOSING;
-	spin_unlock_irqrestore(&info->port.lock, flags);
+
 	/*
 	 * Save the termios structure, since this port may have
 	 * separate termios for callout and dialin.
+	 *
+	 * FIXME: Can this go ?
 	 */
 	if (info->port.flags & ASYNC_NORMAL_ACTIVE)
 		info->normal_termios = *tty->termios;
-	/*
-	 * Now we wait for the transmit buffer to clear; and we notify
-	 * the line discipline to only process XON/XOFF characters.
-	 */
-	tty->closing = 1;
-	if (info->port.closing_wait != ASYNC_CLOSING_WAIT_NONE)
-		tty_wait_until_sent(tty, info->port.closing_wait);
 	/*
 	 * At this point we stop accepting input.  To do this, we
 	 * disable the receive line status interrupts, and tell the
@@ -1156,19 +1125,12 @@ static void mxser_close(struct tty_struct *tty, struct file *filp)
 		}
 	}
 	mxser_shutdown(tty);
-
 	mxser_flush_buffer(tty);
-	tty_ldisc_flush(tty);
-
-	tty->closing = 0;
-	tty_port_tty_set(&info->port, NULL);
-	if (info->port.blocked_open) {
-		if (info->port.close_delay)
-			schedule_timeout_interruptible(info->port.close_delay);
-		wake_up_interruptible(&info->port.open_wait);
-	}
 
-	info->port.flags &= ~(ASYNC_NORMAL_ACTIVE | ASYNC_CLOSING);
+	/* Right now the tty_port set is done outside of the close_end helper
+	   as we don't yet have everyone using refcounts */	
+	tty_port_close_end(port, tty);
+	tty_port_tty_set(port, NULL);
 }
 
 static int mxser_write(struct tty_struct *tty, const unsigned char *buf, int count)
diff --git a/drivers/char/riscom8.c b/drivers/char/riscom8.c
index af34c2054a09..9ac5febd8abd 100644
--- a/drivers/char/riscom8.c
+++ b/drivers/char/riscom8.c
@@ -929,35 +929,11 @@ static void rc_close(struct tty_struct *tty, struct file *filp)
 	if (!port || rc_paranoia_check(port, tty->name, "close"))
 		return;
 
-	spin_lock_irqsave(&port->port.lock, flags);
-
-	if (tty_hung_up_p(filp))
-		goto out;
-
 	bp = port_Board(port);
-	if ((tty->count == 1) && (port->port.count != 1))  {
-		printk(KERN_INFO "rc%d: rc_close: bad port count;"
-		       " tty->count is 1, port count is %d\n",
-		       board_No(bp), port->port.count);
-		port->port.count = 1;
-	}
-	if (--port->port.count < 0)  {
-		printk(KERN_INFO "rc%d: rc_close: bad port count "
-				 "for tty%d: %d\n",
-		       board_No(bp), port_No(port), port->port.count);
-		port->port.count = 0;
-	}
-	if (port->port.count)
-		goto out;
-	port->port.flags |= ASYNC_CLOSING;
-	/*
-	 * Now we wait for the transmit buffer to clear; and we notify
-	 * the line discipline to only process XON/XOFF characters.
-	 */
-	tty->closing = 1;
-	spin_unlock_irqrestore(&port->port.lock, flags);
-	if (port->port.closing_wait != ASYNC_CLOSING_WAIT_NONE)
-		tty_wait_until_sent(tty, port->port.closing_wait);
+	
+	if (tty_port_close_start(&port->port, tty, filp) == 0)
+		return;
+	
 	/*
 	 * At this point we stop accepting input.  To do this, we
 	 * disable the receive line status interrupts, and tell the
@@ -989,23 +965,8 @@ static void rc_close(struct tty_struct *tty, struct file *filp)
 	rc_shutdown_port(tty, bp, port);
 	rc_flush_buffer(tty);
 	spin_unlock_irqrestore(&riscom_lock, flags);
-	tty_ldisc_flush(tty);
 
-	spin_lock_irqsave(&port->port.lock, flags);
-	tty->closing = 0;
-	port->port.tty = NULL;
-	if (port->port.blocked_open) {
-		spin_unlock_irqrestore(&port->port.lock, flags);
-		if (port->port.close_delay)
-			msleep_interruptible(jiffies_to_msecs(port->port.close_delay));
-		wake_up_interruptible(&port->port.open_wait);
-		spin_lock_irqsave(&port->port.lock, flags);
-	}
-	port->port.flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING);
-	wake_up_interruptible(&port->port.close_wait);
-
-out:
-	spin_unlock_irqrestore(&riscom_lock, flags);
+	tty_port_close_end(&port->port, tty);
 }
 
 static int rc_write(struct tty_struct *tty,
diff --git a/drivers/char/stallion.c b/drivers/char/stallion.c
index 77eef61c46f3..e1e0dd89ac9a 100644
--- a/drivers/char/stallion.c
+++ b/drivers/char/stallion.c
@@ -833,40 +833,20 @@ static void stl_close(struct tty_struct *tty, struct file *filp)
 	pr_debug("stl_close(tty=%p,filp=%p)\n", tty, filp);
 
 	portp = tty->driver_data;
-	if (portp == NULL)
-		return;
+	BUG_ON(portp == NULL);
+
 	port = &portp->port;
 
-	spin_lock_irqsave(&port->lock, flags);
-	if (tty_hung_up_p(filp)) {
-		spin_unlock_irqrestore(&port->lock, flags);
+	if (tty_port_close_start(port, tty, filp) == 0)
 		return;
-	}
-	if (tty->count == 1 && port->count != 1)
-		port->count = 1;
-	if (port->count-- > 1) {
-		spin_unlock_irqrestore(&port->lock, flags);
-		return;
-	}
-
-	port->count = 0;
-	port->flags |= ASYNC_CLOSING;
-
 /*
  *	May want to wait for any data to drain before closing. The BUSY
  *	flag keeps track of whether we are still sending or not - it is
  *	very accurate for the cd1400, not quite so for the sc26198.
  *	(The sc26198 has no "end-of-data" interrupt only empty FIFO)
  */
-	tty->closing = 1;
-
-	spin_unlock_irqrestore(&port->lock, flags);
-
-	if (portp->closing_wait != ASYNC_CLOSING_WAIT_NONE)
-		tty_wait_until_sent(tty, portp->closing_wait);
 	stl_waituntilsent(tty, (HZ / 2));
 
-
 	spin_lock_irqsave(&port->lock, flags);
 	portp->port.flags &= ~ASYNC_INITIALIZED;
 	spin_unlock_irqrestore(&port->lock, flags);
@@ -883,20 +863,9 @@ static void stl_close(struct tty_struct *tty, struct file *filp)
 		portp->tx.head = NULL;
 		portp->tx.tail = NULL;
 	}
-	set_bit(TTY_IO_ERROR, &tty->flags);
-	tty_ldisc_flush(tty);
 
-	tty->closing = 0;
+	tty_port_close_end(port, tty);
 	tty_port_tty_set(port, NULL);
-
-	if (port->blocked_open) {
-		if (portp->close_delay)
-			msleep_interruptible(jiffies_to_msecs(portp->close_delay));
-		wake_up_interruptible(&portp->port.open_wait);
-	}
-
-	portp->port.flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING);
-	wake_up_interruptible(&port->close_wait);
 }
 
 /*****************************************************************************/
diff --git a/drivers/char/synclink.c b/drivers/char/synclink.c
index 0ded4ed3da3c..fbd5a5ce2e1c 100644
--- a/drivers/char/synclink.c
+++ b/drivers/char/synclink.c
@@ -3104,70 +3104,18 @@ static void mgsl_close(struct tty_struct *tty, struct file * filp)
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):mgsl_close(%s) entry, count=%d\n",
 			 __FILE__,__LINE__, info->device_name, info->port.count);
-			 
-	if (!info->port.count)
-		return;
 
-	if (tty_hung_up_p(filp))
+	if (tty_port_close_start(&info->port, tty, filp) == 0)			 
 		goto cleanup;
 			
-	if ((tty->count == 1) && (info->port.count != 1)) {
-		/*
-		 * tty->count is 1 and the tty structure will be freed.
-		 * info->port.count should be one in this case.
-		 * if it's not, correct it so that the port is shutdown.
-		 */
-		printk("mgsl_close: bad refcount; tty->count is 1, "
-		       "info->port.count is %d\n", info->port.count);
-		info->port.count = 1;
-	}
-	
-	info->port.count--;
-	
-	/* if at least one open remaining, leave hardware active */
-	if (info->port.count)
-		goto cleanup;
-	
-	info->port.flags |= ASYNC_CLOSING;
-	
-	/* set tty->closing to notify line discipline to 
-	 * only process XON/XOFF characters. Only the N_TTY
-	 * discipline appears to use this (ppp does not).
-	 */
-	tty->closing = 1;
-	
-	/* wait for transmit data to clear all layers */
-	
-	if (info->port.closing_wait != ASYNC_CLOSING_WAIT_NONE) {
-		if (debug_level >= DEBUG_LEVEL_INFO)
-			printk("%s(%d):mgsl_close(%s) calling tty_wait_until_sent\n",
-				 __FILE__,__LINE__, info->device_name );
-		tty_wait_until_sent(tty, info->port.closing_wait);
-	}
-		
  	if (info->port.flags & ASYNC_INITIALIZED)
  		mgsl_wait_until_sent(tty, info->timeout);
-
 	mgsl_flush_buffer(tty);
-
 	tty_ldisc_flush(tty);
-		
 	shutdown(info);
-	
-	tty->closing = 0;
+
+	tty_port_close_end(&info->port, tty);	
 	info->port.tty = NULL;
-	
-	if (info->port.blocked_open) {
-		if (info->port.close_delay) {
-			msleep_interruptible(jiffies_to_msecs(info->port.close_delay));
-		}
-		wake_up_interruptible(&info->port.open_wait);
-	}
-	
-	info->port.flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING);
-			 
-	wake_up_interruptible(&info->port.close_wait);
-	
 cleanup:			
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):mgsl_close(%s) exit, count=%d\n", __FILE__,__LINE__,
diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c
index 625c9bde3be8..53544e21f191 100644
--- a/drivers/char/synclink_gt.c
+++ b/drivers/char/synclink_gt.c
@@ -720,44 +720,9 @@ static void close(struct tty_struct *tty, struct file *filp)
 		return;
 	DBGINFO(("%s close entry, count=%d\n", info->device_name, info->port.count));
 
-	if (!info->port.count)
-		return;
-
-	if (tty_hung_up_p(filp))
+	if (tty_port_close_start(&info->port, tty, filp) == 0)
 		goto cleanup;
 
-	if ((tty->count == 1) && (info->port.count != 1)) {
-		/*
-		 * tty->count is 1 and the tty structure will be freed.
-		 * info->port.count should be one in this case.
-		 * if it's not, correct it so that the port is shutdown.
-		 */
-		DBGERR(("%s close: bad refcount; tty->count=1, "
-		       "info->port.count=%d\n", info->device_name, info->port.count));
-		info->port.count = 1;
-	}
-
-	info->port.count--;
-
-	/* if at least one open remaining, leave hardware active */
-	if (info->port.count)
-		goto cleanup;
-
-	info->port.flags |= ASYNC_CLOSING;
-
-	/* set tty->closing to notify line discipline to
-	 * only process XON/XOFF characters. Only the N_TTY
-	 * discipline appears to use this (ppp does not).
-	 */
-	tty->closing = 1;
-
-	/* wait for transmit data to clear all layers */
-
-	if (info->port.closing_wait != ASYNC_CLOSING_WAIT_NONE) {
-		DBGINFO(("%s call tty_wait_until_sent\n", info->device_name));
-		tty_wait_until_sent(tty, info->port.closing_wait);
-	}
-
  	if (info->port.flags & ASYNC_INITIALIZED)
  		wait_until_sent(tty, info->timeout);
 	flush_buffer(tty);
@@ -765,20 +730,8 @@ static void close(struct tty_struct *tty, struct file *filp)
 
 	shutdown(info);
 
-	tty->closing = 0;
+	tty_port_close_end(&info->port, tty);
 	info->port.tty = NULL;
-
-	if (info->port.blocked_open) {
-		if (info->port.close_delay) {
-			msleep_interruptible(jiffies_to_msecs(info->port.close_delay));
-		}
-		wake_up_interruptible(&info->port.open_wait);
-	}
-
-	info->port.flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING);
-
-	wake_up_interruptible(&info->port.close_wait);
-
 cleanup:
 	DBGINFO(("%s close exit, count=%d\n", tty->driver->name, info->port.count));
 }
diff --git a/drivers/char/synclinkmp.c b/drivers/char/synclinkmp.c
index 1f5c21ec4b14..2aac55bcf5fd 100644
--- a/drivers/char/synclinkmp.c
+++ b/drivers/char/synclinkmp.c
@@ -810,70 +810,18 @@ static void close(struct tty_struct *tty, struct file *filp)
 		printk("%s(%d):%s close() entry, count=%d\n",
 			 __FILE__,__LINE__, info->device_name, info->port.count);
 
-	if (!info->port.count)
-		return;
-
-	if (tty_hung_up_p(filp))
-		goto cleanup;
-
-	if ((tty->count == 1) && (info->port.count != 1)) {
-		/*
-		 * tty->count is 1 and the tty structure will be freed.
-		 * info->port.count should be one in this case.
-		 * if it's not, correct it so that the port is shutdown.
-		 */
-		printk("%s(%d):%s close: bad refcount; tty->count is 1, "
-		       "info->port.count is %d\n",
-			 __FILE__,__LINE__, info->device_name, info->port.count);
-		info->port.count = 1;
-	}
-
-	info->port.count--;
-
-	/* if at least one open remaining, leave hardware active */
-	if (info->port.count)
+	if (tty_port_close_start(&info->port, tty, filp) == 0)
 		goto cleanup;
-
-	info->port.flags |= ASYNC_CLOSING;
-
-	/* set tty->closing to notify line discipline to
-	 * only process XON/XOFF characters. Only the N_TTY
-	 * discipline appears to use this (ppp does not).
-	 */
-	tty->closing = 1;
-
-	/* wait for transmit data to clear all layers */
-
-	if (info->port.closing_wait != ASYNC_CLOSING_WAIT_NONE) {
-		if (debug_level >= DEBUG_LEVEL_INFO)
-			printk("%s(%d):%s close() calling tty_wait_until_sent\n",
-				 __FILE__,__LINE__, info->device_name );
-		tty_wait_until_sent(tty, info->port.closing_wait);
-	}
-
+		
  	if (info->port.flags & ASYNC_INITIALIZED)
  		wait_until_sent(tty, info->timeout);
 
 	flush_buffer(tty);
-
 	tty_ldisc_flush(tty);
-
 	shutdown(info);
 
-	tty->closing = 0;
+	tty_port_close_end(&info->port, tty);
 	info->port.tty = NULL;
-
-	if (info->port.blocked_open) {
-		if (info->port.close_delay) {
-			msleep_interruptible(jiffies_to_msecs(info->port.close_delay));
-		}
-		wake_up_interruptible(&info->port.open_wait);
-	}
-
-	info->port.flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING);
-
-	wake_up_interruptible(&info->port.close_wait);
-
 cleanup:
 	if (debug_level >= DEBUG_LEVEL_INFO)
 		printk("%s(%d):%s close() exit, count=%d\n", __FILE__,__LINE__,
diff --git a/drivers/char/tty_port.c b/drivers/char/tty_port.c
index 0723664fe0ab..b3175f54fe05 100644
--- a/drivers/char/tty_port.c
+++ b/drivers/char/tty_port.c
@@ -257,3 +257,61 @@ int tty_port_block_til_ready(struct tty_port *port,
 }
 EXPORT_SYMBOL(tty_port_block_til_ready);
 
+int tty_port_close_start(struct tty_port *port, struct tty_struct *tty, struct file *filp)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&port->lock, flags);
+	if (tty_hung_up_p(filp)) {
+		spin_unlock_irqrestore(&port->lock, flags);
+		return 0;
+	}
+
+	if( tty->count == 1 && port->count != 1) {
+		printk(KERN_WARNING
+		    "tty_port_close_start: tty->count = 1 port count = %d.\n",
+								port->count);
+		port->count = 1;
+	}
+	if (--port->count < 0) {
+		printk(KERN_WARNING "tty_port_close_start: count = %d\n",
+								port->count);
+		port->count = 0;
+	}
+
+	if (port->count) {
+		spin_unlock_irqrestore(&port->lock, flags);
+		return 0;
+	}
+	port->flags |= ASYNC_CLOSING;
+	tty->closing = 1;
+	spin_unlock_irqrestore(&port->lock, flags);
+	if (port->closing_wait != ASYNC_CLOSING_WAIT_NONE)
+		tty_wait_until_sent(tty, port->closing_wait);
+	return 1;
+}
+EXPORT_SYMBOL(tty_port_close_start);
+
+void tty_port_close_end(struct tty_port *port, struct tty_struct *tty)
+{
+	unsigned long flags;
+
+	tty_ldisc_flush(tty);
+
+	spin_lock_irqsave(&port->lock, flags);
+	tty->closing = 0;
+
+	if (port->blocked_open) {
+		spin_unlock_irqrestore(&port->lock, flags);
+		if (port->close_delay) {
+			msleep_interruptible(
+				jiffies_to_msecs(port->close_delay));
+		}
+		spin_lock_irqsave(&port->lock, flags);
+		wake_up_interruptible(&port->open_wait);
+	}
+	port->flags &= ~(ASYNC_NORMAL_ACTIVE | ASYNC_CLOSING);
+	wake_up_interruptible(&port->close_wait);
+	spin_unlock_irqrestore(&port->lock, flags);
+}
+EXPORT_SYMBOL(tty_port_close_end);
diff --git a/include/linux/istallion.h b/include/linux/istallion.h
index 053d5aea925c..7faca98c7d14 100644
--- a/include/linux/istallion.h
+++ b/include/linux/istallion.h
@@ -59,7 +59,6 @@ struct stliport {
 	unsigned int		devnr;
 	int			baud_base;
 	int			custom_divisor;
-	int			close_delay;
 	int			closing_wait;
 	int			rc;
 	int			argsize;
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 61a0ab32cf11..fc39db95499f 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -441,6 +441,9 @@ extern void tty_port_raise_dtr_rts(struct tty_port *port);
 extern void tty_port_hangup(struct tty_port *port);
 extern int tty_port_block_til_ready(struct tty_port *port,
 				struct tty_struct *tty, struct file *filp);
+extern int tty_port_close_start(struct tty_port *port,
+				struct tty_struct *tty, struct file *filp);
+extern void tty_port_close_end(struct tty_port *port, struct tty_struct *tty);
 
 extern int tty_register_ldisc(int disc, struct tty_ldisc_ops *new_ldisc);
 extern int tty_unregister_ldisc(int disc);
-- 
cgit v1.2.3


From 39aced68d664291db3324d0fcf0985ab5626aac2 Mon Sep 17 00:00:00 2001
From: Niels de Vos <niels.devos@wincor-nixdorf.com>
Date: Fri, 2 Jan 2009 13:46:58 +0000
Subject: serial: set correct baud_base for Oxford Semiconductor Ltd EXSYS
 EX-41092 Dual 16950 Serial adapter

The PCI-card identified as "Oxford Semiconductor Ltd EXSYS EX-41092 Dual
16950 Serial adapter" is only usable with other devices (i.e. not the same
card) after doing a "setserial /dev/ttyS<n> baud_base 115200".  This
baud_base should be default for this card.

Signed-off-by: Niels de Vos <niels.devos@wincor-nixdorf.com>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/serial/8250_pci.c | 3 +++
 include/linux/pci_ids.h   | 1 +
 2 files changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c
index 0b794138f686..2a2e1c717e8e 100644
--- a/drivers/serial/8250_pci.c
+++ b/drivers/serial/8250_pci.c
@@ -2387,6 +2387,9 @@ static struct pci_device_id serial_pci_tbl[] = {
 		 * www.ussg.iu.edu/hypermail/linux/kernel/0303.1/0516.html).
 		 * For now just used the hex ID 0x950a.
 		 */
+	{	PCI_VENDOR_ID_OXSEMI, 0x950a,
+		PCI_SUBVENDOR_ID_SIIG, PCI_SUBDEVICE_ID_SIIG_DUAL_SERIAL, 0, 0,
+		pbn_b0_2_115200 },
 	{	PCI_VENDOR_ID_OXSEMI, 0x950a,
 		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
 		pbn_b0_2_1130000 },
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index b6e694454280..fa83dfefc5e0 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1766,6 +1766,7 @@
 #define PCI_DEVICE_ID_SIIG_8S_20x_650	0x2081
 #define PCI_DEVICE_ID_SIIG_8S_20x_850	0x2082
 #define PCI_SUBDEVICE_ID_SIIG_QUARTET_SERIAL	0x2050
+#define PCI_SUBDEVICE_ID_SIIG_DUAL_SERIAL	0x2530
 
 #define PCI_VENDOR_ID_RADISYS		0x1331
 
-- 
cgit v1.2.3


From 60c20fb8c00a2b23308ae4517f145383bc66d291 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@canonical.com>
Date: Fri, 2 Jan 2009 13:49:04 +0000
Subject: serial: RS485 ioctl structure uses __u32 include linux/types.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In the commit below a new struct serial_rs485 was introduced for a new
ioctl:

    commit c26c56c0f40e200e61d1390629c806f6adaffbcc
    Author: Alan Cox <alan@redhat.com>
    Date:   Mon Oct 13 10:37:48 2008 +0100

	tty: Cris has a nice RS485 ioctl so we should steal it

This structure uses the __u32 types for some of its members, which leads
to the following compile error:

    $ cc -I.../include -c X.c
    In file included from X.c:2: .../include/linux/serial.h:185:
		error: expected specifier-qualifier-list before ‘__u32’
    $

It seems that these types are appropriate for this structure as it is
to be exposed to userspace.  These types are available via linux/types.h
so move the include of that outside the __KERNEL__ section.

Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/serial.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/serial.h b/include/linux/serial.h
index 1ea8d9265bf6..9136cc5608c3 100644
--- a/include/linux/serial.h
+++ b/include/linux/serial.h
@@ -10,8 +10,9 @@
 #ifndef _LINUX_SERIAL_H
 #define _LINUX_SERIAL_H
 
-#ifdef __KERNEL__
 #include <linux/types.h>
+
+#ifdef __KERNEL__
 #include <asm/page.h>
 
 /*
-- 
cgit v1.2.3


From f751928e0ddf54ea4fe5546f35e99efc5b5d9938 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Fri, 2 Jan 2009 13:49:21 +0000
Subject: tty: We want the port object to be persistent

Move the tty_port and uart_info bits around a little. By embedding the uart_info
into the uart_port we get rid of lots of corner case testing and also get the
ability to go port<->state<->info which is a bit more elegant than the current
data structures.

Downsides - we allocate a tiny bit more memory for unused ports, upside we've
removed as much code as it saved for most users..

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/serial/jsm/jsm_tty.c |   2 +-
 drivers/serial/serial_core.c | 144 ++++++++++++++++++-------------------------
 include/linux/serial_core.h  |  62 ++++++++++---------
 3 files changed, 96 insertions(+), 112 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/serial/jsm/jsm_tty.c b/drivers/serial/jsm/jsm_tty.c
index a697914ae3d0..3547558d2caf 100644
--- a/drivers/serial/jsm/jsm_tty.c
+++ b/drivers/serial/jsm/jsm_tty.c
@@ -272,7 +272,7 @@ static void jsm_tty_close(struct uart_port *port)
 	jsm_printk(CLOSE, INFO, &channel->ch_bd->pci_dev, "start\n");
 
 	bd = channel->ch_bd;
-	ts = channel->uart_port.info->port.tty->termios;
+	ts = port->info->port.tty->termios;
 
 	channel->ch_flags &= ~(CH_STOPI);
 
diff --git a/drivers/serial/serial_core.c b/drivers/serial/serial_core.c
index 874786a11fe9..daeba1c52c8e 100644
--- a/drivers/serial/serial_core.c
+++ b/drivers/serial/serial_core.c
@@ -50,7 +50,7 @@ static struct lock_class_key port_lock_key;
 
 #define HIGH_BITS_OFFSET	((sizeof(long)-sizeof(int))*8)
 
-#define uart_users(state)	((state)->count + ((state)->info ? (state)->info->port.blocked_open : 0))
+#define uart_users(state)	((state)->count + (state)->info.port.blocked_open)
 
 #ifdef CONFIG_SERIAL_CORE_CONSOLE
 #define uart_console(port)	((port)->cons && (port)->cons->index == (port)->line)
@@ -94,7 +94,7 @@ static void __uart_start(struct tty_struct *tty)
 	struct uart_state *state = tty->driver_data;
 	struct uart_port *port = state->port;
 
-	if (!uart_circ_empty(&state->info->xmit) && state->info->xmit.buf &&
+	if (!uart_circ_empty(&state->info.xmit) && state->info.xmit.buf &&
 	    !tty->stopped && !tty->hw_stopped)
 		port->ops->start_tx(port);
 }
@@ -113,7 +113,7 @@ static void uart_start(struct tty_struct *tty)
 static void uart_tasklet_action(unsigned long data)
 {
 	struct uart_state *state = (struct uart_state *)data;
-	tty_wakeup(state->info->port.tty);
+	tty_wakeup(state->info.port.tty);
 }
 
 static inline void
@@ -139,7 +139,7 @@ uart_update_mctrl(struct uart_port *port, unsigned int set, unsigned int clear)
  */
 static int uart_startup(struct uart_state *state, int init_hw)
 {
-	struct uart_info *info = state->info;
+	struct uart_info *info = &state->info;
 	struct uart_port *port = state->port;
 	unsigned long page;
 	int retval = 0;
@@ -212,14 +212,15 @@ static int uart_startup(struct uart_state *state, int init_hw)
  */
 static void uart_shutdown(struct uart_state *state)
 {
-	struct uart_info *info = state->info;
+	struct uart_info *info = &state->info;
 	struct uart_port *port = state->port;
+	struct tty_struct *tty = info->port.tty;
 
 	/*
 	 * Set the TTY IO error marker
 	 */
-	if (info->port.tty)
-		set_bit(TTY_IO_ERROR, &info->port.tty->flags);
+	if (tty)
+		set_bit(TTY_IO_ERROR, &tty->flags);
 
 	if (info->flags & UIF_INITIALIZED) {
 		info->flags &= ~UIF_INITIALIZED;
@@ -227,7 +228,7 @@ static void uart_shutdown(struct uart_state *state)
 		/*
 		 * Turn off DTR and RTS early.
 		 */
-		if (!info->port.tty || (info->port.tty->termios->c_cflag & HUPCL))
+		if (!tty || (tty->termios->c_cflag & HUPCL))
 			uart_clear_mctrl(port, TIOCM_DTR | TIOCM_RTS);
 
 		/*
@@ -427,7 +428,7 @@ EXPORT_SYMBOL(uart_get_divisor);
 static void
 uart_change_speed(struct uart_state *state, struct ktermios *old_termios)
 {
-	struct tty_struct *tty = state->info->port.tty;
+	struct tty_struct *tty = state->info.port.tty;
 	struct uart_port *port = state->port;
 	struct ktermios *termios;
 
@@ -444,14 +445,14 @@ uart_change_speed(struct uart_state *state, struct ktermios *old_termios)
 	 * Set flags based on termios cflag
 	 */
 	if (termios->c_cflag & CRTSCTS)
-		state->info->flags |= UIF_CTS_FLOW;
+		state->info.flags |= UIF_CTS_FLOW;
 	else
-		state->info->flags &= ~UIF_CTS_FLOW;
+		state->info.flags &= ~UIF_CTS_FLOW;
 
 	if (termios->c_cflag & CLOCAL)
-		state->info->flags &= ~UIF_CHECK_CD;
+		state->info.flags &= ~UIF_CHECK_CD;
 	else
-		state->info->flags |= UIF_CHECK_CD;
+		state->info.flags |= UIF_CHECK_CD;
 
 	port->ops->set_termios(port, termios, old_termios);
 }
@@ -479,7 +480,7 @@ static int uart_put_char(struct tty_struct *tty, unsigned char ch)
 {
 	struct uart_state *state = tty->driver_data;
 
-	return __uart_put_char(state->port, &state->info->xmit, ch);
+	return __uart_put_char(state->port, &state->info.xmit, ch);
 }
 
 static void uart_flush_chars(struct tty_struct *tty)
@@ -500,13 +501,13 @@ uart_write(struct tty_struct *tty, const unsigned char *buf, int count)
 	 * This means you called this function _after_ the port was
 	 * closed.  No cookie for you.
 	 */
-	if (!state || !state->info) {
+	if (!state) {
 		WARN_ON(1);
 		return -EL3HLT;
 	}
 
 	port = state->port;
-	circ = &state->info->xmit;
+	circ = &state->info.xmit;
 
 	if (!circ->buf)
 		return 0;
@@ -537,7 +538,7 @@ static int uart_write_room(struct tty_struct *tty)
 	int ret;
 
 	spin_lock_irqsave(&state->port->lock, flags);
-	ret = uart_circ_chars_free(&state->info->xmit);
+	ret = uart_circ_chars_free(&state->info.xmit);
 	spin_unlock_irqrestore(&state->port->lock, flags);
 	return ret;
 }
@@ -549,7 +550,7 @@ static int uart_chars_in_buffer(struct tty_struct *tty)
 	int ret;
 
 	spin_lock_irqsave(&state->port->lock, flags);
-	ret = uart_circ_chars_pending(&state->info->xmit);
+	ret = uart_circ_chars_pending(&state->info.xmit);
 	spin_unlock_irqrestore(&state->port->lock, flags);
 	return ret;
 }
@@ -564,7 +565,7 @@ static void uart_flush_buffer(struct tty_struct *tty)
 	 * This means you called this function _after_ the port was
 	 * closed.  No cookie for you.
 	 */
-	if (!state || !state->info) {
+	if (!state) {
 		WARN_ON(1);
 		return;
 	}
@@ -573,7 +574,7 @@ static void uart_flush_buffer(struct tty_struct *tty)
 	pr_debug("uart_flush_buffer(%d) called\n", tty->index);
 
 	spin_lock_irqsave(&port->lock, flags);
-	uart_circ_clear(&state->info->xmit);
+	uart_circ_clear(&state->info.xmit);
 	if (port->ops->flush_buffer)
 		port->ops->flush_buffer(port);
 	spin_unlock_irqrestore(&port->lock, flags);
@@ -837,15 +838,15 @@ static int uart_set_info(struct uart_state *state,
 	state->closing_wait    = closing_wait;
 	if (new_serial.xmit_fifo_size)
 		port->fifosize = new_serial.xmit_fifo_size;
-	if (state->info->port.tty)
-		state->info->port.tty->low_latency =
+	if (state->info.port.tty)
+		state->info.port.tty->low_latency =
 			(port->flags & UPF_LOW_LATENCY) ? 1 : 0;
 
  check_and_exit:
 	retval = 0;
 	if (port->type == PORT_UNKNOWN)
 		goto exit;
-	if (state->info->flags & UIF_INITIALIZED) {
+	if (state->info.flags & UIF_INITIALIZED) {
 		if (((old_flags ^ port->flags) & UPF_SPD_MASK) ||
 		    old_custom_divisor != port->custom_divisor) {
 			/*
@@ -858,7 +859,7 @@ static int uart_set_info(struct uart_state *state,
 				printk(KERN_NOTICE
 				       "%s sets custom speed on %s. This "
 				       "is deprecated.\n", current->comm,
-				       tty_name(state->info->port.tty, buf));
+				       tty_name(state->info.port.tty, buf));
 			}
 			uart_change_speed(state, NULL);
 		}
@@ -889,8 +890,8 @@ static int uart_get_lsr_info(struct uart_state *state,
 	 * interrupt happens).
 	 */
 	if (port->x_char ||
-	    ((uart_circ_chars_pending(&state->info->xmit) > 0) &&
-	     !state->info->port.tty->stopped && !state->info->port.tty->hw_stopped))
+	    ((uart_circ_chars_pending(&state->info.xmit) > 0) &&
+	     !state->info.port.tty->stopped && !state->info.port.tty->hw_stopped))
 		result &= ~TIOCSER_TEMT;
 
 	return put_user(result, value);
@@ -1017,7 +1018,7 @@ uart_wait_modem_status(struct uart_state *state, unsigned long arg)
 	port->ops->enable_ms(port);
 	spin_unlock_irq(&port->lock);
 
-	add_wait_queue(&state->info->delta_msr_wait, &wait);
+	add_wait_queue(&state->info.delta_msr_wait, &wait);
 	for (;;) {
 		spin_lock_irq(&port->lock);
 		memcpy(&cnow, &port->icount, sizeof(struct uart_icount));
@@ -1045,7 +1046,7 @@ uart_wait_modem_status(struct uart_state *state, unsigned long arg)
 	}
 
 	current->state = TASK_RUNNING;
-	remove_wait_queue(&state->info->delta_msr_wait, &wait);
+	remove_wait_queue(&state->info.delta_msr_wait, &wait);
 
 	return ret;
 }
@@ -1241,7 +1242,7 @@ static void uart_set_termios(struct tty_struct *tty,
 	 */
 	if (!(old_termios->c_cflag & CLOCAL) &&
 	    (tty->termios->c_cflag & CLOCAL))
-		wake_up_interruptible(&state->info->port.open_wait);
+		wake_up_interruptible(&info->port.open_wait);
 #endif
 }
 
@@ -1303,7 +1304,7 @@ static void uart_close(struct tty_struct *tty, struct file *filp)
 	 * At this point, we stop accepting input.  To do this, we
 	 * disable the receive line status interrupts.
 	 */
-	if (state->info->flags & UIF_INITIALIZED) {
+	if (state->info.flags & UIF_INITIALIZED) {
 		unsigned long flags;
 		spin_lock_irqsave(&port->lock, flags);
 		port->ops->stop_rx(port);
@@ -1322,9 +1323,9 @@ static void uart_close(struct tty_struct *tty, struct file *filp)
 	tty_ldisc_flush(tty);
 
 	tty->closing = 0;
-	state->info->port.tty = NULL;
+	state->info.port.tty = NULL;
 
-	if (state->info->port.blocked_open) {
+	if (state->info.port.blocked_open) {
 		if (state->close_delay)
 			msleep_interruptible(state->close_delay);
 	} else if (!uart_console(port)) {
@@ -1334,8 +1335,8 @@ static void uart_close(struct tty_struct *tty, struct file *filp)
 	/*
 	 * Wake up anyone trying to open this port.
 	 */
-	state->info->flags &= ~UIF_NORMAL_ACTIVE;
-	wake_up_interruptible(&state->info->port.open_wait);
+	state->info.flags &= ~UIF_NORMAL_ACTIVE;
+	wake_up_interruptible(&state->info.port.open_wait);
 
  done:
 	mutex_unlock(&state->mutex);
@@ -1409,19 +1410,20 @@ static void uart_wait_until_sent(struct tty_struct *tty, int timeout)
 static void uart_hangup(struct tty_struct *tty)
 {
 	struct uart_state *state = tty->driver_data;
+	struct uart_info *info = &state->info;
 
 	BUG_ON(!kernel_locked());
 	pr_debug("uart_hangup(%d)\n", state->port->line);
 
 	mutex_lock(&state->mutex);
-	if (state->info && state->info->flags & UIF_NORMAL_ACTIVE) {
+	if (info->flags & UIF_NORMAL_ACTIVE) {
 		uart_flush_buffer(tty);
 		uart_shutdown(state);
 		state->count = 0;
-		state->info->flags &= ~UIF_NORMAL_ACTIVE;
-		state->info->port.tty = NULL;
-		wake_up_interruptible(&state->info->port.open_wait);
-		wake_up_interruptible(&state->info->delta_msr_wait);
+		info->flags &= ~UIF_NORMAL_ACTIVE;
+		info->port.tty = NULL;
+		wake_up_interruptible(&info->port.open_wait);
+		wake_up_interruptible(&info->delta_msr_wait);
 	}
 	mutex_unlock(&state->mutex);
 }
@@ -1434,7 +1436,7 @@ static void uart_hangup(struct tty_struct *tty)
  */
 static void uart_update_termios(struct uart_state *state)
 {
-	struct tty_struct *tty = state->info->port.tty;
+	struct tty_struct *tty = state->info.port.tty;
 	struct uart_port *port = state->port;
 
 	if (uart_console(port) && port->cons->cflag) {
@@ -1469,7 +1471,7 @@ static int
 uart_block_til_ready(struct file *filp, struct uart_state *state)
 {
 	DECLARE_WAITQUEUE(wait, current);
-	struct uart_info *info = state->info;
+	struct uart_info *info = &state->info;
 	struct uart_port *port = state->port;
 	unsigned int mctrl;
 
@@ -1563,28 +1565,6 @@ static struct uart_state *uart_get(struct uart_driver *drv, int line)
 		ret = -ENXIO;
 		goto err_unlock;
 	}
-
-	/* BKL: RACE HERE - LEAK */
-	/* We should move this into the uart_state structure and kill off
-	   this whole complexity */
-	if (!state->info) {
-		state->info = kzalloc(sizeof(struct uart_info), GFP_KERNEL);
-		if (state->info) {
-			init_waitqueue_head(&state->info->port.open_wait);
-			init_waitqueue_head(&state->info->delta_msr_wait);
-
-			/*
-			 * Link the info into the other structures.
-			 */
-			state->port->info = state->info;
-
-			tasklet_init(&state->info->tlet, uart_tasklet_action,
-				     (unsigned long)state);
-		} else {
-			ret = -ENOMEM;
-			goto err_unlock;
-		}
-	}
 	return state;
 
  err_unlock:
@@ -1641,9 +1621,10 @@ static int uart_open(struct tty_struct *tty, struct file *filp)
 	 * Any failures from here onwards should not touch the count.
 	 */
 	tty->driver_data = state;
+	state->port->info = &state->info;
 	tty->low_latency = (state->port->flags & UPF_LOW_LATENCY) ? 1 : 0;
 	tty->alt_speed = 0;
-	state->info->port.tty = tty;
+	state->info.port.tty = tty;
 
 	/*
 	 * If the port is in the middle of closing, bail out now.
@@ -1676,8 +1657,8 @@ static int uart_open(struct tty_struct *tty, struct file *filp)
 	/*
 	 * If this is the first open to succeed, adjust things to suit.
 	 */
-	if (retval == 0 && !(state->info->flags & UIF_NORMAL_ACTIVE)) {
-		state->info->flags |= UIF_NORMAL_ACTIVE;
+	if (retval == 0 && !(state->info.flags & UIF_NORMAL_ACTIVE)) {
+		state->info.flags |= UIF_NORMAL_ACTIVE;
 
 		uart_update_termios(state);
 	}
@@ -2028,11 +2009,11 @@ int uart_suspend_port(struct uart_driver *drv, struct uart_port *port)
 	}
 	port->suspended = 1;
 
-	if (state->info && state->info->flags & UIF_INITIALIZED) {
+	if (state->info.flags & UIF_INITIALIZED) {
 		const struct uart_ops *ops = port->ops;
 		int tries;
 
-		state->info->flags = (state->info->flags & ~UIF_INITIALIZED)
+		state->info.flags = (state->info.flags & ~UIF_INITIALIZED)
 				     | UIF_SUSPENDED;
 
 		spin_lock_irq(&port->lock);
@@ -2107,15 +2088,15 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *port)
 		/*
 		 * If that's unset, use the tty termios setting.
 		 */
-		if (state->info && state->info->port.tty && termios.c_cflag == 0)
-			termios = *state->info->port.tty->termios;
+		if (state->info.port.tty && termios.c_cflag == 0)
+			termios = *state->info.port.tty->termios;
 
 		uart_change_pm(state, 0);
 		port->ops->set_termios(port, &termios, NULL);
 		console_start(port->cons);
 	}
 
-	if (state->info && state->info->flags & UIF_SUSPENDED) {
+	if (state->info.flags & UIF_SUSPENDED) {
 		const struct uart_ops *ops = port->ops;
 		int ret;
 
@@ -2130,7 +2111,7 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *port)
 			ops->set_mctrl(port, port->mctrl);
 			ops->start_tx(port);
 			spin_unlock_irq(&port->lock);
-			state->info->flags |= UIF_INITIALIZED;
+			state->info.flags |= UIF_INITIALIZED;
 		} else {
 			/*
 			 * Failed to resume - maybe hardware went away?
@@ -2140,7 +2121,7 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *port)
 			uart_shutdown(state);
 		}
 
-		state->info->flags &= ~UIF_SUSPENDED;
+		state->info.flags &= ~UIF_SUSPENDED;
 	}
 
 	mutex_unlock(&state->mutex);
@@ -2383,8 +2364,12 @@ int uart_register_driver(struct uart_driver *drv)
 
 		state->close_delay     = 500;	/* .5 seconds */
 		state->closing_wait    = 30000;	/* 30 seconds */
-
 		mutex_init(&state->mutex);
+
+		tty_port_init(&state->info.port);
+		init_waitqueue_head(&state->info.delta_msr_wait);
+		tasklet_init(&state->info.tlet, uart_tasklet_action,
+			     (unsigned long)state);
 	}
 
 	retval = tty_register_driver(normal);
@@ -2455,7 +2440,7 @@ int uart_add_one_port(struct uart_driver *drv, struct uart_port *port)
 	state->pm_state = -1;
 
 	port->cons = drv->cons;
-	port->info = state->info;
+	port->info = &state->info;
 
 	/*
 	 * If this port is a console, then the spinlock is already
@@ -2527,17 +2512,10 @@ int uart_remove_one_port(struct uart_driver *drv, struct uart_port *port)
 	 */
 	tty_unregister_device(drv->tty_driver, port->line);
 
-	info = state->info;
+	info = &state->info;
 	if (info && info->port.tty)
 		tty_vhangup(info->port.tty);
 
-	/*
-	 * All users of this port should now be disconnected from
-	 * this driver, and the port shut down.  We should be the
-	 * only thread fiddling with this port from now on.
-	 */
-	state->info = NULL;
-
 	/*
 	 * Free the port IO and memory resources, if any.
 	 */
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index feb3b939ec4b..2395969faa04 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -315,36 +315,14 @@ struct uart_port {
 	void			*private_data;		/* generic platform data pointer */
 };
 
-/*
- * This is the state information which is persistent across opens.
- * The low level driver must not to touch any elements contained
- * within.
- */
-struct uart_state {
-	unsigned int		close_delay;		/* msec */
-	unsigned int		closing_wait;		/* msec */
-
-#define USF_CLOSING_WAIT_INF	(0)
-#define USF_CLOSING_WAIT_NONE	(~0U)
-
-	int			count;
-	int			pm_state;
-	struct uart_info	*info;
-	struct uart_port	*port;
-
-	struct mutex		mutex;
-};
-
-#define UART_XMIT_SIZE	PAGE_SIZE
-
-typedef unsigned int __bitwise__ uif_t;
-
 /*
  * This is the state information which is only valid when the port
- * is open; it may be freed by the core driver once the device has
+ * is open; it may be cleared the core driver once the device has
  * been closed.  Either the low level driver or the core can modify
  * stuff here.
  */
+typedef unsigned int __bitwise__ uif_t;
+
 struct uart_info {
 	struct tty_port		port;
 	struct circ_buf		xmit;
@@ -366,6 +344,29 @@ struct uart_info {
 	wait_queue_head_t	delta_msr_wait;
 };
 
+/*
+ * This is the state information which is persistent across opens.
+ * The low level driver must not to touch any elements contained
+ * within.
+ */
+struct uart_state {
+	unsigned int		close_delay;		/* msec */
+	unsigned int		closing_wait;		/* msec */
+
+#define USF_CLOSING_WAIT_INF	(0)
+#define USF_CLOSING_WAIT_NONE	(~0U)
+
+	int			count;
+	int			pm_state;
+	struct uart_info	info;
+	struct uart_port	*port;
+
+	struct mutex		mutex;
+};
+
+#define UART_XMIT_SIZE	PAGE_SIZE
+
+
 /* number of characters left in xmit buffer before we ask for more */
 #define WAKEUP_CHARS		256
 
@@ -439,8 +440,13 @@ int uart_resume_port(struct uart_driver *reg, struct uart_port *port);
 #define uart_circ_chars_free(circ)	\
 	(CIRC_SPACE((circ)->head, (circ)->tail, UART_XMIT_SIZE))
 
-#define uart_tx_stopped(portp)		\
-	((portp)->info->port.tty->stopped || (portp)->info->port.tty->hw_stopped)
+static inline int uart_tx_stopped(struct uart_port *port)
+{
+	struct tty_struct *tty = port->info->port.tty;
+	if(tty->stopped || tty->hw_stopped)
+		return 1;
+	return 0;
+}
 
 /*
  * The following are helper functions for the low level drivers.
@@ -451,7 +457,7 @@ uart_handle_sysrq_char(struct uart_port *port, unsigned int ch)
 #ifdef SUPPORT_SYSRQ
 	if (port->sysrq) {
 		if (ch && time_before(jiffies, port->sysrq)) {
-			handle_sysrq(ch, port->info ? port->info->port.tty : NULL);
+			handle_sysrq(ch, port->info->port.tty);
 			port->sysrq = 0;
 			return 1;
 		}
-- 
cgit v1.2.3


From 7d6a07d123b62bf4fa71867420c23da3ca36c995 Mon Sep 17 00:00:00 2001
From: David Daney <ddaney@caviumnetworks.com>
Date: Fri, 2 Jan 2009 13:49:47 +0000
Subject: 8250: Serial driver changes to support future Cavium OCTEON serial
 patches.

In order to use Cavium OCTEON specific serial i/o drivers, we first
patch the 8250 driver to use replaceable I/O functions.  Compatible
I/O functions are added for existing iotypeS.

An added benefit of this change is that it makes it easy to factor
some of the existing special cases out to board/SOC specific support
code.

The alternative is to load up 8250.c with a bunch of OCTEON specific
iotype code and bug work-arounds.

Signed-off-by: David Daney <ddaney@caviumnetworks.com>
Signed-off-by: Tomaso Paoletti <tpaoletti@caviumnetworks.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/serial/8250.c       | 194 +++++++++++++++++++++++++++++++-------------
 include/linux/serial_8250.h |   2 +
 include/linux/serial_core.h |   2 +
 3 files changed, 140 insertions(+), 58 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c
index 8e28750a4058..849af9d21feb 100644
--- a/drivers/serial/8250.c
+++ b/drivers/serial/8250.c
@@ -303,16 +303,16 @@ static const u8 au_io_out_map[] = {
 };
 
 /* sane hardware needs no mapping */
-static inline int map_8250_in_reg(struct uart_8250_port *up, int offset)
+static inline int map_8250_in_reg(struct uart_port *p, int offset)
 {
-	if (up->port.iotype != UPIO_AU)
+	if (p->iotype != UPIO_AU)
 		return offset;
 	return au_io_in_map[offset];
 }
 
-static inline int map_8250_out_reg(struct uart_8250_port *up, int offset)
+static inline int map_8250_out_reg(struct uart_port *p, int offset)
 {
-	if (up->port.iotype != UPIO_AU)
+	if (p->iotype != UPIO_AU)
 		return offset;
 	return au_io_out_map[offset];
 }
@@ -341,16 +341,16 @@ static const u8
 		[UART_SCR]	= 0x2c
 	};
 
-static inline int map_8250_in_reg(struct uart_8250_port *up, int offset)
+static inline int map_8250_in_reg(struct uart_port *p, int offset)
 {
-	if (up->port.iotype != UPIO_RM9000)
+	if (p->iotype != UPIO_RM9000)
 		return offset;
 	return regmap_in[offset];
 }
 
-static inline int map_8250_out_reg(struct uart_8250_port *up, int offset)
+static inline int map_8250_out_reg(struct uart_port *p, int offset)
 {
-	if (up->port.iotype != UPIO_RM9000)
+	if (p->iotype != UPIO_RM9000)
 		return offset;
 	return regmap_out[offset];
 }
@@ -363,108 +363,170 @@ static inline int map_8250_out_reg(struct uart_8250_port *up, int offset)
 
 #endif
 
-static unsigned int serial_in(struct uart_8250_port *up, int offset)
+static unsigned int hub6_serial_in(struct uart_port *p, int offset)
 {
-	unsigned int tmp;
-	offset = map_8250_in_reg(up, offset) << up->port.regshift;
+	offset = map_8250_in_reg(p, offset) << p->regshift;
+	outb(p->hub6 - 1 + offset, p->iobase);
+	return inb(p->iobase + 1);
+}
 
-	switch (up->port.iotype) {
-	case UPIO_HUB6:
-		outb(up->port.hub6 - 1 + offset, up->port.iobase);
-		return inb(up->port.iobase + 1);
+static void hub6_serial_out(struct uart_port *p, int offset, int value)
+{
+	offset = map_8250_out_reg(p, offset) << p->regshift;
+	outb(p->hub6 - 1 + offset, p->iobase);
+	outb(value, p->iobase + 1);
+}
 
-	case UPIO_MEM:
-	case UPIO_DWAPB:
-		return readb(up->port.membase + offset);
+static unsigned int mem_serial_in(struct uart_port *p, int offset)
+{
+	offset = map_8250_in_reg(p, offset) << p->regshift;
+	return readb(p->membase + offset);
+}
 
-	case UPIO_RM9000:
-	case UPIO_MEM32:
-		return readl(up->port.membase + offset);
+static void mem_serial_out(struct uart_port *p, int offset, int value)
+{
+	offset = map_8250_out_reg(p, offset) << p->regshift;
+	writeb(value, p->membase + offset);
+}
+
+static void mem32_serial_out(struct uart_port *p, int offset, int value)
+{
+	offset = map_8250_out_reg(p, offset) << p->regshift;
+	writel(value, p->membase + offset);
+}
+
+static unsigned int mem32_serial_in(struct uart_port *p, int offset)
+{
+	offset = map_8250_in_reg(p, offset) << p->regshift;
+	return readl(p->membase + offset);
+}
 
 #ifdef CONFIG_SERIAL_8250_AU1X00
-	case UPIO_AU:
-		return __raw_readl(up->port.membase + offset);
+static unsigned int au_serial_in(struct uart_port *p, int offset)
+{
+	offset = map_8250_in_reg(p, offset) << p->regshift;
+	return __raw_readl(p->membase + offset);
+}
+
+static void au_serial_out(struct uart_port *p, int offset, int value)
+{
+	offset = map_8250_out_reg(p, offset) << p->regshift;
+	__raw_writel(value, p->membase + offset);
+}
 #endif
 
-	case UPIO_TSI:
-		if (offset == UART_IIR) {
-			tmp = readl(up->port.membase + (UART_IIR & ~3));
-			return (tmp >> 16) & 0xff; /* UART_IIR % 4 == 2 */
-		} else
-			return readb(up->port.membase + offset);
+static unsigned int tsi_serial_in(struct uart_port *p, int offset)
+{
+	unsigned int tmp;
+	offset = map_8250_in_reg(p, offset) << p->regshift;
+	if (offset == UART_IIR) {
+		tmp = readl(p->membase + (UART_IIR & ~3));
+		return (tmp >> 16) & 0xff; /* UART_IIR % 4 == 2 */
+	} else
+		return readb(p->membase + offset);
+}
 
-	default:
-		return inb(up->port.iobase + offset);
-	}
+static void tsi_serial_out(struct uart_port *p, int offset, int value)
+{
+	offset = map_8250_out_reg(p, offset) << p->regshift;
+	if (!((offset == UART_IER) && (value & UART_IER_UUE)))
+		writeb(value, p->membase + offset);
 }
 
-static void
-serial_out(struct uart_8250_port *up, int offset, int value)
+static void dwapb_serial_out(struct uart_port *p, int offset, int value)
 {
-	/* Save the offset before it's remapped */
 	int save_offset = offset;
-	offset = map_8250_out_reg(up, offset) << up->port.regshift;
+	offset = map_8250_out_reg(p, offset) << p->regshift;
+	/* Save the LCR value so it can be re-written when a
+	 * Busy Detect interrupt occurs. */
+	if (save_offset == UART_LCR) {
+		struct uart_8250_port *up = (struct uart_8250_port *)p;
+		up->lcr = value;
+	}
+	writeb(value, p->membase + offset);
+	/* Read the IER to ensure any interrupt is cleared before
+	 * returning from ISR. */
+	if (save_offset == UART_TX || save_offset == UART_IER)
+		value = p->serial_in(p, UART_IER);
+}
 
-	switch (up->port.iotype) {
+static unsigned int io_serial_in(struct uart_port *p, int offset)
+{
+	offset = map_8250_in_reg(p, offset) << p->regshift;
+	return inb(p->iobase + offset);
+}
+
+static void io_serial_out(struct uart_port *p, int offset, int value)
+{
+	offset = map_8250_out_reg(p, offset) << p->regshift;
+	outb(value, p->iobase + offset);
+}
+
+static void set_io_from_upio(struct uart_port *p)
+{
+	switch (p->iotype) {
 	case UPIO_HUB6:
-		outb(up->port.hub6 - 1 + offset, up->port.iobase);
-		outb(value, up->port.iobase + 1);
+		p->serial_in = hub6_serial_in;
+		p->serial_out = hub6_serial_out;
 		break;
 
 	case UPIO_MEM:
-		writeb(value, up->port.membase + offset);
+		p->serial_in = mem_serial_in;
+		p->serial_out = mem_serial_out;
 		break;
 
 	case UPIO_RM9000:
 	case UPIO_MEM32:
-		writel(value, up->port.membase + offset);
+		p->serial_in = mem32_serial_in;
+		p->serial_out = mem32_serial_out;
 		break;
 
 #ifdef CONFIG_SERIAL_8250_AU1X00
 	case UPIO_AU:
-		__raw_writel(value, up->port.membase + offset);
+		p->serial_in = au_serial_in;
+		p->serial_out = au_serial_out;
 		break;
 #endif
 	case UPIO_TSI:
-		if (!((offset == UART_IER) && (value & UART_IER_UUE)))
-			writeb(value, up->port.membase + offset);
+		p->serial_in = tsi_serial_in;
+		p->serial_out = tsi_serial_out;
 		break;
 
 	case UPIO_DWAPB:
-		/* Save the LCR value so it can be re-written when a
-		 * Busy Detect interrupt occurs. */
-		if (save_offset == UART_LCR)
-			up->lcr = value;
-		writeb(value, up->port.membase + offset);
-		/* Read the IER to ensure any interrupt is cleared before
-		 * returning from ISR. */
-		if (save_offset == UART_TX || save_offset == UART_IER)
-			value = serial_in(up, UART_IER);
+		p->serial_in = mem_serial_in;
+		p->serial_out = dwapb_serial_out;
 		break;
 
 	default:
-		outb(value, up->port.iobase + offset);
+		p->serial_in = io_serial_in;
+		p->serial_out = io_serial_out;
+		break;
 	}
 }
 
 static void
 serial_out_sync(struct uart_8250_port *up, int offset, int value)
 {
-	switch (up->port.iotype) {
+	struct uart_port *p = &up->port;
+	switch (p->iotype) {
 	case UPIO_MEM:
 	case UPIO_MEM32:
 #ifdef CONFIG_SERIAL_8250_AU1X00
 	case UPIO_AU:
 #endif
 	case UPIO_DWAPB:
-		serial_out(up, offset, value);
-		serial_in(up, UART_LCR);	/* safe, no side-effects */
+		p->serial_out(p, offset, value);
+		p->serial_in(p, UART_LCR);	/* safe, no side-effects */
 		break;
 	default:
-		serial_out(up, offset, value);
+		p->serial_out(p, offset, value);
 	}
 }
 
+#define serial_in(up, offset)		\
+	(up->port.serial_in(&(up)->port, (offset)))
+#define serial_out(up, offset, value)	\
+	(up->port.serial_out(&(up)->port, (offset), (value)))
 /*
  * We used to support using pause I/O for certain machines.  We
  * haven't supported this for a while, but just in case it's badly
@@ -2576,6 +2638,7 @@ static void __init serial8250_isa_init_ports(void)
 		up->port.membase  = old_serial_port[i].iomem_base;
 		up->port.iotype   = old_serial_port[i].io_type;
 		up->port.regshift = old_serial_port[i].iomem_reg_shift;
+		set_io_from_upio(&up->port);
 		if (share_irqs)
 			up->port.flags |= UPF_SHARE_IRQ;
 	}
@@ -2769,6 +2832,13 @@ int __init early_serial_setup(struct uart_port *port)
 	p->flags        = port->flags;
 	p->mapbase      = port->mapbase;
 	p->private_data = port->private_data;
+
+	set_io_from_upio(p);
+	if (port->serial_in)
+		p->serial_in = port->serial_in;
+	if (port->serial_out)
+		p->serial_out = port->serial_out;
+
 	return 0;
 }
 
@@ -2833,6 +2903,8 @@ static int __devinit serial8250_probe(struct platform_device *dev)
 		port.mapbase		= p->mapbase;
 		port.hub6		= p->hub6;
 		port.private_data	= p->private_data;
+		port.serial_in		= p->serial_in;
+		port.serial_out		= p->serial_out;
 		port.dev		= &dev->dev;
 		if (share_irqs)
 			port.flags |= UPF_SHARE_IRQ;
@@ -2986,6 +3058,12 @@ int serial8250_register_port(struct uart_port *port)
 		uart->port.private_data = port->private_data;
 		if (port->dev)
 			uart->port.dev = port->dev;
+		set_io_from_upio(&uart->port);
+		/* Possibly override default I/O functions.  */
+		if (port->serial_in)
+			uart->port.serial_in = port->serial_in;
+		if (port->serial_out)
+			uart->port.serial_out = port->serial_out;
 
 		ret = uart_add_one_port(&serial8250_reg, &uart->port);
 		if (ret == 0)
diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h
index 3d37c94abbc8..77d83d929f2c 100644
--- a/include/linux/serial_8250.h
+++ b/include/linux/serial_8250.h
@@ -28,6 +28,8 @@ struct plat_serial8250_port {
 	unsigned char	iotype;		/* UPIO_* */
 	unsigned char	hub6;
 	upf_t		flags;		/* UPF_* flags */
+	unsigned int	(*serial_in)(struct uart_port *, int);
+	void		(*serial_out)(struct uart_port *, int, int);
 };
 
 /*
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 2395969faa04..60061f44f3d8 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -248,6 +248,8 @@ struct uart_port {
 	spinlock_t		lock;			/* port lock */
 	unsigned long		iobase;			/* in/out[bwl] */
 	unsigned char __iomem	*membase;		/* read/write[bwl] */
+	unsigned int		(*serial_in)(struct uart_port *, int);
+	void			(*serial_out)(struct uart_port *, int, int);
 	unsigned int		irq;			/* irq number */
 	unsigned int		uartclk;		/* base uart clock */
 	unsigned int		fifosize;		/* tx fifo size */
-- 
cgit v1.2.3


From 8e23fcc89c8091790903927449f8efb9b4e23960 Mon Sep 17 00:00:00 2001
From: David Daney <ddaney@caviumnetworks.com>
Date: Fri, 2 Jan 2009 13:49:54 +0000
Subject: Serial: Allow port type to be specified when calling
 serial8250_register_port.

Add flag value UPF_FIXED_TYPE which specifies that the UART type is
known and should not be probed.  For this case the UARTs properties
are just copied out of the uart_config entry.

This allows us to keep SOC specific 8250 probe code out of 8250.c.  In
this case we know the serial hardware will not be changing as it is on
the same silicon as the CPU, and we can specify it with certainty in
the board/cpu setup code.

The alternative is to load up 8250.c with a bunch of OCTEON specific
special cases in the probing code.

Signed-off-by: David Daney <ddaney@caviumnetworks.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/serial/8250.c        | 9 +++++++++
 drivers/serial/serial_core.c | 7 +++++--
 include/linux/serial_8250.h  | 1 +
 include/linux/serial_core.h  | 2 ++
 4 files changed, 17 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c
index 849af9d21feb..3ae497422db5 100644
--- a/drivers/serial/8250.c
+++ b/drivers/serial/8250.c
@@ -2903,6 +2903,7 @@ static int __devinit serial8250_probe(struct platform_device *dev)
 		port.mapbase		= p->mapbase;
 		port.hub6		= p->hub6;
 		port.private_data	= p->private_data;
+		port.type		= p->type;
 		port.serial_in		= p->serial_in;
 		port.serial_out		= p->serial_out;
 		port.dev		= &dev->dev;
@@ -3058,6 +3059,14 @@ int serial8250_register_port(struct uart_port *port)
 		uart->port.private_data = port->private_data;
 		if (port->dev)
 			uart->port.dev = port->dev;
+
+		if (port->flags & UPF_FIXED_TYPE) {
+			uart->port.type = port->type;
+			uart->port.fifosize = uart_config[port->type].fifo_size;
+			uart->capabilities = uart_config[port->type].flags;
+			uart->tx_loadsz = uart_config[port->type].tx_loadsz;
+		}
+
 		set_io_from_upio(&uart->port);
 		/* Possibly override default I/O functions.  */
 		if (port->serial_in)
diff --git a/drivers/serial/serial_core.c b/drivers/serial/serial_core.c
index 9425ed69e0f7..dc68b7e0c930 100644
--- a/drivers/serial/serial_core.c
+++ b/drivers/serial/serial_core.c
@@ -2179,11 +2179,14 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state,
 	 * Now do the auto configuration stuff.  Note that config_port
 	 * is expected to claim the resources and map the port for us.
 	 */
-	flags = UART_CONFIG_TYPE;
+	flags = 0;
 	if (port->flags & UPF_AUTO_IRQ)
 		flags |= UART_CONFIG_IRQ;
 	if (port->flags & UPF_BOOT_AUTOCONF) {
-		port->type = PORT_UNKNOWN;
+		if (!(port->flags & UPF_FIXED_TYPE)) {
+			port->type = PORT_UNKNOWN;
+			flags |= UART_CONFIG_TYPE;
+		}
 		port->ops->config_port(port, flags);
 	}
 
diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h
index 77d83d929f2c..d4d2a78ad43e 100644
--- a/include/linux/serial_8250.h
+++ b/include/linux/serial_8250.h
@@ -28,6 +28,7 @@ struct plat_serial8250_port {
 	unsigned char	iotype;		/* UPIO_* */
 	unsigned char	hub6;
 	upf_t		flags;		/* UPF_* flags */
+	unsigned int	type;		/* If UPF_FIXED_TYPE */
 	unsigned int	(*serial_in)(struct uart_port *, int);
 	void		(*serial_out)(struct uart_port *, int, int);
 };
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 60061f44f3d8..f155252f148c 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -295,6 +295,8 @@ struct uart_port {
 #define UPF_MAGIC_MULTIPLIER	((__force upf_t) (1 << 16))
 #define UPF_CONS_FLOW		((__force upf_t) (1 << 23))
 #define UPF_SHARE_IRQ		((__force upf_t) (1 << 24))
+/* The exact UART type is known and should not be probed.  */
+#define UPF_FIXED_TYPE		((__force upf_t) (1 << 27))
 #define UPF_BOOT_AUTOCONF	((__force upf_t) (1 << 28))
 #define UPF_FIXED_PORT		((__force upf_t) (1 << 29))
 #define UPF_DEAD		((__force upf_t) (1 << 30))
-- 
cgit v1.2.3


From 6b06f19151c335ee0c5b61839fa4e6838182ebb8 Mon Sep 17 00:00:00 2001
From: David Daney <ddaney@caviumnetworks.com>
Date: Fri, 2 Jan 2009 13:50:00 +0000
Subject: Serial: UART driver changes for Cavium OCTEON.

Cavium UART implementation is not covered by existing uart_configS.
Define a new uart_config (PORT_OCTEON) which is specified by OCTEON
platform device registration code.

Signed-off-by: Tomaso Paoletti <tpaoletti@caviumnetworks.com>
Signed-off-by: David Daney <ddaney@caviumnetworks.com>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/serial/8250.c       | 7 +++++++
 include/linux/serial_core.h | 3 ++-
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c
index 3ae497422db5..daa00567bc44 100644
--- a/drivers/serial/8250.c
+++ b/drivers/serial/8250.c
@@ -279,6 +279,13 @@ static const struct serial8250_config uart_config[] = {
 		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10,
 		.flags		= UART_CAP_FIFO,
 	},
+	[PORT_OCTEON] = {
+		.name		= "OCTEON",
+		.fifo_size	= 64,
+		.tx_loadsz	= 64,
+		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10,
+		.flags		= UART_CAP_FIFO,
+	},
 };
 
 #if defined (CONFIG_SERIAL_8250_AU1X00)
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index f155252f148c..b4199841f1fc 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -40,7 +40,8 @@
 #define PORT_NS16550A	14
 #define PORT_XSCALE	15
 #define PORT_RM9000	16	/* PMC-Sierra RM9xxx internal UART */
-#define PORT_MAX_8250	16	/* max port ID */
+#define PORT_OCTEON	17	/* Cavium OCTEON internal UART */
+#define PORT_MAX_8250	17	/* max port ID */
 
 /*
  * ARM specific type numbers.  These are not currently guaranteed
-- 
cgit v1.2.3


From e65f0f8271b1b0452334e5da37fd35413a000de4 Mon Sep 17 00:00:00 2001
From: Flavio Leitner <fleitner@redhat.com>
Date: Fri, 2 Jan 2009 13:50:43 +0000
Subject: serial_8250: support for Sealevel Systems Model 7803 COMM+8

Add support for Sealevel Systems Model 7803 COMM+8

Signed-off-by: Flavio Leitner <fleitner@redhat.com>
Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/serial/8250_pci.c | 3 +++
 include/linux/pci_ids.h   | 1 +
 2 files changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c
index 2a2e1c717e8e..c088146b7513 100644
--- a/drivers/serial/8250_pci.c
+++ b/drivers/serial/8250_pci.c
@@ -2287,6 +2287,9 @@ static struct pci_device_id serial_pci_tbl[] = {
 	{	PCI_VENDOR_ID_SEALEVEL, PCI_DEVICE_ID_SEALEVEL_COMM8,
 		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
 		pbn_b2_8_115200 },
+	{	PCI_VENDOR_ID_SEALEVEL, PCI_DEVICE_ID_SEALEVEL_7803,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_b2_8_460800 },
 	{	PCI_VENDOR_ID_SEALEVEL, PCI_DEVICE_ID_SEALEVEL_UCOMM8,
 		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
 		pbn_b2_8_115200 },
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index fa83dfefc5e0..218c73b1e6d4 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1796,6 +1796,7 @@
 #define PCI_DEVICE_ID_SEALEVEL_UCOMM232	0x7202
 #define PCI_DEVICE_ID_SEALEVEL_COMM4	0x7401
 #define PCI_DEVICE_ID_SEALEVEL_COMM8	0x7801
+#define PCI_DEVICE_ID_SEALEVEL_7803	0x7803
 #define PCI_DEVICE_ID_SEALEVEL_UCOMM8	0x7804
 
 #define PCI_VENDOR_ID_HYPERCOPE		0x1365
-- 
cgit v1.2.3


From aecde8b53b8ee1330a5a8206200f0d6b8845a6e0 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Tue, 30 Dec 2008 07:14:19 -0300
Subject: V4L/DVB (10141): v4l2: debugging API changed to match against driver
 name instead of ID.

Since the i2c driver ID will be removed in the near future we have to
modify the v4l2 debugging API to use the driver name instead of driver ID.

Note that this API is not used in applications other than v4l2-dbg.cpp
as it is for debugging and testing only.

Should anyone use the old VIDIOC_G_CHIP_IDENT, then this will be logged
with a warning that it is deprecated and will be removed in 2.6.30.

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 Documentation/video4linux/v4l2-framework.txt    |  2 +-
 drivers/media/video/bt8xx/bttv-driver.c         |  9 +++--
 drivers/media/video/cafe_ccic.c                 |  7 ++--
 drivers/media/video/cs5345.c                    | 13 +++----
 drivers/media/video/cs53l32a.c                  |  2 +-
 drivers/media/video/cx18/cx18-i2c.c             | 28 +-------------
 drivers/media/video/cx18/cx18-i2c.h             |  1 -
 drivers/media/video/cx18/cx18-ioctl.c           | 41 ++++++++------------
 drivers/media/video/cx23885/cx23885-video.c     |  8 ++--
 drivers/media/video/cx25840/cx25840-core.c      | 13 +++----
 drivers/media/video/cx88/cx88-video.c           | 13 ++++---
 drivers/media/video/em28xx/em28xx-video.c       | 28 +++++++-------
 drivers/media/video/ivtv/ivtv-driver.c          |  7 ++--
 drivers/media/video/ivtv/ivtv-ioctl.c           | 21 +++++-----
 drivers/media/video/m52790.c                    | 13 +++----
 drivers/media/video/msp3400-driver.c            |  2 +-
 drivers/media/video/mt9m001.c                   | 19 ++++-----
 drivers/media/video/mt9m111.c                   | 19 ++++-----
 drivers/media/video/mt9t031.c                   | 18 ++++-----
 drivers/media/video/mt9v022.c                   | 19 ++++-----
 drivers/media/video/ov7670.c                    |  2 +-
 drivers/media/video/ov772x.c                    |  7 ++--
 drivers/media/video/pvrusb2/pvrusb2-hdw.c       | 11 +++---
 drivers/media/video/pvrusb2/pvrusb2-hdw.h       |  4 +-
 drivers/media/video/pvrusb2/pvrusb2-v4l2.c      |  6 +--
 drivers/media/video/saa7115.c                   | 13 +++----
 drivers/media/video/saa7127.c                   | 13 +++----
 drivers/media/video/saa7134/saa6752hs.c         |  2 +-
 drivers/media/video/saa7134/saa7134-empress.c   | 14 +++----
 drivers/media/video/saa7134/saa7134-video.c     |  9 +++--
 drivers/media/video/saa717x.c                   |  9 +++--
 drivers/media/video/soc_camera.c                |  6 +--
 drivers/media/video/tvaudio.c                   |  2 +-
 drivers/media/video/tvp5150.c                   | 13 +++----
 drivers/media/video/tw9910.c                    |  6 +--
 drivers/media/video/upd64031a.c                 | 13 +++----
 drivers/media/video/upd64083.c                  | 13 +++----
 drivers/media/video/usbvision/usbvision-video.c |  9 +++--
 drivers/media/video/v4l2-common.c               | 29 +++++++++-----
 drivers/media/video/v4l2-compat-ioctl32.c       |  3 +-
 drivers/media/video/v4l2-ioctl.c                | 15 +++++---
 drivers/media/video/v4l2-subdev.c               |  2 +-
 drivers/media/video/vp27smpx.c                  |  2 +-
 drivers/media/video/wm8739.c                    |  2 +-
 drivers/media/video/wm8775.c                    |  2 +-
 include/linux/videodev2.h                       | 51 ++++++++++++++++++-------
 include/media/soc_camera.h                      |  6 +--
 include/media/v4l2-chip-ident.h                 |  4 +-
 include/media/v4l2-common.h                     |  6 +--
 include/media/v4l2-int-device.h                 |  2 +-
 include/media/v4l2-ioctl.h                      |  6 +--
 include/media/v4l2-subdev.h                     |  6 +--
 52 files changed, 290 insertions(+), 281 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/video4linux/v4l2-framework.txt b/Documentation/video4linux/v4l2-framework.txt
index 3b483c1e0124..ff124374e9ba 100644
--- a/Documentation/video4linux/v4l2-framework.txt
+++ b/Documentation/video4linux/v4l2-framework.txt
@@ -184,7 +184,7 @@ may be NULL if the subdev driver does not support anything from that category.
 It looks like this:
 
 struct v4l2_subdev_core_ops {
-	int (*g_chip_ident)(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip);
+	int (*g_chip_ident)(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip);
 	int (*log_status)(struct v4l2_subdev *sd);
 	int (*init)(struct v4l2_subdev *sd, u32 val);
 	...
diff --git a/drivers/media/video/bt8xx/bttv-driver.c b/drivers/media/video/bt8xx/bttv-driver.c
index ebcb8e5e9c4d..d2f43bd2f841 100644
--- a/drivers/media/video/bt8xx/bttv-driver.c
+++ b/drivers/media/video/bt8xx/bttv-driver.c
@@ -2039,7 +2039,7 @@ static int bttv_log_status(struct file *file, void *f)
 
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 static int bttv_g_register(struct file *file, void *f,
-					struct v4l2_register *reg)
+					struct v4l2_dbg_register *reg)
 {
 	struct bttv_fh *fh = f;
 	struct bttv *btv = fh->btv;
@@ -2047,18 +2047,19 @@ static int bttv_g_register(struct file *file, void *f,
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	if (!v4l2_chip_match_host(reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_host(&reg->match))
 		return -EINVAL;
 
 	/* bt848 has a 12-bit register space */
 	reg->reg &= 0xfff;
 	reg->val = btread(reg->reg);
+	reg->size = 1;
 
 	return 0;
 }
 
 static int bttv_s_register(struct file *file, void *f,
-					struct v4l2_register *reg)
+					struct v4l2_dbg_register *reg)
 {
 	struct bttv_fh *fh = f;
 	struct bttv *btv = fh->btv;
@@ -2066,7 +2067,7 @@ static int bttv_s_register(struct file *file, void *f,
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	if (!v4l2_chip_match_host(reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_host(&reg->match))
 		return -EINVAL;
 
 	/* bt848 has a 12-bit register space */
diff --git a/drivers/media/video/cafe_ccic.c b/drivers/media/video/cafe_ccic.c
index 476171cf5001..34a39d2e4703 100644
--- a/drivers/media/video/cafe_ccic.c
+++ b/drivers/media/video/cafe_ccic.c
@@ -859,7 +859,7 @@ static int __cafe_cam_reset(struct cafe_camera *cam)
  */
 static int cafe_cam_init(struct cafe_camera *cam)
 {
-	struct v4l2_chip_ident chip = { V4L2_CHIP_MATCH_I2C_ADDR, 0, 0, 0 };
+	struct v4l2_dbg_chip_ident chip;
 	int ret;
 
 	mutex_lock(&cam->s_mutex);
@@ -869,8 +869,9 @@ static int cafe_cam_init(struct cafe_camera *cam)
 	ret = __cafe_cam_reset(cam);
 	if (ret)
 		goto out;
-	chip.match_chip = cam->sensor->addr;
-	ret = __cafe_cam_cmd(cam, VIDIOC_G_CHIP_IDENT, &chip);
+	chip.match.type = V4L2_CHIP_MATCH_I2C_ADDR;
+	chip.match.addr = cam->sensor->addr;
+	ret = __cafe_cam_cmd(cam, VIDIOC_DBG_G_CHIP_IDENT, &chip);
 	if (ret)
 		goto out;
 	cam->sensor_type = chip.ident;
diff --git a/drivers/media/video/cs5345.c b/drivers/media/video/cs5345.c
index 70fcd0d5de13..14bebf8a116f 100644
--- a/drivers/media/video/cs5345.c
+++ b/drivers/media/video/cs5345.c
@@ -95,25 +95,24 @@ static int cs5345_s_ctrl(struct v4l2_subdev *sd, struct v4l2_control *ctrl)
 }
 
 #ifdef CONFIG_VIDEO_ADV_DEBUG
-static int cs5345_g_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
+static int cs5345_g_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
-	if (!v4l2_chip_match_i2c_client(client,
-				reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_i2c_client(client, &reg->match))
 		return -EINVAL;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
+	reg->size = 1;
 	reg->val = cs5345_read(sd, reg->reg & 0x1f);
 	return 0;
 }
 
-static int cs5345_s_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
+static int cs5345_s_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
-	if (!v4l2_chip_match_i2c_client(client,
-				reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_i2c_client(client, &reg->match))
 		return -EINVAL;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
@@ -122,7 +121,7 @@ static int cs5345_s_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
 }
 #endif
 
-static int cs5345_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip)
+static int cs5345_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
diff --git a/drivers/media/video/cs53l32a.c b/drivers/media/video/cs53l32a.c
index cb65d519cf78..7292a6316e63 100644
--- a/drivers/media/video/cs53l32a.c
+++ b/drivers/media/video/cs53l32a.c
@@ -102,7 +102,7 @@ static int cs53l32a_s_ctrl(struct v4l2_subdev *sd, struct v4l2_control *ctrl)
 	return 0;
 }
 
-static int cs53l32a_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip)
+static int cs53l32a_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
diff --git a/drivers/media/video/cx18/cx18-i2c.c b/drivers/media/video/cx18/cx18-i2c.c
index 8941f58bed7f..83e1c6333126 100644
--- a/drivers/media/video/cx18/cx18-i2c.c
+++ b/drivers/media/video/cx18/cx18-i2c.c
@@ -242,7 +242,7 @@ int cx18_call_i2c_client(struct cx18 *cx, int addr, unsigned cmd, void *arg)
 			return retval;
 		}
 	}
-	if (cmd != VIDIOC_G_CHIP_IDENT)
+	if (cmd != VIDIOC_DBG_G_CHIP_IDENT)
 		CX18_ERR("i2c addr 0x%02x not found for cmd 0x%x!\n",
 			       addr, cmd);
 	return -ENODEV;
@@ -268,17 +268,6 @@ static int cx18_i2c_id_addr(struct cx18 *cx, u32 id)
 	return retval;
 }
 
-/* Find the i2c device name matching the DRIVERID */
-static const char *cx18_i2c_id_name(u32 id)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(hw_driverids); i++)
-		if (hw_driverids[i] == id)
-			return hw_devicenames[i];
-	return "unknown device";
-}
-
 /* Find the i2c device name matching the CX18_HW_ flag */
 static const char *cx18_i2c_hw_name(u32 hw)
 {
@@ -326,21 +315,6 @@ int cx18_i2c_hw(struct cx18 *cx, u32 hw, unsigned int cmd, void *arg)
 	return cx18_call_i2c_client(cx, addr, cmd, arg);
 }
 
-/* Calls i2c device based on I2C driver ID. */
-int cx18_i2c_id(struct cx18 *cx, u32 id, unsigned int cmd, void *arg)
-{
-	int addr;
-
-	addr = cx18_i2c_id_addr(cx, id);
-	if (addr < 0) {
-		if (cmd != VIDIOC_G_CHIP_IDENT)
-			CX18_ERR("i2c ID 0x%08x (%s) not found for cmd 0x%x!\n",
-				id, cx18_i2c_id_name(id), cmd);
-		return addr;
-	}
-	return cx18_call_i2c_client(cx, addr, cmd, arg);
-}
-
 /* broadcast cmd for all I2C clients and for the gpio subsystem */
 void cx18_call_i2c_clients(struct cx18 *cx, unsigned int cmd, void *arg)
 {
diff --git a/drivers/media/video/cx18/cx18-i2c.h b/drivers/media/video/cx18/cx18-i2c.h
index 113c3f9a2cc0..4869739013bd 100644
--- a/drivers/media/video/cx18/cx18-i2c.h
+++ b/drivers/media/video/cx18/cx18-i2c.h
@@ -23,7 +23,6 @@
 
 int cx18_i2c_hw_addr(struct cx18 *cx, u32 hw);
 int cx18_i2c_hw(struct cx18 *cx, u32 hw, unsigned int cmd, void *arg);
-int cx18_i2c_id(struct cx18 *cx, u32 id, unsigned int cmd, void *arg);
 int cx18_call_i2c_client(struct cx18 *cx, int addr, unsigned cmd, void *arg);
 void cx18_call_i2c_clients(struct cx18 *cx, unsigned int cmd, void *arg);
 int cx18_i2c_register(struct cx18 *cx, unsigned idx);
diff --git a/drivers/media/video/cx18/cx18-ioctl.c b/drivers/media/video/cx18/cx18-ioctl.c
index 8aa152b39545..7086aaba77d6 100644
--- a/drivers/media/video/cx18/cx18-ioctl.c
+++ b/drivers/media/video/cx18/cx18-ioctl.c
@@ -254,30 +254,24 @@ static int cx18_s_fmt_sliced_vbi_cap(struct file *file, void *fh,
 }
 
 static int cx18_g_chip_ident(struct file *file, void *fh,
-				struct v4l2_chip_ident *chip)
+				struct v4l2_dbg_chip_ident *chip)
 {
 	struct cx18 *cx = ((struct cx18_open_id *)fh)->cx;
 
 	chip->ident = V4L2_IDENT_NONE;
 	chip->revision = 0;
-	if (chip->match_type == V4L2_CHIP_MATCH_HOST) {
-		if (v4l2_chip_match_host(chip->match_type, chip->match_chip))
-			chip->ident = V4L2_IDENT_CX23418;
+	if (v4l2_chip_match_host(&chip->match)) {
+		chip->ident = V4L2_IDENT_CX23418;
 		return 0;
 	}
-	if (chip->match_type == V4L2_CHIP_MATCH_I2C_DRIVER)
-		return cx18_i2c_id(cx, chip->match_chip, VIDIOC_G_CHIP_IDENT,
-					chip);
-	if (chip->match_type == V4L2_CHIP_MATCH_I2C_ADDR)
-		return cx18_call_i2c_client(cx, chip->match_chip,
-						VIDIOC_G_CHIP_IDENT, chip);
-	return -EINVAL;
+	cx18_call_i2c_clients(cx, VIDIOC_DBG_G_CHIP_IDENT, chip);
+	return 0;
 }
 
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 static int cx18_cxc(struct cx18 *cx, unsigned int cmd, void *arg)
 {
-	struct v4l2_register *regs = arg;
+	struct v4l2_dbg_register *regs = arg;
 	unsigned long flags;
 
 	if (!capable(CAP_SYS_ADMIN))
@@ -286,6 +280,7 @@ static int cx18_cxc(struct cx18 *cx, unsigned int cmd, void *arg)
 		return -EINVAL;
 
 	spin_lock_irqsave(&cx18_cards_lock, flags);
+	regs->size = 4;
 	if (cmd == VIDIOC_DBG_G_REGISTER)
 		regs->val = cx18_read_enc(cx, regs->reg);
 	else
@@ -295,31 +290,25 @@ static int cx18_cxc(struct cx18 *cx, unsigned int cmd, void *arg)
 }
 
 static int cx18_g_register(struct file *file, void *fh,
-				struct v4l2_register *reg)
+				struct v4l2_dbg_register *reg)
 {
 	struct cx18 *cx = ((struct cx18_open_id *)fh)->cx;
 
-	if (v4l2_chip_match_host(reg->match_type, reg->match_chip))
+	if (v4l2_chip_match_host(&reg->match))
 		return cx18_cxc(cx, VIDIOC_DBG_G_REGISTER, reg);
-	if (reg->match_type == V4L2_CHIP_MATCH_I2C_DRIVER)
-		return cx18_i2c_id(cx, reg->match_chip, VIDIOC_DBG_G_REGISTER,
-					reg);
-	return cx18_call_i2c_client(cx, reg->match_chip, VIDIOC_DBG_G_REGISTER,
-					reg);
+	cx18_call_i2c_clients(cx, VIDIOC_DBG_G_REGISTER, reg);
+	return 0;
 }
 
 static int cx18_s_register(struct file *file, void *fh,
-				struct v4l2_register *reg)
+				struct v4l2_dbg_register *reg)
 {
 	struct cx18 *cx = ((struct cx18_open_id *)fh)->cx;
 
-	if (v4l2_chip_match_host(reg->match_type, reg->match_chip))
+	if (v4l2_chip_match_host(&reg->match))
 		return cx18_cxc(cx, VIDIOC_DBG_S_REGISTER, reg);
-	if (reg->match_type == V4L2_CHIP_MATCH_I2C_DRIVER)
-		return cx18_i2c_id(cx, reg->match_chip, VIDIOC_DBG_S_REGISTER,
-					reg);
-	return cx18_call_i2c_client(cx, reg->match_chip, VIDIOC_DBG_S_REGISTER,
-					reg);
+	cx18_call_i2c_clients(cx, VIDIOC_DBG_S_REGISTER, reg);
+	return 0;
 }
 #endif
 
diff --git a/drivers/media/video/cx23885/cx23885-video.c b/drivers/media/video/cx23885/cx23885-video.c
index 637c4d008846..2d81c4d04340 100644
--- a/drivers/media/video/cx23885/cx23885-video.c
+++ b/drivers/media/video/cx23885/cx23885-video.c
@@ -1326,11 +1326,11 @@ static int vidioc_s_frequency(struct file *file, void *priv,
 
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 static int vidioc_g_register(struct file *file, void *fh,
-				struct v4l2_register *reg)
+				struct v4l2_dbg_register *reg)
 {
 	struct cx23885_dev *dev = ((struct cx23885_fh *)fh)->dev;
 
-	if (!v4l2_chip_match_host(reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_host(&reg->match))
 		return -EINVAL;
 
 	cx23885_call_i2c_clients(&dev->i2c_bus[2], VIDIOC_DBG_G_REGISTER, reg);
@@ -1339,11 +1339,11 @@ static int vidioc_g_register(struct file *file, void *fh,
 }
 
 static int vidioc_s_register(struct file *file, void *fh,
-				struct v4l2_register *reg)
+				struct v4l2_dbg_register *reg)
 {
 	struct cx23885_dev *dev = ((struct cx23885_fh *)fh)->dev;
 
-	if (!v4l2_chip_match_host(reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_host(&reg->match))
 		return -EINVAL;
 
 	cx23885_call_i2c_clients(&dev->i2c_bus[2], VIDIOC_DBG_S_REGISTER, reg);
diff --git a/drivers/media/video/cx25840/cx25840-core.c b/drivers/media/video/cx25840/cx25840-core.c
index 2ad277189da8..88f2fd32bfe3 100644
--- a/drivers/media/video/cx25840/cx25840-core.c
+++ b/drivers/media/video/cx25840/cx25840-core.c
@@ -1120,25 +1120,24 @@ static int cx25840_init(struct v4l2_subdev *sd, u32 val)
 }
 
 #ifdef CONFIG_VIDEO_ADV_DEBUG
-static int cx25840_g_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
+static int cx25840_g_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
-	if (!v4l2_chip_match_i2c_client(client,
-				reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_i2c_client(client, &reg->match))
 		return -EINVAL;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
+	reg->size = 1;
 	reg->val = cx25840_read(client, reg->reg & 0x0fff);
 	return 0;
 }
 
-static int cx25840_s_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
+static int cx25840_s_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
-	if (!v4l2_chip_match_i2c_client(client,
-				reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_i2c_client(client, &reg->match))
 		return -EINVAL;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
@@ -1362,7 +1361,7 @@ static int cx25840_reset(struct v4l2_subdev *sd, u32 val)
 	return 0;
 }
 
-static int cx25840_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip)
+static int cx25840_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip)
 {
 	struct cx25840_state *state = to_state(sd);
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
diff --git a/drivers/media/video/cx88/cx88-video.c b/drivers/media/video/cx88/cx88-video.c
index b93b7ab99d8c..791e69d804f9 100644
--- a/drivers/media/video/cx88/cx88-video.c
+++ b/drivers/media/video/cx88/cx88-video.c
@@ -1447,25 +1447,26 @@ static int vidioc_s_frequency (struct file *file, void *priv,
 
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 static int vidioc_g_register (struct file *file, void *fh,
-				struct v4l2_register *reg)
+				struct v4l2_dbg_register *reg)
 {
 	struct cx88_core *core = ((struct cx8800_fh*)fh)->dev->core;
 
-	if (!v4l2_chip_match_host(reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_host(&reg->match))
 		return -EINVAL;
 	/* cx2388x has a 24-bit register space */
-	reg->val = cx_read(reg->reg&0xffffff);
+	reg->val = cx_read(reg->reg & 0xffffff);
+	reg->size = 4;
 	return 0;
 }
 
 static int vidioc_s_register (struct file *file, void *fh,
-				struct v4l2_register *reg)
+				struct v4l2_dbg_register *reg)
 {
 	struct cx88_core *core = ((struct cx8800_fh*)fh)->dev->core;
 
-	if (!v4l2_chip_match_host(reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_host(&reg->match))
 		return -EINVAL;
-	cx_write(reg->reg&0xffffff, reg->val);
+	cx_write(reg->reg & 0xffffff, reg->val);
 	return 0;
 }
 #endif
diff --git a/drivers/media/video/em28xx/em28xx-video.c b/drivers/media/video/em28xx/em28xx-video.c
index 9cb7c64a88fa..416b691c33c1 100644
--- a/drivers/media/video/em28xx/em28xx-video.c
+++ b/drivers/media/video/em28xx/em28xx-video.c
@@ -1154,7 +1154,7 @@ static int em28xx_reg_len(int reg)
 }
 
 static int vidioc_g_chip_ident(struct file *file, void *priv,
-	       struct v4l2_chip_ident *chip)
+	       struct v4l2_dbg_chip_ident *chip)
 {
 	struct em28xx_fh      *fh  = priv;
 	struct em28xx         *dev = fh->dev;
@@ -1162,20 +1162,20 @@ static int vidioc_g_chip_ident(struct file *file, void *priv,
 	chip->ident = V4L2_IDENT_NONE;
 	chip->revision = 0;
 
-	em28xx_i2c_call_clients(dev, VIDIOC_G_CHIP_IDENT, chip);
+	em28xx_i2c_call_clients(dev, VIDIOC_DBG_G_CHIP_IDENT, chip);
 
 	return 0;
 }
 
 
 static int vidioc_g_register(struct file *file, void *priv,
-			     struct v4l2_register *reg)
+			     struct v4l2_dbg_register *reg)
 {
 	struct em28xx_fh      *fh  = priv;
 	struct em28xx         *dev = fh->dev;
 	int ret;
 
-	switch (reg->match_type) {
+	switch (reg->match.type) {
 	case V4L2_CHIP_MATCH_AC97:
 		mutex_lock(&dev->lock);
 		ret = em28xx_read_ac97(dev, reg->reg);
@@ -1184,6 +1184,7 @@ static int vidioc_g_register(struct file *file, void *priv,
 			return ret;
 
 		reg->val = ret;
+		reg->size = 1;
 		return 0;
 	case V4L2_CHIP_MATCH_I2C_DRIVER:
 		em28xx_i2c_call_clients(dev, VIDIOC_DBG_G_REGISTER, reg);
@@ -1192,12 +1193,13 @@ static int vidioc_g_register(struct file *file, void *priv,
 		/* Not supported yet */
 		return -EINVAL;
 	default:
-		if (!v4l2_chip_match_host(reg->match_type, reg->match_chip))
+		if (!v4l2_chip_match_host(&reg->match))
 			return -EINVAL;
 	}
 
 	/* Match host */
-	if (em28xx_reg_len(reg->reg) == 1) {
+	reg->size = em28xx_reg_len(reg->reg);
+	if (reg->size == 1) {
 		mutex_lock(&dev->lock);
 		ret = em28xx_read_reg(dev, reg->reg);
 		mutex_unlock(&dev->lock);
@@ -1207,7 +1209,7 @@ static int vidioc_g_register(struct file *file, void *priv,
 
 		reg->val = ret;
 	} else {
-		__le64 val = 0;
+		__le16 val = 0;
 		mutex_lock(&dev->lock);
 		ret = em28xx_read_reg_req_len(dev, USB_REQ_GET_STATUS,
 						   reg->reg, (char *)&val, 2);
@@ -1215,21 +1217,21 @@ static int vidioc_g_register(struct file *file, void *priv,
 		if (ret < 0)
 			return ret;
 
-		reg->val = le64_to_cpu(val);
+		reg->val = le16_to_cpu(val);
 	}
 
 	return 0;
 }
 
 static int vidioc_s_register(struct file *file, void *priv,
-			     struct v4l2_register *reg)
+			     struct v4l2_dbg_register *reg)
 {
 	struct em28xx_fh      *fh  = priv;
 	struct em28xx         *dev = fh->dev;
-	__le64 buf;
+	__le16 buf;
 	int    rc;
 
-	switch (reg->match_type) {
+	switch (reg->match.type) {
 	case V4L2_CHIP_MATCH_AC97:
 		mutex_lock(&dev->lock);
 		rc = em28xx_write_ac97(dev, reg->reg, reg->val);
@@ -1243,12 +1245,12 @@ static int vidioc_s_register(struct file *file, void *priv,
 		/* Not supported yet */
 		return -EINVAL;
 	default:
-		if (!v4l2_chip_match_host(reg->match_type, reg->match_chip))
+		if (!v4l2_chip_match_host(&reg->match))
 			return -EINVAL;
 	}
 
 	/* Match host */
-	buf = cpu_to_le64(reg->val);
+	buf = cpu_to_le16(reg->val);
 
 	mutex_lock(&dev->lock);
 	rc = em28xx_write_regs(dev, reg->reg, (char *)&buf,
diff --git a/drivers/media/video/ivtv/ivtv-driver.c b/drivers/media/video/ivtv/ivtv-driver.c
index 08b762951759..e8e5921cdc34 100644
--- a/drivers/media/video/ivtv/ivtv-driver.c
+++ b/drivers/media/video/ivtv/ivtv-driver.c
@@ -902,18 +902,19 @@ static void ivtv_load_and_init_modules(struct ivtv *itv)
 	}
 
 	if (hw & IVTV_HW_SAA711X) {
-		struct v4l2_chip_ident v = { V4L2_CHIP_MATCH_I2C_DRIVER, I2C_DRIVERID_SAA711X };
+		struct v4l2_dbg_chip_ident v;
 
 		/* determine the exact saa711x model */
 		itv->hw_flags &= ~IVTV_HW_SAA711X;
 
+		v.match.type = V4L2_CHIP_MATCH_I2C_DRIVER;
+		strlcpy(v.match.name, "saa7115", sizeof(v.match.name));
 		ivtv_call_hw(itv, IVTV_HW_SAA711X, core, g_chip_ident, &v);
 		if (v.ident == V4L2_IDENT_SAA7114) {
 			itv->hw_flags |= IVTV_HW_SAA7114;
 			/* VBI is not yet supported by the saa7114 driver. */
 			itv->v4l2_cap &= ~(V4L2_CAP_SLICED_VBI_CAPTURE|V4L2_CAP_VBI_CAPTURE);
-		}
-		else {
+		} else {
 			itv->hw_flags |= IVTV_HW_SAA7115;
 		}
 		itv->vbi.raw_decoder_line_size = 1443;
diff --git a/drivers/media/video/ivtv/ivtv-ioctl.c b/drivers/media/video/ivtv/ivtv-ioctl.c
index 1f6ca93b9840..f6b3ef6e691b 100644
--- a/drivers/media/video/ivtv/ivtv-ioctl.c
+++ b/drivers/media/video/ivtv/ivtv-ioctl.c
@@ -674,19 +674,19 @@ static int ivtv_s_fmt_vid_out_overlay(struct file *file, void *fh, struct v4l2_f
 	return ret;
 }
 
-static int ivtv_g_chip_ident(struct file *file, void *fh, struct v4l2_chip_ident *chip)
+static int ivtv_g_chip_ident(struct file *file, void *fh, struct v4l2_dbg_chip_ident *chip)
 {
 	struct ivtv *itv = ((struct ivtv_open_id *)fh)->itv;
 
 	chip->ident = V4L2_IDENT_NONE;
 	chip->revision = 0;
-	if (chip->match_type == V4L2_CHIP_MATCH_HOST) {
-		if (v4l2_chip_match_host(chip->match_type, chip->match_chip))
+	if (chip->match.type == V4L2_CHIP_MATCH_HOST) {
+		if (v4l2_chip_match_host(&chip->match))
 			chip->ident = itv->has_cx23415 ? V4L2_IDENT_CX23415 : V4L2_IDENT_CX23416;
 		return 0;
 	}
-	if (chip->match_type != V4L2_CHIP_MATCH_I2C_DRIVER &&
-	    chip->match_type != V4L2_CHIP_MATCH_I2C_ADDR)
+	if (chip->match.type != V4L2_CHIP_MATCH_I2C_DRIVER &&
+	    chip->match.type != V4L2_CHIP_MATCH_I2C_ADDR)
 		return -EINVAL;
 	/* TODO: is this correct? */
 	return ivtv_call_all_err(itv, core, g_chip_ident, chip);
@@ -695,7 +695,7 @@ static int ivtv_g_chip_ident(struct file *file, void *fh, struct v4l2_chip_ident
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 static int ivtv_itvc(struct ivtv *itv, unsigned int cmd, void *arg)
 {
-	struct v4l2_register *regs = arg;
+	struct v4l2_dbg_register *regs = arg;
 	volatile u8 __iomem *reg_start;
 
 	if (!capable(CAP_SYS_ADMIN))
@@ -710,6 +710,7 @@ static int ivtv_itvc(struct ivtv *itv, unsigned int cmd, void *arg)
 	else
 		return -EINVAL;
 
+	regs->size = 4;
 	if (cmd == VIDIOC_DBG_G_REGISTER)
 		regs->val = readl(regs->reg + reg_start);
 	else
@@ -717,11 +718,11 @@ static int ivtv_itvc(struct ivtv *itv, unsigned int cmd, void *arg)
 	return 0;
 }
 
-static int ivtv_g_register(struct file *file, void *fh, struct v4l2_register *reg)
+static int ivtv_g_register(struct file *file, void *fh, struct v4l2_dbg_register *reg)
 {
 	struct ivtv *itv = ((struct ivtv_open_id *)fh)->itv;
 
-	if (v4l2_chip_match_host(reg->match_type, reg->match_chip))
+	if (v4l2_chip_match_host(&reg->match))
 		return ivtv_itvc(itv, VIDIOC_DBG_G_REGISTER, reg);
 	/* TODO: subdev errors should not be ignored, this should become a
 	   subdev helper function. */
@@ -729,11 +730,11 @@ static int ivtv_g_register(struct file *file, void *fh, struct v4l2_register *re
 	return 0;
 }
 
-static int ivtv_s_register(struct file *file, void *fh, struct v4l2_register *reg)
+static int ivtv_s_register(struct file *file, void *fh, struct v4l2_dbg_register *reg)
 {
 	struct ivtv *itv = ((struct ivtv_open_id *)fh)->itv;
 
-	if (v4l2_chip_match_host(reg->match_type, reg->match_chip))
+	if (v4l2_chip_match_host(&reg->match))
 		return ivtv_itvc(itv, VIDIOC_DBG_S_REGISTER, reg);
 	/* TODO: subdev errors should not be ignored, this should become a
 	   subdev helper function. */
diff --git a/drivers/media/video/m52790.c b/drivers/media/video/m52790.c
index 07be14a9fe7b..de397ef57b44 100644
--- a/drivers/media/video/m52790.c
+++ b/drivers/media/video/m52790.c
@@ -80,29 +80,28 @@ static int m52790_s_routing(struct v4l2_subdev *sd, const struct v4l2_routing *r
 }
 
 #ifdef CONFIG_VIDEO_ADV_DEBUG
-static int m52790_g_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
+static int m52790_g_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg)
 {
 	struct m52790_state *state = to_state(sd);
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
-	if (!v4l2_chip_match_i2c_client(client,
-				reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_i2c_client(client, &reg->match))
 		return -EINVAL;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 	if (reg->reg != 0)
 		return -EINVAL;
+	reg->size = 1;
 	reg->val = state->input | state->output;
 	return 0;
 }
 
-static int m52790_s_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
+static int m52790_s_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg)
 {
 	struct m52790_state *state = to_state(sd);
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
-	if (!v4l2_chip_match_i2c_client(client,
-				reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_i2c_client(client, &reg->match))
 		return -EINVAL;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
@@ -115,7 +114,7 @@ static int m52790_s_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
 }
 #endif
 
-static int m52790_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip)
+static int m52790_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
diff --git a/drivers/media/video/msp3400-driver.c b/drivers/media/video/msp3400-driver.c
index b8577ade4050..4d7a91852117 100644
--- a/drivers/media/video/msp3400-driver.c
+++ b/drivers/media/video/msp3400-driver.c
@@ -733,7 +733,7 @@ static int msp_queryctrl(struct v4l2_subdev *sd, struct v4l2_queryctrl *qc)
 	return 0;
 }
 
-static int msp_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip)
+static int msp_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip)
 {
 	struct msp_state *state = to_state(sd);
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
diff --git a/drivers/media/video/mt9m001.c b/drivers/media/video/mt9m001.c
index 1a1a12453672..c1bf75ef2741 100644
--- a/drivers/media/video/mt9m001.c
+++ b/drivers/media/video/mt9m001.c
@@ -343,14 +343,14 @@ static int mt9m001_try_fmt(struct soc_camera_device *icd,
 }
 
 static int mt9m001_get_chip_id(struct soc_camera_device *icd,
-			       struct v4l2_chip_ident *id)
+			       struct v4l2_dbg_chip_ident *id)
 {
 	struct mt9m001 *mt9m001 = container_of(icd, struct mt9m001, icd);
 
-	if (id->match_type != V4L2_CHIP_MATCH_I2C_ADDR)
+	if (id->match.type != V4L2_CHIP_MATCH_I2C_ADDR)
 		return -EINVAL;
 
-	if (id->match_chip != mt9m001->client->addr)
+	if (id->match.addr != mt9m001->client->addr)
 		return -ENODEV;
 
 	id->ident	= mt9m001->model;
@@ -361,16 +361,17 @@ static int mt9m001_get_chip_id(struct soc_camera_device *icd,
 
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 static int mt9m001_get_register(struct soc_camera_device *icd,
-				struct v4l2_register *reg)
+				struct v4l2_dbg_register *reg)
 {
 	struct mt9m001 *mt9m001 = container_of(icd, struct mt9m001, icd);
 
-	if (reg->match_type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0xff)
+	if (reg->match.type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0xff)
 		return -EINVAL;
 
-	if (reg->match_chip != mt9m001->client->addr)
+	if (reg->match.addr != mt9m001->client->addr)
 		return -ENODEV;
 
+	reg->size = 2;
 	reg->val = reg_read(icd, reg->reg);
 
 	if (reg->val > 0xffff)
@@ -380,14 +381,14 @@ static int mt9m001_get_register(struct soc_camera_device *icd,
 }
 
 static int mt9m001_set_register(struct soc_camera_device *icd,
-				struct v4l2_register *reg)
+				struct v4l2_dbg_register *reg)
 {
 	struct mt9m001 *mt9m001 = container_of(icd, struct mt9m001, icd);
 
-	if (reg->match_type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0xff)
+	if (reg->match.type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0xff)
 		return -EINVAL;
 
-	if (reg->match_chip != mt9m001->client->addr)
+	if (reg->match.addr != mt9m001->client->addr)
 		return -ENODEV;
 
 	if (reg_write(icd, reg->reg, reg->val) < 0)
diff --git a/drivers/media/video/mt9m111.c b/drivers/media/video/mt9m111.c
index c89ea41fe259..5b8e20979cce 100644
--- a/drivers/media/video/mt9m111.c
+++ b/drivers/media/video/mt9m111.c
@@ -514,14 +514,14 @@ static int mt9m111_try_fmt(struct soc_camera_device *icd,
 }
 
 static int mt9m111_get_chip_id(struct soc_camera_device *icd,
-			       struct v4l2_chip_ident *id)
+			       struct v4l2_dbg_chip_ident *id)
 {
 	struct mt9m111 *mt9m111 = container_of(icd, struct mt9m111, icd);
 
-	if (id->match_type != V4L2_CHIP_MATCH_I2C_ADDR)
+	if (id->match.type != V4L2_CHIP_MATCH_I2C_ADDR)
 		return -EINVAL;
 
-	if (id->match_chip != mt9m111->client->addr)
+	if (id->match.addr != mt9m111->client->addr)
 		return -ENODEV;
 
 	id->ident	= mt9m111->model;
@@ -532,18 +532,19 @@ static int mt9m111_get_chip_id(struct soc_camera_device *icd,
 
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 static int mt9m111_get_register(struct soc_camera_device *icd,
-				struct v4l2_register *reg)
+				struct v4l2_dbg_register *reg)
 {
 	int val;
 
 	struct mt9m111 *mt9m111 = container_of(icd, struct mt9m111, icd);
 
-	if (reg->match_type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0x2ff)
+	if (reg->match.type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0x2ff)
 		return -EINVAL;
-	if (reg->match_chip != mt9m111->client->addr)
+	if (reg->match.addr != mt9m111->client->addr)
 		return -ENODEV;
 
 	val = mt9m111_reg_read(icd, reg->reg);
+	reg->size = 2;
 	reg->val = (u64)val;
 
 	if (reg->val > 0xffff)
@@ -553,14 +554,14 @@ static int mt9m111_get_register(struct soc_camera_device *icd,
 }
 
 static int mt9m111_set_register(struct soc_camera_device *icd,
-				struct v4l2_register *reg)
+				struct v4l2_dbg_register *reg)
 {
 	struct mt9m111 *mt9m111 = container_of(icd, struct mt9m111, icd);
 
-	if (reg->match_type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0x2ff)
+	if (reg->match.type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0x2ff)
 		return -EINVAL;
 
-	if (reg->match_chip != mt9m111->client->addr)
+	if (reg->match.addr != mt9m111->client->addr)
 		return -ENODEV;
 
 	if (mt9m111_reg_write(icd, reg->reg, reg->val) < 0)
diff --git a/drivers/media/video/mt9t031.c b/drivers/media/video/mt9t031.c
index 1a9d53966d06..349d8e365530 100644
--- a/drivers/media/video/mt9t031.c
+++ b/drivers/media/video/mt9t031.c
@@ -326,14 +326,14 @@ static int mt9t031_try_fmt(struct soc_camera_device *icd,
 }
 
 static int mt9t031_get_chip_id(struct soc_camera_device *icd,
-			       struct v4l2_chip_ident *id)
+			       struct v4l2_dbg_chip_ident *id)
 {
 	struct mt9t031 *mt9t031 = container_of(icd, struct mt9t031, icd);
 
-	if (id->match_type != V4L2_CHIP_MATCH_I2C_ADDR)
+	if (id->match.type != V4L2_CHIP_MATCH_I2C_ADDR)
 		return -EINVAL;
 
-	if (id->match_chip != mt9t031->client->addr)
+	if (id->match.addr != mt9t031->client->addr)
 		return -ENODEV;
 
 	id->ident	= mt9t031->model;
@@ -344,14 +344,14 @@ static int mt9t031_get_chip_id(struct soc_camera_device *icd,
 
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 static int mt9t031_get_register(struct soc_camera_device *icd,
-				struct v4l2_register *reg)
+				struct v4l2_dbg_register *reg)
 {
 	struct mt9t031 *mt9t031 = container_of(icd, struct mt9t031, icd);
 
-	if (reg->match_type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0xff)
+	if (reg->match.type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0xff)
 		return -EINVAL;
 
-	if (reg->match_chip != mt9t031->client->addr)
+	if (reg->match.addr != mt9t031->client->addr)
 		return -ENODEV;
 
 	reg->val = reg_read(icd, reg->reg);
@@ -363,14 +363,14 @@ static int mt9t031_get_register(struct soc_camera_device *icd,
 }
 
 static int mt9t031_set_register(struct soc_camera_device *icd,
-				struct v4l2_register *reg)
+				struct v4l2_dbg_register *reg)
 {
 	struct mt9t031 *mt9t031 = container_of(icd, struct mt9t031, icd);
 
-	if (reg->match_type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0xff)
+	if (reg->match.type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0xff)
 		return -EINVAL;
 
-	if (reg->match_chip != mt9t031->client->addr)
+	if (reg->match.addr != mt9t031->client->addr)
 		return -ENODEV;
 
 	if (reg_write(icd, reg->reg, reg->val) < 0)
diff --git a/drivers/media/video/mt9v022.c b/drivers/media/video/mt9v022.c
index 14a5f9c21ffa..b04c8cb1644d 100644
--- a/drivers/media/video/mt9v022.c
+++ b/drivers/media/video/mt9v022.c
@@ -422,14 +422,14 @@ static int mt9v022_try_fmt(struct soc_camera_device *icd,
 }
 
 static int mt9v022_get_chip_id(struct soc_camera_device *icd,
-			       struct v4l2_chip_ident *id)
+			       struct v4l2_dbg_chip_ident *id)
 {
 	struct mt9v022 *mt9v022 = container_of(icd, struct mt9v022, icd);
 
-	if (id->match_type != V4L2_CHIP_MATCH_I2C_ADDR)
+	if (id->match.type != V4L2_CHIP_MATCH_I2C_ADDR)
 		return -EINVAL;
 
-	if (id->match_chip != mt9v022->client->addr)
+	if (id->match.addr != mt9v022->client->addr)
 		return -ENODEV;
 
 	id->ident	= mt9v022->model;
@@ -440,16 +440,17 @@ static int mt9v022_get_chip_id(struct soc_camera_device *icd,
 
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 static int mt9v022_get_register(struct soc_camera_device *icd,
-				struct v4l2_register *reg)
+				struct v4l2_dbg_register *reg)
 {
 	struct mt9v022 *mt9v022 = container_of(icd, struct mt9v022, icd);
 
-	if (reg->match_type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0xff)
+	if (reg->match.type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0xff)
 		return -EINVAL;
 
-	if (reg->match_chip != mt9v022->client->addr)
+	if (reg->match.addr != mt9v022->client->addr)
 		return -ENODEV;
 
+	reg->size = 2;
 	reg->val = reg_read(icd, reg->reg);
 
 	if (reg->val > 0xffff)
@@ -459,14 +460,14 @@ static int mt9v022_get_register(struct soc_camera_device *icd,
 }
 
 static int mt9v022_set_register(struct soc_camera_device *icd,
-				struct v4l2_register *reg)
+				struct v4l2_dbg_register *reg)
 {
 	struct mt9v022 *mt9v022 = container_of(icd, struct mt9v022, icd);
 
-	if (reg->match_type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0xff)
+	if (reg->match.type != V4L2_CHIP_MATCH_I2C_ADDR || reg->reg > 0xff)
 		return -EINVAL;
 
-	if (reg->match_chip != mt9v022->client->addr)
+	if (reg->match.addr != mt9v022->client->addr)
 		return -ENODEV;
 
 	if (reg_write(icd, reg->reg, reg->val) < 0)
diff --git a/drivers/media/video/ov7670.c b/drivers/media/video/ov7670.c
index ea032f5f2f41..ca26b0c50cf2 100644
--- a/drivers/media/video/ov7670.c
+++ b/drivers/media/video/ov7670.c
@@ -1310,7 +1310,7 @@ static int ov7670_command(struct i2c_client *client, unsigned int cmd,
 		void *arg)
 {
 	switch (cmd) {
-	case VIDIOC_G_CHIP_IDENT:
+	case VIDIOC_DBG_G_CHIP_IDENT:
 		return v4l2_chip_ident_i2c_client(client, arg, V4L2_IDENT_OV7670, 0);
 
 	case VIDIOC_INT_RESET:
diff --git a/drivers/media/video/ov772x.c b/drivers/media/video/ov772x.c
index 54b736fcc07a..3c9e0ba974e9 100644
--- a/drivers/media/video/ov772x.c
+++ b/drivers/media/video/ov772x.c
@@ -724,7 +724,7 @@ static unsigned long ov772x_query_bus_param(struct soc_camera_device *icd)
 }
 
 static int ov772x_get_chip_id(struct soc_camera_device *icd,
-			      struct v4l2_chip_ident   *id)
+			      struct v4l2_dbg_chip_ident   *id)
 {
 	struct ov772x_priv *priv = container_of(icd, struct ov772x_priv, icd);
 
@@ -736,11 +736,12 @@ static int ov772x_get_chip_id(struct soc_camera_device *icd,
 
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 static int ov772x_get_register(struct soc_camera_device *icd,
-			       struct v4l2_register *reg)
+			       struct v4l2_dbg_register *reg)
 {
 	struct ov772x_priv *priv = container_of(icd, struct ov772x_priv, icd);
 	int                 ret;
 
+	reg->size = 1;
 	if (reg->reg > 0xff)
 		return -EINVAL;
 
@@ -754,7 +755,7 @@ static int ov772x_get_register(struct soc_camera_device *icd,
 }
 
 static int ov772x_set_register(struct soc_camera_device *icd,
-			       struct v4l2_register *reg)
+			       struct v4l2_dbg_register *reg)
 {
 	struct ov772x_priv *priv = container_of(icd, struct ov772x_priv, icd);
 
diff --git a/drivers/media/video/pvrusb2/pvrusb2-hdw.c b/drivers/media/video/pvrusb2/pvrusb2-hdw.c
index 4358079f1966..8fb92ac78c7b 100644
--- a/drivers/media/video/pvrusb2/pvrusb2-hdw.c
+++ b/drivers/media/video/pvrusb2/pvrusb2-hdw.c
@@ -4732,26 +4732,25 @@ static int pvr2_hdw_get_eeprom_addr(struct pvr2_hdw *hdw)
 
 
 int pvr2_hdw_register_access(struct pvr2_hdw *hdw,
-			     u32 match_type, u32 match_chip, u64 reg_id,
-			     int setFl,u64 *val_ptr)
+			     struct v4l2_dbg_match *match, u64 reg_id,
+			     int setFl, u64 *val_ptr)
 {
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 	struct pvr2_i2c_client *cp;
-	struct v4l2_register req;
+	struct v4l2_dbg_register req;
 	int stat = 0;
 	int okFl = 0;
 
 	if (!capable(CAP_SYS_ADMIN)) return -EPERM;
 
-	req.match_type = match_type;
-	req.match_chip = match_chip;
+	req.match = *match;
 	req.reg = reg_id;
 	if (setFl) req.val = *val_ptr;
 	mutex_lock(&hdw->i2c_list_lock); do {
 		list_for_each_entry(cp, &hdw->i2c_clients, list) {
 			if (!v4l2_chip_match_i2c_client(
 				    cp->client,
-				    req.match_type, req.match_chip)) {
+				    &req.match)) {
 				continue;
 			}
 			stat = pvr2_i2c_client_cmd(
diff --git a/drivers/media/video/pvrusb2/pvrusb2-hdw.h b/drivers/media/video/pvrusb2/pvrusb2-hdw.h
index 49482d1f2b28..1b4fec337c6b 100644
--- a/drivers/media/video/pvrusb2/pvrusb2-hdw.h
+++ b/drivers/media/video/pvrusb2/pvrusb2-hdw.h
@@ -242,8 +242,8 @@ void pvr2_hdw_v4l_store_minor_number(struct pvr2_hdw *,
    setFl   - true to set the register, false to read it
    val_ptr - storage location for source / result. */
 int pvr2_hdw_register_access(struct pvr2_hdw *,
-			     u32 match_type, u32 match_chip,u64 reg_id,
-			     int setFl,u64 *val_ptr);
+			     struct v4l2_dbg_match *match, u64 reg_id,
+			     int setFl, u64 *val_ptr);
 
 /* The following entry points are all lower level things you normally don't
    want to worry about. */
diff --git a/drivers/media/video/pvrusb2/pvrusb2-v4l2.c b/drivers/media/video/pvrusb2/pvrusb2-v4l2.c
index b9aedceb2c44..878fd52a73b3 100644
--- a/drivers/media/video/pvrusb2/pvrusb2-v4l2.c
+++ b/drivers/media/video/pvrusb2/pvrusb2-v4l2.c
@@ -851,11 +851,11 @@ static long pvr2_v4l2_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 	case VIDIOC_DBG_G_REGISTER:
 	{
 		u64 val;
-		struct v4l2_register *req = (struct v4l2_register *)arg;
+		struct v4l2_dbg_register *req = (struct v4l2_dbg_register *)arg;
 		if (cmd == VIDIOC_DBG_S_REGISTER) val = req->val;
 		ret = pvr2_hdw_register_access(
-			hdw,req->match_type,req->match_chip,req->reg,
-			cmd == VIDIOC_DBG_S_REGISTER,&val);
+			hdw, &req->match, req->reg,
+			cmd == VIDIOC_DBG_S_REGISTER, &val);
 		if (cmd == VIDIOC_DBG_G_REGISTER) req->val = val;
 		break;
 	}
diff --git a/drivers/media/video/saa7115.c b/drivers/media/video/saa7115.c
index 22708ecdf1bb..46c796c3fec8 100644
--- a/drivers/media/video/saa7115.c
+++ b/drivers/media/video/saa7115.c
@@ -1371,25 +1371,24 @@ static int saa711x_g_vbi_data(struct v4l2_subdev *sd, struct v4l2_sliced_vbi_dat
 }
 
 #ifdef CONFIG_VIDEO_ADV_DEBUG
-static int saa711x_g_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
+static int saa711x_g_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
-	if (!v4l2_chip_match_i2c_client(client,
-				reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_i2c_client(client, &reg->match))
 		return -EINVAL;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 	reg->val = saa711x_read(sd, reg->reg & 0xff);
+	reg->size = 1;
 	return 0;
 }
 
-static int saa711x_s_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
+static int saa711x_s_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
-	if (!v4l2_chip_match_i2c_client(client,
-				reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_i2c_client(client, &reg->match))
 		return -EINVAL;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
@@ -1398,7 +1397,7 @@ static int saa711x_s_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
 }
 #endif
 
-static int saa711x_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip)
+static int saa711x_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip)
 {
 	struct saa711x_state *state = to_state(sd);
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
diff --git a/drivers/media/video/saa7127.c b/drivers/media/video/saa7127.c
index bfc85654795e..d6848f7a503b 100644
--- a/drivers/media/video/saa7127.c
+++ b/drivers/media/video/saa7127.c
@@ -623,25 +623,24 @@ static int saa7127_s_vbi_data(struct v4l2_subdev *sd, const struct v4l2_sliced_v
 }
 
 #ifdef CONFIG_VIDEO_ADV_DEBUG
-static int saa7127_g_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
+static int saa7127_g_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
-	if (!v4l2_chip_match_i2c_client(client,
-				reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_i2c_client(client, &reg->match))
 		return -EINVAL;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 	reg->val = saa7127_read(sd, reg->reg & 0xff);
+	reg->size = 1;
 	return 0;
 }
 
-static int saa7127_s_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
+static int saa7127_s_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
-	if (!v4l2_chip_match_i2c_client(client,
-				reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_i2c_client(client, &reg->match))
 		return -EINVAL;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
@@ -650,7 +649,7 @@ static int saa7127_s_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
 }
 #endif
 
-static int saa7127_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip)
+static int saa7127_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip)
 {
 	struct saa7127_state *state = to_state(sd);
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
diff --git a/drivers/media/video/saa7134/saa6752hs.c b/drivers/media/video/saa7134/saa6752hs.c
index 1fb6eccdade3..1fee6e84a512 100644
--- a/drivers/media/video/saa7134/saa6752hs.c
+++ b/drivers/media/video/saa7134/saa6752hs.c
@@ -838,7 +838,7 @@ saa6752hs_command(struct i2c_client *client, unsigned int cmd, void *arg)
 		h->standard = *((v4l2_std_id *) arg);
 		break;
 
-	case VIDIOC_G_CHIP_IDENT:
+	case VIDIOC_DBG_G_CHIP_IDENT:
 		return v4l2_chip_ident_i2c_client(client,
 				arg, h->chip, h->revision);
 
diff --git a/drivers/media/video/saa7134/saa7134-empress.c b/drivers/media/video/saa7134/saa7134-empress.c
index 3beba480137f..c9d8beb87a60 100644
--- a/drivers/media/video/saa7134/saa7134-empress.c
+++ b/drivers/media/video/saa7134/saa7134-empress.c
@@ -405,7 +405,7 @@ static int empress_querymenu(struct file *file, void *priv,
 }
 
 static int empress_g_chip_ident(struct file *file, void *fh,
-	       struct v4l2_chip_ident *chip)
+	       struct v4l2_dbg_chip_ident *chip)
 {
 	struct saa7134_dev *dev = file->private_data;
 
@@ -413,12 +413,12 @@ static int empress_g_chip_ident(struct file *file, void *fh,
 	chip->revision = 0;
 	if (dev->mpeg_i2c_client == NULL)
 		return -EINVAL;
-	if (chip->match_type == V4L2_CHIP_MATCH_I2C_DRIVER &&
-	    chip->match_chip == I2C_DRIVERID_SAA6752HS)
-		return saa7134_i2c_call_saa6752(dev, VIDIOC_G_CHIP_IDENT, chip);
-	if (chip->match_type == V4L2_CHIP_MATCH_I2C_ADDR &&
-	    chip->match_chip == dev->mpeg_i2c_client->addr)
-		return saa7134_i2c_call_saa6752(dev, VIDIOC_G_CHIP_IDENT, chip);
+	if (chip->match.type == V4L2_CHIP_MATCH_I2C_DRIVER &&
+	    !strcmp(chip->match.name, "saa6752hs"))
+		return saa7134_i2c_call_saa6752(dev, VIDIOC_DBG_G_CHIP_IDENT, chip);
+	if (chip->match.type == V4L2_CHIP_MATCH_I2C_ADDR &&
+	    chip->match.addr == dev->mpeg_i2c_client->addr)
+		return saa7134_i2c_call_saa6752(dev, VIDIOC_DBG_G_CHIP_IDENT, chip);
 	return -EINVAL;
 }
 
diff --git a/drivers/media/video/saa7134/saa7134-video.c b/drivers/media/video/saa7134/saa7134-video.c
index 6b2ab57538ee..a1f7e351f572 100644
--- a/drivers/media/video/saa7134/saa7134-video.c
+++ b/drivers/media/video/saa7134/saa7134-video.c
@@ -2247,24 +2247,25 @@ static int saa7134_g_parm(struct file *file, void *fh,
 
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 static int vidioc_g_register (struct file *file, void *priv,
-			      struct v4l2_register *reg)
+			      struct v4l2_dbg_register *reg)
 {
 	struct saa7134_fh *fh = priv;
 	struct saa7134_dev *dev = fh->dev;
 
-	if (!v4l2_chip_match_host(reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_host(&reg->match))
 		return -EINVAL;
 	reg->val = saa_readb(reg->reg);
+	reg->size = 1;
 	return 0;
 }
 
 static int vidioc_s_register (struct file *file, void *priv,
-				struct v4l2_register *reg)
+				struct v4l2_dbg_register *reg)
 {
 	struct saa7134_fh *fh = priv;
 	struct saa7134_dev *dev = fh->dev;
 
-	if (!v4l2_chip_match_host(reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_host(&reg->match))
 		return -EINVAL;
 	saa_writeb(reg->reg&0xffffff, reg->val);
 	return 0;
diff --git a/drivers/media/video/saa717x.c b/drivers/media/video/saa717x.c
index 9befca65905e..454ad1dd7507 100644
--- a/drivers/media/video/saa717x.c
+++ b/drivers/media/video/saa717x.c
@@ -1171,25 +1171,26 @@ static int saa717x_queryctrl(struct v4l2_subdev *sd, struct v4l2_queryctrl *qc)
 }
 
 #ifdef CONFIG_VIDEO_ADV_DEBUG
-static int saa717x_g_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
+static int saa717x_g_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
-	if (!v4l2_chip_match_i2c_client(client, reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_i2c_client(client, &reg->match))
 		return -EINVAL;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 	reg->val = saa717x_read(sd, reg->reg);
+	reg->size = 1;
 	return 0;
 }
 
-static int saa717x_s_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
+static int saa717x_s_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 	u16 addr = reg->reg & 0xffff;
 	u8 val = reg->val & 0xff;
 
-	if (!v4l2_chip_match_i2c_client(client, reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_i2c_client(client, &reg->match))
 		return -EINVAL;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
diff --git a/drivers/media/video/soc_camera.c b/drivers/media/video/soc_camera.c
index 9986e02bcf1a..fcb05f06de8f 100644
--- a/drivers/media/video/soc_camera.c
+++ b/drivers/media/video/soc_camera.c
@@ -699,7 +699,7 @@ static int soc_camera_s_crop(struct file *file, void *fh,
 }
 
 static int soc_camera_g_chip_ident(struct file *file, void *fh,
-				   struct v4l2_chip_ident *id)
+				   struct v4l2_dbg_chip_ident *id)
 {
 	struct soc_camera_file *icf = file->private_data;
 	struct soc_camera_device *icd = icf->icd;
@@ -712,7 +712,7 @@ static int soc_camera_g_chip_ident(struct file *file, void *fh,
 
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 static int soc_camera_g_register(struct file *file, void *fh,
-				 struct v4l2_register *reg)
+				 struct v4l2_dbg_register *reg)
 {
 	struct soc_camera_file *icf = file->private_data;
 	struct soc_camera_device *icd = icf->icd;
@@ -724,7 +724,7 @@ static int soc_camera_g_register(struct file *file, void *fh,
 }
 
 static int soc_camera_s_register(struct file *file, void *fh,
-				 struct v4l2_register *reg)
+				 struct v4l2_dbg_register *reg)
 {
 	struct soc_camera_file *icf = file->private_data;
 	struct soc_camera_device *icd = icf->icd;
diff --git a/drivers/media/video/tvaudio.c b/drivers/media/video/tvaudio.c
index d0c794da735b..5aeccb301cea 100644
--- a/drivers/media/video/tvaudio.c
+++ b/drivers/media/video/tvaudio.c
@@ -1762,7 +1762,7 @@ static int tvaudio_s_frequency(struct v4l2_subdev *sd, struct v4l2_frequency *fr
 	return 0;
 }
 
-static int tvaudio_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip)
+static int tvaudio_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
diff --git a/drivers/media/video/tvp5150.c b/drivers/media/video/tvp5150.c
index a388a9f0cb18..2cd64ef27b95 100644
--- a/drivers/media/video/tvp5150.c
+++ b/drivers/media/video/tvp5150.c
@@ -963,7 +963,7 @@ static int tvp5150_g_fmt(struct v4l2_subdev *sd, struct v4l2_format *fmt)
 
 
 static int tvp5150_g_chip_ident(struct v4l2_subdev *sd,
-				struct v4l2_chip_ident *chip)
+				struct v4l2_dbg_chip_ident *chip)
 {
 	int rev;
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
@@ -977,25 +977,24 @@ static int tvp5150_g_chip_ident(struct v4l2_subdev *sd,
 
 
 #ifdef CONFIG_VIDEO_ADV_DEBUG
-static int tvp5150_g_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
+static int tvp5150_g_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
-	if (!v4l2_chip_match_i2c_client(client,
-				reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_i2c_client(client, &reg->match))
 		return -EINVAL;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 	reg->val = tvp5150_read(sd, reg->reg & 0xff);
+	reg->size = 1;
 	return 0;
 }
 
-static int tvp5150_s_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
+static int tvp5150_s_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
-	if (!v4l2_chip_match_i2c_client(client,
-				reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_i2c_client(client, &reg->match))
 		return -EINVAL;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
diff --git a/drivers/media/video/tw9910.c b/drivers/media/video/tw9910.c
index d5cdc4be1a35..52c0357faa5d 100644
--- a/drivers/media/video/tw9910.c
+++ b/drivers/media/video/tw9910.c
@@ -575,7 +575,7 @@ static unsigned long tw9910_query_bus_param(struct soc_camera_device *icd)
 }
 
 static int tw9910_get_chip_id(struct soc_camera_device *icd,
-			      struct v4l2_chip_ident *id)
+			      struct v4l2_dbg_chip_ident *id)
 {
 	id->ident = V4L2_IDENT_TW9910;
 	id->revision = 0;
@@ -606,7 +606,7 @@ static int tw9910_enum_input(struct soc_camera_device *icd,
 
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 static int tw9910_get_register(struct soc_camera_device *icd,
-			       struct v4l2_register *reg)
+			       struct v4l2_dbg_register *reg)
 {
 	struct tw9910_priv *priv = container_of(icd, struct tw9910_priv, icd);
 	int ret;
@@ -627,7 +627,7 @@ static int tw9910_get_register(struct soc_camera_device *icd,
 }
 
 static int tw9910_set_register(struct soc_camera_device *icd,
-			       struct v4l2_register *reg)
+			       struct v4l2_dbg_register *reg)
 {
 	struct tw9910_priv *priv = container_of(icd, struct tw9910_priv, icd);
 
diff --git a/drivers/media/video/upd64031a.c b/drivers/media/video/upd64031a.c
index 7a609a3a6dbe..4f16effb530f 100644
--- a/drivers/media/video/upd64031a.c
+++ b/drivers/media/video/upd64031a.c
@@ -147,7 +147,7 @@ static int upd64031a_s_routing(struct v4l2_subdev *sd, const struct v4l2_routing
 	return upd64031a_s_frequency(sd, NULL);
 }
 
-static int upd64031a_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip)
+static int upd64031a_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
@@ -162,25 +162,24 @@ static int upd64031a_log_status(struct v4l2_subdev *sd)
 }
 
 #ifdef CONFIG_VIDEO_ADV_DEBUG
-static int upd64031a_g_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
+static int upd64031a_g_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
-	if (!v4l2_chip_match_i2c_client(client,
-				reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_i2c_client(client, &reg->match))
 		return -EINVAL;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 	reg->val = upd64031a_read(sd, reg->reg & 0xff);
+	reg->size = 1;
 	return 0;
 }
 
-static int upd64031a_s_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
+static int upd64031a_s_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
-	if (!v4l2_chip_match_i2c_client(client,
-				reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_i2c_client(client, &reg->match))
 		return -EINVAL;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
diff --git a/drivers/media/video/upd64083.c b/drivers/media/video/upd64083.c
index 58412cb9c01a..4b712f69d1b7 100644
--- a/drivers/media/video/upd64083.c
+++ b/drivers/media/video/upd64083.c
@@ -120,25 +120,24 @@ static int upd64083_s_routing(struct v4l2_subdev *sd, const struct v4l2_routing
 }
 
 #ifdef CONFIG_VIDEO_ADV_DEBUG
-static int upd64083_g_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
+static int upd64083_g_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
-	if (!v4l2_chip_match_i2c_client(client,
-				reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_i2c_client(client, &reg->match))
 		return -EINVAL;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 	reg->val = upd64083_read(sd, reg->reg & 0xff);
+	reg->size = 1;
 	return 0;
 }
 
-static int upd64083_s_register(struct v4l2_subdev *sd, struct v4l2_register *reg)
+static int upd64083_s_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
-	if (!v4l2_chip_match_i2c_client(client,
-				reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_i2c_client(client, &reg->match))
 		return -EINVAL;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
@@ -147,7 +146,7 @@ static int upd64083_s_register(struct v4l2_subdev *sd, struct v4l2_register *reg
 }
 #endif
 
-static int upd64083_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip)
+static int upd64083_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
diff --git a/drivers/media/video/usbvision/usbvision-video.c b/drivers/media/video/usbvision/usbvision-video.c
index 7c61c6d5cede..2be5e47ed081 100644
--- a/drivers/media/video/usbvision/usbvision-video.c
+++ b/drivers/media/video/usbvision/usbvision-video.c
@@ -477,12 +477,12 @@ static int usbvision_v4l2_close(struct file *file)
  */
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 static int vidioc_g_register (struct file *file, void *priv,
-				struct v4l2_register *reg)
+				struct v4l2_dbg_register *reg)
 {
 	struct usb_usbvision *usbvision = video_drvdata(file);
 	int errCode;
 
-	if (!v4l2_chip_match_host(reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_host(&reg->match))
 		return -EINVAL;
 	/* NT100x has a 8-bit register space */
 	errCode = usbvision_read_reg(usbvision, reg->reg&0xff);
@@ -492,16 +492,17 @@ static int vidioc_g_register (struct file *file, void *priv,
 		return errCode;
 	}
 	reg->val = errCode;
+	reg->size = 1;
 	return 0;
 }
 
 static int vidioc_s_register (struct file *file, void *priv,
-				struct v4l2_register *reg)
+				struct v4l2_dbg_register *reg)
 {
 	struct usb_usbvision *usbvision = video_drvdata(file);
 	int errCode;
 
-	if (!v4l2_chip_match_host(reg->match_type, reg->match_chip))
+	if (!v4l2_chip_match_host(&reg->match))
 		return -EINVAL;
 	/* NT100x has a 8-bit register space */
 	errCode = usbvision_write_reg(usbvision, reg->reg&0xff, reg->val);
diff --git a/drivers/media/video/v4l2-common.c b/drivers/media/video/v4l2-common.c
index c676b0b0f708..b8f2be8d5c0e 100644
--- a/drivers/media/video/v4l2-common.c
+++ b/drivers/media/video/v4l2-common.c
@@ -797,11 +797,11 @@ u32 v4l2_ctrl_next(const u32 * const * ctrl_classes, u32 id)
 }
 EXPORT_SYMBOL(v4l2_ctrl_next);
 
-int v4l2_chip_match_host(u32 match_type, u32 match_chip)
+int v4l2_chip_match_host(const struct v4l2_dbg_match *match)
 {
-	switch (match_type) {
+	switch (match->type) {
 	case V4L2_CHIP_MATCH_HOST:
-		return match_chip == 0;
+		return match->addr == 0;
 	default:
 		return 0;
 	}
@@ -809,23 +809,34 @@ int v4l2_chip_match_host(u32 match_type, u32 match_chip)
 EXPORT_SYMBOL(v4l2_chip_match_host);
 
 #if defined(CONFIG_I2C) || (defined(CONFIG_I2C_MODULE) && defined(MODULE))
-int v4l2_chip_match_i2c_client(struct i2c_client *c, u32 match_type, u32 match_chip)
+int v4l2_chip_match_i2c_client(struct i2c_client *c, const struct v4l2_dbg_match *match)
 {
-	switch (match_type) {
+	int len;
+
+	if (c == NULL || match == NULL)
+		return 0;
+
+	switch (match->type) {
 	case V4L2_CHIP_MATCH_I2C_DRIVER:
-		return (c != NULL && c->driver != NULL && c->driver->id == match_chip);
+		if (c->driver == NULL || c->driver->driver.name == NULL)
+			return 0;
+		len = strlen(c->driver->driver.name);
+		/* legacy drivers have a ' suffix, don't try to match that */
+		if (len && c->driver->driver.name[len - 1] == '\'')
+			len--;
+		return len && !strncmp(c->driver->driver.name, match->name, len);
 	case V4L2_CHIP_MATCH_I2C_ADDR:
-		return (c != NULL && c->addr == match_chip);
+		return c->addr == match->addr;
 	default:
 		return 0;
 	}
 }
 EXPORT_SYMBOL(v4l2_chip_match_i2c_client);
 
-int v4l2_chip_ident_i2c_client(struct i2c_client *c, struct v4l2_chip_ident *chip,
+int v4l2_chip_ident_i2c_client(struct i2c_client *c, struct v4l2_dbg_chip_ident *chip,
 		u32 ident, u32 revision)
 {
-	if (!v4l2_chip_match_i2c_client(c, chip->match_type, chip->match_chip))
+	if (!v4l2_chip_match_i2c_client(c, &chip->match))
 		return 0;
 	if (chip->ident == V4L2_IDENT_NONE) {
 		chip->ident = ident;
diff --git a/drivers/media/video/v4l2-compat-ioctl32.c b/drivers/media/video/v4l2-compat-ioctl32.c
index ec81b9737bd7..110376be5d2b 100644
--- a/drivers/media/video/v4l2-compat-ioctl32.c
+++ b/drivers/media/video/v4l2-compat-ioctl32.c
@@ -1046,7 +1046,8 @@ long v4l2_compat_ioctl32(struct file *file, unsigned int cmd, unsigned long arg)
 	case VIDIOC_TRY_ENCODER_CMD:
 	case VIDIOC_DBG_S_REGISTER:
 	case VIDIOC_DBG_G_REGISTER:
-	case VIDIOC_G_CHIP_IDENT:
+	case VIDIOC_DBG_G_CHIP_IDENT:
+	case VIDIOC_G_CHIP_IDENT_OLD:
 	case VIDIOC_S_HW_FREQ_SEEK:
 		ret = do_video_ioctl(file, cmd, arg);
 		break;
diff --git a/drivers/media/video/v4l2-ioctl.c b/drivers/media/video/v4l2-ioctl.c
index 8f629ef5b9ec..52d687b165e0 100644
--- a/drivers/media/video/v4l2-ioctl.c
+++ b/drivers/media/video/v4l2-ioctl.c
@@ -266,7 +266,7 @@ static const char *v4l2_ioctls[] = {
 	[_IOC_NR(VIDIOC_DBG_S_REGISTER)]   = "VIDIOC_DBG_S_REGISTER",
 	[_IOC_NR(VIDIOC_DBG_G_REGISTER)]   = "VIDIOC_DBG_G_REGISTER",
 
-	[_IOC_NR(VIDIOC_G_CHIP_IDENT)]     = "VIDIOC_G_CHIP_IDENT",
+	[_IOC_NR(VIDIOC_DBG_G_CHIP_IDENT)] = "VIDIOC_DBG_G_CHIP_IDENT",
 	[_IOC_NR(VIDIOC_S_HW_FREQ_SEEK)]   = "VIDIOC_S_HW_FREQ_SEEK",
 #endif
 };
@@ -1720,7 +1720,7 @@ static long __video_do_ioctl(struct file *file,
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 	case VIDIOC_DBG_G_REGISTER:
 	{
-		struct v4l2_register *p = arg;
+		struct v4l2_dbg_register *p = arg;
 
 		if (!capable(CAP_SYS_ADMIN))
 			ret = -EPERM;
@@ -1730,7 +1730,7 @@ static long __video_do_ioctl(struct file *file,
 	}
 	case VIDIOC_DBG_S_REGISTER:
 	{
-		struct v4l2_register *p = arg;
+		struct v4l2_dbg_register *p = arg;
 
 		if (!capable(CAP_SYS_ADMIN))
 			ret = -EPERM;
@@ -1739,9 +1739,9 @@ static long __video_do_ioctl(struct file *file,
 		break;
 	}
 #endif
-	case VIDIOC_G_CHIP_IDENT:
+	case VIDIOC_DBG_G_CHIP_IDENT:
 	{
-		struct v4l2_chip_ident *p = arg;
+		struct v4l2_dbg_chip_ident *p = arg;
 
 		if (!ops->vidioc_g_chip_ident)
 			break;
@@ -1750,6 +1750,11 @@ static long __video_do_ioctl(struct file *file,
 			dbgarg(cmd, "chip_ident=%u, revision=0x%x\n", p->ident, p->revision);
 		break;
 	}
+	case VIDIOC_G_CHIP_IDENT_OLD:
+		printk(KERN_ERR "VIDIOC_G_CHIP_IDENT has been deprecated and will disappear in 2.6.30.\n");
+		printk(KERN_ERR "It is a debugging ioctl and must not be used in applications!\n");
+		return -EINVAL;
+
 	case VIDIOC_S_HW_FREQ_SEEK:
 	{
 		struct v4l2_hw_freq_seek *p = arg;
diff --git a/drivers/media/video/v4l2-subdev.c b/drivers/media/video/v4l2-subdev.c
index e3612f29d0df..fbe9cc0d433a 100644
--- a/drivers/media/video/v4l2-subdev.c
+++ b/drivers/media/video/v4l2-subdev.c
@@ -37,7 +37,7 @@ int v4l2_subdev_command(struct v4l2_subdev *sd, unsigned cmd, void *arg)
 		return v4l2_subdev_call(sd, core, queryctrl, arg);
 	case VIDIOC_LOG_STATUS:
 		return v4l2_subdev_call(sd, core, log_status);
-	case VIDIOC_G_CHIP_IDENT:
+	case VIDIOC_DBG_G_CHIP_IDENT:
 		return v4l2_subdev_call(sd, core, g_chip_ident, arg);
 	case VIDIOC_INT_S_STANDBY:
 		return v4l2_subdev_call(sd, core, s_standby, arg ? (*(u32 *)arg) : 0);
diff --git a/drivers/media/video/vp27smpx.c b/drivers/media/video/vp27smpx.c
index f72b859486ad..5d73f66d9f55 100644
--- a/drivers/media/video/vp27smpx.c
+++ b/drivers/media/video/vp27smpx.c
@@ -113,7 +113,7 @@ static int vp27smpx_g_tuner(struct v4l2_subdev *sd, struct v4l2_tuner *vt)
 	return 0;
 }
 
-static int vp27smpx_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip)
+static int vp27smpx_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
diff --git a/drivers/media/video/wm8739.c b/drivers/media/video/wm8739.c
index 12a31e7a5f6d..f2864d5cd180 100644
--- a/drivers/media/video/wm8739.c
+++ b/drivers/media/video/wm8739.c
@@ -233,7 +233,7 @@ static int wm8739_queryctrl(struct v4l2_subdev *sd, struct v4l2_queryctrl *qc)
 	return -EINVAL;
 }
 
-static int wm8739_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip)
+static int wm8739_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
diff --git a/drivers/media/video/wm8775.c b/drivers/media/video/wm8775.c
index d0220b0ec0bc..53fcd42843e0 100644
--- a/drivers/media/video/wm8775.c
+++ b/drivers/media/video/wm8775.c
@@ -130,7 +130,7 @@ static int wm8775_s_ctrl(struct v4l2_subdev *sd, struct v4l2_control *ctrl)
 	return 0;
 }
 
-static int wm8775_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip)
+static int wm8775_g_chip_ident(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index 1f126e30766c..5571dbe1c0ad 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -1370,25 +1370,41 @@ struct v4l2_streamparm {
 /*
  *	A D V A N C E D   D E B U G G I N G
  *
- *	NOTE: EXPERIMENTAL API
+ *	NOTE: EXPERIMENTAL API, NEVER RELY ON THIS IN APPLICATIONS!
+ *	FOR DEBUGGING, TESTING AND INTERNAL USE ONLY!
  */
 
 /* VIDIOC_DBG_G_REGISTER and VIDIOC_DBG_S_REGISTER */
 
 #define V4L2_CHIP_MATCH_HOST       0  /* Match against chip ID on host (0 for the host) */
-#define V4L2_CHIP_MATCH_I2C_DRIVER 1  /* Match against I2C driver ID */
+#define V4L2_CHIP_MATCH_I2C_DRIVER 1  /* Match against I2C driver name */
 #define V4L2_CHIP_MATCH_I2C_ADDR   2  /* Match against I2C 7-bit address */
 #define V4L2_CHIP_MATCH_AC97       3  /* Match against anciliary AC97 chip */
 
-struct v4l2_register {
-	__u32 match_type; /* Match type */
-	__u32 match_chip; /* Match this chip, meaning determined by match_type */
+struct v4l2_dbg_match {
+	__u32 type; /* Match type */
+	union {     /* Match this chip, meaning determined by type */
+		__u32 addr;
+		char name[32];
+	};
+} __attribute__ ((packed));
+
+struct v4l2_dbg_register {
+	struct v4l2_dbg_match match;
+	__u32 size;	/* register size in bytes */
 	__u64 reg;
 	__u64 val;
-};
+} __attribute__ ((packed));
+
+/* VIDIOC_DBG_G_CHIP_IDENT */
+struct v4l2_dbg_chip_ident {
+	struct v4l2_dbg_match match;
+	__u32 ident;       /* chip identifier as specified in <media/v4l2-chip-ident.h> */
+	__u32 revision;    /* chip revision, chip specific */
+} __attribute__ ((packed));
 
-/* VIDIOC_G_CHIP_IDENT */
-struct v4l2_chip_ident {
+/* VIDIOC_G_CHIP_IDENT_OLD: Deprecated, do not use */
+struct v4l2_chip_ident_old {
 	__u32 match_type;  /* Match type */
 	__u32 match_chip;  /* Match this chip, meaning determined by match_type */
 	__u32 ident;       /* chip identifier as specified in <media/v4l2-chip-ident.h> */
@@ -1460,13 +1476,22 @@ struct v4l2_chip_ident {
 #define VIDIOC_G_ENC_INDEX       _IOR('V', 76, struct v4l2_enc_idx)
 #define VIDIOC_ENCODER_CMD      _IOWR('V', 77, struct v4l2_encoder_cmd)
 #define VIDIOC_TRY_ENCODER_CMD  _IOWR('V', 78, struct v4l2_encoder_cmd)
+#endif
 
-/* Experimental, only implemented if CONFIG_VIDEO_ADV_DEBUG is defined */
-#define	VIDIOC_DBG_S_REGISTER 	 _IOW('V', 79, struct v4l2_register)
-#define	VIDIOC_DBG_G_REGISTER 	_IOWR('V', 80, struct v4l2_register)
-
-#define VIDIOC_G_CHIP_IDENT     _IOWR('V', 81, struct v4l2_chip_ident)
+#if 1
+/* Experimental, meant for debugging, testing and internal use.
+   Only implemented if CONFIG_VIDEO_ADV_DEBUG is defined.
+   You must be root to use these ioctls. Never use these in applications! */
+#define	VIDIOC_DBG_S_REGISTER 	 _IOW('V', 79, struct v4l2_dbg_register)
+#define	VIDIOC_DBG_G_REGISTER 	_IOWR('V', 80, struct v4l2_dbg_register)
+
+/* Experimental, meant for debugging, testing and internal use.
+   Never use this ioctl in applications! */
+#define VIDIOC_DBG_G_CHIP_IDENT _IOWR('V', 81, struct v4l2_dbg_chip_ident)
+/* This is deprecated and will go away in 2.6.30 */
+#define VIDIOC_G_CHIP_IDENT_OLD _IOWR('V', 81, struct v4l2_chip_ident_old)
 #endif
+
 #define VIDIOC_S_HW_FREQ_SEEK	 _IOW('V', 82, struct v4l2_hw_freq_seek)
 /* Reminder: when adding new ioctls please add support for them to
    drivers/media/video/v4l2-compat-ioctl32.c as well! */
diff --git a/include/media/soc_camera.h b/include/media/soc_camera.h
index 425b6a98c95c..7440d9250665 100644
--- a/include/media/soc_camera.h
+++ b/include/media/soc_camera.h
@@ -164,12 +164,12 @@ struct soc_camera_ops {
 	unsigned long (*query_bus_param)(struct soc_camera_device *);
 	int (*set_bus_param)(struct soc_camera_device *, unsigned long);
 	int (*get_chip_id)(struct soc_camera_device *,
-			   struct v4l2_chip_ident *);
+			   struct v4l2_dbg_chip_ident *);
 	int (*set_std)(struct soc_camera_device *, v4l2_std_id *);
 	int (*enum_input)(struct soc_camera_device *, struct v4l2_input *);
 #ifdef CONFIG_VIDEO_ADV_DEBUG
-	int (*get_register)(struct soc_camera_device *, struct v4l2_register *);
-	int (*set_register)(struct soc_camera_device *, struct v4l2_register *);
+	int (*get_register)(struct soc_camera_device *, struct v4l2_dbg_register *);
+	int (*set_register)(struct soc_camera_device *, struct v4l2_dbg_register *);
 #endif
 	int (*get_control)(struct soc_camera_device *, struct v4l2_control *);
 	int (*set_control)(struct soc_camera_device *, struct v4l2_control *);
diff --git a/include/media/v4l2-chip-ident.h b/include/media/v4l2-chip-ident.h
index 43dbb659f1f5..9aaf652b20ef 100644
--- a/include/media/v4l2-chip-ident.h
+++ b/include/media/v4l2-chip-ident.h
@@ -2,7 +2,7 @@
     v4l2 chip identifiers header
 
     This header provides a list of chip identifiers that can be returned
-    through the VIDIOC_G_CHIP_IDENT ioctl.
+    through the VIDIOC_DBG_G_CHIP_IDENT ioctl.
 
     Copyright (C) 2007 Hans Verkuil <hverkuil@xs4all.nl>
 
@@ -24,7 +24,7 @@
 #ifndef V4L2_CHIP_IDENT_H_
 #define V4L2_CHIP_IDENT_H_
 
-/* VIDIOC_G_CHIP_IDENT: identifies the actual chip installed on the board */
+/* VIDIOC_DBG_G_CHIP_IDENT: identifies the actual chip installed on the board */
 enum {
 	/* general idents: reserved range 0-49 */
 	V4L2_IDENT_NONE      = 0,       /* No chip matched */
diff --git a/include/media/v4l2-common.h b/include/media/v4l2-common.h
index f99c866d8c37..95e74f1874e1 100644
--- a/include/media/v4l2-common.h
+++ b/include/media/v4l2-common.h
@@ -114,10 +114,10 @@ u32 v4l2_ctrl_next(const u32 * const *ctrl_classes, u32 id);
 /* Register/chip ident helper function */
 
 struct i2c_client; /* forward reference */
-int v4l2_chip_match_i2c_client(struct i2c_client *c, u32 id_type, u32 chip_id);
-int v4l2_chip_ident_i2c_client(struct i2c_client *c, struct v4l2_chip_ident *chip,
+int v4l2_chip_match_i2c_client(struct i2c_client *c, const struct v4l2_dbg_match *match);
+int v4l2_chip_ident_i2c_client(struct i2c_client *c, struct v4l2_dbg_chip_ident *chip,
 		u32 ident, u32 revision);
-int v4l2_chip_match_host(u32 id_type, u32 chip_id);
+int v4l2_chip_match_host(const struct v4l2_dbg_match *match);
 
 /* ------------------------------------------------------------------------- */
 
diff --git a/include/media/v4l2-int-device.h b/include/media/v4l2-int-device.h
index ecda3c725837..fbf585561570 100644
--- a/include/media/v4l2-int-device.h
+++ b/include/media/v4l2-int-device.h
@@ -219,7 +219,7 @@ enum v4l2_int_ioctl_num {
 	vidioc_int_reset_num,
 	/* VIDIOC_INT_INIT */
 	vidioc_int_init_num,
-	/* VIDIOC_INT_G_CHIP_IDENT */
+	/* VIDIOC_DBG_G_CHIP_IDENT */
 	vidioc_int_g_chip_ident_num,
 
 	/*
diff --git a/include/media/v4l2-ioctl.h b/include/media/v4l2-ioctl.h
index bf0e723a99c1..b01c044868d0 100644
--- a/include/media/v4l2-ioctl.h
+++ b/include/media/v4l2-ioctl.h
@@ -225,12 +225,12 @@ struct v4l2_ioctl_ops {
 	/* Debugging ioctls */
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 	int (*vidioc_g_register)       (struct file *file, void *fh,
-					struct v4l2_register *reg);
+					struct v4l2_dbg_register *reg);
 	int (*vidioc_s_register)       (struct file *file, void *fh,
-					struct v4l2_register *reg);
+					struct v4l2_dbg_register *reg);
 #endif
 	int (*vidioc_g_chip_ident)     (struct file *file, void *fh,
-					struct v4l2_chip_ident *chip);
+					struct v4l2_dbg_chip_ident *chip);
 
 	int (*vidioc_enum_framesizes)   (struct file *file, void *fh,
 					 struct v4l2_frmsizeenum *fsize);
diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h
index 2517344313b6..37b09e56e943 100644
--- a/include/media/v4l2-subdev.h
+++ b/include/media/v4l2-subdev.h
@@ -69,7 +69,7 @@ struct tuner_setup;
    not yet implemented) since ops provide proper type-checking.
  */
 struct v4l2_subdev_core_ops {
-	int (*g_chip_ident)(struct v4l2_subdev *sd, struct v4l2_chip_ident *chip);
+	int (*g_chip_ident)(struct v4l2_subdev *sd, struct v4l2_dbg_chip_ident *chip);
 	int (*log_status)(struct v4l2_subdev *sd);
 	int (*init)(struct v4l2_subdev *sd, u32 val);
 	int (*s_standby)(struct v4l2_subdev *sd, u32 standby);
@@ -81,8 +81,8 @@ struct v4l2_subdev_core_ops {
 	int (*querymenu)(struct v4l2_subdev *sd, struct v4l2_querymenu *qm);
 	long (*ioctl)(struct v4l2_subdev *sd, unsigned int cmd, void *arg);
 #ifdef CONFIG_VIDEO_ADV_DEBUG
-	int (*g_register)(struct v4l2_subdev *sd, struct v4l2_register *reg);
-	int (*s_register)(struct v4l2_subdev *sd, struct v4l2_register *reg);
+	int (*g_register)(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg);
+	int (*s_register)(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg);
 #endif
 };
 
-- 
cgit v1.2.3


From cb889a2f3515b140bef193cf6ffcdb099349b8aa Mon Sep 17 00:00:00 2001
From: Klaus Schmidinger <Klaus.Schmidinger@cadsoft.de>
Date: Wed, 31 Dec 2008 14:11:23 -0300
Subject: V4L/DVB (10164): Add missing S2 caps flag to S2API

The attached patch adds a capability flag that allows an application
to determine whether a particular device can handle "second generation
modulation" transponders. This is necessary in order for applications
to be able to decide which device to use for a given channel in
a multi device environment, where DVB-S and DVB-S2 devices are mixed.

It is assumed that a device capable of handling "second generation
modulation" can implicitly handle "first generation modulation".
The flag is not named anything with DVBS2 in order to allow its
use with future DVBT2 devices as well (should they ever come).

Signed-off by: Klaus Schmidinger <Klaus.Schmidinger@cadsoft.de>

Acked-by: Steven Toth <stoth@linuxtv.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/linux/dvb/frontend.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/dvb/frontend.h b/include/linux/dvb/frontend.h
index 79a8ed8e6a7d..926d28d526e7 100644
--- a/include/linux/dvb/frontend.h
+++ b/include/linux/dvb/frontend.h
@@ -63,6 +63,7 @@ typedef enum fe_caps {
 	FE_CAN_8VSB			= 0x200000,
 	FE_CAN_16VSB			= 0x400000,
 	FE_HAS_EXTENDED_CAPS		= 0x800000,   // We need more bitspace for newer APIs, indicate this.
+	FE_CAN_2G_MODULATION		= 0x10000000, // frontend supports "2nd generation modulation" (DVB-S2)
 	FE_NEEDS_BENDING		= 0x20000000, // not supported anymore, don't use (frontend requires frequency bending)
 	FE_CAN_RECOVER			= 0x40000000, // frontend can recover from a cable unplug automatically
 	FE_CAN_MUTE_TS			= 0x80000000  // frontend can stop spurious TS data output
-- 
cgit v1.2.3


From e4cda3e0728156c6be1d03e72ef20ea811da4ad5 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Wed, 31 Dec 2008 14:26:57 -0300
Subject: V4L/DVB (10166): dvb frontend: stop using non-C99 compliant comments

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/linux/dvb/frontend.h | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dvb/frontend.h b/include/linux/dvb/frontend.h
index 926d28d526e7..55026b1a40bd 100644
--- a/include/linux/dvb/frontend.h
+++ b/include/linux/dvb/frontend.h
@@ -62,11 +62,11 @@ typedef enum fe_caps {
 	FE_CAN_HIERARCHY_AUTO		= 0x100000,
 	FE_CAN_8VSB			= 0x200000,
 	FE_CAN_16VSB			= 0x400000,
-	FE_HAS_EXTENDED_CAPS		= 0x800000,   // We need more bitspace for newer APIs, indicate this.
-	FE_CAN_2G_MODULATION		= 0x10000000, // frontend supports "2nd generation modulation" (DVB-S2)
-	FE_NEEDS_BENDING		= 0x20000000, // not supported anymore, don't use (frontend requires frequency bending)
-	FE_CAN_RECOVER			= 0x40000000, // frontend can recover from a cable unplug automatically
-	FE_CAN_MUTE_TS			= 0x80000000  // frontend can stop spurious TS data output
+	FE_HAS_EXTENDED_CAPS		= 0x800000,   /* We need more bitspace for newer APIs, indicate this. */
+	FE_CAN_2G_MODULATION		= 0x10000000, /* frontend supports "2nd generation modulation" (DVB-S2) */
+	FE_NEEDS_BENDING		= 0x20000000, /* not supported anymore, don't use (frontend requires frequency bending) */
+	FE_CAN_RECOVER			= 0x40000000, /* frontend can recover from a cable unplug automatically */
+	FE_CAN_MUTE_TS			= 0x80000000  /* frontend can stop spurious TS data output */
 } fe_caps_t;
 
 
@@ -122,15 +122,15 @@ typedef enum fe_sec_mini_cmd {
 
 
 typedef enum fe_status {
-	FE_HAS_SIGNAL	= 0x01,   /*  found something above the noise level */
-	FE_HAS_CARRIER	= 0x02,   /*  found a DVB signal  */
-	FE_HAS_VITERBI	= 0x04,   /*  FEC is stable  */
-	FE_HAS_SYNC	= 0x08,   /*  found sync bytes  */
-	FE_HAS_LOCK	= 0x10,   /*  everything's working... */
-	FE_TIMEDOUT	= 0x20,   /*  no lock within the last ~2 seconds */
-	FE_REINIT	= 0x40    /*  frontend was reinitialized,  */
-} fe_status_t;			  /*  application is recommended to reset */
-				  /*  DiSEqC, tone and parameters */
+	FE_HAS_SIGNAL	= 0x01,   /* found something above the noise level */
+	FE_HAS_CARRIER	= 0x02,   /* found a DVB signal  */
+	FE_HAS_VITERBI	= 0x04,   /* FEC is stable  */
+	FE_HAS_SYNC	= 0x08,   /* found sync bytes  */
+	FE_HAS_LOCK	= 0x10,   /* everything's working... */
+	FE_TIMEDOUT	= 0x20,   /* no lock within the last ~2 seconds */
+	FE_REINIT	= 0x40    /* frontend was reinitialized,  */
+} fe_status_t;			  /* application is recommended to reset */
+				  /* DiSEqC, tone and parameters */
 
 typedef enum fe_spectral_inversion {
 	INVERSION_OFF,
-- 
cgit v1.2.3


From 6680598b44ed3c0052d155522eb21fc5a00de5f3 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 2 Jan 2009 18:53:14 +0100
Subject: Disallow gcc versions 3.{0,1}

GCC 3.0 and 3.1 are too old to build a working kernel.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
[ This check got dropped as obsolete when I simplified the gcc header
  inclusion mess in f153b82121b0366fe0e5f9553545cce237335175, but Willy
  Tarreau reports actually having those old versions still..  -Linus ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler-gcc3.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc3.h b/include/linux/compiler-gcc3.h
index 2befe6513ce4..8005effc04f1 100644
--- a/include/linux/compiler-gcc3.h
+++ b/include/linux/compiler-gcc3.h
@@ -2,6 +2,10 @@
 #error "Please don't include <linux/compiler-gcc3.h> directly, include <linux/compiler.h> instead."
 #endif
 
+#if __GNUC_MINOR__ < 2
+# error Sorry, your compiler is too old - please upgrade it.
+#endif
+
 #if __GNUC_MINOR__ >= 3
 # define __used			__attribute__((__used__))
 #else
-- 
cgit v1.2.3


From 015ab17dc2e9de805c26e74f498b12ee5e8de07e Mon Sep 17 00:00:00 2001
From: Mark McLoughlin <markmc@redhat.com>
Date: Thu, 20 Nov 2008 14:04:20 +0000
Subject: intel-iommu: remove some unused struct intel_iommu fields

The seg, saved_msg and sysdev fields appear to be unused since
before the code was first merged.

linux/msi.h is not needed in linux/intel-iommu.h anymore since
there is no longer a reference to struct msi_msg. The MSI code
in drivers/pci/intel-iommu.c still has linux/msi.h included
via linux/dmar.h.

linux/sysdev.h isn't needed because there is no reference to
struct sys_device.

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/pci/intel-iommu.c   | 1 -
 include/linux/intel-iommu.h | 5 -----
 2 files changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 5c8baa43ac9c..8e5a445ba93a 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -27,7 +27,6 @@
 #include <linux/slab.h>
 #include <linux/irq.h>
 #include <linux/interrupt.h>
-#include <linux/sysdev.h>
 #include <linux/spinlock.h>
 #include <linux/pci.h>
 #include <linux/dmar.h>
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 3d017cfd245b..1bff7bf1bc2c 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -23,8 +23,6 @@
 #define _INTEL_IOMMU_H_
 
 #include <linux/types.h>
-#include <linux/msi.h>
-#include <linux/sysdev.h>
 #include <linux/iova.h>
 #include <linux/io.h>
 #include <linux/dma_remapping.h>
@@ -289,7 +287,6 @@ struct intel_iommu {
 	void __iomem	*reg; /* Pointer to hardware regs, virtual addr */
 	u64		cap;
 	u64		ecap;
-	int		seg;
 	u32		gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */
 	spinlock_t	register_lock; /* protect register handling */
 	int		seq_id;	/* sequence id of the iommu */
@@ -302,8 +299,6 @@ struct intel_iommu {
 
 	unsigned int irq;
 	unsigned char name[7];    /* Device Name */
-	struct msi_msg saved_msg;
-	struct sys_device sysdev;
 	struct iommu_flush flush;
 #endif
 	struct q_inval  *qi;            /* Queued invalidation info */
-- 
cgit v1.2.3


From 519a05491586dad04e687660e54c57882315b22b Mon Sep 17 00:00:00 2001
From: Mark McLoughlin <markmc@redhat.com>
Date: Thu, 20 Nov 2008 14:21:13 +0000
Subject: intel-iommu: make init_dmars() static

init_dmars() is not used outside of drivers/pci/intel-iommu.c

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/pci/intel-iommu.c     | 2 +-
 include/linux/dma_remapping.h | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 8e5a445ba93a..95ae3a9aea8a 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -1589,7 +1589,7 @@ static inline void iommu_prepare_isa(void)
 }
 #endif /* !CONFIG_DMAR_FLPY_WA */
 
-int __init init_dmars(void)
+static int __init init_dmars(void)
 {
 	struct dmar_drhd_unit *drhd;
 	struct dmar_rmrr_unit *rmrr;
diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h
index 952df39c989d..cf92c4924b8c 100644
--- a/include/linux/dma_remapping.h
+++ b/include/linux/dma_remapping.h
@@ -141,7 +141,6 @@ struct device_domain_info {
 	struct dmar_domain *domain; /* pointer to domain */
 };
 
-extern int init_dmars(void);
 extern void free_dmar_iommu(struct intel_iommu *iommu);
 
 extern int dmar_disabled;
-- 
cgit v1.2.3


From f27be03b271851fd54529f292c0f25b4c1f1a553 Mon Sep 17 00:00:00 2001
From: Mark McLoughlin <markmc@redhat.com>
Date: Thu, 20 Nov 2008 15:49:43 +0000
Subject: intel-iommu: move DMA_32/64BIT_PFN into intel-iommu.c

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/pci/intel-iommu.c     | 3 +++
 include/linux/dma_remapping.h | 5 -----
 2 files changed, 3 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 95ae3a9aea8a..6fadbb9bc180 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -53,6 +53,9 @@
 
 #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
 
+#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
+#define DMA_32BIT_PFN		IOVA_PFN(DMA_32BIT_MASK)
+#define DMA_64BIT_PFN		IOVA_PFN(DMA_64BIT_MASK)
 
 static void flush_unmaps_timeout(unsigned long data);
 
diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h
index cf92c4924b8c..2e5a5c0b6acd 100644
--- a/include/linux/dma_remapping.h
+++ b/include/linux/dma_remapping.h
@@ -9,11 +9,6 @@
 #define VTD_PAGE_MASK		(((u64)-1) << VTD_PAGE_SHIFT)
 #define VTD_PAGE_ALIGN(addr)	(((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK)
 
-#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
-#define DMA_32BIT_PFN		IOVA_PFN(DMA_32BIT_MASK)
-#define DMA_64BIT_PFN		IOVA_PFN(DMA_64BIT_MASK)
-
-
 /*
  * 0: Present
  * 1-11: Reserved
-- 
cgit v1.2.3


From 46b08e1a76b758193b0e7b889c6486a16eb1e9e2 Mon Sep 17 00:00:00 2001
From: Mark McLoughlin <markmc@redhat.com>
Date: Thu, 20 Nov 2008 15:49:44 +0000
Subject: intel-iommu: move root entry defs from dma_remapping.h

We keep the struct root_entry forward declaration for the
pointer in struct intel_iommu.

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/pci/intel-iommu.c     | 33 +++++++++++++++++++++++++++++++++
 include/linux/dma_remapping.h | 34 +---------------------------------
 2 files changed, 34 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 6fadbb9bc180..29bf2d8176e2 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -57,6 +57,39 @@
 #define DMA_32BIT_PFN		IOVA_PFN(DMA_32BIT_MASK)
 #define DMA_64BIT_PFN		IOVA_PFN(DMA_64BIT_MASK)
 
+/*
+ * 0: Present
+ * 1-11: Reserved
+ * 12-63: Context Ptr (12 - (haw-1))
+ * 64-127: Reserved
+ */
+struct root_entry {
+	u64	val;
+	u64	rsvd1;
+};
+#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
+static inline bool root_present(struct root_entry *root)
+{
+	return (root->val & 1);
+}
+static inline void set_root_present(struct root_entry *root)
+{
+	root->val |= 1;
+}
+static inline void set_root_value(struct root_entry *root, unsigned long value)
+{
+	root->val |= value & VTD_PAGE_MASK;
+}
+
+static inline struct context_entry *
+get_context_addr_from_root(struct root_entry *root)
+{
+	return (struct context_entry *)
+		(root_present(root)?phys_to_virt(
+		root->val & VTD_PAGE_MASK) :
+		NULL);
+}
+
 static void flush_unmaps_timeout(unsigned long data);
 
 DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h
index 2e5a5c0b6acd..d8521662a495 100644
--- a/include/linux/dma_remapping.h
+++ b/include/linux/dma_remapping.h
@@ -9,39 +9,7 @@
 #define VTD_PAGE_MASK		(((u64)-1) << VTD_PAGE_SHIFT)
 #define VTD_PAGE_ALIGN(addr)	(((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK)
 
-/*
- * 0: Present
- * 1-11: Reserved
- * 12-63: Context Ptr (12 - (haw-1))
- * 64-127: Reserved
- */
-struct root_entry {
-	u64	val;
-	u64	rsvd1;
-};
-#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
-static inline bool root_present(struct root_entry *root)
-{
-	return (root->val & 1);
-}
-static inline void set_root_present(struct root_entry *root)
-{
-	root->val |= 1;
-}
-static inline void set_root_value(struct root_entry *root, unsigned long value)
-{
-	root->val |= value & VTD_PAGE_MASK;
-}
-
-struct context_entry;
-static inline struct context_entry *
-get_context_addr_from_root(struct root_entry *root)
-{
-	return (struct context_entry *)
-		(root_present(root)?phys_to_virt(
-		root->val & VTD_PAGE_MASK) :
-		NULL);
-}
+struct root_entry;
 
 /*
  * low 64 bits:
-- 
cgit v1.2.3


From 7a8fc25e0cc6e75fa6fdb0a856490e324218550b Mon Sep 17 00:00:00 2001
From: Mark McLoughlin <markmc@redhat.com>
Date: Thu, 20 Nov 2008 15:49:45 +0000
Subject: intel-iommu: move context entry defs out from dma_remapping.h

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/pci/intel-iommu.c     | 38 ++++++++++++++++++++++++++++++++++++++
 include/linux/dma_remapping.h | 38 --------------------------------------
 2 files changed, 38 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 29bf2d8176e2..9d06f4bb6b5e 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -90,6 +90,44 @@ get_context_addr_from_root(struct root_entry *root)
 		NULL);
 }
 
+/*
+ * low 64 bits:
+ * 0: present
+ * 1: fault processing disable
+ * 2-3: translation type
+ * 12-63: address space root
+ * high 64 bits:
+ * 0-2: address width
+ * 3-6: aval
+ * 8-23: domain id
+ */
+struct context_entry {
+	u64 lo;
+	u64 hi;
+};
+#define context_present(c) ((c).lo & 1)
+#define context_fault_disable(c) (((c).lo >> 1) & 1)
+#define context_translation_type(c) (((c).lo >> 2) & 3)
+#define context_address_root(c) ((c).lo & VTD_PAGE_MASK)
+#define context_address_width(c) ((c).hi &  7)
+#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
+
+#define context_set_present(c) do {(c).lo |= 1;} while (0)
+#define context_set_fault_enable(c) \
+	do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
+#define context_set_translation_type(c, val) \
+	do { \
+		(c).lo &= (((u64)-1) << 4) | 3; \
+		(c).lo |= ((val) & 3) << 2; \
+	} while (0)
+#define CONTEXT_TT_MULTI_LEVEL 0
+#define context_set_address_root(c, val) \
+	do {(c).lo |= (val) & VTD_PAGE_MASK; } while (0)
+#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
+#define context_set_domain_id(c, val) \
+	do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
+#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
+
 static void flush_unmaps_timeout(unsigned long data);
 
 DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h
index d8521662a495..9a88f7d0262f 100644
--- a/include/linux/dma_remapping.h
+++ b/include/linux/dma_remapping.h
@@ -11,44 +11,6 @@
 
 struct root_entry;
 
-/*
- * low 64 bits:
- * 0: present
- * 1: fault processing disable
- * 2-3: translation type
- * 12-63: address space root
- * high 64 bits:
- * 0-2: address width
- * 3-6: aval
- * 8-23: domain id
- */
-struct context_entry {
-	u64 lo;
-	u64 hi;
-};
-#define context_present(c) ((c).lo & 1)
-#define context_fault_disable(c) (((c).lo >> 1) & 1)
-#define context_translation_type(c) (((c).lo >> 2) & 3)
-#define context_address_root(c) ((c).lo & VTD_PAGE_MASK)
-#define context_address_width(c) ((c).hi &  7)
-#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
-
-#define context_set_present(c) do {(c).lo |= 1;} while (0)
-#define context_set_fault_enable(c) \
-	do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
-#define context_set_translation_type(c, val) \
-	do { \
-		(c).lo &= (((u64)-1) << 4) | 3; \
-		(c).lo |= ((val) & 3) << 2; \
-	} while (0)
-#define CONTEXT_TT_MULTI_LEVEL 0
-#define context_set_address_root(c, val) \
-	do {(c).lo |= (val) & VTD_PAGE_MASK; } while (0)
-#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
-#define context_set_domain_id(c, val) \
-	do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
-#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
-
 /*
  * 0: readable
  * 1: writable
-- 
cgit v1.2.3


From 622ba12a4c2148999bda9b891bfd0c6ddcb6c57e Mon Sep 17 00:00:00 2001
From: Mark McLoughlin <markmc@redhat.com>
Date: Thu, 20 Nov 2008 15:49:46 +0000
Subject: intel-iommu: move DMA PTE defs out of dma_remapping.h

DMA_PTE_READ/WRITE are needed by kvm.

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/pci/intel-iommu.c     | 22 ++++++++++++++++++++++
 include/linux/dma_remapping.h | 22 ----------------------
 2 files changed, 22 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 9d06f4bb6b5e..26c5402b6f7c 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -128,6 +128,28 @@ struct context_entry {
 	do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
 #define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
 
+/*
+ * 0: readable
+ * 1: writable
+ * 2-6: reserved
+ * 7: super page
+ * 8-11: available
+ * 12-63: Host physcial address
+ */
+struct dma_pte {
+	u64 val;
+};
+#define dma_clear_pte(p)	do {(p).val = 0;} while (0)
+
+#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0)
+#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0)
+#define dma_set_pte_prot(p, prot) \
+		do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0)
+#define dma_pte_addr(p) ((p).val & VTD_PAGE_MASK)
+#define dma_set_pte_addr(p, addr) do {\
+		(p).val |= ((addr) & VTD_PAGE_MASK); } while (0)
+#define dma_pte_present(p) (((p).val & 3) != 0)
+
 static void flush_unmaps_timeout(unsigned long data);
 
 DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h
index 9a88f7d0262f..9d5874e3bec9 100644
--- a/include/linux/dma_remapping.h
+++ b/include/linux/dma_remapping.h
@@ -11,31 +11,9 @@
 
 struct root_entry;
 
-/*
- * 0: readable
- * 1: writable
- * 2-6: reserved
- * 7: super page
- * 8-11: available
- * 12-63: Host physcial address
- */
-struct dma_pte {
-	u64 val;
-};
-#define dma_clear_pte(p)	do {(p).val = 0;} while (0)
-
 #define DMA_PTE_READ (1)
 #define DMA_PTE_WRITE (2)
 
-#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0)
-#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0)
-#define dma_set_pte_prot(p, prot) \
-		do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0)
-#define dma_pte_addr(p) ((p).val & VTD_PAGE_MASK)
-#define dma_set_pte_addr(p, addr) do {\
-		(p).val |= ((addr) & VTD_PAGE_MASK); } while (0)
-#define dma_pte_present(p) (((p).val & 3) != 0)
-
 struct intel_iommu;
 
 struct dmar_domain {
-- 
cgit v1.2.3


From 99126f7ce14aff5f9371b2fa81fddb82be815794 Mon Sep 17 00:00:00 2001
From: Mark McLoughlin <markmc@redhat.com>
Date: Thu, 20 Nov 2008 15:49:47 +0000
Subject: intel-iommu: move struct dmar_domain def out dma_remapping.h

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/pci/intel-iommu.c     | 18 ++++++++++++++++++
 include/linux/dma_remapping.h | 22 ++--------------------
 2 files changed, 20 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 26c5402b6f7c..97c36b2ee611 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -150,6 +150,24 @@ struct dma_pte {
 		(p).val |= ((addr) & VTD_PAGE_MASK); } while (0)
 #define dma_pte_present(p) (((p).val & 3) != 0)
 
+struct dmar_domain {
+	int	id;			/* domain id */
+	struct intel_iommu *iommu;	/* back pointer to owning iommu */
+
+	struct list_head devices; 	/* all devices' list */
+	struct iova_domain iovad;	/* iova's that belong to this domain */
+
+	struct dma_pte	*pgd;		/* virtual address */
+	spinlock_t	mapping_lock;	/* page table lock */
+	int		gaw;		/* max guest address width */
+
+	/* adjusted guest address width, 0 is level 2 30-bit */
+	int		agaw;
+
+#define DOMAIN_FLAG_MULTIPLE_DEVICES 1
+	int		flags;
+};
+
 static void flush_unmaps_timeout(unsigned long data);
 
 DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h
index 9d5874e3bec9..333014468f17 100644
--- a/include/linux/dma_remapping.h
+++ b/include/linux/dma_remapping.h
@@ -9,30 +9,12 @@
 #define VTD_PAGE_MASK		(((u64)-1) << VTD_PAGE_SHIFT)
 #define VTD_PAGE_ALIGN(addr)	(((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK)
 
-struct root_entry;
-
 #define DMA_PTE_READ (1)
 #define DMA_PTE_WRITE (2)
 
 struct intel_iommu;
-
-struct dmar_domain {
-	int	id;			/* domain id */
-	struct intel_iommu *iommu;	/* back pointer to owning iommu */
-
-	struct list_head devices; 	/* all devices' list */
-	struct iova_domain iovad;	/* iova's that belong to this domain */
-
-	struct dma_pte	*pgd;		/* virtual address */
-	spinlock_t	mapping_lock;	/* page table lock */
-	int		gaw;		/* max guest address width */
-
-	/* adjusted guest address width, 0 is level 2 30-bit */
-	int		agaw;
-
-#define DOMAIN_FLAG_MULTIPLE_DEVICES 1
-	int		flags;
-};
+struct dmar_domain;
+struct root_entry;
 
 /* PCI domain-device relationship */
 struct device_domain_info {
-- 
cgit v1.2.3


From a647dacbb1389aa6a5fa631766c1eaea35905890 Mon Sep 17 00:00:00 2001
From: Mark McLoughlin <markmc@redhat.com>
Date: Thu, 20 Nov 2008 15:49:48 +0000
Subject: intel-iommu: move struct device_domain_info out of dma_remapping.h

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/pci/intel-iommu.c     | 10 ++++++++++
 include/linux/dma_remapping.h | 10 ----------
 2 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 97c36b2ee611..f23a02054bf7 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -168,6 +168,16 @@ struct dmar_domain {
 	int		flags;
 };
 
+/* PCI domain-device relationship */
+struct device_domain_info {
+	struct list_head link;	/* link to domain siblings */
+	struct list_head global; /* link to global list */
+	u8 bus;			/* PCI bus numer */
+	u8 devfn;		/* PCI devfn number */
+	struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
+	struct dmar_domain *domain; /* pointer to domain */
+};
+
 static void flush_unmaps_timeout(unsigned long data);
 
 DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h
index 333014468f17..4ef5f6bc0d68 100644
--- a/include/linux/dma_remapping.h
+++ b/include/linux/dma_remapping.h
@@ -16,16 +16,6 @@ struct intel_iommu;
 struct dmar_domain;
 struct root_entry;
 
-/* PCI domain-device relationship */
-struct device_domain_info {
-	struct list_head link;	/* link to domain siblings */
-	struct list_head global; /* link to global list */
-	u8 bus;			/* PCI bus numer */
-	u8 devfn;		/* PCI devfn number */
-	struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
-	struct dmar_domain *domain; /* pointer to domain */
-};
-
 extern void free_dmar_iommu(struct intel_iommu *iommu);
 
 extern int dmar_disabled;
-- 
cgit v1.2.3


From 58fa7304a2c2bfd46e505c293ef779aa1d9715c2 Mon Sep 17 00:00:00 2001
From: Mark McLoughlin <markmc@redhat.com>
Date: Thu, 20 Nov 2008 15:49:49 +0000
Subject: intel-iommu: kill off duplicate def of dmar_disabled

This is only used in dmar.c and intel-iommu.h, so dma_remapping.h
seems like the appropriate place for it.

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/dmar.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index f1984fc3e06d..f28440784cf0 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -144,7 +144,6 @@ struct dmar_rmrr_unit {
 	list_for_each_entry(rmrr, &dmar_rmrr_units, list)
 /* Intel DMAR  initialization functions */
 extern int intel_iommu_init(void);
-extern int dmar_disabled;
 #else
 static inline int intel_iommu_init(void)
 {
-- 
cgit v1.2.3


From 2abd7e167c1b281f99bb58d302225872bfae9123 Mon Sep 17 00:00:00 2001
From: Mark McLoughlin <markmc@redhat.com>
Date: Thu, 20 Nov 2008 15:49:50 +0000
Subject: intel-iommu: move iommu_prepare_gfx_mapping() out of dma_remapping.h

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/pci/intel-iommu.c     | 5 +++++
 include/linux/dma_remapping.h | 7 -------
 2 files changed, 5 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index f23a02054bf7..c1c59a619650 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -1686,6 +1686,11 @@ static void __init iommu_prepare_gfx_mapping(void)
 			printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
 	}
 }
+#else /* !CONFIG_DMAR_GFX_WA */
+static inline void iommu_prepare_gfx_mapping(void)
+{
+	return;
+}
 #endif
 
 #ifdef CONFIG_DMAR_FLOPPY_WA
diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h
index 4ef5f6bc0d68..7799a85614c1 100644
--- a/include/linux/dma_remapping.h
+++ b/include/linux/dma_remapping.h
@@ -20,11 +20,4 @@ extern void free_dmar_iommu(struct intel_iommu *iommu);
 
 extern int dmar_disabled;
 
-#ifndef CONFIG_DMAR_GFX_WA
-static inline void iommu_prepare_gfx_mapping(void)
-{
-	return;
-}
-#endif /* !CONFIG_DMAR_GFX_WA */
-
 #endif
-- 
cgit v1.2.3


From 1b5736839ae13dadc5947940144f95dd0f4a4a8c Mon Sep 17 00:00:00 2001
From: Weidong Han <weidong.han@intel.com>
Date: Mon, 8 Dec 2008 15:34:06 +0800
Subject: calculate agaw for each iommu

"SAGAW" capability may be different across iommus. Use a default agaw, but if default agaw is not supported in some iommus, choose a less supported agaw.

Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 drivers/pci/dmar.c            | 10 ++++++++++
 drivers/pci/intel-iommu.c     | 22 ++++++++++++++++++++++
 include/linux/dma_remapping.h |  1 +
 include/linux/intel-iommu.h   |  1 +
 4 files changed, 34 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 5f164ff3026e..f5a662a50acb 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -491,6 +491,7 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
 	int map_size;
 	u32 ver;
 	static int iommu_allocated = 0;
+	int agaw;
 
 	iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
 	if (!iommu)
@@ -506,6 +507,15 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
 	iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
 	iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
 
+	agaw = iommu_calculate_agaw(iommu);
+	if (agaw < 0) {
+		printk(KERN_ERR
+			"Cannot get a valid agaw for iommu (seq_id = %d)\n",
+			iommu->seq_id);
+		goto error;
+	}
+	iommu->agaw = agaw;
+
 	/* the registers might be more than one page */
 	map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
 		cap_max_fault_reg_offset(iommu->cap));
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 9dca689215eb..3ecfa2304c2c 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -362,6 +362,28 @@ void free_iova_mem(struct iova *iova)
 	kmem_cache_free(iommu_iova_cache, iova);
 }
 
+
+static inline int width_to_agaw(int width);
+
+/* calculate agaw for each iommu.
+ * "SAGAW" may be different across iommus, use a default agaw, and
+ * get a supported less agaw for iommus that don't support the default agaw.
+ */
+int iommu_calculate_agaw(struct intel_iommu *iommu)
+{
+	unsigned long sagaw;
+	int agaw = -1;
+
+	sagaw = cap_sagaw(iommu->cap);
+	for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
+	     agaw >= 0; agaw--) {
+		if (test_bit(agaw, &sagaw))
+			break;
+	}
+
+	return agaw;
+}
+
 /* in native case, each domain is related to only one iommu */
 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
 {
diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h
index 7799a85614c1..136f170cecc2 100644
--- a/include/linux/dma_remapping.h
+++ b/include/linux/dma_remapping.h
@@ -17,6 +17,7 @@ struct dmar_domain;
 struct root_entry;
 
 extern void free_dmar_iommu(struct intel_iommu *iommu);
+extern int iommu_calculate_agaw(struct intel_iommu *iommu);
 
 extern int dmar_disabled;
 
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 1bff7bf1bc2c..06349fd5871b 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -290,6 +290,7 @@ struct intel_iommu {
 	u32		gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */
 	spinlock_t	register_lock; /* protect register handling */
 	int		seq_id;	/* sequence id of the iommu */
+	int		agaw; /* agaw of this iommu */
 
 #ifdef CONFIG_DMAR
 	unsigned long 	*domain_ids; /* bitmap of domains */
-- 
cgit v1.2.3


From faa3d6f5ffe7bf60ebfd0d36513fbcda0eb0ea1a Mon Sep 17 00:00:00 2001
From: Weidong Han <weidong.han@intel.com>
Date: Mon, 8 Dec 2008 23:09:29 +0800
Subject: Change intel iommu APIs of virtual machine domain

These APIs are used by KVM to use VT-d

Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 drivers/pci/intel-iommu.c   | 129 ++++++++++++++++++++------------------------
 include/linux/intel-iommu.h |  20 +++----
 2 files changed, 70 insertions(+), 79 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 8a204d5bb427..f1380269cabd 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -2944,96 +2944,87 @@ static void vm_domain_exit(struct dmar_domain *domain)
 	free_domain_mem(domain);
 }
 
-void intel_iommu_domain_exit(struct dmar_domain *domain)
+struct dmar_domain *intel_iommu_alloc_domain(void)
 {
-	u64 end;
-
-	/* Domain 0 is reserved, so dont process it */
-	if (!domain)
-		return;
-
-	end = DOMAIN_MAX_ADDR(domain->gaw);
-	end = end & (~VTD_PAGE_MASK);
-
-	/* clear ptes */
-	dma_pte_clear_range(domain, 0, end);
-
-	/* free page tables */
-	dma_pte_free_pagetable(domain, 0, end);
-
-	iommu_free_domain(domain);
-	free_domain_mem(domain);
-}
-EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);
-
-struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
-{
-	struct dmar_drhd_unit *drhd;
 	struct dmar_domain *domain;
-	struct intel_iommu *iommu;
 
-	drhd = dmar_find_matched_drhd_unit(pdev);
-	if (!drhd) {
-		printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n");
-		return NULL;
-	}
-
-	iommu = drhd->iommu;
-	if (!iommu) {
-		printk(KERN_ERR
-			"intel_iommu_domain_alloc: iommu == NULL\n");
-		return NULL;
-	}
-	domain = iommu_alloc_domain(iommu);
+	domain = iommu_alloc_vm_domain();
 	if (!domain) {
 		printk(KERN_ERR
 			"intel_iommu_domain_alloc: domain == NULL\n");
 		return NULL;
 	}
-	if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
+	if (vm_domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
 		printk(KERN_ERR
 			"intel_iommu_domain_alloc: domain_init() failed\n");
-		intel_iommu_domain_exit(domain);
+		vm_domain_exit(domain);
 		return NULL;
 	}
+
 	return domain;
 }
-EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);
+EXPORT_SYMBOL_GPL(intel_iommu_alloc_domain);
 
-int intel_iommu_context_mapping(
-	struct dmar_domain *domain, struct pci_dev *pdev)
+void intel_iommu_free_domain(struct dmar_domain *domain)
 {
-	int rc;
-	rc = domain_context_mapping(domain, pdev);
-	return rc;
+	vm_domain_exit(domain);
 }
-EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);
+EXPORT_SYMBOL_GPL(intel_iommu_free_domain);
 
-int intel_iommu_page_mapping(
-	struct dmar_domain *domain, dma_addr_t iova,
-	u64 hpa, size_t size, int prot)
+int intel_iommu_attach_device(struct dmar_domain *domain,
+			      struct pci_dev *pdev)
 {
-	int rc;
-	rc = domain_page_mapping(domain, iova, hpa, size, prot);
-	return rc;
+	int ret;
+
+	/* normally pdev is not mapped */
+	if (unlikely(domain_context_mapped(pdev))) {
+		struct dmar_domain *old_domain;
+
+		old_domain = find_domain(pdev);
+		if (old_domain) {
+			if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
+				vm_domain_remove_one_dev_info(old_domain, pdev);
+			else
+				domain_remove_dev_info(old_domain);
+		}
+	}
+
+	ret = domain_context_mapping(domain, pdev);
+	if (ret)
+		return ret;
+
+	ret = vm_domain_add_dev_info(domain, pdev);
+	return ret;
 }
-EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);
+EXPORT_SYMBOL_GPL(intel_iommu_attach_device);
 
-void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
+void intel_iommu_detach_device(struct dmar_domain *domain,
+			       struct pci_dev *pdev)
 {
-	struct intel_iommu *iommu;
+	vm_domain_remove_one_dev_info(domain, pdev);
+}
+EXPORT_SYMBOL_GPL(intel_iommu_detach_device);
 
-	iommu = device_to_iommu(bus, devfn);
-	iommu_detach_dev(iommu, bus, devfn);
+int intel_iommu_map_address(struct dmar_domain *domain, dma_addr_t iova,
+			    u64 hpa, size_t size, int prot)
+{
+	int ret;
+	ret = domain_page_mapping(domain, iova, hpa, size, prot);
+	return ret;
 }
-EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);
+EXPORT_SYMBOL_GPL(intel_iommu_map_address);
 
-struct dmar_domain *
-intel_iommu_find_domain(struct pci_dev *pdev)
+void intel_iommu_unmap_address(struct dmar_domain *domain,
+			       dma_addr_t iova, size_t size)
 {
-	return find_domain(pdev);
+	dma_addr_t base;
+
+	/* The address might not be aligned */
+	base = iova & VTD_PAGE_MASK;
+	size = VTD_PAGE_ALIGN(size);
+	dma_pte_clear_range(domain, base, base + size);
 }
-EXPORT_SYMBOL_GPL(intel_iommu_find_domain);
+EXPORT_SYMBOL_GPL(intel_iommu_unmap_address);
 
 int intel_iommu_found(void)
 {
@@ -3041,17 +3032,15 @@ int intel_iommu_found(void)
 }
 EXPORT_SYMBOL_GPL(intel_iommu_found);
 
-u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
+u64 intel_iommu_iova_to_phys(struct dmar_domain *domain, u64 iova)
 {
 	struct dma_pte *pte;
-	u64 pfn;
+	u64 phys = 0;
 
-	pfn = 0;
 	pte = addr_to_dma_pte(domain, iova);
-
 	if (pte)
-		pfn = dma_pte_addr(pte);
+		phys = dma_pte_addr(pte);
 
-	return pfn >> VTD_PAGE_SHIFT;
+	return phys;
 }
-EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);
+EXPORT_SYMBOL_GPL(intel_iommu_iova_to_phys);
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 06349fd5871b..07973c4e4acc 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -330,15 +330,17 @@ extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
 
 extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
 
-void intel_iommu_domain_exit(struct dmar_domain *domain);
-struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev);
-int intel_iommu_context_mapping(struct dmar_domain *domain,
-				struct pci_dev *pdev);
-int intel_iommu_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
-			     u64 hpa, size_t size, int prot);
-void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn);
-struct dmar_domain *intel_iommu_find_domain(struct pci_dev *pdev);
-u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova);
+struct dmar_domain *intel_iommu_alloc_domain(void);
+void intel_iommu_free_domain(struct dmar_domain *domain);
+int intel_iommu_attach_device(struct dmar_domain *domain,
+			      struct pci_dev *pdev);
+void intel_iommu_detach_device(struct dmar_domain *domain,
+			       struct pci_dev *pdev);
+int intel_iommu_map_address(struct dmar_domain *domain, dma_addr_t iova,
+			    u64 hpa, size_t size, int prot);
+void intel_iommu_unmap_address(struct dmar_domain *domain,
+			       dma_addr_t iova, size_t size);
+u64 intel_iommu_iova_to_phys(struct dmar_domain *domain, u64 iova);
 
 #ifdef CONFIG_DMAR
 int intel_iommu_found(void);
-- 
cgit v1.2.3


From 260782bcfdaaa7850f29d6bb2ec6603019168c57 Mon Sep 17 00:00:00 2001
From: Weidong Han <weidong.han@intel.com>
Date: Tue, 2 Dec 2008 21:03:39 +0800
Subject: KVM: use the new intel iommu APIs

intel iommu APIs are updated, use the new APIs.

In addition, change kvm_iommu_map_guest() to just create the domain, let kvm_iommu_assign_device() assign device.

Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 include/linux/kvm_host.h | 15 +++++---
 virt/kvm/kvm_main.c      |  7 +++-
 virt/kvm/vtd.c           | 98 +++++++++++++++++++++++++++---------------------
 3 files changed, 71 insertions(+), 49 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index eafabd5c66b2..c96739b4b7a3 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -330,9 +330,10 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
 #ifdef CONFIG_DMAR
 int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
 			unsigned long npages);
-int kvm_iommu_map_guest(struct kvm *kvm,
-			struct kvm_assigned_dev_kernel *assigned_dev);
+int kvm_iommu_map_guest(struct kvm *kvm);
 int kvm_iommu_unmap_guest(struct kvm *kvm);
+int kvm_assign_device(struct kvm *kvm,
+		      struct kvm_assigned_dev_kernel *assigned_dev);
 #else /* CONFIG_DMAR */
 static inline int kvm_iommu_map_pages(struct kvm *kvm,
 				      gfn_t base_gfn,
@@ -341,9 +342,7 @@ static inline int kvm_iommu_map_pages(struct kvm *kvm,
 	return 0;
 }
 
-static inline int kvm_iommu_map_guest(struct kvm *kvm,
-				      struct kvm_assigned_dev_kernel
-				      *assigned_dev)
+static inline int kvm_iommu_map_guest(struct kvm *kvm)
 {
 	return -ENODEV;
 }
@@ -352,6 +351,12 @@ static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
 {
 	return 0;
 }
+
+static inline int kvm_assign_device(struct kvm *kvm,
+		struct kvm_assigned_dev_kernel *assigned_dev)
+{
+	return 0;
+}
 #endif /* CONFIG_DMAR */
 
 static inline void kvm_guest_enter(void)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index fc6127cbea1f..c92b63462b79 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -503,7 +503,12 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 	list_add(&match->list, &kvm->arch.assigned_dev_head);
 
 	if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
-		r = kvm_iommu_map_guest(kvm, match);
+		if (!kvm->arch.intel_iommu_domain) {
+			r = kvm_iommu_map_guest(kvm);
+			if (r)
+				goto out_list_del;
+		}
+		r = kvm_assign_device(kvm, match);
 		if (r)
 			goto out_list_del;
 	}
diff --git a/virt/kvm/vtd.c b/virt/kvm/vtd.c
index a770874f3a3a..44bb58a395a5 100644
--- a/virt/kvm/vtd.c
+++ b/virt/kvm/vtd.c
@@ -45,20 +45,18 @@ int kvm_iommu_map_pages(struct kvm *kvm,
 
 	for (i = 0; i < npages; i++) {
 		/* check if already mapped */
-		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
-						     gfn_to_gpa(gfn));
-		if (pfn)
+		if (intel_iommu_iova_to_phys(domain,
+					     gfn_to_gpa(gfn)))
 			continue;
 
 		pfn = gfn_to_pfn(kvm, gfn);
-		r = intel_iommu_page_mapping(domain,
-					     gfn_to_gpa(gfn),
-					     pfn_to_hpa(pfn),
-					     PAGE_SIZE,
-					     DMA_PTE_READ |
-					     DMA_PTE_WRITE);
+		r = intel_iommu_map_address(domain,
+					    gfn_to_gpa(gfn),
+					    pfn_to_hpa(pfn),
+					    PAGE_SIZE,
+					    DMA_PTE_READ | DMA_PTE_WRITE);
 		if (r) {
-			printk(KERN_ERR "kvm_iommu_map_pages:"
+			printk(KERN_ERR "kvm_iommu_map_address:"
 			       "iommu failed to map pfn=%lx\n", pfn);
 			goto unmap_pages;
 		}
@@ -86,50 +84,55 @@ static int kvm_iommu_map_memslots(struct kvm *kvm)
 	return r;
 }
 
-int kvm_iommu_map_guest(struct kvm *kvm,
-			struct kvm_assigned_dev_kernel *assigned_dev)
+int kvm_assign_device(struct kvm *kvm,
+		      struct kvm_assigned_dev_kernel *assigned_dev)
 {
 	struct pci_dev *pdev = NULL;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
 	int r;
 
-	if (!intel_iommu_found()) {
-		printk(KERN_ERR "%s: intel iommu not found\n", __func__);
+	/* check if iommu exists and in use */
+	if (!domain)
+		return 0;
+
+	pdev = assigned_dev->dev;
+	if (pdev == NULL)
 		return -ENODEV;
+
+	r = intel_iommu_attach_device(domain, pdev);
+	if (r) {
+		printk(KERN_ERR "assign device %x:%x.%x failed",
+			pdev->bus->number,
+			PCI_SLOT(pdev->devfn),
+			PCI_FUNC(pdev->devfn));
+		return r;
 	}
 
-	printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n",
-	       assigned_dev->host_busnr,
-	       PCI_SLOT(assigned_dev->host_devfn),
-	       PCI_FUNC(assigned_dev->host_devfn));
+	printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n",
+		assigned_dev->host_busnr,
+		PCI_SLOT(assigned_dev->host_devfn),
+		PCI_FUNC(assigned_dev->host_devfn));
 
-	pdev = assigned_dev->dev;
+	return 0;
+}
 
-	if (pdev == NULL) {
-		if (kvm->arch.intel_iommu_domain) {
-			intel_iommu_domain_exit(kvm->arch.intel_iommu_domain);
-			kvm->arch.intel_iommu_domain = NULL;
-		}
+int kvm_iommu_map_guest(struct kvm *kvm)
+{
+	int r;
+
+	if (!intel_iommu_found()) {
+		printk(KERN_ERR "%s: intel iommu not found\n", __func__);
 		return -ENODEV;
 	}
 
-	kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev);
+	kvm->arch.intel_iommu_domain = intel_iommu_alloc_domain();
 	if (!kvm->arch.intel_iommu_domain)
-		return -ENODEV;
+		return -ENOMEM;
 
 	r = kvm_iommu_map_memslots(kvm);
 	if (r)
 		goto out_unmap;
 
-	intel_iommu_detach_dev(kvm->arch.intel_iommu_domain,
-			       pdev->bus->number, pdev->devfn);
-
-	r = intel_iommu_context_mapping(kvm->arch.intel_iommu_domain,
-					pdev);
-	if (r) {
-		printk(KERN_ERR "Domain context map for %s failed",
-		       pci_name(pdev));
-		goto out_unmap;
-	}
 	return 0;
 
 out_unmap:
@@ -138,19 +141,29 @@ out_unmap:
 }
 
 static void kvm_iommu_put_pages(struct kvm *kvm,
-			       gfn_t base_gfn, unsigned long npages)
+				gfn_t base_gfn, unsigned long npages)
 {
 	gfn_t gfn = base_gfn;
 	pfn_t pfn;
 	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
-	int i;
+	unsigned long i;
+	u64 phys;
+
+	/* check if iommu exists and in use */
+	if (!domain)
+		return;
 
 	for (i = 0; i < npages; i++) {
-		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
-						     gfn_to_gpa(gfn));
+		phys = intel_iommu_iova_to_phys(domain,
+						gfn_to_gpa(gfn));
+		pfn = phys >> PAGE_SHIFT;
 		kvm_release_pfn_clean(pfn);
 		gfn++;
 	}
+
+	intel_iommu_unmap_address(domain,
+				  gfn_to_gpa(base_gfn),
+				  PAGE_SIZE * npages);
 }
 
 static int kvm_iommu_unmap_memslots(struct kvm *kvm)
@@ -182,10 +195,9 @@ int kvm_iommu_unmap_guest(struct kvm *kvm)
 		       PCI_FUNC(entry->host_devfn));
 
 		/* detach kvm dmar domain */
-		intel_iommu_detach_dev(domain, entry->host_busnr,
-				       entry->host_devfn);
+		intel_iommu_detach_device(domain, entry->dev);
 	}
 	kvm_iommu_unmap_memslots(kvm);
-	intel_iommu_domain_exit(domain);
+	intel_iommu_free_domain(domain);
 	return 0;
 }
-- 
cgit v1.2.3


From 0a920356748df4fb06e86c21c23d2ed6d31d37ad Mon Sep 17 00:00:00 2001
From: Weidong Han <weidong.han@intel.com>
Date: Tue, 2 Dec 2008 21:24:23 +0800
Subject: KVM: support device deassignment

Support device deassignment, it can be used in device hotplug.

Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 include/linux/kvm_host.h |  8 ++++++++
 virt/kvm/kvm_main.c      | 42 ++++++++++++++++++++++++++++++++++++++++++
 virt/kvm/vtd.c           | 24 ++++++++++++++++++++++++
 3 files changed, 74 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index c96739b4b7a3..ce5d1c17ce26 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -334,6 +334,8 @@ int kvm_iommu_map_guest(struct kvm *kvm);
 int kvm_iommu_unmap_guest(struct kvm *kvm);
 int kvm_assign_device(struct kvm *kvm,
 		      struct kvm_assigned_dev_kernel *assigned_dev);
+int kvm_deassign_device(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *assigned_dev);
 #else /* CONFIG_DMAR */
 static inline int kvm_iommu_map_pages(struct kvm *kvm,
 				      gfn_t base_gfn,
@@ -357,6 +359,12 @@ static inline int kvm_assign_device(struct kvm *kvm,
 {
 	return 0;
 }
+
+static inline int kvm_deassign_device(struct kvm *kvm,
+		struct kvm_assigned_dev_kernel *assigned_dev)
+{
+	return 0;
+}
 #endif /* CONFIG_DMAR */
 
 static inline void kvm_guest_enter(void)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index c92b63462b79..3238e08e4651 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -530,6 +530,35 @@ out_free:
 }
 #endif
 
+#ifdef KVM_CAP_DEVICE_DEASSIGNMENT
+static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
+		struct kvm_assigned_pci_dev *assigned_dev)
+{
+	int r = 0;
+	struct kvm_assigned_dev_kernel *match;
+
+	mutex_lock(&kvm->lock);
+
+	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      assigned_dev->assigned_dev_id);
+	if (!match) {
+		printk(KERN_INFO "%s: device hasn't been assigned before, "
+		  "so cannot be deassigned\n", __func__);
+		r = -EINVAL;
+		goto out;
+	}
+
+	if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU)
+		kvm_deassign_device(kvm, match);
+
+	kvm_free_assigned_device(kvm, match);
+
+out:
+	mutex_unlock(&kvm->lock);
+	return r;
+}
+#endif
+
 static inline int valid_vcpu(int n)
 {
 	return likely(n >= 0 && n < KVM_MAX_VCPUS);
@@ -1862,6 +1891,19 @@ static long kvm_vm_ioctl(struct file *filp,
 			goto out;
 		break;
 	}
+#endif
+#ifdef KVM_CAP_DEVICE_DEASSIGNMENT
+	case KVM_DEASSIGN_PCI_DEVICE: {
+		struct kvm_assigned_pci_dev assigned_dev;
+
+		r = -EFAULT;
+		if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
+			goto out;
+		r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev);
+		if (r)
+			goto out;
+		break;
+	}
 #endif
 	default:
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
diff --git a/virt/kvm/vtd.c b/virt/kvm/vtd.c
index 44bb58a395a5..174ea1f8cee5 100644
--- a/virt/kvm/vtd.c
+++ b/virt/kvm/vtd.c
@@ -116,6 +116,30 @@ int kvm_assign_device(struct kvm *kvm,
 	return 0;
 }
 
+int kvm_deassign_device(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *assigned_dev)
+{
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+	struct pci_dev *pdev = NULL;
+
+	/* check if iommu exists and in use */
+	if (!domain)
+		return 0;
+
+	pdev = assigned_dev->dev;
+	if (pdev == NULL)
+		return -ENODEV;
+
+	intel_iommu_detach_device(domain, pdev);
+
+	printk(KERN_DEBUG "deassign device: host bdf = %x:%x:%x\n",
+		assigned_dev->host_busnr,
+		PCI_SLOT(assigned_dev->host_devfn),
+		PCI_FUNC(assigned_dev->host_devfn));
+
+	return 0;
+}
+
 int kvm_iommu_map_guest(struct kvm *kvm)
 {
 	int r;
-- 
cgit v1.2.3


From b653574a7d14b663cc812cb20be6a114939ba186 Mon Sep 17 00:00:00 2001
From: Weidong Han <weidong.han@intel.com>
Date: Mon, 8 Dec 2008 23:29:53 +0800
Subject: Deassign device in kvm_free_assgined_device

In kvm_iommu_unmap_memslots(), assigned_dev_head is already empty.

Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 include/linux/kvm_host.h |  1 +
 virt/kvm/kvm_main.c      |  1 +
 virt/kvm/vtd.c           | 10 ----------
 3 files changed, 2 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ce5d1c17ce26..e62a4629e51c 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -316,6 +316,7 @@ struct kvm_assigned_dev_kernel {
 #define KVM_ASSIGNED_DEV_HOST_MSI	(1 << 9)
 	unsigned long irq_requested_type;
 	int irq_source_id;
+	int flags;
 	struct pci_dev *dev;
 	struct kvm *kvm;
 };
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 3238e08e4651..4ef0fb43d1f9 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -496,6 +496,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 	match->assigned_dev_id = assigned_dev->assigned_dev_id;
 	match->host_busnr = assigned_dev->busnr;
 	match->host_devfn = assigned_dev->devfn;
+	match->flags = assigned_dev->flags;
 	match->dev = dev;
 	match->irq_source_id = -1;
 	match->kvm = kvm;
diff --git a/virt/kvm/vtd.c b/virt/kvm/vtd.c
index 174ea1f8cee5..d46de9af838b 100644
--- a/virt/kvm/vtd.c
+++ b/virt/kvm/vtd.c
@@ -205,22 +205,12 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm)
 
 int kvm_iommu_unmap_guest(struct kvm *kvm)
 {
-	struct kvm_assigned_dev_kernel *entry;
 	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
 
 	/* check if iommu exists and in use */
 	if (!domain)
 		return 0;
 
-	list_for_each_entry(entry, &kvm->arch.assigned_dev_head, list) {
-		printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n",
-		       entry->host_busnr,
-		       PCI_SLOT(entry->host_devfn),
-		       PCI_FUNC(entry->host_devfn));
-
-		/* detach kvm dmar domain */
-		intel_iommu_detach_device(domain, entry->dev);
-	}
 	kvm_iommu_unmap_memslots(kvm);
 	intel_iommu_free_domain(domain);
 	return 0;
-- 
cgit v1.2.3


From 4a77a6cf6d9bf9f5c74b27f62bd2bfe6dcc88392 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 26 Nov 2008 17:02:33 +0100
Subject: introcude linux/iommu.h for an iommu api

This patch introduces the API to abstract the exported VT-d functions
for KVM into a generic API. This way the AMD IOMMU implementation can
plug into this API later.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 include/linux/iommu.h | 112 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 112 insertions(+)
 create mode 100644 include/linux/iommu.h

(limited to 'include/linux')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
new file mode 100644
index 000000000000..8a7bfb1b6ca0
--- /dev/null
+++ b/include/linux/iommu.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
+ * Author: Joerg Roedel <joerg.roedel@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#ifndef __LINUX_IOMMU_H
+#define __LINUX_IOMMU_H
+
+#define IOMMU_READ	(1)
+#define IOMMU_WRITE	(2)
+
+struct device;
+
+struct iommu_domain {
+	void *priv;
+};
+
+struct iommu_ops {
+	int (*domain_init)(struct iommu_domain *domain);
+	void (*domain_destroy)(struct iommu_domain *domain);
+	int (*attach_dev)(struct iommu_domain *domain, struct device *dev);
+	void (*detach_dev)(struct iommu_domain *domain, struct device *dev);
+	int (*map)(struct iommu_domain *domain, unsigned long iova,
+		   phys_addr_t paddr, size_t size, int prot);
+	void (*unmap)(struct iommu_domain *domain, unsigned long iova,
+		      size_t size);
+	phys_addr_t (*iova_to_phys)(struct iommu_domain *domain,
+				    unsigned long iova);
+};
+
+#ifdef CONFIG_IOMMU_API
+
+extern void register_iommu(struct iommu_ops *ops);
+extern bool iommu_found(void);
+extern struct iommu_domain *iommu_domain_alloc(void);
+extern void iommu_domain_free(struct iommu_domain *domain);
+extern int iommu_attach_device(struct iommu_domain *domain,
+			       struct device *dev);
+extern void iommu_detach_device(struct iommu_domain *domain,
+				struct device *dev);
+extern int iommu_map_range(struct iommu_domain *domain, unsigned long iova,
+			   phys_addr_t paddr, size_t size, int prot);
+extern void iommu_unmap_range(struct iommu_domain *domain, unsigned long iova,
+			      size_t size);
+extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
+				      unsigned long iova);
+
+#else /* CONFIG_IOMMU_API */
+
+static inline void register_iommu(struct iommu_ops *ops)
+{
+}
+
+static inline bool iommu_found(void)
+{
+	return false;
+}
+
+static inline struct iommu_domain *iommu_domain_alloc(void)
+{
+	return NULL;
+}
+
+static inline void iommu_domain_free(struct iommu_domain *domain)
+{
+}
+
+static inline int iommu_attach_device(struct iommu_domain *domain,
+				      struct device *dev)
+{
+	return -ENODEV;
+}
+
+static inline void iommu_detach_device(struct iommu_domain *domain,
+				       struct device *dev)
+{
+}
+
+static inline int iommu_map_range(struct iommu_domain *domain,
+				  unsigned long iova, phys_addr_t paddr,
+				  size_t size, int prot)
+{
+	return -ENODEV;
+}
+
+static inline void iommu_unmap_range(struct iommu_domain *domain,
+				     unsigned long iova, size_t size)
+{
+}
+
+static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
+					     unsigned long iova)
+{
+	return 0;
+}
+
+#endif /* CONFIG_IOMMU_API */
+
+#endif /* __LINUX_IOMMU_H */
-- 
cgit v1.2.3


From 19de40a8472fa64693eab844911eec277d489f6c Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 3 Dec 2008 14:43:34 +0100
Subject: KVM: change KVM to use IOMMU API

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/ia64/include/asm/kvm_host.h |  2 +-
 arch/ia64/kvm/Makefile           |  2 +-
 arch/ia64/kvm/kvm-ia64.c         |  3 ++-
 arch/x86/include/asm/kvm_host.h  |  2 +-
 arch/x86/kvm/Makefile            |  2 +-
 arch/x86/kvm/x86.c               |  3 ++-
 include/linux/kvm_host.h         |  6 +++---
 virt/kvm/iommu.c                 | 45 +++++++++++++++++++---------------------
 virt/kvm/kvm_main.c              |  2 +-
 9 files changed, 33 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index 0560f3fae538..348663661659 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -467,7 +467,7 @@ struct kvm_arch {
 	struct kvm_sal_data rdv_sal_data;
 
 	struct list_head assigned_dev_head;
-	struct dmar_domain *intel_iommu_domain;
+	struct iommu_domain *iommu_domain;
 	struct hlist_head irq_ack_notifier_list;
 
 	unsigned long irq_sources_bitmap;
diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile
index cb69dfcfb1a6..0bb99b732908 100644
--- a/arch/ia64/kvm/Makefile
+++ b/arch/ia64/kvm/Makefile
@@ -51,7 +51,7 @@ EXTRA_AFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/
 common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
 		coalesced_mmio.o irq_comm.o)
 
-ifeq ($(CONFIG_DMAR),y)
+ifeq ($(CONFIG_IOMMU_API),y)
 common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
 endif
 
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 0f5ebd948437..4e586f6110aa 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -31,6 +31,7 @@
 #include <linux/bitops.h>
 #include <linux/hrtimer.h>
 #include <linux/uaccess.h>
+#include <linux/iommu.h>
 #include <linux/intel-iommu.h>
 
 #include <asm/pgtable.h>
@@ -188,7 +189,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 		r = KVM_COALESCED_MMIO_PAGE_OFFSET;
 		break;
 	case KVM_CAP_IOMMU:
-		r = intel_iommu_found();
+		r = iommu_found();
 		break;
 	default:
 		r = 0;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 97215a458e5f..730843d1d2fb 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -360,7 +360,7 @@ struct kvm_arch{
 	struct list_head active_mmu_pages;
 	struct list_head assigned_dev_head;
 	struct list_head oos_global_pages;
-	struct dmar_domain *intel_iommu_domain;
+	struct iommu_domain *iommu_domain;
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 00f46c2d14bd..d3ec292f00f2 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -7,7 +7,7 @@ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
 ifeq ($(CONFIG_KVM_TRACE),y)
 common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o)
 endif
-ifeq ($(CONFIG_DMAR),y)
+ifeq ($(CONFIG_IOMMU_API),y)
 common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
 endif
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0e6aa8141dcd..cc17546a2406 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -34,6 +34,7 @@
 #include <linux/module.h>
 #include <linux/mman.h>
 #include <linux/highmem.h>
+#include <linux/iommu.h>
 #include <linux/intel-iommu.h>
 
 #include <asm/uaccess.h>
@@ -989,7 +990,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 		r = !tdp_enabled;
 		break;
 	case KVM_CAP_IOMMU:
-		r = intel_iommu_found();
+		r = iommu_found();
 		break;
 	default:
 		r = 0;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index e62a4629e51c..ec49d0be7f52 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -328,7 +328,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian);
 int kvm_request_irq_source_id(struct kvm *kvm);
 void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
 
-#ifdef CONFIG_DMAR
+#ifdef CONFIG_IOMMU_API
 int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
 			unsigned long npages);
 int kvm_iommu_map_guest(struct kvm *kvm);
@@ -337,7 +337,7 @@ int kvm_assign_device(struct kvm *kvm,
 		      struct kvm_assigned_dev_kernel *assigned_dev);
 int kvm_deassign_device(struct kvm *kvm,
 			struct kvm_assigned_dev_kernel *assigned_dev);
-#else /* CONFIG_DMAR */
+#else /* CONFIG_IOMMU_API */
 static inline int kvm_iommu_map_pages(struct kvm *kvm,
 				      gfn_t base_gfn,
 				      unsigned long npages)
@@ -366,7 +366,7 @@ static inline int kvm_deassign_device(struct kvm *kvm,
 {
 	return 0;
 }
-#endif /* CONFIG_DMAR */
+#endif /* CONFIG_IOMMU_API */
 
 static inline void kvm_guest_enter(void)
 {
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index d46de9af838b..d0bebaa5bf0a 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -25,6 +25,7 @@
 #include <linux/kvm_host.h>
 #include <linux/pci.h>
 #include <linux/dmar.h>
+#include <linux/iommu.h>
 #include <linux/intel-iommu.h>
 
 static int kvm_iommu_unmap_memslots(struct kvm *kvm);
@@ -37,7 +38,7 @@ int kvm_iommu_map_pages(struct kvm *kvm,
 	gfn_t gfn = base_gfn;
 	pfn_t pfn;
 	int i, r = 0;
-	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+	struct iommu_domain *domain = kvm->arch.iommu_domain;
 
 	/* check if iommu exists and in use */
 	if (!domain)
@@ -45,16 +46,15 @@ int kvm_iommu_map_pages(struct kvm *kvm,
 
 	for (i = 0; i < npages; i++) {
 		/* check if already mapped */
-		if (intel_iommu_iova_to_phys(domain,
-					     gfn_to_gpa(gfn)))
+		if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn)))
 			continue;
 
 		pfn = gfn_to_pfn(kvm, gfn);
-		r = intel_iommu_map_address(domain,
-					    gfn_to_gpa(gfn),
-					    pfn_to_hpa(pfn),
-					    PAGE_SIZE,
-					    DMA_PTE_READ | DMA_PTE_WRITE);
+		r = iommu_map_range(domain,
+				    gfn_to_gpa(gfn),
+				    pfn_to_hpa(pfn),
+				    PAGE_SIZE,
+				    IOMMU_READ | IOMMU_WRITE);
 		if (r) {
 			printk(KERN_ERR "kvm_iommu_map_address:"
 			       "iommu failed to map pfn=%lx\n", pfn);
@@ -88,7 +88,7 @@ int kvm_assign_device(struct kvm *kvm,
 		      struct kvm_assigned_dev_kernel *assigned_dev)
 {
 	struct pci_dev *pdev = NULL;
-	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+	struct iommu_domain *domain = kvm->arch.iommu_domain;
 	int r;
 
 	/* check if iommu exists and in use */
@@ -99,7 +99,7 @@ int kvm_assign_device(struct kvm *kvm,
 	if (pdev == NULL)
 		return -ENODEV;
 
-	r = intel_iommu_attach_device(domain, pdev);
+	r = iommu_attach_device(domain, &pdev->dev);
 	if (r) {
 		printk(KERN_ERR "assign device %x:%x.%x failed",
 			pdev->bus->number,
@@ -119,7 +119,7 @@ int kvm_assign_device(struct kvm *kvm,
 int kvm_deassign_device(struct kvm *kvm,
 			struct kvm_assigned_dev_kernel *assigned_dev)
 {
-	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+	struct iommu_domain *domain = kvm->arch.iommu_domain;
 	struct pci_dev *pdev = NULL;
 
 	/* check if iommu exists and in use */
@@ -130,7 +130,7 @@ int kvm_deassign_device(struct kvm *kvm,
 	if (pdev == NULL)
 		return -ENODEV;
 
-	intel_iommu_detach_device(domain, pdev);
+	iommu_detach_device(domain, &pdev->dev);
 
 	printk(KERN_DEBUG "deassign device: host bdf = %x:%x:%x\n",
 		assigned_dev->host_busnr,
@@ -144,13 +144,13 @@ int kvm_iommu_map_guest(struct kvm *kvm)
 {
 	int r;
 
-	if (!intel_iommu_found()) {
-		printk(KERN_ERR "%s: intel iommu not found\n", __func__);
+	if (!iommu_found()) {
+		printk(KERN_ERR "%s: iommu not found\n", __func__);
 		return -ENODEV;
 	}
 
-	kvm->arch.intel_iommu_domain = intel_iommu_alloc_domain();
-	if (!kvm->arch.intel_iommu_domain)
+	kvm->arch.iommu_domain = iommu_domain_alloc();
+	if (!kvm->arch.iommu_domain)
 		return -ENOMEM;
 
 	r = kvm_iommu_map_memslots(kvm);
@@ -169,7 +169,7 @@ static void kvm_iommu_put_pages(struct kvm *kvm,
 {
 	gfn_t gfn = base_gfn;
 	pfn_t pfn;
-	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+	struct iommu_domain *domain = kvm->arch.iommu_domain;
 	unsigned long i;
 	u64 phys;
 
@@ -178,16 +178,13 @@ static void kvm_iommu_put_pages(struct kvm *kvm,
 		return;
 
 	for (i = 0; i < npages; i++) {
-		phys = intel_iommu_iova_to_phys(domain,
-						gfn_to_gpa(gfn));
+		phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn));
 		pfn = phys >> PAGE_SHIFT;
 		kvm_release_pfn_clean(pfn);
 		gfn++;
 	}
 
-	intel_iommu_unmap_address(domain,
-				  gfn_to_gpa(base_gfn),
-				  PAGE_SIZE * npages);
+	iommu_unmap_range(domain, gfn_to_gpa(base_gfn), PAGE_SIZE * npages);
 }
 
 static int kvm_iommu_unmap_memslots(struct kvm *kvm)
@@ -205,13 +202,13 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm)
 
 int kvm_iommu_unmap_guest(struct kvm *kvm)
 {
-	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+	struct iommu_domain *domain = kvm->arch.iommu_domain;
 
 	/* check if iommu exists and in use */
 	if (!domain)
 		return 0;
 
 	kvm_iommu_unmap_memslots(kvm);
-	intel_iommu_free_domain(domain);
+	iommu_domain_free(domain);
 	return 0;
 }
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4ef0fb43d1f9..3a5a08298aab 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -504,7 +504,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 	list_add(&match->list, &kvm->arch.assigned_dev_head);
 
 	if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
-		if (!kvm->arch.intel_iommu_domain) {
+		if (!kvm->arch.iommu_domain) {
 			r = kvm_iommu_map_guest(kvm);
 			if (r)
 				goto out_list_del;
-- 
cgit v1.2.3


From 5d450806eb0e569c5846a5825e7f535980b0da32 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 3 Dec 2008 14:52:32 +0100
Subject: VT-d: adapt domain init and destroy functions for IOMMU API

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 drivers/pci/intel-iommu.c   | 33 ++++++++++++++++++---------------
 include/linux/intel-iommu.h |  2 --
 2 files changed, 18 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 772fb22e1be0..5c95a5a65440 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -34,6 +34,7 @@
 #include <linux/mempool.h>
 #include <linux/timer.h>
 #include <linux/iova.h>
+#include <linux/iommu.h>
 #include <linux/intel-iommu.h>
 #include <asm/cacheflush.h>
 #include <asm/iommu.h>
@@ -2962,32 +2963,34 @@ static void vm_domain_exit(struct dmar_domain *domain)
 	free_domain_mem(domain);
 }
 
-struct dmar_domain *intel_iommu_alloc_domain(void)
+static int intel_iommu_domain_init(struct iommu_domain *domain)
 {
-	struct dmar_domain *domain;
+	struct dmar_domain *dmar_domain;
 
-	domain = iommu_alloc_vm_domain();
-	if (!domain) {
+	dmar_domain = iommu_alloc_vm_domain();
+	if (!dmar_domain) {
 		printk(KERN_ERR
-			"intel_iommu_domain_alloc: domain == NULL\n");
-		return NULL;
+			"intel_iommu_domain_init: dmar_domain == NULL\n");
+		return -ENOMEM;
 	}
-	if (vm_domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
+	if (vm_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
 		printk(KERN_ERR
-			"intel_iommu_domain_alloc: domain_init() failed\n");
-		vm_domain_exit(domain);
-		return NULL;
+			"intel_iommu_domain_init() failed\n");
+		vm_domain_exit(dmar_domain);
+		return -ENOMEM;
 	}
+	domain->priv = dmar_domain;
 
-	return domain;
+	return 0;
 }
-EXPORT_SYMBOL_GPL(intel_iommu_alloc_domain);
 
-void intel_iommu_free_domain(struct dmar_domain *domain)
+static void intel_iommu_domain_destroy(struct iommu_domain *domain)
 {
-	vm_domain_exit(domain);
+	struct dmar_domain *dmar_domain = domain->priv;
+
+	domain->priv = NULL;
+	vm_domain_exit(dmar_domain);
 }
-EXPORT_SYMBOL_GPL(intel_iommu_free_domain);
 
 int intel_iommu_attach_device(struct dmar_domain *domain,
 			      struct pci_dev *pdev)
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 07973c4e4acc..0a7ba0cefc74 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -330,8 +330,6 @@ extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
 
 extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
 
-struct dmar_domain *intel_iommu_alloc_domain(void);
-void intel_iommu_free_domain(struct dmar_domain *domain);
 int intel_iommu_attach_device(struct dmar_domain *domain,
 			      struct pci_dev *pdev);
 void intel_iommu_detach_device(struct dmar_domain *domain,
-- 
cgit v1.2.3


From 4c5478c94eb29e6101f1f13175f7455bc8b5d953 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 3 Dec 2008 14:58:24 +0100
Subject: VT-d: adapt device attach and detach functions for IOMMU API

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 drivers/pci/intel-iommu.c   | 27 +++++++++++++++------------
 include/linux/intel-iommu.h |  4 ----
 2 files changed, 15 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 5c95a5a65440..db9a26cfeb8f 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -2992,9 +2992,11 @@ static void intel_iommu_domain_destroy(struct iommu_domain *domain)
 	vm_domain_exit(dmar_domain);
 }
 
-int intel_iommu_attach_device(struct dmar_domain *domain,
-			      struct pci_dev *pdev)
+static int intel_iommu_attach_device(struct iommu_domain *domain,
+				     struct device *dev)
 {
+	struct dmar_domain *dmar_domain = domain->priv;
+	struct pci_dev *pdev = to_pci_dev(dev);
 	struct intel_iommu *iommu;
 	int addr_width;
 	u64 end;
@@ -3006,7 +3008,7 @@ int intel_iommu_attach_device(struct dmar_domain *domain,
 
 		old_domain = find_domain(pdev);
 		if (old_domain) {
-			if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
+			if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
 				vm_domain_remove_one_dev_info(old_domain, pdev);
 			else
 				domain_remove_dev_info(old_domain);
@@ -3021,28 +3023,29 @@ int intel_iommu_attach_device(struct dmar_domain *domain,
 	addr_width = agaw_to_width(iommu->agaw);
 	end = DOMAIN_MAX_ADDR(addr_width);
 	end = end & VTD_PAGE_MASK;
-	if (end < domain->max_addr) {
+	if (end < dmar_domain->max_addr) {
 		printk(KERN_ERR "%s: iommu agaw (%d) is not "
 		       "sufficient for the mapped address (%llx)\n",
-		       __func__, iommu->agaw, domain->max_addr);
+		       __func__, iommu->agaw, dmar_domain->max_addr);
 		return -EFAULT;
 	}
 
-	ret = domain_context_mapping(domain, pdev);
+	ret = domain_context_mapping(dmar_domain, pdev);
 	if (ret)
 		return ret;
 
-	ret = vm_domain_add_dev_info(domain, pdev);
+	ret = vm_domain_add_dev_info(dmar_domain, pdev);
 	return ret;
 }
-EXPORT_SYMBOL_GPL(intel_iommu_attach_device);
 
-void intel_iommu_detach_device(struct dmar_domain *domain,
-			       struct pci_dev *pdev)
+static void intel_iommu_detach_device(struct iommu_domain *domain,
+				      struct device *dev)
 {
-	vm_domain_remove_one_dev_info(domain, pdev);
+	struct dmar_domain *dmar_domain = domain->priv;
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	vm_domain_remove_one_dev_info(dmar_domain, pdev);
 }
-EXPORT_SYMBOL_GPL(intel_iommu_detach_device);
 
 int intel_iommu_map_address(struct dmar_domain *domain, dma_addr_t iova,
 			    u64 hpa, size_t size, int prot)
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 0a7ba0cefc74..9909c5a1b20f 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -330,10 +330,6 @@ extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
 
 extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
 
-int intel_iommu_attach_device(struct dmar_domain *domain,
-			      struct pci_dev *pdev);
-void intel_iommu_detach_device(struct dmar_domain *domain,
-			       struct pci_dev *pdev);
 int intel_iommu_map_address(struct dmar_domain *domain, dma_addr_t iova,
 			    u64 hpa, size_t size, int prot);
 void intel_iommu_unmap_address(struct dmar_domain *domain,
-- 
cgit v1.2.3


From dde57a210dcdce85e2813bab8f88687761d9f6a6 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 3 Dec 2008 15:04:09 +0100
Subject: VT-d: adapt domain map and unmap functions for IOMMU API

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 drivers/pci/intel-iommu.c   | 33 ++++++++++++++++++++-------------
 include/linux/intel-iommu.h |  4 ----
 2 files changed, 20 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index db9a26cfeb8f..8af6c96f31b3 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -3047,20 +3047,28 @@ static void intel_iommu_detach_device(struct iommu_domain *domain,
 	vm_domain_remove_one_dev_info(dmar_domain, pdev);
 }
 
-int intel_iommu_map_address(struct dmar_domain *domain, dma_addr_t iova,
-			    u64 hpa, size_t size, int prot)
+static int intel_iommu_map_range(struct iommu_domain *domain,
+				 unsigned long iova, phys_addr_t hpa,
+				 size_t size, int iommu_prot)
 {
+	struct dmar_domain *dmar_domain = domain->priv;
 	u64 max_addr;
 	int addr_width;
+	int prot = 0;
 	int ret;
 
+	if (iommu_prot & IOMMU_READ)
+		prot |= DMA_PTE_READ;
+	if (iommu_prot & IOMMU_WRITE)
+		prot |= DMA_PTE_WRITE;
+
 	max_addr = (iova & VTD_PAGE_MASK) + VTD_PAGE_ALIGN(size);
-	if (domain->max_addr < max_addr) {
+	if (dmar_domain->max_addr < max_addr) {
 		int min_agaw;
 		u64 end;
 
 		/* check if minimum agaw is sufficient for mapped address */
-		min_agaw = vm_domain_min_agaw(domain);
+		min_agaw = vm_domain_min_agaw(dmar_domain);
 		addr_width = agaw_to_width(min_agaw);
 		end = DOMAIN_MAX_ADDR(addr_width);
 		end = end & VTD_PAGE_MASK;
@@ -3070,28 +3078,27 @@ int intel_iommu_map_address(struct dmar_domain *domain, dma_addr_t iova,
 			       __func__, min_agaw, max_addr);
 			return -EFAULT;
 		}
-		domain->max_addr = max_addr;
+		dmar_domain->max_addr = max_addr;
 	}
 
-	ret = domain_page_mapping(domain, iova, hpa, size, prot);
+	ret = domain_page_mapping(dmar_domain, iova, hpa, size, prot);
 	return ret;
 }
-EXPORT_SYMBOL_GPL(intel_iommu_map_address);
 
-void intel_iommu_unmap_address(struct dmar_domain *domain,
-			       dma_addr_t iova, size_t size)
+static void intel_iommu_unmap_range(struct iommu_domain *domain,
+				    unsigned long iova, size_t size)
 {
+	struct dmar_domain *dmar_domain = domain->priv;
 	dma_addr_t base;
 
 	/* The address might not be aligned */
 	base = iova & VTD_PAGE_MASK;
 	size = VTD_PAGE_ALIGN(size);
-	dma_pte_clear_range(domain, base, base + size);
+	dma_pte_clear_range(dmar_domain, base, base + size);
 
-	if (domain->max_addr == base + size)
-		domain->max_addr = base;
+	if (dmar_domain->max_addr == base + size)
+		dmar_domain->max_addr = base;
 }
-EXPORT_SYMBOL_GPL(intel_iommu_unmap_address);
 
 int intel_iommu_found(void)
 {
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 9909c5a1b20f..6bc26e03858c 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -330,10 +330,6 @@ extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
 
 extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
 
-int intel_iommu_map_address(struct dmar_domain *domain, dma_addr_t iova,
-			    u64 hpa, size_t size, int prot);
-void intel_iommu_unmap_address(struct dmar_domain *domain,
-			       dma_addr_t iova, size_t size);
 u64 intel_iommu_iova_to_phys(struct dmar_domain *domain, u64 iova);
 
 #ifdef CONFIG_DMAR
-- 
cgit v1.2.3


From d14d65777c2491dd5baf1e17f444b8f653f3cbb1 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 3 Dec 2008 15:06:57 +0100
Subject: VT-d: adapt domain iova_to_phys function for IOMMU API

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 drivers/pci/intel-iommu.c   | 7 ++++---
 include/linux/intel-iommu.h | 2 --
 2 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 8af6c96f31b3..712810598a2e 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -3106,15 +3106,16 @@ int intel_iommu_found(void)
 }
 EXPORT_SYMBOL_GPL(intel_iommu_found);
 
-u64 intel_iommu_iova_to_phys(struct dmar_domain *domain, u64 iova)
+static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
+					    unsigned long iova)
 {
+	struct dmar_domain *dmar_domain = domain->priv;
 	struct dma_pte *pte;
 	u64 phys = 0;
 
-	pte = addr_to_dma_pte(domain, iova);
+	pte = addr_to_dma_pte(dmar_domain, iova);
 	if (pte)
 		phys = dma_pte_addr(pte);
 
 	return phys;
 }
-EXPORT_SYMBOL_GPL(intel_iommu_iova_to_phys);
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 6bc26e03858c..26ccc0294567 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -330,8 +330,6 @@ extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
 
 extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
 
-u64 intel_iommu_iova_to_phys(struct dmar_domain *domain, u64 iova);
-
 #ifdef CONFIG_DMAR
 int intel_iommu_found(void);
 #else /* CONFIG_DMAR */
-- 
cgit v1.2.3


From e4754c96cf8b82a754dc5ba791d6c0bf1fbe8e8e Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 3 Dec 2008 15:26:42 +0100
Subject: VT-d: remove now unused intel_iommu_found function

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 drivers/pci/intel-iommu.c   | 6 ------
 include/linux/intel-iommu.h | 9 ---------
 2 files changed, 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 81e04ec85d97..ecb5fd3b71f7 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -3105,12 +3105,6 @@ static void intel_iommu_unmap_range(struct iommu_domain *domain,
 		dmar_domain->max_addr = base;
 }
 
-int intel_iommu_found(void)
-{
-	return g_num_of_iommus;
-}
-EXPORT_SYMBOL_GPL(intel_iommu_found);
-
 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
 					    unsigned long iova)
 {
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 26ccc0294567..c4f6c101dbcd 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -330,15 +330,6 @@ extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
 
 extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
 
-#ifdef CONFIG_DMAR
-int intel_iommu_found(void);
-#else /* CONFIG_DMAR */
-static inline int intel_iommu_found(void)
-{
-	return 0;
-}
-#endif /* CONFIG_DMAR */
-
 extern void *intel_alloc_coherent(struct device *, size_t, dma_addr_t *, gfp_t);
 extern void intel_free_coherent(struct device *, size_t, void *, dma_addr_t);
 extern dma_addr_t intel_map_single(struct device *, phys_addr_t, size_t, int);
-- 
cgit v1.2.3


From 2f983570010a0dcb26d988da02d7ccfad00c807c Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Sat, 3 Jan 2009 00:06:34 -0800
Subject: sparseirq: move set/get_timer_rand_state back to .c

those two functions only used in that C file

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/random.c  | 40 ++++++++++++++++++++++++++++++++++------
 include/linux/random.h | 50 --------------------------------------------------
 2 files changed, 34 insertions(+), 56 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/random.c b/drivers/char/random.c
index d26891bfcd41..c7afc068c28d 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -559,7 +559,40 @@ struct timer_rand_state {
 };
 
 #ifndef CONFIG_SPARSE_IRQ
-struct timer_rand_state *irq_timer_state[NR_IRQS];
+
+static struct timer_rand_state *irq_timer_state[NR_IRQS];
+
+static struct timer_rand_state *get_timer_rand_state(unsigned int irq)
+{
+	return irq_timer_state[irq];
+}
+
+static void set_timer_rand_state(unsigned int irq,
+				 struct timer_rand_state *state)
+{
+	irq_timer_state[irq] = state;
+}
+
+#else
+
+static struct timer_rand_state *get_timer_rand_state(unsigned int irq)
+{
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+
+	return desc->timer_rand_state;
+}
+
+static void set_timer_rand_state(unsigned int irq,
+				 struct timer_rand_state *state)
+{
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+
+	desc->timer_rand_state = state;
+}
 #endif
 
 static struct timer_rand_state input_timer_state;
@@ -919,11 +952,6 @@ void rand_initialize_irq(int irq)
 {
 	struct timer_rand_state *state;
 
-#ifndef CONFIG_SPARSE_IRQ
-	if (irq >= nr_irqs)
-		return;
-#endif
-
 	state = get_timer_rand_state(irq);
 
 	if (state)
diff --git a/include/linux/random.h b/include/linux/random.h
index adbf3bd3c6b3..407ea3646f8f 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -45,56 +45,6 @@ struct rand_pool_info {
 
 extern void rand_initialize_irq(int irq);
 
-struct timer_rand_state;
-#ifndef CONFIG_SPARSE_IRQ
-
-extern struct timer_rand_state *irq_timer_state[];
-
-static inline struct timer_rand_state *get_timer_rand_state(unsigned int irq)
-{
-	if (irq >= nr_irqs)
-		return NULL;
-
-	return irq_timer_state[irq];
-}
-
-static inline void set_timer_rand_state(unsigned int irq, struct timer_rand_state *state)
-{
-	if (irq >= nr_irqs)
-		return;
-
-	irq_timer_state[irq] = state;
-}
-
-#else
-
-#include <linux/irq.h>
-static inline struct timer_rand_state *get_timer_rand_state(unsigned int irq)
-{
-	struct irq_desc *desc;
-
-	desc = irq_to_desc(irq);
-
-	if (!desc)
-		return NULL;
-
-	return desc->timer_rand_state;
-}
-
-static inline void set_timer_rand_state(unsigned int irq, struct timer_rand_state *state)
-{
-	struct irq_desc *desc;
-
-	desc = irq_to_desc(irq);
-
-	if (!desc)
-		return;
-
-	desc->timer_rand_state = state;
-}
-#endif
-
-
 extern void add_input_randomness(unsigned int type, unsigned int code,
 				 unsigned int value);
 extern void add_interrupt_randomness(int irq);
-- 
cgit v1.2.3


From 9188e79ec3fd43a0a605274324aecfb731baa88b Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Wed, 12 Nov 2008 16:14:08 +0100
Subject: HID: add phys and name ioctls to hidraw

The hiddev interface provides ioctl() calls which can be used
to obtain phys and raw name of the underlying device.

Add the corresponding support also into hidraw.

Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hidraw.c   | 28 ++++++++++++++++++++++++++++
 include/linux/hidraw.h |  2 ++
 2 files changed, 30 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c
index 7685ae6808c4..975edd88a3db 100644
--- a/drivers/hid/hidraw.c
+++ b/drivers/hid/hidraw.c
@@ -265,6 +265,34 @@ static long hidraw_ioctl(struct file *file, unsigned int cmd,
 				break;
 			}
 		default:
+			{
+				struct hid_device *hid = dev->hid;
+				if (_IOC_TYPE(cmd) != 'H' || _IOC_DIR(cmd) != _IOC_READ)
+					return -EINVAL;
+
+				if (_IOC_NR(cmd) == _IOC_NR(HIDIOCGRAWNAME(0))) {
+					int len;
+					if (!hid->name)
+						return 0;
+					len = strlen(hid->name) + 1;
+					if (len > _IOC_SIZE(cmd))
+						len = _IOC_SIZE(cmd);
+					return copy_to_user(user_arg, hid->name, len) ?
+						-EFAULT : len;
+				}
+
+				if (_IOC_NR(cmd) == _IOC_NR(HIDIOCGRAWPHYS(0))) {
+					int len;
+					if (!hid->phys)
+						return 0;
+					len = strlen(hid->phys) + 1;
+					if (len > _IOC_SIZE(cmd))
+						len = _IOC_SIZE(cmd);
+					return copy_to_user(user_arg, hid->phys, len) ?
+						-EFAULT : len;
+				}
+                }
+
 			ret = -ENOTTY;
 	}
 	unlock_kernel();
diff --git a/include/linux/hidraw.h b/include/linux/hidraw.h
index dbb5c8c374f0..dd8d69269176 100644
--- a/include/linux/hidraw.h
+++ b/include/linux/hidraw.h
@@ -33,6 +33,8 @@ struct hidraw_devinfo {
 #define HIDIOCGRDESCSIZE	_IOR('H', 0x01, int)
 #define HIDIOCGRDESC		_IOR('H', 0x02, struct hidraw_report_descriptor)
 #define HIDIOCGRAWINFO		_IOR('H', 0x03, struct hidraw_devinfo)
+#define HIDIOCGRAWNAME(len)     _IOC(_IOC_READ, 'H', 0x04, len)
+#define HIDIOCGRAWPHYS(len)     _IOC(_IOC_READ, 'H', 0x05, len)
 
 #define HIDRAW_FIRST_MINOR 0
 #define HIDRAW_MAX_DEVICES 64
-- 
cgit v1.2.3


From 0ed94b334265b6ee3e3336b4fedacfa9cb2ccaba Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Mon, 24 Nov 2008 16:20:07 +0100
Subject: HID: move usbhid flags to usbhid.h

Move usbhid specific flags from global hid.h into local usbhid.h.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/usbhid/usbhid.h | 10 ++++++++++
 include/linux/hid.h         |  9 ---------
 2 files changed, 10 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/usbhid/usbhid.h b/drivers/hid/usbhid/usbhid.h
index 332abcdf9956..9eb30564be9c 100644
--- a/drivers/hid/usbhid/usbhid.h
+++ b/drivers/hid/usbhid/usbhid.h
@@ -40,6 +40,16 @@ int usbhid_open(struct hid_device *hid);
 void usbhid_init_reports(struct hid_device *hid);
 void usbhid_submit_report(struct hid_device *hid, struct hid_report *report, unsigned char dir);
 
+/* iofl flags */
+#define HID_CTRL_RUNNING	1
+#define HID_OUT_RUNNING		2
+#define HID_IN_RUNNING		3
+#define HID_RESET_PENDING	4
+#define HID_SUSPENDED		5
+#define HID_CLEAR_HALT		6
+#define HID_DISCONNECTED	7
+#define HID_STARTED		8
+
 /*
  * USB-specific HID struct, to be pointed to
  * from struct hid_device->driver_data
diff --git a/include/linux/hid.h b/include/linux/hid.h
index e5780f8c934a..2c20f20283b2 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -403,15 +403,6 @@ struct hid_output_fifo {
 #define HID_STAT_ADDED		1
 #define HID_STAT_PARSED		2
 
-#define HID_CTRL_RUNNING	1
-#define HID_OUT_RUNNING		2
-#define HID_IN_RUNNING		3
-#define HID_RESET_PENDING	4
-#define HID_SUSPENDED		5
-#define HID_CLEAR_HALT		6
-#define HID_DISCONNECTED	7
-#define HID_STARTED		8
-
 struct hid_input {
 	struct list_head list;
 	struct hid_report *report;
-- 
cgit v1.2.3


From 3a6f82f7a22cf19687f556997c6978b31c109360 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Mon, 24 Nov 2008 16:20:09 +0100
Subject: HID: add dynids facility

Allow adding new devices to the hid drivers on the fly without
a need of kernel recompilation.

Now, one can test a driver e.g. by:
echo 0003:045E:00F0.0003 > ../generic-usb/unbind
echo 0003 045E 00F0 > new_id
from some driver subdir.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-core.c | 101 +++++++++++++++++++++++++++++++++++++++++++++++--
 include/linux/hid.h    |   5 +++
 2 files changed, 103 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 344f8fdb2824..34cc3b0d01f6 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1304,12 +1304,92 @@ static const struct hid_device_id hid_blacklist[] = {
 	{ }
 };
 
+struct hid_dynid {
+	struct list_head list;
+	struct hid_device_id id;
+};
+
+/**
+ * store_new_id - add a new HID device ID to this driver and re-probe devices
+ * @driver: target device driver
+ * @buf: buffer for scanning device ID data
+ * @count: input size
+ *
+ * Adds a new dynamic hid device ID to this driver,
+ * and causes the driver to probe for all devices again.
+ */
+static ssize_t store_new_id(struct device_driver *drv, const char *buf,
+		size_t count)
+{
+	struct hid_driver *hdrv = container_of(drv, struct hid_driver, driver);
+	struct hid_dynid *dynid;
+	__u32 bus, vendor, product;
+	unsigned long driver_data = 0;
+	int ret;
+
+	ret = sscanf(buf, "%x %x %x %lx",
+			&bus, &vendor, &product, &driver_data);
+	if (ret < 3)
+		return -EINVAL;
+
+	dynid = kzalloc(sizeof(*dynid), GFP_KERNEL);
+	if (!dynid)
+		return -ENOMEM;
+
+	dynid->id.bus = bus;
+	dynid->id.vendor = vendor;
+	dynid->id.product = product;
+	dynid->id.driver_data = driver_data;
+
+	spin_lock(&hdrv->dyn_lock);
+	list_add_tail(&dynid->list, &hdrv->dyn_list);
+	spin_unlock(&hdrv->dyn_lock);
+
+	ret = 0;
+	if (get_driver(&hdrv->driver)) {
+		ret = driver_attach(&hdrv->driver);
+		put_driver(&hdrv->driver);
+	}
+
+	return ret ? : count;
+}
+static DRIVER_ATTR(new_id, S_IWUSR, NULL, store_new_id);
+
+static void hid_free_dynids(struct hid_driver *hdrv)
+{
+	struct hid_dynid *dynid, *n;
+
+	spin_lock(&hdrv->dyn_lock);
+	list_for_each_entry_safe(dynid, n, &hdrv->dyn_list, list) {
+		list_del(&dynid->list);
+		kfree(dynid);
+	}
+	spin_unlock(&hdrv->dyn_lock);
+}
+
+static const struct hid_device_id *hid_match_device(struct hid_device *hdev,
+		struct hid_driver *hdrv)
+{
+	struct hid_dynid *dynid;
+
+	spin_lock(&hdrv->dyn_lock);
+	list_for_each_entry(dynid, &hdrv->dyn_list, list) {
+		if (hid_match_one_id(hdev, &dynid->id)) {
+			spin_unlock(&hdrv->dyn_lock);
+			return &dynid->id;
+		}
+	}
+	spin_unlock(&hdrv->dyn_lock);
+
+	return hid_match_id(hdev, hdrv->id_table);
+}
+
 static int hid_bus_match(struct device *dev, struct device_driver *drv)
 {
 	struct hid_driver *hdrv = container_of(drv, struct hid_driver, driver);
 	struct hid_device *hdev = container_of(dev, struct hid_device, dev);
 
-	if (!hid_match_id(hdev, hdrv->id_table))
+	if (!hid_match_device(hdev, hdrv))
 		return 0;
 
 	/* generic wants all non-blacklisted */
@@ -1328,7 +1408,7 @@ static int hid_device_probe(struct device *dev)
 	int ret = 0;
 
 	if (!hdev->driver) {
-		id = hid_match_id(hdev, hdrv->id_table);
+		id = hid_match_device(hdev, hdrv);
 		if (id == NULL)
 			return -ENODEV;
 
@@ -1695,18 +1775,33 @@ EXPORT_SYMBOL_GPL(hid_destroy_device);
 int __hid_register_driver(struct hid_driver *hdrv, struct module *owner,
 		const char *mod_name)
 {
+	int ret;
+
 	hdrv->driver.name = hdrv->name;
 	hdrv->driver.bus = &hid_bus_type;
 	hdrv->driver.owner = owner;
 	hdrv->driver.mod_name = mod_name;
 
-	return driver_register(&hdrv->driver);
+	INIT_LIST_HEAD(&hdrv->dyn_list);
+	spin_lock_init(&hdrv->dyn_lock);
+
+	ret = driver_register(&hdrv->driver);
+	if (ret)
+		return ret;
+
+	ret = driver_create_file(&hdrv->driver, &driver_attr_new_id);
+	if (ret)
+		driver_unregister(&hdrv->driver);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(__hid_register_driver);
 
 void hid_unregister_driver(struct hid_driver *hdrv)
 {
+	driver_remove_file(&hdrv->driver, &driver_attr_new_id);
 	driver_unregister(&hdrv->driver);
+	hid_free_dynids(hdrv);
 }
 EXPORT_SYMBOL_GPL(hid_unregister_driver);
 
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 2c20f20283b2..215035bbb288 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -531,6 +531,8 @@ struct hid_usage_id {
  * @name: driver name (e.g. "Footech_bar-wheel")
  * @id_table: which devices is this driver for (must be non-NULL for probe
  * 	      to be called)
+ * @dyn_list: list of dynamically added device ids
+ * @dyn_lock: lock protecting @dyn_list
  * @probe: new device inserted
  * @remove: device removed (NULL if not a hot-plug capable driver)
  * @report_table: on which reports to call raw_event (NULL means all)
@@ -558,6 +560,9 @@ struct hid_driver {
 	char *name;
 	const struct hid_device_id *id_table;
 
+	struct list_head dyn_list;
+	spinlock_t dyn_lock;
+
 	int (*probe)(struct hid_device *dev, const struct hid_device_id *id);
 	void (*remove)(struct hid_device *dev);
 
-- 
cgit v1.2.3


From 725cf0f47dbb02e0482f081828cff73f55479b79 Mon Sep 17 00:00:00 2001
From: Hannes Eder <hannes@hanneseder.net>
Date: Tue, 16 Dec 2008 14:20:23 +0100
Subject: HID: avoid sparse warning in HID_COMPAT_LOAD_DRIVER

Impact: include a prototype for the exported function in the macro

Fix about 20 of this warnings:

  drivers/hid/hid-a4tech.c:162:1: warning: symbol 'hid_compat_a4tech' was not declared. Should it be static?

Signed-off-by: Hannes Eder <hannes@hanneseder.net>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/hid.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/hid.h b/include/linux/hid.h
index 215035bbb288..81aa84d60c6b 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -793,6 +793,8 @@ dbg_hid(const char *fmt, ...)
 
 #ifdef CONFIG_HID_COMPAT
 #define HID_COMPAT_LOAD_DRIVER(name)	\
+/* prototype to avoid sparse warning */	\
+extern void hid_compat_##name(void);	\
 void hid_compat_##name(void) { }	\
 EXPORT_SYMBOL(hid_compat_##name)
 #else
-- 
cgit v1.2.3


From c66b9906f863696159e05890bb7123269bb9a9de Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 4 Jan 2009 10:55:02 +0100
Subject: intel-iommu: fix build error with INTR_REMAP=y and DMAR=n

dmar.o can be built in the CONFIG_INTR_REMAP=y case but
iommu_calculate_agaw() is only available if VT-d is built as well.

So create an inline version of iommu_calculate_agaw() for the
!CONFIG_DMAR case. The iommu->agaw value wont be used in this
case, but the code is cleaner (has less #ifdefs) if we have it around
unconditionally.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/dma_remapping.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h
index 136f170cecc2..af1dab41674b 100644
--- a/include/linux/dma_remapping.h
+++ b/include/linux/dma_remapping.h
@@ -17,7 +17,15 @@ struct dmar_domain;
 struct root_entry;
 
 extern void free_dmar_iommu(struct intel_iommu *iommu);
+
+#ifdef CONFIG_DMAR
 extern int iommu_calculate_agaw(struct intel_iommu *iommu);
+#else
+static inline int iommu_calculate_agaw(struct intel_iommu *iommu)
+{
+	return 0;
+}
+#endif
 
 extern int dmar_disabled;
 
-- 
cgit v1.2.3


From 0c8a601678960fbcc1c1185a283d6d107575810b Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Sat, 8 Nov 2008 01:10:16 +0100
Subject: mfd: Add WM8350 revision H support

No other software changes are required.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>
---
 drivers/mfd/wm8350-core.c       | 7 ++++++-
 include/linux/mfd/wm8350/core.h | 1 +
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/wm8350-core.c b/drivers/mfd/wm8350-core.c
index d63a530c4fe8..c013afde260d 100644
--- a/drivers/mfd/wm8350-core.c
+++ b/drivers/mfd/wm8350-core.c
@@ -1202,9 +1202,14 @@ int wm8350_device_init(struct wm8350 *wm8350, int irq,
 			dev_info(wm8350->dev, "Found Rev G device\n");
 			wm8350->rev = WM8350_REV_G;
 			break;
+		case WM8350_REV_H:
+			dev_info(wm8350->dev, "Found Rev H device\n");
+			wm8350->rev = WM8350_REV_H;
+			break;
 		default:
 			/* For safety we refuse to run on unknown hardware */
-			dev_info(wm8350->dev, "Found unknown rev\n");
+			dev_info(wm8350->dev, "Found unknown rev %x\n",
+				 (id2 & WM8350_CHIP_REV_MASK) >> 12);
 			ret = -ENODEV;
 			goto err;
 		}
diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h
index 6ebf97f2a475..9490ec175d5a 100644
--- a/include/linux/mfd/wm8350/core.h
+++ b/include/linux/mfd/wm8350/core.h
@@ -536,6 +536,7 @@
 #define WM8350_REV_E				0x4
 #define WM8350_REV_F				0x5
 #define WM8350_REV_G				0x6
+#define WM8350_REV_H				0x7
 
 #define WM8350_NUM_IRQ				63
 
-- 
cgit v1.2.3


From 67488526349d043372d141c054f4dc6313780b3c Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Sat, 8 Nov 2008 01:10:21 +0100
Subject: mfd: Add AUXADC support for WM8350

The auxiliary ADC in the WM8350 is shared between several subdevices
so access to it needs to be arbitrated by the core driver.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>
---
 drivers/mfd/wm8350-core.c             | 51 ++++++++++++++++++++++++++++++++++-
 include/linux/mfd/wm8350/comparator.h |  8 ++++++
 include/linux/mfd/wm8350/core.h       |  2 ++
 3 files changed, 60 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/wm8350-core.c b/drivers/mfd/wm8350-core.c
index c013afde260d..60439bd3984d 100644
--- a/drivers/mfd/wm8350-core.c
+++ b/drivers/mfd/wm8350-core.c
@@ -63,7 +63,6 @@
  */
 static DEFINE_MUTEX(io_mutex);
 static DEFINE_MUTEX(reg_lock_mutex);
-static DEFINE_MUTEX(auxadc_mutex);
 
 /* Perform a physical read from the device.
  */
@@ -1082,6 +1081,55 @@ int wm8350_unmask_irq(struct wm8350 *wm8350, int irq)
 }
 EXPORT_SYMBOL_GPL(wm8350_unmask_irq);
 
+int wm8350_read_auxadc(struct wm8350 *wm8350, int channel, int scale, int vref)
+{
+	u16 reg, result = 0;
+	int tries = 5;
+
+	if (channel < WM8350_AUXADC_AUX1 || channel > WM8350_AUXADC_TEMP)
+		return -EINVAL;
+	if (channel >= WM8350_AUXADC_USB && channel <= WM8350_AUXADC_TEMP
+	    && (scale != 0 || vref != 0))
+		return -EINVAL;
+
+	mutex_lock(&wm8350->auxadc_mutex);
+
+	/* Turn on the ADC */
+	reg = wm8350_reg_read(wm8350, WM8350_POWER_MGMT_5);
+	wm8350_reg_write(wm8350, WM8350_POWER_MGMT_5, reg | WM8350_AUXADC_ENA);
+
+	if (scale || vref) {
+		reg = scale << 13;
+		reg |= vref << 12;
+		wm8350_reg_write(wm8350, WM8350_AUX1_READBACK + channel, reg);
+	}
+
+	reg = wm8350_reg_read(wm8350, WM8350_DIGITISER_CONTROL_1);
+	reg |= 1 << channel | WM8350_AUXADC_POLL;
+	wm8350_reg_write(wm8350, WM8350_DIGITISER_CONTROL_1, reg);
+
+	do {
+		schedule_timeout_interruptible(1);
+		reg = wm8350_reg_read(wm8350, WM8350_DIGITISER_CONTROL_1);
+	} while (tries-- && (reg & WM8350_AUXADC_POLL));
+
+	if (!tries)
+		dev_err(wm8350->dev, "adc chn %d read timeout\n", channel);
+	else
+		result = wm8350_reg_read(wm8350,
+					 WM8350_AUX1_READBACK + channel);
+
+	/* Turn off the ADC */
+	reg = wm8350_reg_read(wm8350, WM8350_POWER_MGMT_5);
+	wm8350_reg_write(wm8350, WM8350_POWER_MGMT_5,
+			 reg & ~WM8350_AUXADC_ENA);
+
+	mutex_unlock(&wm8350->auxadc_mutex);
+
+	return result & WM8350_AUXADC_DATA1_MASK;
+}
+EXPORT_SYMBOL_GPL(wm8350_read_auxadc);
+
 /*
  * Cache is always host endian.
  */
@@ -1239,6 +1287,7 @@ int wm8350_device_init(struct wm8350 *wm8350, int irq,
 		}
 	}
 
+	mutex_init(&wm8350->auxadc_mutex);
 	mutex_init(&wm8350->irq_mutex);
 	INIT_WORK(&wm8350->irq_work, wm8350_irq_worker);
 	if (irq) {
diff --git a/include/linux/mfd/wm8350/comparator.h b/include/linux/mfd/wm8350/comparator.h
index 053788649452..54bc5d0fd502 100644
--- a/include/linux/mfd/wm8350/comparator.h
+++ b/include/linux/mfd/wm8350/comparator.h
@@ -164,4 +164,12 @@
 #define WM8350_AUXADC_BATT			6
 #define WM8350_AUXADC_TEMP			7
 
+struct wm8350;
+
+/*
+ * AUX ADC Readback
+ */
+int wm8350_read_auxadc(struct wm8350 *wm8350, int channel, int scale,
+		       int vref);
+
 #endif
diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h
index 9490ec175d5a..cc190055b9c4 100644
--- a/include/linux/mfd/wm8350/core.h
+++ b/include/linux/mfd/wm8350/core.h
@@ -573,6 +573,8 @@ struct wm8350 {
 			 void *src);
 	u16 *reg_cache;
 
+	struct mutex auxadc_mutex;
+
 	/* Interrupt handling */
 	struct work_struct irq_work;
 	struct mutex irq_mutex; /* IRQ table mutex */
-- 
cgit v1.2.3


From 3fba19ec1ae5b460c73a7f32efed8d3b3300b246 Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Sat, 8 Nov 2008 01:13:16 +0100
Subject: mfd: allow reading entire register banks on twl4030

Minor change to the TWL4030 utility interface:  support reads
of all 256 bytes in each register bank (vs just 255).  This
can help when debugging, but is otherwise a NOP.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>
---
 drivers/mfd/twl4030-core.c  | 4 ++--
 include/linux/i2c/twl4030.h | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/twl4030-core.c b/drivers/mfd/twl4030-core.c
index dd843c4fbcc7..ef9a971e3ead 100644
--- a/drivers/mfd/twl4030-core.c
+++ b/drivers/mfd/twl4030-core.c
@@ -225,7 +225,7 @@ static struct twl4030mapping twl4030_map[TWL4030_MODULE_LAST + 1] = {
  *
  * Returns the result of operation - 0 is success
  */
-int twl4030_i2c_write(u8 mod_no, u8 *value, u8 reg, u8 num_bytes)
+int twl4030_i2c_write(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes)
 {
 	int ret;
 	int sid;
@@ -274,7 +274,7 @@ EXPORT_SYMBOL(twl4030_i2c_write);
  *
  * Returns result of operation - num_bytes is success else failure.
  */
-int twl4030_i2c_read(u8 mod_no, u8 *value, u8 reg, u8 num_bytes)
+int twl4030_i2c_read(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes)
 {
 	int ret;
 	u8 val;
diff --git a/include/linux/i2c/twl4030.h b/include/linux/i2c/twl4030.h
index fb604dcd38f1..ae25c907b7cf 100644
--- a/include/linux/i2c/twl4030.h
+++ b/include/linux/i2c/twl4030.h
@@ -78,8 +78,8 @@ int twl4030_i2c_read_u8(u8 mod_no, u8 *val, u8 reg);
  * IMPORTANT:  For twl4030_i2c_write(), allocate num_bytes + 1
  * for the value, and populate your data starting at offset 1.
  */
-int twl4030_i2c_write(u8 mod_no, u8 *value, u8 reg, u8 num_bytes);
-int twl4030_i2c_read(u8 mod_no, u8 *value, u8 reg, u8 num_bytes);
+int twl4030_i2c_write(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes);
+int twl4030_i2c_read(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes);
 
 /*----------------------------------------------------------------------*/
 
-- 
cgit v1.2.3


From 14431aa0c5a443d13d24e6f865a8838f97dab973 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Sun, 16 Nov 2008 20:16:47 +0100
Subject: power_supply: Add support for WM8350 PMU

This patch adds support for the PMU provided by the WM8350 which
implements battery, line and USB supplies including a battery charger.
The hardware functions largely autonomously, with minimal software
control required to initiate fast charging.

Support for configuration of the USB supply is not yet implemented.
This means that the hardware will remain in the mode configured at
startup, by default limiting the current drawn from USB to 100mA.

This driver was originally written by Liam Girdwood with subsequent
updates for submission by Mark Brown.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Anton Vorontsov <cbouatmailru@gmail.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>
---
 drivers/power/Kconfig             |   7 +
 drivers/power/Makefile            |   1 +
 drivers/power/wm8350_power.c      | 515 ++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/wm8350/core.h   |  26 ++
 include/linux/mfd/wm8350/supply.h |  23 +-
 5 files changed, 571 insertions(+), 1 deletion(-)
 create mode 100644 drivers/power/wm8350_power.c

(limited to 'include/linux')

diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig
index 8e0c2b47803c..52f86767f72e 100644
--- a/drivers/power/Kconfig
+++ b/drivers/power/Kconfig
@@ -29,6 +29,13 @@ config APM_POWER
 	  Say Y here to enable support APM status emulation using
 	  battery class devices.
 
+config WM8350_POWER
+        tristate "WM8350 PMU support"
+        depends on MFD_WM8350
+        help
+          Say Y here to enable support for the power management unit
+	  provided by the Wolfson Microelectronics WM8350 PMIC.
+
 config BATTERY_DS2760
 	tristate "DS2760 battery driver (HP iPAQ & others)"
 	select W1
diff --git a/drivers/power/Makefile b/drivers/power/Makefile
index e8f1ecec5d8f..e6f68655d9e8 100644
--- a/drivers/power/Makefile
+++ b/drivers/power/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_POWER_SUPPLY)	+= power_supply.o
 
 obj-$(CONFIG_PDA_POWER)		+= pda_power.o
 obj-$(CONFIG_APM_POWER)		+= apm_power.o
+obj-$(CONFIG_WM8350_POWER)	+= wm8350_power.o
 
 obj-$(CONFIG_BATTERY_DS2760)	+= ds2760_battery.o
 obj-$(CONFIG_BATTERY_PMU)	+= pmu_battery.o
diff --git a/drivers/power/wm8350_power.c b/drivers/power/wm8350_power.c
new file mode 100644
index 000000000000..9c0a847a1139
--- /dev/null
+++ b/drivers/power/wm8350_power.c
@@ -0,0 +1,515 @@
+/*
+ * Battery driver for wm8350 PMIC
+ *
+ * Copyright 2007, 2008 Wolfson Microelectronics PLC.
+ *
+ * Based on OLPC Battery Driver
+ *
+ * Copyright 2006  David Woodhouse <dwmw2@infradead.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/platform_device.h>
+#include <linux/power_supply.h>
+#include <linux/mfd/wm8350/supply.h>
+#include <linux/mfd/wm8350/core.h>
+#include <linux/mfd/wm8350/comparator.h>
+
+static int wm8350_read_battery_uvolts(struct wm8350 *wm8350)
+{
+	return wm8350_read_auxadc(wm8350, WM8350_AUXADC_BATT, 0, 0)
+		* WM8350_AUX_COEFF;
+}
+
+static int wm8350_read_line_uvolts(struct wm8350 *wm8350)
+{
+	return wm8350_read_auxadc(wm8350, WM8350_AUXADC_LINE, 0, 0)
+		* WM8350_AUX_COEFF;
+}
+
+static int wm8350_read_usb_uvolts(struct wm8350 *wm8350)
+{
+	return wm8350_read_auxadc(wm8350, WM8350_AUXADC_USB, 0, 0)
+		* WM8350_AUX_COEFF;
+}
+
+#define WM8350_BATT_SUPPLY	1
+#define WM8350_USB_SUPPLY	2
+#define WM8350_LINE_SUPPLY	4
+
+static inline int wm8350_charge_time_min(struct wm8350 *wm8350, int min)
+{
+	if (wm8350->rev < WM8350_REV_G)
+		return (((min - 30) / 15) & 0xf) << 8;
+	else
+		return (((min - 30) / 30) & 0xf) << 8;
+}
+
+static int wm8350_get_supplies(struct wm8350 *wm8350)
+{
+	u16 sm, ov, co, chrg;
+	int supplies = 0;
+
+	sm = wm8350_reg_read(wm8350, WM8350_STATE_MACHINE_STATUS);
+	ov = wm8350_reg_read(wm8350, WM8350_MISC_OVERRIDES);
+	co = wm8350_reg_read(wm8350, WM8350_COMPARATOR_OVERRIDES);
+	chrg = wm8350_reg_read(wm8350, WM8350_BATTERY_CHARGER_CONTROL_2);
+
+	/* USB_SM */
+	sm = (sm & WM8350_USB_SM_MASK) >> WM8350_USB_SM_SHIFT;
+
+	/* CHG_ISEL */
+	chrg &= WM8350_CHG_ISEL_MASK;
+
+	/* If the USB state machine is active then we're using that with or
+	 * without battery, otherwise check for wall supply */
+	if (((sm == WM8350_USB_SM_100_SLV) ||
+	     (sm == WM8350_USB_SM_500_SLV) ||
+	     (sm == WM8350_USB_SM_STDBY_SLV))
+	    && !(ov & WM8350_USB_LIMIT_OVRDE))
+		supplies = WM8350_USB_SUPPLY;
+	else if (((sm == WM8350_USB_SM_100_SLV) ||
+		  (sm == WM8350_USB_SM_500_SLV) ||
+		  (sm == WM8350_USB_SM_STDBY_SLV))
+		 && (ov & WM8350_USB_LIMIT_OVRDE) && (chrg == 0))
+		supplies = WM8350_USB_SUPPLY | WM8350_BATT_SUPPLY;
+	else if (co & WM8350_WALL_FB_OVRDE)
+		supplies = WM8350_LINE_SUPPLY;
+	else
+		supplies = WM8350_BATT_SUPPLY;
+
+	return supplies;
+}
+
+static int wm8350_charger_config(struct wm8350 *wm8350,
+				 struct wm8350_charger_policy *policy)
+{
+	u16 reg, eoc_mA, fast_limit_mA;
+
+	if (!policy) {
+		dev_warn(wm8350->dev,
+			 "No charger policy, charger not configured.\n");
+		return -EINVAL;
+	}
+
+	/* make sure USB fast charge current is not > 500mA */
+	if (policy->fast_limit_USB_mA > 500) {
+		dev_err(wm8350->dev, "USB fast charge > 500mA\n");
+		return -EINVAL;
+	}
+
+	eoc_mA = WM8350_CHG_EOC_mA(policy->eoc_mA);
+
+	wm8350_reg_unlock(wm8350);
+
+	reg = wm8350_reg_read(wm8350, WM8350_BATTERY_CHARGER_CONTROL_1)
+		& WM8350_CHG_ENA_R168;
+	wm8350_reg_write(wm8350, WM8350_BATTERY_CHARGER_CONTROL_1,
+			 reg | eoc_mA | policy->trickle_start_mV |
+			 WM8350_CHG_TRICKLE_TEMP_CHOKE |
+			 WM8350_CHG_TRICKLE_USB_CHOKE |
+			 WM8350_CHG_FAST_USB_THROTTLE);
+
+	if (wm8350_get_supplies(wm8350) & WM8350_USB_SUPPLY) {
+		fast_limit_mA =
+			WM8350_CHG_FAST_LIMIT_mA(policy->fast_limit_USB_mA);
+		wm8350_reg_write(wm8350, WM8350_BATTERY_CHARGER_CONTROL_2,
+			    policy->charge_mV | policy->trickle_charge_USB_mA |
+			    fast_limit_mA | wm8350_charge_time_min(wm8350,
+						policy->charge_timeout));
+
+	} else {
+		fast_limit_mA =
+			WM8350_CHG_FAST_LIMIT_mA(policy->fast_limit_mA);
+		wm8350_reg_write(wm8350, WM8350_BATTERY_CHARGER_CONTROL_2,
+			    policy->charge_mV | policy->trickle_charge_mA |
+			    fast_limit_mA | wm8350_charge_time_min(wm8350,
+						policy->charge_timeout));
+	}
+
+	wm8350_reg_lock(wm8350);
+	return 0;
+}
+
+static int wm8350_batt_status(struct wm8350 *wm8350)
+{
+	u16 state;
+
+	state = wm8350_reg_read(wm8350, WM8350_BATTERY_CHARGER_CONTROL_2);
+	state &= WM8350_CHG_STS_MASK;
+
+	switch (state) {
+	case WM8350_CHG_STS_OFF:
+		return POWER_SUPPLY_STATUS_DISCHARGING;
+
+	case WM8350_CHG_STS_TRICKLE:
+	case WM8350_CHG_STS_FAST:
+		return POWER_SUPPLY_STATUS_CHARGING;
+
+	default:
+		return POWER_SUPPLY_STATUS_UNKNOWN;
+	}
+}
+
+static ssize_t charger_state_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct wm8350 *wm8350 = dev_get_drvdata(dev);
+	char *charge;
+	int state;
+
+	state = wm8350_reg_read(wm8350, WM8350_BATTERY_CHARGER_CONTROL_2) &
+	    WM8350_CHG_STS_MASK;
+	switch (state) {
+	case WM8350_CHG_STS_OFF:
+		charge = "Charger Off";
+		break;
+	case WM8350_CHG_STS_TRICKLE:
+		charge = "Trickle Charging";
+		break;
+	case WM8350_CHG_STS_FAST:
+		charge = "Fast Charging";
+		break;
+	default:
+		return 0;
+	}
+
+	return sprintf(buf, "%s\n", charge);
+}
+
+static DEVICE_ATTR(charger_state, 0444, charger_state_show, NULL);
+
+static void wm8350_charger_handler(struct wm8350 *wm8350, int irq, void *data)
+{
+	struct wm8350_power *power = &wm8350->power;
+	struct wm8350_charger_policy *policy = power->policy;
+
+	switch (irq) {
+	case WM8350_IRQ_CHG_BAT_HOT:
+		dev_err(wm8350->dev, "battery too hot\n");
+		break;
+	case WM8350_IRQ_CHG_BAT_COLD:
+		dev_err(wm8350->dev, "battery too cold\n");
+		break;
+	case WM8350_IRQ_CHG_BAT_FAIL:
+		dev_err(wm8350->dev, "battery failed\n");
+		break;
+	case WM8350_IRQ_CHG_TO:
+		dev_err(wm8350->dev, "charger timeout\n");
+		break;
+	case WM8350_IRQ_CHG_END:
+		power_supply_changed(&power->battery);
+		break;
+	case WM8350_IRQ_CHG_START:
+		power_supply_changed(&power->battery);
+		break;
+
+	case WM8350_IRQ_CHG_FAST_RDY:
+		dev_dbg(wm8350->dev, "fast charger ready\n");
+		wm8350_charger_config(wm8350, policy);
+		wm8350_reg_unlock(wm8350);
+		wm8350_set_bits(wm8350, WM8350_BATTERY_CHARGER_CONTROL_1,
+				WM8350_CHG_FAST);
+		wm8350_reg_lock(wm8350);
+		break;
+
+	case WM8350_IRQ_CHG_VBATT_LT_3P9:
+		dev_warn(wm8350->dev, "battery < 3.9V\n");
+		break;
+	case WM8350_IRQ_CHG_VBATT_LT_3P1:
+		dev_warn(wm8350->dev, "battery < 3.1V\n");
+		break;
+	case WM8350_IRQ_CHG_VBATT_LT_2P85:
+		dev_warn(wm8350->dev, "battery < 2.85V\n");
+		break;
+
+		/* Supply change.  We will overnotify but it should do
+		 * no harm. */
+	case WM8350_IRQ_EXT_USB_FB:
+	case WM8350_IRQ_EXT_WALL_FB:
+		wm8350_charger_config(wm8350, policy);
+	case WM8350_IRQ_EXT_BAT_FB:   /* Fall through */
+		power_supply_changed(&power->battery);
+		power_supply_changed(&power->usb);
+		power_supply_changed(&power->ac);
+		break;
+
+	default:
+		dev_err(wm8350->dev, "Unknown interrupt %d\n", irq);
+	}
+}
+
+/*********************************************************************
+ *		AC Power
+ *********************************************************************/
+static int wm8350_ac_get_prop(struct power_supply *psy,
+			      enum power_supply_property psp,
+			      union power_supply_propval *val)
+{
+	struct wm8350 *wm8350 = dev_get_drvdata(psy->dev->parent);
+	int ret = 0;
+
+	switch (psp) {
+	case POWER_SUPPLY_PROP_ONLINE:
+		val->intval = !!(wm8350_get_supplies(wm8350) &
+				 WM8350_LINE_SUPPLY);
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+		val->intval = wm8350_read_line_uvolts(wm8350);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+	return ret;
+}
+
+static enum power_supply_property wm8350_ac_props[] = {
+	POWER_SUPPLY_PROP_ONLINE,
+	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+};
+
+/*********************************************************************
+ *		USB Power
+ *********************************************************************/
+static int wm8350_usb_get_prop(struct power_supply *psy,
+			       enum power_supply_property psp,
+			       union power_supply_propval *val)
+{
+	struct wm8350 *wm8350 = dev_get_drvdata(psy->dev->parent);
+	int ret = 0;
+
+	switch (psp) {
+	case POWER_SUPPLY_PROP_ONLINE:
+		val->intval = !!(wm8350_get_supplies(wm8350) &
+				 WM8350_USB_SUPPLY);
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+		val->intval = wm8350_read_usb_uvolts(wm8350);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+	return ret;
+}
+
+static enum power_supply_property wm8350_usb_props[] = {
+	POWER_SUPPLY_PROP_ONLINE,
+	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+};
+
+/*********************************************************************
+ *		Battery properties
+ *********************************************************************/
+
+static int wm8350_bat_get_property(struct power_supply *psy,
+				   enum power_supply_property psp,
+				   union power_supply_propval *val)
+{
+	struct wm8350 *wm8350 = dev_get_drvdata(psy->dev->parent);
+	int ret = 0;
+
+	switch (psp) {
+	case POWER_SUPPLY_PROP_STATUS:
+		val->intval = wm8350_batt_status(wm8350);
+		break;
+	case POWER_SUPPLY_PROP_ONLINE:
+		val->intval = !!(wm8350_get_supplies(wm8350) &
+				 WM8350_BATT_SUPPLY);
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+		val->intval = wm8350_read_battery_uvolts(wm8350);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+static enum power_supply_property wm8350_bat_props[] = {
+	POWER_SUPPLY_PROP_STATUS,
+	POWER_SUPPLY_PROP_ONLINE,
+	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+};
+
+/*********************************************************************
+ *		Initialisation
+ *********************************************************************/
+
+static void wm8350_init_charger(struct wm8350 *wm8350)
+{
+	/* register our interest in charger events */
+	wm8350_register_irq(wm8350, WM8350_IRQ_CHG_BAT_HOT,
+			    wm8350_charger_handler, NULL);
+	wm8350_unmask_irq(wm8350, WM8350_IRQ_CHG_BAT_HOT);
+	wm8350_register_irq(wm8350, WM8350_IRQ_CHG_BAT_COLD,
+			    wm8350_charger_handler, NULL);
+	wm8350_unmask_irq(wm8350, WM8350_IRQ_CHG_BAT_COLD);
+	wm8350_register_irq(wm8350, WM8350_IRQ_CHG_BAT_FAIL,
+			    wm8350_charger_handler, NULL);
+	wm8350_unmask_irq(wm8350, WM8350_IRQ_CHG_BAT_FAIL);
+	wm8350_register_irq(wm8350, WM8350_IRQ_CHG_TO,
+			    wm8350_charger_handler, NULL);
+	wm8350_unmask_irq(wm8350, WM8350_IRQ_CHG_TO);
+	wm8350_register_irq(wm8350, WM8350_IRQ_CHG_END,
+			    wm8350_charger_handler, NULL);
+	wm8350_unmask_irq(wm8350, WM8350_IRQ_CHG_END);
+	wm8350_register_irq(wm8350, WM8350_IRQ_CHG_START,
+			    wm8350_charger_handler, NULL);
+	wm8350_unmask_irq(wm8350, WM8350_IRQ_CHG_START);
+	wm8350_register_irq(wm8350, WM8350_IRQ_CHG_FAST_RDY,
+			    wm8350_charger_handler, NULL);
+	wm8350_unmask_irq(wm8350, WM8350_IRQ_CHG_FAST_RDY);
+	wm8350_register_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P9,
+			    wm8350_charger_handler, NULL);
+	wm8350_unmask_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P9);
+	wm8350_register_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P1,
+			    wm8350_charger_handler, NULL);
+	wm8350_unmask_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P1);
+	wm8350_register_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_2P85,
+			    wm8350_charger_handler, NULL);
+	wm8350_unmask_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_2P85);
+
+	/* and supply change events */
+	wm8350_register_irq(wm8350, WM8350_IRQ_EXT_USB_FB,
+			    wm8350_charger_handler, NULL);
+	wm8350_unmask_irq(wm8350, WM8350_IRQ_EXT_USB_FB);
+	wm8350_register_irq(wm8350, WM8350_IRQ_EXT_WALL_FB,
+			    wm8350_charger_handler, NULL);
+	wm8350_unmask_irq(wm8350, WM8350_IRQ_EXT_WALL_FB);
+	wm8350_register_irq(wm8350, WM8350_IRQ_EXT_BAT_FB,
+			    wm8350_charger_handler, NULL);
+	wm8350_unmask_irq(wm8350, WM8350_IRQ_EXT_BAT_FB);
+}
+
+static void free_charger_irq(struct wm8350 *wm8350)
+{
+	wm8350_mask_irq(wm8350, WM8350_IRQ_CHG_BAT_HOT);
+	wm8350_free_irq(wm8350, WM8350_IRQ_CHG_BAT_HOT);
+	wm8350_mask_irq(wm8350, WM8350_IRQ_CHG_BAT_COLD);
+	wm8350_free_irq(wm8350, WM8350_IRQ_CHG_BAT_COLD);
+	wm8350_mask_irq(wm8350, WM8350_IRQ_CHG_BAT_FAIL);
+	wm8350_free_irq(wm8350, WM8350_IRQ_CHG_BAT_FAIL);
+	wm8350_mask_irq(wm8350, WM8350_IRQ_CHG_TO);
+	wm8350_free_irq(wm8350, WM8350_IRQ_CHG_TO);
+	wm8350_mask_irq(wm8350, WM8350_IRQ_CHG_END);
+	wm8350_free_irq(wm8350, WM8350_IRQ_CHG_END);
+	wm8350_mask_irq(wm8350, WM8350_IRQ_CHG_START);
+	wm8350_free_irq(wm8350, WM8350_IRQ_CHG_START);
+	wm8350_mask_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P9);
+	wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P9);
+	wm8350_mask_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P1);
+	wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_3P1);
+	wm8350_mask_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_2P85);
+	wm8350_free_irq(wm8350, WM8350_IRQ_CHG_VBATT_LT_2P85);
+	wm8350_mask_irq(wm8350, WM8350_IRQ_EXT_USB_FB);
+	wm8350_free_irq(wm8350, WM8350_IRQ_EXT_USB_FB);
+	wm8350_mask_irq(wm8350, WM8350_IRQ_EXT_WALL_FB);
+	wm8350_free_irq(wm8350, WM8350_IRQ_EXT_WALL_FB);
+	wm8350_mask_irq(wm8350, WM8350_IRQ_EXT_BAT_FB);
+	wm8350_free_irq(wm8350, WM8350_IRQ_EXT_BAT_FB);
+}
+
+static __devinit int wm8350_power_probe(struct platform_device *pdev)
+{
+	struct wm8350 *wm8350 = platform_get_drvdata(pdev);
+	struct wm8350_power *power = &wm8350->power;
+	struct wm8350_charger_policy *policy = power->policy;
+	struct power_supply *usb = &power->usb;
+	struct power_supply *battery = &power->battery;
+	struct power_supply *ac = &power->ac;
+	int ret;
+
+	ac->name = "wm8350-ac";
+	ac->type = POWER_SUPPLY_TYPE_MAINS;
+	ac->properties = wm8350_ac_props;
+	ac->num_properties = ARRAY_SIZE(wm8350_ac_props);
+	ac->get_property = wm8350_ac_get_prop;
+	ret = power_supply_register(&pdev->dev, ac);
+	if (ret)
+		return ret;
+
+	battery->name = "wm8350-battery";
+	battery->properties = wm8350_bat_props;
+	battery->num_properties = ARRAY_SIZE(wm8350_bat_props);
+	battery->get_property = wm8350_bat_get_property;
+	battery->use_for_apm = 1;
+	ret = power_supply_register(&pdev->dev, battery);
+	if (ret)
+		goto battery_failed;
+
+	usb->name = "wm8350-usb",
+	usb->type = POWER_SUPPLY_TYPE_USB;
+	usb->properties = wm8350_usb_props;
+	usb->num_properties = ARRAY_SIZE(wm8350_usb_props);
+	usb->get_property = wm8350_usb_get_prop;
+	ret = power_supply_register(&pdev->dev, usb);
+	if (ret)
+		goto usb_failed;
+
+	ret = device_create_file(&pdev->dev, &dev_attr_charger_state);
+	if (ret < 0)
+		dev_warn(wm8350->dev, "failed to add charge sysfs: %d\n", ret);
+	ret = 0;
+
+	wm8350_init_charger(wm8350);
+	if (wm8350_charger_config(wm8350, policy) == 0) {
+		wm8350_reg_unlock(wm8350);
+		wm8350_set_bits(wm8350, WM8350_POWER_MGMT_5, WM8350_CHG_ENA);
+		wm8350_reg_lock(wm8350);
+	}
+
+	return ret;
+
+usb_failed:
+	power_supply_unregister(battery);
+battery_failed:
+	power_supply_unregister(ac);
+
+	return ret;
+}
+
+static __devexit int wm8350_power_remove(struct platform_device *pdev)
+{
+	struct wm8350 *wm8350 = platform_get_drvdata(pdev);
+	struct wm8350_power *power = &wm8350->power;
+
+	free_charger_irq(wm8350);
+	device_remove_file(&pdev->dev, &dev_attr_charger_state);
+	power_supply_unregister(&power->battery);
+	power_supply_unregister(&power->ac);
+	power_supply_unregister(&power->usb);
+	return 0;
+}
+
+static struct platform_driver wm8350_power_driver = {
+	.probe = wm8350_power_probe,
+	.remove = __devexit_p(wm8350_power_remove),
+	.driver = {
+		.name = "wm8350-power",
+	},
+};
+
+static int __init wm8350_power_init(void)
+{
+	return platform_driver_register(&wm8350_power_driver);
+}
+module_init(wm8350_power_init);
+
+static void __exit wm8350_power_exit(void)
+{
+	platform_driver_unregister(&wm8350_power_driver);
+}
+module_exit(wm8350_power_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Power supply driver for WM8350");
+MODULE_ALIAS("platform:wm8350-power");
diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h
index cc190055b9c4..d2614dfc9397 100644
--- a/include/linux/mfd/wm8350/core.h
+++ b/include/linux/mfd/wm8350/core.h
@@ -57,6 +57,9 @@
 #define WM8350_OVER_CURRENT_INT_STATUS_MASK     0x25
 #define WM8350_GPIO_INT_STATUS_MASK             0x26
 #define WM8350_COMPARATOR_INT_STATUS_MASK       0x27
+#define WM8350_MISC_OVERRIDES			0xE3
+#define WM8350_COMPARATOR_OVERRIDES		0xE7
+#define WM8350_STATE_MACHINE_STATUS		0xE9
 
 #define WM8350_MAX_REGISTER                     0xFF
 
@@ -523,6 +526,29 @@
 #define WM8350_DC2_STS                          0x0002
 #define WM8350_DC1_STS                          0x0001
 
+/*
+ * R227 (0xE3) - Misc Overrides
+ */
+#define WM8350_USB_LIMIT_OVRDE			0x0400
+
+/*
+ * R227 (0xE7) - Comparator Overrides
+ */
+#define WM8350_USB_FB_OVRDE			0x8000
+#define WM8350_WALL_FB_OVRDE			0x4000
+#define WM8350_BATT_FB_OVRDE			0x2000
+
+
+/*
+ * R233 (0xE9) - State Machinine Status
+ */
+#define WM8350_USB_SM_MASK			0x0700
+#define WM8350_USB_SM_SHIFT			8
+
+#define WM8350_USB_SM_100_SLV   1
+#define WM8350_USB_SM_500_SLV   5
+#define WM8350_USB_SM_STDBY_SLV 7
+
 /* WM8350 wake up conditions */
 #define WM8350_IRQ_WKUP_OFF_STATE		43
 #define WM8350_IRQ_WKUP_HIB_STATE		44
diff --git a/include/linux/mfd/wm8350/supply.h b/include/linux/mfd/wm8350/supply.h
index 1c8f3cde79b0..79721513fa9f 100644
--- a/include/linux/mfd/wm8350/supply.h
+++ b/include/linux/mfd/wm8350/supply.h
@@ -13,7 +13,8 @@
 #ifndef __LINUX_MFD_WM8350_SUPPLY_H_
 #define __LINUX_MFD_WM8350_SUPPLY_H_
 
-#include <linux/platform_device.h>
+#include <linux/mutex.h>
+#include <linux/power_supply.h>
 
 /*
  * Charger registers
@@ -104,8 +105,28 @@
 #define WM8350_IRQ_EXT_WALL_FB			37
 #define WM8350_IRQ_EXT_BAT_FB			38
 
+/*
+ * Policy to control charger state machine.
+ */
+struct wm8350_charger_policy {
+
+	/* charger state machine policy  - set in machine driver */
+	int eoc_mA;		/* end of charge current (mA)  */
+	int charge_mV;		/* charge voltage */
+	int fast_limit_mA;	/* fast charge current limit */
+	int fast_limit_USB_mA;	/* USB fast charge current limit */
+	int charge_timeout;	/* charge timeout (mins) */
+	int trickle_start_mV;	/* trickle charge starts at mV */
+	int trickle_charge_mA;	/* trickle charge current */
+	int trickle_charge_USB_mA;	/* USB trickle charge current */
+};
+
 struct wm8350_power {
 	struct platform_device *pdev;
+	struct power_supply battery;
+	struct power_supply usb;
+	struct power_supply ac;
+	struct wm8350_charger_policy *policy;
 };
 
 #endif
-- 
cgit v1.2.3


From d756f4a4446227ca9626087939a6769ca55ab036 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 24 Nov 2008 20:20:30 +0100
Subject: mfd: Switch WM8350 revision detection to a feature based model

Rather than check for chip revisions in the WM8350 drivers have the core
code set flags for relevant differences.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>
---
 drivers/mfd/wm8350-core.c         | 6 ++----
 drivers/power/wm8350_power.c      | 2 +-
 include/linux/mfd/wm8350/core.h   | 2 --
 include/linux/mfd/wm8350/supply.h | 2 ++
 4 files changed, 5 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/wm8350-core.c b/drivers/mfd/wm8350-core.c
index 60439bd3984d..764bf15ea684 100644
--- a/drivers/mfd/wm8350-core.c
+++ b/drivers/mfd/wm8350-core.c
@@ -1240,19 +1240,17 @@ int wm8350_device_init(struct wm8350 *wm8350, int irq,
 		switch ((id2 & WM8350_CHIP_REV_MASK) >> 12) {
 		case WM8350_REV_E:
 			dev_info(wm8350->dev, "Found Rev E device\n");
-			wm8350->rev = WM8350_REV_E;
 			break;
 		case WM8350_REV_F:
 			dev_info(wm8350->dev, "Found Rev F device\n");
-			wm8350->rev = WM8350_REV_F;
 			break;
 		case WM8350_REV_G:
 			dev_info(wm8350->dev, "Found Rev G device\n");
-			wm8350->rev = WM8350_REV_G;
+			wm8350->power.rev_g_coeff = 1;
 			break;
 		case WM8350_REV_H:
 			dev_info(wm8350->dev, "Found Rev H device\n");
-			wm8350->rev = WM8350_REV_H;
+			wm8350->power.rev_g_coeff = 1;
 			break;
 		default:
 			/* For safety we refuse to run on unknown hardware */
diff --git a/drivers/power/wm8350_power.c b/drivers/power/wm8350_power.c
index 9c0a847a1139..74e7593beffb 100644
--- a/drivers/power/wm8350_power.c
+++ b/drivers/power/wm8350_power.c
@@ -44,7 +44,7 @@ static int wm8350_read_usb_uvolts(struct wm8350 *wm8350)
 
 static inline int wm8350_charge_time_min(struct wm8350 *wm8350, int min)
 {
-	if (wm8350->rev < WM8350_REV_G)
+	if (!wm8350->power.rev_g_coeff)
 		return (((min - 30) / 15) & 0xf) << 8;
 	else
 		return (((min - 30) / 30) & 0xf) << 8;
diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h
index d2614dfc9397..3c9735663f36 100644
--- a/include/linux/mfd/wm8350/core.h
+++ b/include/linux/mfd/wm8350/core.h
@@ -585,8 +585,6 @@ struct wm8350_irq {
 };
 
 struct wm8350 {
-	int rev;		/* chip revision */
-
 	struct device *dev;
 
 	/* device IO */
diff --git a/include/linux/mfd/wm8350/supply.h b/include/linux/mfd/wm8350/supply.h
index 79721513fa9f..2b9479310bbd 100644
--- a/include/linux/mfd/wm8350/supply.h
+++ b/include/linux/mfd/wm8350/supply.h
@@ -127,6 +127,8 @@ struct wm8350_power {
 	struct power_supply usb;
 	struct power_supply ac;
 	struct wm8350_charger_policy *policy;
+
+	int rev_g_coeff;
 };
 
 #endif
-- 
cgit v1.2.3


From b797a5551979da22b0a35632198ffc8a330d9537 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 24 Nov 2008 20:22:58 +0100
Subject: mfd: Refactor WM8350 chip identification

Since the WM8350 driver was originally written the semantics for the
identification registers of the chip have been clarified, allowing
us to do an exact match on all the fields. This avoids mistakenly
running on unsupported hardware.

Also change to using the datasheet names more consistently for
legibility and fix a printk() that should be dev_err().

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>
---
 drivers/mfd/wm8350-core.c       | 54 ++++++++++++++++++++++++++++-------------
 include/linux/mfd/wm8350/core.h |  6 +++++
 2 files changed, 43 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/wm8350-core.c b/drivers/mfd/wm8350-core.c
index 764bf15ea684..2188d759cbde 100644
--- a/drivers/mfd/wm8350-core.c
+++ b/drivers/mfd/wm8350-core.c
@@ -1227,52 +1227,72 @@ int wm8350_device_init(struct wm8350 *wm8350, int irq,
 		       struct wm8350_platform_data *pdata)
 {
 	int ret = -EINVAL;
-	u16 id1, id2, mask, mode;
+	u16 id1, id2, mask_rev;
+	u16 cust_id, mode, chip_rev;
 
 	/* get WM8350 revision and config mode */
 	wm8350->read_dev(wm8350, WM8350_RESET_ID, sizeof(id1), &id1);
 	wm8350->read_dev(wm8350, WM8350_ID, sizeof(id2), &id2);
+	wm8350->read_dev(wm8350, WM8350_REVISION, sizeof(mask_rev), &mask_rev);
 
 	id1 = be16_to_cpu(id1);
 	id2 = be16_to_cpu(id2);
+	mask_rev = be16_to_cpu(mask_rev);
 
-	if (id1 == 0x6143) {
-		switch ((id2 & WM8350_CHIP_REV_MASK) >> 12) {
+	if (id1 != 0x6143) {
+		dev_err(wm8350->dev,
+			"Device with ID %x is not a WM8350\n", id1);
+		ret = -ENODEV;
+		goto err;
+	}
+
+	mode = id2 & WM8350_CONF_STS_MASK >> 10;
+	cust_id = id2 & WM8350_CUST_ID_MASK;
+	chip_rev = (id2 & WM8350_CHIP_REV_MASK) >> 12;
+	dev_info(wm8350->dev,
+		 "CONF_STS %d, CUST_ID %d, MASK_REV %d, CHIP_REV %d\n",
+		 mode, cust_id, mask_rev, chip_rev);
+
+	if (cust_id != 0) {
+		dev_err(wm8350->dev, "Unsupported CUST_ID\n");
+		ret = -ENODEV;
+		goto err;
+	}
+
+	switch (mask_rev) {
+	case 0:
+		switch (chip_rev) {
 		case WM8350_REV_E:
-			dev_info(wm8350->dev, "Found Rev E device\n");
+			dev_info(wm8350->dev, "WM8350 Rev E\n");
 			break;
 		case WM8350_REV_F:
-			dev_info(wm8350->dev, "Found Rev F device\n");
+			dev_info(wm8350->dev, "WM8350 Rev F\n");
 			break;
 		case WM8350_REV_G:
-			dev_info(wm8350->dev, "Found Rev G device\n");
+			dev_info(wm8350->dev, "WM8350 Rev G\n");
 			wm8350->power.rev_g_coeff = 1;
 			break;
 		case WM8350_REV_H:
-			dev_info(wm8350->dev, "Found Rev H device\n");
+			dev_info(wm8350->dev, "WM8350 Rev H\n");
 			wm8350->power.rev_g_coeff = 1;
 			break;
 		default:
 			/* For safety we refuse to run on unknown hardware */
-			dev_info(wm8350->dev, "Found unknown rev %x\n",
-				 (id2 & WM8350_CHIP_REV_MASK) >> 12);
+			dev_err(wm8350->dev, "Unknown WM8350 CHIP_REV\n");
 			ret = -ENODEV;
 			goto err;
 		}
-	} else {
-		dev_info(wm8350->dev, "Device with ID %x is not a WM8350\n",
-			 id1);
+		break;
+
+	default:
+		dev_err(wm8350->dev, "Unknown MASK_REV\n");
 		ret = -ENODEV;
 		goto err;
 	}
 
-	mode = id2 & WM8350_CONF_STS_MASK >> 10;
-	mask = id2 & WM8350_CUST_ID_MASK;
-	dev_info(wm8350->dev, "Config mode %d, ROM mask %d\n", mode, mask);
-
 	ret = wm8350_create_cache(wm8350, mode);
 	if (ret < 0) {
-		printk(KERN_ERR "wm8350: failed to create register cache\n");
+		dev_err(wm8350->dev, "Failed to create register cache\n");
 		return ret;
 	}
 
diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h
index 3c9735663f36..2a7abeebe777 100644
--- a/include/linux/mfd/wm8350/core.h
+++ b/include/linux/mfd/wm8350/core.h
@@ -29,6 +29,7 @@
  */
 #define WM8350_RESET_ID                         0x00
 #define WM8350_ID                               0x01
+#define WM8350_REVISION				0x02
 #define WM8350_SYSTEM_CONTROL_1                 0x03
 #define WM8350_SYSTEM_CONTROL_2                 0x04
 #define WM8350_SYSTEM_HIBERNATE                 0x05
@@ -79,6 +80,11 @@
 #define WM8350_CONF_STS_MASK                    0x0C00
 #define WM8350_CUST_ID_MASK                     0x00FF
 
+/*
+ * R2 (0x02) - Revision
+ */
+#define WM8350_MASK_REV_MASK			0x00FF
+
 /*
  * R3 (0x03) - System Control 1
  */
-- 
cgit v1.2.3


From 7e386e6e0e4f34f0545e8923e22fe4dd61ef9d48 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Sun, 30 Nov 2008 22:43:21 +0100
Subject: power_supply: Add cold to the POWER_SUPPLY_HEALTH report values

Some systems are able to report problems with batteries being under
temperature.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Anton Vorontsov <cbouatmailru@gmail.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>
---
 drivers/power/power_supply_sysfs.c | 2 +-
 include/linux/power_supply.h       | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/power/power_supply_sysfs.c b/drivers/power/power_supply_sysfs.c
index 23ae8460f5c1..ac01e06817fb 100644
--- a/drivers/power/power_supply_sysfs.c
+++ b/drivers/power/power_supply_sysfs.c
@@ -45,7 +45,7 @@ static ssize_t power_supply_show_property(struct device *dev,
 	};
 	static char *health_text[] = {
 		"Unknown", "Good", "Overheat", "Dead", "Over voltage",
-		"Unspecified failure"
+		"Unspecified failure", "Cold",
 	};
 	static char *technology_text[] = {
 		"Unknown", "NiMH", "Li-ion", "Li-poly", "LiFe", "NiCd",
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index f9348cba6dc1..8ff25e0e7f7a 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -45,6 +45,7 @@ enum {
 	POWER_SUPPLY_HEALTH_DEAD,
 	POWER_SUPPLY_HEALTH_OVERVOLTAGE,
 	POWER_SUPPLY_HEALTH_UNSPEC_FAILURE,
+	POWER_SUPPLY_HEALTH_COLD,
 };
 
 enum {
-- 
cgit v1.2.3


From 4008e879e1325c29362aa2c3fa4b527273ae15a8 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Sun, 30 Nov 2008 22:45:14 +0100
Subject: power_supply: Add battery health reporting for WM8350

Implement support for reporting battery health in the WM8350 battery
interface. Since we are now able to report this via the classs remove
the diagnostics from the interrupt handler.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Anton Vorontsov <cbouatmailru@gmail.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>
---
 drivers/mfd/wm8350-regmap.c     |  2 +-
 drivers/power/wm8350_power.c    | 33 +++++++++++++++++++++++++--------
 include/linux/mfd/wm8350/core.h |  7 +++++++
 3 files changed, 33 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/wm8350-regmap.c b/drivers/mfd/wm8350-regmap.c
index aaf394aa9c2d..b43d64c2b920 100644
--- a/drivers/mfd/wm8350-regmap.c
+++ b/drivers/mfd/wm8350-regmap.c
@@ -1314,7 +1314,7 @@ const struct wm8350_reg_access wm8350_reg_io_map[] = {
 	{ 0xFFFF, 0xFFFF, 0xFFFF }, /* R223 */
 	{ 0x0000, 0x0000, 0x0000 }, /* R224 */
 	{ 0x8F3F, 0x0000, 0xFFFF }, /* R225 - DCDC/LDO status */
-	{ 0x0000, 0x0000, 0x0000 }, /* R226 */
+	{ 0x0000, 0x0000, 0xFFFF }, /* R226 - Charger status */
 	{ 0x0000, 0x0000, 0xFFFF }, /* R227 */
 	{ 0x0000, 0x0000, 0x0000 }, /* R228 */
 	{ 0x0000, 0x0000, 0x0000 }, /* R229 */
diff --git a/drivers/power/wm8350_power.c b/drivers/power/wm8350_power.c
index 74e7593beffb..1b16bf343f2f 100644
--- a/drivers/power/wm8350_power.c
+++ b/drivers/power/wm8350_power.c
@@ -190,22 +190,18 @@ static void wm8350_charger_handler(struct wm8350 *wm8350, int irq, void *data)
 	struct wm8350_charger_policy *policy = power->policy;
 
 	switch (irq) {
-	case WM8350_IRQ_CHG_BAT_HOT:
-		dev_err(wm8350->dev, "battery too hot\n");
-		break;
-	case WM8350_IRQ_CHG_BAT_COLD:
-		dev_err(wm8350->dev, "battery too cold\n");
-		break;
 	case WM8350_IRQ_CHG_BAT_FAIL:
 		dev_err(wm8350->dev, "battery failed\n");
 		break;
 	case WM8350_IRQ_CHG_TO:
 		dev_err(wm8350->dev, "charger timeout\n");
-		break;
-	case WM8350_IRQ_CHG_END:
 		power_supply_changed(&power->battery);
 		break;
+
+	case WM8350_IRQ_CHG_BAT_HOT:
+	case WM8350_IRQ_CHG_BAT_COLD:
 	case WM8350_IRQ_CHG_START:
+	case WM8350_IRQ_CHG_END:
 		power_supply_changed(&power->battery);
 		break;
 
@@ -308,6 +304,23 @@ static enum power_supply_property wm8350_usb_props[] = {
  *		Battery properties
  *********************************************************************/
 
+static int wm8350_bat_check_health(struct wm8350 *wm8350)
+{
+	u16 reg;
+
+	if (wm8350_read_battery_uvolts(wm8350) < 2850000)
+		return POWER_SUPPLY_HEALTH_UNSPEC_FAILURE;
+
+	reg = wm8350_reg_read(wm8350, WM8350_CHARGER_OVERRIDES);
+	if (reg & WM8350_CHG_BATT_HOT_OVRDE)
+		return POWER_SUPPLY_HEALTH_OVERHEAT;
+
+	if (reg & WM8350_CHG_BATT_COLD_OVRDE)
+		return POWER_SUPPLY_HEALTH_COLD;
+
+	return POWER_SUPPLY_HEALTH_GOOD;
+}
+
 static int wm8350_bat_get_property(struct power_supply *psy,
 				   enum power_supply_property psp,
 				   union power_supply_propval *val)
@@ -326,6 +339,9 @@ static int wm8350_bat_get_property(struct power_supply *psy,
 	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
 		val->intval = wm8350_read_battery_uvolts(wm8350);
 		break;
+	case POWER_SUPPLY_PROP_HEALTH:
+		val->intval = wm8350_bat_check_health(wm8350);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
@@ -338,6 +354,7 @@ static enum power_supply_property wm8350_bat_props[] = {
 	POWER_SUPPLY_PROP_STATUS,
 	POWER_SUPPLY_PROP_ONLINE,
 	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+	POWER_SUPPLY_PROP_HEALTH,
 };
 
 /*********************************************************************
diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h
index 2a7abeebe777..afeff6f1316c 100644
--- a/include/linux/mfd/wm8350/core.h
+++ b/include/linux/mfd/wm8350/core.h
@@ -58,6 +58,7 @@
 #define WM8350_OVER_CURRENT_INT_STATUS_MASK     0x25
 #define WM8350_GPIO_INT_STATUS_MASK             0x26
 #define WM8350_COMPARATOR_INT_STATUS_MASK       0x27
+#define WM8350_CHARGER_OVERRIDES		0xE2
 #define WM8350_MISC_OVERRIDES			0xE3
 #define WM8350_COMPARATOR_OVERRIDES		0xE7
 #define WM8350_STATE_MACHINE_STATUS		0xE9
@@ -532,6 +533,12 @@
 #define WM8350_DC2_STS                          0x0002
 #define WM8350_DC1_STS                          0x0001
 
+/*
+ * R226 (0xE2) - Charger status
+ */
+#define WM8350_CHG_BATT_HOT_OVRDE		0x8000
+#define WM8350_CHG_BATT_COLD_OVRDE		0x4000
+
 /*
  * R227 (0xE3) - Misc Overrides
  */
-- 
cgit v1.2.3


From 67460a7c26271fd7a32e5d51b2c806a84ce78a62 Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Mon, 1 Dec 2008 00:35:33 +0100
Subject: mfd: twl4030: cleanup symbols and OMAP dependency

Finish removing dependency of TWL driver stack on platform-specific
IRQ definitions ... and remove the build dependency on OMAP.

This lets the TWL4030 code be included in test builds for most
platforms, and will make it easier for non-OMAP folk to update
most of this code for new APIs etc.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>
---
 drivers/mfd/Kconfig         |  2 +-
 include/linux/i2c/twl4030.h | 31 -------------------------------
 2 files changed, 1 insertion(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 257277394f8c..8cd3dd9a69b2 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -63,7 +63,7 @@ config UCB1400_CORE
 
 config TWL4030_CORE
 	bool "Texas Instruments TWL4030/TPS659x0 Support"
-	depends on I2C=y && GENERIC_HARDIRQS && (ARCH_OMAP2 || ARCH_OMAP3)
+	depends on I2C=y && GENERIC_HARDIRQS
 	help
 	  Say yes here if you have TWL4030 family chip on your board.
 	  This core driver provides register access and IRQ handling
diff --git a/include/linux/i2c/twl4030.h b/include/linux/i2c/twl4030.h
index ae25c907b7cf..d4846695bcd0 100644
--- a/include/linux/i2c/twl4030.h
+++ b/include/linux/i2c/twl4030.h
@@ -285,33 +285,6 @@ struct twl4030_platform_data {
 
 int twl4030_sih_setup(int module);
 
-/*
- * FIXME completely stop using TWL4030_IRQ_BASE ... instead, pass the
- * IRQ data to subsidiary devices using platform device resources.
- */
-
-/* IRQ information-need base */
-#include <mach/irqs.h>
-/* TWL4030 interrupts */
-
-/* #define TWL4030_MODIRQ_GPIO		(TWL4030_IRQ_BASE + 0) */
-#define TWL4030_MODIRQ_KEYPAD		(TWL4030_IRQ_BASE + 1)
-#define TWL4030_MODIRQ_BCI		(TWL4030_IRQ_BASE + 2)
-#define TWL4030_MODIRQ_MADC		(TWL4030_IRQ_BASE + 3)
-/* #define TWL4030_MODIRQ_USB		(TWL4030_IRQ_BASE + 4) */
-/* #define TWL4030_MODIRQ_PWR		(TWL4030_IRQ_BASE + 5) */
-
-#define TWL4030_PWRIRQ_PWRBTN		(TWL4030_PWR_IRQ_BASE + 0)
-/* #define TWL4030_PWRIRQ_CHG_PRES		(TWL4030_PWR_IRQ_BASE + 1) */
-/* #define TWL4030_PWRIRQ_USB_PRES		(TWL4030_PWR_IRQ_BASE + 2) */
-/* #define TWL4030_PWRIRQ_RTC		(TWL4030_PWR_IRQ_BASE + 3) */
-/* #define TWL4030_PWRIRQ_HOT_DIE		(TWL4030_PWR_IRQ_BASE + 4) */
-/* #define TWL4030_PWRIRQ_PWROK_TIMEOUT	(TWL4030_PWR_IRQ_BASE + 5) */
-/* #define TWL4030_PWRIRQ_MBCHG		(TWL4030_PWR_IRQ_BASE + 6) */
-/* #define TWL4030_PWRIRQ_SC_DETECT	(TWL4030_PWR_IRQ_BASE + 7) */
-
-/* Rest are unsued currently*/
-
 /* Offsets to Power Registers */
 #define TWL4030_VDAC_DEV_GRP		0x3B
 #define TWL4030_VDAC_DEDICATED		0x3E
@@ -322,10 +295,6 @@ int twl4030_sih_setup(int module);
 #define TWL4030_VAUX3_DEV_GRP		0x1F
 #define TWL4030_VAUX3_DEDICATED		0x22
 
-/* TWL4030 GPIO interrupt definitions */
-
-#define TWL4030_GPIO_IRQ_NO(n)		(TWL4030_GPIO_IRQ_BASE + (n))
-
 /*
  * Exported TWL4030 GPIO APIs
  *
-- 
cgit v1.2.3


From dad759ff8ba79927766e3f0159bfc5fb6de0f982 Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Mon, 1 Dec 2008 00:43:58 +0100
Subject: mfd: twl4030: create some regulator devices

Initial code to create twl4030 voltage regulator devices, using
the new regulator framework.  Note that this now starts to care
what name is used to declare the TWL chip:

 - TWL4030 is the "old" chip; newer ones have a bigger variety
   of VAUX2 voltages.

 - TWL5030 is the core "new" chip; TPS65950 is its catalog version.

 - The TPS65930 and TPS65920 are cost-reduced catalog versions of
   TWL5030 parts ... fewer regulators, no battery charger, etc.

Board-specific regulator configuration should be provided, listing
which regulators are used and their constraints (e.g. 1.8V only).

Code that could ("should"?) leverage the regulator stuff includes
TWL4030 USB transceiver support and MMC glue, LCD support for the
3430SDP and Labrador boards, and S-Video output.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>
---
 drivers/mfd/twl4030-core.c  | 174 +++++++++++++++++++++++++++++++++++++++++---
 include/linux/i2c/twl4030.h |  47 ++++++++++++
 2 files changed, 211 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/twl4030-core.c b/drivers/mfd/twl4030-core.c
index f5486cce86f4..8ab9ee8543a6 100644
--- a/drivers/mfd/twl4030-core.c
+++ b/drivers/mfd/twl4030-core.c
@@ -33,6 +33,8 @@
 #include <linux/clk.h>
 #include <linux/err.h>
 
+#include <linux/regulator/machine.h>
+
 #include <linux/i2c.h>
 #include <linux/i2c/twl4030.h>
 
@@ -71,6 +73,13 @@
 #define twl_has_gpio()	false
 #endif
 
+#if defined(CONFIG_REGULATOR_TWL4030) \
+	|| defined(CONFIG_REGULATOR_TWL4030_MODULE)
+#define twl_has_regulator()	true
+#else
+#define twl_has_regulator()	false
+#endif
+
 #if defined(CONFIG_TWL4030_MADC) || defined(CONFIG_TWL4030_MADC_MODULE)
 #define twl_has_madc()	true
 #else
@@ -149,6 +158,10 @@
 #define HIGH_PERF_SQ			(1 << 3)
 
 
+/* chip-specific feature flags, for i2c_device_id.driver_data */
+#define TWL4030_VAUX2		BIT(0)	/* pre-5030 voltage ranges */
+#define TPS_SUBSET		BIT(1)	/* tps659[23]0 have fewer LDOs */
+
 /*----------------------------------------------------------------------*/
 
 /* is driver active, bound to a chip? */
@@ -352,7 +365,8 @@ EXPORT_SYMBOL(twl4030_i2c_read_u8);
 
 /*----------------------------------------------------------------------*/
 
-static struct device *add_child(unsigned chip, const char *name,
+static struct device *
+add_numbered_child(unsigned chip, const char *name, int num,
 		void *pdata, unsigned pdata_len,
 		bool can_wakeup, int irq0, int irq1)
 {
@@ -360,7 +374,7 @@ static struct device *add_child(unsigned chip, const char *name,
 	struct twl4030_client	*twl = &twl4030_modules[chip];
 	int			status;
 
-	pdev = platform_device_alloc(name, -1);
+	pdev = platform_device_alloc(name, num);
 	if (!pdev) {
 		dev_dbg(&twl->client->dev, "can't alloc dev\n");
 		status = -ENOMEM;
@@ -402,17 +416,52 @@ err:
 	return &pdev->dev;
 }
 
+static inline struct device *add_child(unsigned chip, const char *name,
+		void *pdata, unsigned pdata_len,
+		bool can_wakeup, int irq0, int irq1)
+{
+	return add_numbered_child(chip, name, -1, pdata, pdata_len,
+		can_wakeup, irq0, irq1);
+}
+
+static struct device *
+add_regulator_linked(int num, struct regulator_init_data *pdata,
+		struct regulator_consumer_supply *consumers,
+		unsigned num_consumers)
+{
+	/* regulator framework demands init_data ... */
+	if (!pdata)
+		return NULL;
+
+	if (consumers && !pdata->consumer_supplies) {
+		pdata->consumer_supplies = consumers;
+		pdata->num_consumer_supplies = num_consumers;
+	}
+
+	/* NOTE:  we currently ignore regulator IRQs, e.g. for short circuits */
+	return add_numbered_child(3, "twl4030_reg", num,
+		pdata, sizeof(*pdata), false, 0, 0);
+}
+
+static struct device *
+add_regulator(int num, struct regulator_init_data *pdata)
+{
+	return add_regulator_linked(num, pdata, NULL, 0);
+}
+
 /*
  * NOTE:  We know the first 8 IRQs after pdata->base_irq are
  * for the PIH, and the next are for the PWR_INT SIH, since
  * that's how twl_init_irq() sets things up.
  */
 
-static int add_children(struct twl4030_platform_data *pdata)
+static int
+add_children(struct twl4030_platform_data *pdata, unsigned long features)
 {
 	struct device	*child;
+	struct device	*usb_transceiver = NULL;
 
-	if (twl_has_bci() && pdata->bci) {
+	if (twl_has_bci() && pdata->bci && !(features & TPS_SUBSET)) {
 		child = add_child(3, "twl4030_bci",
 				pdata->bci, sizeof(*pdata->bci),
 				false,
@@ -469,6 +518,111 @@ static int add_children(struct twl4030_platform_data *pdata)
 				pdata->irq_base + 8 + 2, pdata->irq_base + 4);
 		if (IS_ERR(child))
 			return PTR_ERR(child);
+
+		/* we need to connect regulators to this transceiver */
+		usb_transceiver = child;
+	}
+
+	if (twl_has_regulator()) {
+		/*
+		child = add_regulator(TWL4030_REG_VPLL1, pdata->vpll1);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+		*/
+
+		child = add_regulator(TWL4030_REG_VMMC1, pdata->vmmc1);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator(TWL4030_REG_VDAC, pdata->vdac);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator((features & TWL4030_VAUX2)
+					? TWL4030_REG_VAUX2_4030
+					: TWL4030_REG_VAUX2,
+				pdata->vaux2);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+	}
+
+	if (twl_has_regulator() && usb_transceiver) {
+		static struct regulator_consumer_supply usb1v5 = {
+			.supply =	"usb1v5",
+		};
+		static struct regulator_consumer_supply usb1v8 = {
+			.supply =	"usb1v8",
+		};
+		static struct regulator_consumer_supply usb3v1 = {
+			.supply =	"usb3v1",
+		};
+		static struct regulator_consumer_supply usbcp = {
+			.supply =	"usbcp",
+		};
+
+		/* this is a template that gets copied */
+		struct regulator_init_data usb_fixed = {
+			.constraints.valid_modes_mask =
+				  REGULATOR_MODE_NORMAL
+				| REGULATOR_MODE_STANDBY,
+			.constraints.valid_ops_mask =
+				  REGULATOR_CHANGE_MODE
+				| REGULATOR_CHANGE_STATUS,
+		};
+
+		usb1v5.dev = usb_transceiver;
+		usb1v8.dev = usb_transceiver;
+		usb3v1.dev = usb_transceiver;
+		usbcp.dev = usb_transceiver;
+
+		child = add_regulator_linked(TWL4030_REG_VUSB1V5, &usb_fixed,
+				&usb1v5, 1);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator_linked(TWL4030_REG_VUSB1V8, &usb_fixed,
+				&usb1v8, 1);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator_linked(TWL4030_REG_VUSB3V1, &usb_fixed,
+				&usb3v1, 1);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator_linked(TWL4030_REG_VUSBCP, &usb_fixed,
+				&usbcp, 1);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+	}
+
+	/* maybe add LDOs that are omitted on cost-reduced parts */
+	if (twl_has_regulator() && !(features & TPS_SUBSET)) {
+		/*
+		child = add_regulator(TWL4030_REG_VPLL2, pdata->vpll2);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+		*/
+
+		child = add_regulator(TWL4030_REG_VMMC2, pdata->vmmc2);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator(TWL4030_REG_VSIM, pdata->vsim);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator(TWL4030_REG_VAUX1, pdata->vaux1);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator(TWL4030_REG_VAUX3, pdata->vaux3);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child = add_regulator(TWL4030_REG_VAUX4, pdata->vaux4);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
 	}
 
 	return 0;
@@ -632,7 +786,7 @@ twl4030_probe(struct i2c_client *client, const struct i2c_device_id *id)
 			goto fail;
 	}
 
-	status = add_children(pdata);
+	status = add_children(pdata, id->driver_data);
 fail:
 	if (status < 0)
 		twl4030_remove(client);
@@ -640,11 +794,11 @@ fail:
 }
 
 static const struct i2c_device_id twl4030_ids[] = {
-	{ "twl4030", 0 },	/* "Triton 2" */
-	{ "tps65950", 0 },	/* catalog version of twl4030 */
-	{ "tps65930", 0 },	/* fewer LDOs and DACs; no charger */
-	{ "tps65920", 0 },	/* fewer LDOs; no codec or charger */
-	{ "twl5030", 0 },	/* T2 updated */
+	{ "twl4030", TWL4030_VAUX2 },	/* "Triton 2" */
+	{ "twl5030", 0 },		/* T2 updated */
+	{ "tps65950", 0 },		/* catalog version of twl5030 */
+	{ "tps65930", TPS_SUBSET },	/* fewer LDOs and DACs; no charger */
+	{ "tps65920", TPS_SUBSET },	/* fewer LDOs; no codec or charger */
 	{ /* end of list */ },
 };
 MODULE_DEVICE_TABLE(i2c, twl4030_ids);
diff --git a/include/linux/i2c/twl4030.h b/include/linux/i2c/twl4030.h
index d4846695bcd0..e06555d40d35 100644
--- a/include/linux/i2c/twl4030.h
+++ b/include/linux/i2c/twl4030.h
@@ -278,6 +278,18 @@ struct twl4030_platform_data {
 	struct twl4030_keypad_data		*keypad;
 	struct twl4030_usb_data			*usb;
 
+	/* LDO regulators */
+	struct regulator_init_data		*vdac;
+	struct regulator_init_data		*vpll1;
+	struct regulator_init_data		*vpll2;
+	struct regulator_init_data		*vmmc1;
+	struct regulator_init_data		*vmmc2;
+	struct regulator_init_data		*vsim;
+	struct regulator_init_data		*vaux1;
+	struct regulator_init_data		*vaux2;
+	struct regulator_init_data		*vaux3;
+	struct regulator_init_data		*vaux4;
+
 	/* REVISIT more to come ... _nothing_ should be hard-wired */
 };
 
@@ -309,4 +321,39 @@ int twl4030_set_gpio_debounce(int gpio, int enable);
 	static inline int twl4030charger_usb_en(int enable) { return 0; }
 #endif
 
+/*----------------------------------------------------------------------*/
+
+/* Linux-specific regulator identifiers ... for now, we only support
+ * the LDOs, and leave the three buck converters alone.  VDD1 and VDD2
+ * need to tie into hardware based voltage scaling (cpufreq etc), while
+ * VIO is generally fixed.
+ */
+
+/* EXTERNAL dc-to-dc buck converters */
+#define TWL4030_REG_VDD1	0
+#define TWL4030_REG_VDD2	1
+#define TWL4030_REG_VIO		2
+
+/* EXTERNAL LDOs */
+#define TWL4030_REG_VDAC	3
+#define TWL4030_REG_VPLL1	4
+#define TWL4030_REG_VPLL2	5	/* not on all chips */
+#define TWL4030_REG_VMMC1	6
+#define TWL4030_REG_VMMC2	7	/* not on all chips */
+#define TWL4030_REG_VSIM	8	/* not on all chips */
+#define TWL4030_REG_VAUX1	9	/* not on all chips */
+#define TWL4030_REG_VAUX2_4030	10	/* (twl4030-specific) */
+#define TWL4030_REG_VAUX2	11	/* (twl5030 and newer) */
+#define TWL4030_REG_VAUX3	12	/* not on all chips */
+#define TWL4030_REG_VAUX4	13	/* not on all chips */
+
+/* INTERNAL LDOs */
+#define TWL4030_REG_VINTANA1	14
+#define TWL4030_REG_VINTANA2	15
+#define TWL4030_REG_VINTDIG	16
+#define TWL4030_REG_VUSB1V5	17
+#define TWL4030_REG_VUSB1V8	18
+#define TWL4030_REG_VUSB3V1	19
+#define TWL4030_REG_VUSBCP	20
+
 #endif /* End of __TWL4030_H */
-- 
cgit v1.2.3


From b73eac7871d002835be17d4602cced2c15c0db4b Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Sun, 7 Dec 2008 19:10:58 +0100
Subject: mfd: twl4030 regulator bug fixes

This contains two bugfixes to the initial twl4030 regulator
support patch related to USB:

 (a) always overwrite the old list of consumers ... else
     the regulator handles all use the same "usb1v5" name;
 (b) don't set up the "usbcp" regulator, which turns out
     to be managed through separate controls, usually ULPI
     directly from the OTG controller.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>
---
 drivers/mfd/twl4030-core.c  | 11 +----------
 include/linux/i2c/twl4030.h |  1 -
 2 files changed, 1 insertion(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/twl4030-core.c b/drivers/mfd/twl4030-core.c
index 8ab9ee8543a6..fdfbd313ae09 100644
--- a/drivers/mfd/twl4030-core.c
+++ b/drivers/mfd/twl4030-core.c
@@ -433,7 +433,7 @@ add_regulator_linked(int num, struct regulator_init_data *pdata,
 	if (!pdata)
 		return NULL;
 
-	if (consumers && !pdata->consumer_supplies) {
+	if (consumers) {
 		pdata->consumer_supplies = consumers;
 		pdata->num_consumer_supplies = num_consumers;
 	}
@@ -556,9 +556,6 @@ add_children(struct twl4030_platform_data *pdata, unsigned long features)
 		static struct regulator_consumer_supply usb3v1 = {
 			.supply =	"usb3v1",
 		};
-		static struct regulator_consumer_supply usbcp = {
-			.supply =	"usbcp",
-		};
 
 		/* this is a template that gets copied */
 		struct regulator_init_data usb_fixed = {
@@ -573,7 +570,6 @@ add_children(struct twl4030_platform_data *pdata, unsigned long features)
 		usb1v5.dev = usb_transceiver;
 		usb1v8.dev = usb_transceiver;
 		usb3v1.dev = usb_transceiver;
-		usbcp.dev = usb_transceiver;
 
 		child = add_regulator_linked(TWL4030_REG_VUSB1V5, &usb_fixed,
 				&usb1v5, 1);
@@ -589,11 +585,6 @@ add_children(struct twl4030_platform_data *pdata, unsigned long features)
 				&usb3v1, 1);
 		if (IS_ERR(child))
 			return PTR_ERR(child);
-
-		child = add_regulator_linked(TWL4030_REG_VUSBCP, &usb_fixed,
-				&usbcp, 1);
-		if (IS_ERR(child))
-			return PTR_ERR(child);
 	}
 
 	/* maybe add LDOs that are omitted on cost-reduced parts */
diff --git a/include/linux/i2c/twl4030.h b/include/linux/i2c/twl4030.h
index e06555d40d35..a8f84c01f82e 100644
--- a/include/linux/i2c/twl4030.h
+++ b/include/linux/i2c/twl4030.h
@@ -354,6 +354,5 @@ int twl4030_set_gpio_debounce(int gpio, int enable);
 #define TWL4030_REG_VUSB1V5	17
 #define TWL4030_REG_VUSB1V8	18
 #define TWL4030_REG_VUSB3V1	19
-#define TWL4030_REG_VUSBCP	20
 
 #endif /* End of __TWL4030_H */
-- 
cgit v1.2.3


From 856f6fd119411d5701d5db96e1aae1dd69923887 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <mike@compulab.co.il>
Date: Thu, 18 Dec 2008 10:54:27 +0100
Subject: mfd: Dialog DA9030 battery charger MFD driver

This patch amends DA903x MFD driver with definitions and methods
needed for battery charger driver.

Signed-off-by: Mike Rapoport <mike@compulab.co.il>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>
---
 drivers/mfd/da903x.c       | 12 ++++++++++++
 include/linux/mfd/da903x.h | 44 ++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 54 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/da903x.c b/drivers/mfd/da903x.c
index 0b5bd85dfcec..fcaf1f6028dd 100644
--- a/drivers/mfd/da903x.c
+++ b/drivers/mfd/da903x.c
@@ -151,12 +151,24 @@ int da903x_write(struct device *dev, int reg, uint8_t val)
 }
 EXPORT_SYMBOL_GPL(da903x_write);
 
+int da903x_writes(struct device *dev, int reg, int len, uint8_t *val)
+{
+	return __da903x_writes(to_i2c_client(dev), reg, len, val);
+}
+EXPORT_SYMBOL_GPL(da903x_writes);
+
 int da903x_read(struct device *dev, int reg, uint8_t *val)
 {
 	return __da903x_read(to_i2c_client(dev), reg, val);
 }
 EXPORT_SYMBOL_GPL(da903x_read);
 
+int da903x_reads(struct device *dev, int reg, int len, uint8_t *val)
+{
+	return __da903x_reads(to_i2c_client(dev), reg, len, val);
+}
+EXPORT_SYMBOL_GPL(da903x_reads);
+
 int da903x_set_bits(struct device *dev, int reg, uint8_t bit_mask)
 {
 	struct da903x_chip *chip = dev_get_drvdata(dev);
diff --git a/include/linux/mfd/da903x.h b/include/linux/mfd/da903x.h
index cad314c12439..115dbe965082 100644
--- a/include/linux/mfd/da903x.h
+++ b/include/linux/mfd/da903x.h
@@ -32,6 +32,7 @@ enum {
 	DA9030_ID_LDO18,
 	DA9030_ID_LDO19,
 	DA9030_ID_LDO_INT,	/* LDO Internal */
+	DA9030_ID_BAT,		/* battery charger */
 
 	DA9034_ID_LED_1,
 	DA9034_ID_LED_2,
@@ -93,6 +94,43 @@ struct da9034_touch_pdata {
 	int	y_inverted;
 };
 
+/* DA9030 battery charger data */
+struct power_supply_info;
+
+struct da9030_battery_info {
+	/* battery parameters */
+	struct power_supply_info *battery_info;
+
+	/* current and voltage to use for battery charging */
+	unsigned int charge_milliamp;
+	unsigned int charge_millivolt;
+
+	/* voltage thresholds (in millivolts) */
+	int vbat_low;
+	int vbat_crit;
+	int vbat_charge_start;
+	int vbat_charge_stop;
+	int vbat_charge_restart;
+
+	/* battery nominal minimal and maximal voltages in millivolts */
+	int vcharge_min;
+	int vcharge_max;
+
+	/* Temperature thresholds. These are DA9030 register values
+	   "as is" and should be measured for each battery type */
+	int tbat_low;
+	int tbat_high;
+	int tbat_restart;
+
+
+	/* battery monitor interval (seconds) */
+	unsigned int batmon_interval;
+
+	/* platform callbacks for battery low and critical events */
+	void (*battery_low)(void);
+	void (*battery_critical)(void);
+};
+
 struct da903x_subdev_info {
 	int		id;
 	const char	*name;
@@ -190,11 +228,13 @@ extern int da903x_unregister_notifier(struct device *dev,
 extern int da903x_query_status(struct device *dev, unsigned int status);
 
 
-/* NOTE: the two functions below are not intended for use outside
- * of the DA9034 sub-device drivers
+/* NOTE: the functions below are not intended for use outside
+ * of the DA903x sub-device drivers
  */
 extern int da903x_write(struct device *dev, int reg, uint8_t val);
+extern int da903x_writes(struct device *dev, int reg, int len, uint8_t *val);
 extern int da903x_read(struct device *dev, int reg, uint8_t *val);
+extern int da903x_reads(struct device *dev, int reg, int len, uint8_t *val);
 extern int da903x_update(struct device *dev, int reg, uint8_t val, uint8_t mask);
 extern int da903x_set_bits(struct device *dev, int reg, uint8_t bit_mask);
 extern int da903x_clr_bits(struct device *dev, int reg, uint8_t bit_mask);
-- 
cgit v1.2.3


From 96920630624868add3f63f596523e70dbb64549a Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 18 Dec 2008 23:09:50 +0100
Subject: mfd: Add WM8352 support

The WM8352 is a variant of the WM8350. Aside from the register defaults
there are no software visible differences to the WM8350.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>
---
 drivers/mfd/Kconfig             |   16 +
 drivers/mfd/wm8350-core.c       |   86 +++-
 drivers/mfd/wm8350-i2c.c        |    1 +
 drivers/mfd/wm8350-regmap.c     | 1052 +++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/wm8350/core.h |    4 +
 5 files changed, 1143 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index ddfb12b52f54..76a482dfc177 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -143,6 +143,22 @@ config MFD_WM8350_CONFIG_MODE_3
 	bool
 	depends on MFD_WM8350
 
+config MFD_WM8352_CONFIG_MODE_0
+	bool
+	depends on MFD_WM8350
+
+config MFD_WM8352_CONFIG_MODE_1
+	bool
+	depends on MFD_WM8350
+
+config MFD_WM8352_CONFIG_MODE_2
+	bool
+	depends on MFD_WM8350
+
+config MFD_WM8352_CONFIG_MODE_3
+	bool
+	depends on MFD_WM8350
+
 config MFD_WM8350_I2C
 	tristate "Support Wolfson Microelectronics WM8350 with I2C"
 	select MFD_WM8350
diff --git a/drivers/mfd/wm8350-core.c b/drivers/mfd/wm8350-core.c
index 2188d759cbde..fa505ac76c84 100644
--- a/drivers/mfd/wm8350-core.c
+++ b/drivers/mfd/wm8350-core.c
@@ -1133,35 +1133,75 @@ EXPORT_SYMBOL_GPL(wm8350_read_auxadc);
 /*
  * Cache is always host endian.
  */
-static int wm8350_create_cache(struct wm8350 *wm8350, int mode)
+static int wm8350_create_cache(struct wm8350 *wm8350, int type, int mode)
 {
 	int i, ret = 0;
 	u16 value;
 	const u16 *reg_map;
 
-	switch (mode) {
-#ifdef CONFIG_MFD_WM8350_CONFIG_MODE_0
+	switch (type) {
 	case 0:
-		reg_map = wm8350_mode0_defaults;
-		break;
+		switch (mode) {
+#ifdef CONFIG_MFD_WM8350_CONFIG_MODE_0
+		case 0:
+			reg_map = wm8350_mode0_defaults;
+			break;
 #endif
 #ifdef CONFIG_MFD_WM8350_CONFIG_MODE_1
-	case 1:
-		reg_map = wm8350_mode1_defaults;
-		break;
+		case 1:
+			reg_map = wm8350_mode1_defaults;
+			break;
 #endif
 #ifdef CONFIG_MFD_WM8350_CONFIG_MODE_2
-	case 2:
-		reg_map = wm8350_mode2_defaults;
-		break;
+		case 2:
+			reg_map = wm8350_mode2_defaults;
+			break;
 #endif
 #ifdef CONFIG_MFD_WM8350_CONFIG_MODE_3
-	case 3:
-		reg_map = wm8350_mode3_defaults;
-		break;
+		case 3:
+			reg_map = wm8350_mode3_defaults;
+			break;
 #endif
+		default:
+			dev_err(wm8350->dev,
+				"WM8350 configuration mode %d not supported\n",
+				mode);
+			return -EINVAL;
+		}
+
+	case 2:
+		switch (mode) {
+#ifdef CONFIG_MFD_WM8352_CONFIG_MODE_0
+		case 0:
+			reg_map = wm8352_mode0_defaults;
+			break;
+#endif
+#ifdef CONFIG_MFD_WM8352_CONFIG_MODE_1
+		case 1:
+			reg_map = wm8352_mode1_defaults;
+			break;
+#endif
+#ifdef CONFIG_MFD_WM8352_CONFIG_MODE_2
+		case 2:
+			reg_map = wm8352_mode2_defaults;
+			break;
+#endif
+#ifdef CONFIG_MFD_WM8352_CONFIG_MODE_3
+		case 3:
+			reg_map = wm8352_mode3_defaults;
+			break;
+#endif
+		default:
+			dev_err(wm8350->dev,
+				"WM8352 configuration mode %d not supported\n",
+				mode);
+			return -EINVAL;
+		}
+		break;
+
 	default:
-		dev_err(wm8350->dev, "Configuration mode %d not supported\n",
+		dev_err(wm8350->dev,
+			"WM835x configuration mode %d not supported\n",
 			mode);
 		return -EINVAL;
 	}
@@ -1284,13 +1324,27 @@ int wm8350_device_init(struct wm8350 *wm8350, int irq,
 		}
 		break;
 
+	case 2:
+		switch (chip_rev) {
+		case 0:
+			dev_info(wm8350->dev, "WM8352 Rev A\n");
+			wm8350->power.rev_g_coeff = 1;
+			break;
+
+		default:
+			dev_err(wm8350->dev, "Unknown WM8352 CHIP_REV\n");
+			ret = -ENODEV;
+			goto err;
+		}
+		break;
+
 	default:
 		dev_err(wm8350->dev, "Unknown MASK_REV\n");
 		ret = -ENODEV;
 		goto err;
 	}
 
-	ret = wm8350_create_cache(wm8350, mode);
+	ret = wm8350_create_cache(wm8350, mask_rev, mode);
 	if (ret < 0) {
 		dev_err(wm8350->dev, "Failed to create register cache\n");
 		return ret;
diff --git a/drivers/mfd/wm8350-i2c.c b/drivers/mfd/wm8350-i2c.c
index 876e693582bd..878051253084 100644
--- a/drivers/mfd/wm8350-i2c.c
+++ b/drivers/mfd/wm8350-i2c.c
@@ -97,6 +97,7 @@ static int wm8350_i2c_remove(struct i2c_client *i2c)
 
 static const struct i2c_device_id wm8350_i2c_id[] = {
        { "wm8350", 0 },
+       { "wm8352", 0 },
        { }
 };
 MODULE_DEVICE_TABLE(i2c, wm8350_i2c_id);
diff --git a/drivers/mfd/wm8350-regmap.c b/drivers/mfd/wm8350-regmap.c
index b43d64c2b920..3e2cc37961f3 100644
--- a/drivers/mfd/wm8350-regmap.c
+++ b/drivers/mfd/wm8350-regmap.c
@@ -1074,6 +1074,1058 @@ const u16 wm8350_mode3_defaults[] = {
 };
 #endif
 
+#ifdef CONFIG_MFD_WM8352_CONFIG_MODE_0
+
+#undef WM8350_HAVE_CONFIG_MODE
+#define WM8350_HAVE_CONFIG_MODE
+
+const u16 wm8352_mode0_defaults[] = {
+	0x6143,     /* R0   - Reset/ID */
+	0x0000,     /* R1   - ID */
+	0x0002,     /* R2   - Revision */
+	0x1C02,     /* R3   - System Control 1 */
+	0x0004,     /* R4   - System Control 2 */
+	0x0000,     /* R5   - System Hibernate */
+	0x8A00,     /* R6   - Interface Control */
+	0x0000,     /* R7 */
+	0x8000,     /* R8   - Power mgmt (1) */
+	0x0000,     /* R9   - Power mgmt (2) */
+	0x0000,     /* R10  - Power mgmt (3) */
+	0x2000,     /* R11  - Power mgmt (4) */
+	0x0E00,     /* R12  - Power mgmt (5) */
+	0x0000,     /* R13  - Power mgmt (6) */
+	0x0000,     /* R14  - Power mgmt (7) */
+	0x0000,     /* R15 */
+	0x0000,     /* R16  - RTC Seconds/Minutes */
+	0x0100,     /* R17  - RTC Hours/Day */
+	0x0101,     /* R18  - RTC Date/Month */
+	0x1400,     /* R19  - RTC Year */
+	0x0000,     /* R20  - Alarm Seconds/Minutes */
+	0x0000,     /* R21  - Alarm Hours/Day */
+	0x0000,     /* R22  - Alarm Date/Month */
+	0x0320,     /* R23  - RTC Time Control */
+	0x0000,     /* R24  - System Interrupts */
+	0x0000,     /* R25  - Interrupt Status 1 */
+	0x0000,     /* R26  - Interrupt Status 2 */
+	0x0000,     /* R27 */
+	0x0000,     /* R28  - Under Voltage Interrupt status */
+	0x0000,     /* R29  - Over Current Interrupt status */
+	0x0000,     /* R30  - GPIO Interrupt Status */
+	0x0000,     /* R31  - Comparator Interrupt Status */
+	0x3FFF,     /* R32  - System Interrupts Mask */
+	0x0000,     /* R33  - Interrupt Status 1 Mask */
+	0x0000,     /* R34  - Interrupt Status 2 Mask */
+	0x0000,     /* R35 */
+	0x0000,     /* R36  - Under Voltage Interrupt status Mask */
+	0x0000,     /* R37  - Over Current Interrupt status Mask */
+	0x0000,     /* R38  - GPIO Interrupt Status Mask */
+	0x0000,     /* R39  - Comparator Interrupt Status Mask */
+	0x0040,     /* R40  - Clock Control 1 */
+	0x0000,     /* R41  - Clock Control 2 */
+	0x3A00,     /* R42  - FLL Control 1 */
+	0x7086,     /* R43  - FLL Control 2 */
+	0xC226,     /* R44  - FLL Control 3 */
+	0x0000,     /* R45  - FLL Control 4 */
+	0x0000,     /* R46 */
+	0x0000,     /* R47 */
+	0x0000,     /* R48  - DAC Control */
+	0x0000,     /* R49 */
+	0x00C0,     /* R50  - DAC Digital Volume L */
+	0x00C0,     /* R51  - DAC Digital Volume R */
+	0x0000,     /* R52 */
+	0x0040,     /* R53  - DAC LR Rate */
+	0x0000,     /* R54  - DAC Clock Control */
+	0x0000,     /* R55 */
+	0x0000,     /* R56 */
+	0x0000,     /* R57 */
+	0x4000,     /* R58  - DAC Mute */
+	0x0000,     /* R59  - DAC Mute Volume */
+	0x0000,     /* R60  - DAC Side */
+	0x0000,     /* R61 */
+	0x0000,     /* R62 */
+	0x0000,     /* R63 */
+	0x8000,     /* R64  - ADC Control */
+	0x0000,     /* R65 */
+	0x00C0,     /* R66  - ADC Digital Volume L */
+	0x00C0,     /* R67  - ADC Digital Volume R */
+	0x0000,     /* R68  - ADC Divider */
+	0x0000,     /* R69 */
+	0x0040,     /* R70  - ADC LR Rate */
+	0x0000,     /* R71 */
+	0x0303,     /* R72  - Input Control */
+	0x0000,     /* R73  - IN3 Input Control */
+	0x0000,     /* R74  - Mic Bias Control */
+	0x0000,     /* R75 */
+	0x0000,     /* R76  - Output Control */
+	0x0000,     /* R77  - Jack Detect */
+	0x0000,     /* R78  - Anti Pop Control */
+	0x0000,     /* R79 */
+	0x0040,     /* R80  - Left Input Volume */
+	0x0040,     /* R81  - Right Input Volume */
+	0x0000,     /* R82 */
+	0x0000,     /* R83 */
+	0x0000,     /* R84 */
+	0x0000,     /* R85 */
+	0x0000,     /* R86 */
+	0x0000,     /* R87 */
+	0x0800,     /* R88  - Left Mixer Control */
+	0x1000,     /* R89  - Right Mixer Control */
+	0x0000,     /* R90 */
+	0x0000,     /* R91 */
+	0x0000,     /* R92  - OUT3 Mixer Control */
+	0x0000,     /* R93  - OUT4 Mixer Control */
+	0x0000,     /* R94 */
+	0x0000,     /* R95 */
+	0x0000,     /* R96  - Output Left Mixer Volume */
+	0x0000,     /* R97  - Output Right Mixer Volume */
+	0x0000,     /* R98  - Input Mixer Volume L */
+	0x0000,     /* R99  - Input Mixer Volume R */
+	0x0000,     /* R100 - Input Mixer Volume */
+	0x0000,     /* R101 */
+	0x0000,     /* R102 */
+	0x0000,     /* R103 */
+	0x00E4,     /* R104 - OUT1L Volume */
+	0x00E4,     /* R105 - OUT1R Volume */
+	0x00E4,     /* R106 - OUT2L Volume */
+	0x02E4,     /* R107 - OUT2R Volume */
+	0x0000,     /* R108 */
+	0x0000,     /* R109 */
+	0x0000,     /* R110 */
+	0x0000,     /* R111 - BEEP Volume */
+	0x0A00,     /* R112 - AI Formating */
+	0x0000,     /* R113 - ADC DAC COMP */
+	0x0020,     /* R114 - AI ADC Control */
+	0x0020,     /* R115 - AI DAC Control */
+	0x0000,     /* R116 */
+	0x0000,     /* R117 */
+	0x0000,     /* R118 */
+	0x0000,     /* R119 */
+	0x0000,     /* R120 */
+	0x0000,     /* R121 */
+	0x0000,     /* R122 */
+	0x0000,     /* R123 */
+	0x0000,     /* R124 */
+	0x0000,     /* R125 */
+	0x0000,     /* R126 */
+	0x0000,     /* R127 */
+	0x1FFF,     /* R128 - GPIO Debounce */
+	0x0000,     /* R129 - GPIO Pin pull up Control */
+	0x0000,     /* R130 - GPIO Pull down Control */
+	0x0000,     /* R131 - GPIO Interrupt Mode */
+	0x0000,     /* R132 */
+	0x0000,     /* R133 - GPIO Control */
+	0x0FFC,     /* R134 - GPIO Configuration (i/o) */
+	0x0FFC,     /* R135 - GPIO Pin Polarity / Type */
+	0x0000,     /* R136 */
+	0x0000,     /* R137 */
+	0x0000,     /* R138 */
+	0x0000,     /* R139 */
+	0x0013,     /* R140 - GPIO Function Select 1 */
+	0x0000,     /* R141 - GPIO Function Select 2 */
+	0x0000,     /* R142 - GPIO Function Select 3 */
+	0x0003,     /* R143 - GPIO Function Select 4 */
+	0x0000,     /* R144 - Digitiser Control (1) */
+	0x0002,     /* R145 - Digitiser Control (2) */
+	0x0000,     /* R146 */
+	0x0000,     /* R147 */
+	0x0000,     /* R148 */
+	0x0000,     /* R149 */
+	0x0000,     /* R150 */
+	0x0000,     /* R151 */
+	0x7000,     /* R152 - AUX1 Readback */
+	0x7000,     /* R153 - AUX2 Readback */
+	0x7000,     /* R154 - AUX3 Readback */
+	0x7000,     /* R155 - AUX4 Readback */
+	0x0000,     /* R156 - USB Voltage Readback */
+	0x0000,     /* R157 - LINE Voltage Readback */
+	0x0000,     /* R158 - BATT Voltage Readback */
+	0x0000,     /* R159 - Chip Temp Readback */
+	0x0000,     /* R160 */
+	0x0000,     /* R161 */
+	0x0000,     /* R162 */
+	0x0000,     /* R163 - Generic Comparator Control */
+	0x0000,     /* R164 - Generic comparator 1 */
+	0x0000,     /* R165 - Generic comparator 2 */
+	0x0000,     /* R166 - Generic comparator 3 */
+	0x0000,     /* R167 - Generic comparator 4 */
+	0xA00F,     /* R168 - Battery Charger Control 1 */
+	0x0B06,     /* R169 - Battery Charger Control 2 */
+	0x0000,     /* R170 - Battery Charger Control 3 */
+	0x0000,     /* R171 */
+	0x0000,     /* R172 - Current Sink Driver A */
+	0x0000,     /* R173 - CSA Flash control */
+	0x0000,     /* R174 - Current Sink Driver B */
+	0x0000,     /* R175 - CSB Flash control */
+	0x0000,     /* R176 - DCDC/LDO requested */
+	0x032D,     /* R177 - DCDC Active options */
+	0x0000,     /* R178 - DCDC Sleep options */
+	0x0025,     /* R179 - Power-check comparator */
+	0x000E,     /* R180 - DCDC1 Control */
+	0x0000,     /* R181 - DCDC1 Timeouts */
+	0x1006,     /* R182 - DCDC1 Low Power */
+	0x0018,     /* R183 - DCDC2 Control */
+	0x0000,     /* R184 - DCDC2 Timeouts */
+	0x0000,     /* R185 */
+	0x0000,     /* R186 - DCDC3 Control */
+	0x0000,     /* R187 - DCDC3 Timeouts */
+	0x0006,     /* R188 - DCDC3 Low Power */
+	0x0000,     /* R189 - DCDC4 Control */
+	0x0000,     /* R190 - DCDC4 Timeouts */
+	0x0006,     /* R191 - DCDC4 Low Power */
+	0x0008,     /* R192 - DCDC5 Control */
+	0x0000,     /* R193 - DCDC5 Timeouts */
+	0x0000,     /* R194 */
+	0x0000,     /* R195 - DCDC6 Control */
+	0x0000,     /* R196 - DCDC6 Timeouts */
+	0x0006,     /* R197 - DCDC6 Low Power */
+	0x0000,     /* R198 */
+	0x0003,     /* R199 - Limit Switch Control */
+	0x001C,     /* R200 - LDO1 Control */
+	0x0000,     /* R201 - LDO1 Timeouts */
+	0x001C,     /* R202 - LDO1 Low Power */
+	0x001B,     /* R203 - LDO2 Control */
+	0x0000,     /* R204 - LDO2 Timeouts */
+	0x001C,     /* R205 - LDO2 Low Power */
+	0x001B,     /* R206 - LDO3 Control */
+	0x0000,     /* R207 - LDO3 Timeouts */
+	0x001C,     /* R208 - LDO3 Low Power */
+	0x001B,     /* R209 - LDO4 Control */
+	0x0000,     /* R210 - LDO4 Timeouts */
+	0x001C,     /* R211 - LDO4 Low Power */
+	0x0000,     /* R212 */
+	0x0000,     /* R213 */
+	0x0000,     /* R214 */
+	0x0000,     /* R215 - VCC_FAULT Masks */
+	0x001F,     /* R216 - Main Bandgap Control */
+	0x0000,     /* R217 - OSC Control */
+	0x9000,     /* R218 - RTC Tick Control */
+	0x0000,     /* R219 - Security1 */
+	0x4000,     /* R220 */
+	0x0000,     /* R221 */
+	0x0000,     /* R222 */
+	0x0000,     /* R223 */
+	0x0000,     /* R224 - Signal overrides */
+	0x0000,     /* R225 - DCDC/LDO status */
+	0x0000,     /* R226 - Charger Overides/status */
+	0x0000,     /* R227 - misc overrides */
+	0x0000,     /* R228 - Supply overrides/status 1 */
+	0x0000,     /* R229 - Supply overrides/status 2 */
+	0xE000,     /* R230 - GPIO Pin Status */
+	0x0000,     /* R231 - comparotor overrides */
+	0x0000,     /* R232 */
+	0x0000,     /* R233 - State Machine status */
+	0x1200,     /* R234 */
+	0x0000,     /* R235 */
+	0x8000,     /* R236 */
+	0x0000,     /* R237 */
+	0x0000,     /* R238 */
+	0x0000,     /* R239 */
+	0x0003,     /* R240 */
+	0x0000,     /* R241 */
+	0x0000,     /* R242 */
+	0x0004,     /* R243 */
+	0x0300,     /* R244 */
+	0x0000,     /* R245 */
+	0x0200,     /* R246 */
+	0x0000,     /* R247 */
+	0x1000,     /* R248 - DCDC1 Test Controls */
+	0x5000,     /* R249 */
+	0x1000,     /* R250 - DCDC3 Test Controls */
+	0x1000,     /* R251 - DCDC4 Test Controls */
+	0x5100,     /* R252 */
+	0x1000,     /* R253 - DCDC6 Test Controls */
+};
+#endif
+
+#ifdef CONFIG_MFD_WM8352_CONFIG_MODE_1
+
+#undef WM8350_HAVE_CONFIG_MODE
+#define WM8350_HAVE_CONFIG_MODE
+
+const u16 wm8352_mode1_defaults[] = {
+	0x6143,     /* R0   - Reset/ID */
+	0x0000,     /* R1   - ID */
+	0x0002,     /* R2   - Revision */
+	0x1C02,     /* R3   - System Control 1 */
+	0x0204,     /* R4   - System Control 2 */
+	0x0000,     /* R5   - System Hibernate */
+	0x8A00,     /* R6   - Interface Control */
+	0x0000,     /* R7 */
+	0x8000,     /* R8   - Power mgmt (1) */
+	0x0000,     /* R9   - Power mgmt (2) */
+	0x0000,     /* R10  - Power mgmt (3) */
+	0x2000,     /* R11  - Power mgmt (4) */
+	0x0E00,     /* R12  - Power mgmt (5) */
+	0x0000,     /* R13  - Power mgmt (6) */
+	0x0000,     /* R14  - Power mgmt (7) */
+	0x0000,     /* R15 */
+	0x0000,     /* R16  - RTC Seconds/Minutes */
+	0x0100,     /* R17  - RTC Hours/Day */
+	0x0101,     /* R18  - RTC Date/Month */
+	0x1400,     /* R19  - RTC Year */
+	0x0000,     /* R20  - Alarm Seconds/Minutes */
+	0x0000,     /* R21  - Alarm Hours/Day */
+	0x0000,     /* R22  - Alarm Date/Month */
+	0x0320,     /* R23  - RTC Time Control */
+	0x0000,     /* R24  - System Interrupts */
+	0x0000,     /* R25  - Interrupt Status 1 */
+	0x0000,     /* R26  - Interrupt Status 2 */
+	0x0000,     /* R27 */
+	0x0000,     /* R28  - Under Voltage Interrupt status */
+	0x0000,     /* R29  - Over Current Interrupt status */
+	0x0000,     /* R30  - GPIO Interrupt Status */
+	0x0000,     /* R31  - Comparator Interrupt Status */
+	0x3FFF,     /* R32  - System Interrupts Mask */
+	0x0000,     /* R33  - Interrupt Status 1 Mask */
+	0x0000,     /* R34  - Interrupt Status 2 Mask */
+	0x0000,     /* R35 */
+	0x0000,     /* R36  - Under Voltage Interrupt status Mask */
+	0x0000,     /* R37  - Over Current Interrupt status Mask */
+	0x0000,     /* R38  - GPIO Interrupt Status Mask */
+	0x0000,     /* R39  - Comparator Interrupt Status Mask */
+	0x0040,     /* R40  - Clock Control 1 */
+	0x0000,     /* R41  - Clock Control 2 */
+	0x3A00,     /* R42  - FLL Control 1 */
+	0x7086,     /* R43  - FLL Control 2 */
+	0xC226,     /* R44  - FLL Control 3 */
+	0x0000,     /* R45  - FLL Control 4 */
+	0x0000,     /* R46 */
+	0x0000,     /* R47 */
+	0x0000,     /* R48  - DAC Control */
+	0x0000,     /* R49 */
+	0x00C0,     /* R50  - DAC Digital Volume L */
+	0x00C0,     /* R51  - DAC Digital Volume R */
+	0x0000,     /* R52 */
+	0x0040,     /* R53  - DAC LR Rate */
+	0x0000,     /* R54  - DAC Clock Control */
+	0x0000,     /* R55 */
+	0x0000,     /* R56 */
+	0x0000,     /* R57 */
+	0x4000,     /* R58  - DAC Mute */
+	0x0000,     /* R59  - DAC Mute Volume */
+	0x0000,     /* R60  - DAC Side */
+	0x0000,     /* R61 */
+	0x0000,     /* R62 */
+	0x0000,     /* R63 */
+	0x8000,     /* R64  - ADC Control */
+	0x0000,     /* R65 */
+	0x00C0,     /* R66  - ADC Digital Volume L */
+	0x00C0,     /* R67  - ADC Digital Volume R */
+	0x0000,     /* R68  - ADC Divider */
+	0x0000,     /* R69 */
+	0x0040,     /* R70  - ADC LR Rate */
+	0x0000,     /* R71 */
+	0x0303,     /* R72  - Input Control */
+	0x0000,     /* R73  - IN3 Input Control */
+	0x0000,     /* R74  - Mic Bias Control */
+	0x0000,     /* R75 */
+	0x0000,     /* R76  - Output Control */
+	0x0000,     /* R77  - Jack Detect */
+	0x0000,     /* R78  - Anti Pop Control */
+	0x0000,     /* R79 */
+	0x0040,     /* R80  - Left Input Volume */
+	0x0040,     /* R81  - Right Input Volume */
+	0x0000,     /* R82 */
+	0x0000,     /* R83 */
+	0x0000,     /* R84 */
+	0x0000,     /* R85 */
+	0x0000,     /* R86 */
+	0x0000,     /* R87 */
+	0x0800,     /* R88  - Left Mixer Control */
+	0x1000,     /* R89  - Right Mixer Control */
+	0x0000,     /* R90 */
+	0x0000,     /* R91 */
+	0x0000,     /* R92  - OUT3 Mixer Control */
+	0x0000,     /* R93  - OUT4 Mixer Control */
+	0x0000,     /* R94 */
+	0x0000,     /* R95 */
+	0x0000,     /* R96  - Output Left Mixer Volume */
+	0x0000,     /* R97  - Output Right Mixer Volume */
+	0x0000,     /* R98  - Input Mixer Volume L */
+	0x0000,     /* R99  - Input Mixer Volume R */
+	0x0000,     /* R100 - Input Mixer Volume */
+	0x0000,     /* R101 */
+	0x0000,     /* R102 */
+	0x0000,     /* R103 */
+	0x00E4,     /* R104 - OUT1L Volume */
+	0x00E4,     /* R105 - OUT1R Volume */
+	0x00E4,     /* R106 - OUT2L Volume */
+	0x02E4,     /* R107 - OUT2R Volume */
+	0x0000,     /* R108 */
+	0x0000,     /* R109 */
+	0x0000,     /* R110 */
+	0x0000,     /* R111 - BEEP Volume */
+	0x0A00,     /* R112 - AI Formating */
+	0x0000,     /* R113 - ADC DAC COMP */
+	0x0020,     /* R114 - AI ADC Control */
+	0x0020,     /* R115 - AI DAC Control */
+	0x0000,     /* R116 */
+	0x0000,     /* R117 */
+	0x0000,     /* R118 */
+	0x0000,     /* R119 */
+	0x0000,     /* R120 */
+	0x0000,     /* R121 */
+	0x0000,     /* R122 */
+	0x0000,     /* R123 */
+	0x0000,     /* R124 */
+	0x0000,     /* R125 */
+	0x0000,     /* R126 */
+	0x0000,     /* R127 */
+	0x1FFF,     /* R128 - GPIO Debounce */
+	0x0000,     /* R129 - GPIO Pin pull up Control */
+	0x0000,     /* R130 - GPIO Pull down Control */
+	0x0000,     /* R131 - GPIO Interrupt Mode */
+	0x0000,     /* R132 */
+	0x0000,     /* R133 - GPIO Control */
+	0x0BFB,     /* R134 - GPIO Configuration (i/o) */
+	0x0FFF,     /* R135 - GPIO Pin Polarity / Type */
+	0x0000,     /* R136 */
+	0x0000,     /* R137 */
+	0x0000,     /* R138 */
+	0x0000,     /* R139 */
+	0x0300,     /* R140 - GPIO Function Select 1 */
+	0x0000,     /* R141 - GPIO Function Select 2 */
+	0x2300,     /* R142 - GPIO Function Select 3 */
+	0x0003,     /* R143 - GPIO Function Select 4 */
+	0x0000,     /* R144 - Digitiser Control (1) */
+	0x0002,     /* R145 - Digitiser Control (2) */
+	0x0000,     /* R146 */
+	0x0000,     /* R147 */
+	0x0000,     /* R148 */
+	0x0000,     /* R149 */
+	0x0000,     /* R150 */
+	0x0000,     /* R151 */
+	0x7000,     /* R152 - AUX1 Readback */
+	0x7000,     /* R153 - AUX2 Readback */
+	0x7000,     /* R154 - AUX3 Readback */
+	0x7000,     /* R155 - AUX4 Readback */
+	0x0000,     /* R156 - USB Voltage Readback */
+	0x0000,     /* R157 - LINE Voltage Readback */
+	0x0000,     /* R158 - BATT Voltage Readback */
+	0x0000,     /* R159 - Chip Temp Readback */
+	0x0000,     /* R160 */
+	0x0000,     /* R161 */
+	0x0000,     /* R162 */
+	0x0000,     /* R163 - Generic Comparator Control */
+	0x0000,     /* R164 - Generic comparator 1 */
+	0x0000,     /* R165 - Generic comparator 2 */
+	0x0000,     /* R166 - Generic comparator 3 */
+	0x0000,     /* R167 - Generic comparator 4 */
+	0xA00F,     /* R168 - Battery Charger Control 1 */
+	0x0B06,     /* R169 - Battery Charger Control 2 */
+	0x0000,     /* R170 - Battery Charger Control 3 */
+	0x0000,     /* R171 */
+	0x0000,     /* R172 - Current Sink Driver A */
+	0x0000,     /* R173 - CSA Flash control */
+	0x0000,     /* R174 - Current Sink Driver B */
+	0x0000,     /* R175 - CSB Flash control */
+	0x0000,     /* R176 - DCDC/LDO requested */
+	0x032D,     /* R177 - DCDC Active options */
+	0x0000,     /* R178 - DCDC Sleep options */
+	0x0025,     /* R179 - Power-check comparator */
+	0x0062,     /* R180 - DCDC1 Control */
+	0x0400,     /* R181 - DCDC1 Timeouts */
+	0x1006,     /* R182 - DCDC1 Low Power */
+	0x0018,     /* R183 - DCDC2 Control */
+	0x0000,     /* R184 - DCDC2 Timeouts */
+	0x0000,     /* R185 */
+	0x0006,     /* R186 - DCDC3 Control */
+	0x0800,     /* R187 - DCDC3 Timeouts */
+	0x0006,     /* R188 - DCDC3 Low Power */
+	0x0006,     /* R189 - DCDC4 Control */
+	0x0C00,     /* R190 - DCDC4 Timeouts */
+	0x0006,     /* R191 - DCDC4 Low Power */
+	0x0008,     /* R192 - DCDC5 Control */
+	0x0000,     /* R193 - DCDC5 Timeouts */
+	0x0000,     /* R194 */
+	0x0026,     /* R195 - DCDC6 Control */
+	0x1000,     /* R196 - DCDC6 Timeouts */
+	0x0006,     /* R197 - DCDC6 Low Power */
+	0x0000,     /* R198 */
+	0x0003,     /* R199 - Limit Switch Control */
+	0x0002,     /* R200 - LDO1 Control */
+	0x0000,     /* R201 - LDO1 Timeouts */
+	0x001C,     /* R202 - LDO1 Low Power */
+	0x001A,     /* R203 - LDO2 Control */
+	0x0000,     /* R204 - LDO2 Timeouts */
+	0x001C,     /* R205 - LDO2 Low Power */
+	0x001F,     /* R206 - LDO3 Control */
+	0x0000,     /* R207 - LDO3 Timeouts */
+	0x001C,     /* R208 - LDO3 Low Power */
+	0x001F,     /* R209 - LDO4 Control */
+	0x0000,     /* R210 - LDO4 Timeouts */
+	0x001C,     /* R211 - LDO4 Low Power */
+	0x0000,     /* R212 */
+	0x0000,     /* R213 */
+	0x0000,     /* R214 */
+	0x0000,     /* R215 - VCC_FAULT Masks */
+	0x001F,     /* R216 - Main Bandgap Control */
+	0x0000,     /* R217 - OSC Control */
+	0x9000,     /* R218 - RTC Tick Control */
+	0x0000,     /* R219 - Security1 */
+	0x4000,     /* R220 */
+	0x0000,     /* R221 */
+	0x0000,     /* R222 */
+	0x0000,     /* R223 */
+	0x0000,     /* R224 - Signal overrides */
+	0x0000,     /* R225 - DCDC/LDO status */
+	0x0000,     /* R226 - Charger Overides/status */
+	0x0000,     /* R227 - misc overrides */
+	0x0000,     /* R228 - Supply overrides/status 1 */
+	0x0000,     /* R229 - Supply overrides/status 2 */
+	0xE000,     /* R230 - GPIO Pin Status */
+	0x0000,     /* R231 - comparotor overrides */
+	0x0000,     /* R232 */
+	0x0000,     /* R233 - State Machine status */
+	0x1200,     /* R234 */
+	0x0000,     /* R235 */
+	0x8000,     /* R236 */
+	0x0000,     /* R237 */
+	0x0000,     /* R238 */
+	0x0000,     /* R239 */
+	0x0003,     /* R240 */
+	0x0000,     /* R241 */
+	0x0000,     /* R242 */
+	0x0004,     /* R243 */
+	0x0300,     /* R244 */
+	0x0000,     /* R245 */
+	0x0200,     /* R246 */
+	0x0000,     /* R247 */
+	0x1000,     /* R248 - DCDC1 Test Controls */
+	0x5000,     /* R249 */
+	0x1000,     /* R250 - DCDC3 Test Controls */
+	0x1000,     /* R251 - DCDC4 Test Controls */
+	0x5100,     /* R252 */
+	0x1000,     /* R253 - DCDC6 Test Controls */
+};
+#endif
+
+#ifdef CONFIG_MFD_WM8352_CONFIG_MODE_2
+
+#undef WM8350_HAVE_CONFIG_MODE
+#define WM8350_HAVE_CONFIG_MODE
+
+const u16 wm8352_mode2_defaults[] = {
+	0x6143,     /* R0   - Reset/ID */
+	0x0000,     /* R1   - ID */
+	0x0002,     /* R2   - Revision */
+	0x1C02,     /* R3   - System Control 1 */
+	0x0204,     /* R4   - System Control 2 */
+	0x0000,     /* R5   - System Hibernate */
+	0x8A00,     /* R6   - Interface Control */
+	0x0000,     /* R7 */
+	0x8000,     /* R8   - Power mgmt (1) */
+	0x0000,     /* R9   - Power mgmt (2) */
+	0x0000,     /* R10  - Power mgmt (3) */
+	0x2000,     /* R11  - Power mgmt (4) */
+	0x0E00,     /* R12  - Power mgmt (5) */
+	0x0000,     /* R13  - Power mgmt (6) */
+	0x0000,     /* R14  - Power mgmt (7) */
+	0x0000,     /* R15 */
+	0x0000,     /* R16  - RTC Seconds/Minutes */
+	0x0100,     /* R17  - RTC Hours/Day */
+	0x0101,     /* R18  - RTC Date/Month */
+	0x1400,     /* R19  - RTC Year */
+	0x0000,     /* R20  - Alarm Seconds/Minutes */
+	0x0000,     /* R21  - Alarm Hours/Day */
+	0x0000,     /* R22  - Alarm Date/Month */
+	0x0320,     /* R23  - RTC Time Control */
+	0x0000,     /* R24  - System Interrupts */
+	0x0000,     /* R25  - Interrupt Status 1 */
+	0x0000,     /* R26  - Interrupt Status 2 */
+	0x0000,     /* R27 */
+	0x0000,     /* R28  - Under Voltage Interrupt status */
+	0x0000,     /* R29  - Over Current Interrupt status */
+	0x0000,     /* R30  - GPIO Interrupt Status */
+	0x0000,     /* R31  - Comparator Interrupt Status */
+	0x3FFF,     /* R32  - System Interrupts Mask */
+	0x0000,     /* R33  - Interrupt Status 1 Mask */
+	0x0000,     /* R34  - Interrupt Status 2 Mask */
+	0x0000,     /* R35 */
+	0x0000,     /* R36  - Under Voltage Interrupt status Mask */
+	0x0000,     /* R37  - Over Current Interrupt status Mask */
+	0x0000,     /* R38  - GPIO Interrupt Status Mask */
+	0x0000,     /* R39  - Comparator Interrupt Status Mask */
+	0x0040,     /* R40  - Clock Control 1 */
+	0x0000,     /* R41  - Clock Control 2 */
+	0x3A00,     /* R42  - FLL Control 1 */
+	0x7086,     /* R43  - FLL Control 2 */
+	0xC226,     /* R44  - FLL Control 3 */
+	0x0000,     /* R45  - FLL Control 4 */
+	0x0000,     /* R46 */
+	0x0000,     /* R47 */
+	0x0000,     /* R48  - DAC Control */
+	0x0000,     /* R49 */
+	0x00C0,     /* R50  - DAC Digital Volume L */
+	0x00C0,     /* R51  - DAC Digital Volume R */
+	0x0000,     /* R52 */
+	0x0040,     /* R53  - DAC LR Rate */
+	0x0000,     /* R54  - DAC Clock Control */
+	0x0000,     /* R55 */
+	0x0000,     /* R56 */
+	0x0000,     /* R57 */
+	0x4000,     /* R58  - DAC Mute */
+	0x0000,     /* R59  - DAC Mute Volume */
+	0x0000,     /* R60  - DAC Side */
+	0x0000,     /* R61 */
+	0x0000,     /* R62 */
+	0x0000,     /* R63 */
+	0x8000,     /* R64  - ADC Control */
+	0x0000,     /* R65 */
+	0x00C0,     /* R66  - ADC Digital Volume L */
+	0x00C0,     /* R67  - ADC Digital Volume R */
+	0x0000,     /* R68  - ADC Divider */
+	0x0000,     /* R69 */
+	0x0040,     /* R70  - ADC LR Rate */
+	0x0000,     /* R71 */
+	0x0303,     /* R72  - Input Control */
+	0x0000,     /* R73  - IN3 Input Control */
+	0x0000,     /* R74  - Mic Bias Control */
+	0x0000,     /* R75 */
+	0x0000,     /* R76  - Output Control */
+	0x0000,     /* R77  - Jack Detect */
+	0x0000,     /* R78  - Anti Pop Control */
+	0x0000,     /* R79 */
+	0x0040,     /* R80  - Left Input Volume */
+	0x0040,     /* R81  - Right Input Volume */
+	0x0000,     /* R82 */
+	0x0000,     /* R83 */
+	0x0000,     /* R84 */
+	0x0000,     /* R85 */
+	0x0000,     /* R86 */
+	0x0000,     /* R87 */
+	0x0800,     /* R88  - Left Mixer Control */
+	0x1000,     /* R89  - Right Mixer Control */
+	0x0000,     /* R90 */
+	0x0000,     /* R91 */
+	0x0000,     /* R92  - OUT3 Mixer Control */
+	0x0000,     /* R93  - OUT4 Mixer Control */
+	0x0000,     /* R94 */
+	0x0000,     /* R95 */
+	0x0000,     /* R96  - Output Left Mixer Volume */
+	0x0000,     /* R97  - Output Right Mixer Volume */
+	0x0000,     /* R98  - Input Mixer Volume L */
+	0x0000,     /* R99  - Input Mixer Volume R */
+	0x0000,     /* R100 - Input Mixer Volume */
+	0x0000,     /* R101 */
+	0x0000,     /* R102 */
+	0x0000,     /* R103 */
+	0x00E4,     /* R104 - OUT1L Volume */
+	0x00E4,     /* R105 - OUT1R Volume */
+	0x00E4,     /* R106 - OUT2L Volume */
+	0x02E4,     /* R107 - OUT2R Volume */
+	0x0000,     /* R108 */
+	0x0000,     /* R109 */
+	0x0000,     /* R110 */
+	0x0000,     /* R111 - BEEP Volume */
+	0x0A00,     /* R112 - AI Formating */
+	0x0000,     /* R113 - ADC DAC COMP */
+	0x0020,     /* R114 - AI ADC Control */
+	0x0020,     /* R115 - AI DAC Control */
+	0x0000,     /* R116 */
+	0x0000,     /* R117 */
+	0x0000,     /* R118 */
+	0x0000,     /* R119 */
+	0x0000,     /* R120 */
+	0x0000,     /* R121 */
+	0x0000,     /* R122 */
+	0x0000,     /* R123 */
+	0x0000,     /* R124 */
+	0x0000,     /* R125 */
+	0x0000,     /* R126 */
+	0x0000,     /* R127 */
+	0x1FFF,     /* R128 - GPIO Debounce */
+	0x0000,     /* R129 - GPIO Pin pull up Control */
+	0x0110,     /* R130 - GPIO Pull down Control */
+	0x0000,     /* R131 - GPIO Interrupt Mode */
+	0x0000,     /* R132 */
+	0x0000,     /* R133 - GPIO Control */
+	0x09DA,     /* R134 - GPIO Configuration (i/o) */
+	0x0DD6,     /* R135 - GPIO Pin Polarity / Type */
+	0x0000,     /* R136 */
+	0x0000,     /* R137 */
+	0x0000,     /* R138 */
+	0x0000,     /* R139 */
+	0x1310,     /* R140 - GPIO Function Select 1 */
+	0x0033,     /* R141 - GPIO Function Select 2 */
+	0x2000,     /* R142 - GPIO Function Select 3 */
+	0x0000,     /* R143 - GPIO Function Select 4 */
+	0x0000,     /* R144 - Digitiser Control (1) */
+	0x0002,     /* R145 - Digitiser Control (2) */
+	0x0000,     /* R146 */
+	0x0000,     /* R147 */
+	0x0000,     /* R148 */
+	0x0000,     /* R149 */
+	0x0000,     /* R150 */
+	0x0000,     /* R151 */
+	0x7000,     /* R152 - AUX1 Readback */
+	0x7000,     /* R153 - AUX2 Readback */
+	0x7000,     /* R154 - AUX3 Readback */
+	0x7000,     /* R155 - AUX4 Readback */
+	0x0000,     /* R156 - USB Voltage Readback */
+	0x0000,     /* R157 - LINE Voltage Readback */
+	0x0000,     /* R158 - BATT Voltage Readback */
+	0x0000,     /* R159 - Chip Temp Readback */
+	0x0000,     /* R160 */
+	0x0000,     /* R161 */
+	0x0000,     /* R162 */
+	0x0000,     /* R163 - Generic Comparator Control */
+	0x0000,     /* R164 - Generic comparator 1 */
+	0x0000,     /* R165 - Generic comparator 2 */
+	0x0000,     /* R166 - Generic comparator 3 */
+	0x0000,     /* R167 - Generic comparator 4 */
+	0xA00F,     /* R168 - Battery Charger Control 1 */
+	0x0B06,     /* R169 - Battery Charger Control 2 */
+	0x0000,     /* R170 - Battery Charger Control 3 */
+	0x0000,     /* R171 */
+	0x0000,     /* R172 - Current Sink Driver A */
+	0x0000,     /* R173 - CSA Flash control */
+	0x0000,     /* R174 - Current Sink Driver B */
+	0x0000,     /* R175 - CSB Flash control */
+	0x0000,     /* R176 - DCDC/LDO requested */
+	0x032D,     /* R177 - DCDC Active options */
+	0x0000,     /* R178 - DCDC Sleep options */
+	0x0025,     /* R179 - Power-check comparator */
+	0x000E,     /* R180 - DCDC1 Control */
+	0x0800,     /* R181 - DCDC1 Timeouts */
+	0x1006,     /* R182 - DCDC1 Low Power */
+	0x0018,     /* R183 - DCDC2 Control */
+	0x0000,     /* R184 - DCDC2 Timeouts */
+	0x0000,     /* R185 */
+	0x0056,     /* R186 - DCDC3 Control */
+	0x1800,     /* R187 - DCDC3 Timeouts */
+	0x0006,     /* R188 - DCDC3 Low Power */
+	0x000E,     /* R189 - DCDC4 Control */
+	0x1000,     /* R190 - DCDC4 Timeouts */
+	0x0006,     /* R191 - DCDC4 Low Power */
+	0x0008,     /* R192 - DCDC5 Control */
+	0x0000,     /* R193 - DCDC5 Timeouts */
+	0x0000,     /* R194 */
+	0x0026,     /* R195 - DCDC6 Control */
+	0x0C00,     /* R196 - DCDC6 Timeouts */
+	0x0006,     /* R197 - DCDC6 Low Power */
+	0x0000,     /* R198 */
+	0x0003,     /* R199 - Limit Switch Control */
+	0x001C,     /* R200 - LDO1 Control */
+	0x0000,     /* R201 - LDO1 Timeouts */
+	0x001C,     /* R202 - LDO1 Low Power */
+	0x0006,     /* R203 - LDO2 Control */
+	0x0400,     /* R204 - LDO2 Timeouts */
+	0x001C,     /* R205 - LDO2 Low Power */
+	0x001C,     /* R206 - LDO3 Control */
+	0x1400,     /* R207 - LDO3 Timeouts */
+	0x001C,     /* R208 - LDO3 Low Power */
+	0x001A,     /* R209 - LDO4 Control */
+	0x0000,     /* R210 - LDO4 Timeouts */
+	0x001C,     /* R211 - LDO4 Low Power */
+	0x0000,     /* R212 */
+	0x0000,     /* R213 */
+	0x0000,     /* R214 */
+	0x0000,     /* R215 - VCC_FAULT Masks */
+	0x001F,     /* R216 - Main Bandgap Control */
+	0x0000,     /* R217 - OSC Control */
+	0x9000,     /* R218 - RTC Tick Control */
+	0x0000,     /* R219 - Security1 */
+	0x4000,     /* R220 */
+	0x0000,     /* R221 */
+	0x0000,     /* R222 */
+	0x0000,     /* R223 */
+	0x0000,     /* R224 - Signal overrides */
+	0x0000,     /* R225 - DCDC/LDO status */
+	0x0000,     /* R226 - Charger Overides/status */
+	0x0000,     /* R227 - misc overrides */
+	0x0000,     /* R228 - Supply overrides/status 1 */
+	0x0000,     /* R229 - Supply overrides/status 2 */
+	0xE000,     /* R230 - GPIO Pin Status */
+	0x0000,     /* R231 - comparotor overrides */
+	0x0000,     /* R232 */
+	0x0000,     /* R233 - State Machine status */
+	0x1200,     /* R234 */
+	0x0000,     /* R235 */
+	0x8000,     /* R236 */
+	0x0000,     /* R237 */
+	0x0000,     /* R238 */
+	0x0000,     /* R239 */
+	0x0003,     /* R240 */
+	0x0000,     /* R241 */
+	0x0000,     /* R242 */
+	0x0004,     /* R243 */
+	0x0300,     /* R244 */
+	0x0000,     /* R245 */
+	0x0200,     /* R246 */
+	0x0000,     /* R247 */
+	0x1000,     /* R248 - DCDC1 Test Controls */
+	0x5000,     /* R249 */
+	0x1000,     /* R250 - DCDC3 Test Controls */
+	0x1000,     /* R251 - DCDC4 Test Controls */
+	0x5100,     /* R252 */
+	0x1000,     /* R253 - DCDC6 Test Controls */
+};
+#endif
+
+#ifdef CONFIG_MFD_WM8352_CONFIG_MODE_3
+
+#undef WM8350_HAVE_CONFIG_MODE
+#define WM8350_HAVE_CONFIG_MODE
+
+const u16 wm8352_mode3_defaults[] = {
+	0x6143,     /* R0   - Reset/ID */
+	0x0000,     /* R1   - ID */
+	0x0002,     /* R2   - Revision */
+	0x1C02,     /* R3   - System Control 1 */
+	0x0204,     /* R4   - System Control 2 */
+	0x0000,     /* R5   - System Hibernate */
+	0x8A00,     /* R6   - Interface Control */
+	0x0000,     /* R7 */
+	0x8000,     /* R8   - Power mgmt (1) */
+	0x0000,     /* R9   - Power mgmt (2) */
+	0x0000,     /* R10  - Power mgmt (3) */
+	0x2000,     /* R11  - Power mgmt (4) */
+	0x0E00,     /* R12  - Power mgmt (5) */
+	0x0000,     /* R13  - Power mgmt (6) */
+	0x0000,     /* R14  - Power mgmt (7) */
+	0x0000,     /* R15 */
+	0x0000,     /* R16  - RTC Seconds/Minutes */
+	0x0100,     /* R17  - RTC Hours/Day */
+	0x0101,     /* R18  - RTC Date/Month */
+	0x1400,     /* R19  - RTC Year */
+	0x0000,     /* R20  - Alarm Seconds/Minutes */
+	0x0000,     /* R21  - Alarm Hours/Day */
+	0x0000,     /* R22  - Alarm Date/Month */
+	0x0320,     /* R23  - RTC Time Control */
+	0x0000,     /* R24  - System Interrupts */
+	0x0000,     /* R25  - Interrupt Status 1 */
+	0x0000,     /* R26  - Interrupt Status 2 */
+	0x0000,     /* R27 */
+	0x0000,     /* R28  - Under Voltage Interrupt status */
+	0x0000,     /* R29  - Over Current Interrupt status */
+	0x0000,     /* R30  - GPIO Interrupt Status */
+	0x0000,     /* R31  - Comparator Interrupt Status */
+	0x3FFF,     /* R32  - System Interrupts Mask */
+	0x0000,     /* R33  - Interrupt Status 1 Mask */
+	0x0000,     /* R34  - Interrupt Status 2 Mask */
+	0x0000,     /* R35 */
+	0x0000,     /* R36  - Under Voltage Interrupt status Mask */
+	0x0000,     /* R37  - Over Current Interrupt status Mask */
+	0x0000,     /* R38  - GPIO Interrupt Status Mask */
+	0x0000,     /* R39  - Comparator Interrupt Status Mask */
+	0x0040,     /* R40  - Clock Control 1 */
+	0x0000,     /* R41  - Clock Control 2 */
+	0x3A00,     /* R42  - FLL Control 1 */
+	0x7086,     /* R43  - FLL Control 2 */
+	0xC226,     /* R44  - FLL Control 3 */
+	0x0000,     /* R45  - FLL Control 4 */
+	0x0000,     /* R46 */
+	0x0000,     /* R47 */
+	0x0000,     /* R48  - DAC Control */
+	0x0000,     /* R49 */
+	0x00C0,     /* R50  - DAC Digital Volume L */
+	0x00C0,     /* R51  - DAC Digital Volume R */
+	0x0000,     /* R52 */
+	0x0040,     /* R53  - DAC LR Rate */
+	0x0000,     /* R54  - DAC Clock Control */
+	0x0000,     /* R55 */
+	0x0000,     /* R56 */
+	0x0000,     /* R57 */
+	0x4000,     /* R58  - DAC Mute */
+	0x0000,     /* R59  - DAC Mute Volume */
+	0x0000,     /* R60  - DAC Side */
+	0x0000,     /* R61 */
+	0x0000,     /* R62 */
+	0x0000,     /* R63 */
+	0x8000,     /* R64  - ADC Control */
+	0x0000,     /* R65 */
+	0x00C0,     /* R66  - ADC Digital Volume L */
+	0x00C0,     /* R67  - ADC Digital Volume R */
+	0x0000,     /* R68  - ADC Divider */
+	0x0000,     /* R69 */
+	0x0040,     /* R70  - ADC LR Rate */
+	0x0000,     /* R71 */
+	0x0303,     /* R72  - Input Control */
+	0x0000,     /* R73  - IN3 Input Control */
+	0x0000,     /* R74  - Mic Bias Control */
+	0x0000,     /* R75 */
+	0x0000,     /* R76  - Output Control */
+	0x0000,     /* R77  - Jack Detect */
+	0x0000,     /* R78  - Anti Pop Control */
+	0x0000,     /* R79 */
+	0x0040,     /* R80  - Left Input Volume */
+	0x0040,     /* R81  - Right Input Volume */
+	0x0000,     /* R82 */
+	0x0000,     /* R83 */
+	0x0000,     /* R84 */
+	0x0000,     /* R85 */
+	0x0000,     /* R86 */
+	0x0000,     /* R87 */
+	0x0800,     /* R88  - Left Mixer Control */
+	0x1000,     /* R89  - Right Mixer Control */
+	0x0000,     /* R90 */
+	0x0000,     /* R91 */
+	0x0000,     /* R92  - OUT3 Mixer Control */
+	0x0000,     /* R93  - OUT4 Mixer Control */
+	0x0000,     /* R94 */
+	0x0000,     /* R95 */
+	0x0000,     /* R96  - Output Left Mixer Volume */
+	0x0000,     /* R97  - Output Right Mixer Volume */
+	0x0000,     /* R98  - Input Mixer Volume L */
+	0x0000,     /* R99  - Input Mixer Volume R */
+	0x0000,     /* R100 - Input Mixer Volume */
+	0x0000,     /* R101 */
+	0x0000,     /* R102 */
+	0x0000,     /* R103 */
+	0x00E4,     /* R104 - OUT1L Volume */
+	0x00E4,     /* R105 - OUT1R Volume */
+	0x00E4,     /* R106 - OUT2L Volume */
+	0x02E4,     /* R107 - OUT2R Volume */
+	0x0000,     /* R108 */
+	0x0000,     /* R109 */
+	0x0000,     /* R110 */
+	0x0000,     /* R111 - BEEP Volume */
+	0x0A00,     /* R112 - AI Formating */
+	0x0000,     /* R113 - ADC DAC COMP */
+	0x0020,     /* R114 - AI ADC Control */
+	0x0020,     /* R115 - AI DAC Control */
+	0x0000,     /* R116 */
+	0x0000,     /* R117 */
+	0x0000,     /* R118 */
+	0x0000,     /* R119 */
+	0x0000,     /* R120 */
+	0x0000,     /* R121 */
+	0x0000,     /* R122 */
+	0x0000,     /* R123 */
+	0x0000,     /* R124 */
+	0x0000,     /* R125 */
+	0x0000,     /* R126 */
+	0x0000,     /* R127 */
+	0x1FFF,     /* R128 - GPIO Debounce */
+	0x0010,     /* R129 - GPIO Pin pull up Control */
+	0x0000,     /* R130 - GPIO Pull down Control */
+	0x0000,     /* R131 - GPIO Interrupt Mode */
+	0x0000,     /* R132 */
+	0x0000,     /* R133 - GPIO Control */
+	0x0BFB,     /* R134 - GPIO Configuration (i/o) */
+	0x0FFD,     /* R135 - GPIO Pin Polarity / Type */
+	0x0000,     /* R136 */
+	0x0000,     /* R137 */
+	0x0000,     /* R138 */
+	0x0000,     /* R139 */
+	0x0310,     /* R140 - GPIO Function Select 1 */
+	0x0001,     /* R141 - GPIO Function Select 2 */
+	0x2300,     /* R142 - GPIO Function Select 3 */
+	0x0003,     /* R143 - GPIO Function Select 4 */
+	0x0000,     /* R144 - Digitiser Control (1) */
+	0x0002,     /* R145 - Digitiser Control (2) */
+	0x0000,     /* R146 */
+	0x0000,     /* R147 */
+	0x0000,     /* R148 */
+	0x0000,     /* R149 */
+	0x0000,     /* R150 */
+	0x0000,     /* R151 */
+	0x7000,     /* R152 - AUX1 Readback */
+	0x7000,     /* R153 - AUX2 Readback */
+	0x7000,     /* R154 - AUX3 Readback */
+	0x7000,     /* R155 - AUX4 Readback */
+	0x0000,     /* R156 - USB Voltage Readback */
+	0x0000,     /* R157 - LINE Voltage Readback */
+	0x0000,     /* R158 - BATT Voltage Readback */
+	0x0000,     /* R159 - Chip Temp Readback */
+	0x0000,     /* R160 */
+	0x0000,     /* R161 */
+	0x0000,     /* R162 */
+	0x0000,     /* R163 - Generic Comparator Control */
+	0x0000,     /* R164 - Generic comparator 1 */
+	0x0000,     /* R165 - Generic comparator 2 */
+	0x0000,     /* R166 - Generic comparator 3 */
+	0x0000,     /* R167 - Generic comparator 4 */
+	0xA00F,     /* R168 - Battery Charger Control 1 */
+	0x0B06,     /* R169 - Battery Charger Control 2 */
+	0x0000,     /* R170 - Battery Charger Control 3 */
+	0x0000,     /* R171 */
+	0x0000,     /* R172 - Current Sink Driver A */
+	0x0000,     /* R173 - CSA Flash control */
+	0x0000,     /* R174 - Current Sink Driver B */
+	0x0000,     /* R175 - CSB Flash control */
+	0x0000,     /* R176 - DCDC/LDO requested */
+	0x032D,     /* R177 - DCDC Active options */
+	0x0000,     /* R178 - DCDC Sleep options */
+	0x0025,     /* R179 - Power-check comparator */
+	0x0006,     /* R180 - DCDC1 Control */
+	0x0400,     /* R181 - DCDC1 Timeouts */
+	0x1006,     /* R182 - DCDC1 Low Power */
+	0x0018,     /* R183 - DCDC2 Control */
+	0x0000,     /* R184 - DCDC2 Timeouts */
+	0x0000,     /* R185 */
+	0x0050,     /* R186 - DCDC3 Control */
+	0x0C00,     /* R187 - DCDC3 Timeouts */
+	0x0006,     /* R188 - DCDC3 Low Power */
+	0x000E,     /* R189 - DCDC4 Control */
+	0x0400,     /* R190 - DCDC4 Timeouts */
+	0x0006,     /* R191 - DCDC4 Low Power */
+	0x0008,     /* R192 - DCDC5 Control */
+	0x0000,     /* R193 - DCDC5 Timeouts */
+	0x0000,     /* R194 */
+	0x0029,     /* R195 - DCDC6 Control */
+	0x0800,     /* R196 - DCDC6 Timeouts */
+	0x0006,     /* R197 - DCDC6 Low Power */
+	0x0000,     /* R198 */
+	0x0003,     /* R199 - Limit Switch Control */
+	0x001D,     /* R200 - LDO1 Control */
+	0x1000,     /* R201 - LDO1 Timeouts */
+	0x001C,     /* R202 - LDO1 Low Power */
+	0x0017,     /* R203 - LDO2 Control */
+	0x1000,     /* R204 - LDO2 Timeouts */
+	0x001C,     /* R205 - LDO2 Low Power */
+	0x0006,     /* R206 - LDO3 Control */
+	0x1000,     /* R207 - LDO3 Timeouts */
+	0x001C,     /* R208 - LDO3 Low Power */
+	0x0010,     /* R209 - LDO4 Control */
+	0x1000,     /* R210 - LDO4 Timeouts */
+	0x001C,     /* R211 - LDO4 Low Power */
+	0x0000,     /* R212 */
+	0x0000,     /* R213 */
+	0x0000,     /* R214 */
+	0x0000,     /* R215 - VCC_FAULT Masks */
+	0x001F,     /* R216 - Main Bandgap Control */
+	0x0000,     /* R217 - OSC Control */
+	0x9000,     /* R218 - RTC Tick Control */
+	0x0000,     /* R219 - Security1 */
+	0x4000,     /* R220 */
+	0x0000,     /* R221 */
+	0x0000,     /* R222 */
+	0x0000,     /* R223 */
+	0x0000,     /* R224 - Signal overrides */
+	0x0000,     /* R225 - DCDC/LDO status */
+	0x0000,     /* R226 - Charger Overides/status */
+	0x0000,     /* R227 - misc overrides */
+	0x0000,     /* R228 - Supply overrides/status 1 */
+	0x0000,     /* R229 - Supply overrides/status 2 */
+	0xE000,     /* R230 - GPIO Pin Status */
+	0x0000,     /* R231 - comparotor overrides */
+	0x0000,     /* R232 */
+	0x0000,     /* R233 - State Machine status */
+	0x1200,     /* R234 */
+	0x0000,     /* R235 */
+	0x8000,     /* R236 */
+	0x0000,     /* R237 */
+	0x0000,     /* R238 */
+	0x0000,     /* R239 */
+	0x0003,     /* R240 */
+	0x0000,     /* R241 */
+	0x0000,     /* R242 */
+	0x0004,     /* R243 */
+	0x0300,     /* R244 */
+	0x0000,     /* R245 */
+	0x0200,     /* R246 */
+	0x0000,     /* R247 */
+	0x1000,     /* R248 - DCDC1 Test Controls */
+	0x5000,     /* R249 */
+	0x1000,     /* R250 - DCDC3 Test Controls */
+	0x1000,     /* R251 - DCDC4 Test Controls */
+	0x5100,     /* R252 */
+	0x1000,     /* R253 - DCDC6 Test Controls */
+};
+#endif
+
 /* The register defaults for the config mode used must be compiled in but
  * due to the impact on kernel size it is possible to disable
  */
diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h
index afeff6f1316c..737579086d01 100644
--- a/include/linux/mfd/wm8350/core.h
+++ b/include/linux/mfd/wm8350/core.h
@@ -589,6 +589,10 @@ extern const u16 wm8350_mode0_defaults[];
 extern const u16 wm8350_mode1_defaults[];
 extern const u16 wm8350_mode2_defaults[];
 extern const u16 wm8350_mode3_defaults[];
+extern const u16 wm8352_mode0_defaults[];
+extern const u16 wm8352_mode1_defaults[];
+extern const u16 wm8352_mode2_defaults[];
+extern const u16 wm8352_mode3_defaults[];
 
 struct wm8350;
 
-- 
cgit v1.2.3


From 645524a9c6e1e42dc4fe03217befb20e2fc4d43e Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 18 Dec 2008 23:12:16 +0100
Subject: mfd: Support configurable numbers of DCDCs and ISINKs on WM8350

Some WM8350 variants have fewer DCDCs and ISINKs. Identify these at
probe and refuse to use the absent DCDCs when running on these chips.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>
---
 drivers/mfd/wm8350-core.c            | 6 ++++++
 drivers/regulator/wm8350-regulator.c | 7 +++++++
 include/linux/mfd/wm8350/pmic.h      | 4 ++++
 3 files changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mfd/wm8350-core.c b/drivers/mfd/wm8350-core.c
index 03af3b12c020..56c363c240a9 100644
--- a/drivers/mfd/wm8350-core.c
+++ b/drivers/mfd/wm8350-core.c
@@ -1301,6 +1301,9 @@ int wm8350_device_init(struct wm8350 *wm8350, int irq,
 
 	switch (mask_rev) {
 	case 0:
+		wm8350->pmic.max_dcdc = WM8350_DCDC_6;
+		wm8350->pmic.max_isink = WM8350_ISINK_B;
+
 		switch (chip_rev) {
 		case WM8350_REV_E:
 			dev_info(wm8350->dev, "WM8350 Rev E\n");
@@ -1325,6 +1328,9 @@ int wm8350_device_init(struct wm8350 *wm8350, int irq,
 		break;
 
 	case 2:
+		wm8350->pmic.max_dcdc = WM8350_DCDC_6;
+		wm8350->pmic.max_isink = WM8350_ISINK_B;
+
 		switch (chip_rev) {
 		case 0:
 			dev_info(wm8350->dev, "WM8352 Rev A\n");
diff --git a/drivers/regulator/wm8350-regulator.c b/drivers/regulator/wm8350-regulator.c
index 1f44b17e23b1..c68c496b2c49 100644
--- a/drivers/regulator/wm8350-regulator.c
+++ b/drivers/regulator/wm8350-regulator.c
@@ -1380,6 +1380,13 @@ int wm8350_register_regulator(struct wm8350 *wm8350, int reg,
 	if (wm8350->pmic.pdev[reg])
 		return -EBUSY;
 
+	if (reg >= WM8350_DCDC_1 && reg <= WM8350_DCDC_6 &&
+	    reg > wm8350->pmic.max_dcdc)
+		return -ENODEV;
+	if (reg >= WM8350_ISINK_A && reg <= WM8350_ISINK_B &&
+	    reg > wm8350->pmic.max_isink)
+		return -ENODEV;
+
 	pdev = platform_device_alloc("wm8350-regulator", reg);
 	if (!pdev)
 		return -ENOMEM;
diff --git a/include/linux/mfd/wm8350/pmic.h b/include/linux/mfd/wm8350/pmic.h
index 69b69e07f62f..96acbfc8aa12 100644
--- a/include/linux/mfd/wm8350/pmic.h
+++ b/include/linux/mfd/wm8350/pmic.h
@@ -701,6 +701,10 @@ struct platform_device;
 struct regulator_init_data;
 
 struct wm8350_pmic {
+	/* Number of regulators of each type on this device */
+	int max_dcdc;
+	int max_isink;
+
 	/* ISINK to DCDC mapping */
 	int isink_A_dcdc;
 	int isink_B_dcdc;
-- 
cgit v1.2.3


From ca23f8c1b0aa15dc69565244fc5dffa67a72dd02 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 18 Dec 2008 23:12:28 +0100
Subject: mfd: Add WM8351 support

The WM8351 is a WM8350 variant. As well as register default changes the
WM8351 has fewer voltage and current regulators than the WM8350.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>
---
 drivers/mfd/Kconfig             |   16 +
 drivers/mfd/wm8350-core.c       |   47 ++
 drivers/mfd/wm8350-i2c.c        |    1 +
 drivers/mfd/wm8350-regmap.c     | 1044 +++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/wm8350/core.h |    4 +
 5 files changed, 1112 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 76a482dfc177..781a27955ff5 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -143,6 +143,22 @@ config MFD_WM8350_CONFIG_MODE_3
 	bool
 	depends on MFD_WM8350
 
+config MFD_WM8351_CONFIG_MODE_0
+	bool
+	depends on MFD_WM8350
+
+config MFD_WM8351_CONFIG_MODE_1
+	bool
+	depends on MFD_WM8350
+
+config MFD_WM8351_CONFIG_MODE_2
+	bool
+	depends on MFD_WM8350
+
+config MFD_WM8351_CONFIG_MODE_3
+	bool
+	depends on MFD_WM8350
+
 config MFD_WM8352_CONFIG_MODE_0
 	bool
 	depends on MFD_WM8350
diff --git a/drivers/mfd/wm8350-core.c b/drivers/mfd/wm8350-core.c
index 56c363c240a9..e03fe60b55bf 100644
--- a/drivers/mfd/wm8350-core.c
+++ b/drivers/mfd/wm8350-core.c
@@ -1169,6 +1169,36 @@ static int wm8350_create_cache(struct wm8350 *wm8350, int type, int mode)
 			return -EINVAL;
 		}
 
+	case 1:
+		switch (mode) {
+#ifdef CONFIG_MFD_WM8351_CONFIG_MODE_0
+		case 0:
+			reg_map = wm8351_mode0_defaults;
+			break;
+#endif
+#ifdef CONFIG_MFD_WM8351_CONFIG_MODE_1
+		case 1:
+			reg_map = wm8351_mode1_defaults;
+			break;
+#endif
+#ifdef CONFIG_MFD_WM8351_CONFIG_MODE_2
+		case 2:
+			reg_map = wm8351_mode2_defaults;
+			break;
+#endif
+#ifdef CONFIG_MFD_WM8351_CONFIG_MODE_3
+		case 3:
+			reg_map = wm8351_mode3_defaults;
+			break;
+#endif
+		default:
+			dev_err(wm8350->dev,
+				"WM8351 configuration mode %d not supported\n",
+				mode);
+			return -EINVAL;
+		}
+		break;
+
 	case 2:
 		switch (mode) {
 #ifdef CONFIG_MFD_WM8352_CONFIG_MODE_0
@@ -1327,6 +1357,23 @@ int wm8350_device_init(struct wm8350 *wm8350, int irq,
 		}
 		break;
 
+	case 1:
+		wm8350->pmic.max_dcdc = WM8350_DCDC_4;
+		wm8350->pmic.max_isink = WM8350_ISINK_A;
+
+		switch (chip_rev) {
+		case 0:
+			dev_info(wm8350->dev, "WM8351 Rev A\n");
+			wm8350->power.rev_g_coeff = 1;
+			break;
+
+		default:
+			dev_err(wm8350->dev, "Unknown WM8351 CHIP_REV\n");
+			ret = -ENODEV;
+			goto err;
+		}
+		break;
+
 	case 2:
 		wm8350->pmic.max_dcdc = WM8350_DCDC_6;
 		wm8350->pmic.max_isink = WM8350_ISINK_B;
diff --git a/drivers/mfd/wm8350-i2c.c b/drivers/mfd/wm8350-i2c.c
index 878051253084..8d8c93217572 100644
--- a/drivers/mfd/wm8350-i2c.c
+++ b/drivers/mfd/wm8350-i2c.c
@@ -97,6 +97,7 @@ static int wm8350_i2c_remove(struct i2c_client *i2c)
 
 static const struct i2c_device_id wm8350_i2c_id[] = {
        { "wm8350", 0 },
+       { "wm8351", 0 },
        { "wm8352", 0 },
        { }
 };
diff --git a/drivers/mfd/wm8350-regmap.c b/drivers/mfd/wm8350-regmap.c
index 3e2cc37961f3..68887b817d17 100644
--- a/drivers/mfd/wm8350-regmap.c
+++ b/drivers/mfd/wm8350-regmap.c
@@ -1074,6 +1074,1050 @@ const u16 wm8350_mode3_defaults[] = {
 };
 #endif
 
+#ifdef CONFIG_MFD_WM8351_CONFIG_MODE_0
+
+#undef WM8350_HAVE_CONFIG_MODE
+#define WM8350_HAVE_CONFIG_MODE
+
+const u16 wm8351_mode0_defaults[] = {
+	0x6143,     /* R0   - Reset/ID */
+	0x0000,     /* R1   - ID */
+	0x0001,     /* R2   - Revision */
+	0x1C02,     /* R3   - System Control 1 */
+	0x0004,     /* R4   - System Control 2 */
+	0x0000,     /* R5   - System Hibernate */
+	0x8A00,     /* R6   - Interface Control */
+	0x0000,     /* R7 */
+	0x8000,     /* R8   - Power mgmt (1) */
+	0x0000,     /* R9   - Power mgmt (2) */
+	0x0000,     /* R10  - Power mgmt (3) */
+	0x2000,     /* R11  - Power mgmt (4) */
+	0x0E00,     /* R12  - Power mgmt (5) */
+	0x0000,     /* R13  - Power mgmt (6) */
+	0x0000,     /* R14  - Power mgmt (7) */
+	0x0000,     /* R15 */
+	0x0000,     /* R16  - RTC Seconds/Minutes */
+	0x0100,     /* R17  - RTC Hours/Day */
+	0x0101,     /* R18  - RTC Date/Month */
+	0x1400,     /* R19  - RTC Year */
+	0x0000,     /* R20  - Alarm Seconds/Minutes */
+	0x0000,     /* R21  - Alarm Hours/Day */
+	0x0000,     /* R22  - Alarm Date/Month */
+	0x0320,     /* R23  - RTC Time Control */
+	0x0000,     /* R24  - System Interrupts */
+	0x0000,     /* R25  - Interrupt Status 1 */
+	0x0000,     /* R26  - Interrupt Status 2 */
+	0x0000,     /* R27 */
+	0x0000,     /* R28  - Under Voltage Interrupt status */
+	0x0000,     /* R29  - Over Current Interrupt status */
+	0x0000,     /* R30  - GPIO Interrupt Status */
+	0x0000,     /* R31  - Comparator Interrupt Status */
+	0x3FFF,     /* R32  - System Interrupts Mask */
+	0x0000,     /* R33  - Interrupt Status 1 Mask */
+	0x0000,     /* R34  - Interrupt Status 2 Mask */
+	0x0000,     /* R35 */
+	0x0000,     /* R36  - Under Voltage Interrupt status Mask */
+	0x0000,     /* R37  - Over Current Interrupt status Mask */
+	0x0000,     /* R38  - GPIO Interrupt Status Mask */
+	0x0000,     /* R39  - Comparator Interrupt Status Mask */
+	0x0040,     /* R40  - Clock Control 1 */
+	0x0000,     /* R41  - Clock Control 2 */
+	0x3A00,     /* R42  - FLL Control 1 */
+	0x7086,     /* R43  - FLL Control 2 */
+	0xC226,     /* R44  - FLL Control 3 */
+	0x0000,     /* R45  - FLL Control 4 */
+	0x0000,     /* R46 */
+	0x0000,     /* R47 */
+	0x0000,     /* R48  - DAC Control */
+	0x0000,     /* R49 */
+	0x00C0,     /* R50  - DAC Digital Volume L */
+	0x00C0,     /* R51  - DAC Digital Volume R */
+	0x0000,     /* R52 */
+	0x0040,     /* R53  - DAC LR Rate */
+	0x0000,     /* R54  - DAC Clock Control */
+	0x0000,     /* R55 */
+	0x0000,     /* R56 */
+	0x0000,     /* R57 */
+	0x4000,     /* R58  - DAC Mute */
+	0x0000,     /* R59  - DAC Mute Volume */
+	0x0000,     /* R60  - DAC Side */
+	0x0000,     /* R61 */
+	0x0000,     /* R62 */
+	0x0000,     /* R63 */
+	0x8000,     /* R64  - ADC Control */
+	0x0000,     /* R65 */
+	0x00C0,     /* R66  - ADC Digital Volume L */
+	0x00C0,     /* R67  - ADC Digital Volume R */
+	0x0000,     /* R68  - ADC Divider */
+	0x0000,     /* R69 */
+	0x0040,     /* R70  - ADC LR Rate */
+	0x0000,     /* R71 */
+	0x0303,     /* R72  - Input Control */
+	0x0000,     /* R73  - IN3 Input Control */
+	0x0000,     /* R74  - Mic Bias Control */
+	0x0000,     /* R75 */
+	0x0000,     /* R76  - Output Control */
+	0x0000,     /* R77  - Jack Detect */
+	0x0000,     /* R78  - Anti Pop Control */
+	0x0000,     /* R79 */
+	0x0040,     /* R80  - Left Input Volume */
+	0x0040,     /* R81  - Right Input Volume */
+	0x0000,     /* R82 */
+	0x0000,     /* R83 */
+	0x0000,     /* R84 */
+	0x0000,     /* R85 */
+	0x0000,     /* R86 */
+	0x0000,     /* R87 */
+	0x0800,     /* R88  - Left Mixer Control */
+	0x1000,     /* R89  - Right Mixer Control */
+	0x0000,     /* R90 */
+	0x0000,     /* R91 */
+	0x0000,     /* R92  - OUT3 Mixer Control */
+	0x0000,     /* R93  - OUT4 Mixer Control */
+	0x0000,     /* R94 */
+	0x0000,     /* R95 */
+	0x0000,     /* R96  - Output Left Mixer Volume */
+	0x0000,     /* R97  - Output Right Mixer Volume */
+	0x0000,     /* R98  - Input Mixer Volume L */
+	0x0000,     /* R99  - Input Mixer Volume R */
+	0x0000,     /* R100 - Input Mixer Volume */
+	0x0000,     /* R101 */
+	0x0000,     /* R102 */
+	0x0000,     /* R103 */
+	0x00E4,     /* R104 - OUT1L Volume */
+	0x00E4,     /* R105 - OUT1R Volume */
+	0x00E4,     /* R106 - OUT2L Volume */
+	0x02E4,     /* R107 - OUT2R Volume */
+	0x0000,     /* R108 */
+	0x0000,     /* R109 */
+	0x0000,     /* R110 */
+	0x0000,     /* R111 - BEEP Volume */
+	0x0A00,     /* R112 - AI Formating */
+	0x0000,     /* R113 - ADC DAC COMP */
+	0x0020,     /* R114 - AI ADC Control */
+	0x0020,     /* R115 - AI DAC Control */
+	0x0000,     /* R116 */
+	0x0000,     /* R117 */
+	0x0000,     /* R118 */
+	0x0000,     /* R119 */
+	0x0000,     /* R120 */
+	0x0000,     /* R121 */
+	0x0000,     /* R122 */
+	0x0000,     /* R123 */
+	0x0000,     /* R124 */
+	0x0000,     /* R125 */
+	0x0000,     /* R126 */
+	0x0000,     /* R127 */
+	0x1FFF,     /* R128 - GPIO Debounce */
+	0x0000,     /* R129 - GPIO Pin pull up Control */
+	0x0000,     /* R130 - GPIO Pull down Control */
+	0x0000,     /* R131 - GPIO Interrupt Mode */
+	0x0000,     /* R132 */
+	0x0000,     /* R133 - GPIO Control */
+	0x0FFC,     /* R134 - GPIO Configuration (i/o) */
+	0x0FFC,     /* R135 - GPIO Pin Polarity / Type */
+	0x0000,     /* R136 */
+	0x0000,     /* R137 */
+	0x0000,     /* R138 */
+	0x0000,     /* R139 */
+	0x0013,     /* R140 - GPIO Function Select 1 */
+	0x0000,     /* R141 - GPIO Function Select 2 */
+	0x0000,     /* R142 - GPIO Function Select 3 */
+	0x0003,     /* R143 - GPIO Function Select 4 */
+	0x0000,     /* R144 - Digitiser Control (1) */
+	0x0002,     /* R145 - Digitiser Control (2) */
+	0x0000,     /* R146 */
+	0x0000,     /* R147 */
+	0x0000,     /* R148 */
+	0x0000,     /* R149 */
+	0x0000,     /* R150 */
+	0x0000,     /* R151 */
+	0x7000,     /* R152 - AUX1 Readback */
+	0x7000,     /* R153 - AUX2 Readback */
+	0x7000,     /* R154 - AUX3 Readback */
+	0x7000,     /* R155 - AUX4 Readback */
+	0x0000,     /* R156 - USB Voltage Readback */
+	0x0000,     /* R157 - LINE Voltage Readback */
+	0x0000,     /* R158 - BATT Voltage Readback */
+	0x0000,     /* R159 - Chip Temp Readback */
+	0x0000,     /* R160 */
+	0x0000,     /* R161 */
+	0x0000,     /* R162 */
+	0x0000,     /* R163 - Generic Comparator Control */
+	0x0000,     /* R164 - Generic comparator 1 */
+	0x0000,     /* R165 - Generic comparator 2 */
+	0x0000,     /* R166 - Generic comparator 3 */
+	0x0000,     /* R167 - Generic comparator 4 */
+	0xA00F,     /* R168 - Battery Charger Control 1 */
+	0x0B06,     /* R169 - Battery Charger Control 2 */
+	0x0000,     /* R170 - Battery Charger Control 3 */
+	0x0000,     /* R171 */
+	0x0000,     /* R172 - Current Sink Driver A */
+	0x0000,     /* R173 - CSA Flash control */
+	0x0000,     /* R174 */
+	0x0000,     /* R175 */
+	0x0000,     /* R176 - DCDC/LDO requested */
+	0x032D,     /* R177 - DCDC Active options */
+	0x0000,     /* R178 - DCDC Sleep options */
+	0x0025,     /* R179 - Power-check comparator */
+	0x000E,     /* R180 - DCDC1 Control */
+	0x0000,     /* R181 - DCDC1 Timeouts */
+	0x1006,     /* R182 - DCDC1 Low Power */
+	0x0018,     /* R183 - DCDC2 Control */
+	0x0000,     /* R184 - DCDC2 Timeouts */
+	0x0000,     /* R185 */
+	0x0000,     /* R186 - DCDC3 Control */
+	0x0000,     /* R187 - DCDC3 Timeouts */
+	0x0006,     /* R188 - DCDC3 Low Power */
+	0x0000,     /* R189 - DCDC4 Control */
+	0x0000,     /* R190 - DCDC4 Timeouts */
+	0x0006,     /* R191 - DCDC4 Low Power */
+	0x0008,     /* R192 */
+	0x0000,     /* R193 */
+	0x0000,     /* R194 */
+	0x0000,     /* R195 */
+	0x0000,     /* R196 */
+	0x0006,     /* R197 */
+	0x0000,     /* R198 */
+	0x0003,     /* R199 - Limit Switch Control */
+	0x001C,     /* R200 - LDO1 Control */
+	0x0000,     /* R201 - LDO1 Timeouts */
+	0x001C,     /* R202 - LDO1 Low Power */
+	0x001B,     /* R203 - LDO2 Control */
+	0x0000,     /* R204 - LDO2 Timeouts */
+	0x001C,     /* R205 - LDO2 Low Power */
+	0x001B,     /* R206 - LDO3 Control */
+	0x0000,     /* R207 - LDO3 Timeouts */
+	0x001C,     /* R208 - LDO3 Low Power */
+	0x001B,     /* R209 - LDO4 Control */
+	0x0000,     /* R210 - LDO4 Timeouts */
+	0x001C,     /* R211 - LDO4 Low Power */
+	0x0000,     /* R212 */
+	0x0000,     /* R213 */
+	0x0000,     /* R214 */
+	0x0000,     /* R215 - VCC_FAULT Masks */
+	0x001F,     /* R216 - Main Bandgap Control */
+	0x0000,     /* R217 - OSC Control */
+	0x9000,     /* R218 - RTC Tick Control */
+	0x0000,     /* R219 - Security1 */
+	0x4000,     /* R220 */
+	0x0000,     /* R221 */
+	0x0000,     /* R222 */
+	0x0000,     /* R223 */
+	0x0000,     /* R224 - Signal overrides */
+	0x0000,     /* R225 - DCDC/LDO status */
+	0x0000,     /* R226 - Charger Overides/status */
+	0x0000,     /* R227 - misc overrides */
+	0x0000,     /* R228 - Supply overrides/status 1 */
+	0x0000,     /* R229 - Supply overrides/status 2 */
+	0xE000,     /* R230 - GPIO Pin Status */
+	0x0000,     /* R231 - comparotor overrides */
+	0x0000,     /* R232 */
+	0x0000,     /* R233 - State Machine status */
+	0x1200,     /* R234 - FLL Test 1 */
+	0x0000,     /* R235 */
+	0x8000,     /* R236 */
+	0x0000,     /* R237 */
+	0x0000,     /* R238 */
+	0x0000,     /* R239 */
+	0x0003,     /* R240 */
+	0x0000,     /* R241 */
+	0x0000,     /* R242 */
+	0x0004,     /* R243 */
+	0x0300,     /* R244 */
+	0x0000,     /* R245 */
+	0x0200,     /* R246 */
+	0x0000,     /* R247 */
+	0x1000,     /* R248 - DCDC1 Test Controls */
+	0x1000,     /* R249 */
+	0x1000,     /* R250 - DCDC3 Test Controls */
+	0x1000,     /* R251 - DCDC4 Test Controls */
+};
+#endif
+
+#ifdef CONFIG_MFD_WM8351_CONFIG_MODE_1
+
+#undef WM8350_HAVE_CONFIG_MODE
+#define WM8350_HAVE_CONFIG_MODE
+
+const u16 wm8351_mode1_defaults[] = {
+	0x6143,     /* R0   - Reset/ID */
+	0x0000,     /* R1   - ID */
+	0x0001,     /* R2   - Revision */
+	0x1C02,     /* R3   - System Control 1 */
+	0x0204,     /* R4   - System Control 2 */
+	0x0000,     /* R5   - System Hibernate */
+	0x8A00,     /* R6   - Interface Control */
+	0x0000,     /* R7 */
+	0x8000,     /* R8   - Power mgmt (1) */
+	0x0000,     /* R9   - Power mgmt (2) */
+	0x0000,     /* R10  - Power mgmt (3) */
+	0x2000,     /* R11  - Power mgmt (4) */
+	0x0E00,     /* R12  - Power mgmt (5) */
+	0x0000,     /* R13  - Power mgmt (6) */
+	0x0000,     /* R14  - Power mgmt (7) */
+	0x0000,     /* R15 */
+	0x0000,     /* R16  - RTC Seconds/Minutes */
+	0x0100,     /* R17  - RTC Hours/Day */
+	0x0101,     /* R18  - RTC Date/Month */
+	0x1400,     /* R19  - RTC Year */
+	0x0000,     /* R20  - Alarm Seconds/Minutes */
+	0x0000,     /* R21  - Alarm Hours/Day */
+	0x0000,     /* R22  - Alarm Date/Month */
+	0x0320,     /* R23  - RTC Time Control */
+	0x0000,     /* R24  - System Interrupts */
+	0x0000,     /* R25  - Interrupt Status 1 */
+	0x0000,     /* R26  - Interrupt Status 2 */
+	0x0000,     /* R27 */
+	0x0000,     /* R28  - Under Voltage Interrupt status */
+	0x0000,     /* R29  - Over Current Interrupt status */
+	0x0000,     /* R30  - GPIO Interrupt Status */
+	0x0000,     /* R31  - Comparator Interrupt Status */
+	0x3FFF,     /* R32  - System Interrupts Mask */
+	0x0000,     /* R33  - Interrupt Status 1 Mask */
+	0x0000,     /* R34  - Interrupt Status 2 Mask */
+	0x0000,     /* R35 */
+	0x0000,     /* R36  - Under Voltage Interrupt status Mask */
+	0x0000,     /* R37  - Over Current Interrupt status Mask */
+	0x0000,     /* R38  - GPIO Interrupt Status Mask */
+	0x0000,     /* R39  - Comparator Interrupt Status Mask */
+	0x0040,     /* R40  - Clock Control 1 */
+	0x0000,     /* R41  - Clock Control 2 */
+	0x3A00,     /* R42  - FLL Control 1 */
+	0x7086,     /* R43  - FLL Control 2 */
+	0xC226,     /* R44  - FLL Control 3 */
+	0x0000,     /* R45  - FLL Control 4 */
+	0x0000,     /* R46 */
+	0x0000,     /* R47 */
+	0x0000,     /* R48  - DAC Control */
+	0x0000,     /* R49 */
+	0x00C0,     /* R50  - DAC Digital Volume L */
+	0x00C0,     /* R51  - DAC Digital Volume R */
+	0x0000,     /* R52 */
+	0x0040,     /* R53  - DAC LR Rate */
+	0x0000,     /* R54  - DAC Clock Control */
+	0x0000,     /* R55 */
+	0x0000,     /* R56 */
+	0x0000,     /* R57 */
+	0x4000,     /* R58  - DAC Mute */
+	0x0000,     /* R59  - DAC Mute Volume */
+	0x0000,     /* R60  - DAC Side */
+	0x0000,     /* R61 */
+	0x0000,     /* R62 */
+	0x0000,     /* R63 */
+	0x8000,     /* R64  - ADC Control */
+	0x0000,     /* R65 */
+	0x00C0,     /* R66  - ADC Digital Volume L */
+	0x00C0,     /* R67  - ADC Digital Volume R */
+	0x0000,     /* R68  - ADC Divider */
+	0x0000,     /* R69 */
+	0x0040,     /* R70  - ADC LR Rate */
+	0x0000,     /* R71 */
+	0x0303,     /* R72  - Input Control */
+	0x0000,     /* R73  - IN3 Input Control */
+	0x0000,     /* R74  - Mic Bias Control */
+	0x0000,     /* R75 */
+	0x0000,     /* R76  - Output Control */
+	0x0000,     /* R77  - Jack Detect */
+	0x0000,     /* R78  - Anti Pop Control */
+	0x0000,     /* R79 */
+	0x0040,     /* R80  - Left Input Volume */
+	0x0040,     /* R81  - Right Input Volume */
+	0x0000,     /* R82 */
+	0x0000,     /* R83 */
+	0x0000,     /* R84 */
+	0x0000,     /* R85 */
+	0x0000,     /* R86 */
+	0x0000,     /* R87 */
+	0x0800,     /* R88  - Left Mixer Control */
+	0x1000,     /* R89  - Right Mixer Control */
+	0x0000,     /* R90 */
+	0x0000,     /* R91 */
+	0x0000,     /* R92  - OUT3 Mixer Control */
+	0x0000,     /* R93  - OUT4 Mixer Control */
+	0x0000,     /* R94 */
+	0x0000,     /* R95 */
+	0x0000,     /* R96  - Output Left Mixer Volume */
+	0x0000,     /* R97  - Output Right Mixer Volume */
+	0x0000,     /* R98  - Input Mixer Volume L */
+	0x0000,     /* R99  - Input Mixer Volume R */
+	0x0000,     /* R100 - Input Mixer Volume */
+	0x0000,     /* R101 */
+	0x0000,     /* R102 */
+	0x0000,     /* R103 */
+	0x00E4,     /* R104 - OUT1L Volume */
+	0x00E4,     /* R105 - OUT1R Volume */
+	0x00E4,     /* R106 - OUT2L Volume */
+	0x02E4,     /* R107 - OUT2R Volume */
+	0x0000,     /* R108 */
+	0x0000,     /* R109 */
+	0x0000,     /* R110 */
+	0x0000,     /* R111 - BEEP Volume */
+	0x0A00,     /* R112 - AI Formating */
+	0x0000,     /* R113 - ADC DAC COMP */
+	0x0020,     /* R114 - AI ADC Control */
+	0x0020,     /* R115 - AI DAC Control */
+	0x0000,     /* R116 */
+	0x0000,     /* R117 */
+	0x0000,     /* R118 */
+	0x0000,     /* R119 */
+	0x0000,     /* R120 */
+	0x0000,     /* R121 */
+	0x0000,     /* R122 */
+	0x0000,     /* R123 */
+	0x0000,     /* R124 */
+	0x0000,     /* R125 */
+	0x0000,     /* R126 */
+	0x0000,     /* R127 */
+	0x1FFF,     /* R128 - GPIO Debounce */
+	0x0000,     /* R129 - GPIO Pin pull up Control */
+	0x0000,     /* R130 - GPIO Pull down Control */
+	0x0000,     /* R131 - GPIO Interrupt Mode */
+	0x0000,     /* R132 */
+	0x0000,     /* R133 - GPIO Control */
+	0x0CFB,     /* R134 - GPIO Configuration (i/o) */
+	0x0C1F,     /* R135 - GPIO Pin Polarity / Type */
+	0x0000,     /* R136 */
+	0x0000,     /* R137 */
+	0x0000,     /* R138 */
+	0x0000,     /* R139 */
+	0x0300,     /* R140 - GPIO Function Select 1 */
+	0x1110,     /* R141 - GPIO Function Select 2 */
+	0x0013,     /* R142 - GPIO Function Select 3 */
+	0x0003,     /* R143 - GPIO Function Select 4 */
+	0x0000,     /* R144 - Digitiser Control (1) */
+	0x0002,     /* R145 - Digitiser Control (2) */
+	0x0000,     /* R146 */
+	0x0000,     /* R147 */
+	0x0000,     /* R148 */
+	0x0000,     /* R149 */
+	0x0000,     /* R150 */
+	0x0000,     /* R151 */
+	0x7000,     /* R152 - AUX1 Readback */
+	0x7000,     /* R153 - AUX2 Readback */
+	0x7000,     /* R154 - AUX3 Readback */
+	0x7000,     /* R155 - AUX4 Readback */
+	0x0000,     /* R156 - USB Voltage Readback */
+	0x0000,     /* R157 - LINE Voltage Readback */
+	0x0000,     /* R158 - BATT Voltage Readback */
+	0x0000,     /* R159 - Chip Temp Readback */
+	0x0000,     /* R160 */
+	0x0000,     /* R161 */
+	0x0000,     /* R162 */
+	0x0000,     /* R163 - Generic Comparator Control */
+	0x0000,     /* R164 - Generic comparator 1 */
+	0x0000,     /* R165 - Generic comparator 2 */
+	0x0000,     /* R166 - Generic comparator 3 */
+	0x0000,     /* R167 - Generic comparator 4 */
+	0xA00F,     /* R168 - Battery Charger Control 1 */
+	0x0B06,     /* R169 - Battery Charger Control 2 */
+	0x0000,     /* R170 - Battery Charger Control 3 */
+	0x0000,     /* R171 */
+	0x0000,     /* R172 - Current Sink Driver A */
+	0x0000,     /* R173 - CSA Flash control */
+	0x0000,     /* R174 */
+	0x0000,     /* R175 */
+	0x0000,     /* R176 - DCDC/LDO requested */
+	0x032D,     /* R177 - DCDC Active options */
+	0x0000,     /* R178 - DCDC Sleep options */
+	0x0025,     /* R179 - Power-check comparator */
+	0x000E,     /* R180 - DCDC1 Control */
+	0x0C00,     /* R181 - DCDC1 Timeouts */
+	0x1006,     /* R182 - DCDC1 Low Power */
+	0x0018,     /* R183 - DCDC2 Control */
+	0x0000,     /* R184 - DCDC2 Timeouts */
+	0x0000,     /* R185 */
+	0x0026,     /* R186 - DCDC3 Control */
+	0x0400,     /* R187 - DCDC3 Timeouts */
+	0x0006,     /* R188 - DCDC3 Low Power */
+	0x0062,     /* R189 - DCDC4 Control */
+	0x0800,     /* R190 - DCDC4 Timeouts */
+	0x0006,     /* R191 - DCDC4 Low Power */
+	0x0008,     /* R192 */
+	0x0000,     /* R193 */
+	0x0000,     /* R194 */
+	0x000A,     /* R195 */
+	0x1000,     /* R196 */
+	0x0006,     /* R197 */
+	0x0000,     /* R198 */
+	0x0003,     /* R199 - Limit Switch Control */
+	0x0006,     /* R200 - LDO1 Control */
+	0x0000,     /* R201 - LDO1 Timeouts */
+	0x001C,     /* R202 - LDO1 Low Power */
+	0x0010,     /* R203 - LDO2 Control */
+	0x0C00,     /* R204 - LDO2 Timeouts */
+	0x001C,     /* R205 - LDO2 Low Power */
+	0x001F,     /* R206 - LDO3 Control */
+	0x0800,     /* R207 - LDO3 Timeouts */
+	0x001C,     /* R208 - LDO3 Low Power */
+	0x000A,     /* R209 - LDO4 Control */
+	0x0800,     /* R210 - LDO4 Timeouts */
+	0x001C,     /* R211 - LDO4 Low Power */
+	0x0000,     /* R212 */
+	0x0000,     /* R213 */
+	0x0000,     /* R214 */
+	0x0000,     /* R215 - VCC_FAULT Masks */
+	0x001F,     /* R216 - Main Bandgap Control */
+	0x0000,     /* R217 - OSC Control */
+	0x9000,     /* R218 - RTC Tick Control */
+	0x0000,     /* R219 - Security1 */
+	0x4000,     /* R220 */
+	0x0000,     /* R221 */
+	0x0000,     /* R222 */
+	0x0000,     /* R223 */
+	0x0000,     /* R224 - Signal overrides */
+	0x0000,     /* R225 - DCDC/LDO status */
+	0x0000,     /* R226 - Charger Overides/status */
+	0x0000,     /* R227 - misc overrides */
+	0x0000,     /* R228 - Supply overrides/status 1 */
+	0x0000,     /* R229 - Supply overrides/status 2 */
+	0xE000,     /* R230 - GPIO Pin Status */
+	0x0000,     /* R231 - comparotor overrides */
+	0x0000,     /* R232 */
+	0x0000,     /* R233 - State Machine status */
+	0x1200,     /* R234 - FLL Test 1 */
+	0x0000,     /* R235 */
+	0x8000,     /* R236 */
+	0x0000,     /* R237 */
+	0x0000,     /* R238 */
+	0x0000,     /* R239 */
+	0x0003,     /* R240 */
+	0x0000,     /* R241 */
+	0x0000,     /* R242 */
+	0x0004,     /* R243 */
+	0x0300,     /* R244 */
+	0x0000,     /* R245 */
+	0x0200,     /* R246 */
+	0x1000,     /* R247 */
+	0x1000,     /* R248 - DCDC1 Test Controls */
+	0x1000,     /* R249 */
+	0x1000,     /* R250 - DCDC3 Test Controls */
+	0x1000,     /* R251 - DCDC4 Test Controls */
+};
+#endif
+
+#ifdef CONFIG_MFD_WM8351_CONFIG_MODE_2
+
+#undef WM8350_HAVE_CONFIG_MODE
+#define WM8350_HAVE_CONFIG_MODE
+
+const u16 wm8351_mode2_defaults[] = {
+	0x6143,     /* R0   - Reset/ID */
+	0x0000,     /* R1   - ID */
+	0x0001,     /* R2   - Revision */
+	0x1C02,     /* R3   - System Control 1 */
+	0x0214,     /* R4   - System Control 2 */
+	0x0000,     /* R5   - System Hibernate */
+	0x8A00,     /* R6   - Interface Control */
+	0x0000,     /* R7 */
+	0x8000,     /* R8   - Power mgmt (1) */
+	0x0000,     /* R9   - Power mgmt (2) */
+	0x0000,     /* R10  - Power mgmt (3) */
+	0x2000,     /* R11  - Power mgmt (4) */
+	0x0E00,     /* R12  - Power mgmt (5) */
+	0x0000,     /* R13  - Power mgmt (6) */
+	0x0000,     /* R14  - Power mgmt (7) */
+	0x0000,     /* R15 */
+	0x0000,     /* R16  - RTC Seconds/Minutes */
+	0x0100,     /* R17  - RTC Hours/Day */
+	0x0101,     /* R18  - RTC Date/Month */
+	0x1400,     /* R19  - RTC Year */
+	0x0000,     /* R20  - Alarm Seconds/Minutes */
+	0x0000,     /* R21  - Alarm Hours/Day */
+	0x0000,     /* R22  - Alarm Date/Month */
+	0x0320,     /* R23  - RTC Time Control */
+	0x0000,     /* R24  - System Interrupts */
+	0x0000,     /* R25  - Interrupt Status 1 */
+	0x0000,     /* R26  - Interrupt Status 2 */
+	0x0000,     /* R27 */
+	0x0000,     /* R28  - Under Voltage Interrupt status */
+	0x0000,     /* R29  - Over Current Interrupt status */
+	0x0000,     /* R30  - GPIO Interrupt Status */
+	0x0000,     /* R31  - Comparator Interrupt Status */
+	0x3FFF,     /* R32  - System Interrupts Mask */
+	0x0000,     /* R33  - Interrupt Status 1 Mask */
+	0x0000,     /* R34  - Interrupt Status 2 Mask */
+	0x0000,     /* R35 */
+	0x0000,     /* R36  - Under Voltage Interrupt status Mask */
+	0x0000,     /* R37  - Over Current Interrupt status Mask */
+	0x0000,     /* R38  - GPIO Interrupt Status Mask */
+	0x0000,     /* R39  - Comparator Interrupt Status Mask */
+	0x0040,     /* R40  - Clock Control 1 */
+	0x0000,     /* R41  - Clock Control 2 */
+	0x3A00,     /* R42  - FLL Control 1 */
+	0x7086,     /* R43  - FLL Control 2 */
+	0xC226,     /* R44  - FLL Control 3 */
+	0x0000,     /* R45  - FLL Control 4 */
+	0x0000,     /* R46 */
+	0x0000,     /* R47 */
+	0x0000,     /* R48  - DAC Control */
+	0x0000,     /* R49 */
+	0x00C0,     /* R50  - DAC Digital Volume L */
+	0x00C0,     /* R51  - DAC Digital Volume R */
+	0x0000,     /* R52 */
+	0x0040,     /* R53  - DAC LR Rate */
+	0x0000,     /* R54  - DAC Clock Control */
+	0x0000,     /* R55 */
+	0x0000,     /* R56 */
+	0x0000,     /* R57 */
+	0x4000,     /* R58  - DAC Mute */
+	0x0000,     /* R59  - DAC Mute Volume */
+	0x0000,     /* R60  - DAC Side */
+	0x0000,     /* R61 */
+	0x0000,     /* R62 */
+	0x0000,     /* R63 */
+	0x8000,     /* R64  - ADC Control */
+	0x0000,     /* R65 */
+	0x00C0,     /* R66  - ADC Digital Volume L */
+	0x00C0,     /* R67  - ADC Digital Volume R */
+	0x0000,     /* R68  - ADC Divider */
+	0x0000,     /* R69 */
+	0x0040,     /* R70  - ADC LR Rate */
+	0x0000,     /* R71 */
+	0x0303,     /* R72  - Input Control */
+	0x0000,     /* R73  - IN3 Input Control */
+	0x0000,     /* R74  - Mic Bias Control */
+	0x0000,     /* R75 */
+	0x0000,     /* R76  - Output Control */
+	0x0000,     /* R77  - Jack Detect */
+	0x0000,     /* R78  - Anti Pop Control */
+	0x0000,     /* R79 */
+	0x0040,     /* R80  - Left Input Volume */
+	0x0040,     /* R81  - Right Input Volume */
+	0x0000,     /* R82 */
+	0x0000,     /* R83 */
+	0x0000,     /* R84 */
+	0x0000,     /* R85 */
+	0x0000,     /* R86 */
+	0x0000,     /* R87 */
+	0x0800,     /* R88  - Left Mixer Control */
+	0x1000,     /* R89  - Right Mixer Control */
+	0x0000,     /* R90 */
+	0x0000,     /* R91 */
+	0x0000,     /* R92  - OUT3 Mixer Control */
+	0x0000,     /* R93  - OUT4 Mixer Control */
+	0x0000,     /* R94 */
+	0x0000,     /* R95 */
+	0x0000,     /* R96  - Output Left Mixer Volume */
+	0x0000,     /* R97  - Output Right Mixer Volume */
+	0x0000,     /* R98  - Input Mixer Volume L */
+	0x0000,     /* R99  - Input Mixer Volume R */
+	0x0000,     /* R100 - Input Mixer Volume */
+	0x0000,     /* R101 */
+	0x0000,     /* R102 */
+	0x0000,     /* R103 */
+	0x00E4,     /* R104 - OUT1L Volume */
+	0x00E4,     /* R105 - OUT1R Volume */
+	0x00E4,     /* R106 - OUT2L Volume */
+	0x02E4,     /* R107 - OUT2R Volume */
+	0x0000,     /* R108 */
+	0x0000,     /* R109 */
+	0x0000,     /* R110 */
+	0x0000,     /* R111 - BEEP Volume */
+	0x0A00,     /* R112 - AI Formating */
+	0x0000,     /* R113 - ADC DAC COMP */
+	0x0020,     /* R114 - AI ADC Control */
+	0x0020,     /* R115 - AI DAC Control */
+	0x0000,     /* R116 */
+	0x0000,     /* R117 */
+	0x0000,     /* R118 */
+	0x0000,     /* R119 */
+	0x0000,     /* R120 */
+	0x0000,     /* R121 */
+	0x0000,     /* R122 */
+	0x0000,     /* R123 */
+	0x0000,     /* R124 */
+	0x0000,     /* R125 */
+	0x0000,     /* R126 */
+	0x0000,     /* R127 */
+	0x1FFF,     /* R128 - GPIO Debounce */
+	0x0000,     /* R129 - GPIO Pin pull up Control */
+	0x0110,     /* R130 - GPIO Pull down Control */
+	0x0000,     /* R131 - GPIO Interrupt Mode */
+	0x0000,     /* R132 */
+	0x0000,     /* R133 - GPIO Control */
+	0x09FA,     /* R134 - GPIO Configuration (i/o) */
+	0x0DF6,     /* R135 - GPIO Pin Polarity / Type */
+	0x0000,     /* R136 */
+	0x0000,     /* R137 */
+	0x0000,     /* R138 */
+	0x0000,     /* R139 */
+	0x1310,     /* R140 - GPIO Function Select 1 */
+	0x0003,     /* R141 - GPIO Function Select 2 */
+	0x2000,     /* R142 - GPIO Function Select 3 */
+	0x0000,     /* R143 - GPIO Function Select 4 */
+	0x0000,     /* R144 - Digitiser Control (1) */
+	0x0002,     /* R145 - Digitiser Control (2) */
+	0x0000,     /* R146 */
+	0x0000,     /* R147 */
+	0x0000,     /* R148 */
+	0x0000,     /* R149 */
+	0x0000,     /* R150 */
+	0x0000,     /* R151 */
+	0x7000,     /* R152 - AUX1 Readback */
+	0x7000,     /* R153 - AUX2 Readback */
+	0x7000,     /* R154 - AUX3 Readback */
+	0x7000,     /* R155 - AUX4 Readback */
+	0x0000,     /* R156 - USB Voltage Readback */
+	0x0000,     /* R157 - LINE Voltage Readback */
+	0x0000,     /* R158 - BATT Voltage Readback */
+	0x0000,     /* R159 - Chip Temp Readback */
+	0x0000,     /* R160 */
+	0x0000,     /* R161 */
+	0x0000,     /* R162 */
+	0x0000,     /* R163 - Generic Comparator Control */
+	0x0000,     /* R164 - Generic comparator 1 */
+	0x0000,     /* R165 - Generic comparator 2 */
+	0x0000,     /* R166 - Generic comparator 3 */
+	0x0000,     /* R167 - Generic comparator 4 */
+	0xA00F,     /* R168 - Battery Charger Control 1 */
+	0x0B06,     /* R169 - Battery Charger Control 2 */
+	0x0000,     /* R170 - Battery Charger Control 3 */
+	0x0000,     /* R171 */
+	0x0000,     /* R172 - Current Sink Driver A */
+	0x0000,     /* R173 - CSA Flash control */
+	0x0000,     /* R174 */
+	0x0000,     /* R175 */
+	0x0000,     /* R176 - DCDC/LDO requested */
+	0x032D,     /* R177 - DCDC Active options */
+	0x0000,     /* R178 - DCDC Sleep options */
+	0x0025,     /* R179 - Power-check comparator */
+	0x001A,     /* R180 - DCDC1 Control */
+	0x0800,     /* R181 - DCDC1 Timeouts */
+	0x1006,     /* R182 - DCDC1 Low Power */
+	0x0018,     /* R183 - DCDC2 Control */
+	0x0000,     /* R184 - DCDC2 Timeouts */
+	0x0000,     /* R185 */
+	0x0056,     /* R186 - DCDC3 Control */
+	0x0400,     /* R187 - DCDC3 Timeouts */
+	0x0006,     /* R188 - DCDC3 Low Power */
+	0x0026,     /* R189 - DCDC4 Control */
+	0x0C00,     /* R190 - DCDC4 Timeouts */
+	0x0006,     /* R191 - DCDC4 Low Power */
+	0x0008,     /* R192 */
+	0x0000,     /* R193 */
+	0x0000,     /* R194 */
+	0x0026,     /* R195 */
+	0x0C00,     /* R196 */
+	0x0006,     /* R197 */
+	0x0000,     /* R198 */
+	0x0003,     /* R199 - Limit Switch Control */
+	0x001C,     /* R200 - LDO1 Control */
+	0x0400,     /* R201 - LDO1 Timeouts */
+	0x001C,     /* R202 - LDO1 Low Power */
+	0x0010,     /* R203 - LDO2 Control */
+	0x0C00,     /* R204 - LDO2 Timeouts */
+	0x001C,     /* R205 - LDO2 Low Power */
+	0x0015,     /* R206 - LDO3 Control */
+	0x0000,     /* R207 - LDO3 Timeouts */
+	0x001C,     /* R208 - LDO3 Low Power */
+	0x001A,     /* R209 - LDO4 Control */
+	0x0000,     /* R210 - LDO4 Timeouts */
+	0x001C,     /* R211 - LDO4 Low Power */
+	0x0000,     /* R212 */
+	0x0000,     /* R213 */
+	0x0000,     /* R214 */
+	0x0000,     /* R215 - VCC_FAULT Masks */
+	0x001F,     /* R216 - Main Bandgap Control */
+	0x0000,     /* R217 - OSC Control */
+	0x9000,     /* R218 - RTC Tick Control */
+	0x0000,     /* R219 - Security1 */
+	0x4000,     /* R220 */
+	0x0000,     /* R221 */
+	0x0000,     /* R222 */
+	0x0000,     /* R223 */
+	0x0000,     /* R224 - Signal overrides */
+	0x0000,     /* R225 - DCDC/LDO status */
+	0x0000,     /* R226 - Charger Overides/status */
+	0x0000,     /* R227 - misc overrides */
+	0x0000,     /* R228 - Supply overrides/status 1 */
+	0x0000,     /* R229 - Supply overrides/status 2 */
+	0xE000,     /* R230 - GPIO Pin Status */
+	0x0000,     /* R231 - comparotor overrides */
+	0x0000,     /* R232 */
+	0x0000,     /* R233 - State Machine status */
+	0x1200,     /* R234 - FLL Test 1 */
+	0x0000,     /* R235 */
+	0x8000,     /* R236 */
+	0x0000,     /* R237 */
+	0x0000,     /* R238 */
+	0x0000,     /* R239 */
+	0x0003,     /* R240 */
+	0x0000,     /* R241 */
+	0x0000,     /* R242 */
+	0x0004,     /* R243 */
+	0x0300,     /* R244 */
+	0x0000,     /* R245 */
+	0x0200,     /* R246 */
+	0x0000,     /* R247 */
+	0x1000,     /* R248 - DCDC1 Test Controls */
+	0x1000,     /* R249 */
+	0x1000,     /* R250 - DCDC3 Test Controls */
+	0x1000,     /* R251 - DCDC4 Test Controls */
+};
+#endif
+
+#ifdef CONFIG_MFD_WM8351_CONFIG_MODE_3
+
+#undef WM8350_HAVE_CONFIG_MODE
+#define WM8350_HAVE_CONFIG_MODE
+
+const u16 wm8351_mode3_defaults[] = {
+	0x6143,     /* R0   - Reset/ID */
+	0x0000,     /* R1   - ID */
+	0x0001,     /* R2   - Revision */
+	0x1C02,     /* R3   - System Control 1 */
+	0x0204,     /* R4   - System Control 2 */
+	0x0000,     /* R5   - System Hibernate */
+	0x8A00,     /* R6   - Interface Control */
+	0x0000,     /* R7 */
+	0x8000,     /* R8   - Power mgmt (1) */
+	0x0000,     /* R9   - Power mgmt (2) */
+	0x0000,     /* R10  - Power mgmt (3) */
+	0x2000,     /* R11  - Power mgmt (4) */
+	0x0E00,     /* R12  - Power mgmt (5) */
+	0x0000,     /* R13  - Power mgmt (6) */
+	0x0000,     /* R14  - Power mgmt (7) */
+	0x0000,     /* R15 */
+	0x0000,     /* R16  - RTC Seconds/Minutes */
+	0x0100,     /* R17  - RTC Hours/Day */
+	0x0101,     /* R18  - RTC Date/Month */
+	0x1400,     /* R19  - RTC Year */
+	0x0000,     /* R20  - Alarm Seconds/Minutes */
+	0x0000,     /* R21  - Alarm Hours/Day */
+	0x0000,     /* R22  - Alarm Date/Month */
+	0x0320,     /* R23  - RTC Time Control */
+	0x0000,     /* R24  - System Interrupts */
+	0x0000,     /* R25  - Interrupt Status 1 */
+	0x0000,     /* R26  - Interrupt Status 2 */
+	0x0000,     /* R27 */
+	0x0000,     /* R28  - Under Voltage Interrupt status */
+	0x0000,     /* R29  - Over Current Interrupt status */
+	0x0000,     /* R30  - GPIO Interrupt Status */
+	0x0000,     /* R31  - Comparator Interrupt Status */
+	0x3FFF,     /* R32  - System Interrupts Mask */
+	0x0000,     /* R33  - Interrupt Status 1 Mask */
+	0x0000,     /* R34  - Interrupt Status 2 Mask */
+	0x0000,     /* R35 */
+	0x0000,     /* R36  - Under Voltage Interrupt status Mask */
+	0x0000,     /* R37  - Over Current Interrupt status Mask */
+	0x0000,     /* R38  - GPIO Interrupt Status Mask */
+	0x0000,     /* R39  - Comparator Interrupt Status Mask */
+	0x0040,     /* R40  - Clock Control 1 */
+	0x0000,     /* R41  - Clock Control 2 */
+	0x3A00,     /* R42  - FLL Control 1 */
+	0x7086,     /* R43  - FLL Control 2 */
+	0xC226,     /* R44  - FLL Control 3 */
+	0x0000,     /* R45  - FLL Control 4 */
+	0x0000,     /* R46 */
+	0x0000,     /* R47 */
+	0x0000,     /* R48  - DAC Control */
+	0x0000,     /* R49 */
+	0x00C0,     /* R50  - DAC Digital Volume L */
+	0x00C0,     /* R51  - DAC Digital Volume R */
+	0x0000,     /* R52 */
+	0x0040,     /* R53  - DAC LR Rate */
+	0x0000,     /* R54  - DAC Clock Control */
+	0x0000,     /* R55 */
+	0x0000,     /* R56 */
+	0x0000,     /* R57 */
+	0x4000,     /* R58  - DAC Mute */
+	0x0000,     /* R59  - DAC Mute Volume */
+	0x0000,     /* R60  - DAC Side */
+	0x0000,     /* R61 */
+	0x0000,     /* R62 */
+	0x0000,     /* R63 */
+	0x8000,     /* R64  - ADC Control */
+	0x0000,     /* R65 */
+	0x00C0,     /* R66  - ADC Digital Volume L */
+	0x00C0,     /* R67  - ADC Digital Volume R */
+	0x0000,     /* R68  - ADC Divider */
+	0x0000,     /* R69 */
+	0x0040,     /* R70  - ADC LR Rate */
+	0x0000,     /* R71 */
+	0x0303,     /* R72  - Input Control */
+	0x0000,     /* R73  - IN3 Input Control */
+	0x0000,     /* R74  - Mic Bias Control */
+	0x0000,     /* R75 */
+	0x0000,     /* R76  - Output Control */
+	0x0000,     /* R77  - Jack Detect */
+	0x0000,     /* R78  - Anti Pop Control */
+	0x0000,     /* R79 */
+	0x0040,     /* R80  - Left Input Volume */
+	0x0040,     /* R81  - Right Input Volume */
+	0x0000,     /* R82 */
+	0x0000,     /* R83 */
+	0x0000,     /* R84 */
+	0x0000,     /* R85 */
+	0x0000,     /* R86 */
+	0x0000,     /* R87 */
+	0x0800,     /* R88  - Left Mixer Control */
+	0x1000,     /* R89  - Right Mixer Control */
+	0x0000,     /* R90 */
+	0x0000,     /* R91 */
+	0x0000,     /* R92  - OUT3 Mixer Control */
+	0x0000,     /* R93  - OUT4 Mixer Control */
+	0x0000,     /* R94 */
+	0x0000,     /* R95 */
+	0x0000,     /* R96  - Output Left Mixer Volume */
+	0x0000,     /* R97  - Output Right Mixer Volume */
+	0x0000,     /* R98  - Input Mixer Volume L */
+	0x0000,     /* R99  - Input Mixer Volume R */
+	0x0000,     /* R100 - Input Mixer Volume */
+	0x0000,     /* R101 */
+	0x0000,     /* R102 */
+	0x0000,     /* R103 */
+	0x00E4,     /* R104 - OUT1L Volume */
+	0x00E4,     /* R105 - OUT1R Volume */
+	0x00E4,     /* R106 - OUT2L Volume */
+	0x02E4,     /* R107 - OUT2R Volume */
+	0x0000,     /* R108 */
+	0x0000,     /* R109 */
+	0x0000,     /* R110 */
+	0x0000,     /* R111 - BEEP Volume */
+	0x0A00,     /* R112 - AI Formating */
+	0x0000,     /* R113 - ADC DAC COMP */
+	0x0020,     /* R114 - AI ADC Control */
+	0x0020,     /* R115 - AI DAC Control */
+	0x0000,     /* R116 */
+	0x0000,     /* R117 */
+	0x0000,     /* R118 */
+	0x0000,     /* R119 */
+	0x0000,     /* R120 */
+	0x0000,     /* R121 */
+	0x0000,     /* R122 */
+	0x0000,     /* R123 */
+	0x0000,     /* R124 */
+	0x0000,     /* R125 */
+	0x0000,     /* R126 */
+	0x0000,     /* R127 */
+	0x1FFF,     /* R128 - GPIO Debounce */
+	0x0010,     /* R129 - GPIO Pin pull up Control */
+	0x0000,     /* R130 - GPIO Pull down Control */
+	0x0000,     /* R131 - GPIO Interrupt Mode */
+	0x0000,     /* R132 */
+	0x0000,     /* R133 - GPIO Control */
+	0x0BFB,     /* R134 - GPIO Configuration (i/o) */
+	0x0FFD,     /* R135 - GPIO Pin Polarity / Type */
+	0x0000,     /* R136 */
+	0x0000,     /* R137 */
+	0x0000,     /* R138 */
+	0x0000,     /* R139 */
+	0x0310,     /* R140 - GPIO Function Select 1 */
+	0x0001,     /* R141 - GPIO Function Select 2 */
+	0x2300,     /* R142 - GPIO Function Select 3 */
+	0x0003,     /* R143 - GPIO Function Select 4 */
+	0x0000,     /* R144 - Digitiser Control (1) */
+	0x0002,     /* R145 - Digitiser Control (2) */
+	0x0000,     /* R146 */
+	0x0000,     /* R147 */
+	0x0000,     /* R148 */
+	0x0000,     /* R149 */
+	0x0000,     /* R150 */
+	0x0000,     /* R151 */
+	0x7000,     /* R152 - AUX1 Readback */
+	0x7000,     /* R153 - AUX2 Readback */
+	0x7000,     /* R154 - AUX3 Readback */
+	0x7000,     /* R155 - AUX4 Readback */
+	0x0000,     /* R156 - USB Voltage Readback */
+	0x0000,     /* R157 - LINE Voltage Readback */
+	0x0000,     /* R158 - BATT Voltage Readback */
+	0x0000,     /* R159 - Chip Temp Readback */
+	0x0000,     /* R160 */
+	0x0000,     /* R161 */
+	0x0000,     /* R162 */
+	0x0000,     /* R163 - Generic Comparator Control */
+	0x0000,     /* R164 - Generic comparator 1 */
+	0x0000,     /* R165 - Generic comparator 2 */
+	0x0000,     /* R166 - Generic comparator 3 */
+	0x0000,     /* R167 - Generic comparator 4 */
+	0xA00F,     /* R168 - Battery Charger Control 1 */
+	0x0B06,     /* R169 - Battery Charger Control 2 */
+	0x0000,     /* R170 - Battery Charger Control 3 */
+	0x0000,     /* R171 */
+	0x0000,     /* R172 - Current Sink Driver A */
+	0x0000,     /* R173 - CSA Flash control */
+	0x0000,     /* R174 */
+	0x0000,     /* R175 */
+	0x0000,     /* R176 - DCDC/LDO requested */
+	0x032D,     /* R177 - DCDC Active options */
+	0x0000,     /* R178 - DCDC Sleep options */
+	0x0025,     /* R179 - Power-check comparator */
+	0x000E,     /* R180 - DCDC1 Control */
+	0x0400,     /* R181 - DCDC1 Timeouts */
+	0x1006,     /* R182 - DCDC1 Low Power */
+	0x0018,     /* R183 - DCDC2 Control */
+	0x0000,     /* R184 - DCDC2 Timeouts */
+	0x0000,     /* R185 */
+	0x0026,     /* R186 - DCDC3 Control */
+	0x0800,     /* R187 - DCDC3 Timeouts */
+	0x0006,     /* R188 - DCDC3 Low Power */
+	0x0062,     /* R189 - DCDC4 Control */
+	0x1400,     /* R190 - DCDC4 Timeouts */
+	0x0006,     /* R191 - DCDC4 Low Power */
+	0x0008,     /* R192 */
+	0x0000,     /* R193 */
+	0x0000,     /* R194 */
+	0x0026,     /* R195 */
+	0x0400,     /* R196 */
+	0x0006,     /* R197 */
+	0x0000,     /* R198 */
+	0x0003,     /* R199 - Limit Switch Control */
+	0x0006,     /* R200 - LDO1 Control */
+	0x0C00,     /* R201 - LDO1 Timeouts */
+	0x001C,     /* R202 - LDO1 Low Power */
+	0x0016,     /* R203 - LDO2 Control */
+	0x0000,     /* R204 - LDO2 Timeouts */
+	0x001C,     /* R205 - LDO2 Low Power */
+	0x0019,     /* R206 - LDO3 Control */
+	0x0000,     /* R207 - LDO3 Timeouts */
+	0x001C,     /* R208 - LDO3 Low Power */
+	0x001A,     /* R209 - LDO4 Control */
+	0x1000,     /* R210 - LDO4 Timeouts */
+	0x001C,     /* R211 - LDO4 Low Power */
+	0x0000,     /* R212 */
+	0x0000,     /* R213 */
+	0x0000,     /* R214 */
+	0x0000,     /* R215 - VCC_FAULT Masks */
+	0x001F,     /* R216 - Main Bandgap Control */
+	0x0000,     /* R217 - OSC Control */
+	0x9000,     /* R218 - RTC Tick Control */
+	0x0000,     /* R219 - Security1 */
+	0x4000,     /* R220 */
+	0x0000,     /* R221 */
+	0x0000,     /* R222 */
+	0x0000,     /* R223 */
+	0x0000,     /* R224 - Signal overrides */
+	0x0000,     /* R225 - DCDC/LDO status */
+	0x0000,     /* R226 - Charger Overides/status */
+	0x0000,     /* R227 - misc overrides */
+	0x0000,     /* R228 - Supply overrides/status 1 */
+	0x0000,     /* R229 - Supply overrides/status 2 */
+	0xE000,     /* R230 - GPIO Pin Status */
+	0x0000,     /* R231 - comparotor overrides */
+	0x0000,     /* R232 */
+	0x0000,     /* R233 - State Machine status */
+	0x1200,     /* R234 - FLL Test 1 */
+	0x0000,     /* R235 */
+	0x8000,     /* R236 */
+	0x0000,     /* R237 */
+	0x0000,     /* R238 */
+	0x0000,     /* R239 */
+	0x0003,     /* R240 */
+	0x0000,     /* R241 */
+	0x0000,     /* R242 */
+	0x0004,     /* R243 */
+	0x0300,     /* R244 */
+	0x0000,     /* R245 */
+	0x0200,     /* R246 */
+	0x0000,     /* R247 */
+	0x1000,     /* R248 - DCDC1 Test Controls */
+	0x1000,     /* R249 */
+	0x1000,     /* R250 - DCDC3 Test Controls */
+	0x1000,     /* R251 - DCDC4 Test Controls */
+};
+#endif
+
 #ifdef CONFIG_MFD_WM8352_CONFIG_MODE_0
 
 #undef WM8350_HAVE_CONFIG_MODE
diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h
index 737579086d01..980669d50dca 100644
--- a/include/linux/mfd/wm8350/core.h
+++ b/include/linux/mfd/wm8350/core.h
@@ -589,6 +589,10 @@ extern const u16 wm8350_mode0_defaults[];
 extern const u16 wm8350_mode1_defaults[];
 extern const u16 wm8350_mode2_defaults[];
 extern const u16 wm8350_mode3_defaults[];
+extern const u16 wm8351_mode0_defaults[];
+extern const u16 wm8351_mode1_defaults[];
+extern const u16 wm8351_mode2_defaults[];
+extern const u16 wm8351_mode3_defaults[];
 extern const u16 wm8352_mode0_defaults[];
 extern const u16 wm8352_mode1_defaults[];
 extern const u16 wm8352_mode2_defaults[];
-- 
cgit v1.2.3


From 0931a4c6dbfab03f2bfd22a9170130f7b155d53a Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Mon, 22 Dec 2008 12:05:27 +0100
Subject: mfd: dm355evm msp430 driver

Basic MFD framework for the MSP430 microcontroller firmware used
on the dm355evm board:

 - Provides an interface for other drivers: register read/write
   utilities, and register declarations.

 - Directly exports:
     * Many signals through the GPIO framework
         + LEDs
         + SW6 through gpio sysfs
	 + NTSC/nPAL jumper through gpio sysfs
	 + ... more could be added later, e.g. MMC signals
     * Child devices:
	+ LEDs, via leds-gpio child (and default triggers)
	+ RTC, via rtc-dm355evm child device
	+ Buttons and IR control, via dm355evm_keys

 - Supports power-off system call.  Use the reset button to power
   the board back up; the power supply LED will be on, but the
   MSP430 waits to re-activate the regulators.

 - On probe() this:
     * Announces firmware revision
     * Turns off the banked LEDs
     * Exports the resources noted above
     * Hooks the power-off support
     * Muxes tvp5146 -or- imager for video input

Unless the new tvp514x driver (tracked for mainline) is configured,
this assumes that some custom imager driver handles video-in.

This completely ignores the registers reporting the output voltages
on the various power supplies.  Someone could add a hwmon interface
if that seems useful.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Samuel Ortiz <sameo@openedhand.com>
---
 drivers/mfd/Kconfig              |   8 +
 drivers/mfd/Makefile             |   2 +
 drivers/mfd/dm355evm_msp.c       | 420 +++++++++++++++++++++++++++++++++++++++
 include/linux/i2c/dm355evm_msp.h |  79 ++++++++
 4 files changed, 509 insertions(+)
 create mode 100644 drivers/mfd/dm355evm_msp.c
 create mode 100644 include/linux/i2c/dm355evm_msp.h

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 781a27955ff5..02e9146ca44a 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -34,6 +34,14 @@ config MFD_ASIC3
 	  This driver supports the ASIC3 multifunction chip found on many
 	  PDAs (mainly iPAQ and HTC based ones)
 
+config MFD_DM355EVM_MSP
+	bool "DaVinci DM355 EVM microcontroller"
+	depends on I2C && MACH_DAVINCI_DM355_EVM
+	help
+	  This driver supports the MSP430 microcontroller used on these
+	  boards.  MSP430 firmware manages resets and power sequencing,
+	  inputs from buttons and the IR remote, LEDs, an RTC, and more.
+
 config HTC_EGPIO
 	bool "HTC EGPIO support"
 	depends on GENERIC_HARDIRQS && GPIOLIB && ARM
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 9a5ad8af9116..8f6cd5c48930 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -8,6 +8,8 @@ obj-$(CONFIG_MFD_ASIC3)		+= asic3.o
 obj-$(CONFIG_HTC_EGPIO)		+= htc-egpio.o
 obj-$(CONFIG_HTC_PASIC3)	+= htc-pasic3.o
 
+obj-$(CONFIG_MFD_DM355EVM_MSP)	+= dm355evm_msp.o
+
 obj-$(CONFIG_MFD_T7L66XB)	+= t7l66xb.o
 obj-$(CONFIG_MFD_TC6387XB)	+= tc6387xb.o
 obj-$(CONFIG_MFD_TC6393XB)	+= tc6393xb.o
diff --git a/drivers/mfd/dm355evm_msp.c b/drivers/mfd/dm355evm_msp.c
new file mode 100644
index 000000000000..4214b3f72426
--- /dev/null
+++ b/drivers/mfd/dm355evm_msp.c
@@ -0,0 +1,420 @@
+/*
+ * dm355evm_msp.c - driver for MSP430 firmware on DM355EVM board
+ *
+ * Copyright (C) 2008 David Brownell
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/gpio.h>
+#include <linux/leds.h>
+#include <linux/i2c.h>
+#include <linux/i2c/dm355evm_msp.h>
+
+
+/*
+ * The DM355 is a DaVinci chip with video support but no C64+ DSP.  Its
+ * EVM board has an MSP430 programmed with firmware for various board
+ * support functions.  This driver exposes some of them directly, and
+ * supports other drivers (e.g. RTC, input) for more complex access.
+ *
+ * Because this firmware is entirely board-specific, this file embeds
+ * knowledge that would be passed as platform_data in a generic driver.
+ *
+ * This driver was tested with firmware revision A4.
+ */
+
+#if defined(CONFIG_KEYBOARD_DM355EVM) \
+		|| defined(CONFIG_KEYBOARD_DM355EVM_MODULE)
+#define msp_has_keyboard()	true
+#else
+#define msp_has_keyboard()	false
+#endif
+
+#if defined(CONFIG_LEDS_GPIO) || defined(CONFIG_LEDS_GPIO_MODULE)
+#define msp_has_leds()		true
+#else
+#define msp_has_leds()		false
+#endif
+
+#if defined(CONFIG_RTC_DRV_DM355EVM) || defined(CONFIG_RTC_DRV_DM355EVM_MODULE)
+#define msp_has_rtc()		true
+#else
+#define msp_has_rtc()		false
+#endif
+
+#if defined(CONFIG_VIDEO_TVP514X) || defined(CONFIG_VIDEO_TVP514X_MODULE)
+#define msp_has_tvp()		true
+#else
+#define msp_has_tvp()		false
+#endif
+
+
+/*----------------------------------------------------------------------*/
+
+/* REVISIT for paranoia's sake, retry reads/writes on error */
+
+static struct i2c_client *msp430;
+
+/**
+ * dm355evm_msp_write - Writes a register in dm355evm_msp
+ * @value: the value to be written
+ * @reg: register address
+ *
+ * Returns result of operation - 0 is success, else negative errno
+ */
+int dm355evm_msp_write(u8 value, u8 reg)
+{
+	return i2c_smbus_write_byte_data(msp430, reg, value);
+}
+EXPORT_SYMBOL(dm355evm_msp_write);
+
+/**
+ * dm355evm_msp_read - Reads a register from dm355evm_msp
+ * @reg: register address
+ *
+ * Returns result of operation - value, or negative errno
+ */
+int dm355evm_msp_read(u8 reg)
+{
+	return i2c_smbus_read_byte_data(msp430, reg);
+}
+EXPORT_SYMBOL(dm355evm_msp_read);
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Many of the msp430 pins are just used as fixed-direction GPIOs.
+ * We could export a few more of them this way, if we wanted.
+ */
+#define MSP_GPIO(bit,reg)	((DM355EVM_MSP_ ## reg) << 3 | (bit))
+
+static const u8 msp_gpios[] = {
+	/* eight leds */
+	MSP_GPIO(0, LED), MSP_GPIO(1, LED),
+	MSP_GPIO(2, LED), MSP_GPIO(3, LED),
+	MSP_GPIO(4, LED), MSP_GPIO(5, LED),
+	MSP_GPIO(6, LED), MSP_GPIO(7, LED),
+	/* SW6 and the NTSC/nPAL jumper */
+	MSP_GPIO(0, SWITCH1), MSP_GPIO(1, SWITCH1),
+	MSP_GPIO(2, SWITCH1), MSP_GPIO(3, SWITCH1),
+	MSP_GPIO(4, SWITCH1),
+};
+
+#define MSP_GPIO_REG(offset)	(msp_gpios[(offset)] >> 3)
+#define MSP_GPIO_MASK(offset)	BIT(msp_gpios[(offset)] & 0x07)
+
+static int msp_gpio_in(struct gpio_chip *chip, unsigned offset)
+{
+	switch (MSP_GPIO_REG(offset)) {
+	case DM355EVM_MSP_SWITCH1:
+	case DM355EVM_MSP_SWITCH2:
+	case DM355EVM_MSP_SDMMC:
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
+static u8 msp_led_cache;
+
+static int msp_gpio_get(struct gpio_chip *chip, unsigned offset)
+{
+	int reg, status;
+
+	reg = MSP_GPIO_REG(offset);
+	status = dm355evm_msp_read(reg);
+	if (status < 0)
+		return status;
+	if (reg == DM355EVM_MSP_LED)
+		msp_led_cache = status;
+	return status & MSP_GPIO_MASK(offset);
+}
+
+static int msp_gpio_out(struct gpio_chip *chip, unsigned offset, int value)
+{
+	int mask, bits;
+
+	/* NOTE:  there are some other signals that could be
+	 * packaged as output GPIOs, but they aren't as useful
+	 * as the LEDs ... so for now we don't.
+	 */
+	if (MSP_GPIO_REG(offset) != DM355EVM_MSP_LED)
+		return -EINVAL;
+
+	mask = MSP_GPIO_MASK(offset);
+	bits = msp_led_cache;
+
+	bits &= ~mask;
+	if (value)
+		bits |= mask;
+	msp_led_cache = bits;
+
+	return dm355evm_msp_write(bits, DM355EVM_MSP_LED);
+}
+
+static void msp_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
+{
+	msp_gpio_out(chip, offset, value);
+}
+
+static struct gpio_chip dm355evm_msp_gpio = {
+	.label			= "dm355evm_msp",
+	.owner			= THIS_MODULE,
+	.direction_input	= msp_gpio_in,
+	.get			= msp_gpio_get,
+	.direction_output	= msp_gpio_out,
+	.set			= msp_gpio_set,
+	.base			= -EINVAL,		/* dynamic assignment */
+	.ngpio			= ARRAY_SIZE(msp_gpios),
+	.can_sleep		= true,
+};
+
+/*----------------------------------------------------------------------*/
+
+static struct device *add_child(struct i2c_client *client, const char *name,
+		void *pdata, unsigned pdata_len,
+		bool can_wakeup, int irq)
+{
+	struct platform_device	*pdev;
+	int			status;
+
+	pdev = platform_device_alloc(name, -1);
+	if (!pdev) {
+		dev_dbg(&client->dev, "can't alloc dev\n");
+		status = -ENOMEM;
+		goto err;
+	}
+
+	device_init_wakeup(&pdev->dev, can_wakeup);
+	pdev->dev.parent = &client->dev;
+
+	if (pdata) {
+		status = platform_device_add_data(pdev, pdata, pdata_len);
+		if (status < 0) {
+			dev_dbg(&pdev->dev, "can't add platform_data\n");
+			goto err;
+		}
+	}
+
+	if (irq) {
+		struct resource r = {
+			.start = irq,
+			.flags = IORESOURCE_IRQ,
+		};
+
+		status = platform_device_add_resources(pdev, &r, 1);
+		if (status < 0) {
+			dev_dbg(&pdev->dev, "can't add irq\n");
+			goto err;
+		}
+	}
+
+	status = platform_device_add(pdev);
+
+err:
+	if (status < 0) {
+		platform_device_put(pdev);
+		dev_err(&client->dev, "can't add %s dev\n", name);
+		return ERR_PTR(status);
+	}
+	return &pdev->dev;
+}
+
+static int add_children(struct i2c_client *client)
+{
+	static const struct {
+		int offset;
+		char *label;
+	} config_inputs[] = {
+		/* 8 == right after the LEDs */
+		{ 8 + 0, "sw6_1", },
+		{ 8 + 1, "sw6_2", },
+		{ 8 + 2, "sw6_3", },
+		{ 8 + 3, "sw6_4", },
+		{ 8 + 4, "NTSC/nPAL", },
+	};
+
+	struct device	*child;
+	int		status;
+	int		i;
+
+	/* GPIO-ish stuff */
+	dm355evm_msp_gpio.dev = &client->dev;
+	status = gpiochip_add(&dm355evm_msp_gpio);
+	if (status < 0)
+		return status;
+
+	/* LED output */
+	if (msp_has_leds()) {
+#define GPIO_LED(l)	.name = l, .active_low = true
+		static struct gpio_led evm_leds[] = {
+			{ GPIO_LED("dm355evm::ds14"),
+				.default_trigger = "heartbeat", },
+			{ GPIO_LED("dm355evm::ds15"),
+				.default_trigger = "mmc0", },
+			{ GPIO_LED("dm355evm::ds16"),
+				/* could also be a CE-ATA drive */
+				.default_trigger = "mmc1", },
+			{ GPIO_LED("dm355evm::ds17"),
+				.default_trigger = "nand-disk", },
+			{ GPIO_LED("dm355evm::ds18"), },
+			{ GPIO_LED("dm355evm::ds19"), },
+			{ GPIO_LED("dm355evm::ds20"), },
+			{ GPIO_LED("dm355evm::ds21"), },
+		};
+#undef GPIO_LED
+
+		struct gpio_led_platform_data evm_led_data = {
+			.num_leds	= ARRAY_SIZE(evm_leds),
+			.leds		= evm_leds,
+		};
+
+		for (i = 0; i < ARRAY_SIZE(evm_leds); i++)
+			evm_leds[i].gpio = i + dm355evm_msp_gpio.base;
+
+		/* NOTE:  these are the only fully programmable LEDs
+		 * on the board, since GPIO-61/ds22 (and many signals
+		 * going to DC7) must be used for AEMIF address lines
+		 * unless the top 1 GB of NAND is unused...
+		 */
+		child = add_child(client, "leds-gpio",
+				&evm_led_data, sizeof(evm_led_data),
+				false, 0);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+	}
+
+	/* configuration inputs */
+	for (i = 0; i < ARRAY_SIZE(config_inputs); i++) {
+		int gpio = dm355evm_msp_gpio.base + config_inputs[i].offset;
+
+		gpio_request(gpio, config_inputs[i].label);
+		gpio_direction_input(gpio);
+
+		/* make it easy for userspace to see these */
+		gpio_export(gpio, false);
+	}
+
+	/* RTC is a 32 bit counter, no alarm */
+	if (msp_has_rtc()) {
+		child = add_child(client, "rtc-dm355evm",
+				NULL, 0, false, 0);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+	}
+
+	/* input from buttons and IR remote (uses the IRQ) */
+	if (msp_has_keyboard()) {
+		child = add_child(client, "dm355evm_keys",
+				NULL, 0, true, client->irq);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+	}
+
+	return 0;
+}
+
+/*----------------------------------------------------------------------*/
+
+static void dm355evm_command(unsigned command)
+{
+	int status;
+
+	status = dm355evm_msp_write(command, DM355EVM_MSP_COMMAND);
+	if (status < 0)
+		dev_err(&msp430->dev, "command %d failure %d\n",
+				command, status);
+}
+
+static void dm355evm_power_off(void)
+{
+	dm355evm_command(MSP_COMMAND_POWEROFF);
+}
+
+static int dm355evm_msp_remove(struct i2c_client *client)
+{
+	pm_power_off = NULL;
+	msp430 = NULL;
+	return 0;
+}
+
+static int
+dm355evm_msp_probe(struct i2c_client *client, const struct i2c_device_id *id)
+{
+	int		status;
+	const char	*video = msp_has_tvp() ? "TVP5146" : "imager";
+
+	if (msp430)
+		return -EBUSY;
+	msp430 = client;
+
+	/* display revision status; doubles as sanity check */
+	status = dm355evm_msp_read(DM355EVM_MSP_FIRMREV);
+	if (status < 0)
+		goto fail;
+	dev_info(&client->dev, "firmware v.%02X, %s as video-in\n",
+			status, video);
+
+	/* mux video input:  either tvp5146 or some external imager */
+	status = dm355evm_msp_write(msp_has_tvp() ? 0 : MSP_VIDEO_IMAGER,
+			DM355EVM_MSP_VIDEO_IN);
+	if (status < 0)
+		dev_warn(&client->dev, "error %d muxing %s as video-in\n",
+			status, video);
+
+	/* init LED cache, and turn off the LEDs */
+	msp_led_cache = 0xff;
+	dm355evm_msp_write(msp_led_cache, DM355EVM_MSP_LED);
+
+	/* export capabilities we support */
+	status = add_children(client);
+	if (status < 0)
+		goto fail;
+
+	/* PM hookup */
+	pm_power_off = dm355evm_power_off;
+
+	return 0;
+
+fail:
+	/* FIXME remove children ... */
+	dm355evm_msp_remove(client);
+	return status;
+}
+
+static const struct i2c_device_id dm355evm_msp_ids[] = {
+	{ "dm355evm_msp", 0 },
+	{ /* end of list */ },
+};
+MODULE_DEVICE_TABLE(i2c, dm355evm_msp_ids);
+
+static struct i2c_driver dm355evm_msp_driver = {
+	.driver.name	= "dm355evm_msp",
+	.id_table	= dm355evm_msp_ids,
+	.probe		= dm355evm_msp_probe,
+	.remove		= dm355evm_msp_remove,
+};
+
+static int __init dm355evm_msp_init(void)
+{
+	return i2c_add_driver(&dm355evm_msp_driver);
+}
+subsys_initcall(dm355evm_msp_init);
+
+static void __exit dm355evm_msp_exit(void)
+{
+	i2c_del_driver(&dm355evm_msp_driver);
+}
+module_exit(dm355evm_msp_exit);
+
+MODULE_DESCRIPTION("Interface to MSP430 firmware on DM355EVM");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/i2c/dm355evm_msp.h b/include/linux/i2c/dm355evm_msp.h
new file mode 100644
index 000000000000..372470350fab
--- /dev/null
+++ b/include/linux/i2c/dm355evm_msp.h
@@ -0,0 +1,79 @@
+/*
+ * dm355evm_msp.h - support MSP430 microcontroller on DM355EVM board
+ */
+#ifndef __LINUX_I2C_DM355EVM_MSP
+#define __LINUX_I2C_DM355EVM_MSP
+
+/*
+ * Written against Spectrum's writeup for the A4 firmware revision,
+ * and tweaked to match source and rev D2 schematics by removing CPLD
+ * and NOR flash hooks (which were last appropriate in rev B boards).
+ *
+ * Note that the firmware supports a flavor of write posting ... to be
+ * sure a write completes, issue another read or write.
+ */
+
+/* utilities to access "registers" emulated by msp430 firmware */
+extern int dm355evm_msp_write(u8 value, u8 reg);
+extern int dm355evm_msp_read(u8 reg);
+
+
+/* command/control registers */
+#define DM355EVM_MSP_COMMAND		0x00
+#	define MSP_COMMAND_NULL		0
+#	define MSP_COMMAND_RESET_COLD	1
+#	define MSP_COMMAND_RESET_WARM	2
+#	define MSP_COMMAND_RESET_WARM_I	3
+#	define MSP_COMMAND_POWEROFF	4
+#	define MSP_COMMAND_IR_REINIT	5
+#define DM355EVM_MSP_STATUS		0x01
+#	define MSP_STATUS_BAD_OFFSET	BIT(0)
+#	define MSP_STATUS_BAD_COMMAND	BIT(1)
+#	define MSP_STATUS_POWER_ERROR	BIT(2)
+#	define MSP_STATUS_RXBUF_OVERRUN	BIT(3)
+#define DM355EVM_MSP_RESET		0x02	/* 0 bits == in reset */
+#	define MSP_RESET_DC5		BIT(0)
+#	define MSP_RESET_TVP5154	BIT(2)
+#	define MSP_RESET_IMAGER		BIT(3)
+#	define MSP_RESET_ETHERNET	BIT(4)
+#	define MSP_RESET_SYS		BIT(5)
+#	define MSP_RESET_AIC33		BIT(7)
+
+/* GPIO registers ... bit patterns mostly match the source MSP ports */
+#define DM355EVM_MSP_LED		0x03	/* active low (MSP P4) */
+#define DM355EVM_MSP_SWITCH1		0x04	/* (MSP P5, masked) */
+#	define MSP_SWITCH1_SW6_1	BIT(0)
+#	define MSP_SWITCH1_SW6_2	BIT(1)
+#	define MSP_SWITCH1_SW6_3	BIT(2)
+#	define MSP_SWITCH1_SW6_4	BIT(3)
+#	define MSP_SWITCH1_J1		BIT(4)	/* NTSC/PAL */
+#	define MSP_SWITCH1_MSP_INT	BIT(5)	/* active low */
+#define DM355EVM_MSP_SWITCH2		0x05	/* (MSP P6, masked) */
+#	define MSP_SWITCH2_SW10		BIT(3)
+#	define MSP_SWITCH2_SW11		BIT(4)
+#	define MSP_SWITCH2_SW12		BIT(5)
+#	define MSP_SWITCH2_SW13		BIT(6)
+#	define MSP_SWITCH2_SW14		BIT(7)
+#define DM355EVM_MSP_SDMMC		0x06	/* (MSP P2, masked) */
+#	define MSP_SDMMC_0_WP		BIT(1)
+#	define MSP_SDMMC_0_CD		BIT(2)	/* active low */
+#	define MSP_SDMMC_1_WP		BIT(3)
+#	define MSP_SDMMC_1_CD		BIT(4)	/* active low */
+#define DM355EVM_MSP_FIRMREV		0x07	/* not a GPIO (out of order) */
+#define DM355EVM_MSP_VIDEO_IN		0x08	/* (MSP P3, masked) */
+#	define MSP_VIDEO_IMAGER		BIT(7)	/* low == tvp5146 */
+
+/* power supply registers are currently omitted */
+
+/* RTC registers */
+#define DM355EVM_MSP_RTC_0		0x12	/* LSB */
+#define DM355EVM_MSP_RTC_1		0x13
+#define DM355EVM_MSP_RTC_2		0x14
+#define DM355EVM_MSP_RTC_3		0x15	/* MSB */
+
+/* input event queue registers; code == ((HIGH << 8) | LOW) */
+#define DM355EVM_MSP_INPUT_COUNT	0x16	/* decrement by reading LOW */
+#define DM355EVM_MSP_INPUT_HIGH		0x17
+#define DM355EVM_MSP_INPUT_LOW		0x18
+
+#endif /* __LINUX_I2C_DM355EVM_MSP */
-- 
cgit v1.2.3


From f3298dc4f2277874d40cb4fc3a6e277317d6603b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 10 Dec 2008 03:16:51 -0500
Subject: sanitize audit_socketcall

* don't bother with allocations
* now that it can't fail, make it return void

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/audit.h |  4 ++--
 kernel/auditsc.c      | 66 +++++++++++++++++++++++++++++----------------------
 net/socket.c          |  4 +---
 3 files changed, 41 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 26c4f6f65a46..466a953d4bf6 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -446,7 +446,7 @@ extern void audit_log_task_context(struct audit_buffer *ab);
 extern int __audit_ipc_obj(struct kern_ipc_perm *ipcp);
 extern int __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode);
 extern int audit_bprm(struct linux_binprm *bprm);
-extern int audit_socketcall(int nargs, unsigned long *args);
+extern void audit_socketcall(int nargs, unsigned long *args);
 extern int audit_sockaddr(int len, void *addr);
 extern int __audit_fd_pair(int fd1, int fd2);
 extern int audit_set_macxattr(const char *name);
@@ -549,7 +549,7 @@ extern int audit_signals;
 #define audit_ipc_obj(i) ({ 0; })
 #define audit_ipc_set_perm(q,u,g,m) ({ 0; })
 #define audit_bprm(p) ({ 0; })
-#define audit_socketcall(n,a) ({ 0; })
+#define audit_socketcall(n,a) ((void)0)
 #define audit_fd_pair(n,a) ({ 0; })
 #define audit_sockaddr(len, addr) ({ 0; })
 #define audit_set_macxattr(n) do { ; } while (0)
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index c2e43ebb1b68..5cda66466e14 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -168,12 +168,6 @@ struct audit_aux_data_execve {
 	struct mm_struct *mm;
 };
 
-struct audit_aux_data_socketcall {
-	struct audit_aux_data	d;
-	int			nargs;
-	unsigned long		args[0];
-};
-
 struct audit_aux_data_fd_pair {
 	struct	audit_aux_data d;
 	int	fd[2];
@@ -247,6 +241,14 @@ struct audit_context {
 	struct audit_tree_refs *trees, *first_trees;
 	int tree_count;
 
+	int type;
+	union {
+		struct {
+			int nargs;
+			long args[6];
+		} socketcall;
+	};
+
 #if AUDIT_DEBUG
 	int		    put_count;
 	int		    ino_count;
@@ -1226,6 +1228,27 @@ static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name)
 		audit_log_format(ab, " cap_fe=%d cap_fver=%x", name->fcap.fE, name->fcap_ver);
 }
 
+static void show_special(struct audit_context *context)
+{
+	struct audit_buffer *ab;
+	int i;
+
+	ab = audit_log_start(context, GFP_KERNEL, context->type);
+	if (!ab)
+		return;
+
+	switch (context->type) {
+	case AUDIT_SOCKETCALL: {
+		int nargs = context->socketcall.nargs;
+		audit_log_format(ab, "nargs=%d", nargs);
+		for (i = 0; i < nargs; i++)
+			audit_log_format(ab, " a%d=%lx", i,
+				context->socketcall.args[i]);
+		break; }
+	}
+	audit_log_end(ab);
+}
+
 static void audit_log_exit(struct audit_context *context, struct task_struct *tsk)
 {
 	const struct cred *cred;
@@ -1372,13 +1395,6 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 			audit_log_execve_info(context, &ab, axi);
 			break; }
 
-		case AUDIT_SOCKETCALL: {
-			struct audit_aux_data_socketcall *axs = (void *)aux;
-			audit_log_format(ab, "nargs=%d", axs->nargs);
-			for (i=0; i<axs->nargs; i++)
-				audit_log_format(ab, " a%d=%lx", i, axs->args[i]);
-			break; }
-
 		case AUDIT_FD_PAIR: {
 			struct audit_aux_data_fd_pair *axs = (void *)aux;
 			audit_log_format(ab, "fd0=%d fd1=%d", axs->fd[0], axs->fd[1]);
@@ -1410,6 +1426,9 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 		audit_log_end(ab);
 	}
 
+	if (context->type)
+		show_special(context);
+
 	if (context->sockaddr_len) {
 		ab = audit_log_start(context, GFP_KERNEL, AUDIT_SOCKADDR);
 		if (ab) {
@@ -1689,6 +1708,7 @@ void audit_syscall_exit(int valid, long return_code)
 		context->target_pid = 0;
 		context->target_sid = 0;
 		context->sockaddr_len = 0;
+		context->type = 0;
 		kfree(context->filterkey);
 		context->filterkey = NULL;
 		tsk->audit_context = context;
@@ -2406,27 +2426,17 @@ int audit_bprm(struct linux_binprm *bprm)
  * @nargs: number of args
  * @args: args array
  *
- * Returns 0 for success or NULL context or < 0 on error.
  */
-int audit_socketcall(int nargs, unsigned long *args)
+void audit_socketcall(int nargs, unsigned long *args)
 {
-	struct audit_aux_data_socketcall *ax;
 	struct audit_context *context = current->audit_context;
 
 	if (likely(!context || context->dummy))
-		return 0;
-
-	ax = kmalloc(sizeof(*ax) + nargs * sizeof(unsigned long), GFP_KERNEL);
-	if (!ax)
-		return -ENOMEM;
-
-	ax->nargs = nargs;
-	memcpy(ax->args, args, nargs * sizeof(unsigned long));
+		return;
 
-	ax->d.type = AUDIT_SOCKETCALL;
-	ax->d.next = context->aux;
-	context->aux = (void *)ax;
-	return 0;
+	context->type = AUDIT_SOCKETCALL;
+	context->socketcall.nargs = nargs;
+	memcpy(context->socketcall.args, args, nargs * sizeof(unsigned long));
 }
 
 /**
diff --git a/net/socket.c b/net/socket.c
index 2c730fc718ab..b41a92093e40 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2065,9 +2065,7 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args)
 	if (copy_from_user(a, args, nargs[call]))
 		return -EFAULT;
 
-	err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
-	if (err)
-		return err;
+	audit_socketcall(nargs[call] / sizeof(unsigned long), a);
 
 	a0 = a[0];
 	a1 = a[1];
-- 
cgit v1.2.3


From a33e6751003c5ade603737d828b1519d980ce392 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 10 Dec 2008 03:40:06 -0500
Subject: sanitize audit_ipc_obj()

* get rid of allocations
* make it return void
* simplify callers

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/audit.h |  9 +++---
 ipc/shm.c             |  4 +--
 ipc/util.c            |  9 ++----
 kernel/auditsc.c      | 88 ++++++++++++++++++++++-----------------------------
 4 files changed, 45 insertions(+), 65 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 466a953d4bf6..f8578b9088e1 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -443,7 +443,7 @@ extern int  audit_set_loginuid(struct task_struct *task, uid_t loginuid);
 #define audit_get_loginuid(t) ((t)->loginuid)
 #define audit_get_sessionid(t) ((t)->sessionid)
 extern void audit_log_task_context(struct audit_buffer *ab);
-extern int __audit_ipc_obj(struct kern_ipc_perm *ipcp);
+extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp);
 extern int __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode);
 extern int audit_bprm(struct linux_binprm *bprm);
 extern void audit_socketcall(int nargs, unsigned long *args);
@@ -460,11 +460,10 @@ extern int __audit_log_bprm_fcaps(struct linux_binprm *bprm,
 				  const struct cred *old);
 extern int __audit_log_capset(pid_t pid, const struct cred *new, const struct cred *old);
 
-static inline int audit_ipc_obj(struct kern_ipc_perm *ipcp)
+static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp)
 {
 	if (unlikely(!audit_dummy_context()))
-		return __audit_ipc_obj(ipcp);
-	return 0;
+		__audit_ipc_obj(ipcp);
 }
 static inline int audit_fd_pair(int fd1, int fd2)
 {
@@ -546,7 +545,7 @@ extern int audit_signals;
 #define audit_get_loginuid(t) (-1)
 #define audit_get_sessionid(t) (-1)
 #define audit_log_task_context(b) do { ; } while (0)
-#define audit_ipc_obj(i) ({ 0; })
+#define audit_ipc_obj(i) ((void)0)
 #define audit_ipc_set_perm(q,u,g,m) ({ 0; })
 #define audit_bprm(p) ({ 0; })
 #define audit_socketcall(n,a) ((void)0)
diff --git a/ipc/shm.c b/ipc/shm.c
index 38a055758a9b..57dd50046cef 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -747,9 +747,7 @@ asmlinkage long sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf)
 			goto out;
 		}
 
-		err = audit_ipc_obj(&(shp->shm_perm));
-		if (err)
-			goto out_unlock;
+		audit_ipc_obj(&(shp->shm_perm));
 
 		if (!capable(CAP_IPC_LOCK)) {
 			uid_t euid = current_euid();
diff --git a/ipc/util.c b/ipc/util.c
index 5a1808c774a2..579552abd50a 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -624,10 +624,9 @@ void ipc_rcu_putref(void *ptr)
 int ipcperms (struct kern_ipc_perm *ipcp, short flag)
 {	/* flag will most probably be 0 or S_...UGO from <linux/stat.h> */
 	uid_t euid = current_euid();
-	int requested_mode, granted_mode, err;
+	int requested_mode, granted_mode;
 
-	if (unlikely((err = audit_ipc_obj(ipcp))))
-		return err;
+	audit_ipc_obj(ipcp);
 	requested_mode = (flag >> 6) | (flag >> 3) | flag;
 	granted_mode = ipcp->mode;
 	if (euid == ipcp->cuid ||
@@ -803,9 +802,7 @@ struct kern_ipc_perm *ipcctl_pre_down(struct ipc_ids *ids, int id, int cmd,
 		goto out_up;
 	}
 
-	err = audit_ipc_obj(ipcp);
-	if (err)
-		goto out_unlock;
+	audit_ipc_obj(ipcp);
 
 	if (cmd == IPC_SET) {
 		err = audit_ipc_set_perm(extra_perm, perm->uid,
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 5cda66466e14..73504313264f 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -247,6 +247,12 @@ struct audit_context {
 			int nargs;
 			long args[6];
 		} socketcall;
+		struct {
+			uid_t			uid;
+			gid_t			gid;
+			mode_t			mode;
+			u32			osid;
+		} ipc;
 	};
 
 #if AUDIT_DEBUG
@@ -605,19 +611,12 @@ static int audit_filter_rules(struct task_struct *tsk,
 					}
 				}
 				/* Find ipc objects that match */
-				if (ctx) {
-					struct audit_aux_data *aux;
-					for (aux = ctx->aux; aux;
-					     aux = aux->next) {
-						if (aux->type == AUDIT_IPC) {
-							struct audit_aux_data_ipcctl *axi = (void *)aux;
-							if (security_audit_rule_match(axi->osid, f->type, f->op, f->lsm_rule, ctx)) {
-								++result;
-								break;
-							}
-						}
-					}
-				}
+				if (!ctx || ctx->type != AUDIT_IPC)
+					break;
+				if (security_audit_rule_match(ctx->ipc.osid,
+							      f->type, f->op,
+							      f->lsm_rule, ctx))
+					++result;
 			}
 			break;
 		case AUDIT_ARG0:
@@ -1228,7 +1227,7 @@ static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name)
 		audit_log_format(ab, " cap_fe=%d cap_fver=%x", name->fcap.fE, name->fcap_ver);
 }
 
-static void show_special(struct audit_context *context)
+static void show_special(struct audit_context *context, int *call_panic)
 {
 	struct audit_buffer *ab;
 	int i;
@@ -1245,6 +1244,23 @@ static void show_special(struct audit_context *context)
 			audit_log_format(ab, " a%d=%lx", i,
 				context->socketcall.args[i]);
 		break; }
+	case AUDIT_IPC: {
+		u32 osid = context->ipc.osid;
+
+		audit_log_format(ab, "ouid=%u ogid=%u mode=%#o",
+			 context->ipc.uid, context->ipc.gid, context->ipc.mode);
+		if (osid) {
+			char *ctx = NULL;
+			u32 len;
+			if (security_secid_to_secctx(osid, &ctx, &len)) {
+				audit_log_format(ab, " osid=%u", osid);
+				*call_panic = 1;
+			} else {
+				audit_log_format(ab, " obj=%s", ctx);
+				security_release_secctx(ctx, len);
+			}
+		}
+		break; }
 	}
 	audit_log_end(ab);
 }
@@ -1363,26 +1379,6 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 				axi->mqstat.mq_msgsize, axi->mqstat.mq_curmsgs);
 			break; }
 
-		case AUDIT_IPC: {
-			struct audit_aux_data_ipcctl *axi = (void *)aux;
-			audit_log_format(ab, 
-				 "ouid=%u ogid=%u mode=%#o",
-				 axi->uid, axi->gid, axi->mode);
-			if (axi->osid != 0) {
-				char *ctx = NULL;
-				u32 len;
-				if (security_secid_to_secctx(
-						axi->osid, &ctx, &len)) {
-					audit_log_format(ab, " osid=%u",
-							axi->osid);
-					call_panic = 1;
-				} else {
-					audit_log_format(ab, " obj=%s", ctx);
-					security_release_secctx(ctx, len);
-				}
-			}
-			break; }
-
 		case AUDIT_IPC_SET_PERM: {
 			struct audit_aux_data_ipcctl *axi = (void *)aux;
 			audit_log_format(ab,
@@ -1427,7 +1423,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 	}
 
 	if (context->type)
-		show_special(context);
+		show_special(context, &call_panic);
 
 	if (context->sockaddr_len) {
 		ab = audit_log_start(context, GFP_KERNEL, AUDIT_SOCKADDR);
@@ -2349,25 +2345,15 @@ int __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat)
  * audit_ipc_obj - record audit data for ipc object
  * @ipcp: ipc permissions
  *
- * Returns 0 for success or NULL context or < 0 on error.
  */
-int __audit_ipc_obj(struct kern_ipc_perm *ipcp)
+void __audit_ipc_obj(struct kern_ipc_perm *ipcp)
 {
-	struct audit_aux_data_ipcctl *ax;
 	struct audit_context *context = current->audit_context;
-
-	ax = kmalloc(sizeof(*ax), GFP_ATOMIC);
-	if (!ax)
-		return -ENOMEM;
-
-	ax->uid = ipcp->uid;
-	ax->gid = ipcp->gid;
-	ax->mode = ipcp->mode;
-	security_ipc_getsecid(ipcp, &ax->osid);
-	ax->d.type = AUDIT_IPC;
-	ax->d.next = context->aux;
-	context->aux = (void *)ax;
-	return 0;
+	context->ipc.uid = ipcp->uid;
+	context->ipc.gid = ipcp->gid;
+	context->ipc.mode = ipcp->mode;
+	security_ipc_getsecid(ipcp, &context->ipc.osid);
+	context->type = AUDIT_IPC;
 }
 
 /**
-- 
cgit v1.2.3


From e816f370cbadd2afea9f1a42f232d0636137d563 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 10 Dec 2008 03:47:15 -0500
Subject: sanitize audit_ipc_set_perm()

* get rid of allocations
* make it return void
* simplify callers

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/audit.h |  9 ++++----
 ipc/util.c            |  9 ++------
 kernel/auditsc.c      | 59 +++++++++++++++++++++++----------------------------
 3 files changed, 32 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index f8578b9088e1..b7abfe0d6737 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -444,7 +444,7 @@ extern int  audit_set_loginuid(struct task_struct *task, uid_t loginuid);
 #define audit_get_sessionid(t) ((t)->sessionid)
 extern void audit_log_task_context(struct audit_buffer *ab);
 extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp);
-extern int __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode);
+extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode);
 extern int audit_bprm(struct linux_binprm *bprm);
 extern void audit_socketcall(int nargs, unsigned long *args);
 extern int audit_sockaddr(int len, void *addr);
@@ -471,11 +471,10 @@ static inline int audit_fd_pair(int fd1, int fd2)
 		return __audit_fd_pair(fd1, fd2);
 	return 0;
 }
-static inline int audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode)
+static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode)
 {
 	if (unlikely(!audit_dummy_context()))
-		return __audit_ipc_set_perm(qbytes, uid, gid, mode);
-	return 0;
+		__audit_ipc_set_perm(qbytes, uid, gid, mode);
 }
 static inline int audit_mq_open(int oflag, mode_t mode, struct mq_attr __user *u_attr)
 {
@@ -546,7 +545,7 @@ extern int audit_signals;
 #define audit_get_sessionid(t) (-1)
 #define audit_log_task_context(b) do { ; } while (0)
 #define audit_ipc_obj(i) ((void)0)
-#define audit_ipc_set_perm(q,u,g,m) ({ 0; })
+#define audit_ipc_set_perm(q,u,g,m) ((void)0)
 #define audit_bprm(p) ({ 0; })
 #define audit_socketcall(n,a) ((void)0)
 #define audit_fd_pair(n,a) ({ 0; })
diff --git a/ipc/util.c b/ipc/util.c
index 579552abd50a..7585a72e259b 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -803,13 +803,9 @@ struct kern_ipc_perm *ipcctl_pre_down(struct ipc_ids *ids, int id, int cmd,
 	}
 
 	audit_ipc_obj(ipcp);
-
-	if (cmd == IPC_SET) {
-		err = audit_ipc_set_perm(extra_perm, perm->uid,
+	if (cmd == IPC_SET)
+		audit_ipc_set_perm(extra_perm, perm->uid,
 					 perm->gid, perm->mode);
-		if (err)
-			goto out_unlock;
-	}
 
 	euid = current_euid();
 	if (euid == ipcp->cuid ||
@@ -817,7 +813,6 @@ struct kern_ipc_perm *ipcctl_pre_down(struct ipc_ids *ids, int id, int cmd,
 		return ipcp;
 
 	err = -EPERM;
-out_unlock:
 	ipc_unlock(ipcp);
 out_up:
 	up_write(&ids->rw_mutex);
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 73504313264f..fbed62e05bce 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -151,16 +151,6 @@ struct audit_aux_data_mq_getsetattr {
 	struct mq_attr 		mqstat;
 };
 
-struct audit_aux_data_ipcctl {
-	struct audit_aux_data	d;
-	struct ipc_perm		p;
-	unsigned long		qbytes;
-	uid_t			uid;
-	gid_t			gid;
-	mode_t			mode;
-	u32			osid;
-};
-
 struct audit_aux_data_execve {
 	struct audit_aux_data	d;
 	int argc;
@@ -252,6 +242,11 @@ struct audit_context {
 			gid_t			gid;
 			mode_t			mode;
 			u32			osid;
+			int			has_perm;
+			uid_t			perm_uid;
+			gid_t			perm_gid;
+			mode_t			perm_mode;
+			unsigned long		qbytes;
 		} ipc;
 	};
 
@@ -1260,6 +1255,19 @@ static void show_special(struct audit_context *context, int *call_panic)
 				security_release_secctx(ctx, len);
 			}
 		}
+		if (context->ipc.has_perm) {
+			audit_log_end(ab);
+			ab = audit_log_start(context, GFP_KERNEL,
+					     AUDIT_IPC_SET_PERM);
+			audit_log_format(ab,
+				"qbytes=%lx ouid=%u ogid=%u mode=%#o",
+				context->ipc.qbytes,
+				context->ipc.perm_uid,
+				context->ipc.perm_gid,
+				context->ipc.perm_mode);
+			if (!ab)
+				return;
+		}
 		break; }
 	}
 	audit_log_end(ab);
@@ -1379,13 +1387,6 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 				axi->mqstat.mq_msgsize, axi->mqstat.mq_curmsgs);
 			break; }
 
-		case AUDIT_IPC_SET_PERM: {
-			struct audit_aux_data_ipcctl *axi = (void *)aux;
-			audit_log_format(ab,
-				"qbytes=%lx ouid=%u ogid=%u mode=%#o",
-				axi->qbytes, axi->uid, axi->gid, axi->mode);
-			break; }
-
 		case AUDIT_EXECVE: {
 			struct audit_aux_data_execve *axi = (void *)aux;
 			audit_log_execve_info(context, &ab, axi);
@@ -2352,6 +2353,7 @@ void __audit_ipc_obj(struct kern_ipc_perm *ipcp)
 	context->ipc.uid = ipcp->uid;
 	context->ipc.gid = ipcp->gid;
 	context->ipc.mode = ipcp->mode;
+	context->ipc.has_perm = 0;
 	security_ipc_getsecid(ipcp, &context->ipc.osid);
 	context->type = AUDIT_IPC;
 }
@@ -2363,26 +2365,17 @@ void __audit_ipc_obj(struct kern_ipc_perm *ipcp)
  * @gid: msgq group id
  * @mode: msgq mode (permissions)
  *
- * Returns 0 for success or NULL context or < 0 on error.
+ * Called only after audit_ipc_obj().
  */
-int __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode)
+void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode)
 {
-	struct audit_aux_data_ipcctl *ax;
 	struct audit_context *context = current->audit_context;
 
-	ax = kmalloc(sizeof(*ax), GFP_ATOMIC);
-	if (!ax)
-		return -ENOMEM;
-
-	ax->qbytes = qbytes;
-	ax->uid = uid;
-	ax->gid = gid;
-	ax->mode = mode;
-
-	ax->d.type = AUDIT_IPC_SET_PERM;
-	ax->d.next = context->aux;
-	context->aux = (void *)ax;
-	return 0;
+	context->ipc.qbytes = qbytes;
+	context->ipc.perm_uid = uid;
+	context->ipc.perm_gid = gid;
+	context->ipc.perm_mode = mode;
+	context->ipc.has_perm = 1;
 }
 
 int audit_bprm(struct linux_binprm *bprm)
-- 
cgit v1.2.3


From 7392906ea915b9a2c14dea32b3604b4e178f82f7 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 10 Dec 2008 06:58:59 -0500
Subject: sanitize audit_mq_getsetattr()

* get rid of allocations
* make it return void
* don't duplicate parts of audit_dummy_context()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/audit.h |  9 ++++-----
 ipc/mqueue.c          |  6 +-----
 kernel/auditsc.c      | 54 ++++++++++++++++-----------------------------------
 3 files changed, 22 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index b7abfe0d6737..b7707e577b80 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -454,7 +454,7 @@ extern int __audit_mq_open(int oflag, mode_t mode, struct mq_attr __user *u_attr
 extern int __audit_mq_timedsend(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec __user *u_abs_timeout);
 extern int __audit_mq_timedreceive(mqd_t mqdes, size_t msg_len, unsigned int __user *u_msg_prio, const struct timespec __user *u_abs_timeout);
 extern int __audit_mq_notify(mqd_t mqdes, const struct sigevent __user *u_notification);
-extern int __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat);
+extern void __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat);
 extern int __audit_log_bprm_fcaps(struct linux_binprm *bprm,
 				  const struct cred *new,
 				  const struct cred *old);
@@ -500,11 +500,10 @@ static inline int audit_mq_notify(mqd_t mqdes, const struct sigevent __user *u_n
 		return __audit_mq_notify(mqdes, u_notification);
 	return 0;
 }
-static inline int audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat)
+static inline void audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat)
 {
 	if (unlikely(!audit_dummy_context()))
-		return __audit_mq_getsetattr(mqdes, mqstat);
-	return 0;
+		__audit_mq_getsetattr(mqdes, mqstat);
 }
 
 static inline int audit_log_bprm_fcaps(struct linux_binprm *bprm,
@@ -555,7 +554,7 @@ extern int audit_signals;
 #define audit_mq_timedsend(d,l,p,t) ({ 0; })
 #define audit_mq_timedreceive(d,l,p,t) ({ 0; })
 #define audit_mq_notify(d,n) ({ 0; })
-#define audit_mq_getsetattr(d,s) ({ 0; })
+#define audit_mq_getsetattr(d,s) ((void)0)
 #define audit_log_bprm_fcaps(b, ncr, ocr) ({ 0; })
 #define audit_log_capset(pid, ncr, ocr) ({ 0; })
 #define audit_ptrace(t) ((void)0)
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index d9393f8e4c3e..7563611c6615 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -1150,11 +1150,7 @@ asmlinkage long sys_mq_getsetattr(mqd_t mqdes,
 	omqstat = info->attr;
 	omqstat.mq_flags = filp->f_flags & O_NONBLOCK;
 	if (u_mqstat) {
-		ret = audit_mq_getsetattr(mqdes, &mqstat);
-		if (ret != 0) {
-			spin_unlock(&info->lock);
-			goto out_fput;
-		}
+		audit_mq_getsetattr(mqdes, &mqstat);
 		if (mqstat.mq_flags & O_NONBLOCK)
 			filp->f_flags |= O_NONBLOCK;
 		else
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index fbed62e05bce..c50178c7e245 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -145,12 +145,6 @@ struct audit_aux_data_mq_notify {
 	struct sigevent 	notification;
 };
 
-struct audit_aux_data_mq_getsetattr {
-	struct audit_aux_data	d;
-	mqd_t			mqdes;
-	struct mq_attr 		mqstat;
-};
-
 struct audit_aux_data_execve {
 	struct audit_aux_data	d;
 	int argc;
@@ -248,6 +242,10 @@ struct audit_context {
 			mode_t			perm_mode;
 			unsigned long		qbytes;
 		} ipc;
+		struct {
+			mqd_t			mqdes;
+			struct mq_attr 		mqstat;
+		} mq_getsetattr;
 	};
 
 #if AUDIT_DEBUG
@@ -1269,6 +1267,15 @@ static void show_special(struct audit_context *context, int *call_panic)
 				return;
 		}
 		break; }
+	case AUDIT_MQ_GETSETATTR: {
+		struct mq_attr *attr = &context->mq_getsetattr.mqstat;
+		audit_log_format(ab,
+			"mqdes=%d mq_flags=0x%lx mq_maxmsg=%ld mq_msgsize=%ld "
+			"mq_curmsgs=%ld ",
+			context->mq_getsetattr.mqdes,
+			attr->mq_flags, attr->mq_maxmsg,
+			attr->mq_msgsize, attr->mq_curmsgs);
+		break; }
 	}
 	audit_log_end(ab);
 }
@@ -1377,16 +1384,6 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 				axi->notification.sigev_signo);
 			break; }
 
-		case AUDIT_MQ_GETSETATTR: {
-			struct audit_aux_data_mq_getsetattr *axi = (void *)aux;
-			audit_log_format(ab,
-				"mqdes=%d mq_flags=0x%lx mq_maxmsg=%ld mq_msgsize=%ld "
-				"mq_curmsgs=%ld ",
-				axi->mqdes,
-				axi->mqstat.mq_flags, axi->mqstat.mq_maxmsg,
-				axi->mqstat.mq_msgsize, axi->mqstat.mq_curmsgs);
-			break; }
-
 		case AUDIT_EXECVE: {
 			struct audit_aux_data_execve *axi = (void *)aux;
 			audit_log_execve_info(context, &ab, axi);
@@ -2316,30 +2313,13 @@ int __audit_mq_notify(mqd_t mqdes, const struct sigevent __user *u_notification)
  * @mqdes: MQ descriptor
  * @mqstat: MQ flags
  *
- * Returns 0 for success or NULL context or < 0 on error.
  */
-int __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat)
+void __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat)
 {
-	struct audit_aux_data_mq_getsetattr *ax;
 	struct audit_context *context = current->audit_context;
-
-	if (!audit_enabled)
-		return 0;
-
-	if (likely(!context))
-		return 0;
-
-	ax = kmalloc(sizeof(*ax), GFP_ATOMIC);
-	if (!ax)
-		return -ENOMEM;
-
-	ax->mqdes = mqdes;
-	ax->mqstat = *mqstat;
-
-	ax->d.type = AUDIT_MQ_GETSETATTR;
-	ax->d.next = context->aux;
-	context->aux = (void *)ax;
-	return 0;
+	context->mq_getsetattr.mqdes = mqdes;
+	context->mq_getsetattr.mqstat = *mqstat;
+	context->type = AUDIT_MQ_GETSETATTR;
 }
 
 /**
-- 
cgit v1.2.3


From 20114f71b27cafeb7c7e41d2b0f0b68c3fbb022b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 10 Dec 2008 07:16:12 -0500
Subject: sanitize audit_mq_notify()

* don't copy_from_user() twice
* don't bother with allocations
* don't duplicate parts of audit_dummy_context()
* make it return void

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/audit.h |  9 ++++-----
 ipc/mqueue.c          | 14 ++++++-------
 kernel/auditsc.c      | 56 +++++++++++++++------------------------------------
 3 files changed, 27 insertions(+), 52 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index b7707e577b80..8101d2c4a995 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -453,7 +453,7 @@ extern int audit_set_macxattr(const char *name);
 extern int __audit_mq_open(int oflag, mode_t mode, struct mq_attr __user *u_attr);
 extern int __audit_mq_timedsend(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec __user *u_abs_timeout);
 extern int __audit_mq_timedreceive(mqd_t mqdes, size_t msg_len, unsigned int __user *u_msg_prio, const struct timespec __user *u_abs_timeout);
-extern int __audit_mq_notify(mqd_t mqdes, const struct sigevent __user *u_notification);
+extern void __audit_mq_notify(mqd_t mqdes, const struct sigevent *notification);
 extern void __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat);
 extern int __audit_log_bprm_fcaps(struct linux_binprm *bprm,
 				  const struct cred *new,
@@ -494,11 +494,10 @@ static inline int audit_mq_timedreceive(mqd_t mqdes, size_t msg_len, unsigned in
 		return __audit_mq_timedreceive(mqdes, msg_len, u_msg_prio, u_abs_timeout);
 	return 0;
 }
-static inline int audit_mq_notify(mqd_t mqdes, const struct sigevent __user *u_notification)
+static inline void audit_mq_notify(mqd_t mqdes, const struct sigevent *notification)
 {
 	if (unlikely(!audit_dummy_context()))
-		return __audit_mq_notify(mqdes, u_notification);
-	return 0;
+		__audit_mq_notify(mqdes, notification);
 }
 static inline void audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat)
 {
@@ -553,7 +552,7 @@ extern int audit_signals;
 #define audit_mq_open(o,m,a) ({ 0; })
 #define audit_mq_timedsend(d,l,p,t) ({ 0; })
 #define audit_mq_timedreceive(d,l,p,t) ({ 0; })
-#define audit_mq_notify(d,n) ({ 0; })
+#define audit_mq_notify(d,n) ((void)0)
 #define audit_mq_getsetattr(d,s) ((void)0)
 #define audit_log_bprm_fcaps(b, ncr, ocr) ({ 0; })
 #define audit_log_capset(pid, ncr, ocr) ({ 0; })
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 7563611c6615..e7b2f68f8d77 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -1003,17 +1003,17 @@ asmlinkage long sys_mq_notify(mqd_t mqdes,
 	struct mqueue_inode_info *info;
 	struct sk_buff *nc;
 
-	ret = audit_mq_notify(mqdes, u_notification);
-	if (ret != 0)
-		return ret;
-
-	nc = NULL;
-	sock = NULL;
-	if (u_notification != NULL) {
+	if (u_notification) {
 		if (copy_from_user(&notification, u_notification,
 					sizeof(struct sigevent)))
 			return -EFAULT;
+	}
+
+	audit_mq_notify(mqdes, u_notification ? &notification : NULL);
 
+	nc = NULL;
+	sock = NULL;
+	if (u_notification != NULL) {
 		if (unlikely(notification.sigev_notify != SIGEV_NONE &&
 			     notification.sigev_notify != SIGEV_SIGNAL &&
 			     notification.sigev_notify != SIGEV_THREAD))
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index c50178c7e245..3ece960de894 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -139,12 +139,6 @@ struct audit_aux_data_mq_sendrecv {
 	struct timespec		abs_timeout;
 };
 
-struct audit_aux_data_mq_notify {
-	struct audit_aux_data	d;
-	mqd_t			mqdes;
-	struct sigevent 	notification;
-};
-
 struct audit_aux_data_execve {
 	struct audit_aux_data	d;
 	int argc;
@@ -246,6 +240,10 @@ struct audit_context {
 			mqd_t			mqdes;
 			struct mq_attr 		mqstat;
 		} mq_getsetattr;
+		struct {
+			mqd_t			mqdes;
+			int			sigev_signo;
+		} mq_notify;
 	};
 
 #if AUDIT_DEBUG
@@ -1267,6 +1265,11 @@ static void show_special(struct audit_context *context, int *call_panic)
 				return;
 		}
 		break; }
+	case AUDIT_MQ_NOTIFY: {
+		audit_log_format(ab, "mqdes=%d sigev_signo=%d",
+				context->mq_notify.mqdes,
+				context->mq_notify.sigev_signo);
+		break; }
 	case AUDIT_MQ_GETSETATTR: {
 		struct mq_attr *attr = &context->mq_getsetattr.mqstat;
 		audit_log_format(ab,
@@ -1376,14 +1379,6 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 				axi->abs_timeout.tv_sec, axi->abs_timeout.tv_nsec);
 			break; }
 
-		case AUDIT_MQ_NOTIFY: {
-			struct audit_aux_data_mq_notify *axi = (void *)aux;
-			audit_log_format(ab,
-				"mqdes=%d sigev_signo=%d",
-				axi->mqdes,
-				axi->notification.sigev_signo);
-			break; }
-
 		case AUDIT_EXECVE: {
 			struct audit_aux_data_execve *axi = (void *)aux;
 			audit_log_execve_info(context, &ab, axi);
@@ -2274,38 +2269,19 @@ int __audit_mq_timedreceive(mqd_t mqdes, size_t msg_len,
  * @mqdes: MQ descriptor
  * @u_notification: Notification event
  *
- * Returns 0 for success or NULL context or < 0 on error.
  */
 
-int __audit_mq_notify(mqd_t mqdes, const struct sigevent __user *u_notification)
+void __audit_mq_notify(mqd_t mqdes, const struct sigevent *notification)
 {
-	struct audit_aux_data_mq_notify *ax;
 	struct audit_context *context = current->audit_context;
 
-	if (!audit_enabled)
-		return 0;
-
-	if (likely(!context))
-		return 0;
-
-	ax = kmalloc(sizeof(*ax), GFP_ATOMIC);
-	if (!ax)
-		return -ENOMEM;
-
-	if (u_notification != NULL) {
-		if (copy_from_user(&ax->notification, u_notification, sizeof(ax->notification))) {
-			kfree(ax);
-			return -EFAULT;
-		}
-	} else
-		memset(&ax->notification, 0, sizeof(ax->notification));
-
-	ax->mqdes = mqdes;
+	if (notification)
+		context->mq_notify.sigev_signo = notification->sigev_signo;
+	else
+		context->mq_notify.sigev_signo = 0;
 
-	ax->d.type = AUDIT_MQ_NOTIFY;
-	ax->d.next = context->aux;
-	context->aux = (void *)ax;
-	return 0;
+	context->mq_notify.mqdes = mqdes;
+	context->type = AUDIT_MQ_NOTIFY;
 }
 
 /**
-- 
cgit v1.2.3


From c32c8af43b9adde8d6f938d8e6328c13b8de79ac Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 14 Dec 2008 03:46:48 -0500
Subject: sanitize AUDIT_MQ_SENDRECV

* logging the original value of *msg_prio in mq_timedreceive(2)
  is insane - the argument is write-only (i.e. syscall always
  ignores the original value and only overwrites it).
* merge __audit_mq_timed{send,receive}
* don't do copy_from_user() twice
* don't mess with allocations in auditsc part
* ... and don't bother checking !audit_enabled and !context in there -
  we'd already checked for audit_dummy_context().

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/audit.h |  17 ++-----
 ipc/mqueue.c          |  54 +++++++++++----------
 kernel/auditsc.c      | 127 ++++++++++++--------------------------------------
 3 files changed, 63 insertions(+), 135 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 8101d2c4a995..67f0cdd991ba 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -451,8 +451,7 @@ extern int audit_sockaddr(int len, void *addr);
 extern int __audit_fd_pair(int fd1, int fd2);
 extern int audit_set_macxattr(const char *name);
 extern int __audit_mq_open(int oflag, mode_t mode, struct mq_attr __user *u_attr);
-extern int __audit_mq_timedsend(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec __user *u_abs_timeout);
-extern int __audit_mq_timedreceive(mqd_t mqdes, size_t msg_len, unsigned int __user *u_msg_prio, const struct timespec __user *u_abs_timeout);
+extern void __audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec *abs_timeout);
 extern void __audit_mq_notify(mqd_t mqdes, const struct sigevent *notification);
 extern void __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat);
 extern int __audit_log_bprm_fcaps(struct linux_binprm *bprm,
@@ -482,17 +481,10 @@ static inline int audit_mq_open(int oflag, mode_t mode, struct mq_attr __user *u
 		return __audit_mq_open(oflag, mode, u_attr);
 	return 0;
 }
-static inline int audit_mq_timedsend(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec __user *u_abs_timeout)
+static inline void audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec *abs_timeout)
 {
 	if (unlikely(!audit_dummy_context()))
-		return __audit_mq_timedsend(mqdes, msg_len, msg_prio, u_abs_timeout);
-	return 0;
-}
-static inline int audit_mq_timedreceive(mqd_t mqdes, size_t msg_len, unsigned int __user *u_msg_prio, const struct timespec __user *u_abs_timeout)
-{
-	if (unlikely(!audit_dummy_context()))
-		return __audit_mq_timedreceive(mqdes, msg_len, u_msg_prio, u_abs_timeout);
-	return 0;
+		__audit_mq_sendrecv(mqdes, msg_len, msg_prio, abs_timeout);
 }
 static inline void audit_mq_notify(mqd_t mqdes, const struct sigevent *notification)
 {
@@ -550,8 +542,7 @@ extern int audit_signals;
 #define audit_sockaddr(len, addr) ({ 0; })
 #define audit_set_macxattr(n) do { ; } while (0)
 #define audit_mq_open(o,m,a) ({ 0; })
-#define audit_mq_timedsend(d,l,p,t) ({ 0; })
-#define audit_mq_timedreceive(d,l,p,t) ({ 0; })
+#define audit_mq_sendrecv(d,l,p,t) ((void)0)
 #define audit_mq_notify(d,n) ((void)0)
 #define audit_mq_getsetattr(d,s) ((void)0)
 #define audit_log_bprm_fcaps(b, ncr, ocr) ({ 0; })
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index e7b2f68f8d77..192da806c283 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -524,31 +524,27 @@ static void __do_notify(struct mqueue_inode_info *info)
 	wake_up(&info->wait_q);
 }
 
-static long prepare_timeout(const struct timespec __user *u_arg)
+static long prepare_timeout(struct timespec *p)
 {
-	struct timespec ts, nowts;
+	struct timespec nowts;
 	long timeout;
 
-	if (u_arg) {
-		if (unlikely(copy_from_user(&ts, u_arg,
-					sizeof(struct timespec))))
-			return -EFAULT;
-
-		if (unlikely(ts.tv_nsec < 0 || ts.tv_sec < 0
-			|| ts.tv_nsec >= NSEC_PER_SEC))
+	if (p) {
+		if (unlikely(p->tv_nsec < 0 || p->tv_sec < 0
+			|| p->tv_nsec >= NSEC_PER_SEC))
 			return -EINVAL;
 		nowts = CURRENT_TIME;
 		/* first subtract as jiffies can't be too big */
-		ts.tv_sec -= nowts.tv_sec;
-		if (ts.tv_nsec < nowts.tv_nsec) {
-			ts.tv_nsec += NSEC_PER_SEC;
-			ts.tv_sec--;
+		p->tv_sec -= nowts.tv_sec;
+		if (p->tv_nsec < nowts.tv_nsec) {
+			p->tv_nsec += NSEC_PER_SEC;
+			p->tv_sec--;
 		}
-		ts.tv_nsec -= nowts.tv_nsec;
-		if (ts.tv_sec < 0)
+		p->tv_nsec -= nowts.tv_nsec;
+		if (p->tv_sec < 0)
 			return 0;
 
-		timeout = timespec_to_jiffies(&ts) + 1;
+		timeout = timespec_to_jiffies(p) + 1;
 	} else
 		return MAX_SCHEDULE_TIMEOUT;
 
@@ -829,17 +825,22 @@ asmlinkage long sys_mq_timedsend(mqd_t mqdes, const char __user *u_msg_ptr,
 	struct ext_wait_queue *receiver;
 	struct msg_msg *msg_ptr;
 	struct mqueue_inode_info *info;
+	struct timespec ts, *p = NULL;
 	long timeout;
 	int ret;
 
-	ret = audit_mq_timedsend(mqdes, msg_len, msg_prio, u_abs_timeout);
-	if (ret != 0)
-		return ret;
+	if (u_abs_timeout) {
+		if (copy_from_user(&ts, u_abs_timeout, 
+					sizeof(struct timespec)))
+			return -EFAULT;
+		p = &ts;
+	}
 
 	if (unlikely(msg_prio >= (unsigned long) MQ_PRIO_MAX))
 		return -EINVAL;
 
-	timeout = prepare_timeout(u_abs_timeout);
+	audit_mq_sendrecv(mqdes, msg_len, msg_prio, p);
+	timeout = prepare_timeout(p);
 
 	ret = -EBADF;
 	filp = fget(mqdes);
@@ -918,12 +919,17 @@ asmlinkage ssize_t sys_mq_timedreceive(mqd_t mqdes, char __user *u_msg_ptr,
 	struct inode *inode;
 	struct mqueue_inode_info *info;
 	struct ext_wait_queue wait;
+	struct timespec ts, *p = NULL;
 
-	ret = audit_mq_timedreceive(mqdes, msg_len, u_msg_prio, u_abs_timeout);
-	if (ret != 0)
-		return ret;
+	if (u_abs_timeout) {
+		if (copy_from_user(&ts, u_abs_timeout, 
+					sizeof(struct timespec)))
+			return -EFAULT;
+		p = &ts;
+	}
 
-	timeout = prepare_timeout(u_abs_timeout);
+	audit_mq_sendrecv(mqdes, msg_len, 0, p);
+	timeout = prepare_timeout(p);
 
 	ret = -EBADF;
 	filp = fget(mqdes);
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 3ece960de894..140c47453470 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -131,14 +131,6 @@ struct audit_aux_data_mq_open {
 	struct mq_attr		attr;
 };
 
-struct audit_aux_data_mq_sendrecv {
-	struct audit_aux_data	d;
-	mqd_t			mqdes;
-	size_t			msg_len;
-	unsigned int		msg_prio;
-	struct timespec		abs_timeout;
-};
-
 struct audit_aux_data_execve {
 	struct audit_aux_data	d;
 	int argc;
@@ -244,6 +236,12 @@ struct audit_context {
 			mqd_t			mqdes;
 			int			sigev_signo;
 		} mq_notify;
+		struct {
+			mqd_t			mqdes;
+			size_t			msg_len;
+			unsigned int		msg_prio;
+			struct timespec		abs_timeout;
+		} mq_sendrecv;
 	};
 
 #if AUDIT_DEBUG
@@ -1265,6 +1263,16 @@ static void show_special(struct audit_context *context, int *call_panic)
 				return;
 		}
 		break; }
+	case AUDIT_MQ_SENDRECV: {
+		audit_log_format(ab,
+			"mqdes=%d msg_len=%zd msg_prio=%u "
+			"abs_timeout_sec=%ld abs_timeout_nsec=%ld",
+			context->mq_sendrecv.mqdes,
+			context->mq_sendrecv.msg_len,
+			context->mq_sendrecv.msg_prio,
+			context->mq_sendrecv.abs_timeout.tv_sec,
+			context->mq_sendrecv.abs_timeout.tv_nsec);
+		break; }
 	case AUDIT_MQ_NOTIFY: {
 		audit_log_format(ab, "mqdes=%d sigev_signo=%d",
 				context->mq_notify.mqdes,
@@ -1370,15 +1378,6 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 				axi->attr.mq_curmsgs);
 			break; }
 
-		case AUDIT_MQ_SENDRECV: {
-			struct audit_aux_data_mq_sendrecv *axi = (void *)aux;
-			audit_log_format(ab,
-				"mqdes=%d msg_len=%zd msg_prio=%u "
-				"abs_timeout_sec=%ld abs_timeout_nsec=%ld",
-				axi->mqdes, axi->msg_len, axi->msg_prio,
-				axi->abs_timeout.tv_sec, axi->abs_timeout.tv_nsec);
-			break; }
-
 		case AUDIT_EXECVE: {
 			struct audit_aux_data_execve *axi = (void *)aux;
 			audit_log_execve_info(context, &ab, axi);
@@ -2171,97 +2170,29 @@ int __audit_mq_open(int oflag, mode_t mode, struct mq_attr __user *u_attr)
 }
 
 /**
- * __audit_mq_timedsend - record audit data for a POSIX MQ timed send
+ * __audit_mq_sendrecv - record audit data for a POSIX MQ timed send/receive
  * @mqdes: MQ descriptor
  * @msg_len: Message length
  * @msg_prio: Message priority
- * @u_abs_timeout: Message timeout in absolute time
- *
- * Returns 0 for success or NULL context or < 0 on error.
- */
-int __audit_mq_timedsend(mqd_t mqdes, size_t msg_len, unsigned int msg_prio,
-			const struct timespec __user *u_abs_timeout)
-{
-	struct audit_aux_data_mq_sendrecv *ax;
-	struct audit_context *context = current->audit_context;
-
-	if (!audit_enabled)
-		return 0;
-
-	if (likely(!context))
-		return 0;
-
-	ax = kmalloc(sizeof(*ax), GFP_ATOMIC);
-	if (!ax)
-		return -ENOMEM;
-
-	if (u_abs_timeout != NULL) {
-		if (copy_from_user(&ax->abs_timeout, u_abs_timeout, sizeof(ax->abs_timeout))) {
-			kfree(ax);
-			return -EFAULT;
-		}
-	} else
-		memset(&ax->abs_timeout, 0, sizeof(ax->abs_timeout));
-
-	ax->mqdes = mqdes;
-	ax->msg_len = msg_len;
-	ax->msg_prio = msg_prio;
-
-	ax->d.type = AUDIT_MQ_SENDRECV;
-	ax->d.next = context->aux;
-	context->aux = (void *)ax;
-	return 0;
-}
-
-/**
- * __audit_mq_timedreceive - record audit data for a POSIX MQ timed receive
- * @mqdes: MQ descriptor
- * @msg_len: Message length
- * @u_msg_prio: Message priority
- * @u_abs_timeout: Message timeout in absolute time
+ * @abs_timeout: Message timeout in absolute time
  *
- * Returns 0 for success or NULL context or < 0 on error.
  */
-int __audit_mq_timedreceive(mqd_t mqdes, size_t msg_len,
-				unsigned int __user *u_msg_prio,
-				const struct timespec __user *u_abs_timeout)
+void __audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio,
+			const struct timespec *abs_timeout)
 {
-	struct audit_aux_data_mq_sendrecv *ax;
 	struct audit_context *context = current->audit_context;
+	struct timespec *p = &context->mq_sendrecv.abs_timeout;
 
-	if (!audit_enabled)
-		return 0;
-
-	if (likely(!context))
-		return 0;
-
-	ax = kmalloc(sizeof(*ax), GFP_ATOMIC);
-	if (!ax)
-		return -ENOMEM;
-
-	if (u_msg_prio != NULL) {
-		if (get_user(ax->msg_prio, u_msg_prio)) {
-			kfree(ax);
-			return -EFAULT;
-		}
-	} else
-		ax->msg_prio = 0;
-
-	if (u_abs_timeout != NULL) {
-		if (copy_from_user(&ax->abs_timeout, u_abs_timeout, sizeof(ax->abs_timeout))) {
-			kfree(ax);
-			return -EFAULT;
-		}
-	} else
-		memset(&ax->abs_timeout, 0, sizeof(ax->abs_timeout));
+	if (abs_timeout)
+		memcpy(p, abs_timeout, sizeof(struct timespec));
+	else
+		memset(p, 0, sizeof(struct timespec));
 
-	ax->mqdes = mqdes;
-	ax->msg_len = msg_len;
+	context->mq_sendrecv.mqdes = mqdes;
+	context->mq_sendrecv.msg_len = msg_len;
+	context->mq_sendrecv.msg_prio = msg_prio;
 
-	ax->d.type = AUDIT_MQ_SENDRECV;
-	ax->d.next = context->aux;
-	context->aux = (void *)ax;
-	return 0;
+	context->type = AUDIT_MQ_SENDRECV;
 }
 
 /**
-- 
cgit v1.2.3


From 564f6993ffef656aebaf46cf2f1f6cb4f5c97207 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 14 Dec 2008 04:02:26 -0500
Subject: sanitize audit_mq_open()

* don't bother with allocations
* don't do double copy_from_user()
* don't duplicate parts of check for audit_dummy_context()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/audit.h |  9 ++++---
 ipc/mqueue.c          | 23 +++++++++---------
 kernel/auditsc.c      | 65 ++++++++++++++++++---------------------------------
 3 files changed, 38 insertions(+), 59 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 67f0cdd991ba..54978bdd2bd4 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -450,7 +450,7 @@ extern void audit_socketcall(int nargs, unsigned long *args);
 extern int audit_sockaddr(int len, void *addr);
 extern int __audit_fd_pair(int fd1, int fd2);
 extern int audit_set_macxattr(const char *name);
-extern int __audit_mq_open(int oflag, mode_t mode, struct mq_attr __user *u_attr);
+extern void __audit_mq_open(int oflag, mode_t mode, struct mq_attr *attr);
 extern void __audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec *abs_timeout);
 extern void __audit_mq_notify(mqd_t mqdes, const struct sigevent *notification);
 extern void __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat);
@@ -475,11 +475,10 @@ static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid
 	if (unlikely(!audit_dummy_context()))
 		__audit_ipc_set_perm(qbytes, uid, gid, mode);
 }
-static inline int audit_mq_open(int oflag, mode_t mode, struct mq_attr __user *u_attr)
+static inline void audit_mq_open(int oflag, mode_t mode, struct mq_attr *attr)
 {
 	if (unlikely(!audit_dummy_context()))
-		return __audit_mq_open(oflag, mode, u_attr);
-	return 0;
+		__audit_mq_open(oflag, mode, attr);
 }
 static inline void audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec *abs_timeout)
 {
@@ -541,7 +540,7 @@ extern int audit_signals;
 #define audit_fd_pair(n,a) ({ 0; })
 #define audit_sockaddr(len, addr) ({ 0; })
 #define audit_set_macxattr(n) do { ; } while (0)
-#define audit_mq_open(o,m,a) ({ 0; })
+#define audit_mq_open(o,m,a) ((void)0)
 #define audit_mq_sendrecv(d,l,p,t) ((void)0)
 #define audit_mq_notify(d,n) ((void)0)
 #define audit_mq_getsetattr(d,s) ((void)0)
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 192da806c283..d448b69672b5 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -588,22 +588,18 @@ static int mq_attr_ok(struct mq_attr *attr)
  * Invoked when creating a new queue via sys_mq_open
  */
 static struct file *do_create(struct dentry *dir, struct dentry *dentry,
-			int oflag, mode_t mode, struct mq_attr __user *u_attr)
+			int oflag, mode_t mode, struct mq_attr *attr)
 {
 	const struct cred *cred = current_cred();
-	struct mq_attr attr;
 	struct file *result;
 	int ret;
 
-	if (u_attr) {
-		ret = -EFAULT;
-		if (copy_from_user(&attr, u_attr, sizeof(attr)))
-			goto out;
+	if (attr) {
 		ret = -EINVAL;
-		if (!mq_attr_ok(&attr))
+		if (!mq_attr_ok(attr))
 			goto out;
 		/* store for use during create */
-		dentry->d_fsdata = &attr;
+		dentry->d_fsdata = attr;
 	}
 
 	mode &= ~current->fs->umask;
@@ -660,11 +656,13 @@ asmlinkage long sys_mq_open(const char __user *u_name, int oflag, mode_t mode,
 	struct dentry *dentry;
 	struct file *filp;
 	char *name;
+	struct mq_attr attr;
 	int fd, error;
 
-	error = audit_mq_open(oflag, mode, u_attr);
-	if (error != 0)
-		return error;
+	if (u_attr && copy_from_user(&attr, u_attr, sizeof(struct mq_attr)))
+		return -EFAULT;
+
+	audit_mq_open(oflag, mode, u_attr ? &attr : NULL);
 
 	if (IS_ERR(name = getname(u_name)))
 		return PTR_ERR(name);
@@ -690,7 +688,8 @@ asmlinkage long sys_mq_open(const char __user *u_name, int oflag, mode_t mode,
 			filp = do_open(dentry, oflag);
 		} else {
 			filp = do_create(mqueue_mnt->mnt_root, dentry,
-						oflag, mode, u_attr);
+						oflag, mode,
+						u_attr ? &attr : NULL);
 		}
 	} else {
 		error = -ENOENT;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 140c47453470..83e946f1cdde 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -124,13 +124,6 @@ struct audit_aux_data {
 /* Number of target pids per aux struct. */
 #define AUDIT_AUX_PIDS	16
 
-struct audit_aux_data_mq_open {
-	struct audit_aux_data	d;
-	int			oflag;
-	mode_t			mode;
-	struct mq_attr		attr;
-};
-
 struct audit_aux_data_execve {
 	struct audit_aux_data	d;
 	int argc;
@@ -242,6 +235,11 @@ struct audit_context {
 			unsigned int		msg_prio;
 			struct timespec		abs_timeout;
 		} mq_sendrecv;
+		struct {
+			int			oflag;
+			mode_t			mode;
+			struct mq_attr		attr;
+		} mq_open;
 	};
 
 #if AUDIT_DEBUG
@@ -1263,6 +1261,16 @@ static void show_special(struct audit_context *context, int *call_panic)
 				return;
 		}
 		break; }
+	case AUDIT_MQ_OPEN: {
+		audit_log_format(ab,
+			"oflag=0x%x mode=%#o mq_flags=0x%lx mq_maxmsg=%ld "
+			"mq_msgsize=%ld mq_curmsgs=%ld",
+			context->mq_open.oflag, context->mq_open.mode,
+			context->mq_open.attr.mq_flags,
+			context->mq_open.attr.mq_maxmsg,
+			context->mq_open.attr.mq_msgsize,
+			context->mq_open.attr.mq_curmsgs);
+		break; }
 	case AUDIT_MQ_SENDRECV: {
 		audit_log_format(ab,
 			"mqdes=%d msg_len=%zd msg_prio=%u "
@@ -1368,15 +1376,6 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 			continue; /* audit_panic has been called */
 
 		switch (aux->type) {
-		case AUDIT_MQ_OPEN: {
-			struct audit_aux_data_mq_open *axi = (void *)aux;
-			audit_log_format(ab,
-				"oflag=0x%x mode=%#o mq_flags=0x%lx mq_maxmsg=%ld "
-				"mq_msgsize=%ld mq_curmsgs=%ld",
-				axi->oflag, axi->mode, axi->attr.mq_flags,
-				axi->attr.mq_maxmsg, axi->attr.mq_msgsize,
-				axi->attr.mq_curmsgs);
-			break; }
 
 		case AUDIT_EXECVE: {
 			struct audit_aux_data_execve *axi = (void *)aux;
@@ -2135,38 +2134,20 @@ int audit_set_loginuid(struct task_struct *task, uid_t loginuid)
  * @mode: mode bits
  * @u_attr: queue attributes
  *
- * Returns 0 for success or NULL context or < 0 on error.
  */
-int __audit_mq_open(int oflag, mode_t mode, struct mq_attr __user *u_attr)
+void __audit_mq_open(int oflag, mode_t mode, struct mq_attr *attr)
 {
-	struct audit_aux_data_mq_open *ax;
 	struct audit_context *context = current->audit_context;
 
-	if (!audit_enabled)
-		return 0;
-
-	if (likely(!context))
-		return 0;
-
-	ax = kmalloc(sizeof(*ax), GFP_ATOMIC);
-	if (!ax)
-		return -ENOMEM;
-
-	if (u_attr != NULL) {
-		if (copy_from_user(&ax->attr, u_attr, sizeof(ax->attr))) {
-			kfree(ax);
-			return -EFAULT;
-		}
-	} else
-		memset(&ax->attr, 0, sizeof(ax->attr));
+	if (attr)
+		memcpy(&context->mq_open.attr, attr, sizeof(struct mq_attr));
+	else
+		memset(&context->mq_open.attr, 0, sizeof(struct mq_attr));
 
-	ax->oflag = oflag;
-	ax->mode = mode;
+	context->mq_open.oflag = oflag;
+	context->mq_open.mode = mode;
 
-	ax->d.type = AUDIT_MQ_OPEN;
-	ax->d.next = context->aux;
-	context->aux = (void *)ax;
-	return 0;
+	context->type = AUDIT_MQ_OPEN;
 }
 
 /**
-- 
cgit v1.2.3


From 157cf649a735a2f7e8dba0ed08e6e38b6c30d886 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 14 Dec 2008 04:57:47 -0500
Subject: sanitize audit_fd_pair()

* no allocations
* return void

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/pipe.c             |  7 +------
 include/linux/audit.h |  9 ++++-----
 kernel/auditsc.c      | 44 ++++++++++++++------------------------------
 net/socket.c          |  9 +--------
 4 files changed, 20 insertions(+), 49 deletions(-)

(limited to 'include/linux')

diff --git a/fs/pipe.c b/fs/pipe.c
index aaf797bd57b9..891697112f66 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1016,10 +1016,7 @@ int do_pipe_flags(int *fd, int flags)
 		goto err_fdr;
 	fdw = error;
 
-	error = audit_fd_pair(fdr, fdw);
-	if (error < 0)
-		goto err_fdw;
-
+	audit_fd_pair(fdr, fdw);
 	fd_install(fdr, fr);
 	fd_install(fdw, fw);
 	fd[0] = fdr;
@@ -1027,8 +1024,6 @@ int do_pipe_flags(int *fd, int flags)
 
 	return 0;
 
- err_fdw:
-	put_unused_fd(fdw);
  err_fdr:
 	put_unused_fd(fdr);
  err_read_pipe:
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 54978bdd2bd4..bd59cd1e3219 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -448,7 +448,7 @@ extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mod
 extern int audit_bprm(struct linux_binprm *bprm);
 extern void audit_socketcall(int nargs, unsigned long *args);
 extern int audit_sockaddr(int len, void *addr);
-extern int __audit_fd_pair(int fd1, int fd2);
+extern void __audit_fd_pair(int fd1, int fd2);
 extern int audit_set_macxattr(const char *name);
 extern void __audit_mq_open(int oflag, mode_t mode, struct mq_attr *attr);
 extern void __audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec *abs_timeout);
@@ -464,11 +464,10 @@ static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp)
 	if (unlikely(!audit_dummy_context()))
 		__audit_ipc_obj(ipcp);
 }
-static inline int audit_fd_pair(int fd1, int fd2)
+static inline void audit_fd_pair(int fd1, int fd2)
 {
 	if (unlikely(!audit_dummy_context()))
-		return __audit_fd_pair(fd1, fd2);
-	return 0;
+		__audit_fd_pair(fd1, fd2);
 }
 static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode)
 {
@@ -537,7 +536,7 @@ extern int audit_signals;
 #define audit_ipc_set_perm(q,u,g,m) ((void)0)
 #define audit_bprm(p) ({ 0; })
 #define audit_socketcall(n,a) ((void)0)
-#define audit_fd_pair(n,a) ({ 0; })
+#define audit_fd_pair(n,a) ((void)0)
 #define audit_sockaddr(len, addr) ({ 0; })
 #define audit_set_macxattr(n) do { ; } while (0)
 #define audit_mq_open(o,m,a) ((void)0)
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 83e946f1cdde..327e65d50674 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -131,11 +131,6 @@ struct audit_aux_data_execve {
 	struct mm_struct *mm;
 };
 
-struct audit_aux_data_fd_pair {
-	struct	audit_aux_data d;
-	int	fd[2];
-};
-
 struct audit_aux_data_pids {
 	struct audit_aux_data	d;
 	pid_t			target_pid[AUDIT_AUX_PIDS];
@@ -241,6 +236,7 @@ struct audit_context {
 			struct mq_attr		attr;
 		} mq_open;
 	};
+	int fds[2];
 
 #if AUDIT_DEBUG
 	int		    put_count;
@@ -1382,11 +1378,6 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 			audit_log_execve_info(context, &ab, axi);
 			break; }
 
-		case AUDIT_FD_PAIR: {
-			struct audit_aux_data_fd_pair *axs = (void *)aux;
-			audit_log_format(ab, "fd0=%d fd1=%d", axs->fd[0], axs->fd[1]);
-			break; }
-
 		case AUDIT_BPRM_FCAPS: {
 			struct audit_aux_data_bprm_fcaps *axs = (void *)aux;
 			audit_log_format(ab, "fver=%x", axs->fcap_ver);
@@ -1416,6 +1407,15 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 	if (context->type)
 		show_special(context, &call_panic);
 
+	if (context->fds[0] >= 0) {
+		ab = audit_log_start(context, GFP_KERNEL, AUDIT_FD_PAIR);
+		if (ab) {
+			audit_log_format(ab, "fd0=%d fd1=%d",
+					context->fds[0], context->fds[1]);
+			audit_log_end(ab);
+		}
+	}
+
 	if (context->sockaddr_len) {
 		ab = audit_log_start(context, GFP_KERNEL, AUDIT_SOCKADDR);
 		if (ab) {
@@ -1696,6 +1696,7 @@ void audit_syscall_exit(int valid, long return_code)
 		context->target_sid = 0;
 		context->sockaddr_len = 0;
 		context->type = 0;
+		context->fds[0] = -1;
 		kfree(context->filterkey);
 		context->filterkey = NULL;
 		tsk->audit_context = context;
@@ -2291,29 +2292,12 @@ void audit_socketcall(int nargs, unsigned long *args)
  * @fd1: the first file descriptor
  * @fd2: the second file descriptor
  *
- * Returns 0 for success or NULL context or < 0 on error.
  */
-int __audit_fd_pair(int fd1, int fd2)
+void __audit_fd_pair(int fd1, int fd2)
 {
 	struct audit_context *context = current->audit_context;
-	struct audit_aux_data_fd_pair *ax;
-
-	if (likely(!context)) {
-		return 0;
-	}
-
-	ax = kmalloc(sizeof(*ax), GFP_KERNEL);
-	if (!ax) {
-		return -ENOMEM;
-	}
-
-	ax->fd[0] = fd1;
-	ax->fd[1] = fd2;
-
-	ax->d.type = AUDIT_FD_PAIR;
-	ax->d.next = context->aux;
-	context->aux = (void *)ax;
-	return 0;
+	context->fds[0] = fd1;
+	context->fds[1] = fd2;
 }
 
 /**
diff --git a/net/socket.c b/net/socket.c
index b41a92093e40..06603d73c411 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1313,13 +1313,7 @@ asmlinkage long sys_socketpair(int family, int type, int protocol,
 		goto out_fd1;
 	}
 
-	err = audit_fd_pair(fd1, fd2);
-	if (err < 0) {
-		fput(newfile1);
-		fput(newfile2);
-		goto out_fd;
-	}
-
+	audit_fd_pair(fd1, fd2);
 	fd_install(fd1, newfile1);
 	fd_install(fd2, newfile2);
 	/* fd1 and fd2 may be already another descriptors.
@@ -1349,7 +1343,6 @@ out_fd2:
 out_fd1:
 	put_filp(newfile2);
 	sock_release(sock2);
-out_fd:
 	put_unused_fd(fd1);
 	put_unused_fd(fd2);
 	goto out;
-- 
cgit v1.2.3


From 57f71a0af4244d9ba3c0bce74b1d2e66e8d520bd Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 4 Jan 2009 14:52:57 -0500
Subject: sanitize audit_log_capset()

* no allocations
* return void
* don't duplicate checked for dummy context

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/audit.h |  9 ++++-----
 kernel/auditsc.c      | 44 ++++++++++++++++----------------------------
 kernel/capability.c   |  4 +---
 3 files changed, 21 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index bd59cd1e3219..7ddcb6a29eb1 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -457,7 +457,7 @@ extern void __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat);
 extern int __audit_log_bprm_fcaps(struct linux_binprm *bprm,
 				  const struct cred *new,
 				  const struct cred *old);
-extern int __audit_log_capset(pid_t pid, const struct cred *new, const struct cred *old);
+extern void __audit_log_capset(pid_t pid, const struct cred *new, const struct cred *old);
 
 static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp)
 {
@@ -504,12 +504,11 @@ static inline int audit_log_bprm_fcaps(struct linux_binprm *bprm,
 	return 0;
 }
 
-static inline int audit_log_capset(pid_t pid, const struct cred *new,
+static inline void audit_log_capset(pid_t pid, const struct cred *new,
 				   const struct cred *old)
 {
 	if (unlikely(!audit_dummy_context()))
-		return __audit_log_capset(pid, new, old);
-	return 0;
+		__audit_log_capset(pid, new, old);
 }
 
 extern int audit_n_rules;
@@ -544,7 +543,7 @@ extern int audit_signals;
 #define audit_mq_notify(d,n) ((void)0)
 #define audit_mq_getsetattr(d,s) ((void)0)
 #define audit_log_bprm_fcaps(b, ncr, ocr) ({ 0; })
-#define audit_log_capset(pid, ncr, ocr) ({ 0; })
+#define audit_log_capset(pid, ncr, ocr) ((void)0)
 #define audit_ptrace(t) ((void)0)
 #define audit_n_rules 0
 #define audit_signals 0
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 327e65d50674..c76a58215f54 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -235,6 +235,10 @@ struct audit_context {
 			mode_t			mode;
 			struct mq_attr		attr;
 		} mq_open;
+		struct {
+			pid_t			pid;
+			struct audit_cap_data	cap;
+		} capset;
 	};
 	int fds[2];
 
@@ -1291,6 +1295,12 @@ static void show_special(struct audit_context *context, int *call_panic)
 			attr->mq_flags, attr->mq_maxmsg,
 			attr->mq_msgsize, attr->mq_curmsgs);
 		break; }
+	case AUDIT_CAPSET: {
+		audit_log_format(ab, "pid=%d", context->capset.pid);
+		audit_log_cap(ab, "cap_pi", &context->capset.cap.inheritable);
+		audit_log_cap(ab, "cap_pp", &context->capset.cap.permitted);
+		audit_log_cap(ab, "cap_pe", &context->capset.cap.effective);
+		break; }
 	}
 	audit_log_end(ab);
 }
@@ -1392,14 +1402,6 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 			audit_log_cap(ab, "new_pe", &axs->new_pcap.effective);
 			break; }
 
-		case AUDIT_CAPSET: {
-			struct audit_aux_data_capset *axs = (void *)aux;
-			audit_log_format(ab, "pid=%d", axs->pid);
-			audit_log_cap(ab, "cap_pi", &axs->cap.inheritable);
-			audit_log_cap(ab, "cap_pp", &axs->cap.permitted);
-			audit_log_cap(ab, "cap_pe", &axs->cap.effective);
-			break; }
-
 		}
 		audit_log_end(ab);
 	}
@@ -2456,29 +2458,15 @@ int __audit_log_bprm_fcaps(struct linux_binprm *bprm,
  * Record the aguments userspace sent to sys_capset for later printing by the
  * audit system if applicable
  */
-int __audit_log_capset(pid_t pid,
+void __audit_log_capset(pid_t pid,
 		       const struct cred *new, const struct cred *old)
 {
-	struct audit_aux_data_capset *ax;
 	struct audit_context *context = current->audit_context;
-
-	if (likely(!audit_enabled || !context || context->dummy))
-		return 0;
-
-	ax = kmalloc(sizeof(*ax), GFP_KERNEL);
-	if (!ax)
-		return -ENOMEM;
-
-	ax->d.type = AUDIT_CAPSET;
-	ax->d.next = context->aux;
-	context->aux = (void *)ax;
-
-	ax->pid = pid;
-	ax->cap.effective   = new->cap_effective;
-	ax->cap.inheritable = new->cap_effective;
-	ax->cap.permitted   = new->cap_permitted;
-
-	return 0;
+	context->capset.pid = pid;
+	context->capset.cap.effective   = new->cap_effective;
+	context->capset.cap.inheritable = new->cap_effective;
+	context->capset.cap.permitted   = new->cap_permitted;
+	context->type = AUDIT_CAPSET;
 }
 
 /**
diff --git a/kernel/capability.c b/kernel/capability.c
index 36b4b4daebec..c598d9d5be4f 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -280,9 +280,7 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
 	if (ret < 0)
 		goto error;
 
-	ret = audit_log_capset(pid, new, current_cred());
-	if (ret < 0)
-		return ret;
+	audit_log_capset(pid, new, current_cred());
 
 	return commit_creds(new);
 
-- 
cgit v1.2.3


From 0590b9335a1c72a3f0defcc6231287f7817e07c8 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 14 Dec 2008 23:45:27 -0500
Subject: fixing audit rule ordering mess, part 1

Problem: ordering between the rules on exit chain is currently lost;
all watch and inode rules are listed after everything else _and_
exit,never on one kind doesn't stop exit,always on another from
being matched.

Solution: assign priorities to rules, keep track of the current
highest-priority matching rule and its result (always/never).

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/audit.h |  1 +
 kernel/audit.h        |  5 +---
 kernel/auditfilter.c  | 17 +++++++++--
 kernel/auditsc.c      | 79 ++++++++++++++++++++++++++++-----------------------
 4 files changed, 59 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 7ddcb6a29eb1..5b47eeb00d53 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -373,6 +373,7 @@ struct audit_krule {
 	struct audit_watch	*watch;	/* associated watch */
 	struct audit_tree	*tree;	/* associated watched tree */
 	struct list_head	rlist;	/* entry in audit_{watch,tree}.rules list */
+	u64			prio;
 };
 
 struct audit_field {
diff --git a/kernel/audit.h b/kernel/audit.h
index 9d6717412fec..16f18cac661b 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -159,11 +159,8 @@ static inline int audit_signal_info(int sig, struct task_struct *t)
 		return __audit_signal_info(sig, t);
 	return 0;
 }
-extern enum audit_state audit_filter_inodes(struct task_struct *,
-					    struct audit_context *);
-extern void audit_set_auditable(struct audit_context *);
+extern void audit_filter_inodes(struct task_struct *, struct audit_context *);
 #else
 #define audit_signal_info(s,t) AUDIT_DISABLED
 #define audit_filter_inodes(t,c) AUDIT_DISABLED
-#define audit_set_auditable(c)
 #endif
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 0febaa0f784c..995a2e86808d 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -919,6 +919,7 @@ static struct audit_entry *audit_dupe_rule(struct audit_krule *old,
 	new->action = old->action;
 	for (i = 0; i < AUDIT_BITMASK_SIZE; i++)
 		new->mask[i] = old->mask[i];
+	new->prio = old->prio;
 	new->buflen = old->buflen;
 	new->inode_f = old->inode_f;
 	new->watch = NULL;
@@ -987,9 +988,8 @@ static void audit_update_watch(struct audit_parent *parent,
 
 		/* If the update involves invalidating rules, do the inode-based
 		 * filtering now, so we don't omit records. */
-		if (invalidating && current->audit_context &&
-		    audit_filter_inodes(current, current->audit_context) == AUDIT_RECORD_CONTEXT)
-			audit_set_auditable(current->audit_context);
+		if (invalidating && current->audit_context)
+			audit_filter_inodes(current, current->audit_context);
 
 		nwatch = audit_dupe_watch(owatch);
 		if (IS_ERR(nwatch)) {
@@ -1258,6 +1258,9 @@ static int audit_add_watch(struct audit_krule *krule, struct nameidata *ndp,
 	return ret;
 }
 
+static u64 prio_low = ~0ULL/2;
+static u64 prio_high = ~0ULL/2 - 1;
+
 /* Add rule to given filterlist if not a duplicate. */
 static inline int audit_add_rule(struct audit_entry *entry,
 				 struct list_head *list)
@@ -1319,6 +1322,14 @@ static inline int audit_add_rule(struct audit_entry *entry,
 		}
 	}
 
+	entry->rule.prio = ~0ULL;
+	if (entry->rule.listnr == AUDIT_FILTER_EXIT) {
+		if (entry->rule.flags & AUDIT_FILTER_PREPEND)
+			entry->rule.prio = ++prio_high;
+		else
+			entry->rule.prio = --prio_low;
+	}
+
 	if (entry->rule.flags & AUDIT_FILTER_PREPEND) {
 		list_add_rcu(&entry->list, list);
 		entry->rule.flags &= ~AUDIT_FILTER_PREPEND;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index c76a58215f54..19d2c2747c8d 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -165,14 +165,14 @@ struct audit_tree_refs {
 struct audit_context {
 	int		    dummy;	/* must be the first element */
 	int		    in_syscall;	/* 1 if task is in a syscall */
-	enum audit_state    state;
+	enum audit_state    state, current_state;
 	unsigned int	    serial;     /* serial number for record */
 	struct timespec	    ctime;      /* time of syscall entry */
 	int		    major;      /* syscall number */
 	unsigned long	    argv[4];    /* syscall arguments */
 	int		    return_valid; /* return code is valid */
 	long		    return_code;/* syscall return code */
-	int		    auditable;  /* 1 if record should be written */
+	u64		    prio;
 	int		    name_count;
 	struct audit_names  names[AUDIT_NAMES];
 	char *		    filterkey;	/* key for rule that triggered record */
@@ -630,8 +630,16 @@ static int audit_filter_rules(struct task_struct *tsk,
 			return 0;
 		}
 	}
-	if (rule->filterkey && ctx)
-		ctx->filterkey = kstrdup(rule->filterkey, GFP_ATOMIC);
+
+	if (ctx) {
+		if (rule->prio <= ctx->prio)
+			return 0;
+		if (rule->filterkey) {
+			kfree(ctx->filterkey);
+			ctx->filterkey = kstrdup(rule->filterkey, GFP_ATOMIC);
+		}
+		ctx->prio = rule->prio;
+	}
 	switch (rule->action) {
 	case AUDIT_NEVER:    *state = AUDIT_DISABLED;	    break;
 	case AUDIT_ALWAYS:   *state = AUDIT_RECORD_CONTEXT; break;
@@ -685,6 +693,7 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk,
 			    audit_filter_rules(tsk, &e->rule, ctx, NULL,
 					       &state)) {
 				rcu_read_unlock();
+				ctx->current_state = state;
 				return state;
 			}
 		}
@@ -698,15 +707,14 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk,
  * buckets applicable to the inode numbers in audit_names[].
  * Regarding audit_state, same rules apply as for audit_filter_syscall().
  */
-enum audit_state audit_filter_inodes(struct task_struct *tsk,
-				     struct audit_context *ctx)
+void audit_filter_inodes(struct task_struct *tsk, struct audit_context *ctx)
 {
 	int i;
 	struct audit_entry *e;
 	enum audit_state state;
 
 	if (audit_pid && tsk->tgid == audit_pid)
-		return AUDIT_DISABLED;
+		return;
 
 	rcu_read_lock();
 	for (i = 0; i < ctx->name_count; i++) {
@@ -723,17 +731,20 @@ enum audit_state audit_filter_inodes(struct task_struct *tsk,
 			if ((e->rule.mask[word] & bit) == bit &&
 			    audit_filter_rules(tsk, &e->rule, ctx, n, &state)) {
 				rcu_read_unlock();
-				return state;
+				ctx->current_state = state;
+				return;
 			}
 		}
 	}
 	rcu_read_unlock();
-	return AUDIT_BUILD_CONTEXT;
 }
 
-void audit_set_auditable(struct audit_context *ctx)
+static void audit_set_auditable(struct audit_context *ctx)
 {
-	ctx->auditable = 1;
+	if (!ctx->prio) {
+		ctx->prio = 1;
+		ctx->current_state = AUDIT_RECORD_CONTEXT;
+	}
 }
 
 static inline struct audit_context *audit_get_context(struct task_struct *tsk,
@@ -764,23 +775,11 @@ static inline struct audit_context *audit_get_context(struct task_struct *tsk,
 	else
 		context->return_code  = return_code;
 
-	if (context->in_syscall && !context->dummy && !context->auditable) {
-		enum audit_state state;
-
-		state = audit_filter_syscall(tsk, context, &audit_filter_list[AUDIT_FILTER_EXIT]);
-		if (state == AUDIT_RECORD_CONTEXT) {
-			context->auditable = 1;
-			goto get_context;
-		}
-
-		state = audit_filter_inodes(tsk, context);
-		if (state == AUDIT_RECORD_CONTEXT)
-			context->auditable = 1;
-
+	if (context->in_syscall && !context->dummy) {
+		audit_filter_syscall(tsk, context, &audit_filter_list[AUDIT_FILTER_EXIT]);
+		audit_filter_inodes(tsk, context);
 	}
 
-get_context:
-
 	tsk->audit_context = NULL;
 	return context;
 }
@@ -790,8 +789,7 @@ static inline void audit_free_names(struct audit_context *context)
 	int i;
 
 #if AUDIT_DEBUG == 2
-	if (context->auditable
-	    ||context->put_count + context->ino_count != context->name_count) {
+	if (context->put_count + context->ino_count != context->name_count) {
 		printk(KERN_ERR "%s:%d(:%d): major=%d in_syscall=%d"
 		       " name_count=%d put_count=%d"
 		       " ino_count=%d [NOT freeing]\n",
@@ -842,6 +840,7 @@ static inline void audit_zero_context(struct audit_context *context,
 {
 	memset(context, 0, sizeof(*context));
 	context->state      = state;
+	context->prio = state == AUDIT_RECORD_CONTEXT ? ~0ULL : 0;
 }
 
 static inline struct audit_context *audit_alloc_context(enum audit_state state)
@@ -1543,7 +1542,7 @@ void audit_free(struct task_struct *tsk)
 	 * We use GFP_ATOMIC here because we might be doing this
 	 * in the context of the idle thread */
 	/* that can happen only if we are called from do_exit() */
-	if (context->in_syscall && context->auditable)
+	if (context->in_syscall && context->current_state == AUDIT_RECORD_CONTEXT)
 		audit_log_exit(context, tsk);
 
 	audit_free_context(context);
@@ -1627,15 +1626,17 @@ void audit_syscall_entry(int arch, int major,
 
 	state = context->state;
 	context->dummy = !audit_n_rules;
-	if (!context->dummy && (state == AUDIT_SETUP_CONTEXT || state == AUDIT_BUILD_CONTEXT))
+	if (!context->dummy && state == AUDIT_BUILD_CONTEXT) {
+		context->prio = 0;
 		state = audit_filter_syscall(tsk, context, &audit_filter_list[AUDIT_FILTER_ENTRY]);
+	}
 	if (likely(state == AUDIT_DISABLED))
 		return;
 
 	context->serial     = 0;
 	context->ctime      = CURRENT_TIME;
 	context->in_syscall = 1;
-	context->auditable  = !!(state == AUDIT_RECORD_CONTEXT);
+	context->current_state  = state;
 	context->ppid       = 0;
 }
 
@@ -1643,17 +1644,20 @@ void audit_finish_fork(struct task_struct *child)
 {
 	struct audit_context *ctx = current->audit_context;
 	struct audit_context *p = child->audit_context;
-	if (!p || !ctx || !ctx->auditable)
+	if (!p || !ctx)
+		return;
+	if (!ctx->in_syscall || ctx->current_state != AUDIT_RECORD_CONTEXT)
 		return;
 	p->arch = ctx->arch;
 	p->major = ctx->major;
 	memcpy(p->argv, ctx->argv, sizeof(ctx->argv));
 	p->ctime = ctx->ctime;
 	p->dummy = ctx->dummy;
-	p->auditable = ctx->auditable;
 	p->in_syscall = ctx->in_syscall;
 	p->filterkey = kstrdup(ctx->filterkey, GFP_KERNEL);
 	p->ppid = current->pid;
+	p->prio = ctx->prio;
+	p->current_state = ctx->current_state;
 }
 
 /**
@@ -1677,11 +1681,11 @@ void audit_syscall_exit(int valid, long return_code)
 	if (likely(!context))
 		return;
 
-	if (context->in_syscall && context->auditable)
+	if (context->in_syscall && context->current_state == AUDIT_RECORD_CONTEXT)
 		audit_log_exit(context, tsk);
 
 	context->in_syscall = 0;
-	context->auditable  = 0;
+	context->prio = context->state == AUDIT_RECORD_CONTEXT ? ~0ULL : 0;
 
 	if (context->previous) {
 		struct audit_context *new_context = context->previous;
@@ -2091,7 +2095,10 @@ int auditsc_get_stamp(struct audit_context *ctx,
 	t->tv_sec  = ctx->ctime.tv_sec;
 	t->tv_nsec = ctx->ctime.tv_nsec;
 	*serial    = ctx->serial;
-	ctx->auditable = 1;
+	if (!ctx->prio) {
+		ctx->prio = 1;
+		ctx->current_state = AUDIT_RECORD_CONTEXT;
+	}
 	return 1;
 }
 
-- 
cgit v1.2.3


From e45aa212ea81d39b38ba158df344dc3a500153e5 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 15 Dec 2008 01:17:50 -0500
Subject: audit rules ordering, part 2

Fix the actual rule listing; add per-type lists _not_ used for matching,
with all exit,... sitting on one such list.  Simplifies "do something
for all rules" logics, while we are at it...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/audit.h |  1 +
 kernel/audit_tree.c   |  1 +
 kernel/auditfilter.c  | 95 +++++++++++++++++++++------------------------------
 3 files changed, 41 insertions(+), 56 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 5b47eeb00d53..cc71fdb56ae2 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -373,6 +373,7 @@ struct audit_krule {
 	struct audit_watch	*watch;	/* associated watch */
 	struct audit_tree	*tree;	/* associated watched tree */
 	struct list_head	rlist;	/* entry in audit_{watch,tree}.rules list */
+	struct list_head	list;	/* for AUDIT_LIST* purposes only */
 	u64			prio;
 };
 
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 8b509441f49a..48bddad2a3dc 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -450,6 +450,7 @@ static void kill_rules(struct audit_tree *tree)
 			audit_log_end(ab);
 			rule->tree = NULL;
 			list_del_rcu(&entry->list);
+			list_del(&entry->rule.list);
 			call_rcu(&entry->rcu, audit_free_rule_rcu);
 		}
 	}
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 995a2e86808d..5d4edc6f7a32 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -86,6 +86,14 @@ struct list_head audit_filter_list[AUDIT_NR_FILTERS] = {
 #error Fix audit_filter_list initialiser
 #endif
 };
+static struct list_head audit_rules_list[AUDIT_NR_FILTERS] = {
+	LIST_HEAD_INIT(audit_rules_list[0]),
+	LIST_HEAD_INIT(audit_rules_list[1]),
+	LIST_HEAD_INIT(audit_rules_list[2]),
+	LIST_HEAD_INIT(audit_rules_list[3]),
+	LIST_HEAD_INIT(audit_rules_list[4]),
+	LIST_HEAD_INIT(audit_rules_list[5]),
+};
 
 DEFINE_MUTEX(audit_filter_mutex);
 
@@ -1007,12 +1015,15 @@ static void audit_update_watch(struct audit_parent *parent,
 			list_del_rcu(&oentry->list);
 
 			nentry = audit_dupe_rule(&oentry->rule, nwatch);
-			if (IS_ERR(nentry))
+			if (IS_ERR(nentry)) {
+				list_del(&oentry->rule.list);
 				audit_panic("error updating watch, removing");
-			else {
+			} else {
 				int h = audit_hash_ino((u32)ino);
 				list_add(&nentry->rule.rlist, &nwatch->rules);
 				list_add_rcu(&nentry->list, &audit_inode_hash[h]);
+				list_replace(&oentry->rule.list,
+					     &nentry->rule.list);
 			}
 
 			call_rcu(&oentry->rcu, audit_free_rule_rcu);
@@ -1077,6 +1088,7 @@ static void audit_remove_parent_watches(struct audit_parent *parent)
 				audit_log_end(ab);
 			}
 			list_del(&r->rlist);
+			list_del(&r->list);
 			list_del_rcu(&e->list);
 			call_rcu(&e->rcu, audit_free_rule_rcu);
 		}
@@ -1331,9 +1343,13 @@ static inline int audit_add_rule(struct audit_entry *entry,
 	}
 
 	if (entry->rule.flags & AUDIT_FILTER_PREPEND) {
+		list_add(&entry->rule.list,
+			 &audit_rules_list[entry->rule.listnr]);
 		list_add_rcu(&entry->list, list);
 		entry->rule.flags &= ~AUDIT_FILTER_PREPEND;
 	} else {
+		list_add_tail(&entry->rule.list,
+			      &audit_rules_list[entry->rule.listnr]);
 		list_add_tail_rcu(&entry->list, list);
 	}
 #ifdef CONFIG_AUDITSYSCALL
@@ -1415,6 +1431,7 @@ static inline int audit_del_rule(struct audit_entry *entry,
 		audit_remove_tree_rule(&e->rule);
 
 	list_del_rcu(&e->list);
+	list_del(&e->rule.list);
 	call_rcu(&e->rcu, audit_free_rule_rcu);
 
 #ifdef CONFIG_AUDITSYSCALL
@@ -1443,30 +1460,16 @@ out:
 static void audit_list(int pid, int seq, struct sk_buff_head *q)
 {
 	struct sk_buff *skb;
-	struct audit_entry *entry;
+	struct audit_krule *r;
 	int i;
 
 	/* This is a blocking read, so use audit_filter_mutex instead of rcu
 	 * iterator to sync with list writers. */
 	for (i=0; i<AUDIT_NR_FILTERS; i++) {
-		list_for_each_entry(entry, &audit_filter_list[i], list) {
-			struct audit_rule *rule;
-
-			rule = audit_krule_to_rule(&entry->rule);
-			if (unlikely(!rule))
-				break;
-			skb = audit_make_reply(pid, seq, AUDIT_LIST, 0, 1,
-					 rule, sizeof(*rule));
-			if (skb)
-				skb_queue_tail(q, skb);
-			kfree(rule);
-		}
-	}
-	for (i = 0; i < AUDIT_INODE_BUCKETS; i++) {
-		list_for_each_entry(entry, &audit_inode_hash[i], list) {
+		list_for_each_entry(r, &audit_rules_list[i], list) {
 			struct audit_rule *rule;
 
-			rule = audit_krule_to_rule(&entry->rule);
+			rule = audit_krule_to_rule(r);
 			if (unlikely(!rule))
 				break;
 			skb = audit_make_reply(pid, seq, AUDIT_LIST, 0, 1,
@@ -1485,30 +1488,16 @@ static void audit_list(int pid, int seq, struct sk_buff_head *q)
 static void audit_list_rules(int pid, int seq, struct sk_buff_head *q)
 {
 	struct sk_buff *skb;
-	struct audit_entry *e;
+	struct audit_krule *r;
 	int i;
 
 	/* This is a blocking read, so use audit_filter_mutex instead of rcu
 	 * iterator to sync with list writers. */
 	for (i=0; i<AUDIT_NR_FILTERS; i++) {
-		list_for_each_entry(e, &audit_filter_list[i], list) {
-			struct audit_rule_data *data;
-
-			data = audit_krule_to_data(&e->rule);
-			if (unlikely(!data))
-				break;
-			skb = audit_make_reply(pid, seq, AUDIT_LIST_RULES, 0, 1,
-					 data, sizeof(*data) + data->buflen);
-			if (skb)
-				skb_queue_tail(q, skb);
-			kfree(data);
-		}
-	}
-	for (i=0; i< AUDIT_INODE_BUCKETS; i++) {
-		list_for_each_entry(e, &audit_inode_hash[i], list) {
+		list_for_each_entry(r, &audit_rules_list[i], list) {
 			struct audit_rule_data *data;
 
-			data = audit_krule_to_data(&e->rule);
+			data = audit_krule_to_data(r);
 			if (unlikely(!data))
 				break;
 			skb = audit_make_reply(pid, seq, AUDIT_LIST_RULES, 0, 1,
@@ -1789,35 +1778,37 @@ unlock_and_return:
 	return result;
 }
 
-static int update_lsm_rule(struct audit_entry *entry)
+static int update_lsm_rule(struct audit_krule *r)
 {
+	struct audit_entry *entry = container_of(r, struct audit_entry, rule);
 	struct audit_entry *nentry;
 	struct audit_watch *watch;
 	struct audit_tree *tree;
 	int err = 0;
 
-	if (!security_audit_rule_known(&entry->rule))
+	if (!security_audit_rule_known(r))
 		return 0;
 
-	watch = entry->rule.watch;
-	tree = entry->rule.tree;
-	nentry = audit_dupe_rule(&entry->rule, watch);
+	watch = r->watch;
+	tree = r->tree;
+	nentry = audit_dupe_rule(r, watch);
 	if (IS_ERR(nentry)) {
 		/* save the first error encountered for the
 		 * return value */
 		err = PTR_ERR(nentry);
 		audit_panic("error updating LSM filters");
 		if (watch)
-			list_del(&entry->rule.rlist);
+			list_del(&r->rlist);
 		list_del_rcu(&entry->list);
+		list_del(&r->list);
 	} else {
 		if (watch) {
 			list_add(&nentry->rule.rlist, &watch->rules);
-			list_del(&entry->rule.rlist);
+			list_del(&r->rlist);
 		} else if (tree)
-			list_replace_init(&entry->rule.rlist,
-				     &nentry->rule.rlist);
+			list_replace_init(&r->rlist, &nentry->rule.rlist);
 		list_replace_rcu(&entry->list, &nentry->list);
+		list_replace(&r->list, &nentry->rule.list);
 	}
 	call_rcu(&entry->rcu, audit_free_rule_rcu);
 
@@ -1831,27 +1822,19 @@ static int update_lsm_rule(struct audit_entry *entry)
  * updated rule. */
 int audit_update_lsm_rules(void)
 {
-	struct audit_entry *e, *n;
+	struct audit_krule *r, *n;
 	int i, err = 0;
 
 	/* audit_filter_mutex synchronizes the writers */
 	mutex_lock(&audit_filter_mutex);
 
 	for (i = 0; i < AUDIT_NR_FILTERS; i++) {
-		list_for_each_entry_safe(e, n, &audit_filter_list[i], list) {
-			int res = update_lsm_rule(e);
-			if (!err)
-				err = res;
-		}
-	}
-	for (i=0; i< AUDIT_INODE_BUCKETS; i++) {
-		list_for_each_entry_safe(e, n, &audit_inode_hash[i], list) {
-			int res = update_lsm_rule(e);
+		list_for_each_entry_safe(r, n, &audit_rules_list[i], list) {
+			int res = update_lsm_rule(r);
 			if (!err)
 				err = res;
 		}
 	}
-
 	mutex_unlock(&audit_filter_mutex);
 
 	return err;
-- 
cgit v1.2.3


From 5af75d8d58d0f9f7b7c0515b35786b22892d5f12 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 16 Dec 2008 05:59:26 -0500
Subject: audit: validate comparison operations, store them in sane form

Don't store the field->op in the messy (and very inconvenient for e.g.
audit_comparator()) form; translate to dense set of values and do full
validation of userland-submitted value while we are at it.

->audit_init_rule() and ->audit_match_rule() get new values now; in-tree
instances updated.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/audit.h          |  12 ++++
 kernel/audit_tree.c            |   2 +-
 kernel/auditfilter.c           | 132 ++++++++++++++++++++---------------------
 security/selinux/ss/services.c |  26 ++++----
 security/smack/smack_lsm.c     |   6 +-
 5 files changed, 94 insertions(+), 84 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index cc71fdb56ae2..67e5dbfc2961 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -247,6 +247,18 @@
 #define AUDIT_GREATER_THAN_OR_EQUAL	(AUDIT_GREATER_THAN|AUDIT_EQUAL)
 #define AUDIT_OPERATORS			(AUDIT_EQUAL|AUDIT_NOT_EQUAL|AUDIT_BIT_MASK)
 
+enum {
+	Audit_equal,
+	Audit_not_equal,
+	Audit_bitmask,
+	Audit_bittest,
+	Audit_lt,
+	Audit_gt,
+	Audit_le,
+	Audit_ge,
+	Audit_bad
+};
+
 /* Status symbols */
 				/* Mask values */
 #define AUDIT_STATUS_ENABLED		0x0001
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 48bddad2a3dc..8ad9545b8db9 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -618,7 +618,7 @@ int audit_make_tree(struct audit_krule *rule, char *pathname, u32 op)
 
 	if (pathname[0] != '/' ||
 	    rule->listnr != AUDIT_FILTER_EXIT ||
-	    op & ~AUDIT_EQUAL ||
+	    op != Audit_equal ||
 	    rule->inode_f || rule->watch || rule->tree)
 		return -EINVAL;
 	rule->tree = alloc_tree(pathname);
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index e6e3829cadd1..fbf24d121d97 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -252,7 +252,8 @@ static inline int audit_to_inode(struct audit_krule *krule,
 				 struct audit_field *f)
 {
 	if (krule->listnr != AUDIT_FILTER_EXIT ||
-	    krule->watch || krule->inode_f || krule->tree)
+	    krule->watch || krule->inode_f || krule->tree ||
+	    (f->op != Audit_equal && f->op != Audit_not_equal))
 		return -EINVAL;
 
 	krule->inode_f = f;
@@ -270,7 +271,7 @@ static int audit_to_watch(struct audit_krule *krule, char *path, int len,
 
 	if (path[0] != '/' || path[len-1] == '/' ||
 	    krule->listnr != AUDIT_FILTER_EXIT ||
-	    op & ~AUDIT_EQUAL ||
+	    op != Audit_equal ||
 	    krule->inode_f || krule->watch || krule->tree)
 		return -EINVAL;
 
@@ -420,12 +421,32 @@ exit_err:
 	return ERR_PTR(err);
 }
 
+static u32 audit_ops[] =
+{
+	[Audit_equal] = AUDIT_EQUAL,
+	[Audit_not_equal] = AUDIT_NOT_EQUAL,
+	[Audit_bitmask] = AUDIT_BIT_MASK,
+	[Audit_bittest] = AUDIT_BIT_TEST,
+	[Audit_lt] = AUDIT_LESS_THAN,
+	[Audit_gt] = AUDIT_GREATER_THAN,
+	[Audit_le] = AUDIT_LESS_THAN_OR_EQUAL,
+	[Audit_ge] = AUDIT_GREATER_THAN_OR_EQUAL,
+};
+
+static u32 audit_to_op(u32 op)
+{
+	u32 n;
+	for (n = Audit_equal; n < Audit_bad && audit_ops[n] != op; n++)
+		;
+	return n;
+}
+
+
 /* Translate struct audit_rule to kernel's rule respresentation.
  * Exists for backward compatibility with userspace. */
 static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
 {
 	struct audit_entry *entry;
-	struct audit_field *ino_f;
 	int err = 0;
 	int i;
 
@@ -435,12 +456,28 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
 
 	for (i = 0; i < rule->field_count; i++) {
 		struct audit_field *f = &entry->rule.fields[i];
+		u32 n;
+
+		n = rule->fields[i] & (AUDIT_NEGATE|AUDIT_OPERATORS);
+
+		/* Support for legacy operators where
+		 * AUDIT_NEGATE bit signifies != and otherwise assumes == */
+		if (n & AUDIT_NEGATE)
+			f->op = Audit_not_equal;
+		else if (!n)
+			f->op = Audit_equal;
+		else
+			f->op = audit_to_op(n);
+
+		entry->rule.vers_ops = (n & AUDIT_OPERATORS) ? 2 : 1;
 
-		f->op = rule->fields[i] & (AUDIT_NEGATE|AUDIT_OPERATORS);
 		f->type = rule->fields[i] & ~(AUDIT_NEGATE|AUDIT_OPERATORS);
 		f->val = rule->values[i];
 
 		err = -EINVAL;
+		if (f->op == Audit_bad)
+			goto exit_free;
+
 		switch(f->type) {
 		default:
 			goto exit_free;
@@ -462,11 +499,8 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
 		case AUDIT_EXIT:
 		case AUDIT_SUCCESS:
 			/* bit ops are only useful on syscall args */
-			if (f->op == AUDIT_BIT_MASK ||
-						f->op == AUDIT_BIT_TEST) {
-				err = -EINVAL;
+			if (f->op == Audit_bitmask || f->op == Audit_bittest)
 				goto exit_free;
-			}
 			break;
 		case AUDIT_ARG0:
 		case AUDIT_ARG1:
@@ -475,11 +509,8 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
 			break;
 		/* arch is only allowed to be = or != */
 		case AUDIT_ARCH:
-			if ((f->op != AUDIT_NOT_EQUAL) && (f->op != AUDIT_EQUAL)
-					&& (f->op != AUDIT_NEGATE) && (f->op)) {
-				err = -EINVAL;
+			if (f->op != Audit_not_equal && f->op != Audit_equal)
 				goto exit_free;
-			}
 			entry->rule.arch_f = f;
 			break;
 		case AUDIT_PERM:
@@ -496,33 +527,10 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
 				goto exit_free;
 			break;
 		}
-
-		entry->rule.vers_ops = (f->op & AUDIT_OPERATORS) ? 2 : 1;
-
-		/* Support for legacy operators where
-		 * AUDIT_NEGATE bit signifies != and otherwise assumes == */
-		if (f->op & AUDIT_NEGATE)
-			f->op = AUDIT_NOT_EQUAL;
-		else if (!f->op)
-			f->op = AUDIT_EQUAL;
-		else if (f->op == AUDIT_OPERATORS) {
-			err = -EINVAL;
-			goto exit_free;
-		}
 	}
 
-	ino_f = entry->rule.inode_f;
-	if (ino_f) {
-		switch(ino_f->op) {
-		case AUDIT_NOT_EQUAL:
-			entry->rule.inode_f = NULL;
-		case AUDIT_EQUAL:
-			break;
-		default:
-			err = -EINVAL;
-			goto exit_free;
-		}
-	}
+	if (entry->rule.inode_f && entry->rule.inode_f->op == Audit_not_equal)
+		entry->rule.inode_f = NULL;
 
 exit_nofree:
 	return entry;
@@ -538,7 +546,6 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 {
 	int err = 0;
 	struct audit_entry *entry;
-	struct audit_field *ino_f;
 	void *bufp;
 	size_t remain = datasz - sizeof(struct audit_rule_data);
 	int i;
@@ -554,11 +561,11 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 		struct audit_field *f = &entry->rule.fields[i];
 
 		err = -EINVAL;
-		if (!(data->fieldflags[i] & AUDIT_OPERATORS) ||
-		    data->fieldflags[i] & ~AUDIT_OPERATORS)
+
+		f->op = audit_to_op(data->fieldflags[i]);
+		if (f->op == Audit_bad)
 			goto exit_free;
 
-		f->op = data->fieldflags[i] & AUDIT_OPERATORS;
 		f->type = data->fields[i];
 		f->val = data->values[i];
 		f->lsm_str = NULL;
@@ -670,18 +677,8 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 		}
 	}
 
-	ino_f = entry->rule.inode_f;
-	if (ino_f) {
-		switch(ino_f->op) {
-		case AUDIT_NOT_EQUAL:
-			entry->rule.inode_f = NULL;
-		case AUDIT_EQUAL:
-			break;
-		default:
-			err = -EINVAL;
-			goto exit_free;
-		}
-	}
+	if (entry->rule.inode_f && entry->rule.inode_f->op == Audit_not_equal)
+		entry->rule.inode_f = NULL;
 
 exit_nofree:
 	return entry;
@@ -721,10 +718,10 @@ static struct audit_rule *audit_krule_to_rule(struct audit_krule *krule)
 		rule->fields[i] = krule->fields[i].type;
 
 		if (krule->vers_ops == 1) {
-			if (krule->fields[i].op & AUDIT_NOT_EQUAL)
+			if (krule->fields[i].op == Audit_not_equal)
 				rule->fields[i] |= AUDIT_NEGATE;
 		} else {
-			rule->fields[i] |= krule->fields[i].op;
+			rule->fields[i] |= audit_ops[krule->fields[i].op];
 		}
 	}
 	for (i = 0; i < AUDIT_BITMASK_SIZE; i++) rule->mask[i] = krule->mask[i];
@@ -752,7 +749,7 @@ static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule)
 		struct audit_field *f = &krule->fields[i];
 
 		data->fields[i] = f->type;
-		data->fieldflags[i] = f->op;
+		data->fieldflags[i] = audit_ops[f->op];
 		switch(f->type) {
 		case AUDIT_SUBJ_USER:
 		case AUDIT_SUBJ_ROLE:
@@ -1626,28 +1623,29 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data,
 	return err;
 }
 
-int audit_comparator(const u32 left, const u32 op, const u32 right)
+int audit_comparator(u32 left, u32 op, u32 right)
 {
 	switch (op) {
-	case AUDIT_EQUAL:
+	case Audit_equal:
 		return (left == right);
-	case AUDIT_NOT_EQUAL:
+	case Audit_not_equal:
 		return (left != right);
-	case AUDIT_LESS_THAN:
+	case Audit_lt:
 		return (left < right);
-	case AUDIT_LESS_THAN_OR_EQUAL:
+	case Audit_le:
 		return (left <= right);
-	case AUDIT_GREATER_THAN:
+	case Audit_gt:
 		return (left > right);
-	case AUDIT_GREATER_THAN_OR_EQUAL:
+	case Audit_ge:
 		return (left >= right);
-	case AUDIT_BIT_MASK:
+	case Audit_bitmask:
 		return (left & right);
-	case AUDIT_BIT_TEST:
+	case Audit_bittest:
 		return ((left & right) == right);
+	default:
+		BUG();
+		return 0;
 	}
-	BUG();
-	return 0;
 }
 
 /* Compare given dentry name with last component in given path,
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index 343c8ab14af0..c65e4fe4a0f1 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -2602,7 +2602,7 @@ int selinux_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule)
 	case AUDIT_OBJ_ROLE:
 	case AUDIT_OBJ_TYPE:
 		/* only 'equals' and 'not equals' fit user, role, and type */
-		if (op != AUDIT_EQUAL && op != AUDIT_NOT_EQUAL)
+		if (op != Audit_equal && op != Audit_not_equal)
 			return -EINVAL;
 		break;
 	case AUDIT_SUBJ_SEN:
@@ -2736,10 +2736,10 @@ int selinux_audit_rule_match(u32 sid, u32 field, u32 op, void *vrule,
 	case AUDIT_SUBJ_USER:
 	case AUDIT_OBJ_USER:
 		switch (op) {
-		case AUDIT_EQUAL:
+		case Audit_equal:
 			match = (ctxt->user == rule->au_ctxt.user);
 			break;
-		case AUDIT_NOT_EQUAL:
+		case Audit_not_equal:
 			match = (ctxt->user != rule->au_ctxt.user);
 			break;
 		}
@@ -2747,10 +2747,10 @@ int selinux_audit_rule_match(u32 sid, u32 field, u32 op, void *vrule,
 	case AUDIT_SUBJ_ROLE:
 	case AUDIT_OBJ_ROLE:
 		switch (op) {
-		case AUDIT_EQUAL:
+		case Audit_equal:
 			match = (ctxt->role == rule->au_ctxt.role);
 			break;
-		case AUDIT_NOT_EQUAL:
+		case Audit_not_equal:
 			match = (ctxt->role != rule->au_ctxt.role);
 			break;
 		}
@@ -2758,10 +2758,10 @@ int selinux_audit_rule_match(u32 sid, u32 field, u32 op, void *vrule,
 	case AUDIT_SUBJ_TYPE:
 	case AUDIT_OBJ_TYPE:
 		switch (op) {
-		case AUDIT_EQUAL:
+		case Audit_equal:
 			match = (ctxt->type == rule->au_ctxt.type);
 			break;
-		case AUDIT_NOT_EQUAL:
+		case Audit_not_equal:
 			match = (ctxt->type != rule->au_ctxt.type);
 			break;
 		}
@@ -2774,31 +2774,31 @@ int selinux_audit_rule_match(u32 sid, u32 field, u32 op, void *vrule,
 			  field == AUDIT_OBJ_LEV_LOW) ?
 			 &ctxt->range.level[0] : &ctxt->range.level[1]);
 		switch (op) {
-		case AUDIT_EQUAL:
+		case Audit_equal:
 			match = mls_level_eq(&rule->au_ctxt.range.level[0],
 					     level);
 			break;
-		case AUDIT_NOT_EQUAL:
+		case Audit_not_equal:
 			match = !mls_level_eq(&rule->au_ctxt.range.level[0],
 					      level);
 			break;
-		case AUDIT_LESS_THAN:
+		case Audit_lt:
 			match = (mls_level_dom(&rule->au_ctxt.range.level[0],
 					       level) &&
 				 !mls_level_eq(&rule->au_ctxt.range.level[0],
 					       level));
 			break;
-		case AUDIT_LESS_THAN_OR_EQUAL:
+		case Audit_le:
 			match = mls_level_dom(&rule->au_ctxt.range.level[0],
 					      level);
 			break;
-		case AUDIT_GREATER_THAN:
+		case Audit_gt:
 			match = (mls_level_dom(level,
 					      &rule->au_ctxt.range.level[0]) &&
 				 !mls_level_eq(level,
 					       &rule->au_ctxt.range.level[0]));
 			break;
-		case AUDIT_GREATER_THAN_OR_EQUAL:
+		case Audit_ge:
 			match = mls_level_dom(level,
 					      &rule->au_ctxt.range.level[0]);
 			break;
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 1b5551dfc1f7..848212fd4845 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -2492,7 +2492,7 @@ static int smack_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule)
 	if (field != AUDIT_SUBJ_USER && field != AUDIT_OBJ_USER)
 		return -EINVAL;
 
-	if (op != AUDIT_EQUAL && op != AUDIT_NOT_EQUAL)
+	if (op != Audit_equal && op != Audit_not_equal)
 		return -EINVAL;
 
 	*rule = smk_import(rulestr, 0);
@@ -2556,9 +2556,9 @@ static int smack_audit_rule_match(u32 secid, u32 field, u32 op, void *vrule,
 	 * both pointers will point to the same smack_known
 	 * label.
 	 */
-	if (op == AUDIT_EQUAL)
+	if (op == Audit_equal)
 		return (rule == smack);
-	if (op == AUDIT_NOT_EQUAL)
+	if (op == Audit_not_equal)
 		return (rule != smack);
 
 	return 0;
-- 
cgit v1.2.3


From 0a30c5cefa53cbac429dcb2de906c0637b646253 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Sun, 4 Jan 2009 12:00:47 -0800
Subject: spi.h uses/needs device.h

Include header files as used/needed:

  In file included from drivers/leds/leds-dac124s085.c:16:
  include/linux/spi/spi.h:66: error: field 'dev' has incomplete type
  include/linux/spi/spi.h: In function 'to_spi_device':
  include/linux/spi/spi.h:100: warning: type defaults to 'int' in declaration of '__mptr'
  ...

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/spi/spi.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 4be01bb44377..82229317753d 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -19,6 +19,8 @@
 #ifndef __LINUX_SPI_H
 #define __LINUX_SPI_H
 
+#include <linux/device.h>
+
 /*
  * INTERFACES between SPI master-side drivers and SPI infrastructure.
  * (There's no SPI slave support for Linux yet...)
-- 
cgit v1.2.3


From c644f0e4b56f9a2fc066cd0d75a18074d130e4a3 Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Sun, 4 Jan 2009 12:00:48 -0800
Subject: fs: introduce bgl_lock_ptr()

As suggested by Andreas Dilger, introduce a bgl_lock_ptr() helper in
<linux/blockgroup_lock.h> and add separate sb_bgl_lock() helpers to
filesystem specific header files to break the hidden dependency to
struct ext[234]_sb_info.

Also, while at it, convert the macros to static inlines to try make up
for all the times I broke Andrew Morton's tree.

Acked-by: Andreas Dilger <adilger@sun.com>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ext4/ext4_sb.h               | 6 ++++++
 include/linux/blockgroup_lock.h | 7 +++++--
 include/linux/ext2_fs_sb.h      | 6 ++++++
 include/linux/ext3_fs_sb.h      | 6 ++++++
 4 files changed, 23 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h
index 445fde603df8..b21f16713db0 100644
--- a/fs/ext4/ext4_sb.h
+++ b/fs/ext4/ext4_sb.h
@@ -146,4 +146,10 @@ struct ext4_sb_info {
 	struct flex_groups *s_flex_groups;
 };
 
+static inline spinlock_t *
+sb_bgl_lock(struct ext4_sb_info *sbi, unsigned int block_group)
+{
+	return bgl_lock_ptr(&sbi->s_blockgroup_lock, block_group);
+}
+
 #endif	/* _EXT4_SB */
diff --git a/include/linux/blockgroup_lock.h b/include/linux/blockgroup_lock.h
index 8607312983bd..e44b88ba552b 100644
--- a/include/linux/blockgroup_lock.h
+++ b/include/linux/blockgroup_lock.h
@@ -53,7 +53,10 @@ static inline void bgl_lock_init(struct blockgroup_lock *bgl)
  * The accessor is a macro so we can embed a blockgroup_lock into different
  * superblock types
  */
-#define sb_bgl_lock(sb, block_group) \
-	(&(sb)->s_blockgroup_lock.locks[(block_group) & (NR_BG_LOCKS-1)].lock)
+static inline spinlock_t *
+bgl_lock_ptr(struct blockgroup_lock *bgl, unsigned int block_group)
+{
+	return &bgl->locks[(block_group) & (NR_BG_LOCKS-1)].lock;
+}
 
 #endif
diff --git a/include/linux/ext2_fs_sb.h b/include/linux/ext2_fs_sb.h
index f273415ab6f1..dc541f3653d1 100644
--- a/include/linux/ext2_fs_sb.h
+++ b/include/linux/ext2_fs_sb.h
@@ -108,4 +108,10 @@ struct ext2_sb_info {
 	struct ext2_reserve_window_node s_rsv_window_head;
 };
 
+static inline spinlock_t *
+sb_bgl_lock(struct ext2_sb_info *sbi, unsigned int block_group)
+{
+	return bgl_lock_ptr(&sbi->s_blockgroup_lock, block_group);
+}
+
 #endif	/* _LINUX_EXT2_FS_SB */
diff --git a/include/linux/ext3_fs_sb.h b/include/linux/ext3_fs_sb.h
index b65f0288b842..e024e38248ff 100644
--- a/include/linux/ext3_fs_sb.h
+++ b/include/linux/ext3_fs_sb.h
@@ -83,4 +83,10 @@ struct ext3_sb_info {
 #endif
 };
 
+static inline spinlock_t *
+sb_bgl_lock(struct ext3_sb_info *sbi, unsigned int block_group)
+{
+	return bgl_lock_ptr(&sbi->s_blockgroup_lock, block_group);
+}
+
 #endif	/* _LINUX_EXT3_FS_SB */
-- 
cgit v1.2.3


From 54566b2c1594c2326a645a3551f9d989f7ba3c5e Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Sun, 4 Jan 2009 12:00:53 -0800
Subject: fs: symlink write_begin allocation context fix

With the write_begin/write_end aops, page_symlink was broken because it
could no longer pass a GFP_NOFS type mask into the point where the
allocations happened.  They are done in write_begin, which would always
assume that the filesystem can be entered from reclaim.  This bug could
cause filesystem deadlocks.

The funny thing with having a gfp_t mask there is that it doesn't really
allow the caller to arbitrarily tinker with the context in which it can be
called.  It couldn't ever be GFP_ATOMIC, for example, because it needs to
take the page lock.  The only thing any callers care about is __GFP_FS
anyway, so turn that into a single flag.

Add a new flag for write_begin, AOP_FLAG_NOFS.  Filesystems can now act on
this flag in their write_begin function.  Change __grab_cache_page to
accept a nofs argument as well, to honour that flag (while we're there,
change the name to grab_cache_page_write_begin which is more instructive
and does away with random leading underscores).

This is really a more flexible way to go in the end anyway -- if a
filesystem happens to want any extra allocations aside from the pagecache
ones in ints write_begin function, it may now use GFP_KERNEL (rather than
GFP_NOFS) for common case allocations (eg.  ocfs2_alloc_write_ctxt, for a
random example).

[kosaki.motohiro@jp.fujitsu.com: fix ubifs]
[kosaki.motohiro@jp.fujitsu.com: fix fuse]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: <stable@kernel.org>		[2.6.28.x]
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
[ Cleaned up the calling convention: just pass in the AOP flags
  untouched to the grab_cache_page_write_begin() function.  That
  just simplifies everybody, and may even allow future expansion of the
  logic.   - Linus ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/affs/file.c          |  2 +-
 fs/afs/write.c          |  2 +-
 fs/buffer.c             |  4 ++--
 fs/cifs/file.c          |  2 +-
 fs/ecryptfs/mmap.c      |  2 +-
 fs/ext3/inode.c         |  2 +-
 fs/ext3/namei.c         |  3 +--
 fs/ext4/inode.c         |  4 ++--
 fs/ext4/namei.c         |  3 +--
 fs/fuse/file.c          |  4 ++--
 fs/gfs2/ops_address.c   |  2 +-
 fs/hostfs/hostfs_kern.c |  2 +-
 fs/jffs2/file.c         |  2 +-
 fs/libfs.c              |  2 +-
 fs/namei.c              | 13 +++++++++----
 fs/nfs/file.c           |  2 +-
 fs/reiserfs/inode.c     |  2 +-
 fs/smbfs/file.c         |  2 +-
 fs/ubifs/file.c         |  9 +++++----
 include/linux/fs.h      |  5 ++++-
 include/linux/pagemap.h |  3 ++-
 mm/filemap.c            | 13 +++++++++----
 22 files changed, 49 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/fs/affs/file.c b/fs/affs/file.c
index 1377b1240b6e..9246cb4aa018 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -628,7 +628,7 @@ static int affs_write_begin_ofs(struct file *file, struct address_space *mapping
 	}
 
 	index = pos >> PAGE_CACHE_SHIFT;
-	page = __grab_cache_page(mapping, index);
+	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (!page)
 		return -ENOMEM;
 	*pagep = page;
diff --git a/fs/afs/write.c b/fs/afs/write.c
index d6b85dab35fc..3fb36d433621 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -144,7 +144,7 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
 	candidate->state = AFS_WBACK_PENDING;
 	init_waitqueue_head(&candidate->waitq);
 
-	page = __grab_cache_page(mapping, index);
+	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (!page) {
 		kfree(candidate);
 		return -ENOMEM;
diff --git a/fs/buffer.c b/fs/buffer.c
index 776ae091d3b0..a13f09b696f7 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1996,7 +1996,7 @@ int block_write_begin(struct file *file, struct address_space *mapping,
 	page = *pagep;
 	if (page == NULL) {
 		ownpage = 1;
-		page = __grab_cache_page(mapping, index);
+		page = grab_cache_page_write_begin(mapping, index, flags);
 		if (!page) {
 			status = -ENOMEM;
 			goto out;
@@ -2502,7 +2502,7 @@ int nobh_write_begin(struct file *file, struct address_space *mapping,
 	from = pos & (PAGE_CACHE_SIZE - 1);
 	to = from + len;
 
-	page = __grab_cache_page(mapping, index);
+	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (!page)
 		return -ENOMEM;
 	*pagep = page;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index b1e1fc6a6e6a..12bb656fbe75 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2074,7 +2074,7 @@ static int cifs_write_begin(struct file *file, struct address_space *mapping,
 
 	cFYI(1, ("write_begin from %lld len %d", (long long)pos, len));
 
-	page = __grab_cache_page(mapping, index);
+	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (!page) {
 		rc = -ENOMEM;
 		goto out;
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 04d7b3fa1ac6..46cec2b69796 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -288,7 +288,7 @@ static int ecryptfs_write_begin(struct file *file,
 	loff_t prev_page_end_size;
 	int rc = 0;
 
-	page = __grab_cache_page(mapping, index);
+	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (!page)
 		return -ENOMEM;
 	*pagep = page;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index c4bdccf976b5..5fa453b49a64 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1161,7 +1161,7 @@ static int ext3_write_begin(struct file *file, struct address_space *mapping,
 	to = from + len;
 
 retry:
-	page = __grab_cache_page(mapping, index);
+	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (!page)
 		return -ENOMEM;
 	*pagep = page;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 297ea8dfac7c..1dd2abe6313e 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -2175,8 +2175,7 @@ retry:
 		 * We have a transaction open.  All is sweetness.  It also sets
 		 * i_size in generic_commit_write().
 		 */
-		err = __page_symlink(inode, symname, l,
-				mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
+		err = __page_symlink(inode, symname, l, 1);
 		if (err) {
 			drop_nlink(inode);
 			unlock_new_inode(inode);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 7c3325e0b005..6702a49992a6 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1346,7 +1346,7 @@ retry:
 		goto out;
 	}
 
-	page = __grab_cache_page(mapping, index);
+	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (!page) {
 		ext4_journal_stop(handle);
 		ret = -ENOMEM;
@@ -2550,7 +2550,7 @@ retry:
 		goto out;
 	}
 
-	page = __grab_cache_page(mapping, index);
+	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (!page) {
 		ext4_journal_stop(handle);
 		ret = -ENOMEM;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index da98a9012fa5..9fd2a5e1be4d 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2212,8 +2212,7 @@ retry:
 		 * We have a transaction open.  All is sweetness.  It also sets
 		 * i_size in generic_commit_write().
 		 */
-		err = __page_symlink(inode, symname, l,
-				mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
+		err = __page_symlink(inode, symname, l, 1);
 		if (err) {
 			clear_nlink(inode);
 			unlock_new_inode(inode);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 34930a964b82..4c9ee7011265 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -646,7 +646,7 @@ static int fuse_write_begin(struct file *file, struct address_space *mapping,
 {
 	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
 
-	*pagep = __grab_cache_page(mapping, index);
+	*pagep = grab_cache_page_write_begin(mapping, index, flags);
 	if (!*pagep)
 		return -ENOMEM;
 	return 0;
@@ -779,7 +779,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
 			break;
 
 		err = -ENOMEM;
-		page = __grab_cache_page(mapping, index);
+		page = grab_cache_page_write_begin(mapping, index, 0);
 		if (!page)
 			break;
 
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 27563816e1c5..15f710f2d4da 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -675,7 +675,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
 		goto out_trans_fail;
 
 	error = -ENOMEM;
-	page = __grab_cache_page(mapping, index);
+	page = grab_cache_page_write_begin(mapping, index, flags);
 	*pagep = page;
 	if (unlikely(!page))
 		goto out_endtrans;
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 3a31451ac170..5c538e0ec14b 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -501,7 +501,7 @@ int hostfs_write_begin(struct file *file, struct address_space *mapping,
 {
 	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
 
-	*pagep = __grab_cache_page(mapping, index);
+	*pagep = grab_cache_page_write_begin(mapping, index, flags);
 	if (!*pagep)
 		return -ENOMEM;
 	return 0;
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 5a98aa87c853..5edc2bf20581 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -132,7 +132,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
 	uint32_t pageofs = index << PAGE_CACHE_SHIFT;
 	int ret = 0;
 
-	pg = __grab_cache_page(mapping, index);
+	pg = grab_cache_page_write_begin(mapping, index, flags);
 	if (!pg)
 		return -ENOMEM;
 	*pagep = pg;
diff --git a/fs/libfs.c b/fs/libfs.c
index e960a8321902..bdaec17fa388 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -360,7 +360,7 @@ int simple_write_begin(struct file *file, struct address_space *mapping,
 	index = pos >> PAGE_CACHE_SHIFT;
 	from = pos & (PAGE_CACHE_SIZE - 1);
 
-	page = __grab_cache_page(mapping, index);
+	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (!page)
 		return -ENOMEM;
 
diff --git a/fs/namei.c b/fs/namei.c
index dd5c9f0bf829..df2d3df4f049 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2817,18 +2817,23 @@ void page_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
 	}
 }
 
-int __page_symlink(struct inode *inode, const char *symname, int len,
-		gfp_t gfp_mask)
+/*
+ * The nofs argument instructs pagecache_write_begin to pass AOP_FLAG_NOFS
+ */
+int __page_symlink(struct inode *inode, const char *symname, int len, int nofs)
 {
 	struct address_space *mapping = inode->i_mapping;
 	struct page *page;
 	void *fsdata;
 	int err;
 	char *kaddr;
+	unsigned int flags = AOP_FLAG_UNINTERRUPTIBLE;
+	if (nofs)
+		flags |= AOP_FLAG_NOFS;
 
 retry:
 	err = pagecache_write_begin(NULL, mapping, 0, len-1,
-				AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
+				flags, &page, &fsdata);
 	if (err)
 		goto fail;
 
@@ -2852,7 +2857,7 @@ fail:
 int page_symlink(struct inode *inode, const char *symname, int len)
 {
 	return __page_symlink(inode, symname, len,
-			mapping_gfp_mask(inode->i_mapping));
+			!(mapping_gfp_mask(inode->i_mapping) & __GFP_FS));
 }
 
 const struct inode_operations page_symlink_inode_operations = {
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index d319b49f8f06..90f292b520d2 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -354,7 +354,7 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
 		file->f_path.dentry->d_name.name,
 		mapping->host->i_ino, len, (long long) pos);
 
-	page = __grab_cache_page(mapping, index);
+	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (!page)
 		return -ENOMEM;
 	*pagep = page;
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 145c2d3e5e01..ed04f47007f8 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -2561,7 +2561,7 @@ static int reiserfs_write_begin(struct file *file,
 	}
 
 	index = pos >> PAGE_CACHE_SHIFT;
-	page = __grab_cache_page(mapping, index);
+	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (!page)
 		return -ENOMEM;
 	*pagep = page;
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c
index e4f8d51a5553..92d5e8ffb639 100644
--- a/fs/smbfs/file.c
+++ b/fs/smbfs/file.c
@@ -297,7 +297,7 @@ static int smb_write_begin(struct file *file, struct address_space *mapping,
 			struct page **pagep, void **fsdata)
 {
 	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
-	*pagep = __grab_cache_page(mapping, index);
+	*pagep = grab_cache_page_write_begin(mapping, index, flags);
 	if (!*pagep)
 		return -ENOMEM;
 	return 0;
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index fe82d2464d46..bf37374567fa 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -219,7 +219,8 @@ static void release_existing_page_budget(struct ubifs_info *c)
 }
 
 static int write_begin_slow(struct address_space *mapping,
-			    loff_t pos, unsigned len, struct page **pagep)
+			    loff_t pos, unsigned len, struct page **pagep,
+			    unsigned flags)
 {
 	struct inode *inode = mapping->host;
 	struct ubifs_info *c = inode->i_sb->s_fs_info;
@@ -247,7 +248,7 @@ static int write_begin_slow(struct address_space *mapping,
 	if (unlikely(err))
 		return err;
 
-	page = __grab_cache_page(mapping, index);
+	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (unlikely(!page)) {
 		ubifs_release_budget(c, &req);
 		return -ENOMEM;
@@ -438,7 +439,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
 		return -EROFS;
 
 	/* Try out the fast-path part first */
-	page = __grab_cache_page(mapping, index);
+	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (unlikely(!page))
 		return -ENOMEM;
 
@@ -483,7 +484,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
 		unlock_page(page);
 		page_cache_release(page);
 
-		return write_begin_slow(mapping, pos, len, pagep);
+		return write_begin_slow(mapping, pos, len, pagep, flags);
 	}
 
 	/*
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e2170ee21e18..f2a3010140e3 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -423,6 +423,9 @@ enum positive_aop_returns {
 
 #define AOP_FLAG_UNINTERRUPTIBLE	0x0001 /* will not do a short write */
 #define AOP_FLAG_CONT_EXPAND		0x0002 /* called from cont_expand */
+#define AOP_FLAG_NOFS			0x0004 /* used by filesystem to direct
+						* helper code (eg buffer layer)
+						* to clear GFP_FS from alloc */
 
 /*
  * oh the beauties of C type declarations.
@@ -2035,7 +2038,7 @@ extern int page_readlink(struct dentry *, char __user *, int);
 extern void *page_follow_link_light(struct dentry *, struct nameidata *);
 extern void page_put_link(struct dentry *, struct nameidata *, void *);
 extern int __page_symlink(struct inode *inode, const char *symname, int len,
-		gfp_t gfp_mask);
+		int nofs);
 extern int page_symlink(struct inode *inode, const char *symname, int len);
 extern const struct inode_operations page_symlink_inode_operations;
 extern int generic_readlink(struct dentry *, char __user *, int);
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 709742be02f0..01ca0856caff 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -241,7 +241,8 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start,
 unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
 			int tag, unsigned int nr_pages, struct page **pages);
 
-struct page *__grab_cache_page(struct address_space *mapping, pgoff_t index);
+struct page *grab_cache_page_write_begin(struct address_space *mapping,
+			pgoff_t index, unsigned flags);
 
 /*
  * Returns locked page at given index in given cache, creating it if needed.
diff --git a/mm/filemap.c b/mm/filemap.c
index f3e5f8944d17..f8c69273c37f 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2140,19 +2140,24 @@ EXPORT_SYMBOL(generic_file_direct_write);
  * Find or create a page at the given pagecache position. Return the locked
  * page. This function is specifically for buffered writes.
  */
-struct page *__grab_cache_page(struct address_space *mapping, pgoff_t index)
+struct page *grab_cache_page_write_begin(struct address_space *mapping,
+					pgoff_t index, unsigned flags)
 {
 	int status;
 	struct page *page;
+	gfp_t gfp_notmask = 0;
+	if (flags & AOP_FLAG_NOFS)
+		gfp_notmask = __GFP_FS;
 repeat:
 	page = find_lock_page(mapping, index);
 	if (likely(page))
 		return page;
 
-	page = page_cache_alloc(mapping);
+	page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~gfp_notmask);
 	if (!page)
 		return NULL;
-	status = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL);
+	status = add_to_page_cache_lru(page, mapping, index,
+						GFP_KERNEL & ~gfp_notmask);
 	if (unlikely(status)) {
 		page_cache_release(page);
 		if (status == -EEXIST)
@@ -2161,7 +2166,7 @@ repeat:
 	}
 	return page;
 }
-EXPORT_SYMBOL(__grab_cache_page);
+EXPORT_SYMBOL(grab_cache_page_write_begin);
 
 static ssize_t generic_perform_write(struct file *file,
 				struct iov_iter *i, loff_t pos)
-- 
cgit v1.2.3


From 099e657625e801adf82054c8050dde5aceb68452 Mon Sep 17 00:00:00 2001
From: Alessandro Zummo <a.zummo@towertech.it>
Date: Sun, 4 Jan 2009 12:00:54 -0800
Subject: rtc: add alarm/update irq interfaces

Add standard interfaces for alarm/update irqs enabling.  Drivers are no
more required to implement equivalent ioctl code as rtc-dev will provide
it.

UIE emulation should now be handled correctly and will work even for those
RTC drivers who cannot be configured to do both UIE and AIE.

Signed-off-by: Alessandro Zummo <a.zummo@towertech.it>
Cc: David Brownell <david-b@pacbell.net>
Cc: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/rtc/Kconfig     |  6 +++++-
 drivers/rtc/interface.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/rtc/rtc-dev.c   | 51 +++++++++++++++++++++++++++++++---------------
 include/linux/rtc.h     |  8 +++++++-
 4 files changed, 101 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 123092d8a984..165a81843357 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -102,9 +102,13 @@ config RTC_INTF_DEV_UIE_EMUL
 	depends on RTC_INTF_DEV
 	help
 	  Provides an emulation for RTC_UIE if the underlying rtc chip
-	  driver does not expose RTC_UIE ioctls.  Those requests generate
+	  driver does not expose RTC_UIE ioctls. Those requests generate
 	  once-per-second update interrupts, used for synchronization.
 
+	  The emulation code will read the time from the hardware
+	  clock several times per second, please enable this option
+	  only if you know that you really need it.
+
 config RTC_DRV_TEST
 	tristate "Test driver/device"
 	help
diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index a04c1b6b1575..fd2c652504ff 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -307,6 +307,60 @@ int rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
 }
 EXPORT_SYMBOL_GPL(rtc_set_alarm);
 
+int rtc_alarm_irq_enable(struct rtc_device *rtc, unsigned int enabled)
+{
+	int err = mutex_lock_interruptible(&rtc->ops_lock);
+	if (err)
+		return err;
+
+	if (!rtc->ops)
+		err = -ENODEV;
+	else if (!rtc->ops->alarm_irq_enable)
+		err = -EINVAL;
+	else
+		err = rtc->ops->alarm_irq_enable(rtc->dev.parent, enabled);
+
+	mutex_unlock(&rtc->ops_lock);
+	return err;
+}
+EXPORT_SYMBOL_GPL(rtc_alarm_irq_enable);
+
+int rtc_update_irq_enable(struct rtc_device *rtc, unsigned int enabled)
+{
+	int err = mutex_lock_interruptible(&rtc->ops_lock);
+	if (err)
+		return err;
+
+#ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
+	if (enabled == 0 && rtc->uie_irq_active) {
+		mutex_unlock(&rtc->ops_lock);
+		return rtc_dev_update_irq_enable_emul(rtc, enabled);
+	}
+#endif
+
+	if (!rtc->ops)
+		err = -ENODEV;
+	else if (!rtc->ops->update_irq_enable)
+		err = -EINVAL;
+	else
+		err = rtc->ops->update_irq_enable(rtc->dev.parent, enabled);
+
+	mutex_unlock(&rtc->ops_lock);
+
+#ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
+	/*
+	 * Enable emulation if the driver did not provide
+	 * the update_irq_enable function pointer or if returned
+	 * -EINVAL to signal that it has been configured without
+	 * interrupts or that are not available at the moment.
+	 */
+	if (err == -EINVAL)
+		err = rtc_dev_update_irq_enable_emul(rtc, enabled);
+#endif
+	return err;
+}
+EXPORT_SYMBOL_GPL(rtc_update_irq_enable);
+
 /**
  * rtc_update_irq - report RTC periodic, alarm, and/or update irqs
  * @rtc: the rtc device
diff --git a/drivers/rtc/rtc-dev.c b/drivers/rtc/rtc-dev.c
index ecdea44ae4e5..45152f4952d6 100644
--- a/drivers/rtc/rtc-dev.c
+++ b/drivers/rtc/rtc-dev.c
@@ -92,10 +92,10 @@ static void rtc_uie_timer(unsigned long data)
 	spin_unlock_irqrestore(&rtc->irq_lock, flags);
 }
 
-static void clear_uie(struct rtc_device *rtc)
+static int clear_uie(struct rtc_device *rtc)
 {
 	spin_lock_irq(&rtc->irq_lock);
-	if (rtc->irq_active) {
+	if (rtc->uie_irq_active) {
 		rtc->stop_uie_polling = 1;
 		if (rtc->uie_timer_active) {
 			spin_unlock_irq(&rtc->irq_lock);
@@ -108,9 +108,10 @@ static void clear_uie(struct rtc_device *rtc)
 			flush_scheduled_work();
 			spin_lock_irq(&rtc->irq_lock);
 		}
-		rtc->irq_active = 0;
+		rtc->uie_irq_active = 0;
 	}
 	spin_unlock_irq(&rtc->irq_lock);
+	return 0;
 }
 
 static int set_uie(struct rtc_device *rtc)
@@ -122,8 +123,8 @@ static int set_uie(struct rtc_device *rtc)
 	if (err)
 		return err;
 	spin_lock_irq(&rtc->irq_lock);
-	if (!rtc->irq_active) {
-		rtc->irq_active = 1;
+	if (!rtc->uie_irq_active) {
+		rtc->uie_irq_active = 1;
 		rtc->stop_uie_polling = 0;
 		rtc->oldsecs = tm.tm_sec;
 		rtc->uie_task_active = 1;
@@ -134,6 +135,16 @@ static int set_uie(struct rtc_device *rtc)
 	spin_unlock_irq(&rtc->irq_lock);
 	return 0;
 }
+
+int rtc_dev_update_irq_enable_emul(struct rtc_device *rtc, unsigned int enabled)
+{
+	if (enabled)
+		return set_uie(rtc);
+	else
+		return clear_uie(rtc);
+}
+EXPORT_SYMBOL(rtc_dev_update_irq_enable_emul);
+
 #endif /* CONFIG_RTC_INTF_DEV_UIE_EMUL */
 
 static ssize_t
@@ -357,6 +368,22 @@ static long rtc_dev_ioctl(struct file *file,
 		err = rtc_irq_set_state(rtc, NULL, 0);
 		break;
 
+	case RTC_AIE_ON:
+		mutex_unlock(&rtc->ops_lock);
+		return rtc_alarm_irq_enable(rtc, 1);
+
+	case RTC_AIE_OFF:
+		mutex_unlock(&rtc->ops_lock);
+		return rtc_alarm_irq_enable(rtc, 0);
+
+	case RTC_UIE_ON:
+		mutex_unlock(&rtc->ops_lock);
+		return rtc_update_irq_enable(rtc, 1);
+
+	case RTC_UIE_OFF:
+		mutex_unlock(&rtc->ops_lock);
+		return rtc_update_irq_enable(rtc, 0);
+
 	case RTC_IRQP_SET:
 		err = rtc_irq_set_freq(rtc, NULL, arg);
 		break;
@@ -401,17 +428,6 @@ static long rtc_dev_ioctl(struct file *file,
 			err = -EFAULT;
 		return err;
 
-#ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
-	case RTC_UIE_OFF:
-		mutex_unlock(&rtc->ops_lock);
-		clear_uie(rtc);
-		return 0;
-
-	case RTC_UIE_ON:
-		mutex_unlock(&rtc->ops_lock);
-		err = set_uie(rtc);
-		return err;
-#endif
 	default:
 		err = -ENOTTY;
 		break;
@@ -440,7 +456,10 @@ static int rtc_dev_release(struct inode *inode, struct file *file)
 	 * Leave the alarm alone; it may be set to trigger a system wakeup
 	 * later, or be used by kernel code, and is a one-shot event anyway.
 	 */
+
+	/* Keep ioctl until all drivers are converted */
 	rtc_dev_ioctl(file, RTC_UIE_OFF, 0);
+	rtc_update_irq_enable(rtc, 0);
 	rtc_irq_set_state(rtc, NULL, 0);
 
 	if (rtc->ops->release)
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index 91f597ad6acc..4046b75563c1 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -145,6 +145,8 @@ struct rtc_class_ops {
 	int (*irq_set_state)(struct device *, int enabled);
 	int (*irq_set_freq)(struct device *, int freq);
 	int (*read_callback)(struct device *, int data);
+	int (*alarm_irq_enable)(struct device *, unsigned int enabled);
+	int (*update_irq_enable)(struct device *, unsigned int enabled);
 };
 
 #define RTC_DEVICE_NAME_SIZE 20
@@ -181,7 +183,7 @@ struct rtc_device
 	struct timer_list uie_timer;
 	/* Those fields are protected by rtc->irq_lock */
 	unsigned int oldsecs;
-	unsigned int irq_active:1;
+	unsigned int uie_irq_active:1;
 	unsigned int stop_uie_polling:1;
 	unsigned int uie_task_active:1;
 	unsigned int uie_timer_active:1;
@@ -216,6 +218,10 @@ extern int rtc_irq_set_state(struct rtc_device *rtc,
 				struct rtc_task *task, int enabled);
 extern int rtc_irq_set_freq(struct rtc_device *rtc,
 				struct rtc_task *task, int freq);
+extern int rtc_update_irq_enable(struct rtc_device *rtc, unsigned int enabled);
+extern int rtc_alarm_irq_enable(struct rtc_device *rtc, unsigned int enabled);
+extern int rtc_dev_update_irq_enable_emul(struct rtc_device *rtc,
+						unsigned int enabled);
 
 typedef struct rtc_task {
 	void (*func)(void *private_data);
-- 
cgit v1.2.3


From 088af9a6e05d51e7c3dc85d45d8b7a52c3ee08d7 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Wed, 31 Dec 2008 12:31:18 +0100
Subject: module: fix module loading failure of large kernel modules for parisc

When creating the final layout of a kernel module in memory, allow the
module loader to reserve some additional memory in front of a given section.
This is currently only needed for the parisc port which needs to put the
stub entries there to fulfill the 17/22bit PCREL relocations with large
kernel modules like xfs.

Signed-off-by: Helge Deller <deller@gmx.de>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (renamed fn)
---
 include/linux/moduleloader.h |  3 +++
 kernel/module.c              | 16 +++++++++++++---
 2 files changed, 16 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h
index eb1033957486..c1f40c2f7ffb 100644
--- a/include/linux/moduleloader.h
+++ b/include/linux/moduleloader.h
@@ -13,6 +13,9 @@ int module_frob_arch_sections(Elf_Ehdr *hdr,
 			      char *secstrings,
 			      struct module *mod);
 
+/* Additional bytes needed by arch in front of individual sections */
+unsigned int arch_mod_section_prepend(struct module *mod, unsigned int section);
+
 /* Allocator used for allocating struct module, core sections and init
    sections.  Returns NULL on failure. */
 void *module_alloc(unsigned long size);
diff --git a/kernel/module.c b/kernel/module.c
index d3d254571bda..4299aefc20b8 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1578,11 +1578,21 @@ static int simplify_symbols(Elf_Shdr *sechdrs,
 	return ret;
 }
 
+/* Additional bytes needed by arch in front of individual sections */
+unsigned int __weak arch_mod_section_prepend(struct module *mod,
+					     unsigned int section)
+{
+	/* default implementation just returns zero */
+	return 0;
+}
+
 /* Update size with this section: return offset. */
-static long get_offset(unsigned int *size, Elf_Shdr *sechdr)
+static long get_offset(struct module *mod, unsigned int *size,
+		       Elf_Shdr *sechdr, unsigned int section)
 {
 	long ret;
 
+	*size += arch_mod_section_prepend(mod, section);
 	ret = ALIGN(*size, sechdr->sh_addralign ?: 1);
 	*size = ret + sechdr->sh_size;
 	return ret;
@@ -1622,7 +1632,7 @@ static void layout_sections(struct module *mod,
 			    || strncmp(secstrings + s->sh_name,
 				       ".init", 5) == 0)
 				continue;
-			s->sh_entsize = get_offset(&mod->core_size, s);
+			s->sh_entsize = get_offset(mod, &mod->core_size, s, i);
 			DEBUGP("\t%s\n", secstrings + s->sh_name);
 		}
 		if (m == 0)
@@ -1640,7 +1650,7 @@ static void layout_sections(struct module *mod,
 			    || strncmp(secstrings + s->sh_name,
 				       ".init", 5) != 0)
 				continue;
-			s->sh_entsize = (get_offset(&mod->init_size, s)
+			s->sh_entsize = (get_offset(mod, &mod->init_size, s, i)
 					 | INIT_OFFSET_MASK);
 			DEBUGP("\t%s\n", secstrings + s->sh_name);
 		}
-- 
cgit v1.2.3


From 9ea09af3bd3090e8349ca2899ca2011bd94cda85 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 22 Dec 2008 12:36:30 +0100
Subject: stop_machine: introduce stop_machine_create/destroy.

Introduce stop_machine_create/destroy. With this interface subsystems
that need a non-failing stop_machine environment can create the
stop_machine machine threads before actually calling stop_machine.
When the threads aren't needed anymore they can be killed with
stop_machine_destroy again.

When stop_machine gets called and the threads aren't present they
will be created and destroyed automatically. This restores the old
behaviour of stop_machine.

This patch also converts cpu hotplug to the new interface since it
is special: cpu_down calls __stop_machine instead of stop_machine.
However the kstop threads will only be created when stop_machine
gets called.

Changing the code so that the threads would be created automatically
on __stop_machine is currently not possible: when __stop_machine gets
called we hold cpu_add_remove_lock, which is the same lock that
create_rt_workqueue would take. So the workqueue needs to be created
before the cpu hotplug code locks cpu_add_remove_lock.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/stop_machine.h | 22 ++++++++++++++++++
 kernel/cpu.c                 |  6 ++++-
 kernel/stop_machine.c        | 55 ++++++++++++++++++++++++++++++++++++--------
 3 files changed, 72 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index 74d59a641362..baba3a23a814 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -35,6 +35,24 @@ int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus);
  * won't come or go while it's being called.  Used by hotplug cpu.
  */
 int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus);
+
+/**
+ * stop_machine_create: create all stop_machine threads
+ *
+ * Description: This causes all stop_machine threads to be created before
+ * stop_machine actually gets called. This can be used by subsystems that
+ * need a non failing stop_machine infrastructure.
+ */
+int stop_machine_create(void);
+
+/**
+ * stop_machine_destroy: destroy all stop_machine threads
+ *
+ * Description: This causes all stop_machine threads which were created with
+ * stop_machine_create to be destroyed again.
+ */
+void stop_machine_destroy(void);
+
 #else
 
 static inline int stop_machine(int (*fn)(void *), void *data,
@@ -46,5 +64,9 @@ static inline int stop_machine(int (*fn)(void *), void *data,
 	local_irq_enable();
 	return ret;
 }
+
+static inline int stop_machine_create(void) { return 0; }
+static inline void stop_machine_destroy(void) { }
+
 #endif /* CONFIG_SMP */
 #endif /* _LINUX_STOP_MACHINE */
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 47fff3b63cbf..30e74dd6d01b 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -269,8 +269,11 @@ out_release:
 
 int __ref cpu_down(unsigned int cpu)
 {
-	int err = 0;
+	int err;
 
+	err = stop_machine_create();
+	if (err)
+		return err;
 	cpu_maps_update_begin();
 
 	if (cpu_hotplug_disabled) {
@@ -297,6 +300,7 @@ int __ref cpu_down(unsigned int cpu)
 
 out:
 	cpu_maps_update_done();
+	stop_machine_destroy();
 	return err;
 }
 EXPORT_SYMBOL(cpu_down);
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 286c41722e8c..0cd415ee62a2 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -38,7 +38,10 @@ struct stop_machine_data {
 static unsigned int num_threads;
 static atomic_t thread_ack;
 static DEFINE_MUTEX(lock);
-
+/* setup_lock protects refcount, stop_machine_wq and stop_machine_work. */
+static DEFINE_MUTEX(setup_lock);
+/* Users of stop_machine. */
+static int refcount;
 static struct workqueue_struct *stop_machine_wq;
 static struct stop_machine_data active, idle;
 static const cpumask_t *active_cpus;
@@ -109,6 +112,43 @@ static int chill(void *unused)
 	return 0;
 }
 
+int stop_machine_create(void)
+{
+	mutex_lock(&setup_lock);
+	if (refcount)
+		goto done;
+	stop_machine_wq = create_rt_workqueue("kstop");
+	if (!stop_machine_wq)
+		goto err_out;
+	stop_machine_work = alloc_percpu(struct work_struct);
+	if (!stop_machine_work)
+		goto err_out;
+done:
+	refcount++;
+	mutex_unlock(&setup_lock);
+	return 0;
+
+err_out:
+	if (stop_machine_wq)
+		destroy_workqueue(stop_machine_wq);
+	mutex_unlock(&setup_lock);
+	return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(stop_machine_create);
+
+void stop_machine_destroy(void)
+{
+	mutex_lock(&setup_lock);
+	refcount--;
+	if (refcount)
+		goto done;
+	destroy_workqueue(stop_machine_wq);
+	free_percpu(stop_machine_work);
+done:
+	mutex_unlock(&setup_lock);
+}
+EXPORT_SYMBOL_GPL(stop_machine_destroy);
+
 int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
 {
 	struct work_struct *sm_work;
@@ -146,19 +186,14 @@ int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
 {
 	int ret;
 
+	ret = stop_machine_create();
+	if (ret)
+		return ret;
 	/* No CPUs can come up or down during this. */
 	get_online_cpus();
 	ret = __stop_machine(fn, data, cpus);
 	put_online_cpus();
-
+	stop_machine_destroy();
 	return ret;
 }
 EXPORT_SYMBOL_GPL(stop_machine);
-
-static int __init stop_machine_init(void)
-{
-	stop_machine_wq = create_rt_workqueue("kstop");
-	stop_machine_work = alloc_percpu(struct work_struct);
-	return 0;
-}
-core_initcall(stop_machine_init);
-- 
cgit v1.2.3


From 5d38a079ce3971f932bbdc0dc5b887806fabd5dc Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 4 Jan 2009 16:13:40 -0800
Subject: gro: Add page frag support

This patch allows GRO to merge page frags (skb_shinfo(skb)->frags)
in one skb, rather than using the less efficient frag_list.

It also adds a new interface, napi_gro_frags to allow drivers
to inject page frags directly into the stack without allocating
an skb.  This is intended to be the GRO equivalent for LRO's
lro_receive_frags interface.

The existing GSO interface can already handle page frags with
or without an appended frag_list so nothing needs to be changed
there.

The merging itself is rather simple.  We store any new frag entries
after the last existing entry, without checking whether the first
new entry can be merged with the last existing entry.  Making this
check would actually be easy but since no existing driver can
produce contiguous frags anyway it would just be mental masturbation.

If the total number of entries would exceed the capacity of a
single skb, we simply resort to using frag_list as we do now.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 16 ++++++++-
 net/core/dev.c            | 91 ++++++++++++++++++++++++++++++++++++++++++++---
 net/core/skbuff.c         | 14 +++++++-
 3 files changed, 114 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 41e1224651cf..c28bbba3c23d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -313,10 +313,11 @@ struct napi_struct {
 #ifdef CONFIG_NETPOLL
 	spinlock_t		poll_lock;
 	int			poll_owner;
-	struct net_device	*dev;
 #endif
+	struct net_device	*dev;
 	struct list_head	dev_list;
 	struct sk_buff		*gro_list;
+	struct sk_buff		*skb;
 };
 
 enum
@@ -990,6 +991,9 @@ struct napi_gro_cb {
 
 	/* Number of segments aggregated. */
 	int count;
+
+	/* Free the skb? */
+	int free;
 };
 
 #define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb)
@@ -1011,6 +1015,14 @@ struct packet_type {
 	struct list_head	list;
 };
 
+struct napi_gro_fraginfo {
+	skb_frag_t frags[MAX_SKB_FRAGS];
+	unsigned int nr_frags;
+	unsigned int ip_summed;
+	unsigned int len;
+	__wsum csum;
+};
+
 #include <linux/interrupt.h>
 #include <linux/notifier.h>
 
@@ -1363,6 +1375,8 @@ extern int		netif_receive_skb(struct sk_buff *skb);
 extern void		napi_gro_flush(struct napi_struct *napi);
 extern int		napi_gro_receive(struct napi_struct *napi,
 					 struct sk_buff *skb);
+extern int		napi_gro_frags(struct napi_struct *napi,
+				       struct napi_gro_fraginfo *info);
 extern void		netif_nit_deliver(struct sk_buff *skb);
 extern int		dev_valid_name(const char *name);
 extern int		dev_ioctl(struct net *net, unsigned int cmd, void __user *);
diff --git a/net/core/dev.c b/net/core/dev.c
index 1e1a68066457..382df6c09eec 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -132,6 +132,9 @@
 /* Instead of increasing this, you should create a hash table. */
 #define MAX_GRO_SKBS 8
 
+/* This should be increased if a protocol with a bigger head is added. */
+#define GRO_MAX_HEAD (MAX_HEADER + 128)
+
 /*
  *	The list of packet types we will receive (as opposed to discard)
  *	and the routines to invoke.
@@ -2345,7 +2348,7 @@ static int napi_gro_complete(struct sk_buff *skb)
 	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
 	int err = -ENOENT;
 
-	if (!skb_shinfo(skb)->frag_list)
+	if (NAPI_GRO_CB(skb)->count == 1)
 		goto out;
 
 	rcu_read_lock();
@@ -2384,7 +2387,7 @@ void napi_gro_flush(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(napi_gro_flush);
 
-int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	struct sk_buff **pp = NULL;
 	struct packet_type *ptype;
@@ -2393,6 +2396,7 @@ int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 	int count = 0;
 	int same_flow;
 	int mac_len;
+	int free;
 
 	if (!(skb->dev->features & NETIF_F_GRO))
 		goto normal;
@@ -2409,6 +2413,7 @@ int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 		skb->mac_len = mac_len;
 		NAPI_GRO_CB(skb)->same_flow = 0;
 		NAPI_GRO_CB(skb)->flush = 0;
+		NAPI_GRO_CB(skb)->free = 0;
 
 		for (p = napi->gro_list; p; p = p->next) {
 			count++;
@@ -2428,6 +2433,7 @@ int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 		goto normal;
 
 	same_flow = NAPI_GRO_CB(skb)->same_flow;
+	free = NAPI_GRO_CB(skb)->free;
 
 	if (pp) {
 		struct sk_buff *nskb = *pp;
@@ -2452,13 +2458,86 @@ int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 	napi->gro_list = skb;
 
 ok:
-	return NET_RX_SUCCESS;
+	return free;
 
 normal:
-	return netif_receive_skb(skb);
+	return -1;
+}
+
+int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+{
+	switch (__napi_gro_receive(napi, skb)) {
+	case -1:
+		return netif_receive_skb(skb);
+
+	case 1:
+		kfree_skb(skb);
+		break;
+	}
+
+	return NET_RX_SUCCESS;
 }
 EXPORT_SYMBOL(napi_gro_receive);
 
+int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info)
+{
+	struct net_device *dev = napi->dev;
+	struct sk_buff *skb = napi->skb;
+	int err = NET_RX_DROP;
+
+	napi->skb = NULL;
+
+	if (!skb) {
+		skb = netdev_alloc_skb(dev, GRO_MAX_HEAD + NET_IP_ALIGN);
+		if (!skb)
+			goto out;
+
+		skb_reserve(skb, NET_IP_ALIGN);
+	}
+
+	BUG_ON(info->nr_frags > MAX_SKB_FRAGS);
+	skb_shinfo(skb)->nr_frags = info->nr_frags;
+	memcpy(skb_shinfo(skb)->frags, info->frags, sizeof(info->frags));
+
+	skb->data_len = info->len;
+	skb->len += info->len;
+	skb->truesize += info->len;
+
+	if (!pskb_may_pull(skb, ETH_HLEN))
+		goto reuse;
+
+	err = NET_RX_SUCCESS;
+
+	skb->protocol = eth_type_trans(skb, dev);
+
+	skb->ip_summed = info->ip_summed;
+	skb->csum = info->csum;
+
+	switch (__napi_gro_receive(napi, skb)) {
+	case -1:
+		return netif_receive_skb(skb);
+
+	case 0:
+		goto out;
+	}
+
+reuse:
+	skb_shinfo(skb)->nr_frags = 0;
+
+	skb->len -= skb->data_len;
+	skb->truesize -= skb->data_len;
+	skb->data_len = 0;
+
+	__skb_pull(skb, skb_headlen(skb));
+	skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
+
+	napi->skb = skb;
+
+out:
+	return err;
+}
+EXPORT_SYMBOL(napi_gro_frags);
+
 static int process_backlog(struct napi_struct *napi, int quota)
 {
 	int work = 0;
@@ -2537,11 +2616,12 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
 {
 	INIT_LIST_HEAD(&napi->poll_list);
 	napi->gro_list = NULL;
+	napi->skb = NULL;
 	napi->poll = poll;
 	napi->weight = weight;
 	list_add(&napi->dev_list, &dev->napi_list);
-#ifdef CONFIG_NETPOLL
 	napi->dev = dev;
+#ifdef CONFIG_NETPOLL
 	spin_lock_init(&napi->poll_lock);
 	napi->poll_owner = -1;
 #endif
@@ -2554,6 +2634,7 @@ void netif_napi_del(struct napi_struct *napi)
 	struct sk_buff *skb, *next;
 
 	list_del_init(&napi->dev_list);
+	kfree(napi->skb);
 
 	for (skb = napi->gro_list; skb; skb = next) {
 		next = skb->next;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 3aafb10325b8..5110b359c758 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2594,6 +2594,17 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 
 	if (skb_shinfo(p)->frag_list)
 		goto merge;
+	else if (!skb_headlen(p) && !skb_headlen(skb) &&
+		 skb_shinfo(p)->nr_frags + skb_shinfo(skb)->nr_frags <
+		 MAX_SKB_FRAGS) {
+		memcpy(skb_shinfo(p)->frags + skb_shinfo(p)->nr_frags,
+		       skb_shinfo(skb)->frags,
+		       skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
+
+		skb_shinfo(p)->nr_frags += skb_shinfo(skb)->nr_frags;
+		NAPI_GRO_CB(skb)->free = 1;
+		goto done;
+	}
 
 	headroom = skb_headroom(p);
 	nskb = netdev_alloc_skb(p->dev, headroom);
@@ -2628,11 +2639,12 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 	p = nskb;
 
 merge:
-	NAPI_GRO_CB(p)->count++;
 	p->prev->next = skb;
 	p->prev = skb;
 	skb_header_release(skb);
 
+done:
+	NAPI_GRO_CB(p)->count++;
 	p->data_len += skb->len;
 	p->truesize += skb->len;
 	p->len += skb->len;
-- 
cgit v1.2.3


From 14eaddc967b16017d4a1a24d2be6c28ecbe06ed8 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 31 Dec 2008 15:15:42 +0000
Subject: CRED: Fix regression in cap_capable() as shown up by sys_faccessat()
 [ver #2]

Fix a regression in cap_capable() due to:

	commit 5ff7711e635b32f0a1e558227d030c7e45b4a465
	Author: David Howells <dhowells@redhat.com>
	Date:   Wed Dec 31 02:52:28 2008 +0000

	    CRED: Differentiate objective and effective subjective credentials on a task

The problem is that the above patch allows a process to have two sets of
credentials, and for the most part uses the subjective credentials when
accessing current's creds.

There is, however, one exception: cap_capable(), and thus capable(), uses the
real/objective credentials of the target task, whether or not it is the current
task.

Ordinarily this doesn't matter, since usually the two cred pointers in current
point to the same set of creds.  However, sys_faccessat() makes use of this
facility to override the credentials of the calling process to make its test,
without affecting the creds as seen from other processes.

One of the things sys_faccessat() does is to make an adjustment to the
effective capabilities mask, which cap_capable(), as it stands, then ignores.

The affected capability check is in generic_permission():

	if (!(mask & MAY_EXEC) || execute_ok(inode))
		if (capable(CAP_DAC_OVERRIDE))
			return 0;

This change splits capable() from has_capability() down into the commoncap and
SELinux code.  The capable() security op now only deals with the current
process, and uses the current process's subjective creds.  A new security op -
task_capable() - is introduced that can check any task's objective creds.

strictly the capable() security op is superfluous with the presence of the
task_capable() op, however it should be faster to call the capable() op since
two fewer arguments need be passed down through the various layers.

This can be tested by compiling the following program from the XFS testsuite:

/*
 *  t_access_root.c - trivial test program to show permission bug.
 *
 *  Written by Michael Kerrisk - copyright ownership not pursued.
 *  Sourced from: http://linux.derkeiler.com/Mailing-Lists/Kernel/2003-10/6030.html
 */
#include <limits.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <sys/stat.h>

#define UID 500
#define GID 100
#define PERM 0
#define TESTPATH "/tmp/t_access"

static void
errExit(char *msg)
{
    perror(msg);
    exit(EXIT_FAILURE);
} /* errExit */

static void
accessTest(char *file, int mask, char *mstr)
{
    printf("access(%s, %s) returns %d\n", file, mstr, access(file, mask));
} /* accessTest */

int
main(int argc, char *argv[])
{
    int fd, perm, uid, gid;
    char *testpath;
    char cmd[PATH_MAX + 20];

    testpath = (argc > 1) ? argv[1] : TESTPATH;
    perm = (argc > 2) ? strtoul(argv[2], NULL, 8) : PERM;
    uid = (argc > 3) ? atoi(argv[3]) : UID;
    gid = (argc > 4) ? atoi(argv[4]) : GID;

    unlink(testpath);

    fd = open(testpath, O_RDWR | O_CREAT, 0);
    if (fd == -1) errExit("open");

    if (fchown(fd, uid, gid) == -1) errExit("fchown");
    if (fchmod(fd, perm) == -1) errExit("fchmod");
    close(fd);

    snprintf(cmd, sizeof(cmd), "ls -l %s", testpath);
    system(cmd);

    if (seteuid(uid) == -1) errExit("seteuid");

    accessTest(testpath, 0, "0");
    accessTest(testpath, R_OK, "R_OK");
    accessTest(testpath, W_OK, "W_OK");
    accessTest(testpath, X_OK, "X_OK");
    accessTest(testpath, R_OK | W_OK, "R_OK | W_OK");
    accessTest(testpath, R_OK | X_OK, "R_OK | X_OK");
    accessTest(testpath, W_OK | X_OK, "W_OK | X_OK");
    accessTest(testpath, R_OK | W_OK | X_OK, "R_OK | W_OK | X_OK");

    exit(EXIT_SUCCESS);
} /* main */

This can be run against an Ext3 filesystem as well as against an XFS
filesystem.  If successful, it will show:

	[root@andromeda src]# ./t_access_root /tmp/xxx 0 4043 4043
	---------- 1 dhowells dhowells 0 2008-12-31 03:00 /tmp/xxx
	access(/tmp/xxx, 0) returns 0
	access(/tmp/xxx, R_OK) returns 0
	access(/tmp/xxx, W_OK) returns 0
	access(/tmp/xxx, X_OK) returns -1
	access(/tmp/xxx, R_OK | W_OK) returns 0
	access(/tmp/xxx, R_OK | X_OK) returns -1
	access(/tmp/xxx, W_OK | X_OK) returns -1
	access(/tmp/xxx, R_OK | W_OK | X_OK) returns -1

If unsuccessful, it will show:

	[root@andromeda src]# ./t_access_root /tmp/xxx 0 4043 4043
	---------- 1 dhowells dhowells 0 2008-12-31 02:56 /tmp/xxx
	access(/tmp/xxx, 0) returns 0
	access(/tmp/xxx, R_OK) returns -1
	access(/tmp/xxx, W_OK) returns -1
	access(/tmp/xxx, X_OK) returns -1
	access(/tmp/xxx, R_OK | W_OK) returns -1
	access(/tmp/xxx, R_OK | X_OK) returns -1
	access(/tmp/xxx, W_OK | X_OK) returns -1
	access(/tmp/xxx, R_OK | W_OK | X_OK) returns -1

I've also tested the fix with the SELinux and syscalls LTP testsuites.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/capability.h | 17 ++++++++++++++--
 include/linux/security.h   | 49 +++++++++++++++++++++++++++++++++++++---------
 kernel/capability.c        |  2 +-
 security/capability.c      |  1 +
 security/commoncap.c       | 42 +++++++++++++++++++++++++++------------
 security/root_plug.c       |  1 +
 security/security.c        | 25 +++++++++++++++++++----
 security/selinux/hooks.c   | 26 ++++++++++++++++++------
 security/smack/smack_lsm.c |  1 +
 9 files changed, 129 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/capability.h b/include/linux/capability.h
index e22f48c2a46f..5b8a13214451 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -529,8 +529,21 @@ extern const kernel_cap_t __cap_init_eff_set;
  *
  * Note that this does not set PF_SUPERPRIV on the task.
  */
-#define has_capability(t, cap) (security_capable((t), (cap)) == 0)
-#define has_capability_noaudit(t, cap) (security_capable_noaudit((t), (cap)) == 0)
+#define has_capability(t, cap) (security_task_capable((t), (cap)) == 0)
+
+/**
+ * has_capability_noaudit - Determine if a task has a superior capability available (unaudited)
+ * @t: The task in question
+ * @cap: The capability to be tested for
+ *
+ * Return true if the specified task has the given superior capability
+ * currently in effect, false if not, but don't write an audit message for the
+ * check.
+ *
+ * Note that this does not set PF_SUPERPRIV on the task.
+ */
+#define has_capability_noaudit(t, cap) \
+	(security_task_capable_noaudit((t), (cap)) == 0)
 
 extern int capable(int cap);
 
diff --git a/include/linux/security.h b/include/linux/security.h
index 3416cb85e77b..76989b8bc34f 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -48,7 +48,9 @@ struct audit_krule;
  * These functions are in security/capability.c and are used
  * as the default capabilities functions
  */
-extern int cap_capable(struct task_struct *tsk, int cap, int audit);
+extern int cap_capable(int cap, int audit);
+extern int cap_task_capable(struct task_struct *tsk, const struct cred *cred,
+			    int cap, int audit);
 extern int cap_settime(struct timespec *ts, struct timezone *tz);
 extern int cap_ptrace_may_access(struct task_struct *child, unsigned int mode);
 extern int cap_ptrace_traceme(struct task_struct *parent);
@@ -1195,9 +1197,18 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@permitted contains the permitted capability set.
  *	Return 0 and update @new if permission is granted.
  * @capable:
- *	Check whether the @tsk process has the @cap capability.
+ *	Check whether the current process has the @cap capability in its
+ *      subjective/effective credentials.
+ *	@cap contains the capability <include/linux/capability.h>.
+ *	@audit: Whether to write an audit message or not
+ *	Return 0 if the capability is granted for @tsk.
+ * @task_capable:
+ *	Check whether the @tsk process has the @cap capability in its
+ *      objective/real credentials.
  *	@tsk contains the task_struct for the process.
+ *	@cred contains the credentials to use.
  *	@cap contains the capability <include/linux/capability.h>.
+ *	@audit: Whether to write an audit message or not
  *	Return 0 if the capability is granted for @tsk.
  * @acct:
  *	Check permission before enabling or disabling process accounting.  If
@@ -1290,7 +1301,9 @@ struct security_operations {
 		       const kernel_cap_t *effective,
 		       const kernel_cap_t *inheritable,
 		       const kernel_cap_t *permitted);
-	int (*capable) (struct task_struct *tsk, int cap, int audit);
+	int (*capable) (int cap, int audit);
+	int (*task_capable) (struct task_struct *tsk, const struct cred *cred,
+			     int cap, int audit);
 	int (*acct) (struct file *file);
 	int (*sysctl) (struct ctl_table *table, int op);
 	int (*quotactl) (int cmds, int type, int id, struct super_block *sb);
@@ -1556,8 +1569,9 @@ int security_capset(struct cred *new, const struct cred *old,
 		    const kernel_cap_t *effective,
 		    const kernel_cap_t *inheritable,
 		    const kernel_cap_t *permitted);
-int security_capable(struct task_struct *tsk, int cap);
-int security_capable_noaudit(struct task_struct *tsk, int cap);
+int security_capable(int cap);
+int security_task_capable(struct task_struct *tsk, int cap);
+int security_task_capable_noaudit(struct task_struct *tsk, int cap);
 int security_acct(struct file *file);
 int security_sysctl(struct ctl_table *table, int op);
 int security_quotactl(int cmds, int type, int id, struct super_block *sb);
@@ -1754,14 +1768,31 @@ static inline int security_capset(struct cred *new,
 	return cap_capset(new, old, effective, inheritable, permitted);
 }
 
-static inline int security_capable(struct task_struct *tsk, int cap)
+static inline int security_capable(int cap)
 {
-	return cap_capable(tsk, cap, SECURITY_CAP_AUDIT);
+	return cap_capable(cap, SECURITY_CAP_AUDIT);
 }
 
-static inline int security_capable_noaudit(struct task_struct *tsk, int cap)
+static inline int security_task_capable(struct task_struct *tsk, int cap)
 {
-	return cap_capable(tsk, cap, SECURITY_CAP_NOAUDIT);
+	int ret;
+
+	rcu_read_lock();
+	ret = cap_task_capable(tsk, __task_cred(tsk), cap, SECURITY_CAP_AUDIT);
+	rcu_read_unlock();
+	return ret;
+}
+
+static inline
+int security_task_capable_noaudit(struct task_struct *tsk, int cap)
+{
+	int ret;
+
+	rcu_read_lock();
+	ret = cap_task_capable(tsk, __task_cred(tsk), cap,
+			       SECURITY_CAP_NOAUDIT);
+	rcu_read_unlock();
+	return ret;
 }
 
 static inline int security_acct(struct file *file)
diff --git a/kernel/capability.c b/kernel/capability.c
index 36b4b4daebec..df62f53f84ac 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -308,7 +308,7 @@ int capable(int cap)
 		BUG();
 	}
 
-	if (has_capability(current, cap)) {
+	if (security_capable(cap) == 0) {
 		current->flags |= PF_SUPERPRIV;
 		return 1;
 	}
diff --git a/security/capability.c b/security/capability.c
index 2dce66fcb992..fd1493da4f8d 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -826,6 +826,7 @@ void security_fixup_ops(struct security_operations *ops)
 	set_to_cap_if_null(ops, capset);
 	set_to_cap_if_null(ops, acct);
 	set_to_cap_if_null(ops, capable);
+	set_to_cap_if_null(ops, task_capable);
 	set_to_cap_if_null(ops, quotactl);
 	set_to_cap_if_null(ops, quota_on);
 	set_to_cap_if_null(ops, sysctl);
diff --git a/security/commoncap.c b/security/commoncap.c
index 79713545cd63..7f0b2a68717d 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -43,28 +43,44 @@ int cap_netlink_recv(struct sk_buff *skb, int cap)
 EXPORT_SYMBOL(cap_netlink_recv);
 
 /**
- * cap_capable - Determine whether a task has a particular effective capability
- * @tsk: The task to query
+ * cap_capable - Determine whether current has a particular effective capability
  * @cap: The capability to check for
  * @audit: Whether to write an audit message or not
  *
  * Determine whether the nominated task has the specified capability amongst
- * its effective set, returning 0 if it does, -ve if it does not.
+ * its effective set, returning 0 if it does, -ve if it does not.  Note that
+ * this uses current's subjective/effective credentials.
  *
  * NOTE WELL: cap_capable() cannot be used like the kernel's capable()
  * function.  That is, it has the reverse semantics: cap_capable() returns 0
  * when a task has a capability, but the kernel's capable() returns 1 for this
  * case.
  */
-int cap_capable(struct task_struct *tsk, int cap, int audit)
+int cap_capable(int cap, int audit)
 {
-	__u32 cap_raised;
+	return cap_raised(current_cap(), cap) ? 0 : -EPERM;
+}
 
-	/* Derived from include/linux/sched.h:capable. */
-	rcu_read_lock();
-	cap_raised = cap_raised(__task_cred(tsk)->cap_effective, cap);
-	rcu_read_unlock();
-	return cap_raised ? 0 : -EPERM;
+/**
+ * cap_has_capability - Determine whether a task has a particular effective capability
+ * @tsk: The task to query
+ * @cred: The credentials to use
+ * @cap: The capability to check for
+ * @audit: Whether to write an audit message or not
+ *
+ * Determine whether the nominated task has the specified capability amongst
+ * its effective set, returning 0 if it does, -ve if it does not.  Note that
+ * this uses the task's objective/real credentials.
+ *
+ * NOTE WELL: cap_has_capability() cannot be used like the kernel's
+ * has_capability() function.  That is, it has the reverse semantics:
+ * cap_has_capability() returns 0 when a task has a capability, but the
+ * kernel's has_capability() returns 1 for this case.
+ */
+int cap_task_capable(struct task_struct *tsk, const struct cred *cred, int cap,
+		     int audit)
+{
+	return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM;
 }
 
 /**
@@ -160,7 +176,7 @@ static inline int cap_inh_is_capped(void)
 	/* they are so limited unless the current task has the CAP_SETPCAP
 	 * capability
 	 */
-	if (cap_capable(current, CAP_SETPCAP, SECURITY_CAP_AUDIT) == 0)
+	if (cap_capable(CAP_SETPCAP, SECURITY_CAP_AUDIT) == 0)
 		return 0;
 #endif
 	return 1;
@@ -869,7 +885,7 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
 		     & (new->securebits ^ arg2))			/*[1]*/
 		    || ((new->securebits & SECURE_ALL_LOCKS & ~arg2))	/*[2]*/
 		    || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS))	/*[3]*/
-		    || (cap_capable(current, CAP_SETPCAP, SECURITY_CAP_AUDIT) != 0) /*[4]*/
+		    || (cap_capable(CAP_SETPCAP, SECURITY_CAP_AUDIT) != 0) /*[4]*/
 			/*
 			 * [1] no changing of bits that are locked
 			 * [2] no unlocking of locks
@@ -950,7 +966,7 @@ int cap_vm_enough_memory(struct mm_struct *mm, long pages)
 {
 	int cap_sys_admin = 0;
 
-	if (cap_capable(current, CAP_SYS_ADMIN, SECURITY_CAP_NOAUDIT) == 0)
+	if (cap_capable(CAP_SYS_ADMIN, SECURITY_CAP_NOAUDIT) == 0)
 		cap_sys_admin = 1;
 	return __vm_enough_memory(mm, pages, cap_sys_admin);
 }
diff --git a/security/root_plug.c b/security/root_plug.c
index 40fb4f15e27b..559578f8ac66 100644
--- a/security/root_plug.c
+++ b/security/root_plug.c
@@ -77,6 +77,7 @@ static struct security_operations rootplug_security_ops = {
 	.capget =			cap_capget,
 	.capset =			cap_capset,
 	.capable =			cap_capable,
+	.task_capable =			cap_task_capable,
 
 	.bprm_set_creds =		cap_bprm_set_creds,
 
diff --git a/security/security.c b/security/security.c
index d85dbb37c972..9bbc8e57b8c6 100644
--- a/security/security.c
+++ b/security/security.c
@@ -154,14 +154,31 @@ int security_capset(struct cred *new, const struct cred *old,
 				    effective, inheritable, permitted);
 }
 
-int security_capable(struct task_struct *tsk, int cap)
+int security_capable(int cap)
 {
-	return security_ops->capable(tsk, cap, SECURITY_CAP_AUDIT);
+	return security_ops->capable(cap, SECURITY_CAP_AUDIT);
 }
 
-int security_capable_noaudit(struct task_struct *tsk, int cap)
+int security_task_capable(struct task_struct *tsk, int cap)
 {
-	return security_ops->capable(tsk, cap, SECURITY_CAP_NOAUDIT);
+	const struct cred *cred;
+	int ret;
+
+	cred = get_task_cred(tsk);
+	ret = security_ops->task_capable(tsk, cred, cap, SECURITY_CAP_AUDIT);
+	put_cred(cred);
+	return ret;
+}
+
+int security_task_capable_noaudit(struct task_struct *tsk, int cap)
+{
+	const struct cred *cred;
+	int ret;
+
+	cred = get_task_cred(tsk);
+	ret = security_ops->task_capable(tsk, cred, cap, SECURITY_CAP_NOAUDIT);
+	put_cred(cred);
+	return ret;
 }
 
 int security_acct(struct file *file)
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index df30a7555d8a..eb6c45107a05 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1433,12 +1433,13 @@ static int current_has_perm(const struct task_struct *tsk,
 
 /* Check whether a task is allowed to use a capability. */
 static int task_has_capability(struct task_struct *tsk,
+			       const struct cred *cred,
 			       int cap, int audit)
 {
 	struct avc_audit_data ad;
 	struct av_decision avd;
 	u16 sclass;
-	u32 sid = task_sid(tsk);
+	u32 sid = cred_sid(cred);
 	u32 av = CAP_TO_MASK(cap);
 	int rc;
 
@@ -1865,15 +1866,27 @@ static int selinux_capset(struct cred *new, const struct cred *old,
 	return cred_has_perm(old, new, PROCESS__SETCAP);
 }
 
-static int selinux_capable(struct task_struct *tsk, int cap, int audit)
+static int selinux_capable(int cap, int audit)
+{
+	int rc;
+
+	rc = secondary_ops->capable(cap, audit);
+	if (rc)
+		return rc;
+
+	return task_has_capability(current, current_cred(), cap, audit);
+}
+
+static int selinux_task_capable(struct task_struct *tsk,
+				const struct cred *cred, int cap, int audit)
 {
 	int rc;
 
-	rc = secondary_ops->capable(tsk, cap, audit);
+	rc = secondary_ops->task_capable(tsk, cred, cap, audit);
 	if (rc)
 		return rc;
 
-	return task_has_capability(tsk, cap, audit);
+	return task_has_capability(tsk, cred, cap, audit);
 }
 
 static int selinux_sysctl_get_sid(ctl_table *table, u16 tclass, u32 *sid)
@@ -2037,7 +2050,7 @@ static int selinux_vm_enough_memory(struct mm_struct *mm, long pages)
 {
 	int rc, cap_sys_admin = 0;
 
-	rc = selinux_capable(current, CAP_SYS_ADMIN, SECURITY_CAP_NOAUDIT);
+	rc = selinux_capable(CAP_SYS_ADMIN, SECURITY_CAP_NOAUDIT);
 	if (rc == 0)
 		cap_sys_admin = 1;
 
@@ -2880,7 +2893,7 @@ static int selinux_inode_getsecurity(const struct inode *inode, const char *name
 	 * and lack of permission just means that we fall back to the
 	 * in-core context value, not a denial.
 	 */
-	error = selinux_capable(current, CAP_MAC_ADMIN, SECURITY_CAP_NOAUDIT);
+	error = selinux_capable(CAP_MAC_ADMIN, SECURITY_CAP_NOAUDIT);
 	if (!error)
 		error = security_sid_to_context_force(isec->sid, &context,
 						      &size);
@@ -5568,6 +5581,7 @@ static struct security_operations selinux_ops = {
 	.capset =			selinux_capset,
 	.sysctl =			selinux_sysctl,
 	.capable =			selinux_capable,
+	.task_capable =			selinux_task_capable,
 	.quotactl =			selinux_quotactl,
 	.quota_on =			selinux_quota_on,
 	.syslog =			selinux_syslog,
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 6bfaba6177c2..7f12cc7015b6 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -2827,6 +2827,7 @@ struct security_operations smack_ops = {
 	.capget = 			cap_capget,
 	.capset = 			cap_capset,
 	.capable = 			cap_capable,
+	.task_capable = 		cap_task_capable,
 	.syslog = 			smack_syslog,
 	.settime = 			cap_settime,
 	.vm_enough_memory = 		cap_vm_enough_memory,
-- 
cgit v1.2.3


From e9079cce201784632aed4b1a3121ee38c1ced0b6 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Tue, 14 Oct 2008 14:43:29 +0100
Subject: GFS2: Support for FIEMAP ioctl

This patch implements the FIEMAP ioctl for GFS2. We can use the generic
code (aside from a lock order issue, solved as per Ted Tso's suggestion)
for which I've introduced a new variant of the generic function. We also
have one exception to deal with, namely stuffed files, so we do that
"by hand", setting all the required flags.

This has been tested with a modified (I could only find an old version) of
Eric's test program, and appears to work correctly.

This patch does not currently support FIEMAP of xattrs, but the plan is to add
that feature at some future point.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Cc: Theodore Tso <tytso@mit.edu>
Cc: Eric Sandeen <sandeen@redhat.com>
---
 fs/gfs2/ops_inode.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 fs/ioctl.c          | 44 ++++++++++++++++++++++++++++++++++----------
 include/linux/fs.h  |  3 +++
 3 files changed, 83 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index d232991b9046..1e24b65e1d23 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -19,6 +19,7 @@
 #include <linux/gfs2_ondisk.h>
 #include <linux/crc32.h>
 #include <linux/lm_interface.h>
+#include <linux/fiemap.h>
 #include <asm/uaccess.h>
 
 #include "gfs2.h"
@@ -1212,6 +1213,48 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name)
 	return gfs2_ea_remove(GFS2_I(dentry->d_inode), &er);
 }
 
+static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+		       u64 start, u64 len)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_holder gh;
+	int ret;
+
+	ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
+	if (ret)
+		return ret;
+
+	mutex_lock(&inode->i_mutex);
+
+	ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
+	if (ret)
+		goto out;
+
+	if (gfs2_is_stuffed(ip)) {
+		u64 phys = ip->i_no_addr << inode->i_blkbits;
+		u64 size = i_size_read(inode);
+		u32 flags = FIEMAP_EXTENT_LAST|FIEMAP_EXTENT_NOT_ALIGNED|
+			    FIEMAP_EXTENT_DATA_INLINE;
+		phys += sizeof(struct gfs2_dinode);
+		phys += start;
+		if (start + len > size)
+			len = size - start;
+		if (start < size)
+			ret = fiemap_fill_next_extent(fieinfo, start, phys,
+						      len, flags);
+		if (ret == 1)
+			ret = 0;
+	} else {
+		ret = __generic_block_fiemap(inode, fieinfo, start, len,
+					     gfs2_block_map);
+	}
+
+	gfs2_glock_dq_uninit(&gh);
+out:
+	mutex_unlock(&inode->i_mutex);
+	return ret;
+}
+
 const struct inode_operations gfs2_file_iops = {
 	.permission = gfs2_permission,
 	.setattr = gfs2_setattr,
@@ -1220,6 +1263,7 @@ const struct inode_operations gfs2_file_iops = {
 	.getxattr = gfs2_getxattr,
 	.listxattr = gfs2_listxattr,
 	.removexattr = gfs2_removexattr,
+	.fiemap = gfs2_fiemap,
 };
 
 const struct inode_operations gfs2_dir_iops = {
@@ -1239,6 +1283,7 @@ const struct inode_operations gfs2_dir_iops = {
 	.getxattr = gfs2_getxattr,
 	.listxattr = gfs2_listxattr,
 	.removexattr = gfs2_removexattr,
+	.fiemap = gfs2_fiemap,
 };
 
 const struct inode_operations gfs2_symlink_iops = {
@@ -1251,5 +1296,6 @@ const struct inode_operations gfs2_symlink_iops = {
 	.getxattr = gfs2_getxattr,
 	.listxattr = gfs2_listxattr,
 	.removexattr = gfs2_removexattr,
+	.fiemap = gfs2_fiemap,
 };
 
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 43e8b2c0664b..cc3f1aa1cf7b 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -231,7 +231,8 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
 #define blk_to_logical(inode, blk) (blk << (inode)->i_blkbits)
 #define logical_to_blk(inode, offset) (offset >> (inode)->i_blkbits);
 
-/*
+/**
+ * __generic_block_fiemap - FIEMAP for block based inodes (no locking)
  * @inode - the inode to map
  * @arg - the pointer to userspace where we copy everything to
  * @get_block - the fs's get_block function
@@ -242,11 +243,15 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
  *
  * If it is possible to have data blocks beyond a hole past @inode->i_size, then
  * please do not use this function, it will stop at the first unmapped block
- * beyond i_size
+ * beyond i_size.
+ *
+ * If you use this function directly, you need to do your own locking. Use
+ * generic_block_fiemap if you want the locking done for you.
  */
-int generic_block_fiemap(struct inode *inode,
-			 struct fiemap_extent_info *fieinfo, u64 start,
-			 u64 len, get_block_t *get_block)
+
+int __generic_block_fiemap(struct inode *inode,
+			   struct fiemap_extent_info *fieinfo, u64 start,
+			   u64 len, get_block_t *get_block)
 {
 	struct buffer_head tmp;
 	unsigned int start_blk;
@@ -260,9 +265,6 @@ int generic_block_fiemap(struct inode *inode,
 
 	start_blk = logical_to_blk(inode, start);
 
-	/* guard against change */
-	mutex_lock(&inode->i_mutex);
-
 	length = (long long)min_t(u64, len, i_size_read(inode));
 	map_len = length;
 
@@ -334,14 +336,36 @@ int generic_block_fiemap(struct inode *inode,
 		cond_resched();
 	} while (1);
 
-	mutex_unlock(&inode->i_mutex);
-
 	/* if ret is 1 then we just hit the end of the extent array */
 	if (ret == 1)
 		ret = 0;
 
 	return ret;
 }
+EXPORT_SYMBOL(__generic_block_fiemap);
+
+/**
+ * generic_block_fiemap - FIEMAP for block based inodes
+ * @inode: The inode to map
+ * @fieinfo: The mapping information
+ * @start: The initial block to map
+ * @len: The length of the extect to attempt to map
+ * @get_block: The block mapping function for the fs
+ *
+ * Calls __generic_block_fiemap to map the inode, after taking
+ * the inode's mutex lock.
+ */
+
+int generic_block_fiemap(struct inode *inode,
+			 struct fiemap_extent_info *fieinfo, u64 start,
+			 u64 len, get_block_t *get_block)
+{
+	int ret;
+	mutex_lock(&inode->i_mutex);
+	ret = __generic_block_fiemap(inode, fieinfo, start, len, get_block);
+	mutex_unlock(&inode->i_mutex);
+	return ret;
+}
 EXPORT_SYMBOL(generic_block_fiemap);
 
 #endif  /*  CONFIG_BLOCK  */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index f2a3010140e3..e34bc6925fdf 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2059,6 +2059,9 @@ extern int vfs_fstat(unsigned int, struct kstat *);
 
 extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
 		    unsigned long arg);
+extern int __generic_block_fiemap(struct inode *inode,
+				  struct fiemap_extent_info *fieinfo, u64 start,
+				  u64 len, get_block_t *get_block);
 extern int generic_block_fiemap(struct inode *inode,
 				struct fiemap_extent_info *fieinfo, u64 start,
 				u64 len, get_block_t *get_block);
-- 
cgit v1.2.3


From ea7d3fef4222cd98556a0b386598268d4dbf6670 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Sun, 4 Jan 2009 13:03:02 -0800
Subject: rcu: eliminate synchronize_rcu_xxx macro

Impact: cleanup

Expand macro into two files.

The synchronize_rcu_xxx macro is quite ugly and it's only used by two
callers, so expand it instead.  This makes this code easier to change.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/rcupdate.h | 12 ------------
 kernel/rcupdate.c        | 11 +++++++++--
 kernel/rcupreempt.c      | 11 ++++++++++-
 3 files changed, 19 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 1168fbcea8d4..921340a7b71c 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -204,18 +204,6 @@ struct rcu_synchronize {
 
 extern void wakeme_after_rcu(struct rcu_head  *head);
 
-#define synchronize_rcu_xxx(name, func) \
-void name(void) \
-{ \
-	struct rcu_synchronize rcu; \
-	\
-	init_completion(&rcu.completion); \
-	/* Will wake me after RCU finished. */ \
-	func(&rcu.head, wakeme_after_rcu); \
-	/* Wait for it. */ \
-	wait_for_completion(&rcu.completion); \
-}
-
 /**
  * synchronize_sched - block until all CPUs have exited any non-preemptive
  * kernel code sequences.
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index ad63af8b2521..d92a76a881aa 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -77,8 +77,15 @@ void wakeme_after_rcu(struct rcu_head  *head)
  * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
  * and may be nested.
  */
-void synchronize_rcu(void);	/* Makes kernel-doc tools happy */
-synchronize_rcu_xxx(synchronize_rcu, call_rcu)
+void synchronize_rcu(void)
+{
+	struct rcu_synchronize rcu;
+	init_completion(&rcu.completion);
+	/* Will wake me after RCU finished. */
+	call_rcu(&rcu.head, wakeme_after_rcu);
+	/* Wait for it. */
+	wait_for_completion(&rcu.completion);
+}
 EXPORT_SYMBOL_GPL(synchronize_rcu);
 
 static void rcu_barrier_callback(struct rcu_head *notused)
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c
index f9dc8f3720f6..33cfc50781f9 100644
--- a/kernel/rcupreempt.c
+++ b/kernel/rcupreempt.c
@@ -1177,7 +1177,16 @@ EXPORT_SYMBOL_GPL(call_rcu_sched);
  * in -rt this does -not- necessarily result in all currently executing
  * interrupt -handlers- having completed.
  */
-synchronize_rcu_xxx(__synchronize_sched, call_rcu_sched)
+void __synchronize_sched(void)
+{
+	struct rcu_synchronize rcu;
+
+	init_completion(&rcu.completion);
+	/* Will wake me after RCU finished. */
+	call_rcu_sched(&rcu.head, wakeme_after_rcu);
+	/* Wait for it. */
+	wait_for_completion(&rcu.completion);
+}
 EXPORT_SYMBOL_GPL(__synchronize_sched);
 
 /*
-- 
cgit v1.2.3


From a6037b61c2f5fc99c57c15b26d7cfa58bbb34008 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 5 Jan 2009 11:28:22 +0100
Subject: hrtimer: fix recursion deadlock by re-introducing the softirq

Impact: fix rare runtime deadlock

There are a few sites that do:

  spin_lock_irq(&foo)
  hrtimer_start(&bar)
    __run_hrtimer(&bar)
      func()
        spin_lock(&foo)

which obviously deadlocks. In order to avoid this, never call __run_hrtimer()
from hrtimer_start*() context, but instead defer this to softirq context.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/interrupt.h |  3 ++-
 kernel/hrtimer.c          | 60 +++++++++++++++++++++--------------------------
 2 files changed, 29 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 0702c4d7bdf0..2062833f5f7a 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -253,7 +253,8 @@ enum
 	BLOCK_SOFTIRQ,
 	TASKLET_SOFTIRQ,
 	SCHED_SOFTIRQ,
-	RCU_SOFTIRQ, 	/* Preferable RCU should always be the last softirq */
+	HRTIMER_SOFTIRQ,
+	RCU_SOFTIRQ,	/* Preferable RCU should always be the last softirq */
 
 	NR_SOFTIRQS
 };
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 8010a67cead0..b68e98f4e4c1 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -634,7 +634,6 @@ static inline void hrtimer_init_timer_hres(struct hrtimer *timer)
 {
 }
 
-static void __run_hrtimer(struct hrtimer *timer);
 
 /*
  * When High resolution timers are active, try to reprogram. Note, that in case
@@ -646,13 +645,9 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
 					    struct hrtimer_clock_base *base)
 {
 	if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) {
-		/*
-		 * XXX: recursion check?
-		 * hrtimer_forward() should round up with timer granularity
-		 * so that we never get into inf recursion here,
-		 * it doesn't do that though
-		 */
-		__run_hrtimer(timer);
+		spin_unlock(&base->cpu_base->lock);
+		raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+		spin_lock(&base->cpu_base->lock);
 		return 1;
 	}
 	return 0;
@@ -705,11 +700,6 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
 }
 static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
 static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { }
-static inline int hrtimer_reprogram(struct hrtimer *timer,
-				    struct hrtimer_clock_base *base)
-{
-	return 0;
-}
 
 #endif /* CONFIG_HIGH_RES_TIMERS */
 
@@ -780,9 +770,11 @@ EXPORT_SYMBOL_GPL(hrtimer_forward);
  *
  * The timer is inserted in expiry order. Insertion into the
  * red black tree is O(log(n)). Must hold the base lock.
+ *
+ * Returns 1 when the new timer is the leftmost timer in the tree.
  */
-static void enqueue_hrtimer(struct hrtimer *timer,
-			    struct hrtimer_clock_base *base, int reprogram)
+static int enqueue_hrtimer(struct hrtimer *timer,
+			   struct hrtimer_clock_base *base)
 {
 	struct rb_node **link = &base->active.rb_node;
 	struct rb_node *parent = NULL;
@@ -814,20 +806,8 @@ static void enqueue_hrtimer(struct hrtimer *timer,
 	 * Insert the timer to the rbtree and check whether it
 	 * replaces the first pending timer
 	 */
-	if (leftmost) {
-		/*
-		 * Reprogram the clock event device. When the timer is already
-		 * expired hrtimer_enqueue_reprogram has either called the
-		 * callback or added it to the pending list and raised the
-		 * softirq.
-		 *
-		 * This is a NOP for !HIGHRES
-		 */
-		if (reprogram && hrtimer_enqueue_reprogram(timer, base))
-			return;
-
+	if (leftmost)
 		base->first = &timer->node;
-	}
 
 	rb_link_node(&timer->node, parent, link);
 	rb_insert_color(&timer->node, &base->active);
@@ -836,6 +816,8 @@ static void enqueue_hrtimer(struct hrtimer *timer,
 	 * state of a possibly running callback.
 	 */
 	timer->state |= HRTIMER_STATE_ENQUEUED;
+
+	return leftmost;
 }
 
 /*
@@ -912,7 +894,7 @@ hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long delta_n
 {
 	struct hrtimer_clock_base *base, *new_base;
 	unsigned long flags;
-	int ret;
+	int ret, leftmost;
 
 	base = lock_hrtimer_base(timer, &flags);
 
@@ -940,12 +922,16 @@ hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long delta_n
 
 	timer_stats_hrtimer_set_start_info(timer);
 
+	leftmost = enqueue_hrtimer(timer, new_base);
+
 	/*
 	 * Only allow reprogramming if the new base is on this CPU.
 	 * (it might still be on another CPU if the timer was pending)
+	 *
+	 * XXX send_remote_softirq() ?
 	 */
-	enqueue_hrtimer(timer, new_base,
-			new_base->cpu_base == &__get_cpu_var(hrtimer_bases));
+	if (leftmost && new_base->cpu_base == &__get_cpu_var(hrtimer_bases))
+		hrtimer_enqueue_reprogram(timer, new_base);
 
 	unlock_hrtimer_base(timer, &flags);
 
@@ -1163,7 +1149,7 @@ static void __run_hrtimer(struct hrtimer *timer)
 	 */
 	if (restart != HRTIMER_NORESTART) {
 		BUG_ON(timer->state != HRTIMER_STATE_CALLBACK);
-		enqueue_hrtimer(timer, base, 0);
+		enqueue_hrtimer(timer, base);
 	}
 	timer->state &= ~HRTIMER_STATE_CALLBACK;
 }
@@ -1277,6 +1263,11 @@ void hrtimer_peek_ahead_timers(void)
 	local_irq_restore(flags);
 }
 
+static void run_hrtimer_softirq(struct softirq_action *h)
+{
+	hrtimer_peek_ahead_timers();
+}
+
 #endif	/* CONFIG_HIGH_RES_TIMERS */
 
 /*
@@ -1532,7 +1523,7 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
 		 * is done, which will run all expired timers and re-programm
 		 * the timer device.
 		 */
-		enqueue_hrtimer(timer, new_base, 0);
+		enqueue_hrtimer(timer, new_base);
 
 		/* Clear the migration state bit */
 		timer->state &= ~HRTIMER_STATE_MIGRATE;
@@ -1610,6 +1601,9 @@ void __init hrtimers_init(void)
 	hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
 			  (void *)(long)smp_processor_id());
 	register_cpu_notifier(&hrtimers_nb);
+#ifdef CONFIG_HIGH_RES_TIMERS
+	open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq);
+#endif
 }
 
 /**
-- 
cgit v1.2.3


From c70f22d203fc02c805b6ed4a3483b740dc36786b Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Mon, 5 Jan 2009 19:07:50 +0800
Subject: sched: clean up arch_reinit_sched_domains()

- Make arch_reinit_sched_domains() static. It was exported to be used in
  s390, but now rebuild_sched_domains() is used instead.

- Make it return void.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 1 -
 kernel/sched.c        | 9 +++------
 2 files changed, 3 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 38a3f4b15394..91207df702e8 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -912,7 +912,6 @@ static inline struct cpumask *sched_domain_span(struct sched_domain *sd)
 
 extern void partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
 				    struct sched_domain_attr *dattr_new);
-extern int arch_reinit_sched_domains(void);
 
 /* Test a flag in parent sched domain */
 static inline int test_sd_parent(struct sched_domain *sd, int flag)
diff --git a/kernel/sched.c b/kernel/sched.c
index 9a8e296959c1..c5019a5dcaa4 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -7987,7 +7987,7 @@ match2:
 }
 
 #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
-int arch_reinit_sched_domains(void)
+static void arch_reinit_sched_domains(void)
 {
 	get_online_cpus();
 
@@ -7996,13 +7996,10 @@ int arch_reinit_sched_domains(void)
 
 	rebuild_sched_domains();
 	put_online_cpus();
-
-	return 0;
 }
 
 static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
 {
-	int ret;
 	unsigned int level = 0;
 
 	if (sscanf(buf, "%u", &level) != 1)
@@ -8023,9 +8020,9 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
 	else
 		sched_mc_power_savings = level;
 
-	ret = arch_reinit_sched_domains();
+	arch_reinit_sched_domains();
 
-	return ret ? ret : count;
+	return count;
 }
 
 #ifdef CONFIG_SCHED_MC
-- 
cgit v1.2.3


From be92d7af38fb8a91f8575ab2272e00f2e51667ff Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 5 Jan 2009 14:34:42 +0100
Subject: genirq: provide irq_to_desc() to non-genirq architectures too

Impact: build fix on non-genirq architectures

Sam Ravnborg reported this build failure on sparc32 allmodconfig,
the GPIO drivers assume the presence of irq_to_desc():

 drivers/gpio/gpiolib.c: In function `gpiolib_dbg_show':
 drivers/gpio/gpiolib.c:1146: error: implicit declaration of function 'irq_to_desc'

Add it in the !genirq case too.

Reported-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Tested-by: Sam Ravnborg <sam@ravnborg.org>
---
 include/linux/irqnr.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h
index 5504a5c97836..86af92e9e84c 100644
--- a/include/linux/irqnr.h
+++ b/include/linux/irqnr.h
@@ -8,7 +8,12 @@
 
 #ifndef CONFIG_GENERIC_HARDIRQS
 #include <asm/irq.h>
-# define nr_irqs		NR_IRQS
+
+/*
+ * Wrappers for non-genirq architectures:
+ */
+#define nr_irqs			NR_IRQS
+#define irq_to_desc(irq)	(&irq_desc[irq])
 
 # define for_each_irq_desc(irq, desc)		\
 	for (irq = 0; irq < nr_irqs; irq++)
-- 
cgit v1.2.3


From c42aa775cc8a8ca558db0cc75979fb8e16667447 Mon Sep 17 00:00:00 2001
From: Nicolas Ferre <nicolas.ferre@atmel.com>
Date: Thu, 20 Nov 2008 15:59:12 +0100
Subject: atmel-mci: move atmel-mci.h file to include/linux

Needed to use the atmel-mci driver in an architecture
independant maner.

Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Signed-off-by: Haavard Skinnemoen <haavard.skinnemoen@atmel.com>
---
 arch/avr32/boards/atngw100/setup.c      |  2 +-
 arch/avr32/boards/atstk1000/atstk1002.c |  2 +-
 arch/avr32/boards/atstk1000/atstk1003.c |  2 +-
 arch/avr32/boards/atstk1000/atstk1004.c |  2 +-
 arch/avr32/boards/mimc200/setup.c       |  2 +-
 arch/avr32/include/asm/atmel-mci.h      | 39 ---------------------------------
 arch/avr32/mach-at32ap/at32ap700x.c     |  2 +-
 drivers/mmc/host/atmel-mci.c            |  2 +-
 include/linux/atmel-mci.h               | 39 +++++++++++++++++++++++++++++++++
 9 files changed, 46 insertions(+), 46 deletions(-)
 delete mode 100644 arch/avr32/include/asm/atmel-mci.h
 create mode 100644 include/linux/atmel-mci.h

(limited to 'include/linux')

diff --git a/arch/avr32/boards/atngw100/setup.c b/arch/avr32/boards/atngw100/setup.c
index 32fb9ba0fbdf..05d3722fff18 100644
--- a/arch/avr32/boards/atngw100/setup.c
+++ b/arch/avr32/boards/atngw100/setup.c
@@ -19,8 +19,8 @@
 #include <linux/types.h>
 #include <linux/leds.h>
 #include <linux/spi/spi.h>
+#include <linux/atmel-mci.h>
 
-#include <asm/atmel-mci.h>
 #include <asm/io.h>
 #include <asm/setup.h>
 
diff --git a/arch/avr32/boards/atstk1000/atstk1002.c b/arch/avr32/boards/atstk1000/atstk1002.c
index 5c5cdf3b464f..6d94f74bc5c7 100644
--- a/arch/avr32/boards/atstk1000/atstk1002.c
+++ b/arch/avr32/boards/atstk1000/atstk1002.c
@@ -16,12 +16,12 @@
 #include <linux/types.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/at73c213.h>
+#include <linux/atmel-mci.h>
 
 #include <video/atmel_lcdc.h>
 
 #include <asm/io.h>
 #include <asm/setup.h>
-#include <asm/atmel-mci.h>
 
 #include <mach/at32ap700x.h>
 #include <mach/board.h>
diff --git a/arch/avr32/boards/atstk1000/atstk1003.c b/arch/avr32/boards/atstk1000/atstk1003.c
index 134b566630b0..db2038cd2e5b 100644
--- a/arch/avr32/boards/atstk1000/atstk1003.c
+++ b/arch/avr32/boards/atstk1000/atstk1003.c
@@ -17,9 +17,9 @@
 
 #include <linux/spi/at73c213.h>
 #include <linux/spi/spi.h>
+#include <linux/atmel-mci.h>
 
 #include <asm/setup.h>
-#include <asm/atmel-mci.h>
 
 #include <mach/at32ap700x.h>
 #include <mach/board.h>
diff --git a/arch/avr32/boards/atstk1000/atstk1004.c b/arch/avr32/boards/atstk1000/atstk1004.c
index cb32eb844aa7..29b35aca96cd 100644
--- a/arch/avr32/boards/atstk1000/atstk1004.c
+++ b/arch/avr32/boards/atstk1000/atstk1004.c
@@ -17,11 +17,11 @@
 
 #include <linux/spi/at73c213.h>
 #include <linux/spi/spi.h>
+#include <linux/atmel-mci.h>
 
 #include <video/atmel_lcdc.h>
 
 #include <asm/setup.h>
-#include <asm/atmel-mci.h>
 
 #include <mach/at32ap700x.h>
 #include <mach/board.h>
diff --git a/arch/avr32/boards/mimc200/setup.c b/arch/avr32/boards/mimc200/setup.c
index 397cbb8f44c8..811d73027f77 100644
--- a/arch/avr32/boards/mimc200/setup.c
+++ b/arch/avr32/boards/mimc200/setup.c
@@ -24,7 +24,7 @@ extern struct atmel_lcdfb_info mimc200_lcdc_data;
 #include <video/atmel_lcdc.h>
 #include <linux/fb.h>
 
-#include <asm/atmel-mci.h>
+#include <linux/atmel-mci.h>
 #include <linux/io.h>
 #include <asm/setup.h>
 
diff --git a/arch/avr32/include/asm/atmel-mci.h b/arch/avr32/include/asm/atmel-mci.h
deleted file mode 100644
index 59f3fadd0b68..000000000000
--- a/arch/avr32/include/asm/atmel-mci.h
+++ /dev/null
@@ -1,39 +0,0 @@
-#ifndef __ASM_AVR32_ATMEL_MCI_H
-#define __ASM_AVR32_ATMEL_MCI_H
-
-#define ATMEL_MCI_MAX_NR_SLOTS	2
-
-struct dma_slave;
-
-/**
- * struct mci_slot_pdata - board-specific per-slot configuration
- * @bus_width: Number of data lines wired up the slot
- * @detect_pin: GPIO pin wired to the card detect switch
- * @wp_pin: GPIO pin wired to the write protect sensor
- *
- * If a given slot is not present on the board, @bus_width should be
- * set to 0. The other fields are ignored in this case.
- *
- * Any pins that aren't available should be set to a negative value.
- *
- * Note that support for multiple slots is experimental -- some cards
- * might get upset if we don't get the clock management exactly right.
- * But in most cases, it should work just fine.
- */
-struct mci_slot_pdata {
-	unsigned int		bus_width;
-	int			detect_pin;
-	int			wp_pin;
-};
-
-/**
- * struct mci_platform_data - board-specific MMC/SDcard configuration
- * @dma_slave: DMA slave interface to use in data transfers, or NULL.
- * @slot: Per-slot configuration data.
- */
-struct mci_platform_data {
-	struct dma_slave	*dma_slave;
-	struct mci_slot_pdata	slot[ATMEL_MCI_MAX_NR_SLOTS];
-};
-
-#endif /* __ASM_AVR32_ATMEL_MCI_H */
diff --git a/arch/avr32/mach-at32ap/at32ap700x.c b/arch/avr32/mach-at32ap/at32ap700x.c
index 066252eebf61..3cf49553e4f9 100644
--- a/arch/avr32/mach-at32ap/at32ap700x.c
+++ b/arch/avr32/mach-at32ap/at32ap700x.c
@@ -15,8 +15,8 @@
 #include <linux/gpio.h>
 #include <linux/spi/spi.h>
 #include <linux/usb/atmel_usba_udc.h>
+#include <linux/atmel-mci.h>
 
-#include <asm/atmel-mci.h>
 #include <asm/io.h>
 #include <asm/irq.h>
 
diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c
index 7a3f2436b011..1e97916914ad 100644
--- a/drivers/mmc/host/atmel-mci.c
+++ b/drivers/mmc/host/atmel-mci.c
@@ -25,8 +25,8 @@
 #include <linux/stat.h>
 
 #include <linux/mmc/host.h>
+#include <linux/atmel-mci.h>
 
-#include <asm/atmel-mci.h>
 #include <asm/io.h>
 #include <asm/unaligned.h>
 
diff --git a/include/linux/atmel-mci.h b/include/linux/atmel-mci.h
new file mode 100644
index 000000000000..2a2213eefd85
--- /dev/null
+++ b/include/linux/atmel-mci.h
@@ -0,0 +1,39 @@
+#ifndef __LINUX_ATMEL_MCI_H
+#define __LINUX_ATMEL_MCI_H
+
+#define ATMEL_MCI_MAX_NR_SLOTS	2
+
+struct dma_slave;
+
+/**
+ * struct mci_slot_pdata - board-specific per-slot configuration
+ * @bus_width: Number of data lines wired up the slot
+ * @detect_pin: GPIO pin wired to the card detect switch
+ * @wp_pin: GPIO pin wired to the write protect sensor
+ *
+ * If a given slot is not present on the board, @bus_width should be
+ * set to 0. The other fields are ignored in this case.
+ *
+ * Any pins that aren't available should be set to a negative value.
+ *
+ * Note that support for multiple slots is experimental -- some cards
+ * might get upset if we don't get the clock management exactly right.
+ * But in most cases, it should work just fine.
+ */
+struct mci_slot_pdata {
+	unsigned int		bus_width;
+	int			detect_pin;
+	int			wp_pin;
+};
+
+/**
+ * struct mci_platform_data - board-specific MMC/SDcard configuration
+ * @dma_slave: DMA slave interface to use in data transfers, or NULL.
+ * @slot: Per-slot configuration data.
+ */
+struct mci_platform_data {
+	struct dma_slave	*dma_slave;
+	struct mci_slot_pdata	slot[ATMEL_MCI_MAX_NR_SLOTS];
+};
+
+#endif /* __LINUX_ATMEL_MCI_H */
-- 
cgit v1.2.3


From 74f783af95c982aef6d3a1415275650dcf511666 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 19 Aug 2008 14:51:22 +0200
Subject: quota: Add callbacks for allocating and destroying dquot structures

Some filesystems would like to keep private information together with each
dquot. Add callbacks alloc_dquot and destroy_dquot allowing filesystem to
allocate larger dquots from their private slab in a similar fashion we
currently allocate inodes.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
---
 fs/dquot.c            | 27 ++++++++++++++++++++++-----
 include/linux/quota.h |  2 ++
 2 files changed, 24 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dquot.c b/fs/dquot.c
index c237ccc8581c..1b5fc4b7fbeb 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -415,6 +415,16 @@ out_dqlock:
 	return ret;
 }
 
+static void dquot_destroy(struct dquot *dquot)
+{
+	kmem_cache_free(dquot_cachep, dquot);
+}
+
+static inline void do_destroy_dquot(struct dquot *dquot)
+{
+	dquot->dq_sb->dq_op->destroy_dquot(dquot);
+}
+
 /* Invalidate all dquots on the list. Note that this function is called after
  * quota is disabled and pointers from inodes removed so there cannot be new
  * quota users. There can still be some users of quotas due to inodes being
@@ -463,7 +473,7 @@ restart:
 		remove_dquot_hash(dquot);
 		remove_free_dquot(dquot);
 		remove_inuse(dquot);
-		kmem_cache_free(dquot_cachep, dquot);
+		do_destroy_dquot(dquot);
 	}
 	spin_unlock(&dq_list_lock);
 }
@@ -527,7 +537,7 @@ static void prune_dqcache(int count)
 		remove_dquot_hash(dquot);
 		remove_free_dquot(dquot);
 		remove_inuse(dquot);
-		kmem_cache_free(dquot_cachep, dquot);
+		do_destroy_dquot(dquot);
 		count--;
 		head = free_dquots.prev;
 	}
@@ -625,11 +635,16 @@ we_slept:
 	spin_unlock(&dq_list_lock);
 }
 
+static struct dquot *dquot_alloc(struct super_block *sb, int type)
+{
+	return kmem_cache_zalloc(dquot_cachep, GFP_NOFS);
+}
+
 static struct dquot *get_empty_dquot(struct super_block *sb, int type)
 {
 	struct dquot *dquot;
 
-	dquot = kmem_cache_zalloc(dquot_cachep, GFP_NOFS);
+	dquot = sb->dq_op->alloc_dquot(sb, type);
 	if(!dquot)
 		return NODQUOT;
 
@@ -682,7 +697,7 @@ we_slept:
 		dqstats.lookups++;
 		spin_unlock(&dq_list_lock);
 		if (empty)
-			kmem_cache_free(dquot_cachep, empty);
+			do_destroy_dquot(empty);
 	}
 	/* Wait for dq_lock - after this we know that either dquot_release() is already
 	 * finished or it will be canceled due to dq_count > 1 test */
@@ -1533,7 +1548,9 @@ struct dquot_operations dquot_operations = {
 	.acquire_dquot	= dquot_acquire,
 	.release_dquot	= dquot_release,
 	.mark_dirty	= dquot_mark_dquot_dirty,
-	.write_info	= dquot_commit_info
+	.write_info	= dquot_commit_info,
+	.alloc_dquot	= dquot_alloc,
+	.destroy_dquot	= dquot_destroy,
 };
 
 static inline void set_enable_flags(struct quota_info *dqopt, int type)
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 40401b554484..3ce708c2cb3c 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -292,6 +292,8 @@ struct dquot_operations {
 	int (*free_inode) (const struct inode *, unsigned long);
 	int (*transfer) (struct inode *, struct iattr *);
 	int (*write_dquot) (struct dquot *);		/* Ordinary dquot write */
+	struct dquot *(*alloc_dquot)(struct super_block *, int);	/* Allocate memory for new dquot */
+	void (*destroy_dquot)(struct dquot *);		/* Free memory for dquot */
 	int (*acquire_dquot) (struct dquot *);		/* Quota is going to be created on disk */
 	int (*release_dquot) (struct dquot *);		/* Quota is going to be deleted from disk */
 	int (*mark_dirty) (struct dquot *);		/* Dquot is marked dirty */
-- 
cgit v1.2.3


From 12095460f7f315f8ef67a55b2194195d325d48d7 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 20 Aug 2008 14:45:12 +0200
Subject: quota: Increase size of variables for limits and inode usage

So far quota was fine with quota block limits and inode limits/numbers in
a 32-bit type. Now with rapid increase in storage sizes there are coming
requests to be able to handle quota limits above 4TB / more that 2^32 inodes.
So bump up sizes of types in mem_dqblk structure to 64-bits to be able to
handle this. Also update inode allocation / checking functions to use qsize_t
and make global structure keep quota limits in bytes so that things are
consistent.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
---
 fs/dquot.c               | 50 ++++++++++++++++++++++++++++--------------------
 fs/quota_v1.c            | 25 ++++++++++++++++++------
 fs/quota_v2.c            | 21 ++++++++++++++++----
 include/linux/quota.h    | 28 ++++++++++++---------------
 include/linux/quotaops.h |  4 ++--
 5 files changed, 79 insertions(+), 49 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dquot.c b/fs/dquot.c
index 1b5fc4b7fbeb..c02223b6aeb2 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -835,7 +835,7 @@ static void drop_dquot_ref(struct super_block *sb, int type)
 	}
 }
 
-static inline void dquot_incr_inodes(struct dquot *dquot, unsigned long number)
+static inline void dquot_incr_inodes(struct dquot *dquot, qsize_t number)
 {
 	dquot->dq_dqb.dqb_curinodes += number;
 }
@@ -845,7 +845,7 @@ static inline void dquot_incr_space(struct dquot *dquot, qsize_t number)
 	dquot->dq_dqb.dqb_curspace += number;
 }
 
-static inline void dquot_decr_inodes(struct dquot *dquot, unsigned long number)
+static inline void dquot_decr_inodes(struct dquot *dquot, qsize_t number)
 {
 	if (dquot->dq_dqb.dqb_curinodes > number)
 		dquot->dq_dqb.dqb_curinodes -= number;
@@ -862,7 +862,7 @@ static inline void dquot_decr_space(struct dquot *dquot, qsize_t number)
 		dquot->dq_dqb.dqb_curspace -= number;
 	else
 		dquot->dq_dqb.dqb_curspace = 0;
-	if (toqb(dquot->dq_dqb.dqb_curspace) <= dquot->dq_dqb.dqb_bsoftlimit)
+	if (dquot->dq_dqb.dqb_curspace <= dquot->dq_dqb.dqb_bsoftlimit)
 		dquot->dq_dqb.dqb_btime = (time_t) 0;
 	clear_bit(DQ_BLKS_B, &dquot->dq_flags);
 }
@@ -1038,7 +1038,7 @@ static inline char ignore_hardlimit(struct dquot *dquot)
 }
 
 /* needs dq_data_lock */
-static int check_idq(struct dquot *dquot, ulong inodes, char *warntype)
+static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype)
 {
 	*warntype = QUOTA_NL_NOWARN;
 	if (inodes <= 0 || test_bit(DQ_FAKE_B, &dquot->dq_flags))
@@ -1077,7 +1077,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
 		return QUOTA_OK;
 
 	if (dquot->dq_dqb.dqb_bhardlimit &&
-	   toqb(dquot->dq_dqb.dqb_curspace + space) > dquot->dq_dqb.dqb_bhardlimit &&
+	    dquot->dq_dqb.dqb_curspace + space > dquot->dq_dqb.dqb_bhardlimit &&
             !ignore_hardlimit(dquot)) {
 		if (!prealloc)
 			*warntype = QUOTA_NL_BHARDWARN;
@@ -1085,7 +1085,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
 	}
 
 	if (dquot->dq_dqb.dqb_bsoftlimit &&
-	   toqb(dquot->dq_dqb.dqb_curspace + space) > dquot->dq_dqb.dqb_bsoftlimit &&
+	    dquot->dq_dqb.dqb_curspace + space > dquot->dq_dqb.dqb_bsoftlimit &&
 	    dquot->dq_dqb.dqb_btime && get_seconds() >= dquot->dq_dqb.dqb_btime &&
             !ignore_hardlimit(dquot)) {
 		if (!prealloc)
@@ -1094,7 +1094,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
 	}
 
 	if (dquot->dq_dqb.dqb_bsoftlimit &&
-	   toqb(dquot->dq_dqb.dqb_curspace + space) > dquot->dq_dqb.dqb_bsoftlimit &&
+	    dquot->dq_dqb.dqb_curspace + space > dquot->dq_dqb.dqb_bsoftlimit &&
 	    dquot->dq_dqb.dqb_btime == 0) {
 		if (!prealloc) {
 			*warntype = QUOTA_NL_BSOFTWARN;
@@ -1111,7 +1111,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
 	return QUOTA_OK;
 }
 
-static int info_idq_free(struct dquot *dquot, ulong inodes)
+static int info_idq_free(struct dquot *dquot, qsize_t inodes)
 {
 	if (test_bit(DQ_FAKE_B, &dquot->dq_flags) ||
 	    dquot->dq_dqb.dqb_curinodes <= dquot->dq_dqb.dqb_isoftlimit)
@@ -1128,15 +1128,13 @@ static int info_idq_free(struct dquot *dquot, ulong inodes)
 static int info_bdq_free(struct dquot *dquot, qsize_t space)
 {
 	if (test_bit(DQ_FAKE_B, &dquot->dq_flags) ||
-	    toqb(dquot->dq_dqb.dqb_curspace) <= dquot->dq_dqb.dqb_bsoftlimit)
+	    dquot->dq_dqb.dqb_curspace <= dquot->dq_dqb.dqb_bsoftlimit)
 		return QUOTA_NL_NOWARN;
 
-	if (toqb(dquot->dq_dqb.dqb_curspace - space) <=
-	    dquot->dq_dqb.dqb_bsoftlimit)
+	if (dquot->dq_dqb.dqb_curspace - space <= dquot->dq_dqb.dqb_bsoftlimit)
 		return QUOTA_NL_BSOFTBELOW;
-	if (toqb(dquot->dq_dqb.dqb_curspace) >= dquot->dq_dqb.dqb_bhardlimit &&
-	    toqb(dquot->dq_dqb.dqb_curspace - space) <
-						dquot->dq_dqb.dqb_bhardlimit)
+	if (dquot->dq_dqb.dqb_curspace >= dquot->dq_dqb.dqb_bhardlimit &&
+	    dquot->dq_dqb.dqb_curspace - space < dquot->dq_dqb.dqb_bhardlimit)
 		return QUOTA_NL_BHARDBELOW;
 	return QUOTA_NL_NOWARN;
 }
@@ -1279,7 +1277,7 @@ warn_put_all:
 /*
  * This operation can block, but only after everything is updated
  */
-int dquot_alloc_inode(const struct inode *inode, unsigned long number)
+int dquot_alloc_inode(const struct inode *inode, qsize_t number)
 {
 	int cnt, ret = NO_QUOTA;
 	char warntype[MAXQUOTAS];
@@ -1364,7 +1362,7 @@ out_sub:
 /*
  * This operation can block, but only after everything is updated
  */
-int dquot_free_inode(const struct inode *inode, unsigned long number)
+int dquot_free_inode(const struct inode *inode, qsize_t number)
 {
 	unsigned int cnt;
 	char warntype[MAXQUOTAS];
@@ -1883,14 +1881,24 @@ int vfs_dq_quota_on_remount(struct super_block *sb)
 	return ret;
 }
 
+static inline qsize_t qbtos(qsize_t blocks)
+{
+	return blocks << QIF_DQBLKSIZE_BITS;
+}
+
+static inline qsize_t stoqb(qsize_t space)
+{
+	return (space + QIF_DQBLKSIZE - 1) >> QIF_DQBLKSIZE_BITS;
+}
+
 /* Generic routine for getting common part of quota structure */
 static void do_get_dqblk(struct dquot *dquot, struct if_dqblk *di)
 {
 	struct mem_dqblk *dm = &dquot->dq_dqb;
 
 	spin_lock(&dq_data_lock);
-	di->dqb_bhardlimit = dm->dqb_bhardlimit;
-	di->dqb_bsoftlimit = dm->dqb_bsoftlimit;
+	di->dqb_bhardlimit = stoqb(dm->dqb_bhardlimit);
+	di->dqb_bsoftlimit = stoqb(dm->dqb_bsoftlimit);
 	di->dqb_curspace = dm->dqb_curspace;
 	di->dqb_ihardlimit = dm->dqb_ihardlimit;
 	di->dqb_isoftlimit = dm->dqb_isoftlimit;
@@ -1937,8 +1945,8 @@ static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
 		check_blim = 1;
 	}
 	if (di->dqb_valid & QIF_BLIMITS) {
-		dm->dqb_bsoftlimit = di->dqb_bsoftlimit;
-		dm->dqb_bhardlimit = di->dqb_bhardlimit;
+		dm->dqb_bsoftlimit = qbtos(di->dqb_bsoftlimit);
+		dm->dqb_bhardlimit = qbtos(di->dqb_bhardlimit);
 		check_blim = 1;
 	}
 	if (di->dqb_valid & QIF_INODES) {
@@ -1956,7 +1964,7 @@ static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
 		dm->dqb_itime = di->dqb_itime;
 
 	if (check_blim) {
-		if (!dm->dqb_bsoftlimit || toqb(dm->dqb_curspace) < dm->dqb_bsoftlimit) {
+		if (!dm->dqb_bsoftlimit || dm->dqb_curspace < dm->dqb_bsoftlimit) {
 			dm->dqb_btime = 0;
 			clear_bit(DQ_BLKS_B, &dquot->dq_flags);
 		}
diff --git a/fs/quota_v1.c b/fs/quota_v1.c
index 5ae15b13eeb0..3e078eee5644 100644
--- a/fs/quota_v1.c
+++ b/fs/quota_v1.c
@@ -14,14 +14,27 @@ MODULE_AUTHOR("Jan Kara");
 MODULE_DESCRIPTION("Old quota format support");
 MODULE_LICENSE("GPL");
 
+#define QUOTABLOCK_BITS 10
+#define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS)
+
+static inline qsize_t v1_stoqb(qsize_t space)
+{
+	return (space + QUOTABLOCK_SIZE - 1) >> QUOTABLOCK_BITS;
+}
+
+static inline qsize_t v1_qbtos(qsize_t blocks)
+{
+	return blocks << QUOTABLOCK_BITS;
+}
+
 static void v1_disk2mem_dqblk(struct mem_dqblk *m, struct v1_disk_dqblk *d)
 {
 	m->dqb_ihardlimit = d->dqb_ihardlimit;
 	m->dqb_isoftlimit = d->dqb_isoftlimit;
 	m->dqb_curinodes = d->dqb_curinodes;
-	m->dqb_bhardlimit = d->dqb_bhardlimit;
-	m->dqb_bsoftlimit = d->dqb_bsoftlimit;
-	m->dqb_curspace = ((qsize_t)d->dqb_curblocks) << QUOTABLOCK_BITS;
+	m->dqb_bhardlimit = v1_qbtos(d->dqb_bhardlimit);
+	m->dqb_bsoftlimit = v1_qbtos(d->dqb_bsoftlimit);
+	m->dqb_curspace = v1_qbtos(d->dqb_curblocks);
 	m->dqb_itime = d->dqb_itime;
 	m->dqb_btime = d->dqb_btime;
 }
@@ -31,9 +44,9 @@ static void v1_mem2disk_dqblk(struct v1_disk_dqblk *d, struct mem_dqblk *m)
 	d->dqb_ihardlimit = m->dqb_ihardlimit;
 	d->dqb_isoftlimit = m->dqb_isoftlimit;
 	d->dqb_curinodes = m->dqb_curinodes;
-	d->dqb_bhardlimit = m->dqb_bhardlimit;
-	d->dqb_bsoftlimit = m->dqb_bsoftlimit;
-	d->dqb_curblocks = toqb(m->dqb_curspace);
+	d->dqb_bhardlimit = v1_stoqb(m->dqb_bhardlimit);
+	d->dqb_bsoftlimit = v1_stoqb(m->dqb_bsoftlimit);
+	d->dqb_curblocks = v1_stoqb(m->dqb_curspace);
 	d->dqb_itime = m->dqb_itime;
 	d->dqb_btime = m->dqb_btime;
 }
diff --git a/fs/quota_v2.c b/fs/quota_v2.c
index b53827dc02d9..51c4717f7c6a 100644
--- a/fs/quota_v2.c
+++ b/fs/quota_v2.c
@@ -26,6 +26,19 @@ typedef char *dqbuf_t;
 #define GETIDINDEX(id, depth) (((id) >> ((V2_DQTREEDEPTH-(depth)-1)*8)) & 0xff)
 #define GETENTRIES(buf) ((struct v2_disk_dqblk *)(((char *)buf)+sizeof(struct v2_disk_dqdbheader)))
 
+#define QUOTABLOCK_BITS 10
+#define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS)
+
+static inline qsize_t v2_stoqb(qsize_t space)
+{
+	return (space + QUOTABLOCK_SIZE - 1) >> QUOTABLOCK_BITS;
+}
+
+static inline qsize_t v2_qbtos(qsize_t blocks)
+{
+	return blocks << QUOTABLOCK_BITS;
+}
+
 /* Check whether given file is really vfsv0 quotafile */
 static int v2_check_quota_file(struct super_block *sb, int type)
 {
@@ -104,8 +117,8 @@ static void disk2memdqb(struct mem_dqblk *m, struct v2_disk_dqblk *d)
 	m->dqb_isoftlimit = le32_to_cpu(d->dqb_isoftlimit);
 	m->dqb_curinodes = le32_to_cpu(d->dqb_curinodes);
 	m->dqb_itime = le64_to_cpu(d->dqb_itime);
-	m->dqb_bhardlimit = le32_to_cpu(d->dqb_bhardlimit);
-	m->dqb_bsoftlimit = le32_to_cpu(d->dqb_bsoftlimit);
+	m->dqb_bhardlimit = v2_qbtos(le32_to_cpu(d->dqb_bhardlimit));
+	m->dqb_bsoftlimit = v2_qbtos(le32_to_cpu(d->dqb_bsoftlimit));
 	m->dqb_curspace = le64_to_cpu(d->dqb_curspace);
 	m->dqb_btime = le64_to_cpu(d->dqb_btime);
 }
@@ -116,8 +129,8 @@ static void mem2diskdqb(struct v2_disk_dqblk *d, struct mem_dqblk *m, qid_t id)
 	d->dqb_isoftlimit = cpu_to_le32(m->dqb_isoftlimit);
 	d->dqb_curinodes = cpu_to_le32(m->dqb_curinodes);
 	d->dqb_itime = cpu_to_le64(m->dqb_itime);
-	d->dqb_bhardlimit = cpu_to_le32(m->dqb_bhardlimit);
-	d->dqb_bsoftlimit = cpu_to_le32(m->dqb_bsoftlimit);
+	d->dqb_bhardlimit = cpu_to_le32(v2_qbtos(m->dqb_bhardlimit));
+	d->dqb_bsoftlimit = cpu_to_le32(v2_qbtos(m->dqb_bsoftlimit));
 	d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
 	d->dqb_btime = cpu_to_le64(m->dqb_btime);
 	d->dqb_id = cpu_to_le32(id);
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 3ce708c2cb3c..9ea468363f9f 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -39,15 +39,6 @@
 #define __DQUOT_VERSION__	"dquot_6.5.1"
 #define __DQUOT_NUM_VERSION__	6*10000+5*100+1
 
-/* Size of blocks in which are counted size limits */
-#define QUOTABLOCK_BITS 10
-#define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS)
-
-/* Conversion routines from and to quota blocks */
-#define qb2kb(x) ((x) << (QUOTABLOCK_BITS-10))
-#define kb2qb(x) ((x) >> (QUOTABLOCK_BITS-10))
-#define toqb(x) (((x) + QUOTABLOCK_SIZE - 1) >> QUOTABLOCK_BITS)
-
 #define MAXQUOTAS 2
 #define USRQUOTA  0		/* element used for user quotas */
 #define GRPQUOTA  1		/* element used for group quotas */
@@ -80,6 +71,11 @@
 #define Q_GETQUOTA 0x800007	/* get user quota structure */
 #define Q_SETQUOTA 0x800008	/* set user quota structure */
 
+/* Size of block in which space limits are passed through the quota
+ * interface */
+#define QIF_DQBLKSIZE_BITS 10
+#define QIF_DQBLKSIZE (1 << QIF_DQBLKSIZE_BITS)
+
 /*
  * Quota structure used for communication with userspace via quotactl
  * Following flags are used to specify which fields are valid
@@ -187,12 +183,12 @@ extern spinlock_t dq_data_lock;
  * Data for one user/group kept in memory
  */
 struct mem_dqblk {
-	__u32 dqb_bhardlimit;	/* absolute limit on disk blks alloc */
-	__u32 dqb_bsoftlimit;	/* preferred limit on disk blks */
+	qsize_t dqb_bhardlimit;	/* absolute limit on disk blks alloc */
+	qsize_t dqb_bsoftlimit;	/* preferred limit on disk blks */
 	qsize_t dqb_curspace;	/* current used space */
-	__u32 dqb_ihardlimit;	/* absolute limit on allocated inodes */
-	__u32 dqb_isoftlimit;	/* preferred inode limit */
-	__u32 dqb_curinodes;	/* current # allocated inodes */
+	qsize_t dqb_ihardlimit;	/* absolute limit on allocated inodes */
+	qsize_t dqb_isoftlimit;	/* preferred inode limit */
+	qsize_t dqb_curinodes;	/* current # allocated inodes */
 	time_t dqb_btime;	/* time limit for excessive disk use */
 	time_t dqb_itime;	/* time limit for excessive inode use */
 };
@@ -287,9 +283,9 @@ struct dquot_operations {
 	int (*initialize) (struct inode *, int);
 	int (*drop) (struct inode *);
 	int (*alloc_space) (struct inode *, qsize_t, int);
-	int (*alloc_inode) (const struct inode *, unsigned long);
+	int (*alloc_inode) (const struct inode *, qsize_t);
 	int (*free_space) (struct inode *, qsize_t);
-	int (*free_inode) (const struct inode *, unsigned long);
+	int (*free_inode) (const struct inode *, qsize_t);
 	int (*transfer) (struct inode *, struct iattr *);
 	int (*write_dquot) (struct dquot *);		/* Ordinary dquot write */
 	struct dquot *(*alloc_dquot)(struct super_block *, int);	/* Allocate memory for new dquot */
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index a558a4c1d35a..adcc7ba3accb 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -26,10 +26,10 @@ int dquot_initialize(struct inode *inode, int type);
 int dquot_drop(struct inode *inode);
 
 int dquot_alloc_space(struct inode *inode, qsize_t number, int prealloc);
-int dquot_alloc_inode(const struct inode *inode, unsigned long number);
+int dquot_alloc_inode(const struct inode *inode, qsize_t number);
 
 int dquot_free_space(struct inode *inode, qsize_t number);
-int dquot_free_inode(const struct inode *inode, unsigned long number);
+int dquot_free_inode(const struct inode *inode, qsize_t number);
 
 int dquot_transfer(struct inode *inode, struct iattr *iattr);
 int dquot_commit(struct dquot *dquot);
-- 
cgit v1.2.3


From e4bc7b4b7ff783779b6928d55a9308910bf180a3 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 20 Aug 2008 16:21:01 +0200
Subject: quota: Make _SUSPENDED just a flag

Upto now, DQUOT_USR_SUSPENDED behaved like a state - i.e., either quota
was enabled or suspended or none. Now allowed states are 0, ENABLED,
ENABLED | SUSPENDED. This will be useful later when we implement separate
enabling of quota usage tracking and limits enforcement because we need to
keep track of a state which has been suspended.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
---
 fs/dquot.c               | 10 ++++++----
 include/linux/quotaops.h |  6 ++++--
 2 files changed, 10 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dquot.c b/fs/dquot.c
index c88330602ddd..22340c610e1a 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -1570,18 +1570,20 @@ static inline void reset_enable_flags(struct quota_info *dqopt, int type,
 {
 	switch (type) {
 		case USRQUOTA:
-			dqopt->flags &= ~DQUOT_USR_ENABLED;
 			if (remount)
 				dqopt->flags |= DQUOT_USR_SUSPENDED;
-			else
+			else {
+				dqopt->flags &= ~DQUOT_USR_ENABLED;
 				dqopt->flags &= ~DQUOT_USR_SUSPENDED;
+			}
 			break;
 		case GRPQUOTA:
-			dqopt->flags &= ~DQUOT_GRP_ENABLED;
 			if (remount)
 				dqopt->flags |= DQUOT_GRP_SUSPENDED;
-			else
+			else {
+				dqopt->flags &= ~DQUOT_GRP_ENABLED;
 				dqopt->flags &= ~DQUOT_GRP_SUSPENDED;
+			}
 			break;
 	}
 }
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index adcc7ba3accb..ffd97071cd1e 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -67,8 +67,10 @@ static inline struct mem_dqinfo *sb_dqinfo(struct super_block *sb, int type)
 static inline int sb_has_quota_enabled(struct super_block *sb, int type)
 {
 	if (type == USRQUOTA)
-		return sb_dqopt(sb)->flags & DQUOT_USR_ENABLED;
-	return sb_dqopt(sb)->flags & DQUOT_GRP_ENABLED;
+		return (sb_dqopt(sb)->flags & DQUOT_USR_ENABLED)
+			&& !(sb_dqopt(sb)->flags & DQUOT_USR_SUSPENDED);
+	return (sb_dqopt(sb)->flags & DQUOT_GRP_ENABLED)
+		&& !(sb_dqopt(sb)->flags & DQUOT_GROUP_SUSPENDED);
 }
 
 static inline int sb_any_quota_enabled(struct super_block *sb)
-- 
cgit v1.2.3


From f55abc0fb9c3189de3da829adf3220322c0da43e Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 20 Aug 2008 17:50:32 +0200
Subject: quota: Allow to separately enable quota accounting and enforcing
 limits

Split DQUOT_USR_ENABLED (and DQUOT_GRP_ENABLED) into DQUOT_USR_USAGE_ENABLED
and DQUOT_USR_LIMITS_ENABLED. This way we are able to separately enable /
disable whether we should:
1) ignore quotas completely
2) just keep uptodate information about usage
3) actually enforce quota limits

This is going to be useful when quota is treated as filesystem metadata - we
then want to keep quota information uptodate all the time and just enable /
disable limits enforcement.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
---
 fs/dquot.c               | 222 +++++++++++++++++++++++++++++------------------
 fs/quota.c               |   8 +-
 include/linux/quota.h    |  30 ++++++-
 include/linux/quotaops.h |  91 ++++++++++++++-----
 4 files changed, 239 insertions(+), 112 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dquot.c b/fs/dquot.c
index 22340c610e1a..7569633efe0e 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -489,7 +489,7 @@ int vfs_quota_sync(struct super_block *sb, int type)
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		if (type != -1 && cnt != type)
 			continue;
-		if (!sb_has_quota_enabled(sb, cnt))
+		if (!sb_has_quota_active(sb, cnt))
 			continue;
 		spin_lock(&dq_list_lock);
 		dirty = &dqopt->info[cnt].dqi_dirty_list;
@@ -514,8 +514,8 @@ int vfs_quota_sync(struct super_block *sb, int type)
 	}
 
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
-		if ((cnt == type || type == -1) && sb_has_quota_enabled(sb, cnt)
-			&& info_dirty(&dqopt->info[cnt]))
+		if ((cnt == type || type == -1) && sb_has_quota_active(sb, cnt)
+		    && info_dirty(&dqopt->info[cnt]))
 			sb->dq_op->write_info(sb, cnt);
 	spin_lock(&dq_list_lock);
 	dqstats.syncs++;
@@ -594,7 +594,7 @@ we_slept:
 		/* We have more than one user... nothing to do */
 		atomic_dec(&dquot->dq_count);
 		/* Releasing dquot during quotaoff phase? */
-		if (!sb_has_quota_enabled(dquot->dq_sb, dquot->dq_type) &&
+		if (!sb_has_quota_active(dquot->dq_sb, dquot->dq_type) &&
 		    atomic_read(&dquot->dq_count) == 1)
 			wake_up(&dquot->dq_wait_unused);
 		spin_unlock(&dq_list_lock);
@@ -670,7 +670,7 @@ static struct dquot *dqget(struct super_block *sb, unsigned int id, int type)
 	unsigned int hashent = hashfn(sb, id, type);
 	struct dquot *dquot, *empty = NODQUOT;
 
-        if (!sb_has_quota_enabled(sb, type))
+        if (!sb_has_quota_active(sb, type))
 		return NODQUOT;
 we_slept:
 	spin_lock(&dq_list_lock);
@@ -1041,7 +1041,8 @@ static inline char ignore_hardlimit(struct dquot *dquot)
 static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype)
 {
 	*warntype = QUOTA_NL_NOWARN;
-	if (test_bit(DQ_FAKE_B, &dquot->dq_flags))
+	if (!sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_type) ||
+	    test_bit(DQ_FAKE_B, &dquot->dq_flags))
 		return QUOTA_OK;
 
 	if (dquot->dq_dqb.dqb_ihardlimit &&
@@ -1073,7 +1074,8 @@ static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype)
 static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *warntype)
 {
 	*warntype = QUOTA_NL_NOWARN;
-	if (test_bit(DQ_FAKE_B, &dquot->dq_flags))
+	if (!sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_type) ||
+	    test_bit(DQ_FAKE_B, &dquot->dq_flags))
 		return QUOTA_OK;
 
 	if (dquot->dq_dqb.dqb_bhardlimit &&
@@ -1114,7 +1116,8 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
 static int info_idq_free(struct dquot *dquot, qsize_t inodes)
 {
 	if (test_bit(DQ_FAKE_B, &dquot->dq_flags) ||
-	    dquot->dq_dqb.dqb_curinodes <= dquot->dq_dqb.dqb_isoftlimit)
+	    dquot->dq_dqb.dqb_curinodes <= dquot->dq_dqb.dqb_isoftlimit ||
+	    !sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_type))
 		return QUOTA_NL_NOWARN;
 
 	if (dquot->dq_dqb.dqb_curinodes - inodes <= dquot->dq_dqb.dqb_isoftlimit)
@@ -1508,7 +1511,7 @@ warn_put_all:
 /* Wrapper for transferring ownership of an inode */
 int vfs_dq_transfer(struct inode *inode, struct iattr *iattr)
 {
-	if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) {
+	if (sb_any_quota_active(inode->i_sb) && !IS_NOQUOTA(inode)) {
 		vfs_dq_init(inode);
 		if (inode->i_sb->dq_op->transfer(inode, iattr) == NO_QUOTA)
 			return 1;
@@ -1551,53 +1554,22 @@ struct dquot_operations dquot_operations = {
 	.destroy_dquot	= dquot_destroy,
 };
 
-static inline void set_enable_flags(struct quota_info *dqopt, int type)
-{
-	switch (type) {
-		case USRQUOTA:
-			dqopt->flags |= DQUOT_USR_ENABLED;
-			dqopt->flags &= ~DQUOT_USR_SUSPENDED;
-			break;
-		case GRPQUOTA:
-			dqopt->flags |= DQUOT_GRP_ENABLED;
-			dqopt->flags &= ~DQUOT_GRP_SUSPENDED;
-			break;
-	}
-}
-
-static inline void reset_enable_flags(struct quota_info *dqopt, int type,
-				      int remount)
-{
-	switch (type) {
-		case USRQUOTA:
-			if (remount)
-				dqopt->flags |= DQUOT_USR_SUSPENDED;
-			else {
-				dqopt->flags &= ~DQUOT_USR_ENABLED;
-				dqopt->flags &= ~DQUOT_USR_SUSPENDED;
-			}
-			break;
-		case GRPQUOTA:
-			if (remount)
-				dqopt->flags |= DQUOT_GRP_SUSPENDED;
-			else {
-				dqopt->flags &= ~DQUOT_GRP_ENABLED;
-				dqopt->flags &= ~DQUOT_GRP_SUSPENDED;
-			}
-			break;
-	}
-}
-
-
 /*
  * Turn quota off on a device. type == -1 ==> quotaoff for all types (umount)
  */
-int vfs_quota_off(struct super_block *sb, int type, int remount)
+int vfs_quota_disable(struct super_block *sb, int type, unsigned int flags)
 {
 	int cnt, ret = 0;
 	struct quota_info *dqopt = sb_dqopt(sb);
 	struct inode *toputinode[MAXQUOTAS];
 
+	/* Cannot turn off usage accounting without turning off limits, or
+	 * suspend quotas and simultaneously turn quotas off. */
+	if ((flags & DQUOT_USAGE_ENABLED && !(flags & DQUOT_LIMITS_ENABLED))
+	    || (flags & DQUOT_SUSPENDED && flags & (DQUOT_LIMITS_ENABLED |
+	    DQUOT_USAGE_ENABLED)))
+		return -EINVAL;
+
 	/* We need to serialize quota_off() for device */
 	mutex_lock(&dqopt->dqonoff_mutex);
 
@@ -1606,7 +1578,7 @@ int vfs_quota_off(struct super_block *sb, int type, int remount)
 	 * sometimes we are called when fill_super() failed and calling
 	 * sync_fs() in such cases does no good.
 	 */
-	if (!sb_any_quota_enabled(sb) && !sb_any_quota_suspended(sb)) {
+	if (!sb_any_quota_loaded(sb)) {
 		mutex_unlock(&dqopt->dqonoff_mutex);
 		return 0;
 	}
@@ -1614,17 +1586,28 @@ int vfs_quota_off(struct super_block *sb, int type, int remount)
 		toputinode[cnt] = NULL;
 		if (type != -1 && cnt != type)
 			continue;
-		/* If we keep inodes of quota files after remount and quotaoff
-		 * is called, drop kept inodes. */
-		if (!remount && sb_has_quota_suspended(sb, cnt)) {
-			iput(dqopt->files[cnt]);
-			dqopt->files[cnt] = NULL;
-			reset_enable_flags(dqopt, cnt, 0);
+		if (!sb_has_quota_loaded(sb, cnt))
 			continue;
+
+		if (flags & DQUOT_SUSPENDED) {
+			dqopt->flags |=
+				dquot_state_flag(DQUOT_SUSPENDED, cnt);
+		} else {
+			dqopt->flags &= ~dquot_state_flag(flags, cnt);
+			/* Turning off suspended quotas? */
+			if (!sb_has_quota_loaded(sb, cnt) &&
+			    sb_has_quota_suspended(sb, cnt)) {
+				dqopt->flags &=	~dquot_state_flag(
+							DQUOT_SUSPENDED, cnt);
+				iput(dqopt->files[cnt]);
+				dqopt->files[cnt] = NULL;
+				continue;
+			}
 		}
-		if (!sb_has_quota_enabled(sb, cnt))
+
+		/* We still have to keep quota loaded? */
+		if (sb_has_quota_loaded(sb, cnt) && !(flags & DQUOT_SUSPENDED))
 			continue;
-		reset_enable_flags(dqopt, cnt, remount);
 
 		/* Note: these are blocking operations */
 		drop_dquot_ref(sb, cnt);
@@ -1640,7 +1623,7 @@ int vfs_quota_off(struct super_block *sb, int type, int remount)
 		put_quota_format(dqopt->info[cnt].dqi_format);
 
 		toputinode[cnt] = dqopt->files[cnt];
-		if (!remount)
+		if (!sb_has_quota_loaded(sb, cnt))
 			dqopt->files[cnt] = NULL;
 		dqopt->info[cnt].dqi_flags = 0;
 		dqopt->info[cnt].dqi_igrace = 0;
@@ -1663,7 +1646,7 @@ int vfs_quota_off(struct super_block *sb, int type, int remount)
 			mutex_lock(&dqopt->dqonoff_mutex);
 			/* If quota was reenabled in the meantime, we have
 			 * nothing to do */
-			if (!sb_has_quota_enabled(sb, cnt)) {
+			if (!sb_has_quota_loaded(sb, cnt)) {
 				mutex_lock_nested(&toputinode[cnt]->i_mutex, I_MUTEX_QUOTA);
 				toputinode[cnt]->i_flags &= ~(S_IMMUTABLE |
 				  S_NOATIME | S_NOQUOTA);
@@ -1673,10 +1656,13 @@ int vfs_quota_off(struct super_block *sb, int type, int remount)
 			}
 			mutex_unlock(&dqopt->dqonoff_mutex);
 			/* On remount RO, we keep the inode pointer so that we
-			 * can reenable quota on the subsequent remount RW.
-			 * But we have better not keep inode pointer when there
-			 * is pending delete on the quota file... */
-			if (!remount)
+			 * can reenable quota on the subsequent remount RW. We
+			 * have to check 'flags' variable and not use sb_has_
+			 * function because another quotaon / quotaoff could
+			 * change global state before we got here. We refuse
+			 * to suspend quotas when there is pending delete on
+			 * the quota file... */
+			if (!(flags & DQUOT_SUSPENDED))
 				iput(toputinode[cnt]);
 			else if (!toputinode[cnt]->i_nlink)
 				ret = -EBUSY;
@@ -1686,12 +1672,22 @@ int vfs_quota_off(struct super_block *sb, int type, int remount)
 	return ret;
 }
 
+int vfs_quota_off(struct super_block *sb, int type, int remount)
+{
+	return vfs_quota_disable(sb, type, remount ? DQUOT_SUSPENDED :
+				 (DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED));
+}
+
 /*
  *	Turn quotas on on a device
  */
 
-/* Helper function when we already have the inode */
-static int vfs_quota_on_inode(struct inode *inode, int type, int format_id)
+/*
+ * Helper function to turn quotas on when we already have the inode of
+ * quota file and no quota information is loaded.
+ */
+static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
+	unsigned int flags)
 {
 	struct quota_format_type *fmt = find_quota_format(format_id);
 	struct super_block *sb = inode->i_sb;
@@ -1713,6 +1709,11 @@ static int vfs_quota_on_inode(struct inode *inode, int type, int format_id)
 		error = -EINVAL;
 		goto out_fmt;
 	}
+	/* Usage always has to be set... */
+	if (!(flags & DQUOT_USAGE_ENABLED)) {
+		error = -EINVAL;
+		goto out_fmt;
+	}
 
 	/* As we bypass the pagecache we must now flush the inode so that
 	 * we see all the changes from userspace... */
@@ -1721,8 +1722,7 @@ static int vfs_quota_on_inode(struct inode *inode, int type, int format_id)
 	invalidate_bdev(sb->s_bdev);
 	mutex_lock(&inode->i_mutex);
 	mutex_lock(&dqopt->dqonoff_mutex);
-	if (sb_has_quota_enabled(sb, type) ||
-			sb_has_quota_suspended(sb, type)) {
+	if (sb_has_quota_loaded(sb, type)) {
 		error = -EBUSY;
 		goto out_lock;
 	}
@@ -1754,7 +1754,7 @@ static int vfs_quota_on_inode(struct inode *inode, int type, int format_id)
 	}
 	mutex_unlock(&dqopt->dqio_mutex);
 	mutex_unlock(&inode->i_mutex);
-	set_enable_flags(dqopt, type);
+	dqopt->flags |= dquot_state_flag(flags, type);
 
 	add_dquot_ref(sb, type);
 	mutex_unlock(&dqopt->dqonoff_mutex);
@@ -1787,20 +1787,23 @@ static int vfs_quota_on_remount(struct super_block *sb, int type)
 	struct quota_info *dqopt = sb_dqopt(sb);
 	struct inode *inode;
 	int ret;
+	unsigned int flags;
 
 	mutex_lock(&dqopt->dqonoff_mutex);
 	if (!sb_has_quota_suspended(sb, type)) {
 		mutex_unlock(&dqopt->dqonoff_mutex);
 		return 0;
 	}
-	BUG_ON(sb_has_quota_enabled(sb, type));
-
 	inode = dqopt->files[type];
 	dqopt->files[type] = NULL;
-	reset_enable_flags(dqopt, type, 0);
+	flags = dqopt->flags & dquot_state_flag(DQUOT_USAGE_ENABLED |
+						DQUOT_LIMITS_ENABLED, type);
+	dqopt->flags &= ~dquot_state_flag(DQUOT_STATE_FLAGS, type);
 	mutex_unlock(&dqopt->dqonoff_mutex);
 
-	ret = vfs_quota_on_inode(inode, type, dqopt->info[type].dqi_fmt_id);
+	flags = dquot_generic_flag(flags, type);
+	ret = vfs_load_quota_inode(inode, type, dqopt->info[type].dqi_fmt_id,
+				   flags);
 	iput(inode);
 
 	return ret;
@@ -1816,12 +1819,12 @@ int vfs_quota_on_path(struct super_block *sb, int type, int format_id,
 	if (path->mnt->mnt_sb != sb)
 		error = -EXDEV;
 	else
-		error = vfs_quota_on_inode(path->dentry->d_inode, type,
-					   format_id);
+		error = vfs_load_quota_inode(path->dentry->d_inode, type,
+					     format_id, DQUOT_USAGE_ENABLED |
+					     DQUOT_LIMITS_ENABLED);
 	return error;
 }
 
-/* Actual function called from quotactl() */
 int vfs_quota_on(struct super_block *sb, int type, int format_id, char *name,
 		 int remount)
 {
@@ -1839,6 +1842,50 @@ int vfs_quota_on(struct super_block *sb, int type, int format_id, char *name,
 	return error;
 }
 
+/*
+ * More powerful function for turning on quotas allowing setting
+ * of individual quota flags
+ */
+int vfs_quota_enable(struct inode *inode, int type, int format_id,
+		unsigned int flags)
+{
+	int ret = 0;
+	struct super_block *sb = inode->i_sb;
+	struct quota_info *dqopt = sb_dqopt(sb);
+
+	/* Just unsuspend quotas? */
+	if (flags & DQUOT_SUSPENDED)
+		return vfs_quota_on_remount(sb, type);
+	if (!flags)
+		return 0;
+	/* Just updating flags needed? */
+	if (sb_has_quota_loaded(sb, type)) {
+		mutex_lock(&dqopt->dqonoff_mutex);
+		/* Now do a reliable test... */
+		if (!sb_has_quota_loaded(sb, type)) {
+			mutex_unlock(&dqopt->dqonoff_mutex);
+			goto load_quota;
+		}
+		if (flags & DQUOT_USAGE_ENABLED &&
+		    sb_has_quota_usage_enabled(sb, type)) {
+			ret = -EBUSY;
+			goto out_lock;
+		}
+		if (flags & DQUOT_LIMITS_ENABLED &&
+		    sb_has_quota_limits_enabled(sb, type)) {
+			ret = -EBUSY;
+			goto out_lock;
+		}
+		sb_dqopt(sb)->flags |= dquot_state_flag(flags, type);
+out_lock:
+		mutex_unlock(&dqopt->dqonoff_mutex);
+		return ret;
+	}
+
+load_quota:
+	return vfs_load_quota_inode(inode, type, format_id, flags);
+}
+
 /*
  * This function is used when filesystem needs to initialize quotas
  * during mount time.
@@ -1860,7 +1907,8 @@ int vfs_quota_on_mount(struct super_block *sb, char *qf_name,
 
 	error = security_quota_on(dentry);
 	if (!error)
-		error = vfs_quota_on_inode(dentry->d_inode, type, format_id);
+		error = vfs_load_quota_inode(dentry->d_inode, type, format_id,
+				DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
 
 out:
 	dput(dentry);
@@ -1997,12 +2045,14 @@ int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *d
 	int rc;
 
 	mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
-	if (!(dquot = dqget(sb, id, type))) {
-		mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
-		return -ESRCH;
+	dquot = dqget(sb, id, type);
+	if (!dquot) {
+		rc = -ESRCH;
+		goto out;
 	}
 	rc = do_set_dqblk(dquot, di);
 	dqput(dquot);
+out:
 	mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
 	return rc;
 }
@@ -2013,7 +2063,7 @@ int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
 	struct mem_dqinfo *mi;
   
 	mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
-	if (!sb_has_quota_enabled(sb, type)) {
+	if (!sb_has_quota_active(sb, type)) {
 		mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
 		return -ESRCH;
 	}
@@ -2032,11 +2082,12 @@ int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
 int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
 {
 	struct mem_dqinfo *mi;
+	int err = 0;
 
 	mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
-	if (!sb_has_quota_enabled(sb, type)) {
-		mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
-		return -ESRCH;
+	if (!sb_has_quota_active(sb, type)) {
+		err = -ESRCH;
+		goto out;
 	}
 	mi = sb_dqopt(sb)->info + type;
 	spin_lock(&dq_data_lock);
@@ -2050,8 +2101,9 @@ int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
 	mark_info_dirty(sb, type);
 	/* Force write to disk */
 	sb->dq_op->write_info(sb, type);
+out:
 	mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
-	return 0;
+	return err;
 }
 
 struct quotactl_ops vfs_quotactl_ops = {
@@ -2213,9 +2265,11 @@ EXPORT_SYMBOL(register_quota_format);
 EXPORT_SYMBOL(unregister_quota_format);
 EXPORT_SYMBOL(dqstats);
 EXPORT_SYMBOL(dq_data_lock);
+EXPORT_SYMBOL(vfs_quota_enable);
 EXPORT_SYMBOL(vfs_quota_on);
 EXPORT_SYMBOL(vfs_quota_on_path);
 EXPORT_SYMBOL(vfs_quota_on_mount);
+EXPORT_SYMBOL(vfs_quota_disable);
 EXPORT_SYMBOL(vfs_quota_off);
 EXPORT_SYMBOL(vfs_quota_sync);
 EXPORT_SYMBOL(vfs_get_dqinfo);
diff --git a/fs/quota.c b/fs/quota.c
index b7fe44e01618..8678d9f35ee9 100644
--- a/fs/quota.c
+++ b/fs/quota.c
@@ -73,7 +73,7 @@ static int generic_quotactl_valid(struct super_block *sb, int type, int cmd, qid
 		case Q_SETQUOTA:
 		case Q_GETQUOTA:
 			/* This is just informative test so we are satisfied without a lock */
-			if (!sb_has_quota_enabled(sb, type))
+			if (!sb_has_quota_active(sb, type))
 				return -ESRCH;
 	}
 
@@ -175,7 +175,7 @@ static void quota_sync_sb(struct super_block *sb, int type)
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		if (type != -1 && cnt != type)
 			continue;
-		if (!sb_has_quota_enabled(sb, cnt))
+		if (!sb_has_quota_active(sb, cnt))
 			continue;
 		mutex_lock_nested(&sb_dqopt(sb)->files[cnt]->i_mutex, I_MUTEX_QUOTA);
 		truncate_inode_pages(&sb_dqopt(sb)->files[cnt]->i_data, 0);
@@ -201,7 +201,7 @@ restart:
 		for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 			if (type != -1 && type != cnt)
 				continue;
-			if (!sb_has_quota_enabled(sb, cnt))
+			if (!sb_has_quota_active(sb, cnt))
 				continue;
 			if (!info_dirty(&sb_dqopt(sb)->info[cnt]) &&
 			    list_empty(&sb_dqopt(sb)->info[cnt].dqi_dirty_list))
@@ -245,7 +245,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, void
 			__u32 fmt;
 
 			down_read(&sb_dqopt(sb)->dqptr_sem);
-			if (!sb_has_quota_enabled(sb, type)) {
+			if (!sb_has_quota_active(sb, type)) {
 				up_read(&sb_dqopt(sb)->dqptr_sem);
 				return -ESRCH;
 			}
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 9ea468363f9f..93717abcd35b 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -318,12 +318,34 @@ struct quota_format_type {
 	struct quota_format_type *qf_next;
 };
 
-#define DQUOT_USR_ENABLED	0x01		/* User diskquotas enabled */
-#define DQUOT_GRP_ENABLED	0x02		/* Group diskquotas enabled */
-#define DQUOT_USR_SUSPENDED	0x04		/* User diskquotas are off, but
+/* Quota state flags - they actually come in two flavors - for users and groups */
+enum {
+	_DQUOT_USAGE_ENABLED = 0,		/* Track disk usage for users */
+	_DQUOT_LIMITS_ENABLED,			/* Enforce quota limits for users */
+	_DQUOT_SUSPENDED,			/* User diskquotas are off, but
 						 * we have necessary info in
 						 * memory to turn them on */
-#define DQUOT_GRP_SUSPENDED	0x08		/* The same for group quotas */
+	_DQUOT_STATE_FLAGS
+};
+#define DQUOT_USAGE_ENABLED	(1 << _DQUOT_USAGE_ENABLED)
+#define DQUOT_LIMITS_ENABLED	(1 << _DQUOT_LIMITS_ENABLED)
+#define DQUOT_SUSPENDED		(1 << _DQUOT_SUSPENDED)
+#define DQUOT_STATE_FLAGS	(DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED | \
+				 DQUOT_SUSPENDED)
+
+static inline unsigned int dquot_state_flag(unsigned int flags, int type)
+{
+	if (type == USRQUOTA)
+		return flags;
+	return flags << _DQUOT_STATE_FLAGS;
+}
+
+static inline unsigned int dquot_generic_flag(unsigned int flags, int type)
+{
+	if (type == USRQUOTA)
+		return flags;
+	return flags >> _DQUOT_STATE_FLAGS;
+}
 
 struct quota_info {
 	unsigned int flags;			/* Flags for diskquotas on this device */
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index ffd97071cd1e..3b3346fa657c 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -40,11 +40,14 @@ int dquot_mark_dquot_dirty(struct dquot *dquot);
 
 int vfs_quota_on(struct super_block *sb, int type, int format_id,
  	char *path, int remount);
+int vfs_quota_enable(struct inode *inode, int type, int format_id,
+	unsigned int flags);
 int vfs_quota_on_path(struct super_block *sb, int type, int format_id,
  	struct path *path);
 int vfs_quota_on_mount(struct super_block *sb, char *qf_name,
  	int format_id, int type);
 int vfs_quota_off(struct super_block *sb, int type, int remount);
+int vfs_quota_disable(struct super_block *sb, int type, unsigned int flags);
 int vfs_quota_sync(struct super_block *sb, int type);
 int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
 int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
@@ -64,26 +67,22 @@ static inline struct mem_dqinfo *sb_dqinfo(struct super_block *sb, int type)
  * Functions for checking status of quota
  */
 
-static inline int sb_has_quota_enabled(struct super_block *sb, int type)
+static inline int sb_has_quota_usage_enabled(struct super_block *sb, int type)
 {
-	if (type == USRQUOTA)
-		return (sb_dqopt(sb)->flags & DQUOT_USR_ENABLED)
-			&& !(sb_dqopt(sb)->flags & DQUOT_USR_SUSPENDED);
-	return (sb_dqopt(sb)->flags & DQUOT_GRP_ENABLED)
-		&& !(sb_dqopt(sb)->flags & DQUOT_GROUP_SUSPENDED);
+	return sb_dqopt(sb)->flags &
+				dquot_state_flag(DQUOT_USAGE_ENABLED, type);
 }
 
-static inline int sb_any_quota_enabled(struct super_block *sb)
+static inline int sb_has_quota_limits_enabled(struct super_block *sb, int type)
 {
-	return sb_has_quota_enabled(sb, USRQUOTA) ||
-		sb_has_quota_enabled(sb, GRPQUOTA);
+	return sb_dqopt(sb)->flags &
+				dquot_state_flag(DQUOT_LIMITS_ENABLED, type);
 }
 
 static inline int sb_has_quota_suspended(struct super_block *sb, int type)
 {
-	if (type == USRQUOTA)
-		return sb_dqopt(sb)->flags & DQUOT_USR_SUSPENDED;
-	return sb_dqopt(sb)->flags & DQUOT_GRP_SUSPENDED;
+	return sb_dqopt(sb)->flags &
+				dquot_state_flag(DQUOT_SUSPENDED, type);
 }
 
 static inline int sb_any_quota_suspended(struct super_block *sb)
@@ -92,6 +91,34 @@ static inline int sb_any_quota_suspended(struct super_block *sb)
 		sb_has_quota_suspended(sb, GRPQUOTA);
 }
 
+/* Does kernel know about any quota information for given sb + type? */
+static inline int sb_has_quota_loaded(struct super_block *sb, int type)
+{
+	/* Currently if anything is on, then quota usage is on as well */
+	return sb_has_quota_usage_enabled(sb, type);
+}
+
+static inline int sb_any_quota_loaded(struct super_block *sb)
+{
+	return sb_has_quota_loaded(sb, USRQUOTA) ||
+		sb_has_quota_loaded(sb, GRPQUOTA);
+}
+
+static inline int sb_has_quota_active(struct super_block *sb, int type)
+{
+	return sb_has_quota_loaded(sb, type) &&
+	       !sb_has_quota_suspended(sb, type);
+}
+
+static inline int sb_any_quota_active(struct super_block *sb)
+{
+	return sb_has_quota_active(sb, USRQUOTA) ||
+	       sb_has_quota_active(sb, GRPQUOTA);
+}
+
+/* For backward compatibility until we remove all users */
+#define sb_any_quota_enabled(sb) sb_any_quota_active(sb)
+
 /*
  * Operations supported for diskquotas.
  */
@@ -106,7 +133,7 @@ extern struct quotactl_ops vfs_quotactl_ops;
 static inline void vfs_dq_init(struct inode *inode)
 {
 	BUG_ON(!inode->i_sb);
-	if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode))
+	if (sb_any_quota_active(inode->i_sb) && !IS_NOQUOTA(inode))
 		inode->i_sb->dq_op->initialize(inode, -1);
 }
 
@@ -114,7 +141,7 @@ static inline void vfs_dq_init(struct inode *inode)
  * a transaction (deadlocks possible otherwise) */
 static inline int vfs_dq_prealloc_space_nodirty(struct inode *inode, qsize_t nr)
 {
-	if (sb_any_quota_enabled(inode->i_sb)) {
+	if (sb_any_quota_active(inode->i_sb)) {
 		/* Used space is updated in alloc_space() */
 		if (inode->i_sb->dq_op->alloc_space(inode, nr, 1) == NO_QUOTA)
 			return 1;
@@ -134,7 +161,7 @@ static inline int vfs_dq_prealloc_space(struct inode *inode, qsize_t nr)
 
 static inline int vfs_dq_alloc_space_nodirty(struct inode *inode, qsize_t nr)
 {
-	if (sb_any_quota_enabled(inode->i_sb)) {
+	if (sb_any_quota_active(inode->i_sb)) {
 		/* Used space is updated in alloc_space() */
 		if (inode->i_sb->dq_op->alloc_space(inode, nr, 0) == NO_QUOTA)
 			return 1;
@@ -154,7 +181,7 @@ static inline int vfs_dq_alloc_space(struct inode *inode, qsize_t nr)
 
 static inline int vfs_dq_alloc_inode(struct inode *inode)
 {
-	if (sb_any_quota_enabled(inode->i_sb)) {
+	if (sb_any_quota_active(inode->i_sb)) {
 		vfs_dq_init(inode);
 		if (inode->i_sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA)
 			return 1;
@@ -164,7 +191,7 @@ static inline int vfs_dq_alloc_inode(struct inode *inode)
 
 static inline void vfs_dq_free_space_nodirty(struct inode *inode, qsize_t nr)
 {
-	if (sb_any_quota_enabled(inode->i_sb))
+	if (sb_any_quota_active(inode->i_sb))
 		inode->i_sb->dq_op->free_space(inode, nr);
 	else
 		inode_sub_bytes(inode, nr);
@@ -178,7 +205,7 @@ static inline void vfs_dq_free_space(struct inode *inode, qsize_t nr)
 
 static inline void vfs_dq_free_inode(struct inode *inode)
 {
-	if (sb_any_quota_enabled(inode->i_sb))
+	if (sb_any_quota_active(inode->i_sb))
 		inode->i_sb->dq_op->free_inode(inode, 1);
 }
 
@@ -199,12 +226,12 @@ static inline int vfs_dq_off(struct super_block *sb, int remount)
 
 #else
 
-static inline int sb_has_quota_enabled(struct super_block *sb, int type)
+static inline int sb_has_quota_usage_enabled(struct super_block *sb, int type)
 {
 	return 0;
 }
 
-static inline int sb_any_quota_enabled(struct super_block *sb)
+static inline int sb_has_quota_limits_enabled(struct super_block *sb, int type)
 {
 	return 0;
 }
@@ -219,6 +246,30 @@ static inline int sb_any_quota_suspended(struct super_block *sb)
 	return 0;
 }
 
+/* Does kernel know about any quota information for given sb + type? */
+static inline int sb_has_quota_loaded(struct super_block *sb, int type)
+{
+	return 0;
+}
+
+static inline int sb_any_quota_loaded(struct super_block *sb)
+{
+	return 0;
+}
+
+static inline int sb_has_quota_active(struct super_block *sb, int type)
+{
+	return 0;
+}
+
+static inline int sb_any_quota_active(struct super_block *sb)
+{
+	return 0;
+}
+
+/* For backward compatibility until we remove all users */
+#define sb_any_quota_enabled(sb) sb_any_quota_active(sb)
+
 /*
  * NO-OP when quota not configured.
  */
-- 
cgit v1.2.3


From dcb30695f2cac86b71417629a6fe8042b4fe2ab2 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 20 Aug 2008 18:30:40 +0200
Subject: quota: Remove compatibility function sb_any_quota_enabled()

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
---
 include/linux/quotaops.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 3b3346fa657c..e840ca523175 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -116,9 +116,6 @@ static inline int sb_any_quota_active(struct super_block *sb)
 	       sb_has_quota_active(sb, GRPQUOTA);
 }
 
-/* For backward compatibility until we remove all users */
-#define sb_any_quota_enabled(sb) sb_any_quota_active(sb)
-
 /*
  * Operations supported for diskquotas.
  */
@@ -267,9 +264,6 @@ static inline int sb_any_quota_active(struct super_block *sb)
 	return 0;
 }
 
-/* For backward compatibility until we remove all users */
-#define sb_any_quota_enabled(sb) sb_any_quota_active(sb)
-
 /*
  * NO-OP when quota not configured.
  */
-- 
cgit v1.2.3


From ca785ec66b991e9ca74dd9840fc014487ad095e1 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 30 Sep 2008 17:53:37 +0200
Subject: quota: Introduce DQUOT_QUOTA_SYS_FILE flag

If filesystem can handle quota files as system files hidden from users, we can
skip a lot of cache invalidation, syncing, inode flags setting etc. when
turning quotas on, off and quota_sync. Allow filesystem to indicate that it is
hiding quota files from users by DQUOT_QUOTA_SYS_FILE flag.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
---
 fs/dquot.c            | 45 ++++++++++++++++++++++++++++++---------------
 fs/quota.c            |  3 +++
 include/linux/quota.h |  7 +++++++
 3 files changed, 40 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dquot.c b/fs/dquot.c
index 7569633efe0e..74185c34a4f0 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -1631,6 +1631,11 @@ int vfs_quota_disable(struct super_block *sb, int type, unsigned int flags)
 		dqopt->ops[cnt] = NULL;
 	}
 	mutex_unlock(&dqopt->dqonoff_mutex);
+
+	/* Skip syncing and setting flags if quota files are hidden */
+	if (dqopt->flags & DQUOT_QUOTA_SYS_FILE)
+		goto put_inodes;
+
 	/* Sync the superblock so that buffers with quota data are written to
 	 * disk (and so userspace sees correct data afterwards). */
 	if (sb->s_op->sync_fs)
@@ -1655,6 +1660,12 @@ int vfs_quota_disable(struct super_block *sb, int type, unsigned int flags)
 				mark_inode_dirty(toputinode[cnt]);
 			}
 			mutex_unlock(&dqopt->dqonoff_mutex);
+		}
+	if (sb->s_bdev)
+		invalidate_bdev(sb->s_bdev);
+put_inodes:
+	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+		if (toputinode[cnt]) {
 			/* On remount RO, we keep the inode pointer so that we
 			 * can reenable quota on the subsequent remount RW. We
 			 * have to check 'flags' variable and not use sb_has_
@@ -1667,8 +1678,6 @@ int vfs_quota_disable(struct super_block *sb, int type, unsigned int flags)
 			else if (!toputinode[cnt]->i_nlink)
 				ret = -EBUSY;
 		}
-	if (sb->s_bdev)
-		invalidate_bdev(sb->s_bdev);
 	return ret;
 }
 
@@ -1715,25 +1724,31 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
 		goto out_fmt;
 	}
 
-	/* As we bypass the pagecache we must now flush the inode so that
-	 * we see all the changes from userspace... */
-	write_inode_now(inode, 1);
-	/* And now flush the block cache so that kernel sees the changes */
-	invalidate_bdev(sb->s_bdev);
+	if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) {
+		/* As we bypass the pagecache we must now flush the inode so
+		 * that we see all the changes from userspace... */
+		write_inode_now(inode, 1);
+		/* And now flush the block cache so that kernel sees the
+		 * changes */
+		invalidate_bdev(sb->s_bdev);
+	}
 	mutex_lock(&inode->i_mutex);
 	mutex_lock(&dqopt->dqonoff_mutex);
 	if (sb_has_quota_loaded(sb, type)) {
 		error = -EBUSY;
 		goto out_lock;
 	}
-	/* We don't want quota and atime on quota files (deadlocks possible)
-	 * Also nobody should write to the file - we use special IO operations
-	 * which ignore the immutable bit. */
-	down_write(&dqopt->dqptr_sem);
-	oldflags = inode->i_flags & (S_NOATIME | S_IMMUTABLE | S_NOQUOTA);
-	inode->i_flags |= S_NOQUOTA | S_NOATIME | S_IMMUTABLE;
-	up_write(&dqopt->dqptr_sem);
-	sb->dq_op->drop(inode);
+
+	if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) {
+		/* We don't want quota and atime on quota files (deadlocks
+		 * possible) Also nobody should write to the file - we use
+		 * special IO operations which ignore the immutable bit. */
+		down_write(&dqopt->dqptr_sem);
+		oldflags = inode->i_flags & (S_NOATIME | S_IMMUTABLE | S_NOQUOTA);
+		inode->i_flags |= S_NOQUOTA | S_NOATIME | S_IMMUTABLE;
+		up_write(&dqopt->dqptr_sem);
+		sb->dq_op->drop(inode);
+	}
 
 	error = -EIO;
 	dqopt->files[type] = igrab(inode);
diff --git a/fs/quota.c b/fs/quota.c
index 8678d9f35ee9..4a8c94f05f76 100644
--- a/fs/quota.c
+++ b/fs/quota.c
@@ -160,6 +160,9 @@ static void quota_sync_sb(struct super_block *sb, int type)
 	int cnt;
 
 	sb->s_qcop->quota_sync(sb, type);
+
+	if (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE)
+		return;
 	/* This is not very clever (and fast) but currently I don't know about
 	 * any other simple way of getting quota data to disk and we must get
 	 * them there for userspace to be visible... */
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 93717abcd35b..80b8807b4988 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -332,6 +332,13 @@ enum {
 #define DQUOT_SUSPENDED		(1 << _DQUOT_SUSPENDED)
 #define DQUOT_STATE_FLAGS	(DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED | \
 				 DQUOT_SUSPENDED)
+/* Other quota flags */
+#define DQUOT_QUOTA_SYS_FILE	(1 << 6)	/* Quota file is a special
+						 * system file and user cannot
+						 * touch it. Filesystem is
+						 * responsible for setting
+						 * S_NOQUOTA, S_NOATIME flags
+						 */
 
 static inline unsigned int dquot_state_flag(unsigned int flags, int type)
 {
-- 
cgit v1.2.3


From cf770c137122b78470a67ebd5498947869a09197 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Sun, 21 Sep 2008 23:17:53 +0200
Subject: quota: Move quotaio_v[12].h from include/linux/ to fs/

Since these include files are used only by implementation of quota formats,
there's no need to have them in include/linux/.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
---
 fs/quota_v1.c              |  3 +-
 fs/quota_v2.c              |  7 ++--
 fs/quotaio_v1.h            | 33 +++++++++++++++++++
 fs/quotaio_v2.h            | 79 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/Kbuild       |  2 --
 include/linux/quotaio_v1.h | 33 -------------------
 include/linux/quotaio_v2.h | 79 ----------------------------------------------
 7 files changed, 118 insertions(+), 118 deletions(-)
 create mode 100644 fs/quotaio_v1.h
 create mode 100644 fs/quotaio_v2.h
 delete mode 100644 include/linux/quotaio_v1.h
 delete mode 100644 include/linux/quotaio_v2.h

(limited to 'include/linux')

diff --git a/fs/quota_v1.c b/fs/quota_v1.c
index 3e078eee5644..b4af1c69ad16 100644
--- a/fs/quota_v1.c
+++ b/fs/quota_v1.c
@@ -3,13 +3,14 @@
 #include <linux/quota.h>
 #include <linux/quotaops.h>
 #include <linux/dqblk_v1.h>
-#include <linux/quotaio_v1.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/module.h>
 
 #include <asm/byteorder.h>
 
+#include "quotaio_v1.h"
+
 MODULE_AUTHOR("Jan Kara");
 MODULE_DESCRIPTION("Old quota format support");
 MODULE_LICENSE("GPL");
diff --git a/fs/quota_v2.c b/fs/quota_v2.c
index 51c4717f7c6a..a21d1a7c356a 100644
--- a/fs/quota_v2.c
+++ b/fs/quota_v2.c
@@ -6,7 +6,6 @@
 #include <linux/fs.h>
 #include <linux/mount.h>
 #include <linux/dqblk_v2.h>
-#include <linux/quotaio_v2.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/module.h>
@@ -15,6 +14,8 @@
 
 #include <asm/byteorder.h>
 
+#include "quotaio_v2.h"
+
 MODULE_AUTHOR("Jan Kara");
 MODULE_DESCRIPTION("Quota format v2 support");
 MODULE_LICENSE("GPL");
@@ -129,8 +130,8 @@ static void mem2diskdqb(struct v2_disk_dqblk *d, struct mem_dqblk *m, qid_t id)
 	d->dqb_isoftlimit = cpu_to_le32(m->dqb_isoftlimit);
 	d->dqb_curinodes = cpu_to_le32(m->dqb_curinodes);
 	d->dqb_itime = cpu_to_le64(m->dqb_itime);
-	d->dqb_bhardlimit = cpu_to_le32(v2_qbtos(m->dqb_bhardlimit));
-	d->dqb_bsoftlimit = cpu_to_le32(v2_qbtos(m->dqb_bsoftlimit));
+	d->dqb_bhardlimit = cpu_to_le32(v2_stoqb(m->dqb_bhardlimit));
+	d->dqb_bsoftlimit = cpu_to_le32(v2_stoqb(m->dqb_bsoftlimit));
 	d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
 	d->dqb_btime = cpu_to_le64(m->dqb_btime);
 	d->dqb_id = cpu_to_le32(id);
diff --git a/fs/quotaio_v1.h b/fs/quotaio_v1.h
new file mode 100644
index 000000000000..746654b5de70
--- /dev/null
+++ b/fs/quotaio_v1.h
@@ -0,0 +1,33 @@
+#ifndef _LINUX_QUOTAIO_V1_H
+#define _LINUX_QUOTAIO_V1_H
+
+#include <linux/types.h>
+
+/*
+ * The following constants define the amount of time given a user
+ * before the soft limits are treated as hard limits (usually resulting
+ * in an allocation failure). The timer is started when the user crosses
+ * their soft limit, it is reset when they go below their soft limit.
+ */
+#define MAX_IQ_TIME  604800	/* (7*24*60*60) 1 week */
+#define MAX_DQ_TIME  604800	/* (7*24*60*60) 1 week */
+
+/*
+ * The following structure defines the format of the disk quota file
+ * (as it appears on disk) - the file is an array of these structures
+ * indexed by user or group number.
+ */
+struct v1_disk_dqblk {
+	__u32 dqb_bhardlimit;	/* absolute limit on disk blks alloc */
+	__u32 dqb_bsoftlimit;	/* preferred limit on disk blks */
+	__u32 dqb_curblocks;	/* current block count */
+	__u32 dqb_ihardlimit;	/* absolute limit on allocated inodes */
+	__u32 dqb_isoftlimit;	/* preferred inode limit */
+	__u32 dqb_curinodes;	/* current # allocated inodes */
+	time_t dqb_btime;	/* time limit for excessive disk use */
+	time_t dqb_itime;	/* time limit for excessive inode use */
+};
+
+#define v1_dqoff(UID)      ((loff_t)((UID) * sizeof (struct v1_disk_dqblk)))
+
+#endif	/* _LINUX_QUOTAIO_V1_H */
diff --git a/fs/quotaio_v2.h b/fs/quotaio_v2.h
new file mode 100644
index 000000000000..303d7cbe30d4
--- /dev/null
+++ b/fs/quotaio_v2.h
@@ -0,0 +1,79 @@
+/*
+ *	Definitions of structures for vfsv0 quota format
+ */
+
+#ifndef _LINUX_QUOTAIO_V2_H
+#define _LINUX_QUOTAIO_V2_H
+
+#include <linux/types.h>
+#include <linux/quota.h>
+
+/*
+ * Definitions of magics and versions of current quota files
+ */
+#define V2_INITQMAGICS {\
+	0xd9c01f11,	/* USRQUOTA */\
+	0xd9c01927	/* GRPQUOTA */\
+}
+
+#define V2_INITQVERSIONS {\
+	0,		/* USRQUOTA */\
+	0		/* GRPQUOTA */\
+}
+
+/*
+ * The following structure defines the format of the disk quota file
+ * (as it appears on disk) - the file is a radix tree whose leaves point
+ * to blocks of these structures.
+ */
+struct v2_disk_dqblk {
+	__le32 dqb_id;		/* id this quota applies to */
+	__le32 dqb_ihardlimit;	/* absolute limit on allocated inodes */
+	__le32 dqb_isoftlimit;	/* preferred inode limit */
+	__le32 dqb_curinodes;	/* current # allocated inodes */
+	__le32 dqb_bhardlimit;	/* absolute limit on disk space (in QUOTABLOCK_SIZE) */
+	__le32 dqb_bsoftlimit;	/* preferred limit on disk space (in QUOTABLOCK_SIZE) */
+	__le64 dqb_curspace;	/* current space occupied (in bytes) */
+	__le64 dqb_btime;	/* time limit for excessive disk use */
+	__le64 dqb_itime;	/* time limit for excessive inode use */
+};
+
+/*
+ * Here are header structures as written on disk and their in-memory copies
+ */
+/* First generic header */
+struct v2_disk_dqheader {
+	__le32 dqh_magic;	/* Magic number identifying file */
+	__le32 dqh_version;	/* File version */
+};
+
+/* Header with type and version specific information */
+struct v2_disk_dqinfo {
+	__le32 dqi_bgrace;	/* Time before block soft limit becomes hard limit */
+	__le32 dqi_igrace;	/* Time before inode soft limit becomes hard limit */
+	__le32 dqi_flags;	/* Flags for quotafile (DQF_*) */
+	__le32 dqi_blocks;	/* Number of blocks in file */
+	__le32 dqi_free_blk;	/* Number of first free block in the list */
+	__le32 dqi_free_entry;	/* Number of block with at least one free entry */
+};
+
+/*
+ *  Structure of header of block with quota structures. It is padded to 16 bytes so
+ *  there will be space for exactly 21 quota-entries in a block
+ */
+struct v2_disk_dqdbheader {
+	__le32 dqdh_next_free;	/* Number of next block with free entry */
+	__le32 dqdh_prev_free;	/* Number of previous block with free entry */
+	__le16 dqdh_entries;	/* Number of valid entries in block */
+	__le16 dqdh_pad1;
+	__le32 dqdh_pad2;
+};
+
+#define V2_DQINFOOFF	sizeof(struct v2_disk_dqheader)	/* Offset of info header in file */
+#define V2_DQBLKSIZE_BITS	10
+#define V2_DQBLKSIZE	(1 << V2_DQBLKSIZE_BITS)	/* Size of block with quota structures */
+#define V2_DQTREEOFF	1		/* Offset of tree in file in blocks */
+#define V2_DQTREEDEPTH	4		/* Depth of quota tree */
+#define V2_DQSTRINBLK	((V2_DQBLKSIZE - sizeof(struct v2_disk_dqdbheader)) / sizeof(struct v2_disk_dqblk))	/* Number of entries in one blocks */
+
+#endif /* _LINUX_QUOTAIO_V2_H */
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 95ac82340c3b..900a787cbae9 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -134,8 +134,6 @@ header-y += posix_types.h
 header-y += ppdev.h
 header-y += prctl.h
 header-y += qnxtypes.h
-header-y += quotaio_v1.h
-header-y += quotaio_v2.h
 header-y += radeonfb.h
 header-y += raw.h
 header-y += resource.h
diff --git a/include/linux/quotaio_v1.h b/include/linux/quotaio_v1.h
deleted file mode 100644
index 746654b5de70..000000000000
--- a/include/linux/quotaio_v1.h
+++ /dev/null
@@ -1,33 +0,0 @@
-#ifndef _LINUX_QUOTAIO_V1_H
-#define _LINUX_QUOTAIO_V1_H
-
-#include <linux/types.h>
-
-/*
- * The following constants define the amount of time given a user
- * before the soft limits are treated as hard limits (usually resulting
- * in an allocation failure). The timer is started when the user crosses
- * their soft limit, it is reset when they go below their soft limit.
- */
-#define MAX_IQ_TIME  604800	/* (7*24*60*60) 1 week */
-#define MAX_DQ_TIME  604800	/* (7*24*60*60) 1 week */
-
-/*
- * The following structure defines the format of the disk quota file
- * (as it appears on disk) - the file is an array of these structures
- * indexed by user or group number.
- */
-struct v1_disk_dqblk {
-	__u32 dqb_bhardlimit;	/* absolute limit on disk blks alloc */
-	__u32 dqb_bsoftlimit;	/* preferred limit on disk blks */
-	__u32 dqb_curblocks;	/* current block count */
-	__u32 dqb_ihardlimit;	/* absolute limit on allocated inodes */
-	__u32 dqb_isoftlimit;	/* preferred inode limit */
-	__u32 dqb_curinodes;	/* current # allocated inodes */
-	time_t dqb_btime;	/* time limit for excessive disk use */
-	time_t dqb_itime;	/* time limit for excessive inode use */
-};
-
-#define v1_dqoff(UID)      ((loff_t)((UID) * sizeof (struct v1_disk_dqblk)))
-
-#endif	/* _LINUX_QUOTAIO_V1_H */
diff --git a/include/linux/quotaio_v2.h b/include/linux/quotaio_v2.h
deleted file mode 100644
index 303d7cbe30d4..000000000000
--- a/include/linux/quotaio_v2.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- *	Definitions of structures for vfsv0 quota format
- */
-
-#ifndef _LINUX_QUOTAIO_V2_H
-#define _LINUX_QUOTAIO_V2_H
-
-#include <linux/types.h>
-#include <linux/quota.h>
-
-/*
- * Definitions of magics and versions of current quota files
- */
-#define V2_INITQMAGICS {\
-	0xd9c01f11,	/* USRQUOTA */\
-	0xd9c01927	/* GRPQUOTA */\
-}
-
-#define V2_INITQVERSIONS {\
-	0,		/* USRQUOTA */\
-	0		/* GRPQUOTA */\
-}
-
-/*
- * The following structure defines the format of the disk quota file
- * (as it appears on disk) - the file is a radix tree whose leaves point
- * to blocks of these structures.
- */
-struct v2_disk_dqblk {
-	__le32 dqb_id;		/* id this quota applies to */
-	__le32 dqb_ihardlimit;	/* absolute limit on allocated inodes */
-	__le32 dqb_isoftlimit;	/* preferred inode limit */
-	__le32 dqb_curinodes;	/* current # allocated inodes */
-	__le32 dqb_bhardlimit;	/* absolute limit on disk space (in QUOTABLOCK_SIZE) */
-	__le32 dqb_bsoftlimit;	/* preferred limit on disk space (in QUOTABLOCK_SIZE) */
-	__le64 dqb_curspace;	/* current space occupied (in bytes) */
-	__le64 dqb_btime;	/* time limit for excessive disk use */
-	__le64 dqb_itime;	/* time limit for excessive inode use */
-};
-
-/*
- * Here are header structures as written on disk and their in-memory copies
- */
-/* First generic header */
-struct v2_disk_dqheader {
-	__le32 dqh_magic;	/* Magic number identifying file */
-	__le32 dqh_version;	/* File version */
-};
-
-/* Header with type and version specific information */
-struct v2_disk_dqinfo {
-	__le32 dqi_bgrace;	/* Time before block soft limit becomes hard limit */
-	__le32 dqi_igrace;	/* Time before inode soft limit becomes hard limit */
-	__le32 dqi_flags;	/* Flags for quotafile (DQF_*) */
-	__le32 dqi_blocks;	/* Number of blocks in file */
-	__le32 dqi_free_blk;	/* Number of first free block in the list */
-	__le32 dqi_free_entry;	/* Number of block with at least one free entry */
-};
-
-/*
- *  Structure of header of block with quota structures. It is padded to 16 bytes so
- *  there will be space for exactly 21 quota-entries in a block
- */
-struct v2_disk_dqdbheader {
-	__le32 dqdh_next_free;	/* Number of next block with free entry */
-	__le32 dqdh_prev_free;	/* Number of previous block with free entry */
-	__le16 dqdh_entries;	/* Number of valid entries in block */
-	__le16 dqdh_pad1;
-	__le32 dqdh_pad2;
-};
-
-#define V2_DQINFOOFF	sizeof(struct v2_disk_dqheader)	/* Offset of info header in file */
-#define V2_DQBLKSIZE_BITS	10
-#define V2_DQBLKSIZE	(1 << V2_DQBLKSIZE_BITS)	/* Size of block with quota structures */
-#define V2_DQTREEOFF	1		/* Offset of tree in file in blocks */
-#define V2_DQTREEDEPTH	4		/* Depth of quota tree */
-#define V2_DQSTRINBLK	((V2_DQBLKSIZE - sizeof(struct v2_disk_dqdbheader)) / sizeof(struct v2_disk_dqblk))	/* Number of entries in one blocks */
-
-#endif /* _LINUX_QUOTAIO_V2_H */
-- 
cgit v1.2.3


From 1ccd14b9c271c1ac6eec5c5ec5def433100e7248 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 22 Sep 2008 05:54:49 +0200
Subject: quota: Split off quota tree handling into a separate file

There is going to be a new version of quota format having 64-bit
quota limits and a new quota format for OCFS2. They are both
going to use the same tree structure as VFSv0 quota format. So
split out tree handling into a separate file and make size of
leaf blocks, amount of space usable in each block (needed for
checksumming) and structures contained in them configurable
so that the code can be shared.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
---
 fs/Kconfig                  |   5 +
 fs/Makefile                 |   1 +
 fs/quota_tree.c             | 645 ++++++++++++++++++++++++++++++++++++++++++++
 fs/quota_tree.h             |  25 ++
 fs/quota_v2.c               | 596 ++++------------------------------------
 fs/quotaio_v2.h             |  33 +--
 include/linux/dqblk_qtree.h |  56 ++++
 include/linux/dqblk_v2.h    |  19 +-
 8 files changed, 799 insertions(+), 581 deletions(-)
 create mode 100644 fs/quota_tree.c
 create mode 100644 fs/quota_tree.h
 create mode 100644 include/linux/dqblk_qtree.h

(limited to 'include/linux')

diff --git a/fs/Kconfig b/fs/Kconfig
index b93425ad15de..c1ce3d8831d8 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -302,6 +302,10 @@ config PRINT_QUOTA_WARNING
 	  Note that this behavior is currently deprecated and may go away in
 	  future. Please use notification via netlink socket instead.
 
+# Generic support for tree structured quota files. Seleted when needed.
+config QUOTA_TREE
+	 tristate
+
 config QFMT_V1
 	tristate "Old quota format support"
 	depends on QUOTA
@@ -313,6 +317,7 @@ config QFMT_V1
 config QFMT_V2
 	tristate "Quota format v2 support"
 	depends on QUOTA
+	select QUOTA_TREE
 	help
 	  This quota format allows using quotas with 32-bit UIDs/GIDs. If you
 	  need this functionality say Y here.
diff --git a/fs/Makefile b/fs/Makefile
index e6f423d1d228..c830611550d3 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -54,6 +54,7 @@ obj-$(CONFIG_GENERIC_ACL)	+= generic_acl.o
 obj-$(CONFIG_QUOTA)		+= dquot.o
 obj-$(CONFIG_QFMT_V1)		+= quota_v1.o
 obj-$(CONFIG_QFMT_V2)		+= quota_v2.o
+obj-$(CONFIG_QUOTA_TREE)	+= quota_tree.o
 obj-$(CONFIG_QUOTACTL)		+= quota.o
 
 obj-$(CONFIG_PROC_FS)		+= proc/
diff --git a/fs/quota_tree.c b/fs/quota_tree.c
new file mode 100644
index 000000000000..953404c95b17
--- /dev/null
+++ b/fs/quota_tree.c
@@ -0,0 +1,645 @@
+/*
+ *	vfsv0 quota IO operations on file
+ */
+
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/dqblk_v2.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/quotaops.h>
+
+#include <asm/byteorder.h>
+
+#include "quota_tree.h"
+
+MODULE_AUTHOR("Jan Kara");
+MODULE_DESCRIPTION("Quota trie support");
+MODULE_LICENSE("GPL");
+
+#define __QUOTA_QT_PARANOIA
+
+typedef char *dqbuf_t;
+
+static int get_index(struct qtree_mem_dqinfo *info, qid_t id, int depth)
+{
+	unsigned int epb = info->dqi_usable_bs >> 2;
+
+	depth = info->dqi_qtree_depth - depth - 1;
+	while (depth--)
+		id /= epb;
+	return id % epb;
+}
+
+/* Number of entries in one blocks */
+static inline int qtree_dqstr_in_blk(struct qtree_mem_dqinfo *info)
+{
+	return (info->dqi_usable_bs - sizeof(struct qt_disk_dqdbheader))
+	       / info->dqi_entry_size;
+}
+
+static dqbuf_t getdqbuf(size_t size)
+{
+	dqbuf_t buf = kmalloc(size, GFP_NOFS);
+	if (!buf)
+		printk(KERN_WARNING "VFS: Not enough memory for quota buffers.\n");
+	return buf;
+}
+
+static inline void freedqbuf(dqbuf_t buf)
+{
+	kfree(buf);
+}
+
+static inline ssize_t read_blk(struct qtree_mem_dqinfo *info, uint blk, dqbuf_t buf)
+{
+	struct super_block *sb = info->dqi_sb;
+
+	memset(buf, 0, info->dqi_usable_bs);
+	return sb->s_op->quota_read(sb, info->dqi_type, (char *)buf,
+	       info->dqi_usable_bs, blk << info->dqi_blocksize_bits);
+}
+
+static inline ssize_t write_blk(struct qtree_mem_dqinfo *info, uint blk, dqbuf_t buf)
+{
+	struct super_block *sb = info->dqi_sb;
+
+	return sb->s_op->quota_write(sb, info->dqi_type, (char *)buf,
+	       info->dqi_usable_bs, blk << info->dqi_blocksize_bits);
+}
+
+/* Remove empty block from list and return it */
+static int get_free_dqblk(struct qtree_mem_dqinfo *info)
+{
+	dqbuf_t buf = getdqbuf(info->dqi_usable_bs);
+	struct qt_disk_dqdbheader *dh = (struct qt_disk_dqdbheader *)buf;
+	int ret, blk;
+
+	if (!buf)
+		return -ENOMEM;
+	if (info->dqi_free_blk) {
+		blk = info->dqi_free_blk;
+		ret = read_blk(info, blk, buf);
+		if (ret < 0)
+			goto out_buf;
+		info->dqi_free_blk = le32_to_cpu(dh->dqdh_next_free);
+	}
+	else {
+		memset(buf, 0, info->dqi_usable_bs);
+		/* Assure block allocation... */
+		ret = write_blk(info, info->dqi_blocks, buf);
+		if (ret < 0)
+			goto out_buf;
+		blk = info->dqi_blocks++;
+	}
+	mark_info_dirty(info->dqi_sb, info->dqi_type);
+	ret = blk;
+out_buf:
+	freedqbuf(buf);
+	return ret;
+}
+
+/* Insert empty block to the list */
+static int put_free_dqblk(struct qtree_mem_dqinfo *info, dqbuf_t buf, uint blk)
+{
+	struct qt_disk_dqdbheader *dh = (struct qt_disk_dqdbheader *)buf;
+	int err;
+
+	dh->dqdh_next_free = cpu_to_le32(info->dqi_free_blk);
+	dh->dqdh_prev_free = cpu_to_le32(0);
+	dh->dqdh_entries = cpu_to_le16(0);
+	err = write_blk(info, blk, buf);
+	if (err < 0)
+		return err;
+	info->dqi_free_blk = blk;
+	mark_info_dirty(info->dqi_sb, info->dqi_type);
+	return 0;
+}
+
+/* Remove given block from the list of blocks with free entries */
+static int remove_free_dqentry(struct qtree_mem_dqinfo *info, dqbuf_t buf, uint blk)
+{
+	dqbuf_t tmpbuf = getdqbuf(info->dqi_usable_bs);
+	struct qt_disk_dqdbheader *dh = (struct qt_disk_dqdbheader *)buf;
+	uint nextblk = le32_to_cpu(dh->dqdh_next_free);
+	uint prevblk = le32_to_cpu(dh->dqdh_prev_free);
+	int err;
+
+	if (!tmpbuf)
+		return -ENOMEM;
+	if (nextblk) {
+		err = read_blk(info, nextblk, tmpbuf);
+		if (err < 0)
+			goto out_buf;
+		((struct qt_disk_dqdbheader *)tmpbuf)->dqdh_prev_free =
+							dh->dqdh_prev_free;
+		err = write_blk(info, nextblk, tmpbuf);
+		if (err < 0)
+			goto out_buf;
+	}
+	if (prevblk) {
+		err = read_blk(info, prevblk, tmpbuf);
+		if (err < 0)
+			goto out_buf;
+		((struct qt_disk_dqdbheader *)tmpbuf)->dqdh_next_free =
+							dh->dqdh_next_free;
+		err = write_blk(info, prevblk, tmpbuf);
+		if (err < 0)
+			goto out_buf;
+	} else {
+		info->dqi_free_entry = nextblk;
+		mark_info_dirty(info->dqi_sb, info->dqi_type);
+	}
+	freedqbuf(tmpbuf);
+	dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0);
+	/* No matter whether write succeeds block is out of list */
+	if (write_blk(info, blk, buf) < 0)
+		printk(KERN_ERR "VFS: Can't write block (%u) with free entries.\n", blk);
+	return 0;
+out_buf:
+	freedqbuf(tmpbuf);
+	return err;
+}
+
+/* Insert given block to the beginning of list with free entries */
+static int insert_free_dqentry(struct qtree_mem_dqinfo *info, dqbuf_t buf, uint blk)
+{
+	dqbuf_t tmpbuf = getdqbuf(info->dqi_usable_bs);
+	struct qt_disk_dqdbheader *dh = (struct qt_disk_dqdbheader *)buf;
+	int err;
+
+	if (!tmpbuf)
+		return -ENOMEM;
+	dh->dqdh_next_free = cpu_to_le32(info->dqi_free_entry);
+	dh->dqdh_prev_free = cpu_to_le32(0);
+	err = write_blk(info, blk, buf);
+	if (err < 0)
+		goto out_buf;
+	if (info->dqi_free_entry) {
+		err = read_blk(info, info->dqi_free_entry, tmpbuf);
+		if (err < 0)
+			goto out_buf;
+		((struct qt_disk_dqdbheader *)tmpbuf)->dqdh_prev_free =
+							cpu_to_le32(blk);
+		err = write_blk(info, info->dqi_free_entry, tmpbuf);
+		if (err < 0)
+			goto out_buf;
+	}
+	freedqbuf(tmpbuf);
+	info->dqi_free_entry = blk;
+	mark_info_dirty(info->dqi_sb, info->dqi_type);
+	return 0;
+out_buf:
+	freedqbuf(tmpbuf);
+	return err;
+}
+
+/* Is the entry in the block free? */
+int qtree_entry_unused(struct qtree_mem_dqinfo *info, char *disk)
+{
+	int i;
+
+	for (i = 0; i < info->dqi_entry_size; i++)
+		if (disk[i])
+			return 0;
+	return 1;
+}
+EXPORT_SYMBOL(qtree_entry_unused);
+
+/* Find space for dquot */
+static uint find_free_dqentry(struct qtree_mem_dqinfo *info,
+			      struct dquot *dquot, int *err)
+{
+	uint blk, i;
+	struct qt_disk_dqdbheader *dh;
+	dqbuf_t buf = getdqbuf(info->dqi_usable_bs);
+	char *ddquot;
+
+	*err = 0;
+	if (!buf) {
+		*err = -ENOMEM;
+		return 0;
+	}
+	dh = (struct qt_disk_dqdbheader *)buf;
+	if (info->dqi_free_entry) {
+		blk = info->dqi_free_entry;
+		*err = read_blk(info, blk, buf);
+		if (*err < 0)
+			goto out_buf;
+	} else {
+		blk = get_free_dqblk(info);
+		if ((int)blk < 0) {
+			*err = blk;
+			freedqbuf(buf);
+			return 0;
+		}
+		memset(buf, 0, info->dqi_usable_bs);
+		/* This is enough as block is already zeroed and entry list is empty... */
+		info->dqi_free_entry = blk;
+		mark_info_dirty(dquot->dq_sb, dquot->dq_type);
+	}
+	/* Block will be full? */
+	if (le16_to_cpu(dh->dqdh_entries) + 1 >= qtree_dqstr_in_blk(info)) {
+		*err = remove_free_dqentry(info, buf, blk);
+		if (*err < 0) {
+			printk(KERN_ERR "VFS: find_free_dqentry(): Can't "
+			       "remove block (%u) from entry free list.\n",
+			       blk);
+			goto out_buf;
+		}
+	}
+	le16_add_cpu(&dh->dqdh_entries, 1);
+	/* Find free structure in block */
+	for (i = 0, ddquot = ((char *)buf) + sizeof(struct qt_disk_dqdbheader);
+	     i < qtree_dqstr_in_blk(info) && !qtree_entry_unused(info, ddquot);
+	     i++, ddquot += info->dqi_entry_size);
+#ifdef __QUOTA_QT_PARANOIA
+	if (i == qtree_dqstr_in_blk(info)) {
+		printk(KERN_ERR "VFS: find_free_dqentry(): Data block full "
+				"but it shouldn't.\n");
+		*err = -EIO;
+		goto out_buf;
+	}
+#endif
+	*err = write_blk(info, blk, buf);
+	if (*err < 0) {
+		printk(KERN_ERR "VFS: find_free_dqentry(): Can't write quota "
+				"data block %u.\n", blk);
+		goto out_buf;
+	}
+	dquot->dq_off = (blk << info->dqi_blocksize_bits) +
+			sizeof(struct qt_disk_dqdbheader) +
+			i * info->dqi_entry_size;
+	freedqbuf(buf);
+	return blk;
+out_buf:
+	freedqbuf(buf);
+	return 0;
+}
+
+/* Insert reference to structure into the trie */
+static int do_insert_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
+			  uint *treeblk, int depth)
+{
+	dqbuf_t buf = getdqbuf(info->dqi_usable_bs);
+	int ret = 0, newson = 0, newact = 0;
+	__le32 *ref;
+	uint newblk;
+
+	if (!buf)
+		return -ENOMEM;
+	if (!*treeblk) {
+		ret = get_free_dqblk(info);
+		if (ret < 0)
+			goto out_buf;
+		*treeblk = ret;
+		memset(buf, 0, info->dqi_usable_bs);
+		newact = 1;
+	} else {
+		ret = read_blk(info, *treeblk, buf);
+		if (ret < 0) {
+			printk(KERN_ERR "VFS: Can't read tree quota block "
+					"%u.\n", *treeblk);
+			goto out_buf;
+		}
+	}
+	ref = (__le32 *)buf;
+	newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]);
+	if (!newblk)
+		newson = 1;
+	if (depth == info->dqi_qtree_depth - 1) {
+#ifdef __QUOTA_QT_PARANOIA
+		if (newblk) {
+			printk(KERN_ERR "VFS: Inserting already present quota "
+					"entry (block %u).\n",
+			       le32_to_cpu(ref[get_index(info,
+						dquot->dq_id, depth)]));
+			ret = -EIO;
+			goto out_buf;
+		}
+#endif
+		newblk = find_free_dqentry(info, dquot, &ret);
+	} else {
+		ret = do_insert_tree(info, dquot, &newblk, depth+1);
+	}
+	if (newson && ret >= 0) {
+		ref[get_index(info, dquot->dq_id, depth)] =
+							cpu_to_le32(newblk);
+		ret = write_blk(info, *treeblk, buf);
+	} else if (newact && ret < 0) {
+		put_free_dqblk(info, buf, *treeblk);
+	}
+out_buf:
+	freedqbuf(buf);
+	return ret;
+}
+
+/* Wrapper for inserting quota structure into tree */
+static inline int dq_insert_tree(struct qtree_mem_dqinfo *info,
+				 struct dquot *dquot)
+{
+	int tmp = QT_TREEOFF;
+	return do_insert_tree(info, dquot, &tmp, 0);
+}
+
+/*
+ *	We don't have to be afraid of deadlocks as we never have quotas on quota files...
+ */
+int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
+{
+	int type = dquot->dq_type;
+	struct super_block *sb = dquot->dq_sb;
+	ssize_t ret;
+	dqbuf_t ddquot = getdqbuf(info->dqi_entry_size);
+
+	if (!ddquot)
+		return -ENOMEM;
+
+	/* dq_off is guarded by dqio_mutex */
+	if (!dquot->dq_off) {
+		ret = dq_insert_tree(info, dquot);
+		if (ret < 0) {
+			printk(KERN_ERR "VFS: Error %zd occurred while "
+					"creating quota.\n", ret);
+			freedqbuf(ddquot);
+			return ret;
+		}
+	}
+	spin_lock(&dq_data_lock);
+	info->dqi_ops->mem2disk_dqblk(ddquot, dquot);
+	spin_unlock(&dq_data_lock);
+	ret = sb->s_op->quota_write(sb, type, (char *)ddquot,
+					info->dqi_entry_size, dquot->dq_off);
+	if (ret != info->dqi_entry_size) {
+		printk(KERN_WARNING "VFS: dquota write failed on dev %s\n",
+		       sb->s_id);
+		if (ret >= 0)
+			ret = -ENOSPC;
+	} else {
+		ret = 0;
+	}
+	dqstats.writes++;
+	freedqbuf(ddquot);
+
+	return ret;
+}
+EXPORT_SYMBOL(qtree_write_dquot);
+
+/* Free dquot entry in data block */
+static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot,
+			uint blk)
+{
+	struct qt_disk_dqdbheader *dh;
+	dqbuf_t buf = getdqbuf(info->dqi_usable_bs);
+	int ret = 0;
+
+	if (!buf)
+		return -ENOMEM;
+	if (dquot->dq_off >> info->dqi_blocksize_bits != blk) {
+		printk(KERN_ERR "VFS: Quota structure has offset to other "
+		  "block (%u) than it should (%u).\n", blk,
+		  (uint)(dquot->dq_off >> info->dqi_blocksize_bits));
+		goto out_buf;
+	}
+	ret = read_blk(info, blk, buf);
+	if (ret < 0) {
+		printk(KERN_ERR "VFS: Can't read quota data block %u\n", blk);
+		goto out_buf;
+	}
+	dh = (struct qt_disk_dqdbheader *)buf;
+	le16_add_cpu(&dh->dqdh_entries, -1);
+	if (!le16_to_cpu(dh->dqdh_entries)) {	/* Block got free? */
+		ret = remove_free_dqentry(info, buf, blk);
+		if (ret >= 0)
+			ret = put_free_dqblk(info, buf, blk);
+		if (ret < 0) {
+			printk(KERN_ERR "VFS: Can't move quota data block (%u) "
+			  "to free list.\n", blk);
+			goto out_buf;
+		}
+	} else {
+		memset(buf +
+		       (dquot->dq_off & ((1 << info->dqi_blocksize_bits) - 1)),
+		       0, info->dqi_entry_size);
+		if (le16_to_cpu(dh->dqdh_entries) ==
+		    qtree_dqstr_in_blk(info) - 1) {
+			/* Insert will write block itself */
+			ret = insert_free_dqentry(info, buf, blk);
+			if (ret < 0) {
+				printk(KERN_ERR "VFS: Can't insert quota data "
+				       "block (%u) to free entry list.\n", blk);
+				goto out_buf;
+			}
+		} else {
+			ret = write_blk(info, blk, buf);
+			if (ret < 0) {
+				printk(KERN_ERR "VFS: Can't write quota data "
+				  "block %u\n", blk);
+				goto out_buf;
+			}
+		}
+	}
+	dquot->dq_off = 0;	/* Quota is now unattached */
+out_buf:
+	freedqbuf(buf);
+	return ret;
+}
+
+/* Remove reference to dquot from tree */
+static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
+		       uint *blk, int depth)
+{
+	dqbuf_t buf = getdqbuf(info->dqi_usable_bs);
+	int ret = 0;
+	uint newblk;
+	__le32 *ref = (__le32 *)buf;
+
+	if (!buf)
+		return -ENOMEM;
+	ret = read_blk(info, *blk, buf);
+	if (ret < 0) {
+		printk(KERN_ERR "VFS: Can't read quota data block %u\n", *blk);
+		goto out_buf;
+	}
+	newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]);
+	if (depth == info->dqi_qtree_depth - 1) {
+		ret = free_dqentry(info, dquot, newblk);
+		newblk = 0;
+	} else {
+		ret = remove_tree(info, dquot, &newblk, depth+1);
+	}
+	if (ret >= 0 && !newblk) {
+		int i;
+		ref[get_index(info, dquot->dq_id, depth)] = cpu_to_le32(0);
+		/* Block got empty? */
+		for (i = 0;
+		     i < (info->dqi_usable_bs >> 2) && !ref[i];
+		     i++);
+		/* Don't put the root block into the free block list */
+		if (i == (info->dqi_usable_bs >> 2)
+		    && *blk != QT_TREEOFF) {
+			put_free_dqblk(info, buf, *blk);
+			*blk = 0;
+		} else {
+			ret = write_blk(info, *blk, buf);
+			if (ret < 0)
+				printk(KERN_ERR "VFS: Can't write quota tree "
+				  "block %u.\n", *blk);
+		}
+	}
+out_buf:
+	freedqbuf(buf);
+	return ret;
+}
+
+/* Delete dquot from tree */
+int qtree_delete_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
+{
+	uint tmp = QT_TREEOFF;
+
+	if (!dquot->dq_off)	/* Even not allocated? */
+		return 0;
+	return remove_tree(info, dquot, &tmp, 0);
+}
+EXPORT_SYMBOL(qtree_delete_dquot);
+
+/* Find entry in block */
+static loff_t find_block_dqentry(struct qtree_mem_dqinfo *info,
+				 struct dquot *dquot, uint blk)
+{
+	dqbuf_t buf = getdqbuf(info->dqi_usable_bs);
+	loff_t ret = 0;
+	int i;
+	char *ddquot;
+
+	if (!buf)
+		return -ENOMEM;
+	ret = read_blk(info, blk, buf);
+	if (ret < 0) {
+		printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
+		goto out_buf;
+	}
+	for (i = 0, ddquot = ((char *)buf) + sizeof(struct qt_disk_dqdbheader);
+	     i < qtree_dqstr_in_blk(info) && !info->dqi_ops->is_id(ddquot, dquot);
+	     i++, ddquot += info->dqi_entry_size);
+	if (i == qtree_dqstr_in_blk(info)) {
+		printk(KERN_ERR "VFS: Quota for id %u referenced "
+		  "but not present.\n", dquot->dq_id);
+		ret = -EIO;
+		goto out_buf;
+	} else {
+		ret = (blk << info->dqi_blocksize_bits) + sizeof(struct
+		  qt_disk_dqdbheader) + i * info->dqi_entry_size;
+	}
+out_buf:
+	freedqbuf(buf);
+	return ret;
+}
+
+/* Find entry for given id in the tree */
+static loff_t find_tree_dqentry(struct qtree_mem_dqinfo *info,
+				struct dquot *dquot, uint blk, int depth)
+{
+	dqbuf_t buf = getdqbuf(info->dqi_usable_bs);
+	loff_t ret = 0;
+	__le32 *ref = (__le32 *)buf;
+
+	if (!buf)
+		return -ENOMEM;
+	ret = read_blk(info, blk, buf);
+	if (ret < 0) {
+		printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
+		goto out_buf;
+	}
+	ret = 0;
+	blk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]);
+	if (!blk)	/* No reference? */
+		goto out_buf;
+	if (depth < info->dqi_qtree_depth - 1)
+		ret = find_tree_dqentry(info, dquot, blk, depth+1);
+	else
+		ret = find_block_dqentry(info, dquot, blk);
+out_buf:
+	freedqbuf(buf);
+	return ret;
+}
+
+/* Find entry for given id in the tree - wrapper function */
+static inline loff_t find_dqentry(struct qtree_mem_dqinfo *info,
+				  struct dquot *dquot)
+{
+	return find_tree_dqentry(info, dquot, QT_TREEOFF, 0);
+}
+
+int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
+{
+	int type = dquot->dq_type;
+	struct super_block *sb = dquot->dq_sb;
+	loff_t offset;
+	dqbuf_t ddquot;
+	int ret = 0;
+
+#ifdef __QUOTA_QT_PARANOIA
+	/* Invalidated quota? */
+	if (!sb_dqopt(dquot->dq_sb)->files[type]) {
+		printk(KERN_ERR "VFS: Quota invalidated while reading!\n");
+		return -EIO;
+	}
+#endif
+	/* Do we know offset of the dquot entry in the quota file? */
+	if (!dquot->dq_off) {
+		offset = find_dqentry(info, dquot);
+		if (offset <= 0) {	/* Entry not present? */
+			if (offset < 0)
+				printk(KERN_ERR "VFS: Can't read quota "
+				  "structure for id %u.\n", dquot->dq_id);
+			dquot->dq_off = 0;
+			set_bit(DQ_FAKE_B, &dquot->dq_flags);
+			memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk));
+			ret = offset;
+			goto out;
+		}
+		dquot->dq_off = offset;
+	}
+	ddquot = getdqbuf(info->dqi_entry_size);
+	if (!ddquot)
+		return -ENOMEM;
+	ret = sb->s_op->quota_read(sb, type, (char *)ddquot,
+				   info->dqi_entry_size, dquot->dq_off);
+	if (ret != info->dqi_entry_size) {
+		if (ret >= 0)
+			ret = -EIO;
+		printk(KERN_ERR "VFS: Error while reading quota "
+				"structure for id %u.\n", dquot->dq_id);
+		set_bit(DQ_FAKE_B, &dquot->dq_flags);
+		memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk));
+		freedqbuf(ddquot);
+		goto out;
+	}
+	spin_lock(&dq_data_lock);
+	info->dqi_ops->disk2mem_dqblk(dquot, ddquot);
+	if (!dquot->dq_dqb.dqb_bhardlimit &&
+	    !dquot->dq_dqb.dqb_bsoftlimit &&
+	    !dquot->dq_dqb.dqb_ihardlimit &&
+	    !dquot->dq_dqb.dqb_isoftlimit)
+		set_bit(DQ_FAKE_B, &dquot->dq_flags);
+	spin_unlock(&dq_data_lock);
+	freedqbuf(ddquot);
+out:
+	dqstats.reads++;
+	return ret;
+}
+EXPORT_SYMBOL(qtree_read_dquot);
+
+/* Check whether dquot should not be deleted. We know we are
+ * the only one operating on dquot (thanks to dq_lock) */
+int qtree_release_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
+{
+	if (test_bit(DQ_FAKE_B, &dquot->dq_flags) && !(dquot->dq_dqb.dqb_curinodes | dquot->dq_dqb.dqb_curspace))
+		return qtree_delete_dquot(info, dquot);
+	return 0;
+}
+EXPORT_SYMBOL(qtree_release_dquot);
diff --git a/fs/quota_tree.h b/fs/quota_tree.h
new file mode 100644
index 000000000000..a1ab8db81a51
--- /dev/null
+++ b/fs/quota_tree.h
@@ -0,0 +1,25 @@
+/*
+ *	Definitions of structures for vfsv0 quota format
+ */
+
+#ifndef _LINUX_QUOTA_TREE_H
+#define _LINUX_QUOTA_TREE_H
+
+#include <linux/types.h>
+#include <linux/quota.h>
+
+/*
+ *  Structure of header of block with quota structures. It is padded to 16 bytes so
+ *  there will be space for exactly 21 quota-entries in a block
+ */
+struct qt_disk_dqdbheader {
+	__le32 dqdh_next_free;	/* Number of next block with free entry */
+	__le32 dqdh_prev_free;	/* Number of previous block with free entry */
+	__le16 dqdh_entries;	/* Number of valid entries in block */
+	__le16 dqdh_pad1;
+	__le32 dqdh_pad2;
+};
+
+#define QT_TREEOFF	1		/* Offset of tree in file in blocks */
+
+#endif /* _LINUX_QUOTAIO_TREE_H */
diff --git a/fs/quota_v2.c b/fs/quota_v2.c
index a21d1a7c356a..a87f1028a425 100644
--- a/fs/quota_v2.c
+++ b/fs/quota_v2.c
@@ -14,6 +14,7 @@
 
 #include <asm/byteorder.h>
 
+#include "quota_tree.h"
 #include "quotaio_v2.h"
 
 MODULE_AUTHOR("Jan Kara");
@@ -22,10 +23,15 @@ MODULE_LICENSE("GPL");
 
 #define __QUOTA_V2_PARANOIA
 
-typedef char *dqbuf_t;
+static void v2_mem2diskdqb(void *dp, struct dquot *dquot);
+static void v2_disk2memdqb(struct dquot *dquot, void *dp);
+static int v2_is_id(void *dp, struct dquot *dquot);
 
-#define GETIDINDEX(id, depth) (((id) >> ((V2_DQTREEDEPTH-(depth)-1)*8)) & 0xff)
-#define GETENTRIES(buf) ((struct v2_disk_dqblk *)(((char *)buf)+sizeof(struct v2_disk_dqdbheader)))
+static struct qtree_fmt_operations v2_qtree_ops = {
+	.mem2disk_dqblk = v2_mem2diskdqb,
+	.disk2mem_dqblk = v2_disk2memdqb,
+	.is_id = v2_is_id,
+};
 
 #define QUOTABLOCK_BITS 10
 #define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS)
@@ -64,7 +70,7 @@ static int v2_check_quota_file(struct super_block *sb, int type)
 static int v2_read_file_info(struct super_block *sb, int type)
 {
 	struct v2_disk_dqinfo dinfo;
-	struct mem_dqinfo *info = sb_dqopt(sb)->info+type;
+	struct mem_dqinfo *info = sb_dqinfo(sb, type);
 	ssize_t size;
 
 	size = sb->s_op->quota_read(sb, type, (char *)&dinfo,
@@ -80,9 +86,16 @@ static int v2_read_file_info(struct super_block *sb, int type)
 	info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
 	info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
 	info->dqi_flags = le32_to_cpu(dinfo.dqi_flags);
-	info->u.v2_i.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
-	info->u.v2_i.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
-	info->u.v2_i.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
+	info->u.v2_i.i.dqi_sb = sb;
+	info->u.v2_i.i.dqi_type = type;
+	info->u.v2_i.i.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
+	info->u.v2_i.i.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
+	info->u.v2_i.i.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
+	info->u.v2_i.i.dqi_blocksize_bits = V2_DQBLKSIZE_BITS;
+	info->u.v2_i.i.dqi_usable_bs = 1 << V2_DQBLKSIZE_BITS;
+	info->u.v2_i.i.dqi_qtree_depth = qtree_depth(&info->u.v2_i.i);
+	info->u.v2_i.i.dqi_entry_size = sizeof(struct v2_disk_dqblk);
+	info->u.v2_i.i.dqi_ops = &v2_qtree_ops;
 	return 0;
 }
 
@@ -90,7 +103,7 @@ static int v2_read_file_info(struct super_block *sb, int type)
 static int v2_write_file_info(struct super_block *sb, int type)
 {
 	struct v2_disk_dqinfo dinfo;
-	struct mem_dqinfo *info = sb_dqopt(sb)->info+type;
+	struct mem_dqinfo *info = sb_dqinfo(sb, type);
 	ssize_t size;
 
 	spin_lock(&dq_data_lock);
@@ -99,9 +112,9 @@ static int v2_write_file_info(struct super_block *sb, int type)
 	dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace);
 	dinfo.dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK);
 	spin_unlock(&dq_data_lock);
-	dinfo.dqi_blocks = cpu_to_le32(info->u.v2_i.dqi_blocks);
-	dinfo.dqi_free_blk = cpu_to_le32(info->u.v2_i.dqi_free_blk);
-	dinfo.dqi_free_entry = cpu_to_le32(info->u.v2_i.dqi_free_entry);
+	dinfo.dqi_blocks = cpu_to_le32(info->u.v2_i.i.dqi_blocks);
+	dinfo.dqi_free_blk = cpu_to_le32(info->u.v2_i.i.dqi_free_blk);
+	dinfo.dqi_free_entry = cpu_to_le32(info->u.v2_i.i.dqi_free_entry);
 	size = sb->s_op->quota_write(sb, type, (char *)&dinfo,
 	       sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
 	if (size != sizeof(struct v2_disk_dqinfo)) {
@@ -112,8 +125,11 @@ static int v2_write_file_info(struct super_block *sb, int type)
 	return 0;
 }
 
-static void disk2memdqb(struct mem_dqblk *m, struct v2_disk_dqblk *d)
+static void v2_disk2memdqb(struct dquot *dquot, void *dp)
 {
+	struct v2_disk_dqblk *d = dp, empty;
+	struct mem_dqblk *m = &dquot->dq_dqb;
+
 	m->dqb_ihardlimit = le32_to_cpu(d->dqb_ihardlimit);
 	m->dqb_isoftlimit = le32_to_cpu(d->dqb_isoftlimit);
 	m->dqb_curinodes = le32_to_cpu(d->dqb_curinodes);
@@ -122,10 +138,20 @@ static void disk2memdqb(struct mem_dqblk *m, struct v2_disk_dqblk *d)
 	m->dqb_bsoftlimit = v2_qbtos(le32_to_cpu(d->dqb_bsoftlimit));
 	m->dqb_curspace = le64_to_cpu(d->dqb_curspace);
 	m->dqb_btime = le64_to_cpu(d->dqb_btime);
+	/* We need to escape back all-zero structure */
+	memset(&empty, 0, sizeof(struct v2_disk_dqblk));
+	empty.dqb_itime = cpu_to_le64(1);
+	if (!memcmp(&empty, dp, sizeof(struct v2_disk_dqblk)))
+		m->dqb_itime = 0;
 }
 
-static void mem2diskdqb(struct v2_disk_dqblk *d, struct mem_dqblk *m, qid_t id)
+static void v2_mem2diskdqb(void *dp, struct dquot *dquot)
 {
+	struct v2_disk_dqblk *d = dp;
+	struct mem_dqblk *m = &dquot->dq_dqb;
+	struct qtree_mem_dqinfo *info =
+			&sb_dqinfo(dquot->dq_sb, dquot->dq_type)->u.v2_i.i;
+
 	d->dqb_ihardlimit = cpu_to_le32(m->dqb_ihardlimit);
 	d->dqb_isoftlimit = cpu_to_le32(m->dqb_isoftlimit);
 	d->dqb_curinodes = cpu_to_le32(m->dqb_curinodes);
@@ -134,553 +160,35 @@ static void mem2diskdqb(struct v2_disk_dqblk *d, struct mem_dqblk *m, qid_t id)
 	d->dqb_bsoftlimit = cpu_to_le32(v2_stoqb(m->dqb_bsoftlimit));
 	d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
 	d->dqb_btime = cpu_to_le64(m->dqb_btime);
-	d->dqb_id = cpu_to_le32(id);
-}
-
-static dqbuf_t getdqbuf(void)
-{
-	dqbuf_t buf = kmalloc(V2_DQBLKSIZE, GFP_NOFS);
-	if (!buf)
-		printk(KERN_WARNING "VFS: Not enough memory for quota buffers.\n");
-	return buf;
-}
-
-static inline void freedqbuf(dqbuf_t buf)
-{
-	kfree(buf);
-}
-
-static inline ssize_t read_blk(struct super_block *sb, int type, uint blk, dqbuf_t buf)
-{
-	memset(buf, 0, V2_DQBLKSIZE);
-	return sb->s_op->quota_read(sb, type, (char *)buf,
-	       V2_DQBLKSIZE, blk << V2_DQBLKSIZE_BITS);
-}
-
-static inline ssize_t write_blk(struct super_block *sb, int type, uint blk, dqbuf_t buf)
-{
-	return sb->s_op->quota_write(sb, type, (char *)buf,
-	       V2_DQBLKSIZE, blk << V2_DQBLKSIZE_BITS);
-}
-
-/* Remove empty block from list and return it */
-static int get_free_dqblk(struct super_block *sb, int type)
-{
-	dqbuf_t buf = getdqbuf();
-	struct mem_dqinfo *info = sb_dqinfo(sb, type);
-	struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf;
-	int ret, blk;
-
-	if (!buf)
-		return -ENOMEM;
-	if (info->u.v2_i.dqi_free_blk) {
-		blk = info->u.v2_i.dqi_free_blk;
-		if ((ret = read_blk(sb, type, blk, buf)) < 0)
-			goto out_buf;
-		info->u.v2_i.dqi_free_blk = le32_to_cpu(dh->dqdh_next_free);
-	}
-	else {
-		memset(buf, 0, V2_DQBLKSIZE);
-		/* Assure block allocation... */
-		if ((ret = write_blk(sb, type, info->u.v2_i.dqi_blocks, buf)) < 0)
-			goto out_buf;
-		blk = info->u.v2_i.dqi_blocks++;
-	}
-	mark_info_dirty(sb, type);
-	ret = blk;
-out_buf:
-	freedqbuf(buf);
-	return ret;
-}
-
-/* Insert empty block to the list */
-static int put_free_dqblk(struct super_block *sb, int type, dqbuf_t buf, uint blk)
-{
-	struct mem_dqinfo *info = sb_dqinfo(sb, type);
-	struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf;
-	int err;
-
-	dh->dqdh_next_free = cpu_to_le32(info->u.v2_i.dqi_free_blk);
-	dh->dqdh_prev_free = cpu_to_le32(0);
-	dh->dqdh_entries = cpu_to_le16(0);
-	info->u.v2_i.dqi_free_blk = blk;
-	mark_info_dirty(sb, type);
-	/* Some strange block. We had better leave it... */
-	if ((err = write_blk(sb, type, blk, buf)) < 0)
-		return err;
-	return 0;
+	d->dqb_id = cpu_to_le32(dquot->dq_id);
+	if (qtree_entry_unused(info, dp))
+		d->dqb_itime = cpu_to_le64(1);
 }
 
-/* Remove given block from the list of blocks with free entries */
-static int remove_free_dqentry(struct super_block *sb, int type, dqbuf_t buf, uint blk)
+static int v2_is_id(void *dp, struct dquot *dquot)
 {
-	dqbuf_t tmpbuf = getdqbuf();
-	struct mem_dqinfo *info = sb_dqinfo(sb, type);
-	struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf;
-	uint nextblk = le32_to_cpu(dh->dqdh_next_free), prevblk = le32_to_cpu(dh->dqdh_prev_free);
-	int err;
+	struct v2_disk_dqblk *d = dp;
+	struct qtree_mem_dqinfo *info =
+			&sb_dqinfo(dquot->dq_sb, dquot->dq_type)->u.v2_i.i;
 
-	if (!tmpbuf)
-		return -ENOMEM;
-	if (nextblk) {
-		if ((err = read_blk(sb, type, nextblk, tmpbuf)) < 0)
-			goto out_buf;
-		((struct v2_disk_dqdbheader *)tmpbuf)->dqdh_prev_free = dh->dqdh_prev_free;
-		if ((err = write_blk(sb, type, nextblk, tmpbuf)) < 0)
-			goto out_buf;
-	}
-	if (prevblk) {
-		if ((err = read_blk(sb, type, prevblk, tmpbuf)) < 0)
-			goto out_buf;
-		((struct v2_disk_dqdbheader *)tmpbuf)->dqdh_next_free = dh->dqdh_next_free;
-		if ((err = write_blk(sb, type, prevblk, tmpbuf)) < 0)
-			goto out_buf;
-	}
-	else {
-		info->u.v2_i.dqi_free_entry = nextblk;
-		mark_info_dirty(sb, type);
-	}
-	freedqbuf(tmpbuf);
-	dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0);
-	/* No matter whether write succeeds block is out of list */
-	if (write_blk(sb, type, blk, buf) < 0)
-		printk(KERN_ERR "VFS: Can't write block (%u) with free entries.\n", blk);
-	return 0;
-out_buf:
-	freedqbuf(tmpbuf);
-	return err;
-}
-
-/* Insert given block to the beginning of list with free entries */
-static int insert_free_dqentry(struct super_block *sb, int type, dqbuf_t buf, uint blk)
-{
-	dqbuf_t tmpbuf = getdqbuf();
-	struct mem_dqinfo *info = sb_dqinfo(sb, type);
-	struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf;
-	int err;
-
-	if (!tmpbuf)
-		return -ENOMEM;
-	dh->dqdh_next_free = cpu_to_le32(info->u.v2_i.dqi_free_entry);
-	dh->dqdh_prev_free = cpu_to_le32(0);
-	if ((err = write_blk(sb, type, blk, buf)) < 0)
-		goto out_buf;
-	if (info->u.v2_i.dqi_free_entry) {
-		if ((err = read_blk(sb, type, info->u.v2_i.dqi_free_entry, tmpbuf)) < 0)
-			goto out_buf;
-		((struct v2_disk_dqdbheader *)tmpbuf)->dqdh_prev_free = cpu_to_le32(blk);
-		if ((err = write_blk(sb, type, info->u.v2_i.dqi_free_entry, tmpbuf)) < 0)
-			goto out_buf;
-	}
-	freedqbuf(tmpbuf);
-	info->u.v2_i.dqi_free_entry = blk;
-	mark_info_dirty(sb, type);
-	return 0;
-out_buf:
-	freedqbuf(tmpbuf);
-	return err;
-}
-
-/* Find space for dquot */
-static uint find_free_dqentry(struct dquot *dquot, int *err)
-{
-	struct super_block *sb = dquot->dq_sb;
-	struct mem_dqinfo *info = sb_dqopt(sb)->info+dquot->dq_type;
-	uint blk, i;
-	struct v2_disk_dqdbheader *dh;
-	struct v2_disk_dqblk *ddquot;
-	struct v2_disk_dqblk fakedquot;
-	dqbuf_t buf;
-
-	*err = 0;
-	if (!(buf = getdqbuf())) {
-		*err = -ENOMEM;
+	if (qtree_entry_unused(info, dp))
 		return 0;
-	}
-	dh = (struct v2_disk_dqdbheader *)buf;
-	ddquot = GETENTRIES(buf);
-	if (info->u.v2_i.dqi_free_entry) {
-		blk = info->u.v2_i.dqi_free_entry;
-		if ((*err = read_blk(sb, dquot->dq_type, blk, buf)) < 0)
-			goto out_buf;
-	}
-	else {
-		blk = get_free_dqblk(sb, dquot->dq_type);
-		if ((int)blk < 0) {
-			*err = blk;
-			freedqbuf(buf);
-			return 0;
-		}
-		memset(buf, 0, V2_DQBLKSIZE);
-		/* This is enough as block is already zeroed and entry list is empty... */
-		info->u.v2_i.dqi_free_entry = blk;
-		mark_info_dirty(sb, dquot->dq_type);
-	}
-	if (le16_to_cpu(dh->dqdh_entries)+1 >= V2_DQSTRINBLK)	/* Block will be full? */
-		if ((*err = remove_free_dqentry(sb, dquot->dq_type, buf, blk)) < 0) {
-			printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk);
-			goto out_buf;
-		}
-	le16_add_cpu(&dh->dqdh_entries, 1);
-	memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk));
-	/* Find free structure in block */
-	for (i = 0; i < V2_DQSTRINBLK && memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)); i++);
-#ifdef __QUOTA_V2_PARANOIA
-	if (i == V2_DQSTRINBLK) {
-		printk(KERN_ERR "VFS: find_free_dqentry(): Data block full but it shouldn't.\n");
-		*err = -EIO;
-		goto out_buf;
-	}
-#endif
-	if ((*err = write_blk(sb, dquot->dq_type, blk, buf)) < 0) {
-		printk(KERN_ERR "VFS: find_free_dqentry(): Can't write quota data block %u.\n", blk);
-		goto out_buf;
-	}
-	dquot->dq_off = (blk<<V2_DQBLKSIZE_BITS)+sizeof(struct v2_disk_dqdbheader)+i*sizeof(struct v2_disk_dqblk);
-	freedqbuf(buf);
-	return blk;
-out_buf:
-	freedqbuf(buf);
-	return 0;
+	return le32_to_cpu(d->dqb_id) == dquot->dq_id;
 }
 
-/* Insert reference to structure into the trie */
-static int do_insert_tree(struct dquot *dquot, uint *treeblk, int depth)
-{
-	struct super_block *sb = dquot->dq_sb;
-	dqbuf_t buf;
-	int ret = 0, newson = 0, newact = 0;
-	__le32 *ref;
-	uint newblk;
-
-	if (!(buf = getdqbuf()))
-		return -ENOMEM;
-	if (!*treeblk) {
-		ret = get_free_dqblk(sb, dquot->dq_type);
-		if (ret < 0)
-			goto out_buf;
-		*treeblk = ret;
-		memset(buf, 0, V2_DQBLKSIZE);
-		newact = 1;
-	}
-	else {
-		if ((ret = read_blk(sb, dquot->dq_type, *treeblk, buf)) < 0) {
-			printk(KERN_ERR "VFS: Can't read tree quota block %u.\n", *treeblk);
-			goto out_buf;
-		}
-	}
-	ref = (__le32 *)buf;
-	newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
-	if (!newblk)
-		newson = 1;
-	if (depth == V2_DQTREEDEPTH-1) {
-#ifdef __QUOTA_V2_PARANOIA
-		if (newblk) {
-			printk(KERN_ERR "VFS: Inserting already present quota entry (block %u).\n", le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]));
-			ret = -EIO;
-			goto out_buf;
-		}
-#endif
-		newblk = find_free_dqentry(dquot, &ret);
-	}
-	else
-		ret = do_insert_tree(dquot, &newblk, depth+1);
-	if (newson && ret >= 0) {
-		ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(newblk);
-		ret = write_blk(sb, dquot->dq_type, *treeblk, buf);
-	}
-	else if (newact && ret < 0)
-		put_free_dqblk(sb, dquot->dq_type, buf, *treeblk);
-out_buf:
-	freedqbuf(buf);
-	return ret;
-}
-
-/* Wrapper for inserting quota structure into tree */
-static inline int dq_insert_tree(struct dquot *dquot)
+static int v2_read_dquot(struct dquot *dquot)
 {
-	int tmp = V2_DQTREEOFF;
-	return do_insert_tree(dquot, &tmp, 0);
+	return qtree_read_dquot(&sb_dqinfo(dquot->dq_sb, dquot->dq_type)->u.v2_i.i, dquot);
 }
 
-/*
- *	We don't have to be afraid of deadlocks as we never have quotas on quota files...
- */
 static int v2_write_dquot(struct dquot *dquot)
 {
-	int type = dquot->dq_type;
-	ssize_t ret;
-	struct v2_disk_dqblk ddquot, empty;
-
-	/* dq_off is guarded by dqio_mutex */
-	if (!dquot->dq_off)
-		if ((ret = dq_insert_tree(dquot)) < 0) {
-			printk(KERN_ERR "VFS: Error %zd occurred while creating quota.\n", ret);
-			return ret;
-		}
-	spin_lock(&dq_data_lock);
-	mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id);
-	/* Argh... We may need to write structure full of zeroes but that would be
-	 * treated as an empty place by the rest of the code. Format change would
-	 * be definitely cleaner but the problems probably are not worth it */
-	memset(&empty, 0, sizeof(struct v2_disk_dqblk));
-	if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk)))
-		ddquot.dqb_itime = cpu_to_le64(1);
-	spin_unlock(&dq_data_lock);
-	ret = dquot->dq_sb->s_op->quota_write(dquot->dq_sb, type,
-	      (char *)&ddquot, sizeof(struct v2_disk_dqblk), dquot->dq_off);
-	if (ret != sizeof(struct v2_disk_dqblk)) {
-		printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", dquot->dq_sb->s_id);
-		if (ret >= 0)
-			ret = -ENOSPC;
-	}
-	else
-		ret = 0;
-	dqstats.writes++;
-
-	return ret;
+	return qtree_write_dquot(&sb_dqinfo(dquot->dq_sb, dquot->dq_type)->u.v2_i.i, dquot);
 }
 
-/* Free dquot entry in data block */
-static int free_dqentry(struct dquot *dquot, uint blk)
-{
-	struct super_block *sb = dquot->dq_sb;
-	int type = dquot->dq_type;
-	struct v2_disk_dqdbheader *dh;
-	dqbuf_t buf = getdqbuf();
-	int ret = 0;
-
-	if (!buf)
-		return -ENOMEM;
-	if (dquot->dq_off >> V2_DQBLKSIZE_BITS != blk) {
-		printk(KERN_ERR "VFS: Quota structure has offset to other "
-		  "block (%u) than it should (%u).\n", blk,
-		  (uint)(dquot->dq_off >> V2_DQBLKSIZE_BITS));
-		goto out_buf;
-	}
-	if ((ret = read_blk(sb, type, blk, buf)) < 0) {
-		printk(KERN_ERR "VFS: Can't read quota data block %u\n", blk);
-		goto out_buf;
-	}
-	dh = (struct v2_disk_dqdbheader *)buf;
-	le16_add_cpu(&dh->dqdh_entries, -1);
-	if (!le16_to_cpu(dh->dqdh_entries)) {	/* Block got free? */
-		if ((ret = remove_free_dqentry(sb, type, buf, blk)) < 0 ||
-		    (ret = put_free_dqblk(sb, type, buf, blk)) < 0) {
-			printk(KERN_ERR "VFS: Can't move quota data block (%u) "
-			  "to free list.\n", blk);
-			goto out_buf;
-		}
-	}
-	else {
-		memset(buf+(dquot->dq_off & ((1 << V2_DQBLKSIZE_BITS)-1)), 0,
-		  sizeof(struct v2_disk_dqblk));
-		if (le16_to_cpu(dh->dqdh_entries) == V2_DQSTRINBLK-1) {
-			/* Insert will write block itself */
-			if ((ret = insert_free_dqentry(sb, type, buf, blk)) < 0) {
-				printk(KERN_ERR "VFS: Can't insert quota data block (%u) to free entry list.\n", blk);
-				goto out_buf;
-			}
-		}
-		else
-			if ((ret = write_blk(sb, type, blk, buf)) < 0) {
-				printk(KERN_ERR "VFS: Can't write quota data "
-				  "block %u\n", blk);
-				goto out_buf;
-			}
-	}
-	dquot->dq_off = 0;	/* Quota is now unattached */
-out_buf:
-	freedqbuf(buf);
-	return ret;
-}
-
-/* Remove reference to dquot from tree */
-static int remove_tree(struct dquot *dquot, uint *blk, int depth)
-{
-	struct super_block *sb = dquot->dq_sb;
-	int type = dquot->dq_type;
-	dqbuf_t buf = getdqbuf();
-	int ret = 0;
-	uint newblk;
-	__le32 *ref = (__le32 *)buf;
-	
-	if (!buf)
-		return -ENOMEM;
-	if ((ret = read_blk(sb, type, *blk, buf)) < 0) {
-		printk(KERN_ERR "VFS: Can't read quota data block %u\n", *blk);
-		goto out_buf;
-	}
-	newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
-	if (depth == V2_DQTREEDEPTH-1) {
-		ret = free_dqentry(dquot, newblk);
-		newblk = 0;
-	}
-	else
-		ret = remove_tree(dquot, &newblk, depth+1);
-	if (ret >= 0 && !newblk) {
-		int i;
-		ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(0);
-		for (i = 0; i < V2_DQBLKSIZE && !buf[i]; i++);	/* Block got empty? */
-		/* Don't put the root block into the free block list */
-		if (i == V2_DQBLKSIZE && *blk != V2_DQTREEOFF) {
-			put_free_dqblk(sb, type, buf, *blk);
-			*blk = 0;
-		}
-		else
-			if ((ret = write_blk(sb, type, *blk, buf)) < 0)
-				printk(KERN_ERR "VFS: Can't write quota tree "
-				  "block %u.\n", *blk);
-	}
-out_buf:
-	freedqbuf(buf);
-	return ret;	
-}
-
-/* Delete dquot from tree */
-static int v2_delete_dquot(struct dquot *dquot)
-{
-	uint tmp = V2_DQTREEOFF;
-
-	if (!dquot->dq_off)	/* Even not allocated? */
-		return 0;
-	return remove_tree(dquot, &tmp, 0);
-}
-
-/* Find entry in block */
-static loff_t find_block_dqentry(struct dquot *dquot, uint blk)
-{
-	dqbuf_t buf = getdqbuf();
-	loff_t ret = 0;
-	int i;
-	struct v2_disk_dqblk *ddquot = GETENTRIES(buf);
-
-	if (!buf)
-		return -ENOMEM;
-	if ((ret = read_blk(dquot->dq_sb, dquot->dq_type, blk, buf)) < 0) {
-		printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
-		goto out_buf;
-	}
-	if (dquot->dq_id)
-		for (i = 0; i < V2_DQSTRINBLK &&
-		     le32_to_cpu(ddquot[i].dqb_id) != dquot->dq_id; i++);
-	else {	/* ID 0 as a bit more complicated searching... */
-		struct v2_disk_dqblk fakedquot;
-
-		memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk));
-		for (i = 0; i < V2_DQSTRINBLK; i++)
-			if (!le32_to_cpu(ddquot[i].dqb_id) &&
-			    memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)))
-				break;
-	}
-	if (i == V2_DQSTRINBLK) {
-		printk(KERN_ERR "VFS: Quota for id %u referenced "
-		  "but not present.\n", dquot->dq_id);
-		ret = -EIO;
-		goto out_buf;
-	}
-	else
-		ret = (blk << V2_DQBLKSIZE_BITS) + sizeof(struct
-		  v2_disk_dqdbheader) + i * sizeof(struct v2_disk_dqblk);
-out_buf:
-	freedqbuf(buf);
-	return ret;
-}
-
-/* Find entry for given id in the tree */
-static loff_t find_tree_dqentry(struct dquot *dquot, uint blk, int depth)
-{
-	dqbuf_t buf = getdqbuf();
-	loff_t ret = 0;
-	__le32 *ref = (__le32 *)buf;
-
-	if (!buf)
-		return -ENOMEM;
-	if ((ret = read_blk(dquot->dq_sb, dquot->dq_type, blk, buf)) < 0) {
-		printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
-		goto out_buf;
-	}
-	ret = 0;
-	blk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
-	if (!blk)	/* No reference? */
-		goto out_buf;
-	if (depth < V2_DQTREEDEPTH-1)
-		ret = find_tree_dqentry(dquot, blk, depth+1);
-	else
-		ret = find_block_dqentry(dquot, blk);
-out_buf:
-	freedqbuf(buf);
-	return ret;
-}
-
-/* Find entry for given id in the tree - wrapper function */
-static inline loff_t find_dqentry(struct dquot *dquot)
-{
-	return find_tree_dqentry(dquot, V2_DQTREEOFF, 0);
-}
-
-static int v2_read_dquot(struct dquot *dquot)
-{
-	int type = dquot->dq_type;
-	loff_t offset;
-	struct v2_disk_dqblk ddquot, empty;
-	int ret = 0;
-
-#ifdef __QUOTA_V2_PARANOIA
-	/* Invalidated quota? */
-	if (!dquot->dq_sb || !sb_dqopt(dquot->dq_sb)->files[type]) {
-		printk(KERN_ERR "VFS: Quota invalidated while reading!\n");
-		return -EIO;
-	}
-#endif
-	offset = find_dqentry(dquot);
-	if (offset <= 0) {	/* Entry not present? */
-		if (offset < 0)
-			printk(KERN_ERR "VFS: Can't read quota "
-			  "structure for id %u.\n", dquot->dq_id);
-		dquot->dq_off = 0;
-		set_bit(DQ_FAKE_B, &dquot->dq_flags);
-		memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk));
-		ret = offset;
-	}
-	else {
-		dquot->dq_off = offset;
-		if ((ret = dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type,
-		    (char *)&ddquot, sizeof(struct v2_disk_dqblk), offset))
-		    != sizeof(struct v2_disk_dqblk)) {
-			if (ret >= 0)
-				ret = -EIO;
-			printk(KERN_ERR "VFS: Error while reading quota "
-			  "structure for id %u.\n", dquot->dq_id);
-			memset(&ddquot, 0, sizeof(struct v2_disk_dqblk));
-		}
-		else {
-			ret = 0;
-			/* We need to escape back all-zero structure */
-			memset(&empty, 0, sizeof(struct v2_disk_dqblk));
-			empty.dqb_itime = cpu_to_le64(1);
-			if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk)))
-				ddquot.dqb_itime = 0;
-		}
-		disk2memdqb(&dquot->dq_dqb, &ddquot);
-		if (!dquot->dq_dqb.dqb_bhardlimit &&
-			!dquot->dq_dqb.dqb_bsoftlimit &&
-			!dquot->dq_dqb.dqb_ihardlimit &&
-			!dquot->dq_dqb.dqb_isoftlimit)
-			set_bit(DQ_FAKE_B, &dquot->dq_flags);
-	}
-	dqstats.reads++;
-
-	return ret;
-}
-
-/* Check whether dquot should not be deleted. We know we are
- * the only one operating on dquot (thanks to dq_lock) */
 static int v2_release_dquot(struct dquot *dquot)
 {
-	if (test_bit(DQ_FAKE_B, &dquot->dq_flags) && !(dquot->dq_dqb.dqb_curinodes | dquot->dq_dqb.dqb_curspace))
-		return v2_delete_dquot(dquot);
-	return 0;
+	return qtree_release_dquot(&sb_dqinfo(dquot->dq_sb, dquot->dq_type)->u.v2_i.i, dquot);
 }
 
 static struct quota_format_ops v2_format_ops = {
diff --git a/fs/quotaio_v2.h b/fs/quotaio_v2.h
index 303d7cbe30d4..530fe580685c 100644
--- a/fs/quotaio_v2.h
+++ b/fs/quotaio_v2.h
@@ -21,6 +21,12 @@
 	0		/* GRPQUOTA */\
 }
 
+/* First generic header */
+struct v2_disk_dqheader {
+	__le32 dqh_magic;	/* Magic number identifying file */
+	__le32 dqh_version;	/* File version */
+};
+
 /*
  * The following structure defines the format of the disk quota file
  * (as it appears on disk) - the file is a radix tree whose leaves point
@@ -38,15 +44,6 @@ struct v2_disk_dqblk {
 	__le64 dqb_itime;	/* time limit for excessive inode use */
 };
 
-/*
- * Here are header structures as written on disk and their in-memory copies
- */
-/* First generic header */
-struct v2_disk_dqheader {
-	__le32 dqh_magic;	/* Magic number identifying file */
-	__le32 dqh_version;	/* File version */
-};
-
 /* Header with type and version specific information */
 struct v2_disk_dqinfo {
 	__le32 dqi_bgrace;	/* Time before block soft limit becomes hard limit */
@@ -57,23 +54,7 @@ struct v2_disk_dqinfo {
 	__le32 dqi_free_entry;	/* Number of block with at least one free entry */
 };
 
-/*
- *  Structure of header of block with quota structures. It is padded to 16 bytes so
- *  there will be space for exactly 21 quota-entries in a block
- */
-struct v2_disk_dqdbheader {
-	__le32 dqdh_next_free;	/* Number of next block with free entry */
-	__le32 dqdh_prev_free;	/* Number of previous block with free entry */
-	__le16 dqdh_entries;	/* Number of valid entries in block */
-	__le16 dqdh_pad1;
-	__le32 dqdh_pad2;
-};
-
 #define V2_DQINFOOFF	sizeof(struct v2_disk_dqheader)	/* Offset of info header in file */
-#define V2_DQBLKSIZE_BITS	10
-#define V2_DQBLKSIZE	(1 << V2_DQBLKSIZE_BITS)	/* Size of block with quota structures */
-#define V2_DQTREEOFF	1		/* Offset of tree in file in blocks */
-#define V2_DQTREEDEPTH	4		/* Depth of quota tree */
-#define V2_DQSTRINBLK	((V2_DQBLKSIZE - sizeof(struct v2_disk_dqdbheader)) / sizeof(struct v2_disk_dqblk))	/* Number of entries in one blocks */
+#define V2_DQBLKSIZE_BITS 10				/* Size of leaf block in tree */
 
 #endif /* _LINUX_QUOTAIO_V2_H */
diff --git a/include/linux/dqblk_qtree.h b/include/linux/dqblk_qtree.h
new file mode 100644
index 000000000000..82a16527b367
--- /dev/null
+++ b/include/linux/dqblk_qtree.h
@@ -0,0 +1,56 @@
+/*
+ *	Definitions of structures and functions for quota formats using trie
+ */
+
+#ifndef _LINUX_DQBLK_QTREE_H
+#define _LINUX_DQBLK_QTREE_H
+
+#include <linux/types.h>
+
+/* Numbers of blocks needed for updates - we count with the smallest
+ * possible block size (1024) */
+#define QTREE_INIT_ALLOC 4
+#define QTREE_INIT_REWRITE 2
+#define QTREE_DEL_ALLOC 0
+#define QTREE_DEL_REWRITE 6
+
+struct dquot;
+
+/* Operations */
+struct qtree_fmt_operations {
+	void (*mem2disk_dqblk)(void *disk, struct dquot *dquot);	/* Convert given entry from in memory format to disk one */
+	void (*disk2mem_dqblk)(struct dquot *dquot, void *disk);	/* Convert given entry from disk format to in memory one */
+	int (*is_id)(void *disk, struct dquot *dquot);	/* Is this structure for given id? */
+};
+
+/* Inmemory copy of version specific information */
+struct qtree_mem_dqinfo {
+	struct super_block *dqi_sb;	/* Sb quota is on */
+	int dqi_type;			/* Quota type */
+	unsigned int dqi_blocks;	/* # of blocks in quota file */
+	unsigned int dqi_free_blk;	/* First block in list of free blocks */
+	unsigned int dqi_free_entry;	/* First block with free entry */
+	unsigned int dqi_blocksize_bits;	/* Block size of quota file */
+	unsigned int dqi_entry_size;	/* Size of quota entry in quota file */
+	unsigned int dqi_usable_bs;	/* Space usable in block for quota data */
+	unsigned int dqi_qtree_depth;	/* Precomputed depth of quota tree */
+	struct qtree_fmt_operations *dqi_ops;	/* Operations for entry manipulation */
+};
+
+int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot);
+int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot);
+int qtree_delete_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot);
+int qtree_release_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot);
+int qtree_entry_unused(struct qtree_mem_dqinfo *info, char *disk);
+static inline int qtree_depth(struct qtree_mem_dqinfo *info)
+{
+	unsigned int epb = info->dqi_usable_bs >> 2;
+	unsigned long long entries = epb;
+	int i;
+
+	for (i = 1; entries < (1ULL << 32); i++)
+		entries *= epb;
+	return i;
+}
+
+#endif /* _LINUX_DQBLK_QTREE_H */
diff --git a/include/linux/dqblk_v2.h b/include/linux/dqblk_v2.h
index 4f853322cb7f..e5e22a787d58 100644
--- a/include/linux/dqblk_v2.h
+++ b/include/linux/dqblk_v2.h
@@ -1,26 +1,23 @@
 /*
- *	Definitions of structures for vfsv0 quota format
+ *  Definitions for vfsv0 quota format
  */
 
 #ifndef _LINUX_DQBLK_V2_H
 #define _LINUX_DQBLK_V2_H
 
-#include <linux/types.h>
+#include <linux/dqblk_qtree.h>
 
-/* id numbers of quota format */
+/* Id number of quota format */
 #define QFMT_VFS_V0 2
 
 /* Numbers of blocks needed for updates */
-#define V2_INIT_ALLOC 4
-#define V2_INIT_REWRITE 2
-#define V2_DEL_ALLOC 0
-#define V2_DEL_REWRITE 6
+#define V2_INIT_ALLOC QTREE_INIT_ALLOC
+#define V2_INIT_REWRITE QTREE_INIT_REWRITE
+#define V2_DEL_ALLOC QTREE_DEL_ALLOC
+#define V2_DEL_REWRITE QTREE_DEL_REWRITE
 
-/* Inmemory copy of version specific information */
 struct v2_mem_dqinfo {
-	unsigned int dqi_blocks;
-	unsigned int dqi_free_blk;
-	unsigned int dqi_free_entry;
+	struct qtree_mem_dqinfo i;
 };
 
 #endif /* _LINUX_DQBLK_V2_H */
-- 
cgit v1.2.3


From e3d4d56b9715e40ded2a84d0d4fa7f3b6c58983c Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 2 Oct 2008 18:44:14 +0200
Subject: quota: Convert union in mem_dqinfo to a pointer

Coming quota support for OCFS2 is going to need quite a bit
of additional per-sb quota information. Moreover having fs.h
include all the types needed for this structure would be a
pain in the a**. So remove the union from mem_dqinfo and add
a private pointer for filesystem's use.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
---
 fs/quota_v2.c            | 53 +++++++++++++++++++++++++++++++-----------------
 include/linux/dqblk_v1.h |  4 ----
 include/linux/dqblk_v2.h |  4 ----
 include/linux/quota.h    |  5 +----
 4 files changed, 35 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/fs/quota_v2.c b/fs/quota_v2.c
index a87f1028a425..b618b563635c 100644
--- a/fs/quota_v2.c
+++ b/fs/quota_v2.c
@@ -71,6 +71,7 @@ static int v2_read_file_info(struct super_block *sb, int type)
 {
 	struct v2_disk_dqinfo dinfo;
 	struct mem_dqinfo *info = sb_dqinfo(sb, type);
+	struct qtree_mem_dqinfo *qinfo;
 	ssize_t size;
 
 	size = sb->s_op->quota_read(sb, type, (char *)&dinfo,
@@ -80,22 +81,29 @@ static int v2_read_file_info(struct super_block *sb, int type)
 			sb->s_id);
 		return -1;
 	}
+	info->dqi_priv = kmalloc(sizeof(struct qtree_mem_dqinfo), GFP_NOFS);
+	if (!info->dqi_priv) {
+		printk(KERN_WARNING
+		       "Not enough memory for quota information structure.\n");
+		return -1;
+	}
+	qinfo = info->dqi_priv;
 	/* limits are stored as unsigned 32-bit data */
 	info->dqi_maxblimit = 0xffffffff;
 	info->dqi_maxilimit = 0xffffffff;
 	info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
 	info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
 	info->dqi_flags = le32_to_cpu(dinfo.dqi_flags);
-	info->u.v2_i.i.dqi_sb = sb;
-	info->u.v2_i.i.dqi_type = type;
-	info->u.v2_i.i.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
-	info->u.v2_i.i.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
-	info->u.v2_i.i.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
-	info->u.v2_i.i.dqi_blocksize_bits = V2_DQBLKSIZE_BITS;
-	info->u.v2_i.i.dqi_usable_bs = 1 << V2_DQBLKSIZE_BITS;
-	info->u.v2_i.i.dqi_qtree_depth = qtree_depth(&info->u.v2_i.i);
-	info->u.v2_i.i.dqi_entry_size = sizeof(struct v2_disk_dqblk);
-	info->u.v2_i.i.dqi_ops = &v2_qtree_ops;
+	qinfo->dqi_sb = sb;
+	qinfo->dqi_type = type;
+	qinfo->dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
+	qinfo->dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
+	qinfo->dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
+	qinfo->dqi_blocksize_bits = V2_DQBLKSIZE_BITS;
+	qinfo->dqi_usable_bs = 1 << V2_DQBLKSIZE_BITS;
+	qinfo->dqi_qtree_depth = qtree_depth(qinfo);
+	qinfo->dqi_entry_size = sizeof(struct v2_disk_dqblk);
+	qinfo->dqi_ops = &v2_qtree_ops;
 	return 0;
 }
 
@@ -104,6 +112,7 @@ static int v2_write_file_info(struct super_block *sb, int type)
 {
 	struct v2_disk_dqinfo dinfo;
 	struct mem_dqinfo *info = sb_dqinfo(sb, type);
+	struct qtree_mem_dqinfo *qinfo = info->dqi_priv;
 	ssize_t size;
 
 	spin_lock(&dq_data_lock);
@@ -112,9 +121,9 @@ static int v2_write_file_info(struct super_block *sb, int type)
 	dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace);
 	dinfo.dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK);
 	spin_unlock(&dq_data_lock);
-	dinfo.dqi_blocks = cpu_to_le32(info->u.v2_i.i.dqi_blocks);
-	dinfo.dqi_free_blk = cpu_to_le32(info->u.v2_i.i.dqi_free_blk);
-	dinfo.dqi_free_entry = cpu_to_le32(info->u.v2_i.i.dqi_free_entry);
+	dinfo.dqi_blocks = cpu_to_le32(qinfo->dqi_blocks);
+	dinfo.dqi_free_blk = cpu_to_le32(qinfo->dqi_free_blk);
+	dinfo.dqi_free_entry = cpu_to_le32(qinfo->dqi_free_entry);
 	size = sb->s_op->quota_write(sb, type, (char *)&dinfo,
 	       sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
 	if (size != sizeof(struct v2_disk_dqinfo)) {
@@ -150,7 +159,7 @@ static void v2_mem2diskdqb(void *dp, struct dquot *dquot)
 	struct v2_disk_dqblk *d = dp;
 	struct mem_dqblk *m = &dquot->dq_dqb;
 	struct qtree_mem_dqinfo *info =
-			&sb_dqinfo(dquot->dq_sb, dquot->dq_type)->u.v2_i.i;
+			sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
 
 	d->dqb_ihardlimit = cpu_to_le32(m->dqb_ihardlimit);
 	d->dqb_isoftlimit = cpu_to_le32(m->dqb_isoftlimit);
@@ -169,7 +178,7 @@ static int v2_is_id(void *dp, struct dquot *dquot)
 {
 	struct v2_disk_dqblk *d = dp;
 	struct qtree_mem_dqinfo *info =
-			&sb_dqinfo(dquot->dq_sb, dquot->dq_type)->u.v2_i.i;
+			sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
 
 	if (qtree_entry_unused(info, dp))
 		return 0;
@@ -178,24 +187,30 @@ static int v2_is_id(void *dp, struct dquot *dquot)
 
 static int v2_read_dquot(struct dquot *dquot)
 {
-	return qtree_read_dquot(&sb_dqinfo(dquot->dq_sb, dquot->dq_type)->u.v2_i.i, dquot);
+	return qtree_read_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv, dquot);
 }
 
 static int v2_write_dquot(struct dquot *dquot)
 {
-	return qtree_write_dquot(&sb_dqinfo(dquot->dq_sb, dquot->dq_type)->u.v2_i.i, dquot);
+	return qtree_write_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv, dquot);
 }
 
 static int v2_release_dquot(struct dquot *dquot)
 {
-	return qtree_release_dquot(&sb_dqinfo(dquot->dq_sb, dquot->dq_type)->u.v2_i.i, dquot);
+	return qtree_release_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv, dquot);
+}
+
+static int v2_free_file_info(struct super_block *sb, int type)
+{
+	kfree(sb_dqinfo(sb, type)->dqi_priv);
+	return 0;
 }
 
 static struct quota_format_ops v2_format_ops = {
 	.check_quota_file	= v2_check_quota_file,
 	.read_file_info		= v2_read_file_info,
 	.write_file_info	= v2_write_file_info,
-	.free_file_info		= NULL,
+	.free_file_info		= v2_free_file_info,
 	.read_dqblk		= v2_read_dquot,
 	.commit_dqblk		= v2_write_dquot,
 	.release_dqblk		= v2_release_dquot,
diff --git a/include/linux/dqblk_v1.h b/include/linux/dqblk_v1.h
index 57f1250d5a52..9cea901f5bba 100644
--- a/include/linux/dqblk_v1.h
+++ b/include/linux/dqblk_v1.h
@@ -17,8 +17,4 @@
 #define V1_DEL_ALLOC 0
 #define V1_DEL_REWRITE 2
 
-/* Special information about quotafile */
-struct v1_mem_dqinfo {
-};
-
 #endif	/* _LINUX_DQBLK_V1_H */
diff --git a/include/linux/dqblk_v2.h b/include/linux/dqblk_v2.h
index e5e22a787d58..ff8af1b4bda7 100644
--- a/include/linux/dqblk_v2.h
+++ b/include/linux/dqblk_v2.h
@@ -16,8 +16,4 @@
 #define V2_DEL_ALLOC QTREE_DEL_ALLOC
 #define V2_DEL_REWRITE QTREE_DEL_REWRITE
 
-struct v2_mem_dqinfo {
-	struct qtree_mem_dqinfo i;
-};
-
 #endif /* _LINUX_DQBLK_V2_H */
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 80b8807b4988..e51dfdc0aef0 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -208,10 +208,7 @@ struct mem_dqinfo {
 	unsigned int dqi_igrace;
 	qsize_t dqi_maxblimit;
 	qsize_t dqi_maxilimit;
-	union {
-		struct v1_mem_dqinfo v1_i;
-		struct v2_mem_dqinfo v2_i;
-	} u;
+	void *dqi_priv;
 };
 
 struct super_block;
-- 
cgit v1.2.3


From db49d2df489f727096438706a5428115e84a3f0d Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 1 Oct 2008 18:21:39 +0200
Subject: quota: Allow negative usage of space and inodes

For clustered filesystems, it can happen that space / inode usage goes
negative temporarily (because some node is allocating another node
is freeing and they are not completely in sync). So let quota code
allow this and change qsize_t so a signed type so that we don't
underflow the variables.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
---
 fs/dquot.c            | 6 ++++--
 include/linux/quota.h | 3 ++-
 2 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dquot.c b/fs/dquot.c
index 74185c34a4f0..9c78ffe1aad2 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -847,7 +847,8 @@ static inline void dquot_incr_space(struct dquot *dquot, qsize_t number)
 
 static inline void dquot_decr_inodes(struct dquot *dquot, qsize_t number)
 {
-	if (dquot->dq_dqb.dqb_curinodes > number)
+	if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NEGATIVE_USAGE ||
+	    dquot->dq_dqb.dqb_curinodes >= number)
 		dquot->dq_dqb.dqb_curinodes -= number;
 	else
 		dquot->dq_dqb.dqb_curinodes = 0;
@@ -858,7 +859,8 @@ static inline void dquot_decr_inodes(struct dquot *dquot, qsize_t number)
 
 static inline void dquot_decr_space(struct dquot *dquot, qsize_t number)
 {
-	if (dquot->dq_dqb.dqb_curspace > number)
+	if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NEGATIVE_USAGE ||
+	    dquot->dq_dqb.dqb_curspace >= number)
 		dquot->dq_dqb.dqb_curspace -= number;
 	else
 		dquot->dq_dqb.dqb_curspace = 0;
diff --git a/include/linux/quota.h b/include/linux/quota.h
index e51dfdc0aef0..75bf761caef2 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -168,7 +168,7 @@ enum {
 #include <asm/atomic.h>
 
 typedef __kernel_uid32_t qid_t; /* Type in which we store ids in memory */
-typedef __u64 qsize_t;          /* Type in which we store sizes */
+typedef long long qsize_t;	/* Type in which we store sizes */
 
 extern spinlock_t dq_data_lock;
 
@@ -336,6 +336,7 @@ enum {
 						 * responsible for setting
 						 * S_NOQUOTA, S_NOATIME flags
 						 */
+#define DQUOT_NEGATIVE_USAGE	(1 << 7)	/* Allow negative quota usage */
 
 static inline unsigned int dquot_state_flag(unsigned int flags, int type)
 {
-- 
cgit v1.2.3


From 4d59bce4f9eaf26d6d9046b56a2f1c0c7f20981d Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 2 Oct 2008 16:48:10 +0200
Subject: quota: Keep which entries were set by SETQUOTA quotactl

Quota in a clustered environment needs to synchronize quota information
among cluster nodes. This means we have to occasionally update some
information in dquot from disk / network. On the other hand we have to
be careful not to overwrite changes administrator did via SETQUOTA.
So indicate in dquot->dq_flags which entries have been set by SETQUOTA
and quota format can clear these flags when it properly propagated
the changes.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
---
 fs/dquot.c            | 12 ++++++++++--
 include/linux/quota.h | 26 ++++++++++++++++++++------
 2 files changed, 30 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dquot.c b/fs/dquot.c
index 9c78ffe1aad2..89226726daa8 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -2010,25 +2010,33 @@ static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
 	if (di->dqb_valid & QIF_SPACE) {
 		dm->dqb_curspace = di->dqb_curspace;
 		check_blim = 1;
+		__set_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags);
 	}
 	if (di->dqb_valid & QIF_BLIMITS) {
 		dm->dqb_bsoftlimit = qbtos(di->dqb_bsoftlimit);
 		dm->dqb_bhardlimit = qbtos(di->dqb_bhardlimit);
 		check_blim = 1;
+		__set_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags);
 	}
 	if (di->dqb_valid & QIF_INODES) {
 		dm->dqb_curinodes = di->dqb_curinodes;
 		check_ilim = 1;
+		__set_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags);
 	}
 	if (di->dqb_valid & QIF_ILIMITS) {
 		dm->dqb_isoftlimit = di->dqb_isoftlimit;
 		dm->dqb_ihardlimit = di->dqb_ihardlimit;
 		check_ilim = 1;
+		__set_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags);
 	}
-	if (di->dqb_valid & QIF_BTIME)
+	if (di->dqb_valid & QIF_BTIME) {
 		dm->dqb_btime = di->dqb_btime;
-	if (di->dqb_valid & QIF_ITIME)
+		__set_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags);
+	}
+	if (di->dqb_valid & QIF_ITIME) {
 		dm->dqb_itime = di->dqb_itime;
+		__set_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags);
+	}
 
 	if (check_blim) {
 		if (!dm->dqb_bsoftlimit || dm->dqb_curspace < dm->dqb_bsoftlimit) {
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 75bf761caef2..6d98885c16da 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -80,12 +80,21 @@
  * Quota structure used for communication with userspace via quotactl
  * Following flags are used to specify which fields are valid
  */
-#define QIF_BLIMITS	1
-#define QIF_SPACE	2
-#define QIF_ILIMITS	4
-#define QIF_INODES	8
-#define QIF_BTIME	16
-#define QIF_ITIME	32
+enum {
+	QIF_BLIMITS_B = 0,
+	QIF_SPACE_B,
+	QIF_ILIMITS_B,
+	QIF_INODES_B,
+	QIF_BTIME_B,
+	QIF_ITIME_B,
+};
+
+#define QIF_BLIMITS	(1 << QIF_BLIMITS_B)
+#define QIF_SPACE	(1 << QIF_SPACE_B)
+#define QIF_ILIMITS	(1 << QIF_ILIMITS_B)
+#define QIF_INODES	(1 << QIF_INODES_B)
+#define QIF_BTIME	(1 << QIF_BTIME_B)
+#define QIF_ITIME	(1 << QIF_ITIME_B)
 #define QIF_LIMITS	(QIF_BLIMITS | QIF_ILIMITS)
 #define QIF_USAGE	(QIF_SPACE | QIF_INODES)
 #define QIF_TIMES	(QIF_BTIME | QIF_ITIME)
@@ -242,6 +251,11 @@ extern struct dqstats dqstats;
 #define DQ_FAKE_B	3	/* no limits only usage */
 #define DQ_READ_B	4	/* dquot was read into memory */
 #define DQ_ACTIVE_B	5	/* dquot is active (dquot_release not called) */
+#define DQ_LASTSET_B	6	/* Following 6 bits (see QIF_) are reserved\
+				 * for the mask of entries set via SETQUOTA\
+				 * quotactl. They are set under dq_data_lock\
+				 * and the quota format handling dquot can\
+				 * clear them when it sees fit. */
 
 struct dquot {
 	struct hlist_node dq_hash;	/* Hash list in memory */
-- 
cgit v1.2.3


From 571b46e40bebb0d57130ca24c4a84dfd553adb91 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 30 Oct 2008 09:17:52 +0100
Subject: quota: Update version number

Increase reported version number of quota support since quota core has changed
significantly. Also remove __DQUOT_NUM_VERSION__ since nobody uses it.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
---
 include/linux/quota.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/quota.h b/include/linux/quota.h
index 6d98885c16da..ec82beb10424 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -36,8 +36,7 @@
 #include <linux/errno.h>
 #include <linux/types.h>
 
-#define __DQUOT_VERSION__	"dquot_6.5.1"
-#define __DQUOT_NUM_VERSION__	6*10000+5*100+1
+#define __DQUOT_VERSION__	"dquot_6.5.2"
 
 #define MAXQUOTAS 2
 #define USRQUOTA  0		/* element used for user quotas */
-- 
cgit v1.2.3


From 3d9ea253a0e73dccaa869888ec2ceb17ea76c810 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Fri, 10 Oct 2008 16:12:23 +0200
Subject: quota: Add helpers to allow ocfs2 specific quota initialization,
 freeing and recovery

OCFS2 needs to peek whether quota structure is already in memory so
that it can avoid expensive cluster locking in that case. Similarly
when freeing dquots, it checks whether it is the last quota structure
user or not. Finally, it needs to get reference to dquot structure for
specified id and quota type when recovering quota file after crash.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
---
 fs/dquot.c               | 38 ++++++++++++++++++++++++++++++++------
 include/linux/quotaops.h |  4 ++++
 2 files changed, 36 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dquot.c b/fs/dquot.c
index 89226726daa8..ae8fd9e645cc 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -211,8 +211,6 @@ static struct hlist_head *dquot_hash;
 
 struct dqstats dqstats;
 
-static void dqput(struct dquot *dquot);
-
 static inline unsigned int
 hashfn(const struct super_block *sb, unsigned int id, int type)
 {
@@ -568,7 +566,7 @@ static struct shrinker dqcache_shrinker = {
  * NOTE: If you change this function please check whether dqput_blocks() works right...
  * MUST be called with either dqptr_sem or dqonoff_mutex held
  */
-static void dqput(struct dquot *dquot)
+void dqput(struct dquot *dquot)
 {
 	int ret;
 
@@ -661,11 +659,29 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type)
 	return dquot;
 }
 
+/*
+ * Check whether dquot is in memory.
+ * MUST be called with either dqptr_sem or dqonoff_mutex held
+ */
+int dquot_is_cached(struct super_block *sb, unsigned int id, int type)
+{
+	unsigned int hashent = hashfn(sb, id, type);
+	int ret = 0;
+
+        if (!sb_has_quota_active(sb, type))
+		return 0;
+	spin_lock(&dq_list_lock);
+	if (find_dquot(hashent, sb, id, type) != NODQUOT)
+		ret = 1;
+	spin_unlock(&dq_list_lock);
+	return ret;
+}
+
 /*
  * Get reference to dquot
  * MUST be called with either dqptr_sem or dqonoff_mutex held
  */
-static struct dquot *dqget(struct super_block *sb, unsigned int id, int type)
+struct dquot *dqget(struct super_block *sb, unsigned int id, int type)
 {
 	unsigned int hashent = hashfn(sb, id, type);
 	struct dquot *dquot, *empty = NODQUOT;
@@ -1184,17 +1200,23 @@ out_err:
  * 	Release all quotas referenced by inode
  *	Transaction must be started at an entry
  */
-int dquot_drop(struct inode *inode)
+int dquot_drop_locked(struct inode *inode)
 {
 	int cnt;
 
-	down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		if (inode->i_dquot[cnt] != NODQUOT) {
 			dqput(inode->i_dquot[cnt]);
 			inode->i_dquot[cnt] = NODQUOT;
 		}
 	}
+	return 0;
+}
+
+int dquot_drop(struct inode *inode)
+{
+	down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
+	dquot_drop_locked(inode);
 	up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
 	return 0;
 }
@@ -2308,7 +2330,11 @@ EXPORT_SYMBOL(dquot_release);
 EXPORT_SYMBOL(dquot_mark_dquot_dirty);
 EXPORT_SYMBOL(dquot_initialize);
 EXPORT_SYMBOL(dquot_drop);
+EXPORT_SYMBOL(dquot_drop_locked);
 EXPORT_SYMBOL(vfs_dq_drop);
+EXPORT_SYMBOL(dqget);
+EXPORT_SYMBOL(dqput);
+EXPORT_SYMBOL(dquot_is_cached);
 EXPORT_SYMBOL(dquot_alloc_space);
 EXPORT_SYMBOL(dquot_alloc_inode);
 EXPORT_SYMBOL(dquot_free_space);
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index e840ca523175..e3a10272d471 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -24,6 +24,10 @@ void sync_dquots(struct super_block *sb, int type);
 
 int dquot_initialize(struct inode *inode, int type);
 int dquot_drop(struct inode *inode);
+int dquot_drop_locked(struct inode *inode);
+struct dquot *dqget(struct super_block *sb, unsigned int id, int type);
+void dqput(struct dquot *dquot);
+int dquot_is_cached(struct super_block *sb, unsigned int id, int type);
 
 int dquot_alloc_space(struct inode *inode, qsize_t number, int prealloc);
 int dquot_alloc_inode(const struct inode *inode, qsize_t number);
-- 
cgit v1.2.3


From 12c77527e4138bc3b17d17b0e0c909e4fc84924f Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 20 Oct 2008 17:05:00 +0200
Subject: quota: Implement function for scanning active dquots

OCFS2 needs to scan all active dquots once in a while and sync quota
information among cluster nodes. Provide a helper function for it so
that it does not have to reimplement internally a list which VFS
already has. Moreover this function is probably going to be useful
for other clustered filesystems if they decide to use VFS quotas.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
---
 fs/dquot.c               | 36 ++++++++++++++++++++++++++++++++++++
 include/linux/quotaops.h |  3 +++
 2 files changed, 39 insertions(+)

(limited to 'include/linux')

diff --git a/fs/dquot.c b/fs/dquot.c
index ae8fd9e645cc..075dc76904e7 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -476,6 +476,41 @@ restart:
 	spin_unlock(&dq_list_lock);
 }
 
+/* Call callback for every active dquot on given filesystem */
+int dquot_scan_active(struct super_block *sb,
+		      int (*fn)(struct dquot *dquot, unsigned long priv),
+		      unsigned long priv)
+{
+	struct dquot *dquot, *old_dquot = NULL;
+	int ret = 0;
+
+	mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
+	spin_lock(&dq_list_lock);
+	list_for_each_entry(dquot, &inuse_list, dq_inuse) {
+		if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags))
+			continue;
+		if (dquot->dq_sb != sb)
+			continue;
+		/* Now we have active dquot so we can just increase use count */
+		atomic_inc(&dquot->dq_count);
+		dqstats.lookups++;
+		spin_unlock(&dq_list_lock);
+		dqput(old_dquot);
+		old_dquot = dquot;
+		ret = fn(dquot, priv);
+		if (ret < 0)
+			goto out;
+		spin_lock(&dq_list_lock);
+		/* We are safe to continue now because our dquot could not
+		 * be moved out of the inuse list while we hold the reference */
+	}
+	spin_unlock(&dq_list_lock);
+out:
+	dqput(old_dquot);
+	mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
+	return ret;
+}
+
 int vfs_quota_sync(struct super_block *sb, int type)
 {
 	struct list_head *dirty;
@@ -2318,6 +2353,7 @@ EXPORT_SYMBOL(vfs_quota_on_path);
 EXPORT_SYMBOL(vfs_quota_on_mount);
 EXPORT_SYMBOL(vfs_quota_disable);
 EXPORT_SYMBOL(vfs_quota_off);
+EXPORT_SYMBOL(dquot_scan_active);
 EXPORT_SYMBOL(vfs_quota_sync);
 EXPORT_SYMBOL(vfs_get_dqinfo);
 EXPORT_SYMBOL(vfs_set_dqinfo);
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index e3a10272d471..f4913948c305 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -28,6 +28,9 @@ int dquot_drop_locked(struct inode *inode);
 struct dquot *dqget(struct super_block *sb, unsigned int id, int type);
 void dqput(struct dquot *dquot);
 int dquot_is_cached(struct super_block *sb, unsigned int id, int type);
+int dquot_scan_active(struct super_block *sb,
+		      int (*fn)(struct dquot *dquot, unsigned long priv),
+		      unsigned long priv);
 
 int dquot_alloc_space(struct inode *inode, qsize_t number, int prealloc);
 int dquot_alloc_inode(const struct inode *inode, qsize_t number);
-- 
cgit v1.2.3


From e97fcd95a4778a8caf1980c6c72fdf68185a0838 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mfasheh@suse.com>
Date: Tue, 18 Nov 2008 17:15:24 -0800
Subject: jbd2: Add BH_JBDPrivateStart

Add this so that file systems using JBD2 can safely allocate unused b_state
bits.

In this case, we add it so that Ocfs2 can define a single bit for tracking
the validation state of a buffer.

Acked-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
---
 include/linux/jbd2.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index c7d106ef22e2..f36645745489 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -329,6 +329,7 @@ enum jbd_state_bits {
 	BH_State,		/* Pins most journal_head state */
 	BH_JournalHead,		/* Pins bh->b_private and jh->b_bh */
 	BH_Unshadow,		/* Dummy bit, for BJ_Shadow wakeup filtering */
+	BH_JBDPrivateStart,	/* First bit available for private use by FS */
 };
 
 BUFFER_FNS(JBD, jbd)
-- 
cgit v1.2.3


From 5cd9d5bb86daf632a40f90e2321ea9379e42f073 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 25 Nov 2008 15:31:31 +0100
Subject: quota: Unexport dqblk_v1.h and dqblk_v2.h

Unexport header files dqblk_v[12].h since except for quota format ID they
don't contain information userspace should be interested in. Move ID
definitions to quota.h.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
---
 include/linux/Kbuild     | 2 --
 include/linux/dqblk_v1.h | 3 ---
 include/linux/dqblk_v2.h | 3 ---
 include/linux/quota.h    | 4 ++++
 4 files changed, 4 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 900a787cbae9..39da666067b9 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -56,8 +56,6 @@ header-y += dlm_device.h
 header-y += dlm_netlink.h
 header-y += dm-ioctl.h
 header-y += dn.h
-header-y += dqblk_v1.h
-header-y += dqblk_v2.h
 header-y += dqblk_xfs.h
 header-y += efs_fs_sb.h
 header-y += elf-fdpic.h
diff --git a/include/linux/dqblk_v1.h b/include/linux/dqblk_v1.h
index 9cea901f5bba..3713a7232dd8 100644
--- a/include/linux/dqblk_v1.h
+++ b/include/linux/dqblk_v1.h
@@ -5,9 +5,6 @@
 #ifndef _LINUX_DQBLK_V1_H
 #define _LINUX_DQBLK_V1_H
 
-/* Id of quota format */
-#define QFMT_VFS_OLD 1
-
 /* Root squash turned on */
 #define V1_DQF_RSQUASH 1
 
diff --git a/include/linux/dqblk_v2.h b/include/linux/dqblk_v2.h
index ff8af1b4bda7..18000a542677 100644
--- a/include/linux/dqblk_v2.h
+++ b/include/linux/dqblk_v2.h
@@ -7,9 +7,6 @@
 
 #include <linux/dqblk_qtree.h>
 
-/* Id number of quota format */
-#define QFMT_VFS_V0 2
-
 /* Numbers of blocks needed for updates */
 #define V2_INIT_ALLOC QTREE_INIT_ALLOC
 #define V2_INIT_REWRITE QTREE_INIT_REWRITE
diff --git a/include/linux/quota.h b/include/linux/quota.h
index ec82beb10424..d72d5d84fde5 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -70,6 +70,10 @@
 #define Q_GETQUOTA 0x800007	/* get user quota structure */
 #define Q_SETQUOTA 0x800008	/* set user quota structure */
 
+/* Quota format type IDs */
+#define	QFMT_VFS_OLD 1
+#define	QFMT_VFS_V0 2
+
 /* Size of block in which space limits are passed through the quota
  * interface */
 #define QIF_DQBLKSIZE_BITS 10
-- 
cgit v1.2.3


From 7d9056ba20ebed6e3937a2e23183f6117919cb00 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 25 Nov 2008 15:31:32 +0100
Subject: quota: Export dquot_alloc() and dquot_destroy() functions

These are default functions for creating and destroying quota structures
and they should be used from filesystems.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
---
 fs/dquot.c               | 6 ++++--
 include/linux/quotaops.h | 2 ++
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dquot.c b/fs/dquot.c
index 075dc76904e7..61bfff64e5af 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -413,10 +413,11 @@ out_dqlock:
 	return ret;
 }
 
-static void dquot_destroy(struct dquot *dquot)
+void dquot_destroy(struct dquot *dquot)
 {
 	kmem_cache_free(dquot_cachep, dquot);
 }
+EXPORT_SYMBOL(dquot_destroy);
 
 static inline void do_destroy_dquot(struct dquot *dquot)
 {
@@ -668,10 +669,11 @@ we_slept:
 	spin_unlock(&dq_list_lock);
 }
 
-static struct dquot *dquot_alloc(struct super_block *sb, int type)
+struct dquot *dquot_alloc(struct super_block *sb, int type)
 {
 	return kmem_cache_zalloc(dquot_cachep, GFP_NOFS);
 }
+EXPORT_SYMBOL(dquot_alloc);
 
 static struct dquot *get_empty_dquot(struct super_block *sb, int type)
 {
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index f4913948c305..21b781a3350f 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -31,6 +31,8 @@ int dquot_is_cached(struct super_block *sb, unsigned int id, int type);
 int dquot_scan_active(struct super_block *sb,
 		      int (*fn)(struct dquot *dquot, unsigned long priv),
 		      unsigned long priv);
+struct dquot *dquot_alloc(struct super_block *sb, int type);
+void dquot_destroy(struct dquot *dquot);
 
 int dquot_alloc_space(struct inode *inode, qsize_t number, int prealloc);
 int dquot_alloc_inode(const struct inode *inode, qsize_t number);
-- 
cgit v1.2.3


From e06c8227fd94ec181849ba206bf032be31c4295c Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 11 Sep 2008 15:35:47 -0700
Subject: jbd2: Add buffer triggers

Filesystems often to do compute intensive operation on some
metadata.  If this operation is repeated many times, it can be very
expensive.  It would be much nicer if the operation could be performed
once before a buffer goes to disk.

This adds triggers to jbd2 buffer heads.  Just before writing a metadata
buffer to the journal, jbd2 will optionally call a commit trigger associated
with the buffer.  If the journal is aborted, an abort trigger will be
called on any dirty buffers as they are dropped from pending
transactions.

ocfs2 will use this feature.

Initially I tried to come up with a more generic trigger that could be
used for non-buffer-related events like transaction completion.  It
doesn't tie nicely, because the information a buffer trigger needs
(specific to a journal_head) isn't the same as what a transaction
trigger needs (specific to a tranaction_t or perhaps journal_t).  So I
implemented a buffer set, with the understanding that
journal/transaction wide triggers should be implemented separately.

There is only one trigger set allowed per buffer.  I can't think of any
reason to attach more than one set.  Contrast this with a journal or
transaction in which multiple places may want to watch the entire
transaction separately.

The trigger sets are considered static allocation from the jbd2
perspective.  ocfs2 will just have one trigger set per block type,
setting the same set on every bh of the same type.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
---
 fs/jbd2/commit.c             |  9 +++++++++
 fs/jbd2/journal.c            | 19 ++++++++++++++++++
 fs/jbd2/transaction.c        | 47 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/jbd2.h         | 31 +++++++++++++++++++++++++++++
 include/linux/journal-head.h |  8 ++++++++
 5 files changed, 114 insertions(+)

(limited to 'include/linux')

diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index ebc667bc54a8..c8a1bace685a 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -509,6 +509,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 		if (is_journal_aborted(journal)) {
 			clear_buffer_jbddirty(jh2bh(jh));
 			JBUFFER_TRACE(jh, "journal is aborting: refile");
+			jbd2_buffer_abort_trigger(jh,
+						  jh->b_frozen_data ?
+						  jh->b_frozen_triggers :
+						  jh->b_triggers);
 			jbd2_journal_refile_buffer(journal, jh);
 			/* If that was the last one, we need to clean up
 			 * any descriptor buffers which may have been
@@ -844,6 +848,9 @@ restart_loop:
 		 * data.
 		 *
 		 * Otherwise, we can just throw away the frozen data now.
+		 *
+		 * We also know that the frozen data has already fired
+		 * its triggers if they exist, so we can clear that too.
 		 */
 		if (jh->b_committed_data) {
 			jbd2_free(jh->b_committed_data, bh->b_size);
@@ -851,10 +858,12 @@ restart_loop:
 			if (jh->b_frozen_data) {
 				jh->b_committed_data = jh->b_frozen_data;
 				jh->b_frozen_data = NULL;
+				jh->b_frozen_triggers = NULL;
 			}
 		} else if (jh->b_frozen_data) {
 			jbd2_free(jh->b_frozen_data, bh->b_size);
 			jh->b_frozen_data = NULL;
+			jh->b_frozen_triggers = NULL;
 		}
 
 		spin_lock(&journal->j_list_lock);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index e70d657a19f8..f6bff9d6f8df 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -50,6 +50,7 @@ EXPORT_SYMBOL(jbd2_journal_unlock_updates);
 EXPORT_SYMBOL(jbd2_journal_get_write_access);
 EXPORT_SYMBOL(jbd2_journal_get_create_access);
 EXPORT_SYMBOL(jbd2_journal_get_undo_access);
+EXPORT_SYMBOL(jbd2_journal_set_triggers);
 EXPORT_SYMBOL(jbd2_journal_dirty_metadata);
 EXPORT_SYMBOL(jbd2_journal_release_buffer);
 EXPORT_SYMBOL(jbd2_journal_forget);
@@ -290,6 +291,7 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
 	struct page *new_page;
 	unsigned int new_offset;
 	struct buffer_head *bh_in = jh2bh(jh_in);
+	struct jbd2_buffer_trigger_type *triggers;
 
 	/*
 	 * The buffer really shouldn't be locked: only the current committing
@@ -314,12 +316,22 @@ repeat:
 		done_copy_out = 1;
 		new_page = virt_to_page(jh_in->b_frozen_data);
 		new_offset = offset_in_page(jh_in->b_frozen_data);
+		triggers = jh_in->b_frozen_triggers;
 	} else {
 		new_page = jh2bh(jh_in)->b_page;
 		new_offset = offset_in_page(jh2bh(jh_in)->b_data);
+		triggers = jh_in->b_triggers;
 	}
 
 	mapped_data = kmap_atomic(new_page, KM_USER0);
+	/*
+	 * Fire any commit trigger.  Do this before checking for escaping,
+	 * as the trigger may modify the magic offset.  If a copy-out
+	 * happens afterwards, it will have the correct data in the buffer.
+	 */
+	jbd2_buffer_commit_trigger(jh_in, mapped_data + new_offset,
+				   triggers);
+
 	/*
 	 * Check for escaping
 	 */
@@ -352,6 +364,13 @@ repeat:
 		new_page = virt_to_page(tmp);
 		new_offset = offset_in_page(tmp);
 		done_copy_out = 1;
+
+		/*
+		 * This isn't strictly necessary, as we're using frozen
+		 * data for the escaping, but it keeps consistency with
+		 * b_frozen_data usage.
+		 */
+		jh_in->b_frozen_triggers = jh_in->b_triggers;
 	}
 
 	/*
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 39b7805a599a..4f925a4f3d05 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -741,6 +741,12 @@ done:
 		source = kmap_atomic(page, KM_USER0);
 		memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
 		kunmap_atomic(source, KM_USER0);
+
+		/*
+		 * Now that the frozen data is saved off, we need to store
+		 * any matching triggers.
+		 */
+		jh->b_frozen_triggers = jh->b_triggers;
 	}
 	jbd_unlock_bh_state(bh);
 
@@ -943,6 +949,47 @@ out:
 	return err;
 }
 
+/**
+ * void jbd2_journal_set_triggers() - Add triggers for commit writeout
+ * @bh: buffer to trigger on
+ * @type: struct jbd2_buffer_trigger_type containing the trigger(s).
+ *
+ * Set any triggers on this journal_head.  This is always safe, because
+ * triggers for a committing buffer will be saved off, and triggers for
+ * a running transaction will match the buffer in that transaction.
+ *
+ * Call with NULL to clear the triggers.
+ */
+void jbd2_journal_set_triggers(struct buffer_head *bh,
+			       struct jbd2_buffer_trigger_type *type)
+{
+	struct journal_head *jh = bh2jh(bh);
+
+	jh->b_triggers = type;
+}
+
+void jbd2_buffer_commit_trigger(struct journal_head *jh, void *mapped_data,
+				struct jbd2_buffer_trigger_type *triggers)
+{
+	struct buffer_head *bh = jh2bh(jh);
+
+	if (!triggers || !triggers->t_commit)
+		return;
+
+	triggers->t_commit(triggers, bh, mapped_data, bh->b_size);
+}
+
+void jbd2_buffer_abort_trigger(struct journal_head *jh,
+			       struct jbd2_buffer_trigger_type *triggers)
+{
+	if (!triggers || !triggers->t_abort)
+		return;
+
+	triggers->t_abort(triggers, jh2bh(jh));
+}
+
+
+
 /**
  * int jbd2_journal_dirty_metadata() -  mark a buffer as containing dirty metadata
  * @handle: transaction to add buffer to.
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index f36645745489..34456476e761 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1008,6 +1008,35 @@ int __jbd2_journal_clean_checkpoint_list(journal_t *journal);
 int __jbd2_journal_remove_checkpoint(struct journal_head *);
 void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *);
 
+
+/*
+ * Triggers
+ */
+
+struct jbd2_buffer_trigger_type {
+	/*
+	 * Fired just before a buffer is written to the journal.
+	 * mapped_data is a mapped buffer that is the frozen data for
+	 * commit.
+	 */
+	void (*t_commit)(struct jbd2_buffer_trigger_type *type,
+			 struct buffer_head *bh, void *mapped_data,
+			 size_t size);
+
+	/*
+	 * Fired during journal abort for dirty buffers that will not be
+	 * committed.
+	 */
+	void (*t_abort)(struct jbd2_buffer_trigger_type *type,
+			struct buffer_head *bh);
+};
+
+extern void jbd2_buffer_commit_trigger(struct journal_head *jh,
+				       void *mapped_data,
+				       struct jbd2_buffer_trigger_type *triggers);
+extern void jbd2_buffer_abort_trigger(struct journal_head *jh,
+				      struct jbd2_buffer_trigger_type *triggers);
+
 /* Buffer IO */
 extern int
 jbd2_journal_write_metadata_buffer(transaction_t	  *transaction,
@@ -1046,6 +1075,8 @@ extern int	 jbd2_journal_extend (handle_t *, int nblocks);
 extern int	 jbd2_journal_get_write_access(handle_t *, struct buffer_head *);
 extern int	 jbd2_journal_get_create_access (handle_t *, struct buffer_head *);
 extern int	 jbd2_journal_get_undo_access(handle_t *, struct buffer_head *);
+void		 jbd2_journal_set_triggers(struct buffer_head *,
+					   struct jbd2_buffer_trigger_type *type);
 extern int	 jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *);
 extern void	 jbd2_journal_release_buffer (handle_t *, struct buffer_head *);
 extern int	 jbd2_journal_forget (handle_t *, struct buffer_head *);
diff --git a/include/linux/journal-head.h b/include/linux/journal-head.h
index bb70ebb6a2d5..525aac3c97df 100644
--- a/include/linux/journal-head.h
+++ b/include/linux/journal-head.h
@@ -12,6 +12,8 @@
 
 typedef unsigned int		tid_t;		/* Unique transaction ID */
 typedef struct transaction_s	transaction_t;	/* Compound transaction type */
+
+
 struct buffer_head;
 
 struct journal_head {
@@ -87,6 +89,12 @@ struct journal_head {
 	 * [j_list_lock]
 	 */
 	struct journal_head *b_cpnext, *b_cpprev;
+
+	/* Trigger type */
+	struct jbd2_buffer_trigger_type *b_triggers;
+
+	/* Trigger type for the committing transaction's frozen data */
+	struct jbd2_buffer_trigger_type *b_frozen_triggers;
 };
 
 #endif		/* JOURNAL_HEAD_H_INCLUDED */
-- 
cgit v1.2.3


From 4c728ef583b3d82266584da5cb068294c09df31e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 22 Dec 2008 21:11:15 +0100
Subject: add a vfs_fsync helper

Fsync currently has a fdatawrite/fdatawait pair around the method call,
and a mutex_lock/unlock of the inode mutex.  All callers of fsync have
to duplicate this, but we have a few and most of them don't quite get
it right.  This patch adds a new vfs_fsync that takes care of this.
It's a little more complicated as usual as ->fsync might get a NULL file
pointer and just a dentry from nfsd, but otherwise gets afile and we
want to take the mapping and file operations from it when it is there.

Notes on the fsync callers:

 - ecryptfs wasn't calling filemap_fdatawrite / filemap_fdatawait on the
   	lower file
 - coda wasn't calling filemap_fdatawrite / filemap_fdatawait on the host
	file, and returning 0 when ->fsync was missing
 - shm wasn't calling either filemap_fdatawrite / filemap_fdatawait nor
   taking i_mutex.  Now given that shared memory doesn't have disk
   backing not doing anything in fsync seems fine and I left it out of
   the vfs_fsync conversion for now, but in that case we might just
   not pass it through to the lower file at all but just call the no-op
   simple_sync_file directly.

[and now actually export vfs_fsync]

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/usb/gadget/file_storage.c | 18 +--------------
 fs/coda/file.c                    | 12 ++--------
 fs/ecryptfs/file.c                | 15 +++---------
 fs/nfsd/vfs.c                     | 35 +++-------------------------
 fs/sync.c                         | 48 ++++++++++++++++++++++++++++++---------
 include/linux/fs.h                |  2 +-
 mm/msync.c                        |  2 +-
 7 files changed, 48 insertions(+), 84 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/gadget/file_storage.c b/drivers/usb/gadget/file_storage.c
index c4e62a6297d7..2e71368f45b4 100644
--- a/drivers/usb/gadget/file_storage.c
+++ b/drivers/usb/gadget/file_storage.c
@@ -1863,26 +1863,10 @@ static int do_write(struct fsg_dev *fsg)
 static int fsync_sub(struct lun *curlun)
 {
 	struct file	*filp = curlun->filp;
-	struct inode	*inode;
-	int		rc, err;
 
 	if (curlun->ro || !filp)
 		return 0;
-	if (!filp->f_op->fsync)
-		return -EINVAL;
-
-	inode = filp->f_path.dentry->d_inode;
-	mutex_lock(&inode->i_mutex);
-	rc = filemap_fdatawrite(inode->i_mapping);
-	err = filp->f_op->fsync(filp, filp->f_path.dentry, 1);
-	if (!rc)
-		rc = err;
-	err = filemap_fdatawait(inode->i_mapping);
-	if (!rc)
-		rc = err;
-	mutex_unlock(&inode->i_mutex);
-	VLDBG(curlun, "fdatasync -> %d\n", rc);
-	return rc;
+	return vfs_fsync(filp, filp->f_path.dentry, 1);
 }
 
 static void fsync_all(struct fsg_dev *fsg)
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 466303db2df6..6a347fbc998a 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -201,8 +201,7 @@ int coda_release(struct inode *coda_inode, struct file *coda_file)
 int coda_fsync(struct file *coda_file, struct dentry *coda_dentry, int datasync)
 {
 	struct file *host_file;
-	struct dentry *host_dentry;
-	struct inode *host_inode, *coda_inode = coda_dentry->d_inode;
+	struct inode *coda_inode = coda_dentry->d_inode;
 	struct coda_file_info *cfi;
 	int err = 0;
 
@@ -214,14 +213,7 @@ int coda_fsync(struct file *coda_file, struct dentry *coda_dentry, int datasync)
 	BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
 	host_file = cfi->cfi_container;
 
-	if (host_file->f_op && host_file->f_op->fsync) {
-		host_dentry = host_file->f_path.dentry;
-		host_inode = host_dentry->d_inode;
-		mutex_lock(&host_inode->i_mutex);
-		err = host_file->f_op->fsync(host_file, host_dentry, datasync);
-		mutex_unlock(&host_inode->i_mutex);
-	}
-
+	err = vfs_fsync(host_file, host_file->f_path.dentry, datasync);
 	if ( !err && !datasync ) {
 		lock_kernel();
 		err = venus_fsync(coda_inode->i_sb, coda_i2f(coda_inode));
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index eb3dc4c7ac06..713834371229 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -275,18 +275,9 @@ static int ecryptfs_release(struct inode *inode, struct file *file)
 static int
 ecryptfs_fsync(struct file *file, struct dentry *dentry, int datasync)
 {
-	struct file *lower_file = ecryptfs_file_to_lower(file);
-	struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
-	struct inode *lower_inode = lower_dentry->d_inode;
-	int rc = -EINVAL;
-
-	if (lower_inode->i_fop->fsync) {
-		mutex_lock(&lower_inode->i_mutex);
-		rc = lower_inode->i_fop->fsync(lower_file, lower_dentry,
-					       datasync);
-		mutex_unlock(&lower_inode->i_mutex);
-	}
-	return rc;
+	return vfs_fsync(ecryptfs_file_to_lower(file),
+			 ecryptfs_dentry_to_lower(dentry),
+			 datasync);
 }
 
 static int ecryptfs_fasync(int fd, struct file *file, int flag)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 5245a3965004..44aa92aba891 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -744,45 +744,16 @@ nfsd_close(struct file *filp)
 	fput(filp);
 }
 
-/*
- * Sync a file
- * As this calls fsync (not fdatasync) there is no need for a write_inode
- * after it.
- */
-static inline int nfsd_dosync(struct file *filp, struct dentry *dp,
-			      const struct file_operations *fop)
-{
-	struct inode *inode = dp->d_inode;
-	int (*fsync) (struct file *, struct dentry *, int);
-	int err;
-
-	err = filemap_fdatawrite(inode->i_mapping);
-	if (err == 0 && fop && (fsync = fop->fsync))
-		err = fsync(filp, dp, 0);
-	if (err == 0)
-		err = filemap_fdatawait(inode->i_mapping);
-
-	return err;
-}
-	
-
 static int
 nfsd_sync(struct file *filp)
 {
-        int err;
-	struct inode *inode = filp->f_path.dentry->d_inode;
-	dprintk("nfsd: sync file %s\n", filp->f_path.dentry->d_name.name);
-	mutex_lock(&inode->i_mutex);
-	err=nfsd_dosync(filp, filp->f_path.dentry, filp->f_op);
-	mutex_unlock(&inode->i_mutex);
-
-	return err;
+	return vfs_fsync(filp, filp->f_path.dentry, 0);
 }
 
 int
-nfsd_sync_dir(struct dentry *dp)
+nfsd_sync_dir(struct dentry *dentry)
 {
-	return nfsd_dosync(NULL, dp, dp->d_inode->i_fop);
+	return vfs_fsync(NULL, dentry, 0);
 }
 
 /*
diff --git a/fs/sync.c b/fs/sync.c
index 2967562d416f..0921d6d4b5e6 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -75,14 +75,39 @@ int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
 	return ret;
 }
 
-long do_fsync(struct file *file, int datasync)
+/**
+ * vfs_fsync - perform a fsync or fdatasync on a file
+ * @file:		file to sync
+ * @dentry:		dentry of @file
+ * @data:		only perform a fdatasync operation
+ *
+ * Write back data and metadata for @file to disk.  If @datasync is
+ * set only metadata needed to access modified file data is written.
+ *
+ * In case this function is called from nfsd @file may be %NULL and
+ * only @dentry is set.  This can only happen when the filesystem
+ * implements the export_operations API.
+ */
+int vfs_fsync(struct file *file, struct dentry *dentry, int datasync)
 {
-	int ret;
-	int err;
-	struct address_space *mapping = file->f_mapping;
+	const struct file_operations *fop;
+	struct address_space *mapping;
+	int err, ret;
+
+	/*
+	 * Get mapping and operations from the file in case we have
+	 * as file, or get the default values for them in case we
+	 * don't have a struct file available.  Damn nfsd..
+	 */
+	if (file) {
+		mapping = file->f_mapping;
+		fop = file->f_op;
+	} else {
+		mapping = dentry->d_inode->i_mapping;
+		fop = dentry->d_inode->i_fop;
+	}
 
-	if (!file->f_op || !file->f_op->fsync) {
-		/* Why?  We can still call filemap_fdatawrite */
+	if (!fop || !fop->fsync) {
 		ret = -EINVAL;
 		goto out;
 	}
@@ -94,7 +119,7 @@ long do_fsync(struct file *file, int datasync)
 	 * livelocks in fsync_buffers_list().
 	 */
 	mutex_lock(&mapping->host->i_mutex);
-	err = file->f_op->fsync(file, file->f_path.dentry, datasync);
+	err = fop->fsync(file, dentry, datasync);
 	if (!ret)
 		ret = err;
 	mutex_unlock(&mapping->host->i_mutex);
@@ -104,15 +129,16 @@ long do_fsync(struct file *file, int datasync)
 out:
 	return ret;
 }
+EXPORT_SYMBOL(vfs_fsync);
 
-static long __do_fsync(unsigned int fd, int datasync)
+static int do_fsync(unsigned int fd, int datasync)
 {
 	struct file *file;
 	int ret = -EBADF;
 
 	file = fget(fd);
 	if (file) {
-		ret = do_fsync(file, datasync);
+		ret = vfs_fsync(file, file->f_path.dentry, datasync);
 		fput(file);
 	}
 	return ret;
@@ -120,12 +146,12 @@ static long __do_fsync(unsigned int fd, int datasync)
 
 asmlinkage long sys_fsync(unsigned int fd)
 {
-	return __do_fsync(fd, 0);
+	return do_fsync(fd, 0);
 }
 
 asmlinkage long sys_fdatasync(unsigned int fd)
 {
-	return __do_fsync(fd, 1);
+	return do_fsync(fd, 1);
 }
 
 /*
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e2170ee21e18..9ad9eac9eb0c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1827,7 +1827,7 @@ extern int __filemap_fdatawrite_range(struct address_space *mapping,
 extern int filemap_fdatawrite_range(struct address_space *mapping,
 				loff_t start, loff_t end);
 
-extern long do_fsync(struct file *file, int datasync);
+extern int vfs_fsync(struct file *file, struct dentry *dentry, int datasync);
 extern void sync_supers(void);
 extern void sync_filesystems(int wait);
 extern void __fsync_super(struct super_block *sb);
diff --git a/mm/msync.c b/mm/msync.c
index 144a7570535d..07dae08cf31c 100644
--- a/mm/msync.c
+++ b/mm/msync.c
@@ -82,7 +82,7 @@ asmlinkage long sys_msync(unsigned long start, size_t len, int flags)
 				(vma->vm_flags & VM_SHARED)) {
 			get_file(file);
 			up_read(&mm->mmap_sem);
-			error = do_fsync(file, 0);
+			error = vfs_fsync(file, file->f_path.dentry, 0);
 			fput(file);
 			if (error || start >= end)
 				goto out;
-- 
cgit v1.2.3


From 4ae8978cf92a96257cd8998a49e781be83571d64 Mon Sep 17 00:00:00 2001
From: Michael Kerrisk <mtk.manpages@googlemail.com>
Date: Mon, 5 Jan 2009 07:19:16 -0500
Subject: inotify: fix type errors in interfaces

The problems lie in the types used for some inotify interfaces, both at the kernel level and at the glibc level. This mail addresses the kernel problem. I will follow up with some suggestions for glibc changes.

For the sys_inotify_rm_watch() interface, the type of the 'wd' argument is
currently 'u32', it should be '__s32' .  That is Robert's suggestion, and
is consistent with the other declarations of watch descriptors in the
kernel source, in particular, the inotify_event structure in
include/linux/inotify.h:

struct inotify_event {
        __s32           wd;             /* watch descriptor */
        __u32           mask;           /* watch mask */
        __u32           cookie;         /* cookie to synchronize two events */
        __u32           len;            /* length (including nulls) of name */
        char            name[0];        /* stub for possible name */
};

The patch makes the changes needed for inotify_rm_watch().

Signed-off-by: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: Robert Love <rlove@google.com>
Cc: Vegard Nossum <vegard.nossum@gmail.com>
Cc: Ulrich Drepper <drepper@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/notify/inotify/inotify_user.c | 2 +-
 include/linux/syscalls.h         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 400f8064a548..81b8644b0136 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -704,7 +704,7 @@ fput_and_out:
 	return ret;
 }
 
-asmlinkage long sys_inotify_rm_watch(int fd, u32 wd)
+asmlinkage long sys_inotify_rm_watch(int fd, __s32 wd)
 {
 	struct file *filp;
 	struct inotify_device *dev;
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 04fb47bfb920..18d0a243a7b3 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -549,7 +549,7 @@ asmlinkage long sys_inotify_init(void);
 asmlinkage long sys_inotify_init1(int flags);
 asmlinkage long sys_inotify_add_watch(int fd, const char __user *path,
 					u32 mask);
-asmlinkage long sys_inotify_rm_watch(int fd, u32 wd);
+asmlinkage long sys_inotify_rm_watch(int fd, __s32 wd);
 
 asmlinkage long sys_spu_run(int fd, __u32 __user *unpc,
 				 __u32 __user *ustatus);
-- 
cgit v1.2.3


From e8c82c2e23e3527e0c9dc195e432c16784d270fa Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Tue, 6 Jan 2009 03:05:50 +0100
Subject: mm lockless pagecache barrier fix

An XFS workload showed up a bug in the lockless pagecache patch. Basically it
would go into an "infinite" loop, although it would sometimes be able to break
out of the loop! The reason is a missing compiler barrier in the "increment
reference count unless it was zero" case of the lockless pagecache protocol in
the gang lookup functions.

This would cause the compiler to use a cached value of struct page pointer to
retry the operation with, rather than reload it. So the page might have been
removed from pagecache and freed (refcount==0) but the lookup would not correctly
notice the page is no longer in pagecache, and keep attempting to increment the
refcount and failing, until the page gets reallocated for something else. This
isn't a data corruption because the condition will be detected if the page has
been reallocated. However it can result in a lockup.

Linus points out that ACCESS_ONCE is also required in that pointer load, even
if it's absence is not causing a bug on our particular build. The most general
way to solve this is just to put an rcu_dereference in radix_tree_deref_slot.

Assembly of find_get_pages,
before:
.L220:
        movq    (%rbx), %rax    #* ivtmp.1162, tmp82
        movq    (%rax), %rdi    #, prephitmp.1149
.L218:
        testb   $1, %dil        #, prephitmp.1149
        jne     .L217   #,
        testq   %rdi, %rdi      # prephitmp.1149
        je      .L203   #,
        cmpq    $-1, %rdi       #, prephitmp.1149
        je      .L217   #,
        movl    8(%rdi), %esi   # <variable>._count.counter, c
        testl   %esi, %esi      # c
        je      .L218   #,

after:
.L212:
        movq    (%rbx), %rax    #* ivtmp.1109, tmp81
        movq    (%rax), %rdi    #, ret
        testb   $1, %dil        #, ret
        jne     .L211   #,
        testq   %rdi, %rdi      # ret
        je      .L197   #,
        cmpq    $-1, %rdi       #, ret
        je      .L211   #,
        movl    8(%rdi), %esi   # <variable>._count.counter, c
        testl   %esi, %esi      # c
        je      .L212   #,

(notice the obvious infinite loop in the first example, if page->count remains 0)

Signed-off-by: Nick Piggin <npiggin@suse.de>
Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/radix-tree.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index a916c6660dfa..355f6e80db0d 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -136,7 +136,7 @@ do {									\
  */
 static inline void *radix_tree_deref_slot(void **pslot)
 {
-	void *ret = *pslot;
+	void *ret = rcu_dereference(*pslot);
 	if (unlikely(radix_tree_is_indirect_ptr(ret)))
 		ret = RADIX_TREE_RETRY;
 	return ret;
-- 
cgit v1.2.3


From 10d3bd09a3c25df114f74f7f86e1b58d070bef32 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Tue, 6 Jan 2009 03:04:58 +0000
Subject: dm: consolidate target deregistration error handling

Change dm_unregister_target to return void and use BUG() for error
reporting.

dm_unregister_target can only fail because of programming bug in the
target driver. It can't fail because of user's behavior or disk errors.

This patch changes unregister_target to return void and use BUG if
someone tries to unregister non-registered target or unregister target
that is in use.

This patch removes code duplication (testing of error codes in all dm
targets) and reports bugs in just one place, in dm_unregister_target. In
some target drivers, these return codes were ignored, which could lead
to a situation where bugs could be missed.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-crypt.c         |  6 +-----
 drivers/md/dm-delay.c         |  6 +-----
 drivers/md/dm-linear.c        |  5 +----
 drivers/md/dm-mpath.c         |  6 +-----
 drivers/md/dm-raid1.c         |  6 +-----
 drivers/md/dm-snap.c          | 11 ++---------
 drivers/md/dm-stripe.c        |  4 +---
 drivers/md/dm-target.c        | 15 +++++++--------
 drivers/md/dm-zero.c          |  5 +----
 include/linux/device-mapper.h |  6 ++++--
 10 files changed, 20 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 3326750ec02c..35bda49796fb 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1322,11 +1322,7 @@ static int __init dm_crypt_init(void)
 
 static void __exit dm_crypt_exit(void)
 {
-	int r = dm_unregister_target(&crypt_target);
-
-	if (r < 0)
-		DMERR("unregister failed %d", r);
-
+	dm_unregister_target(&crypt_target);
 	kmem_cache_destroy(_crypt_io_pool);
 }
 
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index 848b381f1173..59ee1b015d2d 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -364,11 +364,7 @@ bad_queue:
 
 static void __exit dm_delay_exit(void)
 {
-	int r = dm_unregister_target(&delay_target);
-
-	if (r < 0)
-		DMERR("unregister failed %d", r);
-
+	dm_unregister_target(&delay_target);
 	kmem_cache_destroy(delayed_cache);
 	destroy_workqueue(kdelayd_wq);
 }
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 44042becad8a..79fb53e51c70 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -156,8 +156,5 @@ int __init dm_linear_init(void)
 
 void dm_linear_exit(void)
 {
-	int r = dm_unregister_target(&linear_target);
-
-	if (r < 0)
-		DMERR("unregister failed %d", r);
+	dm_unregister_target(&linear_target);
 }
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 3d7f4923cd13..345a26047ae0 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -1495,14 +1495,10 @@ static int __init dm_multipath_init(void)
 
 static void __exit dm_multipath_exit(void)
 {
-	int r;
-
 	destroy_workqueue(kmpath_handlerd);
 	destroy_workqueue(kmultipathd);
 
-	r = dm_unregister_target(&multipath_target);
-	if (r < 0)
-		DMERR("target unregister failed %d", r);
+	dm_unregister_target(&multipath_target);
 	kmem_cache_destroy(_mpio_cache);
 }
 
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index d0fed2b21b08..250f401668d5 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -1300,11 +1300,7 @@ static int __init dm_mirror_init(void)
 
 static void __exit dm_mirror_exit(void)
 {
-	int r;
-
-	r = dm_unregister_target(&mirror_target);
-	if (r < 0)
-		DMERR("unregister failed %d", r);
+	dm_unregister_target(&mirror_target);
 }
 
 /* Module hooks */
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 4ceedd4f22af..a8005b43a06b 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1470,17 +1470,10 @@ static int __init dm_snapshot_init(void)
 
 static void __exit dm_snapshot_exit(void)
 {
-	int r;
-
 	destroy_workqueue(ksnapd);
 
-	r = dm_unregister_target(&snapshot_target);
-	if (r)
-		DMERR("snapshot unregister failed %d", r);
-
-	r = dm_unregister_target(&origin_target);
-	if (r)
-		DMERR("origin unregister failed %d", r);
+	dm_unregister_target(&snapshot_target);
+	dm_unregister_target(&origin_target);
 
 	exit_origin_hash();
 	kmem_cache_destroy(pending_cache);
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 9e4ef88d421e..41569bc60abc 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -337,9 +337,7 @@ int __init dm_stripe_init(void)
 
 void dm_stripe_exit(void)
 {
-	if (dm_unregister_target(&stripe_target))
-		DMWARN("target unregistration failed");
-
+	dm_unregister_target(&stripe_target);
 	destroy_workqueue(kstriped);
 
 	return;
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c
index 835cf95b857f..7decf10006e4 100644
--- a/drivers/md/dm-target.c
+++ b/drivers/md/dm-target.c
@@ -130,26 +130,26 @@ int dm_register_target(struct target_type *t)
 	return rv;
 }
 
-int dm_unregister_target(struct target_type *t)
+void dm_unregister_target(struct target_type *t)
 {
 	struct tt_internal *ti;
 
 	down_write(&_lock);
 	if (!(ti = __find_target_type(t->name))) {
-		up_write(&_lock);
-		return -EINVAL;
+		DMCRIT("Unregistering unrecognised target: %s", t->name);
+		BUG();
 	}
 
 	if (ti->use) {
-		up_write(&_lock);
-		return -ETXTBSY;
+		DMCRIT("Attempt to unregister target still in use: %s",
+		       t->name);
+		BUG();
 	}
 
 	list_del(&ti->list);
 	kfree(ti);
 
 	up_write(&_lock);
-	return 0;
 }
 
 /*
@@ -187,8 +187,7 @@ int __init dm_target_init(void)
 
 void dm_target_exit(void)
 {
-	if (dm_unregister_target(&error_target))
-		DMWARN("error target unregistration failed");
+	dm_unregister_target(&error_target);
 }
 
 EXPORT_SYMBOL(dm_register_target);
diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c
index cdbf126ec106..bbc97030c0c2 100644
--- a/drivers/md/dm-zero.c
+++ b/drivers/md/dm-zero.c
@@ -69,10 +69,7 @@ static int __init dm_zero_init(void)
 
 static void __exit dm_zero_exit(void)
 {
-	int r = dm_unregister_target(&zero_target);
-
-	if (r < 0)
-		DMERR("unregister failed %d", r);
+	dm_unregister_target(&zero_target);
 }
 
 module_init(dm_zero_init)
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index c17fd334e574..89ff2df40240 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -157,8 +157,7 @@ struct dm_target {
 };
 
 int dm_register_target(struct target_type *t);
-int dm_unregister_target(struct target_type *t);
-
+void dm_unregister_target(struct target_type *t);
 
 /*-----------------------------------------------------------------
  * Functions for creating and manipulating mapped devices.
@@ -276,6 +275,9 @@ void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size);
  *---------------------------------------------------------------*/
 #define DM_NAME "device-mapper"
 
+#define DMCRIT(f, arg...) \
+	printk(KERN_CRIT DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg)
+
 #define DMERR(f, arg...) \
 	printk(KERN_ERR DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg)
 #define DMERR_LIMIT(f, arg...) \
-- 
cgit v1.2.3


From 7d76345da6ed3927c9cbf5d3f7a7021e8bba7374 Mon Sep 17 00:00:00 2001
From: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Date: Tue, 6 Jan 2009 03:05:07 +0000
Subject: dm request: extend target interface

This patch adds the following target interfaces for request-based dm.

  map_rq    : for mapping a request

  rq_end_io : for finishing a request

  busy      : for avoiding performance regression from bio-based dm.
              Target can tell dm core not to map requests now, and
              that may help requests in the block layer queue to be
              bigger by I/O merging.
              In bio-based dm, this behavior is done by device
              drivers managing the block layer queue.
              But in request-based dm, dm core has to do that
              since dm core manages the block layer queue.

Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 include/linux/device-mapper.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 89ff2df40240..c1ba76c7c0e5 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -45,6 +45,8 @@ typedef void (*dm_dtr_fn) (struct dm_target *ti);
  */
 typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio,
 			  union map_info *map_context);
+typedef int (*dm_map_request_fn) (struct dm_target *ti, struct request *clone,
+				  union map_info *map_context);
 
 /*
  * Returns:
@@ -57,6 +59,9 @@ typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio,
 typedef int (*dm_endio_fn) (struct dm_target *ti,
 			    struct bio *bio, int error,
 			    union map_info *map_context);
+typedef int (*dm_request_endio_fn) (struct dm_target *ti,
+				    struct request *clone, int error,
+				    union map_info *map_context);
 
 typedef void (*dm_flush_fn) (struct dm_target *ti);
 typedef void (*dm_presuspend_fn) (struct dm_target *ti);
@@ -75,6 +80,13 @@ typedef int (*dm_ioctl_fn) (struct dm_target *ti, unsigned int cmd,
 typedef int (*dm_merge_fn) (struct dm_target *ti, struct bvec_merge_data *bvm,
 			    struct bio_vec *biovec, int max_size);
 
+/*
+ * Returns:
+ *    0: The target can handle the next I/O immediately.
+ *    1: The target can't handle the next I/O immediately.
+ */
+typedef int (*dm_busy_fn) (struct dm_target *ti);
+
 void dm_error(const char *message);
 
 /*
@@ -107,7 +119,9 @@ struct target_type {
 	dm_ctr_fn ctr;
 	dm_dtr_fn dtr;
 	dm_map_fn map;
+	dm_map_request_fn map_rq;
 	dm_endio_fn end_io;
+	dm_request_endio_fn rq_end_io;
 	dm_flush_fn flush;
 	dm_presuspend_fn presuspend;
 	dm_postsuspend_fn postsuspend;
@@ -117,6 +131,7 @@ struct target_type {
 	dm_message_fn message;
 	dm_ioctl_fn ioctl;
 	dm_merge_fn merge;
+	dm_busy_fn busy;
 };
 
 struct io_restrictions {
-- 
cgit v1.2.3


From ab4c1424882be9cd70b89abf2b484add355712fa Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Tue, 6 Jan 2009 03:05:09 +0000
Subject: dm: support barriers on simple devices

Implement barrier support for single device DM devices

This patch implements barrier support in DM for the common case of dm linear
just remapping a single underlying device. In this case we can safely
pass the barrier through because there can be no reordering between
devices.

 NB. Any DM device might cease to support barriers if it gets
     reconfigured so code must continue to allow for a possible
     -EOPNOTSUPP on every barrier bio submitted.  - agk

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-linear.c        |  1 +
 drivers/md/dm-table.c         | 19 +++++++++++++++++++
 drivers/md/dm.c               | 15 +++++----------
 drivers/md/dm.h               |  1 +
 include/linux/device-mapper.h |  7 +++++++
 5 files changed, 33 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 79fb53e51c70..bfa107f59d96 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -142,6 +142,7 @@ static struct target_type linear_target = {
 	.status = linear_status,
 	.ioctl  = linear_ioctl,
 	.merge  = linear_merge,
+	.features = DM_TARGET_SUPPORTS_BARRIERS,
 };
 
 int __init dm_linear_init(void)
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 04e5fd742c2c..ebaaf72cd822 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -38,6 +38,8 @@ struct dm_table {
 	sector_t *highs;
 	struct dm_target *targets;
 
+	unsigned barriers_supported:1;
+
 	/*
 	 * Indicates the rw permissions for the new logical
 	 * device.  This should be a combination of FMODE_READ
@@ -227,6 +229,7 @@ int dm_table_create(struct dm_table **result, fmode_t mode,
 
 	INIT_LIST_HEAD(&t->devices);
 	atomic_set(&t->holders, 1);
+	t->barriers_supported = 1;
 
 	if (!num_targets)
 		num_targets = KEYS_PER_NODE;
@@ -728,6 +731,10 @@ int dm_table_add_target(struct dm_table *t, const char *type,
 	/* FIXME: the plan is to combine high here and then have
 	 * the merge fn apply the target level restrictions. */
 	combine_restrictions_low(&t->limits, &tgt->limits);
+
+	if (!(tgt->type->features & DM_TARGET_SUPPORTS_BARRIERS))
+		t->barriers_supported = 0;
+
 	return 0;
 
  bad:
@@ -772,6 +779,12 @@ int dm_table_complete(struct dm_table *t)
 
 	check_for_valid_limits(&t->limits);
 
+	/*
+	 * We only support barriers if there is exactly one underlying device.
+	 */
+	if (!list_is_singular(&t->devices))
+		t->barriers_supported = 0;
+
 	/* how many indexes will the btree have ? */
 	leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE);
 	t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE);
@@ -986,6 +999,12 @@ struct mapped_device *dm_table_get_md(struct dm_table *t)
 	return t->md;
 }
 
+int dm_table_barrier_ok(struct dm_table *t)
+{
+	return t->barriers_supported;
+}
+EXPORT_SYMBOL(dm_table_barrier_ok);
+
 EXPORT_SYMBOL(dm_vcalloc);
 EXPORT_SYMBOL(dm_get_device);
 EXPORT_SYMBOL(dm_put_device);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 4882ce7e88a3..dd953b189f45 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -835,7 +835,11 @@ static int __split_bio(struct mapped_device *md, struct bio *bio)
 	ci.map = dm_get_table(md);
 	if (unlikely(!ci.map))
 		return -EIO;
-
+	if (unlikely(bio_barrier(bio) && !dm_table_barrier_ok(ci.map))) {
+		dm_table_put(ci.map);
+		bio_endio(bio, -EOPNOTSUPP);
+		return 0;
+	}
 	ci.md = md;
 	ci.bio = bio;
 	ci.io = alloc_io(md);
@@ -919,15 +923,6 @@ static int dm_request(struct request_queue *q, struct bio *bio)
 	struct mapped_device *md = q->queuedata;
 	int cpu;
 
-	/*
-	 * There is no use in forwarding any barrier request since we can't
-	 * guarantee it is (or can be) handled by the targets correctly.
-	 */
-	if (unlikely(bio_barrier(bio))) {
-		bio_endio(bio, -EOPNOTSUPP);
-		return 0;
-	}
-
 	down_read(&md->io_lock);
 
 	cpu = part_stat_lock();
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 0ade60cdef42..5b5d08ba9e92 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -51,6 +51,7 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits);
  * To check the return value from dm_table_find_target().
  */
 #define dm_target_is_valid(t) ((t)->table)
+int dm_table_barrier_ok(struct dm_table *t);
 
 /*-----------------------------------------------------------------
  * A registry of target types.
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index c1ba76c7c0e5..8209e08969f9 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -112,7 +112,14 @@ void dm_put_device(struct dm_target *ti, struct dm_dev *d);
 /*
  * Information about a target type
  */
+
+/*
+ * Target features
+ */
+#define DM_TARGET_SUPPORTS_BARRIERS 0x00000001
+
 struct target_type {
+	uint64_t features;
 	const char *name;
 	struct module *module;
 	unsigned version[3];
-- 
cgit v1.2.3


From 025dfdafe77f20b3890981a394774baab7b9c827 Mon Sep 17 00:00:00 2001
From: Frederik Schwarzer <schwarzerf@gmail.com>
Date: Thu, 16 Oct 2008 19:02:37 +0200
Subject: trivial: fix then -> than typos in comments and documentation

- (better, more, bigger ...) then -> (...) than

Signed-off-by: Frederik Schwarzer <schwarzerf@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 Documentation/hwmon/abituguru-datasheet           |  6 +++---
 Documentation/networking/rxrpc.txt                |  2 +-
 Documentation/scsi/ChangeLog.lpfc                 |  2 +-
 arch/blackfin/kernel/kgdb.c                       |  2 +-
 arch/ia64/kernel/kprobes.c                        |  2 +-
 arch/m68k/Kconfig                                 |  2 +-
 arch/mips/pmc-sierra/yosemite/atmel_read_eeprom.c |  2 +-
 arch/powerpc/kernel/kprobes.c                     |  2 +-
 arch/powerpc/oprofile/cell/spu_profiler.c         |  2 +-
 arch/s390/Kconfig                                 |  2 +-
 arch/s390/kernel/kprobes.c                        |  2 +-
 arch/sparc/kernel/kprobes.c                       |  2 +-
 arch/x86/kernel/kprobes.c                         |  2 +-
 arch/x86/kernel/mfgpt_32.c                        |  2 +-
 drivers/hwmon/fschmd.c                            |  2 +-
 drivers/infiniband/hw/mlx4/cq.c                   |  2 +-
 drivers/message/i2o/i2o_scsi.c                    |  2 +-
 drivers/mtd/devices/pmc551.c                      |  2 +-
 drivers/mtd/ubi/eba.c                             |  2 +-
 drivers/mtd/ubi/io.c                              |  2 +-
 drivers/mtd/ubi/scan.c                            |  2 +-
 drivers/mtd/ubi/ubi-media.h                       |  4 ++--
 drivers/mtd/ubi/vtbl.c                            |  2 +-
 drivers/mtd/ubi/wl.c                              |  4 ++--
 drivers/net/bnx2x_link.c                          |  2 +-
 drivers/net/e1000/e1000_hw.c                      |  4 ++--
 drivers/net/slip.h                                |  2 +-
 drivers/net/tehuti.c                              |  4 ++--
 drivers/net/tokenring/smctr.c                     |  2 +-
 drivers/net/wireless/ipw2x00/ipw2100.c            |  2 +-
 drivers/net/wireless/rt2x00/rt2x00crypto.c        |  4 ++--
 drivers/net/wireless/strip.c                      |  2 +-
 drivers/s390/block/dasd_eer.c                     |  4 ++--
 drivers/s390/char/vmlogrdr.c                      |  4 ++--
 drivers/scsi/lpfc/lpfc_hbadisc.c                  |  4 ++--
 drivers/scsi/lpfc/lpfc_sli.c                      | 10 +++++-----
 drivers/serial/crisv10.c                          |  4 ++--
 drivers/video/console/vgacon.c                    |  2 +-
 fs/ocfs2/cluster/heartbeat.c                      |  2 +-
 fs/proc/task_nommu.c                              |  2 +-
 fs/ubifs/Kconfig                                  |  2 +-
 fs/ubifs/budget.c                                 |  4 ++--
 fs/ubifs/gc.c                                     |  2 +-
 fs/ubifs/journal.c                                |  2 +-
 fs/ubifs/shrinker.c                               |  2 +-
 fs/xfs/linux-2.6/xfs_super.c                      |  2 +-
 include/linux/mtd/mtd.h                           |  2 +-
 include/linux/spi/spi.h                           |  4 ++--
 include/mtd/ubi-user.h                            |  2 +-
 kernel/pid.c                                      |  2 +-
 kernel/time/jiffies.c                             |  2 +-
 net/sctp/auth.c                                   |  4 ++--
 net/sctp/sm_statefuns.c                           |  6 +++---
 net/sctp/socket.c                                 |  2 +-
 net/sctp/tsnmap.c                                 |  2 +-
 sound/usb/usx2y/usbusx2y.c                        |  2 +-
 56 files changed, 76 insertions(+), 76 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/hwmon/abituguru-datasheet b/Documentation/hwmon/abituguru-datasheet
index aef5a9b36846..4d184f2db0ea 100644
--- a/Documentation/hwmon/abituguru-datasheet
+++ b/Documentation/hwmon/abituguru-datasheet
@@ -74,7 +74,7 @@ a sensor.
 Notice that some banks have both a read and a write address this is how the
 uGuru determines if a read from or a write to the bank is taking place, thus
 when reading you should always use the read address and when writing the
-write address. The write address is always one (1) more then the read address.
+write address. The write address is always one (1) more than the read address.
 
 
 uGuru ready
@@ -224,7 +224,7 @@ Bit 3: Beep if alarm							(RW)
 Bit 4: 1 if alarm cause measured temp is over the warning threshold	(R)
 Bit 5: 1 if alarm cause measured volt is over the max threshold		(R)
 Bit 6: 1 if alarm cause measured volt is under the min threshold	(R)
-Bit 7: Volt sensor: Shutdown if alarm persist for more then 4 seconds	(RW)
+Bit 7: Volt sensor: Shutdown if alarm persist for more than 4 seconds	(RW)
        Temp sensor: Shutdown if temp is over the shutdown threshold	(RW)
 
 *  This bit is only honored/used by the uGuru if a temp sensor is connected
@@ -293,7 +293,7 @@ Byte 0:
 Alarm behaviour for the selected sensor. A 1 enables the described behaviour.
 Bit 0: Give an alarm if measured rpm is under the min threshold	(RW)
 Bit 3: Beep if alarm						(RW)
-Bit 7: Shutdown if alarm persist for more then 4 seconds	(RW)
+Bit 7: Shutdown if alarm persist for more than 4 seconds	(RW)
 
 Byte 1:
 min threshold (scale as bank 0x26)
diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt
index c3669a3fb4af..60d05eb77c64 100644
--- a/Documentation/networking/rxrpc.txt
+++ b/Documentation/networking/rxrpc.txt
@@ -540,7 +540,7 @@ A client would issue an operation by:
      MSG_MORE should be set in msghdr::msg_flags on all but the last part of
      the request.  Multiple requests may be made simultaneously.
 
-     If a call is intended to go to a destination other then the default
+     If a call is intended to go to a destination other than the default
      specified through connect(), then msghdr::msg_name should be set on the
      first request message of that call.
 
diff --git a/Documentation/scsi/ChangeLog.lpfc b/Documentation/scsi/ChangeLog.lpfc
index ae3f962a7cfc..ff19a52fe004 100644
--- a/Documentation/scsi/ChangeLog.lpfc
+++ b/Documentation/scsi/ChangeLog.lpfc
@@ -733,7 +733,7 @@ Changes from 20040920 to 20041018
 	  I/O completion path a little more, especially taking care of
 	  fast-pathing the non-error case.  Also removes tons of dead
 	  members and defines from lpfc_scsi.h - e.g. lpfc_target is down
-	  to nothing more then the lpfc_nodelist pointer.
+	  to nothing more than the lpfc_nodelist pointer.
 	* Added binary sysfs file to issue mbox commands
 	* Replaced #if __BIG_ENDIAN with #if __BIG_ENDIAN_BITFIELD for
 	  compatibility with the user space applications.
diff --git a/arch/blackfin/kernel/kgdb.c b/arch/blackfin/kernel/kgdb.c
index b795a207742c..1c5afaeb9504 100644
--- a/arch/blackfin/kernel/kgdb.c
+++ b/arch/blackfin/kernel/kgdb.c
@@ -105,7 +105,7 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
  * Extracts ebp, esp and eip values understandable by gdb from the values
  * saved by switch_to.
  * thread.esp points to ebp. flags and ebp are pushed in switch_to hence esp
- * prior to entering switch_to is 8 greater then the value that is saved.
+ * prior to entering switch_to is 8 greater than the value that is saved.
  * If switch_to changes, change following code appropriately.
  */
 void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index f07688da947c..0017b9de2ddf 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -434,7 +434,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 	/*
 	 * It is possible to have multiple instances associated with a given
 	 * task either because an multiple functions in the call path
-	 * have a return probe installed on them, and/or more then one return
+	 * have a return probe installed on them, and/or more than one return
 	 * return probe was registered for a target function.
 	 *
 	 * We can handle this because:
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index c825bde17cb3..fb87c08c6b57 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -303,7 +303,7 @@ config M68KFPU_EMU_EXTRAPREC
 	  correct rounding, the emulator can (often) do the same but this
 	  extra calculation can cost quite some time, so you can disable
 	  it here. The emulator will then "only" calculate with a 64 bit
-	  mantissa and round slightly incorrect, what is more then enough
+	  mantissa and round slightly incorrect, what is more than enough
 	  for normal usage.
 
 config M68KFPU_EMU_ONLY
diff --git a/arch/mips/pmc-sierra/yosemite/atmel_read_eeprom.c b/arch/mips/pmc-sierra/yosemite/atmel_read_eeprom.c
index 97862f45496d..caf5e9a0acc7 100644
--- a/arch/mips/pmc-sierra/yosemite/atmel_read_eeprom.c
+++ b/arch/mips/pmc-sierra/yosemite/atmel_read_eeprom.c
@@ -148,7 +148,7 @@ int read_eeprom(char *buffer, int eeprom_size, int size)
 	send_byte(W_HEADER);
 	recv_ack();
 
-	/* EEPROM with size of more then 2K need two byte addressing */
+	/* EEPROM with size of more than 2K need two byte addressing */
 	if (eeprom_size > 2048) {
 		send_byte(0x00);
 		recv_ack();
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index de79915452c8..b29005a5a8f5 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -316,7 +316,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
 	/*
 	 * It is possible to have multiple instances associated with a given
 	 * task either because an multiple functions in the call path
-	 * have a return probe installed on them, and/or more then one return
+	 * have a return probe installed on them, and/or more than one return
 	 * return probe was registered for a target function.
 	 *
 	 * We can handle this because:
diff --git a/arch/powerpc/oprofile/cell/spu_profiler.c b/arch/powerpc/oprofile/cell/spu_profiler.c
index dd499c3e9da7..83faa958b9d4 100644
--- a/arch/powerpc/oprofile/cell/spu_profiler.c
+++ b/arch/powerpc/oprofile/cell/spu_profiler.c
@@ -49,7 +49,7 @@ void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_rese
 	 * of precision.  This is close enough for the purpose at hand.
 	 *
 	 * The value of the timeout should be small enough that the hw
-	 * trace buffer will not get more then about 1/3 full for the
+	 * trace buffer will not get more than about 1/3 full for the
 	 * maximum user specified (the LFSR value) hw sampling frequency.
 	 * This is to ensure the trace buffer will never fill even if the
 	 * kernel thread scheduling varies under a heavy system load.
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 19577aeffd7b..a94a3c3ae932 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -299,7 +299,7 @@ config WARN_STACK
 	  This option enables the compiler options -mwarn-framesize and
 	  -mwarn-dynamicstack. If the compiler supports these options it
 	  will generate warnings for function which either use alloca or
-	  create a stack frame bigger then CONFIG_WARN_STACK_SIZE.
+	  create a stack frame bigger than CONFIG_WARN_STACK_SIZE.
 
 	  Say N if you are unsure.
 
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 569079ec4ff0..267f6698680a 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -381,7 +381,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
 	/*
 	 * It is possible to have multiple instances associated with a given
 	 * task either because an multiple functions in the call path
-	 * have a return probe installed on them, and/or more then one return
+	 * have a return probe installed on them, and/or more than one return
 	 * return probe was registered for a target function.
 	 *
 	 * We can handle this because:
diff --git a/arch/sparc/kernel/kprobes.c b/arch/sparc/kernel/kprobes.c
index 201a6e547e4a..3bc6527c95af 100644
--- a/arch/sparc/kernel/kprobes.c
+++ b/arch/sparc/kernel/kprobes.c
@@ -517,7 +517,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 	/*
 	 * It is possible to have multiple instances associated with a given
 	 * task either because an multiple functions in the call path
-	 * have a return probe installed on them, and/or more then one return
+	 * have a return probe installed on them, and/or more than one return
 	 * return probe was registered for a target function.
 	 *
 	 * We can handle this because:
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 6c27679ec6aa..a116e6d5726c 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -694,7 +694,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
 	/*
 	 * It is possible to have multiple instances associated with a given
 	 * task either because multiple functions in the call path have
-	 * return probes installed on them, and/or more then one
+	 * return probes installed on them, and/or more than one
 	 * return probe was registered for a target function.
 	 *
 	 * We can handle this because:
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c
index c12314c9e86f..8815f3c7fec7 100644
--- a/arch/x86/kernel/mfgpt_32.c
+++ b/arch/x86/kernel/mfgpt_32.c
@@ -252,7 +252,7 @@ EXPORT_SYMBOL_GPL(geode_mfgpt_alloc_timer);
 /*
  * The MFPGT timers on the CS5536 provide us with suitable timers to use
  * as clock event sources - not as good as a HPET or APIC, but certainly
- * better then the PIT.  This isn't a general purpose MFGPT driver, but
+ * better than the PIT.  This isn't a general purpose MFGPT driver, but
  * a simplified one designed specifically to act as a clock event source.
  * For full details about the MFGPT, please consult the CS5536 data sheet.
  */
diff --git a/drivers/hwmon/fschmd.c b/drivers/hwmon/fschmd.c
index 967170368933..8b2d756595d9 100644
--- a/drivers/hwmon/fschmd.c
+++ b/drivers/hwmon/fschmd.c
@@ -75,7 +75,7 @@ static const u8 FSCHMD_REG_VOLT[3] = { 0x45, 0x42, 0x48 };
 
 /* minimum pwm at which the fan is driven (pwm can by increased depending on
    the temp. Notice that for the scy some fans share there minimum speed.
-   Also notice that with the scy the sensor order is different then with the
+   Also notice that with the scy the sensor order is different than with the
    other chips, this order was in the 2.4 driver and kept for consistency. */
 static const u8 FSCHMD_REG_FAN_MIN[5][6] = {
 	{ 0x55, 0x65 },					/* pos */
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index a3c5af1d7ec0..de5263beab4a 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -367,7 +367,7 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
 		if (err)
 			goto out;
 	} else {
-		/* Can't be smaller then the number of outstanding CQEs */
+		/* Can't be smaller than the number of outstanding CQEs */
 		outst_cqe = mlx4_ib_get_outstanding_cqes(cq);
 		if (entries < outst_cqe + 1) {
 			err = 0;
diff --git a/drivers/message/i2o/i2o_scsi.c b/drivers/message/i2o/i2o_scsi.c
index 1bcdbbb9e7d3..3d45817e6dcd 100644
--- a/drivers/message/i2o/i2o_scsi.c
+++ b/drivers/message/i2o/i2o_scsi.c
@@ -390,7 +390,7 @@ static int i2o_scsi_reply(struct i2o_controller *c, u32 m,
  *	@i2o_dev: the I2O device which was added
  *
  *	If a I2O device is added we catch the notification, because I2O classes
- *	other then SCSI peripheral will not be received through
+ *	other than SCSI peripheral will not be received through
  *	i2o_scsi_probe().
  */
 static void i2o_scsi_notify_device_add(struct i2o_device *i2o_dev)
diff --git a/drivers/mtd/devices/pmc551.c b/drivers/mtd/devices/pmc551.c
index d38bca64bb15..d2fd550f7e09 100644
--- a/drivers/mtd/devices/pmc551.c
+++ b/drivers/mtd/devices/pmc551.c
@@ -34,7 +34,7 @@
  *	aperture size, not the dram size, and the V370PDC supplies no
  *	other method for memory size discovery.  This problem is
  *	mostly only relevant when compiled as a module, as the
- *	unloading of the module with an aperture size smaller then
+ *	unloading of the module with an aperture size smaller than
  *	the ram will cause the driver to detect the onboard memory
  *	size to be equal to the aperture size when the module is
  *	reloaded.  Soooo, to help, the module supports an msize
diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c
index 048a606cebde..25def348e5ba 100644
--- a/drivers/mtd/ubi/eba.c
+++ b/drivers/mtd/ubi/eba.c
@@ -717,7 +717,7 @@ write_error:
  * to the real data size, although the @buf buffer has to contain the
  * alignment. In all other cases, @len has to be aligned.
  *
- * It is prohibited to write more then once to logical eraseblocks of static
+ * It is prohibited to write more than once to logical eraseblocks of static
  * volumes. This function returns zero in case of success and a negative error
  * code in case of failure.
  */
diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c
index a74118c05745..fe81039f2a7c 100644
--- a/drivers/mtd/ubi/io.c
+++ b/drivers/mtd/ubi/io.c
@@ -465,7 +465,7 @@ out:
  * This function synchronously erases physical eraseblock @pnum. If @torture
  * flag is not zero, the physical eraseblock is checked by means of writing
  * different patterns to it and reading them back. If the torturing is enabled,
- * the physical eraseblock is erased more then once.
+ * the physical eraseblock is erased more than once.
  *
  * This function returns the number of erasures made in case of success, %-EIO
  * if the erasure failed or the torturing test failed, and other negative error
diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c
index 41d47e1cf15c..ecde202a5a12 100644
--- a/drivers/mtd/ubi/scan.c
+++ b/drivers/mtd/ubi/scan.c
@@ -478,7 +478,7 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si,
 			return 0;
 		} else {
 			/*
-			 * This logical eraseblock is older then the one found
+			 * This logical eraseblock is older than the one found
 			 * previously.
 			 */
 			if (cmp_res & 4)
diff --git a/drivers/mtd/ubi/ubi-media.h b/drivers/mtd/ubi/ubi-media.h
index 2ad940409053..8419fdccc79c 100644
--- a/drivers/mtd/ubi/ubi-media.h
+++ b/drivers/mtd/ubi/ubi-media.h
@@ -135,7 +135,7 @@ enum {
  * The erase counter header takes 64 bytes and has a plenty of unused space for
  * future usage. The unused fields are zeroed. The @version field is used to
  * indicate the version of UBI implementation which is supposed to be able to
- * work with this UBI image. If @version is greater then the current UBI
+ * work with this UBI image. If @version is greater than the current UBI
  * version, the image is rejected. This may be useful in future if something
  * is changed radically. This field is duplicated in the volume identifier
  * header.
@@ -187,7 +187,7 @@ struct ubi_ec_hdr {
  * (sequence number) is used to distinguish between older and newer versions of
  * logical eraseblocks.
  *
- * There are 2 situations when there may be more then one physical eraseblock
+ * There are 2 situations when there may be more than one physical eraseblock
  * corresponding to the same logical eraseblock, i.e., having the same @vol_id
  * and @lnum values in the volume identifier header. Suppose we have a logical
  * eraseblock L and it is mapped to the physical eraseblock P.
diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c
index 333c8941552f..1afc61e7455d 100644
--- a/drivers/mtd/ubi/vtbl.c
+++ b/drivers/mtd/ubi/vtbl.c
@@ -577,7 +577,7 @@ static int init_volumes(struct ubi_device *ubi, const struct ubi_scan_info *si,
 		if (vtbl[i].flags & UBI_VTBL_AUTORESIZE_FLG) {
 			/* Auto re-size flag may be set only for one volume */
 			if (ubi->autoresize_vol_id != -1) {
-				ubi_err("more then one auto-resize volume (%d "
+				ubi_err("more than one auto-resize volume (%d "
 					"and %d)", ubi->autoresize_vol_id, i);
 				kfree(vol);
 				return -EINVAL;
diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
index 14901cb82c18..891534f8210d 100644
--- a/drivers/mtd/ubi/wl.c
+++ b/drivers/mtd/ubi/wl.c
@@ -128,7 +128,7 @@
  * situation when the picked physical eraseblock is constantly erased after the
  * data is written to it. So, we have a constant which limits the highest erase
  * counter of the free physical eraseblock to pick. Namely, the WL sub-system
- * does not pick eraseblocks with erase counter greater then the lowest erase
+ * does not pick eraseblocks with erase counter greater than the lowest erase
  * counter plus %WL_FREE_MAX_DIFF.
  */
 #define WL_FREE_MAX_DIFF (2*UBI_WL_THRESHOLD)
@@ -917,7 +917,7 @@ static int ensure_wear_leveling(struct ubi_device *ubi)
 		/*
 		 * We schedule wear-leveling only if the difference between the
 		 * lowest erase counter of used physical eraseblocks and a high
-		 * erase counter of free physical eraseblocks is greater then
+		 * erase counter of free physical eraseblocks is greater than
 		 * %UBI_WL_THRESHOLD.
 		 */
 		e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb);
diff --git a/drivers/net/bnx2x_link.c b/drivers/net/bnx2x_link.c
index 67de94f1f30e..fefa6ab13064 100644
--- a/drivers/net/bnx2x_link.c
+++ b/drivers/net/bnx2x_link.c
@@ -3359,7 +3359,7 @@ static u8 bnx2x_format_ver(u32 num, u8 *str, u16 len)
 	u8 shift = 8*4;
 	u8 digit;
 	if (len < 10) {
-		/* Need more then 10chars for this format */
+		/* Need more than 10chars for this format */
 		*str_ptr = '\0';
 		return -EINVAL;
 	}
diff --git a/drivers/net/e1000/e1000_hw.c b/drivers/net/e1000/e1000_hw.c
index d04eef53571e..e1a3fc1303ee 100644
--- a/drivers/net/e1000/e1000_hw.c
+++ b/drivers/net/e1000/e1000_hw.c
@@ -6758,7 +6758,7 @@ static s32 e1000_get_cable_length(struct e1000_hw *hw, u16 *min_length,
  * returns: - E1000_ERR_XXX
  *            E1000_SUCCESS
  *
- * For phy's older then IGP, this function simply reads the polarity bit in the
+ * For phy's older than IGP, this function simply reads the polarity bit in the
  * Phy Status register.  For IGP phy's, this bit is valid only if link speed is
  * 10 Mbps.  If the link speed is 100 Mbps there is no polarity so this bit will
  * return 0.  If the link speed is 1000 Mbps the polarity status is in the
@@ -6834,7 +6834,7 @@ static s32 e1000_check_polarity(struct e1000_hw *hw,
  * returns: - E1000_ERR_XXX
  *            E1000_SUCCESS
  *
- * For phy's older then IGP, this function reads the Downshift bit in the Phy
+ * For phy's older than IGP, this function reads the Downshift bit in the Phy
  * Specific Status register.  For IGP phy's, it reads the Downgrade bit in the
  * Link Health register.  In IGP this bit is latched high, so the driver must
  * read it immediately after link is established.
diff --git a/drivers/net/slip.h b/drivers/net/slip.h
index 853e0f6ec710..9ea5c11287d2 100644
--- a/drivers/net/slip.h
+++ b/drivers/net/slip.h
@@ -75,7 +75,7 @@ struct slip {
   unsigned long         tx_errors;      /* Planned stuff                */
   unsigned long         rx_dropped;     /* No memory for skb            */
   unsigned long         tx_dropped;     /* When MTU change              */
-  unsigned long         rx_over_errors; /* Frame bigger then SLIP buf.  */
+  unsigned long         rx_over_errors; /* Frame bigger than SLIP buf.  */
 #ifdef SL_INCLUDE_CSLIP
   unsigned long		tx_compressed;
   unsigned long		rx_compressed;
diff --git a/drivers/net/tehuti.c b/drivers/net/tehuti.c
index a10a83a11d9f..a7a4dc4d6313 100644
--- a/drivers/net/tehuti.c
+++ b/drivers/net/tehuti.c
@@ -1004,7 +1004,7 @@ static inline void bdx_rxdb_free_elem(struct rxdb *db, int n)
  * skb for rx. It assumes that Rx is desabled in HW
  * funcs are grouped for better cache usage
  *
- * RxD fifo is smaller then RxF fifo by design. Upon high load, RxD will be
+ * RxD fifo is smaller than RxF fifo by design. Upon high load, RxD will be
  * filled and packets will be dropped by nic without getting into host or
  * cousing interrupt. Anyway, in that condition, host has no chance to proccess
  * all packets, but dropping in nic is cheaper, since it takes 0 cpu cycles
@@ -1826,7 +1826,7 @@ static void bdx_tx_free(struct bdx_priv *priv)
  *
  * Pushes desc to TxD fifo and overlaps it if needed.
  * NOTE: this func does not check for available space. this is responsibility
- *    of the caller. Neither does it check that data size is smaller then
+ *    of the caller. Neither does it check that data size is smaller than
  *    fifo size.
  */
 static void bdx_tx_push_desc(struct bdx_priv *priv, void *data, int size)
diff --git a/drivers/net/tokenring/smctr.c b/drivers/net/tokenring/smctr.c
index a011666342ff..50eb29ce3c87 100644
--- a/drivers/net/tokenring/smctr.c
+++ b/drivers/net/tokenring/smctr.c
@@ -3064,7 +3064,7 @@ static int smctr_load_node_addr(struct net_device *dev)
  * will consequently cause a timeout.
  *
  * NOTE 1: If the monitor_state is MS_BEACON_TEST_STATE, all transmit
- * queues other then the one used for the lobe_media_test should be
+ * queues other than the one used for the lobe_media_test should be
  * disabled.!?
  *
  * NOTE 2: If the monitor_state is MS_BEACON_TEST_STATE and the receive_mask
diff --git a/drivers/net/wireless/ipw2x00/ipw2100.c b/drivers/net/wireless/ipw2x00/ipw2100.c
index 1667065b86a7..753de1a9c4b3 100644
--- a/drivers/net/wireless/ipw2x00/ipw2100.c
+++ b/drivers/net/wireless/ipw2x00/ipw2100.c
@@ -1332,7 +1332,7 @@ static int ipw2100_power_cycle_adapter(struct ipw2100_priv *priv)
 		       IPW_AUX_HOST_RESET_REG_STOP_MASTER);
 
 	/* Step 2. Wait for stop Master Assert
-	 *         (not more then 50us, otherwise ret error */
+	 *         (not more than 50us, otherwise ret error */
 	i = 5;
 	do {
 		udelay(IPW_WAIT_RESET_MASTER_ASSERT_COMPLETE_DELAY);
diff --git a/drivers/net/wireless/rt2x00/rt2x00crypto.c b/drivers/net/wireless/rt2x00/rt2x00crypto.c
index 37ad0d2fb64c..aee9cba13eb3 100644
--- a/drivers/net/wireless/rt2x00/rt2x00crypto.c
+++ b/drivers/net/wireless/rt2x00/rt2x00crypto.c
@@ -184,8 +184,8 @@ void rt2x00crypto_rx_insert_iv(struct sk_buff *skb, unsigned int align,
 	 * Make room for new data, note that we increase both
 	 * headsize and tailsize when required. The tailsize is
 	 * only needed when ICV data needs to be inserted and
-	 * the padding is smaller then the ICV data.
-	 * When alignment requirements is greater then the
+	 * the padding is smaller than the ICV data.
+	 * When alignment requirements is greater than the
 	 * ICV data we must trim the skb to the correct size
 	 * because we need to remove the extra bytes.
 	 */
diff --git a/drivers/net/wireless/strip.c b/drivers/net/wireless/strip.c
index dd0de3a9ed4e..7015f2480550 100644
--- a/drivers/net/wireless/strip.c
+++ b/drivers/net/wireless/strip.c
@@ -236,7 +236,7 @@ struct strip {
 	unsigned long tx_errors;	/* Planned stuff                */
 	unsigned long rx_dropped;	/* No memory for skb            */
 	unsigned long tx_dropped;	/* When MTU change              */
-	unsigned long rx_over_errors;	/* Frame bigger then STRIP buf. */
+	unsigned long rx_over_errors;	/* Frame bigger than STRIP buf. */
 
 	unsigned long pps_timer;	/* Timer to determine pps       */
 	unsigned long rx_pps_count;	/* Counter to determine pps     */
diff --git a/drivers/s390/block/dasd_eer.c b/drivers/s390/block/dasd_eer.c
index 892e2878d61b..f8e05ce98621 100644
--- a/drivers/s390/block/dasd_eer.c
+++ b/drivers/s390/block/dasd_eer.c
@@ -535,8 +535,8 @@ static int dasd_eer_open(struct inode *inp, struct file *filp)
 	    eerb->buffer_page_count > INT_MAX / PAGE_SIZE) {
 		kfree(eerb);
 		MESSAGE(KERN_WARNING, "can't open device since module "
-			"parameter eer_pages is smaller then 1 or"
-			" bigger then %d", (int)(INT_MAX / PAGE_SIZE));
+			"parameter eer_pages is smaller than 1 or"
+			" bigger than %d", (int)(INT_MAX / PAGE_SIZE));
 		unlock_kernel();
 		return -EINVAL;
 	}
diff --git a/drivers/s390/char/vmlogrdr.c b/drivers/s390/char/vmlogrdr.c
index aabbeb909cc6..d8a2289fcb69 100644
--- a/drivers/s390/char/vmlogrdr.c
+++ b/drivers/s390/char/vmlogrdr.c
@@ -427,7 +427,7 @@ static int vmlogrdr_receive_data(struct vmlogrdr_priv_t *priv)
 			buffer = priv->buffer + sizeof(int);
 		}
 		/*
-		 * If the record is bigger then our buffer, we receive only
+		 * If the record is bigger than our buffer, we receive only
 		 * a part of it. We can get the rest later.
 		 */
 		if (iucv_data_count > NET_BUFFER_SIZE)
@@ -437,7 +437,7 @@ static int vmlogrdr_receive_data(struct vmlogrdr_priv_t *priv)
 					  0, buffer, iucv_data_count,
 					  &priv->residual_length);
 		spin_unlock_bh(&priv->priv_lock);
-		/* An rc of 5 indicates that the record was bigger then
+		/* An rc of 5 indicates that the record was bigger than
 		 * the buffer, which is OK for us. A 9 indicates that the
 		 * record was purged befor we could receive it.
 		 */
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index 8c64494444bf..311ed6dea726 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -1964,10 +1964,10 @@ lpfc_set_disctmo(struct lpfc_vport *vport)
 	uint32_t tmo;
 
 	if (vport->port_state == LPFC_LOCAL_CFG_LINK) {
-		/* For FAN, timeout should be greater then edtov */
+		/* For FAN, timeout should be greater than edtov */
 		tmo = (((phba->fc_edtov + 999) / 1000) + 1);
 	} else {
-		/* Normal discovery timeout should be > then ELS/CT timeout
+		/* Normal discovery timeout should be > than ELS/CT timeout
 		 * FC spec states we need 3 * ratov for CT requests
 		 */
 		tmo = ((phba->fc_ratov * 3) + 3);
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 01dfdc8696f8..a36a120561e2 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -420,7 +420,7 @@ lpfc_sli_next_iocb_slot (struct lpfc_hba *phba, struct lpfc_sli_ring *pring)
 		if (unlikely(pring->local_getidx >= max_cmd_idx)) {
 			lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
 					"0315 Ring %d issue: portCmdGet %d "
-					"is bigger then cmd ring %d\n",
+					"is bigger than cmd ring %d\n",
 					pring->ringno,
 					pring->local_getidx, max_cmd_idx);
 
@@ -1628,12 +1628,12 @@ lpfc_sli_rsp_pointers_error(struct lpfc_hba *phba, struct lpfc_sli_ring *pring)
 {
 	struct lpfc_pgp *pgp = &phba->port_gp[pring->ringno];
 	/*
-	 * Ring <ringno> handler: portRspPut <portRspPut> is bigger then
+	 * Ring <ringno> handler: portRspPut <portRspPut> is bigger than
 	 * rsp ring <portRspMax>
 	 */
 	lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
 			"0312 Ring %d handler: portRspPut %d "
-			"is bigger then rsp ring %d\n",
+			"is bigger than rsp ring %d\n",
 			pring->ringno, le32_to_cpu(pgp->rspPutInx),
 			pring->numRiocb);
 
@@ -2083,12 +2083,12 @@ lpfc_sli_handle_slow_ring_event(struct lpfc_hba *phba,
 	portRspPut = le32_to_cpu(pgp->rspPutInx);
 	if (portRspPut >= portRspMax) {
 		/*
-		 * Ring <ringno> handler: portRspPut <portRspPut> is bigger then
+		 * Ring <ringno> handler: portRspPut <portRspPut> is bigger than
 		 * rsp ring <portRspMax>
 		 */
 		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
 				"0303 Ring %d handler: portRspPut %d "
-				"is bigger then rsp ring %d\n",
+				"is bigger than rsp ring %d\n",
 				pring->ringno, portRspPut, portRspMax);
 
 		phba->link_state = LPFC_HBA_ERROR;
diff --git a/drivers/serial/crisv10.c b/drivers/serial/crisv10.c
index 8b2c619a09f2..e642c22c80e2 100644
--- a/drivers/serial/crisv10.c
+++ b/drivers/serial/crisv10.c
@@ -1203,7 +1203,7 @@ static void e100_disable_txdma_channel(struct e100_serial *info)
 	unsigned long flags;
 
 	/* Disable output DMA channel for the serial port in question
-	 * ( set to something other then serialX)
+	 * ( set to something other than serialX)
 	 */
 	local_irq_save(flags);
 	DFLOW(DEBUG_LOG(info->line, "disable_txdma_channel %i\n", info->line));
@@ -1266,7 +1266,7 @@ static void e100_disable_rxdma_channel(struct e100_serial *info)
 	unsigned long flags;
 
 	/* Disable input DMA channel for the serial port in question
-	 * ( set to something other then serialX)
+	 * ( set to something other than serialX)
 	 */
 	local_irq_save(flags);
 	if (info->line == 0) {
diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c
index e6210725b9ab..d012edda6d11 100644
--- a/drivers/video/console/vgacon.c
+++ b/drivers/video/console/vgacon.c
@@ -1332,7 +1332,7 @@ static void vgacon_save_screen(struct vc_data *c)
 		c->vc_y = screen_info.orig_y;
 	}
 
-	/* We can't copy in more then the size of the video buffer,
+	/* We can't copy in more than the size of the video buffer,
 	 * or we'll be copying in VGA BIOS */
 
 	if (!vga_is_gfx)
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 6ebaa58e2c03..04697ba7f73e 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -854,7 +854,7 @@ static int o2hb_thread(void *data)
 
 	while (!kthread_should_stop() && !reg->hr_unclean_stop) {
 		/* We track the time spent inside
-		 * o2hb_do_disk_heartbeat so that we avoid more then
+		 * o2hb_do_disk_heartbeat so that we avoid more than
 		 * hr_timeout_ms between disk writes. On busy systems
 		 * this should result in a heartbeat which is less
 		 * likely to time itself out. */
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 219bd79ea894..d4a8be32b902 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -9,7 +9,7 @@
 
 /*
  * Logic: we've got two memory sums for each process, "shared", and
- * "non-shared". Shared memory may get counted more then once, for
+ * "non-shared". Shared memory may get counted more than once, for
  * each process that owns it. Non-shared memory is counted
  * accurately.
  */
diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig
index 91ceeda7e5bf..e35b54d5059d 100644
--- a/fs/ubifs/Kconfig
+++ b/fs/ubifs/Kconfig
@@ -40,7 +40,7 @@ config UBIFS_FS_ZLIB
 	depends on UBIFS_FS
 	default y
 	help
-	  Zlib copresses better then LZO but it is slower. Say 'Y' if unsure.
+	  Zlib compresses better than LZO but it is slower. Say 'Y' if unsure.
 
 # Debugging-related stuff
 config UBIFS_FS_DEBUG
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 0e5e54d82924..175f9c590b77 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -142,7 +142,7 @@ static long long get_liability(struct ubifs_info *c)
  *
  * This function is called when an operation cannot be budgeted because there
  * is supposedly no free space. But in most cases there is some free space:
- *   o budgeting is pessimistic, so it always budgets more then it is actually
+ *   o budgeting is pessimistic, so it always budgets more than it is actually
  *     needed, so shrinking the liability is one way to make free space - the
  *     cached data will take less space then it was budgeted for;
  *   o GC may turn some dark space into free space (budgeting treats dark space
@@ -606,7 +606,7 @@ void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req)
  * @c: UBIFS file-system description object
  *
  * This function converts budget which was allocated for a new page of data to
- * the budget of changing an existing page of data. The latter is smaller then
+ * the budget of changing an existing page of data. The latter is smaller than
  * the former, so this function only does simple re-calculation and does not
  * involve any write-back.
  */
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index 0bef6501d58a..9832f9abe28e 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -45,7 +45,7 @@
 #define SMALL_NODE_WM  UBIFS_MAX_DENT_NODE_SZ
 
 /*
- * GC may need to move more then one LEB to make progress. The below constants
+ * GC may need to move more than one LEB to make progress. The below constants
  * define "soft" and "hard" limits on the number of LEBs the garbage collector
  * may move.
  */
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index 10ae25b7d1db..9b7c54e0cd2a 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -191,7 +191,7 @@ again:
 	if (wbuf->lnum != -1 && avail >= len) {
 		/*
 		 * Someone else has switched the journal head and we have
-		 * enough space now. This happens when more then one process is
+		 * enough space now. This happens when more than one process is
 		 * trying to write to the same journal head at the same time.
 		 */
 		dbg_jnl("return LEB %d back, already have LEB %d:%d",
diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c
index f248533841a2..e7bab52a1410 100644
--- a/fs/ubifs/shrinker.c
+++ b/fs/ubifs/shrinker.c
@@ -151,7 +151,7 @@ static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention)
  * @contention: if any contention, this is set to %1
  *
  * This function walks the list of mounted UBIFS file-systems and frees clean
- * znodes which are older then @age, until at least @nr znodes are freed.
+ * znodes which are older than @age, until at least @nr znodes are freed.
  * Returns the number of freed znodes.
  */
 static int shrink_tnc_trees(int nr, int age, int *contention)
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 36f6cc703ef2..be846d606ae8 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1348,7 +1348,7 @@ xfs_finish_flags(
 {
 	int			ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
 
-	/* Fail a mount where the logbuf is smaller then the log stripe */
+	/* Fail a mount where the logbuf is smaller than the log stripe */
 	if (xfs_sb_version_haslogv2(&mp->m_sb)) {
 		if (mp->m_logbsize <= 0 &&
 		    mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) {
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index eae26bb6430a..64433eb411d7 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -83,7 +83,7 @@ typedef enum {
  * @datbuf:	data buffer - if NULL only oob data are read/written
  * @oobbuf:	oob data buffer
  *
- * Note, it is allowed to read more then one OOB area at one go, but not write.
+ * Note, it is allowed to read more than one OOB area at one go, but not write.
  * The interface assumes that the OOB write requests program only one page's
  * OOB area.
  */
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 82229317753d..68bb1c501d0d 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -327,9 +327,9 @@ extern struct spi_master *spi_busnum_to_master(u16 busnum);
  * @tx_dma: DMA address of tx_buf, if @spi_message.is_dma_mapped
  * @rx_dma: DMA address of rx_buf, if @spi_message.is_dma_mapped
  * @len: size of rx and tx buffers (in bytes)
- * @speed_hz: Select a speed other then the device default for this
+ * @speed_hz: Select a speed other than the device default for this
  *      transfer. If 0 the default (from @spi_device) is used.
- * @bits_per_word: select a bits_per_word other then the device default
+ * @bits_per_word: select a bits_per_word other than the device default
  *      for this transfer. If 0 the default (from @spi_device) is used.
  * @cs_change: affects chipselect after this transfer completes
  * @delay_usecs: microseconds to delay after this transfer before
diff --git a/include/mtd/ubi-user.h b/include/mtd/ubi-user.h
index ccdc562e444e..2dc2eb2b8e22 100644
--- a/include/mtd/ubi-user.h
+++ b/include/mtd/ubi-user.h
@@ -253,7 +253,7 @@ struct ubi_mkvol_req {
  *
  * Re-sizing is possible for both dynamic and static volumes. But while dynamic
  * volumes may be re-sized arbitrarily, static volumes cannot be made to be
- * smaller then the number of bytes they bear. To arbitrarily shrink a static
+ * smaller than the number of bytes they bear. To arbitrarily shrink a static
  * volume, it must be wiped out first (by means of volume update operation with
  * zero number of bytes).
  */
diff --git a/kernel/pid.c b/kernel/pid.c
index 064e76afa507..af9224cdd6c0 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -475,7 +475,7 @@ pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
 EXPORT_SYMBOL(task_session_nr_ns);
 
 /*
- * Used by proc to find the first pid that is greater then or equal to nr.
+ * Used by proc to find the first pid that is greater than or equal to nr.
  *
  * If there is a pid at nr this function is exactly the same as find_pid_ns.
  */
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index 1ca99557e929..06f197560f3b 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -45,7 +45,7 @@
  *
  * The value 8 is somewhat carefully chosen, as anything
  * larger can result in overflows. NSEC_PER_JIFFY grows as
- * HZ shrinks, so values greater then 8 overflow 32bits when
+ * HZ shrinks, so values greater than 8 overflow 32bits when
  * HZ=100.
  */
 #define JIFFIES_SHIFT	8
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 52db5f60daa0..20c576f530fa 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -141,8 +141,8 @@ void sctp_auth_destroy_keys(struct list_head *keys)
 /* Compare two byte vectors as numbers.  Return values
  * are:
  * 	  0 - vectors are equal
- * 	< 0 - vector 1 is smaller then vector2
- * 	> 0 - vector 1 is greater then vector2
+ * 	< 0 - vector 1 is smaller than vector2
+ * 	> 0 - vector 1 is greater than vector2
  *
  * Algorithm is:
  * 	This is performed by selecting the numerically smaller key vector...
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 1c4e5d6c29c0..3a0cd075914f 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -4268,9 +4268,9 @@ nomem:
 
 /*
  * Handle a protocol violation when the chunk length is invalid.
- * "Invalid" length is identified as smaller then the minimal length a
+ * "Invalid" length is identified as smaller than the minimal length a
  * given chunk can be.  For example, a SACK chunk has invalid length
- * if it's length is set to be smaller then the size of sctp_sack_chunk_t.
+ * if its length is set to be smaller than the size of sctp_sack_chunk_t.
  *
  * We inform the other end by sending an ABORT with a Protocol Violation
  * error code.
@@ -4300,7 +4300,7 @@ static sctp_disposition_t sctp_sf_violation_chunklen(
 
 /*
  * Handle a protocol violation when the parameter length is invalid.
- * "Invalid" length is identified as smaller then the minimal length a
+ * "Invalid" length is identified as smaller than the minimal length a
  * given parameter can be.
  */
 static sctp_disposition_t sctp_sf_violation_paramlen(
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index b14a8f33e42d..ff0a8f88de04 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2717,7 +2717,7 @@ static int sctp_setsockopt_associnfo(struct sock *sk, char __user *optval, int o
 				paths++;
 			}
 
-			/* Only validate asocmaxrxt if we have more then
+			/* Only validate asocmaxrxt if we have more than
 			 * one path/transport.  We do this because path
 			 * retransmissions are only counted when we have more
 			 * then one path.
diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c
index 35c73e82553a..9bd64565021a 100644
--- a/net/sctp/tsnmap.c
+++ b/net/sctp/tsnmap.c
@@ -227,7 +227,7 @@ void sctp_tsnmap_skip(struct sctp_tsnmap *map, __u32 tsn)
 		 */
 		bitmap_zero(map->tsn_map, map->len);
 	} else {
-		/* If the gap is smaller then the map size,
+		/* If the gap is smaller than the map size,
 		 * shift the map by 'gap' bits and update further.
 		 */
 		bitmap_shift_right(map->tsn_map, map->tsn_map, gap, map->len);
diff --git a/sound/usb/usx2y/usbusx2y.c b/sound/usb/usx2y/usbusx2y.c
index ca26c532e77e..11639bd72a51 100644
--- a/sound/usb/usx2y/usbusx2y.c
+++ b/sound/usb/usx2y/usbusx2y.c
@@ -238,7 +238,7 @@ static void i_usX2Y_In04Int(struct urb *urb)
 					send = 0;
 				for (j = 0; j < URBS_AsyncSeq  &&  !err; ++j)
 					if (0 == usX2Y->AS04.urb[j]->status) {
-						struct us428_p4out *p4out = us428ctls->p4out + send;	// FIXME if more then 1 p4out is new, 1 gets lost.
+						struct us428_p4out *p4out = us428ctls->p4out + send;	// FIXME if more than 1 p4out is new, 1 gets lost.
 						usb_fill_bulk_urb(usX2Y->AS04.urb[j], usX2Y->chip.dev,
 								  usb_sndbulkpipe(usX2Y->chip.dev, 0x04), &p4out->val.vol, 
 								  p4out->type == eLT_Light ? sizeof(struct us428_lights) : 5,
-- 
cgit v1.2.3


From 0211a9c8508b2183e0e539509aad60414f1c3813 Mon Sep 17 00:00:00 2001
From: Frederik Schwarzer <schwarzerf@gmail.com>
Date: Mon, 29 Dec 2008 22:14:56 +0100
Subject: trivial: fix an -> a typos in documentation and comments

It is always "an" if there is a vowel _spoken_ (not written).
So it is:
"an hour" (spoken vowel)
but
"a uniform" (spoken 'j')

Signed-off-by: Frederik Schwarzer <schwarzerf@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 Documentation/dell_rbu.txt              | 4 ++--
 Documentation/laptops/thinkpad-acpi.txt | 2 +-
 Documentation/networking/tuntap.txt     | 2 +-
 arch/m68k/kernel/traps.c                | 2 +-
 drivers/acpi/executer/exprep.c          | 2 +-
 drivers/acpi/executer/exresolv.c        | 2 +-
 drivers/acpi/executer/exstore.c         | 2 +-
 drivers/acpi/resources/rscreate.c       | 2 +-
 drivers/acpi/utilities/utobject.c       | 4 ++--
 drivers/char/epca.c                     | 2 +-
 drivers/cpufreq/Kconfig                 | 4 ++--
 drivers/input/keyboard/atkbd.c          | 2 +-
 drivers/macintosh/Kconfig               | 2 +-
 drivers/misc/phantom.c                  | 2 +-
 fs/ncpfs/ioctl.c                        | 2 +-
 include/acpi/acmacros.h                 | 4 ++--
 include/acpi/actypes.h                  | 2 +-
 include/linux/ncp_fs.h                  | 2 +-
 mm/slub.c                               | 2 +-
 sound/oss/aedsp16.c                     | 2 +-
 20 files changed, 24 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/dell_rbu.txt b/Documentation/dell_rbu.txt
index 2c0d631de0cf..c11b931f8f98 100644
--- a/Documentation/dell_rbu.txt
+++ b/Documentation/dell_rbu.txt
@@ -81,8 +81,8 @@ Until this step is completed the driver cannot be unloaded.
 Also echoing either mono ,packet or init in to image_type will free up the
 memory allocated by the driver.
 
-If an user by accident executes steps 1 and 3 above without executing step 2;
-it will make the /sys/class/firmware/dell_rbu/ entries to disappear.
+If a user by accident executes steps 1 and 3 above without executing step 2;
+it will make the /sys/class/firmware/dell_rbu/ entries disappear.
 The entries can be recreated by doing the following
 echo init > /sys/devices/platform/dell_rbu/image_type
 NOTE: echoing init in image_type does not change it original value.
diff --git a/Documentation/laptops/thinkpad-acpi.txt b/Documentation/laptops/thinkpad-acpi.txt
index 71f0fe1fc1b0..898b4987bb80 100644
--- a/Documentation/laptops/thinkpad-acpi.txt
+++ b/Documentation/laptops/thinkpad-acpi.txt
@@ -1475,7 +1475,7 @@ Sysfs interface changelog:
 
 0x020100:	Marker for thinkpad-acpi with hot key NVRAM polling
 		support.  If you must, use it to know you should not
-		start an userspace NVRAM poller (allows to detect when
+		start a userspace NVRAM poller (allows to detect when
 		NVRAM is compiled out by the user because it is
 		unneeded/undesired in the first place).
 0x020101:	Marker for thinkpad-acpi with hot key NVRAM polling
diff --git a/Documentation/networking/tuntap.txt b/Documentation/networking/tuntap.txt
index 839cbb71388b..c0aab985bad9 100644
--- a/Documentation/networking/tuntap.txt
+++ b/Documentation/networking/tuntap.txt
@@ -118,7 +118,7 @@ As mentioned above, main purpose of TUN/TAP driver is tunneling.
 It is used by VTun (http://vtun.sourceforge.net).
 
 Another interesting application using TUN/TAP is pipsecd
-(http://perso.enst.fr/~beyssac/pipsec/), an userspace IPSec
+(http://perso.enst.fr/~beyssac/pipsec/), a userspace IPSec
 implementation that can use complete kernel routing (unlike FreeS/WAN).
 
 3. How does Virtual network device actually work ? 
diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c
index 6d813de2baf1..184acc90808d 100644
--- a/arch/m68k/kernel/traps.c
+++ b/arch/m68k/kernel/traps.c
@@ -401,7 +401,7 @@ static inline void do_040writebacks(struct frame *fp)
  * called from sigreturn(), must ensure userspace code didn't
  * manipulate exception frame to circumvent protection, then complete
  * pending writebacks
- * we just clear TM2 to turn it into an userspace access
+ * we just clear TM2 to turn it into a userspace access
  */
 asmlinkage void berr_040cleanup(struct frame *fp)
 {
diff --git a/drivers/acpi/executer/exprep.c b/drivers/acpi/executer/exprep.c
index 5d438c32989d..a7dc87ecee37 100644
--- a/drivers/acpi/executer/exprep.c
+++ b/drivers/acpi/executer/exprep.c
@@ -404,7 +404,7 @@ acpi_ex_prep_common_field_object(union acpi_operand_object *obj_desc,
  *
  * RETURN:      Status
  *
- * DESCRIPTION: Construct an union acpi_operand_object of type def_field and
+ * DESCRIPTION: Construct a union acpi_operand_object of type def_field and
  *              connect it to the parent Node.
  *
  ******************************************************************************/
diff --git a/drivers/acpi/executer/exresolv.c b/drivers/acpi/executer/exresolv.c
index 89571b92a522..60e8c47128e9 100644
--- a/drivers/acpi/executer/exresolv.c
+++ b/drivers/acpi/executer/exresolv.c
@@ -146,7 +146,7 @@ acpi_ex_resolve_object_to_value(union acpi_operand_object **stack_ptr,
 
 	stack_desc = *stack_ptr;
 
-	/* This is an union acpi_operand_object    */
+	/* This is a union acpi_operand_object    */
 
 	switch (ACPI_GET_OBJECT_TYPE(stack_desc)) {
 	case ACPI_TYPE_LOCAL_REFERENCE:
diff --git a/drivers/acpi/executer/exstore.c b/drivers/acpi/executer/exstore.c
index 3318df4cbd98..1c118ba78adb 100644
--- a/drivers/acpi/executer/exstore.c
+++ b/drivers/acpi/executer/exstore.c
@@ -274,7 +274,7 @@ acpi_ex_do_debug_object(union acpi_operand_object *source_desc,
  *
  * PARAMETERS:  *source_desc        - Value to be stored
  *              *dest_desc          - Where to store it.  Must be an NS node
- *                                    or an union acpi_operand_object of type
+ *                                    or a union acpi_operand_object of type
  *                                    Reference;
  *              walk_state          - Current walk state
  *
diff --git a/drivers/acpi/resources/rscreate.c b/drivers/acpi/resources/rscreate.c
index c0bbfa2c4193..08b8d73e6ee5 100644
--- a/drivers/acpi/resources/rscreate.c
+++ b/drivers/acpi/resources/rscreate.c
@@ -124,7 +124,7 @@ acpi_rs_create_resource_list(union acpi_operand_object *aml_buffer,
  *
  * FUNCTION:    acpi_rs_create_pci_routing_table
  *
- * PARAMETERS:  package_object          - Pointer to an union acpi_operand_object
+ * PARAMETERS:  package_object          - Pointer to a union acpi_operand_object
  *                                        package
  *              output_buffer           - Pointer to the user's buffer
  *
diff --git a/drivers/acpi/utilities/utobject.c b/drivers/acpi/utilities/utobject.c
index c354e7a42bcd..4bef3cfbaccb 100644
--- a/drivers/acpi/utilities/utobject.c
+++ b/drivers/acpi/utilities/utobject.c
@@ -297,7 +297,7 @@ union acpi_operand_object *acpi_ut_create_string_object(acpi_size string_size)
  *
  * RETURN:      TRUE if object is valid, FALSE otherwise
  *
- * DESCRIPTION: Validate a pointer to be an union acpi_operand_object
+ * DESCRIPTION: Validate a pointer to be a union acpi_operand_object
  *
  ******************************************************************************/
 
@@ -389,7 +389,7 @@ void acpi_ut_delete_object_desc(union acpi_operand_object *object)
 {
 	ACPI_FUNCTION_TRACE_PTR(ut_delete_object_desc, object);
 
-	/* Object must be an union acpi_operand_object    */
+	/* Object must be a union acpi_operand_object    */
 
 	if (ACPI_GET_DESCRIPTOR_TYPE(object) != ACPI_DESC_TYPE_OPERAND) {
 		ACPI_ERROR((AE_INFO,
diff --git a/drivers/char/epca.c b/drivers/char/epca.c
index 39ad820b2350..af7c13ca9493 100644
--- a/drivers/char/epca.c
+++ b/drivers/char/epca.c
@@ -769,7 +769,7 @@ static int pc_open(struct tty_struct *tty, struct file *filp)
 	/* Check status of board configured in system.  */
 
 	/*
-	 * I check to see if the epca_setup routine detected an user error. It
+	 * I check to see if the epca_setup routine detected a user error. It
 	 * might be better to put this in pc_init, but for the moment it goes
 	 * here.
 	 */
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index 5f076aef74fa..a8c8d9c19d74 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -83,7 +83,7 @@ config CPU_FREQ_DEFAULT_GOV_USERSPACE
 	select CPU_FREQ_GOV_USERSPACE
 	help
 	  Use the CPUFreq governor 'userspace' as default. This allows
-	  you to set the CPU frequency manually or when an userspace 
+	  you to set the CPU frequency manually or when a userspace 
 	  program shall be able to set the CPU dynamically without having
 	  to enable the userspace governor manually.
 
@@ -138,7 +138,7 @@ config CPU_FREQ_GOV_USERSPACE
 	tristate "'userspace' governor for userspace frequency scaling"
 	help
 	  Enable this cpufreq governor when you either want to set the
-	  CPU frequency manually or when an userspace program shall
+	  CPU frequency manually or when a userspace program shall
 	  be able to set the CPU dynamically, like on LART 
 	  <http://www.lartmaker.nl/>.
 
diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c
index 379b7ff354ec..b9e6bef594ac 100644
--- a/drivers/input/keyboard/atkbd.c
+++ b/drivers/input/keyboard/atkbd.c
@@ -65,7 +65,7 @@ MODULE_PARM_DESC(extra, "Enable extra LEDs and keys on IBM RapidAcces, EzKey and
 
 /*
  * Scancode to keycode tables. These are just the default setting, and
- * are loadable via an userland utility.
+ * are loadable via a userland utility.
  */
 
 static const unsigned short atkbd_set2_keycode[512] = {
diff --git a/drivers/macintosh/Kconfig b/drivers/macintosh/Kconfig
index b52659620d50..173cf55c64d0 100644
--- a/drivers/macintosh/Kconfig
+++ b/drivers/macintosh/Kconfig
@@ -138,7 +138,7 @@ config PMAC_BACKLIGHT
 	  Say Y here to enable Macintosh specific extensions of the generic
 	  backlight code. With this enabled, the brightness keys on older
 	  PowerBooks will be enabled so you can change the screen brightness.
-	  Newer models should use an userspace daemon like pbbuttonsd.
+	  Newer models should use a userspace daemon like pbbuttonsd.
 
 config PMAC_BACKLIGHT_LEGACY
 	bool "Provide legacy ioctl's on /dev/pmu for the backlight"
diff --git a/drivers/misc/phantom.c b/drivers/misc/phantom.c
index abdebe347383..fa57b67593ae 100644
--- a/drivers/misc/phantom.c
+++ b/drivers/misc/phantom.c
@@ -6,7 +6,7 @@
  *  the Free Software Foundation; either version 2 of the License, or
  *  (at your option) any later version.
  *
- *  You need an userspace library to cooperate with this driver. It (and other
+ *  You need a userspace library to cooperate with this driver. It (and other
  *  info) may be obtained here:
  *  http://www.fi.muni.cz/~xslaby/phantom.html
  *  or alternatively, you might use OpenHaptics provided by Sensable.
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index 6d04e050c74e..f54360f50a9c 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -98,7 +98,7 @@ struct compat_ncp_objectname_ioctl
 {
 	s32		auth_type;
 	u32		object_name_len;
-	compat_caddr_t	object_name;	/* an userspace data, in most cases user name */
+	compat_caddr_t	object_name;	/* a userspace data, in most cases user name */
 };
 
 struct compat_ncp_fs_info_v2 {
diff --git a/include/acpi/acmacros.h b/include/acpi/acmacros.h
index a597207e2835..1954c9d1d012 100644
--- a/include/acpi/acmacros.h
+++ b/include/acpi/acmacros.h
@@ -333,8 +333,8 @@ struct acpi_integer_overlay {
 #define ACPI_INSERT_BITS(target, mask, source)          target = ((target & (~(mask))) | (source & mask))
 
 /*
- * An struct acpi_namespace_node can appear in some contexts
- * where a pointer to an union acpi_operand_object can also
+ * A struct acpi_namespace_node can appear in some contexts
+ * where a pointer to a union acpi_operand_object can also
  * appear. This macro is used to distinguish them.
  *
  * The "Descriptor" field is the first field in both structures.
diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
index 7220361790b3..8222e8de0d1c 100644
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -467,7 +467,7 @@ typedef u32 acpi_object_type;
 
 /*
  * These are special object types that never appear in
- * a Namespace node, only in an union acpi_operand_object
+ * a Namespace node, only in a union acpi_operand_object
  */
 #define ACPI_TYPE_LOCAL_EXTRA           0x1C
 #define ACPI_TYPE_LOCAL_DATA            0x1D
diff --git a/include/linux/ncp_fs.h b/include/linux/ncp_fs.h
index 9f2d76347f19..f69e66d151cc 100644
--- a/include/linux/ncp_fs.h
+++ b/include/linux/ncp_fs.h
@@ -87,7 +87,7 @@ struct ncp_objectname_ioctl
 #define NCP_AUTH_NDS	0x32
 	int		auth_type;
 	size_t		object_name_len;
-	void __user *	object_name;	/* an userspace data, in most cases user name */
+	void __user *	object_name;	/* a userspace data, in most cases user name */
 };
 
 struct ncp_privatedata_ioctl
diff --git a/mm/slub.c b/mm/slub.c
index f0e2892fe403..6392ae5cc6b1 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2254,7 +2254,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
 		 * Add some empty padding so that we can catch
 		 * overwrites from earlier objects rather than let
 		 * tracking information or the free pointer be
-		 * corrupted if an user writes before the start
+		 * corrupted if a user writes before the start
 		 * of the object.
 		 */
 		size += sizeof(void *);
diff --git a/sound/oss/aedsp16.c b/sound/oss/aedsp16.c
index a0274f3dac08..3ee9900ffd7b 100644
--- a/sound/oss/aedsp16.c
+++ b/sound/oss/aedsp16.c
@@ -157,7 +157,7 @@
 
    Started Fri Mar 17 16:13:18 MET 1995
 
-   v0.1 (ALPHA, was an user-level program called AudioExcelDSP16.c)
+   v0.1 (ALPHA, was a user-level program called AudioExcelDSP16.c)
    - Initial code.
    v0.2 (ALPHA)
    - Cleanups.
-- 
cgit v1.2.3


From bd53cbcce501b61921a1af2dddfa87c5b9923dfd Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Tue, 6 Jan 2009 17:20:48 +0100
Subject: ide: add ->cur_port to struct ide_host and use it for serialized
 hosts

* Pass 'ide_hwif_t *' instead of 'ide_hwgroup_t *' to unexpected_intr().

* Cache pointer to the port currently being serviced in ->cur_port
  and use it instead of hwif->hwgroup on serialized hosts.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-io.c | 25 +++++++++++++++----------
 include/linux/ide.h  |  1 +
 2 files changed, 16 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 1d63159fc97d..43bf43d802c3 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -726,7 +726,7 @@ void do_ide_request(struct request_queue *q)
 	if (!ide_lock_hwgroup(hwgroup)) {
 		ide_hwif_t *prev_port;
 repeat:
-		prev_port = hwgroup->hwif;
+		prev_port = hwif->host->cur_port;
 		hwgroup->rq = NULL;
 
 		if (drive->dev_flags & IDE_DFLAG_SLEEPING) {
@@ -736,15 +736,17 @@ repeat:
 			}
 		}
 
-		if (hwif != prev_port) {
+		if ((hwif->host->host_flags & IDE_HFLAG_SERIALIZE) &&
+		    hwif != prev_port) {
 			/*
 			 * set nIEN for previous port, drives in the
 			 * quirk_list may not like intr setups/cleanups
 			 */
-			if (hwgroup->drive->quirk_list == 0)
+			if (prev_port && hwgroup->drive->quirk_list == 0)
 				prev_port->tp_ops->set_irq(prev_port, 0);
+
+			hwif->host->cur_port = hwif;
 		}
-		hwgroup->hwif = hwif;
 		hwgroup->drive = drive;
 		drive->dev_flags &= ~(IDE_DFLAG_SLEEPING | IDE_DFLAG_PARKED);
 
@@ -976,7 +978,7 @@ void ide_timer_expiry (unsigned long data)
 /**
  *	unexpected_intr		-	handle an unexpected IDE interrupt
  *	@irq: interrupt line
- *	@hwgroup: hwgroup being processed
+ *	@hwif: port being processed
  *
  *	There's nothing really useful we can do with an unexpected interrupt,
  *	other than reading the status register (to clear it), and logging it.
@@ -1005,11 +1007,11 @@ void ide_timer_expiry (unsigned long data)
  *	is doing the current command, but we don't know which hwif burped
  *	mysteriously.
  */
- 
-static void unexpected_intr (int irq, ide_hwgroup_t *hwgroup)
+
+static void unexpected_intr(int irq, ide_hwif_t *hwif)
 {
+	ide_hwgroup_t *hwgroup = hwif->hwgroup;
 	u8 stat;
-	ide_hwif_t *hwif = hwgroup->hwif;
 
 	/*
 	 * handle the unexpected interrupt
@@ -1044,7 +1046,7 @@ static void unexpected_intr (int irq, ide_hwgroup_t *hwgroup)
  *	not need to override it. If you do be aware it is subtle in
  *	places
  *
- *	hwgroup->hwif is the interface in the group currently performing
+ *	hwif is the interface in the group currently performing
  *	a command. hwgroup->drive is the drive and hwgroup->handler is
  *	the IRQ handler to call. As we issue a command the handlers
  *	step through multiple states, reassigning the handler to the
@@ -1070,6 +1072,9 @@ irqreturn_t ide_intr (int irq, void *dev_id)
 	irqreturn_t irq_ret = IRQ_NONE;
 	int plug_device = 0;
 
+	if (hwif->host->host_flags & IDE_HFLAG_SERIALIZE)
+		hwif = hwif->host->cur_port;
+
 	spin_lock_irqsave(&hwgroup->lock, flags);
 
 	if (!ide_ack_intr(hwif))
@@ -1099,7 +1104,7 @@ irqreturn_t ide_intr (int irq, void *dev_id)
 			 * Probably not a shared PCI interrupt,
 			 * so we can safely try to do something about it:
 			 */
-			unexpected_intr(irq, hwgroup);
+			unexpected_intr(irq, hwif);
 #ifdef CONFIG_BLK_DEV_IDEPCI
 		} else {
 			/*
diff --git a/include/linux/ide.h b/include/linux/ide.h
index db5ef8ae1ab9..3de13df8bcef 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -852,6 +852,7 @@ struct ide_host {
 	unsigned int	(*init_chipset)(struct pci_dev *);
 	unsigned long	host_flags;
 	void		*host_priv;
+	ide_hwif_t	*cur_port;	/* for hosts requiring serialization */
 };
 
 /*
-- 
cgit v1.2.3


From ae86afaee6a1c77c7a06d81dcc3bf872204d3bec Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Tue, 6 Jan 2009 17:20:48 +0100
Subject: ide: use per-port IRQ handlers

Use hwif instead of hwgroup as {request,free}_irq()'s cookie,
teach ide_intr() to return early for non-active serialized ports,
modify unexpected_intr() accordingly and then use per-port IRQ
handlers instead of per-hwgroup ones.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-io.c    | 58 ++++++++++++++++++++-----------------------------
 drivers/ide/ide-probe.c | 21 +++++++-----------
 drivers/ide/ide.c       | 17 +--------------
 include/linux/ide.h     |  4 ++--
 4 files changed, 34 insertions(+), 66 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 43bf43d802c3..1fc739f44154 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -723,7 +723,7 @@ void do_ide_request(struct request_queue *q)
 	spin_unlock_irq(q->queue_lock);
 	spin_lock_irq(&hwgroup->lock);
 
-	if (!ide_lock_hwgroup(hwgroup)) {
+	if (!ide_lock_hwgroup(hwgroup, hwif)) {
 		ide_hwif_t *prev_port;
 repeat:
 		prev_port = hwif->host->cur_port;
@@ -1002,44 +1002,30 @@ void ide_timer_expiry (unsigned long data)
  *	before completing the issuance of any new drive command, so we will not
  *	be accidentally invoked as a result of any valid command completion
  *	interrupt.
- *
- *	Note that we must walk the entire hwgroup here. We know which hwif
- *	is doing the current command, but we don't know which hwif burped
- *	mysteriously.
  */
 
 static void unexpected_intr(int irq, ide_hwif_t *hwif)
 {
-	ide_hwgroup_t *hwgroup = hwif->hwgroup;
-	u8 stat;
-
-	/*
-	 * handle the unexpected interrupt
-	 */
-	do {
-		if (hwif->irq == irq) {
-			stat = hwif->tp_ops->read_status(hwif);
-
-			if (!OK_STAT(stat, ATA_DRDY, BAD_STAT)) {
-				/* Try to not flood the console with msgs */
-				static unsigned long last_msgtime, count;
-				++count;
-				if (time_after(jiffies, last_msgtime + HZ)) {
-					last_msgtime = jiffies;
-					printk(KERN_ERR "%s%s: unexpected interrupt, "
-						"status=0x%02x, count=%ld\n",
-						hwif->name,
-						(hwif->next==hwgroup->hwif) ? "" : "(?)", stat, count);
-				}
-			}
+	u8 stat = hwif->tp_ops->read_status(hwif);
+
+	if (!OK_STAT(stat, ATA_DRDY, BAD_STAT)) {
+		/* Try to not flood the console with msgs */
+		static unsigned long last_msgtime, count;
+		++count;
+
+		if (time_after(jiffies, last_msgtime + HZ)) {
+			last_msgtime = jiffies;
+			printk(KERN_ERR "%s: unexpected interrupt, "
+				"status=0x%02x, count=%ld\n",
+				hwif->name, stat, count);
 		}
-	} while ((hwif = hwif->next) != hwgroup->hwif);
+	}
 }
 
 /**
  *	ide_intr	-	default IDE interrupt handler
  *	@irq: interrupt number
- *	@dev_id: hwif group
+ *	@dev_id: hwif
  *	@regs: unused weirdness from the kernel irq layer
  *
  *	This is the default IRQ handler for the IDE layer. You should
@@ -1063,17 +1049,19 @@ static void unexpected_intr(int irq, ide_hwif_t *hwif)
  
 irqreturn_t ide_intr (int irq, void *dev_id)
 {
-	unsigned long flags;
-	ide_hwgroup_t *hwgroup = (ide_hwgroup_t *)dev_id;
-	ide_hwif_t *hwif = hwgroup->hwif;
+	ide_hwif_t *hwif = (ide_hwif_t *)dev_id;
+	ide_hwgroup_t *hwgroup = hwif->hwgroup;
 	ide_drive_t *uninitialized_var(drive);
 	ide_handler_t *handler;
+	unsigned long flags;
 	ide_startstop_t startstop;
 	irqreturn_t irq_ret = IRQ_NONE;
 	int plug_device = 0;
 
-	if (hwif->host->host_flags & IDE_HFLAG_SERIALIZE)
-		hwif = hwif->host->cur_port;
+	if (hwif->host->host_flags & IDE_HFLAG_SERIALIZE) {
+		if (hwif != hwif->host->cur_port)
+			goto out_early;
+	}
 
 	spin_lock_irqsave(&hwgroup->lock, flags);
 
@@ -1172,7 +1160,7 @@ out_handled:
 	irq_ret = IRQ_HANDLED;
 out:
 	spin_unlock_irqrestore(&hwgroup->lock, flags);
-
+out_early:
 	if (plug_device)
 		ide_plug_device(drive);
 
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index c5adb7b9c5b5..2752509531b7 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -1022,6 +1022,7 @@ static int init_irq (ide_hwif_t *hwif)
 	unsigned int index;
 	ide_hwgroup_t *hwgroup;
 	ide_hwif_t *match = NULL;
+	int sa = 0;
 
 	mutex_lock(&ide_cfg_mtx);
 	hwif->hwgroup = NULL;
@@ -1076,24 +1077,18 @@ static int init_irq (ide_hwif_t *hwif)
 
 	ide_ports[hwif->index] = hwif;
 
-	/*
-	 * Allocate the irq, if not already obtained for another hwif
-	 */
-	if (!match || match->irq != hwif->irq) {
-		int sa = 0;
 #if defined(__mc68000__)
-		sa = IRQF_SHARED;
+	sa = IRQF_SHARED;
 #endif /* __mc68000__ */
 
-		if (hwif->chipset == ide_pci)
-			sa = IRQF_SHARED;
+	if (hwif->chipset == ide_pci)
+		sa = IRQF_SHARED;
 
-		if (io_ports->ctl_addr)
-			hwif->tp_ops->set_irq(hwif, 1);
+	if (io_ports->ctl_addr)
+		hwif->tp_ops->set_irq(hwif, 1);
 
-		if (request_irq(hwif->irq,&ide_intr,sa,hwif->name,hwgroup))
-	       		goto out_unlink;
-	}
+	if (request_irq(hwif->irq, &ide_intr, sa, hwif->name, hwif))
+		goto out_unlink;
 
 	if (!hwif->rqsize) {
 		if ((hwif->host_flags & IDE_HFLAG_NO_LBA48) ||
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index 46a2d4ca812b..5bc2e4782a55 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -175,10 +175,6 @@ EXPORT_SYMBOL_GPL(ide_port_unregister_devices);
 
 void ide_unregister(ide_hwif_t *hwif)
 {
-	ide_hwif_t *g;
-	ide_hwgroup_t *hwgroup;
-	int irq_count = 0;
-
 	BUG_ON(in_interrupt());
 	BUG_ON(irqs_disabled());
 
@@ -191,18 +187,7 @@ void ide_unregister(ide_hwif_t *hwif)
 
 	ide_proc_unregister_port(hwif);
 
-	hwgroup = hwif->hwgroup;
-	/*
-	 * free the irq if we were the only hwif using it
-	 */
-	g = hwgroup->hwif;
-	do {
-		if (g->irq == hwif->irq)
-			++irq_count;
-		g = g->next;
-	} while (g != hwgroup->hwif);
-	if (irq_count == 1)
-		free_irq(hwif->irq, hwgroup);
+	free_irq(hwif->irq, hwif);
 
 	ide_remove_port_from_hwgroup(hwif);
 
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 3de13df8bcef..f5382ad0bd4c 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1274,14 +1274,14 @@ extern void ide_stall_queue(ide_drive_t *drive, unsigned long timeout);
 extern void ide_timer_expiry(unsigned long);
 extern irqreturn_t ide_intr(int irq, void *dev_id);
 
-static inline int ide_lock_hwgroup(ide_hwgroup_t *hwgroup)
+static inline int ide_lock_hwgroup(ide_hwgroup_t *hwgroup, ide_hwif_t *hwif)
 {
 	if (hwgroup->busy)
 		return 1;
 
 	hwgroup->busy = 1;
 	/* for atari only */
-	ide_get_lock(ide_intr, hwgroup);
+	ide_get_lock(ide_intr, hwif);
 
 	return 0;
 }
-- 
cgit v1.2.3


From efe0397eef544ac4bcca23d39aa8d5db154952e0 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Tue, 6 Jan 2009 17:20:49 +0100
Subject: ide: remove hwgroup->hwif and {drive,hwif}->next

* Add 'int port_count' field to ide_hwgroup_t to keep the track
  of the number of ports in the hwgroup.  Then update init_irq()
  and ide_remove_port_from_hwgroup() to use it.

* Remove no longer needed hwgroup->hwif, {drive,hwif}->next,
  ide_add_drive_to_hwgroup() and ide_remove_drive_from_hwgroup()
  (hwgroup->drive now only denotes the currently active device
   in the hwgroup).

* Update locking documentation in <linux/ide.h>.

While at it:

* Rename ->drive field in ide_hwgroup_t to ->cur_dev.

* Use __func__ in ide_timer_expiry().

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-io.c    | 12 +++----
 drivers/ide/ide-probe.c | 90 ++++---------------------------------------------
 include/linux/ide.h     | 13 ++-----
 3 files changed, 15 insertions(+), 100 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 1fc739f44154..4ce793c05629 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -742,12 +742,12 @@ repeat:
 			 * set nIEN for previous port, drives in the
 			 * quirk_list may not like intr setups/cleanups
 			 */
-			if (prev_port && hwgroup->drive->quirk_list == 0)
+			if (prev_port && hwgroup->cur_dev->quirk_list == 0)
 				prev_port->tp_ops->set_irq(prev_port, 0);
 
 			hwif->host->cur_port = hwif;
 		}
-		hwgroup->drive = drive;
+		hwgroup->cur_dev = drive;
 		drive->dev_flags &= ~(IDE_DFLAG_SLEEPING | IDE_DFLAG_PARKED);
 
 		spin_unlock_irq(&hwgroup->lock);
@@ -913,9 +913,9 @@ void ide_timer_expiry (unsigned long data)
 		 * Either way, we don't really want to complain about anything.
 		 */
 	} else {
-		drive = hwgroup->drive;
+		drive = hwgroup->cur_dev;
 		if (!drive) {
-			printk(KERN_ERR "ide_timer_expiry: hwgroup->drive was NULL\n");
+			printk(KERN_ERR "%s: ->cur_dev was NULL\n", __func__);
 			hwgroup->handler = NULL;
 		} else {
 			ide_hwif_t *hwif;
@@ -1033,7 +1033,7 @@ static void unexpected_intr(int irq, ide_hwif_t *hwif)
  *	places
  *
  *	hwif is the interface in the group currently performing
- *	a command. hwgroup->drive is the drive and hwgroup->handler is
+ *	a command. hwgroup->cur_dev is the drive and hwgroup->handler is
  *	the IRQ handler to call. As we issue a command the handlers
  *	step through multiple states, reassigning the handler to the
  *	next step in the process. Unlike a smart SCSI controller IDE
@@ -1105,7 +1105,7 @@ irqreturn_t ide_intr (int irq, void *dev_id)
 		goto out;
 	}
 
-	drive = hwgroup->drive;
+	drive = hwgroup->cur_dev;
 	if (!drive) {
 		/*
 		 * This should NEVER happen, and there isn't much
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 2752509531b7..68f3c87b8284 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -918,27 +918,9 @@ static int ide_init_queue(ide_drive_t *drive)
 	return 0;
 }
 
-static void ide_add_drive_to_hwgroup(ide_drive_t *drive)
-{
-	ide_hwgroup_t *hwgroup = drive->hwif->hwgroup;
-
-	spin_lock_irq(&hwgroup->lock);
-	if (!hwgroup->drive) {
-		/* first drive for hwgroup. */
-		drive->next = drive;
-		hwgroup->drive = drive;
-		hwgroup->hwif = HWIF(hwgroup->drive);
-	} else {
-		drive->next = hwgroup->drive->next;
-		hwgroup->drive->next = drive;
-	}
-	spin_unlock_irq(&hwgroup->lock);
-}
-
 /*
  * For any present drive:
  * - allocate the block device queue
- * - link drive into the hwgroup
  */
 static int ide_port_setup_devices(ide_hwif_t *hwif)
 {
@@ -961,8 +943,6 @@ static int ide_port_setup_devices(ide_hwif_t *hwif)
 		}
 
 		j++;
-
-		ide_add_drive_to_hwgroup(drive);
 	}
 	mutex_unlock(&ide_cfg_mtx);
 
@@ -978,33 +958,9 @@ void ide_remove_port_from_hwgroup(ide_hwif_t *hwif)
 	ide_ports[hwif->index] = NULL;
 
 	spin_lock_irq(&hwgroup->lock);
-	/*
-	 * Remove us from the hwgroup, and free
-	 * the hwgroup if we were the only member
-	 */
-	if (hwif->next == hwif) {
-		BUG_ON(hwgroup->hwif != hwif);
+	/* Free the hwgroup if we were the only member. */
+	if (--hwgroup->port_count == 0)
 		kfree(hwgroup);
-	} else {
-		/* There is another interface in hwgroup.
-		 * Unlink us, and set hwgroup->drive and ->hwif to
-		 * something sane.
-		 */
-		ide_hwif_t *g = hwgroup->hwif;
-
-		while (g->next != hwif)
-			g = g->next;
-		g->next = hwif->next;
-		if (hwgroup->hwif == hwif) {
-			/* Chose a random hwif for hwgroup->hwif.
-			 * It's guaranteed that there are no drives
-			 * left in the hwgroup.
-			 */
-			BUG_ON(hwgroup->drive != NULL);
-			hwgroup->hwif = g;
-		}
-		BUG_ON(hwgroup->hwif == hwif);
-	}
 	spin_unlock_irq(&hwgroup->lock);
 }
 
@@ -1044,20 +1000,9 @@ static int init_irq (ide_hwif_t *hwif)
 	if (match) {
 		hwgroup = match->hwgroup;
 		hwif->hwgroup = hwgroup;
-		/*
-		 * Link us into the hwgroup.
-		 * This must be done early, do ensure that unexpected_intr
-		 * can find the hwif and prevent irq storms.
-		 * No drives are attached to the new hwif, choose_drive
-		 * can't do anything stupid (yet).
-		 * Add ourself as the 2nd entry to the hwgroup->hwif
-		 * linked list, the first entry is the hwif that owns
-		 * hwgroup->handler - do not change that.
-		 */
+
 		spin_lock_irq(&hwgroup->lock);
-		hwif->next = hwgroup->hwif->next;
-		hwgroup->hwif->next = hwif;
-		BUG_ON(hwif->next == hwif);
+		hwgroup->port_count++;
 		spin_unlock_irq(&hwgroup->lock);
 	} else {
 		hwgroup = kmalloc_node(sizeof(*hwgroup), GFP_KERNEL|__GFP_ZERO,
@@ -1068,7 +1013,8 @@ static int init_irq (ide_hwif_t *hwif)
 		spin_lock_init(&hwgroup->lock);
 
 		hwif->hwgroup = hwgroup;
-		hwgroup->hwif = hwif->next = hwif;
+
+		hwgroup->port_count = 1;
 
 		init_timer(&hwgroup->timer);
 		hwgroup->timer.function = &ide_timer_expiry;
@@ -1191,29 +1137,6 @@ void ide_init_disk(struct gendisk *disk, ide_drive_t *drive)
 
 EXPORT_SYMBOL_GPL(ide_init_disk);
 
-static void ide_remove_drive_from_hwgroup(ide_drive_t *drive)
-{
-	ide_hwgroup_t *hwgroup = drive->hwif->hwgroup;
-
-	if (drive == drive->next) {
-		/* special case: last drive from hwgroup. */
-		BUG_ON(hwgroup->drive != drive);
-		hwgroup->drive = NULL;
-	} else {
-		ide_drive_t *walk;
-
-		walk = hwgroup->drive;
-		while (walk->next != drive)
-			walk = walk->next;
-		walk->next = drive->next;
-		if (hwgroup->drive == drive) {
-			hwgroup->drive = drive->next;
-			hwgroup->hwif = hwgroup->drive->hwif;
-		}
-	}
-	BUG_ON(hwgroup->drive == drive);
-}
-
 static void drive_release_dev (struct device *dev)
 {
 	ide_drive_t *drive = container_of(dev, ide_drive_t, gendev);
@@ -1222,7 +1145,6 @@ static void drive_release_dev (struct device *dev)
 	ide_proc_unregister_device(drive);
 
 	spin_lock_irq(&hwgroup->lock);
-	ide_remove_drive_from_hwgroup(drive);
 	kfree(drive->id);
 	drive->id = NULL;
 	drive->dev_flags &= ~IDE_DFLAG_PRESENT;
diff --git a/include/linux/ide.h b/include/linux/ide.h
index f5382ad0bd4c..8b74ccdd221c 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -588,7 +588,6 @@ struct ide_drive_s {
 	struct request_queue	*queue;	/* request queue */
 
 	struct request		*rq;	/* current request */
-	struct ide_drive_s 	*next;	/* circular list of hwgroup drives */
 	void		*driver_data;	/* extra driver data */
 	u16			*id;	/* identification info */
 #ifdef CONFIG_IDE_PROC_FS
@@ -750,7 +749,6 @@ struct ide_dma_ops {
 struct ide_host;
 
 typedef struct hwif_s {
-	struct hwif_s *next;		/* for linked-list in ide_hwgroup_t */
 	struct hwif_s *mate;		/* other hwif from same PCI chip */
 	struct hwgroup_s *hwgroup;	/* actually (ide_hwgroup_t *) */
 	struct proc_dir_entry *proc;	/* /proc/ide/ directory entry */
@@ -874,9 +872,7 @@ typedef struct hwgroup_s {
 	unsigned int polling	: 1;
 
 		/* current drive */
-	ide_drive_t *drive;
-		/* ptr to current hwif in linked-list */
-	ide_hwif_t *hwif;
+	ide_drive_t *cur_dev;
 
 		/* current request */
 	struct request *rq;
@@ -892,6 +888,8 @@ typedef struct hwgroup_s {
 	int req_gen_timer;
 
 	spinlock_t lock;
+
+	int port_count;
 } ide_hwgroup_t;
 
 typedef struct ide_driver_s ide_driver_t;
@@ -1622,12 +1620,7 @@ extern struct mutex ide_cfg_mtx;
 /*
  * Structure locking:
  *
- * ide_cfg_mtx and hwgroup->lock together protect changes to
- * ide_hwif_t->next
- * ide_drive_t->next
- *
  * ide_hwgroup_t->busy: hwgroup->lock
- * ide_hwgroup_t->hwif: hwgroup->lock
  * ide_hwif_t->{hwgroup,mate}: constant, no locking
  * ide_drive_t->hwif: constant, no locking
  */
-- 
cgit v1.2.3


From 5b31f855f10d0053e738baa6d91fb6a3fad35119 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Tue, 6 Jan 2009 17:20:49 +0100
Subject: ide: use lock bitops for ports serialization (v2)

* Add ->host_busy field to struct ide_host and use it's first bit
  together with lock bitops to provide new ports serialization method.

* Convert core IDE code to use new ide_[un]lock_host() helpers.

  This removes the need for taking hwgroup->lock if host is already
  busy on serialized hosts and makes it possible to merge ide_hwgroup_t
  into ide_hwif_t (done in the later patch).

* Remove no longer needed ide_hwgroup_t.busy and ide_[un]lock_hwgroup().

* Update do_ide_request() documentation.

v2:
* ide_release_lock() should be called inside IDE_HFLAG_SERIALIZE check.

* Add ide_hwif_t.busy flag and ide_[un]lock_port() for serializing
  devices on a port.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-io.c | 105 ++++++++++++++++++++++++++++++---------------------
 include/linux/ide.h  |  35 +++--------------
 2 files changed, 69 insertions(+), 71 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 4ce793c05629..8f371821c614 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -666,45 +666,54 @@ void ide_stall_queue (ide_drive_t *drive, unsigned long timeout)
 }
 EXPORT_SYMBOL(ide_stall_queue);
 
+static inline int ide_lock_port(ide_hwif_t *hwif)
+{
+	if (hwif->busy)
+		return 1;
+
+	hwif->busy = 1;
+
+	return 0;
+}
+
+static inline void ide_unlock_port(ide_hwif_t *hwif)
+{
+	hwif->busy = 0;
+}
+
+static inline int ide_lock_host(struct ide_host *host, ide_hwif_t *hwif)
+{
+	int rc = 0;
+
+	if (host->host_flags & IDE_HFLAG_SERIALIZE) {
+		rc = test_and_set_bit_lock(IDE_HOST_BUSY, &host->host_busy);
+		if (rc == 0) {
+			/* for atari only */
+			ide_get_lock(ide_intr, hwif);
+		}
+	}
+	return rc;
+}
+
+static inline void ide_unlock_host(struct ide_host *host)
+{
+	if (host->host_flags & IDE_HFLAG_SERIALIZE) {
+		/* for atari only */
+		ide_release_lock();
+		clear_bit_unlock(IDE_HOST_BUSY, &host->host_busy);
+	}
+}
+
 /*
  * Issue a new request to a drive from hwgroup
- *
- * A hwgroup is a serialized group of IDE interfaces.  Usually there is
- * exactly one hwif (interface) per hwgroup, but buggy controllers (eg. CMD640)
- * may have both interfaces in a single hwgroup to "serialize" access.
- * Or possibly multiple ISA interfaces can share a common IRQ by being grouped
- * together into one hwgroup for serialized access.
- *
- * Note also that several hwgroups can end up sharing a single IRQ,
- * possibly along with many other devices.  This is especially common in
- * PCI-based systems with off-board IDE controller cards.
- *
- * The IDE driver uses a per-hwgroup lock to protect the hwgroup->busy flag.
- *
- * The first thread into the driver for a particular hwgroup sets the
- * hwgroup->busy flag to indicate that this hwgroup is now active,
- * and then initiates processing of the top request from the request queue.
- *
- * Other threads attempting entry notice the busy setting, and will simply
- * queue their new requests and exit immediately.  Note that hwgroup->busy
- * remains set even when the driver is merely awaiting the next interrupt.
- * Thus, the meaning is "this hwgroup is busy processing a request".
- *
- * When processing of a request completes, the completing thread or IRQ-handler
- * will start the next request from the queue.  If no more work remains,
- * the driver will clear the hwgroup->busy flag and exit.
- *
- * The per-hwgroup spinlock is used to protect all access to the
- * hwgroup->busy flag, but is otherwise not needed for most processing in
- * the driver.  This makes the driver much more friendlier to shared IRQs
- * than previous designs, while remaining 100% (?) SMP safe and capable.
  */
 void do_ide_request(struct request_queue *q)
 {
 	ide_drive_t	*drive = q->queuedata;
 	ide_hwif_t	*hwif = drive->hwif;
+	struct ide_host *host = hwif->host;
 	ide_hwgroup_t	*hwgroup = hwif->hwgroup;
-	struct request	*rq;
+	struct request	*rq = NULL;
 	ide_startstop_t	startstop;
 
 	/*
@@ -721,9 +730,13 @@ void do_ide_request(struct request_queue *q)
 		blk_remove_plug(q);
 
 	spin_unlock_irq(q->queue_lock);
+
+	if (ide_lock_host(host, hwif))
+		goto plug_device_2;
+
 	spin_lock_irq(&hwgroup->lock);
 
-	if (!ide_lock_hwgroup(hwgroup, hwif)) {
+	if (!ide_lock_port(hwif)) {
 		ide_hwif_t *prev_port;
 repeat:
 		prev_port = hwif->host->cur_port;
@@ -731,7 +744,7 @@ repeat:
 
 		if (drive->dev_flags & IDE_DFLAG_SLEEPING) {
 			if (time_before(drive->sleep, jiffies)) {
-				ide_unlock_hwgroup(hwgroup);
+				ide_unlock_port(hwif);
 				goto plug_device;
 			}
 		}
@@ -761,7 +774,7 @@ repeat:
 		spin_lock_irq(&hwgroup->lock);
 
 		if (!rq) {
-			ide_unlock_hwgroup(hwgroup);
+			ide_unlock_port(hwif);
 			goto out;
 		}
 
@@ -782,7 +795,7 @@ repeat:
 		    blk_pm_request(rq) == 0 &&
 		    (rq->cmd_flags & REQ_PREEMPT) == 0) {
 			/* there should be no pending command at this point */
-			ide_unlock_hwgroup(hwgroup);
+			ide_unlock_port(hwif);
 			goto plug_device;
 		}
 
@@ -798,11 +811,15 @@ repeat:
 		goto plug_device;
 out:
 	spin_unlock_irq(&hwgroup->lock);
+	if (rq == NULL)
+		ide_unlock_host(host);
 	spin_lock_irq(q->queue_lock);
 	return;
 
 plug_device:
 	spin_unlock_irq(&hwgroup->lock);
+	ide_unlock_host(host);
+plug_device_2:
 	spin_lock_irq(q->queue_lock);
 
 	if (!elv_queue_empty(q))
@@ -844,9 +861,9 @@ static ide_startstop_t ide_dma_timeout_retry(ide_drive_t *drive, int error)
 	ide_dma_off_quietly(drive);
 
 	/*
-	 * un-busy drive etc (hwgroup->busy is cleared on return) and
-	 * make sure request is sane
+	 * un-busy drive etc and make sure request is sane
 	 */
+
 	rq = HWGROUP(drive)->rq;
 
 	if (!rq)
@@ -895,6 +912,7 @@ static void ide_plug_device(ide_drive_t *drive)
 void ide_timer_expiry (unsigned long data)
 {
 	ide_hwgroup_t	*hwgroup = (ide_hwgroup_t *) data;
+	ide_hwif_t	*uninitialized_var(hwif);
 	ide_drive_t	*uninitialized_var(drive);
 	ide_handler_t	*handler;
 	ide_expiry_t	*expiry;
@@ -918,7 +936,6 @@ void ide_timer_expiry (unsigned long data)
 			printk(KERN_ERR "%s: ->cur_dev was NULL\n", __func__);
 			hwgroup->handler = NULL;
 		} else {
-			ide_hwif_t *hwif;
 			ide_startstop_t startstop = ide_stopped;
 
 			if ((expiry = hwgroup->expiry) != NULL) {
@@ -964,15 +981,17 @@ void ide_timer_expiry (unsigned long data)
 			spin_lock_irq(&hwgroup->lock);
 			enable_irq(hwif->irq);
 			if (startstop == ide_stopped) {
-				ide_unlock_hwgroup(hwgroup);
+				ide_unlock_port(hwif);
 				plug_device = 1;
 			}
 		}
 	}
 	spin_unlock_irqrestore(&hwgroup->lock, flags);
 
-	if (plug_device)
+	if (plug_device) {
+		ide_unlock_host(hwif->host);
 		ide_plug_device(drive);
+	}
 }
 
 /**
@@ -1150,7 +1169,7 @@ irqreturn_t ide_intr (int irq, void *dev_id)
 	 */
 	if (startstop == ide_stopped) {
 		if (hwgroup->handler == NULL) {	/* paranoia */
-			ide_unlock_hwgroup(hwgroup);
+			ide_unlock_port(hwif);
 			plug_device = 1;
 		} else
 			printk(KERN_ERR "%s: %s: huh? expected NULL handler "
@@ -1161,8 +1180,10 @@ out_handled:
 out:
 	spin_unlock_irqrestore(&hwgroup->lock, flags);
 out_early:
-	if (plug_device)
+	if (plug_device) {
+		ide_unlock_host(hwif->host);
 		ide_plug_device(drive);
+	}
 
 	return irq_ret;
 }
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 8b74ccdd221c..00df155b5a02 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -828,6 +828,7 @@ typedef struct hwif_s {
 
 	unsigned	present    : 1;	/* this interface exists */
 	unsigned	sg_mapped  : 1;	/* sg_table and sg_nents are ready */
+	unsigned	busy	   : 1; /* serializes devices on a port */
 
 	struct device		gendev;
 	struct device		*portdev;
@@ -851,8 +852,13 @@ struct ide_host {
 	unsigned long	host_flags;
 	void		*host_priv;
 	ide_hwif_t	*cur_port;	/* for hosts requiring serialization */
+
+	/* used for hosts requiring serialization */
+	volatile long	host_busy;
 };
 
+#define IDE_HOST_BUSY 0
+
 /*
  *  internal ide interrupt handler type
  */
@@ -866,8 +872,6 @@ typedef struct hwgroup_s {
 		/* irq handler, if active */
 	ide_startstop_t	(*handler)(ide_drive_t *);
 
-		/* BOOL: protects all fields below */
-	volatile int busy;
 		/* BOOL: polling active & poll_timeout field valid */
 	unsigned int polling	: 1;
 
@@ -1271,26 +1275,6 @@ extern void ide_stall_queue(ide_drive_t *drive, unsigned long timeout);
 
 extern void ide_timer_expiry(unsigned long);
 extern irqreturn_t ide_intr(int irq, void *dev_id);
-
-static inline int ide_lock_hwgroup(ide_hwgroup_t *hwgroup, ide_hwif_t *hwif)
-{
-	if (hwgroup->busy)
-		return 1;
-
-	hwgroup->busy = 1;
-	/* for atari only */
-	ide_get_lock(ide_intr, hwif);
-
-	return 0;
-}
-
-static inline void ide_unlock_hwgroup(ide_hwgroup_t *hwgroup)
-{
-	/* for atari only */
-	ide_release_lock();
-	hwgroup->busy = 0;
-}
-
 extern void do_ide_request(struct request_queue *);
 
 void ide_init_disk(struct gendisk *, ide_drive_t *);
@@ -1617,13 +1601,6 @@ static inline void ide_set_max_pio(ide_drive_t *drive)
 
 extern spinlock_t ide_lock;
 extern struct mutex ide_cfg_mtx;
-/*
- * Structure locking:
- *
- * ide_hwgroup_t->busy: hwgroup->lock
- * ide_hwif_t->{hwgroup,mate}: constant, no locking
- * ide_drive_t->hwif: constant, no locking
- */
 
 #define local_irq_set(flags)	do { local_save_flags((flags)); local_irq_enable_in_hardirq(); } while (0)
 
-- 
cgit v1.2.3


From b65fac32cfe3b2f98cd472fef400bd1c1340de23 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Tue, 6 Jan 2009 17:20:50 +0100
Subject: ide: merge ide_hwgroup_t with ide_hwif_t (v2)

* Merge ide_hwgroup_t with ide_hwif_t.

* Cleanup init_irq() accordingly, then remove no longer needed
  ide_remove_port_from_hwgroup() and ide_ports[].

* Remove now unused HWGROUP() macro.

While at it:

* ide_dump_ata_error() fixups

v2:
* Fix ->quirk_list check in do_ide_request()
  (s/hwif->cur_dev/prev_port->cur_dev).

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/alim15x3.c     |   2 +-
 drivers/ide/au1xxx-ide.c   |   6 +--
 drivers/ide/icside.c       |   2 +-
 drivers/ide/ide-atapi.c    |   8 ++--
 drivers/ide/ide-cd.c       |  16 +++----
 drivers/ide/ide-dma-sff.c  |   4 +-
 drivers/ide/ide-dma.c      |   2 +-
 drivers/ide/ide-floppy.c   |   2 +-
 drivers/ide/ide-io.c       | 117 ++++++++++++++++++++++-----------------------
 drivers/ide/ide-iops.c     |  69 +++++++++++++-------------
 drivers/ide/ide-lib.c      |   9 ++--
 drivers/ide/ide-park.c     |  16 +++----
 drivers/ide/ide-pm.c       |   2 +-
 drivers/ide/ide-probe.c    |  87 +++++----------------------------
 drivers/ide/ide-tape.c     |   9 ++--
 drivers/ide/ide-taskfile.c |   4 +-
 drivers/ide/ide.c          |   7 +--
 drivers/ide/pdc202xx_old.c |   2 +-
 drivers/ide/pmac.c         |   2 +-
 drivers/ide/scc_pata.c     |   4 +-
 drivers/ide/sgiioc4.c      |   2 +-
 drivers/ide/tc86c001.c     |  10 ++--
 drivers/ide/trm290.c       |   2 +-
 drivers/ide/tx4939ide.c    |   2 +-
 drivers/ide/umc8672.c      |  13 +++--
 include/linux/ide.h        |  55 ++++++++++-----------
 26 files changed, 188 insertions(+), 266 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/alim15x3.c b/drivers/ide/alim15x3.c
index 45d2356bb725..290204e89eaf 100644
--- a/drivers/ide/alim15x3.c
+++ b/drivers/ide/alim15x3.c
@@ -198,7 +198,7 @@ static void ali_set_dma_mode(ide_drive_t *drive, const u8 speed)
 static int ali15x3_dma_setup(ide_drive_t *drive)
 {
 	if (m5229_revision < 0xC2 && drive->media != ide_disk) {
-		if (rq_data_dir(drive->hwif->hwgroup->rq))
+		if (rq_data_dir(drive->hwif->rq))
 			return 1;	/* try PIO instead of DMA */
 	}
 	return ide_dma_setup(drive);
diff --git a/drivers/ide/au1xxx-ide.c b/drivers/ide/au1xxx-ide.c
index 0ec8fd1e4dcb..db412bc772d1 100644
--- a/drivers/ide/au1xxx-ide.c
+++ b/drivers/ide/au1xxx-ide.c
@@ -213,7 +213,7 @@ static int auide_build_dmatable(ide_drive_t *drive)
 {
 	int i, iswrite, count = 0;
 	ide_hwif_t *hwif = HWIF(drive);
-	struct request *rq = HWGROUP(drive)->rq;
+	struct request *rq = hwif->rq;
 	_auide_hwif *ahwif = &auide_hwif;
 	struct scatterlist *sg;
 
@@ -309,8 +309,8 @@ static void auide_dma_exec_cmd(ide_drive_t *drive, u8 command)
 }
 
 static int auide_dma_setup(ide_drive_t *drive)
-{       	
-	struct request *rq = HWGROUP(drive)->rq;
+{
+	struct request *rq = drive->hwif->rq;
 
 	if (!auide_build_dmatable(drive)) {
 		ide_map_sg(drive, rq);
diff --git a/drivers/ide/icside.c b/drivers/ide/icside.c
index 81f70caeb40f..72d95c99f147 100644
--- a/drivers/ide/icside.c
+++ b/drivers/ide/icside.c
@@ -312,7 +312,7 @@ static int icside_dma_setup(ide_drive_t *drive)
 	ide_hwif_t *hwif = HWIF(drive);
 	struct expansion_card *ec = ECARD_DEV(hwif->dev);
 	struct icside_state *state = ecard_get_drvdata(ec);
-	struct request *rq = hwif->hwgroup->rq;
+	struct request *rq = hwif->rq;
 	unsigned int dma_mode;
 
 	if (rq_data_dir(rq))
diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index e8688c0f8645..d6a50d67b7db 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -243,7 +243,7 @@ EXPORT_SYMBOL_GPL(ide_retry_pc);
 
 int ide_cd_expiry(ide_drive_t *drive)
 {
-	struct request *rq = HWGROUP(drive)->rq;
+	struct request *rq = drive->hwif->rq;
 	unsigned long wait = 0;
 
 	debug_log("%s: rq->cmd[0]: 0x%x\n", __func__, rq->cmd[0]);
@@ -294,7 +294,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 {
 	struct ide_atapi_pc *pc = drive->pc;
 	ide_hwif_t *hwif = drive->hwif;
-	struct request *rq = hwif->hwgroup->rq;
+	struct request *rq = hwif->rq;
 	const struct ide_tp_ops *tp_ops = hwif->tp_ops;
 	xfer_func_t *xferfunc;
 	unsigned int timeout, temp;
@@ -491,7 +491,7 @@ static ide_startstop_t ide_transfer_pc(ide_drive_t *drive)
 {
 	struct ide_atapi_pc *uninitialized_var(pc);
 	ide_hwif_t *hwif = drive->hwif;
-	struct request *rq = hwif->hwgroup->rq;
+	struct request *rq = hwif->rq;
 	ide_expiry_t *expiry;
 	unsigned int timeout;
 	int cmd_len;
@@ -580,7 +580,7 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive)
 
 	if (dev_is_idecd(drive)) {
 		tf_flags = IDE_TFLAG_OUT_NSECT | IDE_TFLAG_OUT_LBAL;
-		bcount = ide_cd_get_xferlen(hwif->hwgroup->rq);
+		bcount = ide_cd_get_xferlen(hwif->rq);
 		expiry = ide_cd_expiry;
 		timeout = ATAPI_WAIT_PC;
 
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 1a7410f88249..c35b64d495f5 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -239,7 +239,7 @@ static void cdrom_queue_request_sense(ide_drive_t *drive, void *sense,
 
 static void cdrom_end_request(ide_drive_t *drive, int uptodate)
 {
-	struct request *rq = HWGROUP(drive)->rq;
+	struct request *rq = drive->hwif->rq;
 	int nsectors = rq->hard_cur_sectors;
 
 	ide_debug_log(IDE_DBG_FUNC, "Call %s, cmd: 0x%x, uptodate: 0x%x, "
@@ -306,8 +306,7 @@ static void ide_dump_status_no_sense(ide_drive_t *drive, const char *msg, u8 st)
 static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret)
 {
 	ide_hwif_t *hwif = drive->hwif;
-	ide_hwgroup_t *hwgroup = hwif->hwgroup;
-	struct request *rq = hwgroup->rq;
+	struct request *rq = hwif->rq;
 	int stat, err, sense_key;
 
 	/* check for errors */
@@ -502,7 +501,7 @@ end_request:
 		blkdev_dequeue_request(rq);
 		spin_unlock_irqrestore(q->queue_lock, flags);
 
-		hwgroup->rq = NULL;
+		hwif->rq = NULL;
 
 		cdrom_queue_request_sense(drive, rq->sense, rq);
 	} else
@@ -525,7 +524,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *);
 static ide_startstop_t cdrom_start_packet_command(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = drive->hwif;
-	struct request *rq = hwif->hwgroup->rq;
+	struct request *rq = hwif->rq;
 	int xferlen;
 
 	xferlen = ide_cd_get_xferlen(rq);
@@ -567,7 +566,7 @@ static ide_startstop_t cdrom_start_packet_command(ide_drive_t *drive)
 static ide_startstop_t cdrom_transfer_packet_command(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = drive->hwif;
-	struct request *rq = hwif->hwgroup->rq;
+	struct request *rq = hwif->rq;
 	int cmd_len;
 	ide_startstop_t startstop;
 
@@ -854,8 +853,7 @@ static int cdrom_newpc_intr_dummy_cb(struct request *rq)
 static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = drive->hwif;
-	ide_hwgroup_t *hwgroup = hwif->hwgroup;
-	struct request *rq = hwgroup->rq;
+	struct request *rq = hwif->rq;
 	xfer_func_t *xferfunc;
 	ide_expiry_t *expiry = NULL;
 	int dma_error = 0, dma, stat, thislen, uptodate = 0;
@@ -1061,7 +1059,7 @@ end_request:
 		if (blk_end_request(rq, 0, dlen))
 			BUG();
 
-		hwgroup->rq = NULL;
+		hwif->rq = NULL;
 	} else {
 		if (!uptodate)
 			rq->cmd_flags |= REQ_FAILED;
diff --git a/drivers/ide/ide-dma-sff.c b/drivers/ide/ide-dma-sff.c
index f6d2d44d8a9a..623a82d1535d 100644
--- a/drivers/ide/ide-dma-sff.c
+++ b/drivers/ide/ide-dma-sff.c
@@ -175,7 +175,7 @@ EXPORT_SYMBOL_GPL(ide_build_dmatable);
 int ide_dma_setup(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = drive->hwif;
-	struct request *rq = hwif->hwgroup->rq;
+	struct request *rq = hwif->rq;
 	unsigned int reading = rq_data_dir(rq) ? 0 : ATA_DMA_WR;
 	u8 mmio = (hwif->host_flags & IDE_HFLAG_MMIO) ? 1 : 0;
 	u8 dma_stat;
@@ -240,7 +240,7 @@ static int dma_timer_expiry(ide_drive_t *drive)
 	if ((dma_stat & 0x18) == 0x18)	/* BUSY Stupid Early Timer !! */
 		return WAIT_CMD;
 
-	hwif->hwgroup->expiry = NULL;	/* one free ride for now */
+	hwif->expiry = NULL;	/* one free ride for now */
 
 	if (dma_stat & ATA_DMA_ERR)	/* ERROR */
 		return -1;
diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c
index fffd11717b2d..72ebab0bc755 100644
--- a/drivers/ide/ide-dma.c
+++ b/drivers/ide/ide-dma.c
@@ -96,7 +96,7 @@ ide_startstop_t ide_dma_intr(ide_drive_t *drive)
 
 	if (OK_STAT(stat, DRIVE_READY, drive->bad_wstat | ATA_DRQ)) {
 		if (!dma_stat) {
-			struct request *rq = hwif->hwgroup->rq;
+			struct request *rq = hwif->rq;
 
 			task_end_request(drive, rq, stat);
 			return ide_stopped;
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 0a48e2dc53a2..3eab1c6c9b31 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -71,7 +71,7 @@
 static int ide_floppy_end_request(ide_drive_t *drive, int uptodate, int nsecs)
 {
 	struct ide_disk_obj *floppy = drive->driver_data;
-	struct request *rq = HWGROUP(drive)->rq;
+	struct request *rq = drive->hwif->rq;
 	int error;
 
 	ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__);
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 8f371821c614..6ff82d7055b9 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -88,7 +88,7 @@ static int __ide_end_request(ide_drive_t *drive, struct request *rq,
 		ret = 0;
 
 	if (ret == 0 && dequeue)
-		drive->hwif->hwgroup->rq = NULL;
+		drive->hwif->rq = NULL;
 
 	return ret;
 }
@@ -107,7 +107,7 @@ static int __ide_end_request(ide_drive_t *drive, struct request *rq,
 int ide_end_request (ide_drive_t *drive, int uptodate, int nr_sectors)
 {
 	unsigned int nr_bytes = nr_sectors << 9;
-	struct request *rq = drive->hwif->hwgroup->rq;
+	struct request *rq = drive->hwif->rq;
 
 	if (!nr_bytes) {
 		if (blk_pc_request(rq))
@@ -160,8 +160,8 @@ EXPORT_SYMBOL_GPL(ide_end_dequeued_request);
  
 void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err)
 {
-	ide_hwgroup_t *hwgroup = drive->hwif->hwgroup;
-	struct request *rq = hwgroup->rq;
+	ide_hwif_t *hwif = drive->hwif;
+	struct request *rq = hwif->rq;
 
 	if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) {
 		ide_task_t *task = (ide_task_t *)rq->special;
@@ -186,7 +186,7 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err)
 		return;
 	}
 
-	hwgroup->rq = NULL;
+	hwif->rq = NULL;
 
 	rq->errors = err;
 
@@ -321,7 +321,8 @@ ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, u8 stat)
 
 	err = ide_dump_status(drive, msg, stat);
 
-	if ((rq = HWGROUP(drive)->rq) == NULL)
+	rq = drive->hwif->rq;
+	if (rq == NULL)
 		return ide_stopped;
 
 	/* retry only "normal" I/O: */
@@ -654,7 +655,7 @@ kill_rq:
  *	@timeout: time to stall for (jiffies)
  *
  *	ide_stall_queue() can be used by a drive to give excess bandwidth back
- *	to the hwgroup by sleeping for timeout jiffies.
+ *	to the port by sleeping for timeout jiffies.
  */
  
 void ide_stall_queue (ide_drive_t *drive, unsigned long timeout)
@@ -705,14 +706,13 @@ static inline void ide_unlock_host(struct ide_host *host)
 }
 
 /*
- * Issue a new request to a drive from hwgroup
+ * Issue a new request to a device.
  */
 void do_ide_request(struct request_queue *q)
 {
 	ide_drive_t	*drive = q->queuedata;
 	ide_hwif_t	*hwif = drive->hwif;
 	struct ide_host *host = hwif->host;
-	ide_hwgroup_t	*hwgroup = hwif->hwgroup;
 	struct request	*rq = NULL;
 	ide_startstop_t	startstop;
 
@@ -734,13 +734,13 @@ void do_ide_request(struct request_queue *q)
 	if (ide_lock_host(host, hwif))
 		goto plug_device_2;
 
-	spin_lock_irq(&hwgroup->lock);
+	spin_lock_irq(&hwif->lock);
 
 	if (!ide_lock_port(hwif)) {
 		ide_hwif_t *prev_port;
 repeat:
 		prev_port = hwif->host->cur_port;
-		hwgroup->rq = NULL;
+		hwif->rq = NULL;
 
 		if (drive->dev_flags & IDE_DFLAG_SLEEPING) {
 			if (time_before(drive->sleep, jiffies)) {
@@ -755,15 +755,15 @@ repeat:
 			 * set nIEN for previous port, drives in the
 			 * quirk_list may not like intr setups/cleanups
 			 */
-			if (prev_port && hwgroup->cur_dev->quirk_list == 0)
+			if (prev_port && prev_port->cur_dev->quirk_list == 0)
 				prev_port->tp_ops->set_irq(prev_port, 0);
 
 			hwif->host->cur_port = hwif;
 		}
-		hwgroup->cur_dev = drive;
+		hwif->cur_dev = drive;
 		drive->dev_flags &= ~(IDE_DFLAG_SLEEPING | IDE_DFLAG_PARKED);
 
-		spin_unlock_irq(&hwgroup->lock);
+		spin_unlock_irq(&hwif->lock);
 		spin_lock_irq(q->queue_lock);
 		/*
 		 * we know that the queue isn't empty, but this can happen
@@ -771,7 +771,7 @@ repeat:
 		 */
 		rq = elv_next_request(drive->queue);
 		spin_unlock_irq(q->queue_lock);
-		spin_lock_irq(&hwgroup->lock);
+		spin_lock_irq(&hwif->lock);
 
 		if (!rq) {
 			ide_unlock_port(hwif);
@@ -799,25 +799,25 @@ repeat:
 			goto plug_device;
 		}
 
-		hwgroup->rq = rq;
+		hwif->rq = rq;
 
-		spin_unlock_irq(&hwgroup->lock);
+		spin_unlock_irq(&hwif->lock);
 		startstop = start_request(drive, rq);
-		spin_lock_irq(&hwgroup->lock);
+		spin_lock_irq(&hwif->lock);
 
 		if (startstop == ide_stopped)
 			goto repeat;
 	} else
 		goto plug_device;
 out:
-	spin_unlock_irq(&hwgroup->lock);
+	spin_unlock_irq(&hwif->lock);
 	if (rq == NULL)
 		ide_unlock_host(host);
 	spin_lock_irq(q->queue_lock);
 	return;
 
 plug_device:
-	spin_unlock_irq(&hwgroup->lock);
+	spin_unlock_irq(&hwif->lock);
 	ide_unlock_host(host);
 plug_device_2:
 	spin_lock_irq(q->queue_lock);
@@ -827,7 +827,7 @@ plug_device_2:
 }
 
 /*
- * un-busy the hwgroup etc, and clear any pending DMA status. we want to
+ * un-busy the port etc, and clear any pending DMA status. we want to
  * retry the current request in pio mode instead of risking tossing it
  * all away
  */
@@ -864,12 +864,11 @@ static ide_startstop_t ide_dma_timeout_retry(ide_drive_t *drive, int error)
 	 * un-busy drive etc and make sure request is sane
 	 */
 
-	rq = HWGROUP(drive)->rq;
-
+	rq = hwif->rq;
 	if (!rq)
 		goto out;
 
-	HWGROUP(drive)->rq = NULL;
+	hwif->rq = NULL;
 
 	rq->errors = 0;
 
@@ -897,7 +896,7 @@ static void ide_plug_device(ide_drive_t *drive)
 
 /**
  *	ide_timer_expiry	-	handle lack of an IDE interrupt
- *	@data: timer callback magic (hwgroup)
+ *	@data: timer callback magic (hwif)
  *
  *	An IDE command has timed out before the expected drive return
  *	occurred. At this point we attempt to clean up the current
@@ -911,19 +910,18 @@ static void ide_plug_device(ide_drive_t *drive)
  
 void ide_timer_expiry (unsigned long data)
 {
-	ide_hwgroup_t	*hwgroup = (ide_hwgroup_t *) data;
-	ide_hwif_t	*uninitialized_var(hwif);
+	ide_hwif_t	*hwif = (ide_hwif_t *)data;
 	ide_drive_t	*uninitialized_var(drive);
 	ide_handler_t	*handler;
-	ide_expiry_t	*expiry;
 	unsigned long	flags;
 	unsigned long	wait = -1;
 	int		plug_device = 0;
 
-	spin_lock_irqsave(&hwgroup->lock, flags);
+	spin_lock_irqsave(&hwif->lock, flags);
+
+	handler = hwif->handler;
 
-	if (((handler = hwgroup->handler) == NULL) ||
-	    (hwgroup->req_gen != hwgroup->req_gen_timer)) {
+	if (handler == NULL || hwif->req_gen != hwif->req_gen_timer) {
 		/*
 		 * Either a marginal timeout occurred
 		 * (got the interrupt just as timer expired),
@@ -931,38 +929,39 @@ void ide_timer_expiry (unsigned long data)
 		 * Either way, we don't really want to complain about anything.
 		 */
 	} else {
-		drive = hwgroup->cur_dev;
+		drive = hwif->cur_dev;
 		if (!drive) {
 			printk(KERN_ERR "%s: ->cur_dev was NULL\n", __func__);
-			hwgroup->handler = NULL;
+			hwif->handler = NULL;
 		} else {
+			ide_expiry_t *expiry = hwif->expiry;
 			ide_startstop_t startstop = ide_stopped;
 
-			if ((expiry = hwgroup->expiry) != NULL) {
+			if (expiry) {
 				/* continue */
 				if ((wait = expiry(drive)) > 0) {
 					/* reset timer */
-					hwgroup->timer.expires  = jiffies + wait;
-					hwgroup->req_gen_timer = hwgroup->req_gen;
-					add_timer(&hwgroup->timer);
-					spin_unlock_irqrestore(&hwgroup->lock, flags);
+					hwif->timer.expires  = jiffies + wait;
+					hwif->req_gen_timer = hwif->req_gen;
+					add_timer(&hwif->timer);
+					spin_unlock_irqrestore(&hwif->lock, flags);
 					return;
 				}
 			}
-			hwgroup->handler = NULL;
+			hwif->handler = NULL;
 			/*
 			 * We need to simulate a real interrupt when invoking
 			 * the handler() function, which means we need to
 			 * globally mask the specific IRQ:
 			 */
-			spin_unlock(&hwgroup->lock);
+			spin_unlock(&hwif->lock);
 			hwif  = HWIF(drive);
 			/* disable_irq_nosync ?? */
 			disable_irq(hwif->irq);
 			/* local CPU only,
 			 * as if we were handling an interrupt */
 			local_irq_disable();
-			if (hwgroup->polling) {
+			if (hwif->polling) {
 				startstop = handler(drive);
 			} else if (drive_is_ready(drive)) {
 				if (drive->waiting_for_dma)
@@ -978,7 +977,7 @@ void ide_timer_expiry (unsigned long data)
 					ide_error(drive, "irq timeout",
 						  hwif->tp_ops->read_status(hwif));
 			}
-			spin_lock_irq(&hwgroup->lock);
+			spin_lock_irq(&hwif->lock);
 			enable_irq(hwif->irq);
 			if (startstop == ide_stopped) {
 				ide_unlock_port(hwif);
@@ -986,7 +985,7 @@ void ide_timer_expiry (unsigned long data)
 			}
 		}
 	}
-	spin_unlock_irqrestore(&hwgroup->lock, flags);
+	spin_unlock_irqrestore(&hwif->lock, flags);
 
 	if (plug_device) {
 		ide_unlock_host(hwif->host);
@@ -1052,7 +1051,7 @@ static void unexpected_intr(int irq, ide_hwif_t *hwif)
  *	places
  *
  *	hwif is the interface in the group currently performing
- *	a command. hwgroup->cur_dev is the drive and hwgroup->handler is
+ *	a command. hwif->cur_dev is the drive and hwif->handler is
  *	the IRQ handler to call. As we issue a command the handlers
  *	step through multiple states, reassigning the handler to the
  *	next step in the process. Unlike a smart SCSI controller IDE
@@ -1063,13 +1062,12 @@ static void unexpected_intr(int irq, ide_hwif_t *hwif)
  *
  *	The handler eventually returns ide_stopped to indicate the
  *	request completed. At this point we issue the next request
- *	on the hwgroup and the process begins again.
+ *	on the port and the process begins again.
  */
- 
+
 irqreturn_t ide_intr (int irq, void *dev_id)
 {
 	ide_hwif_t *hwif = (ide_hwif_t *)dev_id;
-	ide_hwgroup_t *hwgroup = hwif->hwgroup;
 	ide_drive_t *uninitialized_var(drive);
 	ide_handler_t *handler;
 	unsigned long flags;
@@ -1082,12 +1080,14 @@ irqreturn_t ide_intr (int irq, void *dev_id)
 			goto out_early;
 	}
 
-	spin_lock_irqsave(&hwgroup->lock, flags);
+	spin_lock_irqsave(&hwif->lock, flags);
 
 	if (!ide_ack_intr(hwif))
 		goto out;
 
-	if ((handler = hwgroup->handler) == NULL || hwgroup->polling) {
+	handler = hwif->handler;
+
+	if (handler == NULL || hwif->polling) {
 		/*
 		 * Not expecting an interrupt from this drive.
 		 * That means this could be:
@@ -1124,7 +1124,7 @@ irqreturn_t ide_intr (int irq, void *dev_id)
 		goto out;
 	}
 
-	drive = hwgroup->cur_dev;
+	drive = hwif->cur_dev;
 	if (!drive) {
 		/*
 		 * This should NEVER happen, and there isn't much
@@ -1145,10 +1145,10 @@ irqreturn_t ide_intr (int irq, void *dev_id)
 		 */
 		goto out;
 
-	hwgroup->handler = NULL;
-	hwgroup->req_gen++;
-	del_timer(&hwgroup->timer);
-	spin_unlock(&hwgroup->lock);
+	hwif->handler = NULL;
+	hwif->req_gen++;
+	del_timer(&hwif->timer);
+	spin_unlock(&hwif->lock);
 
 	if (hwif->port_ops && hwif->port_ops->clear_irq)
 		hwif->port_ops->clear_irq(drive);
@@ -1159,7 +1159,7 @@ irqreturn_t ide_intr (int irq, void *dev_id)
 	/* service this interrupt, may set handler for next interrupt */
 	startstop = handler(drive);
 
-	spin_lock_irq(&hwgroup->lock);
+	spin_lock_irq(&hwif->lock);
 	/*
 	 * Note that handler() may have set things up for another
 	 * interrupt to occur soon, but it cannot happen until
@@ -1168,7 +1168,7 @@ irqreturn_t ide_intr (int irq, void *dev_id)
 	 * won't allow another of the same (on any CPU) until we return.
 	 */
 	if (startstop == ide_stopped) {
-		if (hwgroup->handler == NULL) {	/* paranoia */
+		if (hwif->handler == NULL) {	/* paranoia */
 			ide_unlock_port(hwif);
 			plug_device = 1;
 		} else
@@ -1178,7 +1178,7 @@ irqreturn_t ide_intr (int irq, void *dev_id)
 out_handled:
 	irq_ret = IRQ_HANDLED;
 out:
-	spin_unlock_irqrestore(&hwgroup->lock, flags);
+	spin_unlock_irqrestore(&hwif->lock, flags);
 out_early:
 	if (plug_device) {
 		ide_unlock_host(hwif->host);
@@ -1205,11 +1205,10 @@ out_early:
 
 void ide_do_drive_cmd(ide_drive_t *drive, struct request *rq)
 {
-	ide_hwgroup_t *hwgroup = drive->hwif->hwgroup;
 	struct request_queue *q = drive->queue;
 	unsigned long flags;
 
-	hwgroup->rq = NULL;
+	drive->hwif->rq = NULL;
 
 	spin_lock_irqsave(q->queue_lock, flags);
 	__elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);
diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c
index ad8bd6539283..b92304d0e79a 100644
--- a/drivers/ide/ide-iops.c
+++ b/drivers/ide/ide-iops.c
@@ -822,25 +822,25 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed)
 static void __ide_set_handler (ide_drive_t *drive, ide_handler_t *handler,
 		      unsigned int timeout, ide_expiry_t *expiry)
 {
-	ide_hwgroup_t *hwgroup = HWGROUP(drive);
-
-	BUG_ON(hwgroup->handler);
-	hwgroup->handler	= handler;
-	hwgroup->expiry		= expiry;
-	hwgroup->timer.expires	= jiffies + timeout;
-	hwgroup->req_gen_timer	= hwgroup->req_gen;
-	add_timer(&hwgroup->timer);
+	ide_hwif_t *hwif = drive->hwif;
+
+	BUG_ON(hwif->handler);
+	hwif->handler		= handler;
+	hwif->expiry		= expiry;
+	hwif->timer.expires	= jiffies + timeout;
+	hwif->req_gen_timer	= hwif->req_gen;
+	add_timer(&hwif->timer);
 }
 
 void ide_set_handler (ide_drive_t *drive, ide_handler_t *handler,
 		      unsigned int timeout, ide_expiry_t *expiry)
 {
-	ide_hwgroup_t *hwgroup = drive->hwif->hwgroup;
+	ide_hwif_t *hwif = drive->hwif;
 	unsigned long flags;
 
-	spin_lock_irqsave(&hwgroup->lock, flags);
+	spin_lock_irqsave(&hwif->lock, flags);
 	__ide_set_handler(drive, handler, timeout, expiry);
-	spin_unlock_irqrestore(&hwgroup->lock, flags);
+	spin_unlock_irqrestore(&hwif->lock, flags);
 }
 
 EXPORT_SYMBOL(ide_set_handler);
@@ -863,10 +863,9 @@ void ide_execute_command(ide_drive_t *drive, u8 cmd, ide_handler_t *handler,
 			 unsigned timeout, ide_expiry_t *expiry)
 {
 	ide_hwif_t *hwif = drive->hwif;
-	ide_hwgroup_t *hwgroup = hwif->hwgroup;
 	unsigned long flags;
 
-	spin_lock_irqsave(&hwgroup->lock, flags);
+	spin_lock_irqsave(&hwif->lock, flags);
 	__ide_set_handler(drive, handler, timeout, expiry);
 	hwif->tp_ops->exec_command(hwif, cmd);
 	/*
@@ -876,26 +875,25 @@ void ide_execute_command(ide_drive_t *drive, u8 cmd, ide_handler_t *handler,
 	 * FIXME: we could skip this delay with care on non shared devices
 	 */
 	ndelay(400);
-	spin_unlock_irqrestore(&hwgroup->lock, flags);
+	spin_unlock_irqrestore(&hwif->lock, flags);
 }
 EXPORT_SYMBOL(ide_execute_command);
 
 void ide_execute_pkt_cmd(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = drive->hwif;
-	ide_hwgroup_t *hwgroup = hwif->hwgroup;
 	unsigned long flags;
 
-	spin_lock_irqsave(&hwgroup->lock, flags);
+	spin_lock_irqsave(&hwif->lock, flags);
 	hwif->tp_ops->exec_command(hwif, ATA_CMD_PACKET);
 	ndelay(400);
-	spin_unlock_irqrestore(&hwgroup->lock, flags);
+	spin_unlock_irqrestore(&hwif->lock, flags);
 }
 EXPORT_SYMBOL_GPL(ide_execute_pkt_cmd);
 
 static inline void ide_complete_drive_reset(ide_drive_t *drive, int err)
 {
-	struct request *rq = drive->hwif->hwgroup->rq;
+	struct request *rq = drive->hwif->rq;
 
 	if (rq && blk_special_request(rq) && rq->cmd[0] == REQ_DRIVE_RESET)
 		ide_end_request(drive, err ? err : 1, 0);
@@ -913,7 +911,6 @@ static ide_startstop_t do_reset1 (ide_drive_t *, int);
 static ide_startstop_t atapi_reset_pollfunc (ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = drive->hwif;
-	ide_hwgroup_t *hwgroup = hwif->hwgroup;
 	u8 stat;
 
 	SELECT_DRIVE(drive);
@@ -923,20 +920,20 @@ static ide_startstop_t atapi_reset_pollfunc (ide_drive_t *drive)
 	if (OK_STAT(stat, 0, ATA_BUSY))
 		printk("%s: ATAPI reset complete\n", drive->name);
 	else {
-		if (time_before(jiffies, hwgroup->poll_timeout)) {
+		if (time_before(jiffies, hwif->poll_timeout)) {
 			ide_set_handler(drive, &atapi_reset_pollfunc, HZ/20, NULL);
 			/* continue polling */
 			return ide_started;
 		}
 		/* end of polling */
-		hwgroup->polling = 0;
+		hwif->polling = 0;
 		printk("%s: ATAPI reset timed-out, status=0x%02x\n",
 				drive->name, stat);
 		/* do it the old fashioned way */
 		return do_reset1(drive, 1);
 	}
 	/* done polling */
-	hwgroup->polling = 0;
+	hwif->polling = 0;
 	ide_complete_drive_reset(drive, 0);
 	return ide_stopped;
 }
@@ -968,7 +965,6 @@ static void ide_reset_report_error(ide_hwif_t *hwif, u8 err)
  */
 static ide_startstop_t reset_pollfunc (ide_drive_t *drive)
 {
-	ide_hwgroup_t *hwgroup	= HWGROUP(drive);
 	ide_hwif_t *hwif	= HWIF(drive);
 	const struct ide_port_ops *port_ops = hwif->port_ops;
 	u8 tmp;
@@ -986,7 +982,7 @@ static ide_startstop_t reset_pollfunc (ide_drive_t *drive)
 	tmp = hwif->tp_ops->read_status(hwif);
 
 	if (!OK_STAT(tmp, 0, ATA_BUSY)) {
-		if (time_before(jiffies, hwgroup->poll_timeout)) {
+		if (time_before(jiffies, hwif->poll_timeout)) {
 			ide_set_handler(drive, &reset_pollfunc, HZ/20, NULL);
 			/* continue polling */
 			return ide_started;
@@ -1007,7 +1003,7 @@ static ide_startstop_t reset_pollfunc (ide_drive_t *drive)
 		}
 	}
 out:
-	hwgroup->polling = 0;	/* done polling */
+	hwif->polling = 0;	/* done polling */
 	ide_complete_drive_reset(drive, err);
 	return ide_stopped;
 }
@@ -1081,7 +1077,6 @@ static void pre_reset(ide_drive_t *drive)
 static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi)
 {
 	ide_hwif_t *hwif = drive->hwif;
-	ide_hwgroup_t *hwgroup = hwif->hwgroup;
 	struct ide_io_ports *io_ports = &hwif->io_ports;
 	const struct ide_tp_ops *tp_ops = hwif->tp_ops;
 	const struct ide_port_ops *port_ops;
@@ -1089,10 +1084,10 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi)
 	unsigned int unit;
 	DEFINE_WAIT(wait);
 
-	spin_lock_irqsave(&hwgroup->lock, flags);
+	spin_lock_irqsave(&hwif->lock, flags);
 
 	/* We must not reset with running handlers */
-	BUG_ON(hwgroup->handler != NULL);
+	BUG_ON(hwif->handler != NULL);
 
 	/* For an ATAPI device, first try an ATAPI SRST. */
 	if (drive->media != ide_disk && !do_not_try_atapi) {
@@ -1101,10 +1096,10 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi)
 		udelay (20);
 		tp_ops->exec_command(hwif, ATA_CMD_DEV_RESET);
 		ndelay(400);
-		hwgroup->poll_timeout = jiffies + WAIT_WORSTCASE;
-		hwgroup->polling = 1;
+		hwif->poll_timeout = jiffies + WAIT_WORSTCASE;
+		hwif->polling = 1;
 		__ide_set_handler(drive, &atapi_reset_pollfunc, HZ/20, NULL);
-		spin_unlock_irqrestore(&hwgroup->lock, flags);
+		spin_unlock_irqrestore(&hwif->lock, flags);
 		return ide_started;
 	}
 
@@ -1127,9 +1122,9 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi)
 		if (time_before_eq(timeout, now))
 			break;
 
-		spin_unlock_irqrestore(&hwgroup->lock, flags);
+		spin_unlock_irqrestore(&hwif->lock, flags);
 		timeout = schedule_timeout_uninterruptible(timeout - now);
-		spin_lock_irqsave(&hwgroup->lock, flags);
+		spin_lock_irqsave(&hwif->lock, flags);
 	} while (timeout);
 	finish_wait(&ide_park_wq, &wait);
 
@@ -1141,7 +1136,7 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi)
 		pre_reset(&hwif->drives[unit]);
 
 	if (io_ports->ctl_addr == 0) {
-		spin_unlock_irqrestore(&hwgroup->lock, flags);
+		spin_unlock_irqrestore(&hwif->lock, flags);
 		ide_complete_drive_reset(drive, -ENXIO);
 		return ide_stopped;
 	}
@@ -1164,8 +1159,8 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi)
 	tp_ops->set_irq(hwif, drive->quirk_list == 2);
 	/* more than enough time */
 	udelay(10);
-	hwgroup->poll_timeout = jiffies + WAIT_WORSTCASE;
-	hwgroup->polling = 1;
+	hwif->poll_timeout = jiffies + WAIT_WORSTCASE;
+	hwif->polling = 1;
 	__ide_set_handler(drive, &reset_pollfunc, HZ/20, NULL);
 
 	/*
@@ -1177,7 +1172,7 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi)
 	if (port_ops && port_ops->resetproc)
 		port_ops->resetproc(drive);
 
-	spin_unlock_irqrestore(&hwgroup->lock, flags);
+	spin_unlock_irqrestore(&hwif->lock, flags);
 	return ide_started;
 }
 
diff --git a/drivers/ide/ide-lib.c b/drivers/ide/ide-lib.c
index 9f6e33d8a8b2..09526a0de734 100644
--- a/drivers/ide/ide-lib.c
+++ b/drivers/ide/ide-lib.c
@@ -273,7 +273,7 @@ int ide_set_xfer_rate(ide_drive_t *drive, u8 rate)
 
 static void ide_dump_opcode(ide_drive_t *drive)
 {
-	struct request *rq = drive->hwif->hwgroup->rq;
+	struct request *rq = drive->hwif->rq;
 	ide_task_t *task = NULL;
 
 	if (!rq)
@@ -346,10 +346,13 @@ static void ide_dump_ata_error(ide_drive_t *drive, u8 err)
 	printk(KERN_CONT "}");
 	if ((err & (ATA_BBK | ATA_ABORTED)) == ATA_BBK ||
 	    (err & (ATA_UNC | ATA_IDNF | ATA_AMNF))) {
+		struct request *rq = drive->hwif->rq;
+
 		ide_dump_sector(drive);
-		if (HWGROUP(drive) && HWGROUP(drive)->rq)
+
+		if (rq)
 			printk(KERN_CONT ", sector=%llu",
-			       (unsigned long long)HWGROUP(drive)->rq->sector);
+			       (unsigned long long)rq->sector);
 	}
 	printk(KERN_CONT "\n");
 }
diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c
index 678454ac2483..c875a957596c 100644
--- a/drivers/ide/ide-park.c
+++ b/drivers/ide/ide-park.c
@@ -7,22 +7,22 @@ DECLARE_WAIT_QUEUE_HEAD(ide_park_wq);
 
 static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
 {
-	ide_hwgroup_t *hwgroup = drive->hwif->hwgroup;
+	ide_hwif_t *hwif = drive->hwif;
 	struct request_queue *q = drive->queue;
 	struct request *rq;
 	int rc;
 
 	timeout += jiffies;
-	spin_lock_irq(&hwgroup->lock);
+	spin_lock_irq(&hwif->lock);
 	if (drive->dev_flags & IDE_DFLAG_PARKED) {
 		int reset_timer = time_before(timeout, drive->sleep);
 		int start_queue = 0;
 
 		drive->sleep = timeout;
 		wake_up_all(&ide_park_wq);
-		if (reset_timer && del_timer(&hwgroup->timer))
+		if (reset_timer && del_timer(&hwif->timer))
 			start_queue = 1;
-		spin_unlock_irq(&hwgroup->lock);
+		spin_unlock_irq(&hwif->lock);
 
 		if (start_queue) {
 			spin_lock_irq(q->queue_lock);
@@ -31,7 +31,7 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
 		}
 		return;
 	}
-	spin_unlock_irq(&hwgroup->lock);
+	spin_unlock_irq(&hwif->lock);
 
 	rq = blk_get_request(q, READ, __GFP_WAIT);
 	rq->cmd[0] = REQ_PARK_HEADS;
@@ -64,21 +64,21 @@ ssize_t ide_park_show(struct device *dev, struct device_attribute *attr,
 		      char *buf)
 {
 	ide_drive_t *drive = to_ide_device(dev);
-	ide_hwgroup_t *hwgroup = drive->hwif->hwgroup;
+	ide_hwif_t *hwif = drive->hwif;
 	unsigned long now;
 	unsigned int msecs;
 
 	if (drive->dev_flags & IDE_DFLAG_NO_UNLOAD)
 		return -EOPNOTSUPP;
 
-	spin_lock_irq(&hwgroup->lock);
+	spin_lock_irq(&hwif->lock);
 	now = jiffies;
 	if (drive->dev_flags & IDE_DFLAG_PARKED &&
 	    time_after(drive->sleep, now))
 		msecs = jiffies_to_msecs(drive->sleep - now);
 	else
 		msecs = 0;
-	spin_unlock_irq(&hwgroup->lock);
+	spin_unlock_irq(&hwif->lock);
 
 	return snprintf(buf, 20, "%u\n", msecs);
 }
diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c
index 8282c6086e6a..abb84a2dd821 100644
--- a/drivers/ide/ide-pm.c
+++ b/drivers/ide/ide-pm.c
@@ -194,7 +194,7 @@ void ide_complete_pm_request(ide_drive_t *drive, struct request *rq)
 	}
 	spin_unlock_irqrestore(q->queue_lock, flags);
 
-	drive->hwif->hwgroup->rq = NULL;
+	drive->hwif->rq = NULL;
 
 	if (blk_end_request(rq, 0, 0))
 		BUG();
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 68f3c87b8284..e953b70706c2 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -949,79 +949,20 @@ static int ide_port_setup_devices(ide_hwif_t *hwif)
 	return j;
 }
 
-static ide_hwif_t *ide_ports[MAX_HWIFS];
-
-void ide_remove_port_from_hwgroup(ide_hwif_t *hwif)
-{
-	ide_hwgroup_t *hwgroup = hwif->hwgroup;
-
-	ide_ports[hwif->index] = NULL;
-
-	spin_lock_irq(&hwgroup->lock);
-	/* Free the hwgroup if we were the only member. */
-	if (--hwgroup->port_count == 0)
-		kfree(hwgroup);
-	spin_unlock_irq(&hwgroup->lock);
-}
-
 /*
- * This routine sets up the irq for an ide interface, and creates a new
- * hwgroup for the irq/hwif if none was previously assigned.
- *
- * Much of the code is for correctly detecting/handling irq sharing
- * and irq serialization situations.  This is somewhat complex because
- * it handles static as well as dynamic (PCMCIA) IDE interfaces.
+ * This routine sets up the IRQ for an IDE interface.
  */
 static int init_irq (ide_hwif_t *hwif)
 {
 	struct ide_io_ports *io_ports = &hwif->io_ports;
-	unsigned int index;
-	ide_hwgroup_t *hwgroup;
-	ide_hwif_t *match = NULL;
 	int sa = 0;
 
 	mutex_lock(&ide_cfg_mtx);
-	hwif->hwgroup = NULL;
-
-	for (index = 0; index < MAX_HWIFS; index++) {
-		ide_hwif_t *h = ide_ports[index];
+	spin_lock_init(&hwif->lock);
 
-		if (h && h->hwgroup) {  /* scan only initialized ports */
-			if (hwif->host->host_flags & IDE_HFLAG_SERIALIZE) {
-				if (hwif->host == h->host)
-					match = h;
-			}
-		}
-	}
-
-	/*
-	 * If we are still without a hwgroup, then form a new one
-	 */
-	if (match) {
-		hwgroup = match->hwgroup;
-		hwif->hwgroup = hwgroup;
-
-		spin_lock_irq(&hwgroup->lock);
-		hwgroup->port_count++;
-		spin_unlock_irq(&hwgroup->lock);
-	} else {
-		hwgroup = kmalloc_node(sizeof(*hwgroup), GFP_KERNEL|__GFP_ZERO,
-				       hwif_to_node(hwif));
-		if (hwgroup == NULL)
-			goto out_up;
-
-		spin_lock_init(&hwgroup->lock);
-
-		hwif->hwgroup = hwgroup;
-
-		hwgroup->port_count = 1;
-
-		init_timer(&hwgroup->timer);
-		hwgroup->timer.function = &ide_timer_expiry;
-		hwgroup->timer.data = (unsigned long) hwgroup;
-	}
-
-	ide_ports[hwif->index] = hwif;
+	init_timer(&hwif->timer);
+	hwif->timer.function = &ide_timer_expiry;
+	hwif->timer.data = (unsigned long)hwif;
 
 #if defined(__mc68000__)
 	sa = IRQF_SHARED;
@@ -1034,7 +975,7 @@ static int init_irq (ide_hwif_t *hwif)
 		hwif->tp_ops->set_irq(hwif, 1);
 
 	if (request_irq(hwif->irq, &ide_intr, sa, hwif->name, hwif))
-		goto out_unlink;
+		goto out_up;
 
 	if (!hwif->rqsize) {
 		if ((hwif->host_flags & IDE_HFLAG_NO_LBA48) ||
@@ -1052,14 +993,12 @@ static int init_irq (ide_hwif_t *hwif)
 	printk(KERN_INFO "%s at 0x%08lx on irq %d", hwif->name,
 		io_ports->data_addr, hwif->irq);
 #endif /* __mc68000__ */
-	if (match)
-		printk(KERN_CONT " (serialized with %s)", match->name);
+	if (hwif->host->host_flags & IDE_HFLAG_SERIALIZE)
+		printk(KERN_CONT " (serialized)");
 	printk(KERN_CONT "\n");
 
 	mutex_unlock(&ide_cfg_mtx);
 	return 0;
-out_unlink:
-	ide_remove_port_from_hwgroup(hwif);
 out_up:
 	mutex_unlock(&ide_cfg_mtx);
 	return 1;
@@ -1140,20 +1079,20 @@ EXPORT_SYMBOL_GPL(ide_init_disk);
 static void drive_release_dev (struct device *dev)
 {
 	ide_drive_t *drive = container_of(dev, ide_drive_t, gendev);
-	ide_hwgroup_t *hwgroup = drive->hwif->hwgroup;
+	ide_hwif_t *hwif = drive->hwif;
 
 	ide_proc_unregister_device(drive);
 
-	spin_lock_irq(&hwgroup->lock);
+	spin_lock_irq(&hwif->lock);
 	kfree(drive->id);
 	drive->id = NULL;
 	drive->dev_flags &= ~IDE_DFLAG_PRESENT;
 	/* Messed up locking ... */
-	spin_unlock_irq(&hwgroup->lock);
+	spin_unlock_irq(&hwif->lock);
 	blk_cleanup_queue(drive->queue);
-	spin_lock_irq(&hwgroup->lock);
+	spin_lock_irq(&hwif->lock);
 	drive->queue = NULL;
-	spin_unlock_irq(&hwgroup->lock);
+	spin_unlock_irq(&hwif->lock);
 
 	complete(&drive->gendev_rel_comp);
 }
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 5d2aa22cd6e4..e39f2f46d982 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -479,7 +479,7 @@ static void ide_tape_kfree_buffer(idetape_tape_t *tape)
 
 static int idetape_end_request(ide_drive_t *drive, int uptodate, int nr_sects)
 {
-	struct request *rq = HWGROUP(drive)->rq;
+	struct request *rq = drive->hwif->rq;
 	idetape_tape_t *tape = drive->driver_data;
 	unsigned long flags;
 	int error;
@@ -531,7 +531,7 @@ static void ide_tape_callback(ide_drive_t *drive, int dsc)
 			printk(KERN_ERR "ide-tape: Error in REQUEST SENSE "
 					"itself - Aborting request!\n");
 	} else if (pc->c[0] == READ_6 || pc->c[0] == WRITE_6) {
-		struct request *rq = drive->hwif->hwgroup->rq;
+		struct request *rq = drive->hwif->rq;
 		int blocks = pc->xferred / tape->blk_size;
 
 		tape->avg_size += blocks * tape->blk_size;
@@ -576,7 +576,7 @@ static void ide_tape_callback(ide_drive_t *drive, int dsc)
 
 /*
  * Postpone the current request so that ide.c will be able to service requests
- * from another device on the same hwgroup while we are polling for DSC.
+ * from another device on the same port while we are polling for DSC.
  */
 static void idetape_postpone_request(ide_drive_t *drive)
 {
@@ -584,7 +584,8 @@ static void idetape_postpone_request(ide_drive_t *drive)
 
 	debug_log(DBG_PROCS, "Enter %s\n", __func__);
 
-	tape->postponed_rq = HWGROUP(drive)->rq;
+	tape->postponed_rq = drive->hwif->rq;
+
 	ide_stall_queue(drive, tape->dsc_poll_freq);
 }
 
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index bf4fb9d8d176..693e8d15fb78 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -361,7 +361,7 @@ static ide_startstop_t task_in_unexpected(ide_drive_t *drive, struct request *rq
 static ide_startstop_t task_in_intr(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = drive->hwif;
-	struct request *rq = hwif->hwgroup->rq;
+	struct request *rq = hwif->rq;
 	u8 stat = hwif->tp_ops->read_status(hwif);
 
 	/* Error? */
@@ -395,7 +395,7 @@ static ide_startstop_t task_in_intr(ide_drive_t *drive)
 static ide_startstop_t task_out_intr (ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = drive->hwif;
-	struct request *rq = HWGROUP(drive)->rq;
+	struct request *rq = hwif->rq;
 	u8 stat = hwif->tp_ops->read_status(hwif);
 
 	if (!OK_STAT(stat, DRIVE_READY, drive->bad_wstat))
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index 5bc2e4782a55..9e403282dfa7 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -189,8 +189,6 @@ void ide_unregister(ide_hwif_t *hwif)
 
 	free_irq(hwif->irq, hwif);
 
-	ide_remove_port_from_hwgroup(hwif);
-
 	device_unregister(hwif->portdev);
 	device_unregister(&hwif->gendev);
 	wait_for_completion(&hwif->gendev_rel_comp);
@@ -315,7 +313,6 @@ static int set_pio_mode_abuse(ide_hwif_t *hwif, u8 req_pio)
 static int set_pio_mode(ide_drive_t *drive, int arg)
 {
 	ide_hwif_t *hwif = drive->hwif;
-	ide_hwgroup_t *hwgroup = hwif->hwgroup;
 	const struct ide_port_ops *port_ops = hwif->port_ops;
 
 	if (arg < 0 || arg > 255)
@@ -330,9 +327,9 @@ static int set_pio_mode(ide_drive_t *drive, int arg)
 			unsigned long flags;
 
 			/* take lock for IDE_DFLAG_[NO_]UNMASK/[NO_]IO_32BIT */
-			spin_lock_irqsave(&hwgroup->lock, flags);
+			spin_lock_irqsave(&hwif->lock, flags);
 			port_ops->set_pio_mode(drive, arg);
-			spin_unlock_irqrestore(&hwgroup->lock, flags);
+			spin_unlock_irqrestore(&hwif->lock, flags);
 		} else
 			port_ops->set_pio_mode(drive, arg);
 	} else {
diff --git a/drivers/ide/pdc202xx_old.c b/drivers/ide/pdc202xx_old.c
index 624e62e5cc9a..072ef70bf061 100644
--- a/drivers/ide/pdc202xx_old.c
+++ b/drivers/ide/pdc202xx_old.c
@@ -169,8 +169,8 @@ static void pdc202xx_dma_start(ide_drive_t *drive)
 	if (drive->current_speed > XFER_UDMA_2)
 		pdc_old_enable_66MHz_clock(drive->hwif);
 	if (drive->media != ide_disk || (drive->dev_flags & IDE_DFLAG_LBA48)) {
-		struct request *rq	= HWGROUP(drive)->rq;
 		ide_hwif_t *hwif	= HWIF(drive);
+		struct request *rq	= hwif->rq;
 		unsigned long high_16	= hwif->extra_base - 16;
 		unsigned long atapi_reg	= high_16 + (hwif->channel ? 0x24 : 0x20);
 		u32 word_count	= 0;
diff --git a/drivers/ide/pmac.c b/drivers/ide/pmac.c
index 7c481bb56fab..899b96baf215 100644
--- a/drivers/ide/pmac.c
+++ b/drivers/ide/pmac.c
@@ -1516,7 +1516,7 @@ pmac_ide_dma_setup(ide_drive_t *drive)
 	ide_hwif_t *hwif = HWIF(drive);
 	pmac_ide_hwif_t *pmif =
 		(pmac_ide_hwif_t *)dev_get_drvdata(hwif->gendev.parent);
-	struct request *rq = HWGROUP(drive)->rq;
+	struct request *rq = hwif->rq;
 	u8 unit = drive->dn & 1, ata4 = (pmif->kind == controller_kl_ata4);
 
 	if (!pmac_ide_build_dmatable(drive, rq)) {
diff --git a/drivers/ide/scc_pata.c b/drivers/ide/scc_pata.c
index 0f48f9dacfa5..e966113fd569 100644
--- a/drivers/ide/scc_pata.c
+++ b/drivers/ide/scc_pata.c
@@ -316,7 +316,7 @@ static void scc_dma_host_set(ide_drive_t *drive, int on)
 static int scc_dma_setup(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = drive->hwif;
-	struct request *rq = HWGROUP(drive)->rq;
+	struct request *rq = hwif->rq;
 	unsigned int reading;
 	u8 dma_stat;
 
@@ -405,7 +405,7 @@ static int scc_dma_end(ide_drive_t *drive)
 			       drive->name);
 			data_loss = 1;
 			if (retry++) {
-				struct request *rq = HWGROUP(drive)->rq;
+				struct request *rq = hwif->rq;
 				int unit;
 				/* ERROR_RESET and drive->crc_count are needed
 				 * to reduce DMA transfer mode in retry process.
diff --git a/drivers/ide/sgiioc4.c b/drivers/ide/sgiioc4.c
index a687a7dfea6f..c68b71b1087b 100644
--- a/drivers/ide/sgiioc4.c
+++ b/drivers/ide/sgiioc4.c
@@ -492,7 +492,7 @@ use_pio_instead:
 
 static int sgiioc4_dma_setup(ide_drive_t *drive)
 {
-	struct request *rq = HWGROUP(drive)->rq;
+	struct request *rq = drive->hwif->rq;
 	unsigned int count = 0;
 	int ddir;
 
diff --git a/drivers/ide/tc86c001.c b/drivers/ide/tc86c001.c
index 93e2cce4b296..accb379bcad6 100644
--- a/drivers/ide/tc86c001.c
+++ b/drivers/ide/tc86c001.c
@@ -64,11 +64,10 @@ static int tc86c001_timer_expiry(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif	= HWIF(drive);
 	ide_expiry_t *expiry	= ide_get_hwifdata(hwif);
-	ide_hwgroup_t *hwgroup	= HWGROUP(drive);
 	u8 dma_stat		= inb(hwif->dma_base + ATA_DMA_STATUS);
 
 	/* Restore a higher level driver's expiry handler first. */
-	hwgroup->expiry	= expiry;
+	hwif->expiry = expiry;
 
 	if ((dma_stat & 5) == 1) {	/* DMA active and no interrupt */
 		unsigned long sc_base	= hwif->config_data;
@@ -111,10 +110,9 @@ static int tc86c001_timer_expiry(ide_drive_t *drive)
 static void tc86c001_dma_start(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif	= HWIF(drive);
-	ide_hwgroup_t *hwgroup	= HWGROUP(drive);
 	unsigned long sc_base	= hwif->config_data;
 	unsigned long twcr_port	= sc_base + (drive->dn ? 0x06 : 0x04);
-	unsigned long nsectors	= hwgroup->rq->nr_sectors;
+	unsigned long nsectors	= hwif->rq->nr_sectors;
 
 	/*
 	 * We have to manually load the sector count and size into
@@ -125,8 +123,8 @@ static void tc86c001_dma_start(ide_drive_t *drive)
 	outw(SECTOR_SIZE / 2, twcr_port); /* Transfer Word Count 1/2 */
 
 	/* Install our timeout expiry hook, saving the current handler... */
-	ide_set_hwifdata(hwif, hwgroup->expiry);
-	hwgroup->expiry = &tc86c001_timer_expiry;
+	ide_set_hwifdata(hwif, hwif->expiry);
+	hwif->expiry = &tc86c001_timer_expiry;
 
 	ide_dma_start(drive);
 }
diff --git a/drivers/ide/trm290.c b/drivers/ide/trm290.c
index 2a5ea90cf8b8..79a03df118d8 100644
--- a/drivers/ide/trm290.c
+++ b/drivers/ide/trm290.c
@@ -184,7 +184,7 @@ static void trm290_dma_exec_cmd(ide_drive_t *drive, u8 command)
 static int trm290_dma_setup(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = drive->hwif;
-	struct request *rq = hwif->hwgroup->rq;
+	struct request *rq = hwif->rq;
 	unsigned int count, rw;
 
 	if (rq_data_dir(rq)) {
diff --git a/drivers/ide/tx4939ide.c b/drivers/ide/tx4939ide.c
index 4a8c5a21bd4c..1ac27ac7283b 100644
--- a/drivers/ide/tx4939ide.c
+++ b/drivers/ide/tx4939ide.c
@@ -293,7 +293,7 @@ static int tx4939ide_dma_setup(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = drive->hwif;
 	void __iomem *base = TX4939IDE_BASE(hwif);
-	struct request *rq = hwif->hwgroup->rq;
+	struct request *rq = hwif->rq;
 	u8 reading;
 	int nent;
 
diff --git a/drivers/ide/umc8672.c b/drivers/ide/umc8672.c
index e29978cf6197..0608d41fb6d0 100644
--- a/drivers/ide/umc8672.c
+++ b/drivers/ide/umc8672.c
@@ -106,22 +106,21 @@ static void umc_set_speeds(u8 speeds[])
 
 static void umc_set_pio_mode(ide_drive_t *drive, const u8 pio)
 {
-	ide_hwif_t *hwif = drive->hwif;
-	ide_hwgroup_t *mate_hwgroup = hwif->mate ? hwif->mate->hwgroup : NULL;
+	ide_hwif_t *hwif = drive->hwif, *mate = hwif->mate;
 	unsigned long uninitialized_var(flags);
 
 	printk("%s: setting umc8672 to PIO mode%d (speed %d)\n",
 		drive->name, pio, pio_to_umc[pio]);
-	if (mate_hwgroup)
-		spin_lock_irqsave(&mate_hwgroup->lock, flags);
-	if (mate_hwgroup && mate_hwgroup->handler) {
+	if (mate)
+		spin_lock_irqsave(&mate->lock, flags);
+	if (mate && mate->handler) {
 		printk(KERN_ERR "umc8672: other interface is busy: exiting tune_umc()\n");
 	} else {
 		current_speeds[drive->name[2] - 'a'] = pio_to_umc[pio];
 		umc_set_speeds(current_speeds);
 	}
-	if (mate_hwgroup)
-		spin_unlock_irqrestore(&mate_hwgroup->lock, flags);
+	if (mate)
+		spin_unlock_irqrestore(&mate->lock, flags);
 }
 
 static const struct ide_port_ops umc8672_port_ops = {
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 00df155b5a02..f27f130ba000 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -42,7 +42,6 @@ typedef unsigned char	byte;	/* used everywhere */
 #define ERROR_RECAL	1	/* Recalibrate every 2nd retry */
 
 #define HWIF(drive)		((ide_hwif_t *)((drive)->hwif))
-#define HWGROUP(drive)		((ide_hwgroup_t *)(HWIF(drive)->hwgroup))
 
 /*
  * Definitions for accessing IDE controller registers
@@ -750,7 +749,6 @@ struct ide_host;
 
 typedef struct hwif_s {
 	struct hwif_s *mate;		/* other hwif from same PCI chip */
-	struct hwgroup_s *hwgroup;	/* actually (ide_hwgroup_t *) */
 	struct proc_dir_entry *proc;	/* /proc/ide/ directory entry */
 
 	struct ide_host *host;
@@ -840,6 +838,30 @@ typedef struct hwif_s {
 #ifdef CONFIG_BLK_DEV_IDEACPI
 	struct ide_acpi_hwif_link *acpidata;
 #endif
+
+	/* IRQ handler, if active */
+	ide_startstop_t	(*handler)(ide_drive_t *);
+
+	/* BOOL: polling active & poll_timeout field valid */
+	unsigned int polling : 1;
+
+	/* current drive */
+	ide_drive_t *cur_dev;
+
+	/* current request */
+	struct request *rq;
+
+	/* failsafe timer */
+	struct timer_list timer;
+	/* timeout value during long polls */
+	unsigned long poll_timeout;
+	/* queried upon timeouts */
+	int (*expiry)(ide_drive_t *);
+
+	int req_gen;
+	int req_gen_timer;
+
+	spinlock_t lock;
 } ____cacheline_internodealigned_in_smp ide_hwif_t;
 
 #define MAX_HOST_PORTS 4
@@ -868,34 +890,6 @@ typedef int (ide_expiry_t)(ide_drive_t *);
 /* used by ide-cd, ide-floppy, etc. */
 typedef void (xfer_func_t)(ide_drive_t *, struct request *rq, void *, unsigned);
 
-typedef struct hwgroup_s {
-		/* irq handler, if active */
-	ide_startstop_t	(*handler)(ide_drive_t *);
-
-		/* BOOL: polling active & poll_timeout field valid */
-	unsigned int polling	: 1;
-
-		/* current drive */
-	ide_drive_t *cur_dev;
-
-		/* current request */
-	struct request *rq;
-
-		/* failsafe timer */
-	struct timer_list timer;
-		/* timeout value during long polls */
-	unsigned long poll_timeout;
-		/* queried upon timeouts */
-	int (*expiry)(ide_drive_t *);
-
-	int req_gen;
-	int req_gen_timer;
-
-	spinlock_t lock;
-
-	int port_count;
-} ide_hwgroup_t;
-
 typedef struct ide_driver_s ide_driver_t;
 
 extern struct mutex ide_setting_mtx;
@@ -1512,7 +1506,6 @@ static inline void ide_acpi_port_init_devices(ide_hwif_t *hwif) { ; }
 static inline void ide_acpi_set_state(ide_hwif_t *hwif, int on) {}
 #endif
 
-void ide_remove_port_from_hwgroup(ide_hwif_t *);
 void ide_unregister(ide_hwif_t *);
 
 void ide_register_region(struct gendisk *);
-- 
cgit v1.2.3


From b40d1b88f1001f0224c63fa2c008914514bcef33 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Tue, 6 Jan 2009 17:20:51 +0100
Subject: ide: move ide_init_port_data() and friends to ide-probe.c

* Move IDE_DEFAULT_MAX_FAILURES to <linux/ide.h>.

* Move ide_cfg_mtx, ide_hwif_to_major[], ide_port_init_devices_data(),
  ide_init_port_data(), ide_init_port_hw() and ide_unregister() to
  ide-probe.c from ide.c.

* Make ide_unregister(), ide_init_port_data(), ide_init_port_hw()
  and ide_cfg_mtx static.

While at it:

* Remove stale ide_init_port_data() documentation and ide_lock extern.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-probe.c | 135 ++++++++++++++++++++++++++++++++++++++++++++
 drivers/ide/ide.c       | 146 ------------------------------------------------
 include/linux/ide.h     |   9 +--
 3 files changed, 136 insertions(+), 154 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index e953b70706c2..09ea50118e63 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -918,6 +918,8 @@ static int ide_init_queue(ide_drive_t *drive)
 	return 0;
 }
 
+static DEFINE_MUTEX(ide_cfg_mtx);
+
 /*
  * For any present drive:
  * - allocate the block device queue
@@ -1273,6 +1275,69 @@ static void ide_port_cable_detect(ide_hwif_t *hwif)
 	}
 }
 
+static const u8 ide_hwif_to_major[] =
+	{ IDE0_MAJOR, IDE1_MAJOR, IDE2_MAJOR, IDE3_MAJOR, IDE4_MAJOR,
+	  IDE5_MAJOR, IDE6_MAJOR, IDE7_MAJOR, IDE8_MAJOR, IDE9_MAJOR };
+
+static void ide_port_init_devices_data(ide_hwif_t *hwif)
+{
+	int unit;
+
+	for (unit = 0; unit < MAX_DRIVES; ++unit) {
+		ide_drive_t *drive = &hwif->drives[unit];
+		u8 j = (hwif->index * MAX_DRIVES) + unit;
+
+		memset(drive, 0, sizeof(*drive));
+
+		drive->media			= ide_disk;
+		drive->select			= (unit << 4) | ATA_DEVICE_OBS;
+		drive->hwif			= hwif;
+		drive->ready_stat		= ATA_DRDY;
+		drive->bad_wstat		= BAD_W_STAT;
+		drive->special.b.recalibrate	= 1;
+		drive->special.b.set_geometry	= 1;
+		drive->name[0]			= 'h';
+		drive->name[1]			= 'd';
+		drive->name[2]			= 'a' + j;
+		drive->max_failures		= IDE_DEFAULT_MAX_FAILURES;
+
+		INIT_LIST_HEAD(&drive->list);
+		init_completion(&drive->gendev_rel_comp);
+	}
+}
+
+static void ide_init_port_data(ide_hwif_t *hwif, unsigned int index)
+{
+	/* bulk initialize hwif & drive info with zeros */
+	memset(hwif, 0, sizeof(ide_hwif_t));
+
+	/* fill in any non-zero initial values */
+	hwif->index	= index;
+	hwif->major	= ide_hwif_to_major[index];
+
+	hwif->name[0]	= 'i';
+	hwif->name[1]	= 'd';
+	hwif->name[2]	= 'e';
+	hwif->name[3]	= '0' + index;
+
+	init_completion(&hwif->gendev_rel_comp);
+
+	hwif->tp_ops = &default_tp_ops;
+
+	ide_port_init_devices_data(hwif);
+}
+
+static void ide_init_port_hw(ide_hwif_t *hwif, hw_regs_t *hw)
+{
+	memcpy(&hwif->io_ports, &hw->io_ports, sizeof(hwif->io_ports));
+	hwif->irq = hw->irq;
+	hwif->chipset = hw->chipset;
+	hwif->dev = hw->dev;
+	hwif->gendev.parent = hw->parent ? hw->parent : hw->dev;
+	hwif->ack_intr = hw->ack_intr;
+	hwif->config_data = hw->config;
+}
+
 static unsigned int ide_indexes;
 
 /**
@@ -1503,6 +1568,76 @@ int ide_host_add(const struct ide_port_info *d, hw_regs_t **hws,
 }
 EXPORT_SYMBOL_GPL(ide_host_add);
 
+static void __ide_port_unregister_devices(ide_hwif_t *hwif)
+{
+	int i;
+
+	for (i = 0; i < MAX_DRIVES; i++) {
+		ide_drive_t *drive = &hwif->drives[i];
+
+		if (drive->dev_flags & IDE_DFLAG_PRESENT) {
+			device_unregister(&drive->gendev);
+			wait_for_completion(&drive->gendev_rel_comp);
+		}
+	}
+}
+
+void ide_port_unregister_devices(ide_hwif_t *hwif)
+{
+	mutex_lock(&ide_cfg_mtx);
+	__ide_port_unregister_devices(hwif);
+	hwif->present = 0;
+	ide_port_init_devices_data(hwif);
+	mutex_unlock(&ide_cfg_mtx);
+}
+EXPORT_SYMBOL_GPL(ide_port_unregister_devices);
+
+/**
+ *	ide_unregister		-	free an IDE interface
+ *	@hwif: IDE interface
+ *
+ *	Perform the final unregister of an IDE interface.
+ *
+ *	Locking:
+ *	The caller must not hold the IDE locks.
+ *
+ *	It is up to the caller to be sure there is no pending I/O here,
+ *	and that the interface will not be reopened (present/vanishing
+ *	locking isn't yet done BTW).
+ */
+
+static void ide_unregister(ide_hwif_t *hwif)
+{
+	BUG_ON(in_interrupt());
+	BUG_ON(irqs_disabled());
+
+	mutex_lock(&ide_cfg_mtx);
+
+	if (hwif->present) {
+		__ide_port_unregister_devices(hwif);
+		hwif->present = 0;
+	}
+
+	ide_proc_unregister_port(hwif);
+
+	free_irq(hwif->irq, hwif);
+
+	device_unregister(hwif->portdev);
+	device_unregister(&hwif->gendev);
+	wait_for_completion(&hwif->gendev_rel_comp);
+
+	/*
+	 * Remove us from the kernel's knowledge
+	 */
+	blk_unregister_region(MKDEV(hwif->major, 0), MAX_DRIVES<<PARTN_BITS);
+	kfree(hwif->sg_table);
+	unregister_blkdev(hwif->major, hwif->name);
+
+	ide_release_dma_engine(hwif);
+
+	mutex_unlock(&ide_cfg_mtx);
+}
+
 void ide_host_free(struct ide_host *host)
 {
 	ide_hwif_t *hwif;
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index 6538b63158bf..c1bb0f6784a9 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -60,154 +60,8 @@
 #include <linux/completion.h>
 #include <linux/device.h>
 
-
-/* default maximum number of failures */
-#define IDE_DEFAULT_MAX_FAILURES 	1
-
 struct class *ide_port_class;
 
-static const u8 ide_hwif_to_major[] = { IDE0_MAJOR, IDE1_MAJOR,
-					IDE2_MAJOR, IDE3_MAJOR,
-					IDE4_MAJOR, IDE5_MAJOR,
-					IDE6_MAJOR, IDE7_MAJOR,
-					IDE8_MAJOR, IDE9_MAJOR };
-
-DEFINE_MUTEX(ide_cfg_mtx);
-
-static void ide_port_init_devices_data(ide_hwif_t *);
-
-/*
- * Do not even *think* about calling this!
- */
-void ide_init_port_data(ide_hwif_t *hwif, unsigned int index)
-{
-	/* bulk initialize hwif & drive info with zeros */
-	memset(hwif, 0, sizeof(ide_hwif_t));
-
-	/* fill in any non-zero initial values */
-	hwif->index	= index;
-	hwif->major	= ide_hwif_to_major[index];
-
-	hwif->name[0]	= 'i';
-	hwif->name[1]	= 'd';
-	hwif->name[2]	= 'e';
-	hwif->name[3]	= '0' + index;
-
-	init_completion(&hwif->gendev_rel_comp);
-
-	hwif->tp_ops = &default_tp_ops;
-
-	ide_port_init_devices_data(hwif);
-}
-
-static void ide_port_init_devices_data(ide_hwif_t *hwif)
-{
-	int unit;
-
-	for (unit = 0; unit < MAX_DRIVES; ++unit) {
-		ide_drive_t *drive = &hwif->drives[unit];
-		u8 j = (hwif->index * MAX_DRIVES) + unit;
-
-		memset(drive, 0, sizeof(*drive));
-
-		drive->media			= ide_disk;
-		drive->select			= (unit << 4) | ATA_DEVICE_OBS;
-		drive->hwif			= hwif;
-		drive->ready_stat		= ATA_DRDY;
-		drive->bad_wstat		= BAD_W_STAT;
-		drive->special.b.recalibrate	= 1;
-		drive->special.b.set_geometry	= 1;
-		drive->name[0]			= 'h';
-		drive->name[1]			= 'd';
-		drive->name[2]			= 'a' + j;
-		drive->max_failures		= IDE_DEFAULT_MAX_FAILURES;
-
-		INIT_LIST_HEAD(&drive->list);
-		init_completion(&drive->gendev_rel_comp);
-	}
-}
-
-static void __ide_port_unregister_devices(ide_hwif_t *hwif)
-{
-	int i;
-
-	for (i = 0; i < MAX_DRIVES; i++) {
-		ide_drive_t *drive = &hwif->drives[i];
-
-		if (drive->dev_flags & IDE_DFLAG_PRESENT) {
-			device_unregister(&drive->gendev);
-			wait_for_completion(&drive->gendev_rel_comp);
-		}
-	}
-}
-
-void ide_port_unregister_devices(ide_hwif_t *hwif)
-{
-	mutex_lock(&ide_cfg_mtx);
-	__ide_port_unregister_devices(hwif);
-	hwif->present = 0;
-	ide_port_init_devices_data(hwif);
-	mutex_unlock(&ide_cfg_mtx);
-}
-EXPORT_SYMBOL_GPL(ide_port_unregister_devices);
-
-/**
- *	ide_unregister		-	free an IDE interface
- *	@hwif: IDE interface
- *
- *	Perform the final unregister of an IDE interface.
- *
- *	Locking:
- *	The caller must not hold the IDE locks.
- *
- *	It is up to the caller to be sure there is no pending I/O here,
- *	and that the interface will not be reopened (present/vanishing
- *	locking isn't yet done BTW).
- */
-
-void ide_unregister(ide_hwif_t *hwif)
-{
-	BUG_ON(in_interrupt());
-	BUG_ON(irqs_disabled());
-
-	mutex_lock(&ide_cfg_mtx);
-
-	if (hwif->present) {
-		__ide_port_unregister_devices(hwif);
-		hwif->present = 0;
-	}
-
-	ide_proc_unregister_port(hwif);
-
-	free_irq(hwif->irq, hwif);
-
-	device_unregister(hwif->portdev);
-	device_unregister(&hwif->gendev);
-	wait_for_completion(&hwif->gendev_rel_comp);
-
-	/*
-	 * Remove us from the kernel's knowledge
-	 */
-	blk_unregister_region(MKDEV(hwif->major, 0), MAX_DRIVES<<PARTN_BITS);
-	kfree(hwif->sg_table);
-	unregister_blkdev(hwif->major, hwif->name);
-
-	ide_release_dma_engine(hwif);
-
-	mutex_unlock(&ide_cfg_mtx);
-}
-
-void ide_init_port_hw(ide_hwif_t *hwif, hw_regs_t *hw)
-{
-	memcpy(&hwif->io_ports, &hw->io_ports, sizeof(hwif->io_ports));
-	hwif->irq = hw->irq;
-	hwif->chipset = hw->chipset;
-	hwif->dev = hw->dev;
-	hwif->gendev.parent = hw->parent ? hw->parent : hw->dev;
-	hwif->ack_intr = hw->ack_intr;
-	hwif->config_data = hw->config;
-}
-
 /*
  *	Locks for IDE setting functionality
  */
diff --git a/include/linux/ide.h b/include/linux/ide.h
index f27f130ba000..ee2f461882ad 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -37,6 +37,7 @@ typedef unsigned char	byte;	/* used everywhere */
 /*
  * Probably not wise to fiddle with these
  */
+#define IDE_DEFAULT_MAX_FAILURES	1
 #define ERROR_MAX	8	/* Max read/write errors per sector */
 #define ERROR_RESET	3	/* Reset controller every 4th retry */
 #define ERROR_RECAL	1	/* Recalibrate every 2nd retry */
@@ -184,9 +185,6 @@ typedef struct hw_regs_s {
 	unsigned long	config;
 } hw_regs_t;
 
-void ide_init_port_data(struct hwif_s *, unsigned int);
-void ide_init_port_hw(struct hwif_s *, hw_regs_t *);
-
 static inline void ide_std_init_ports(hw_regs_t *hw,
 				      unsigned long io_addr,
 				      unsigned long ctl_addr)
@@ -1506,8 +1504,6 @@ static inline void ide_acpi_port_init_devices(ide_hwif_t *hwif) { ; }
 static inline void ide_acpi_set_state(ide_hwif_t *hwif, int on) {}
 #endif
 
-void ide_unregister(ide_hwif_t *);
-
 void ide_register_region(struct gendisk *);
 void ide_unregister_region(struct gendisk *);
 
@@ -1592,9 +1588,6 @@ static inline void ide_set_max_pio(ide_drive_t *drive)
 	ide_set_pio(drive, 255);
 }
 
-extern spinlock_t ide_lock;
-extern struct mutex ide_cfg_mtx;
-
 #define local_irq_set(flags)	do { local_save_flags((flags)); local_irq_enable_in_hardirq(); } while (0)
 
 char *ide_media_string(ide_drive_t *);
-- 
cgit v1.2.3


From 898ec223fea2a2df88035e58dbf50f493577e225 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Tue, 6 Jan 2009 17:20:52 +0100
Subject: ide: remove HWIF() macro

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/aec62xx.c      |  4 ++--
 drivers/ide/alim15x3.c     |  4 ++--
 drivers/ide/au1xxx-ide.c   |  4 ++--
 drivers/ide/cmd64x.c       | 14 +++++++-------
 drivers/ide/cs5520.c       |  2 +-
 drivers/ide/cy82c693.c     |  2 +-
 drivers/ide/hpt366.c       | 18 +++++++++---------
 drivers/ide/icside.c       | 10 +++++-----
 drivers/ide/ide-acpi.c     |  4 ++--
 drivers/ide/ide-disk.c     |  4 ++--
 drivers/ide/ide-io.c       |  8 ++++----
 drivers/ide/ide-iops.c     |  4 ++--
 drivers/ide/ide-pm.c       |  4 ++--
 drivers/ide/ide-probe.c    | 12 ++++++------
 drivers/ide/ide-taskfile.c |  2 +-
 drivers/ide/it8213.c       |  4 ++--
 drivers/ide/ns87415.c      |  6 +++---
 drivers/ide/pdc202xx_new.c |  4 ++--
 drivers/ide/pdc202xx_old.c | 10 +++++-----
 drivers/ide/piix.c         |  6 +++---
 drivers/ide/pmac.c         |  4 ++--
 drivers/ide/qd65xx.c       |  7 ++++---
 drivers/ide/sc1200.c       |  6 +++---
 drivers/ide/scc_pata.c     |  8 ++++----
 drivers/ide/serverworks.c  |  2 +-
 drivers/ide/sgiioc4.c      | 12 ++++++------
 drivers/ide/siimage.c      | 10 +++++-----
 drivers/ide/sis5513.c      |  2 +-
 drivers/ide/sl82c105.c     |  4 ++--
 drivers/ide/slc90e66.c     |  4 ++--
 drivers/ide/tc86c001.c     |  6 +++---
 drivers/ide/triflex.c      |  2 +-
 drivers/ide/trm290.c       |  8 ++++----
 drivers/ide/via82cxxx.c    |  2 +-
 include/linux/ide.h        |  2 --
 35 files changed, 102 insertions(+), 103 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/aec62xx.c b/drivers/ide/aec62xx.c
index 4142c698e0d3..4485b9c6f0e6 100644
--- a/drivers/ide/aec62xx.c
+++ b/drivers/ide/aec62xx.c
@@ -83,7 +83,7 @@ static u8 pci_bus_clock_list_ultra (u8 speed, struct chipset_bus_clock_list_entr
 
 static void aec6210_set_mode(ide_drive_t *drive, const u8 speed)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	struct ide_host *host	= pci_get_drvdata(dev);
 	struct chipset_bus_clock_list_entry *bus_clock = host->host_priv;
@@ -111,7 +111,7 @@ static void aec6210_set_mode(ide_drive_t *drive, const u8 speed)
 
 static void aec6260_set_mode(ide_drive_t *drive, const u8 speed)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	struct ide_host *host	= pci_get_drvdata(dev);
 	struct chipset_bus_clock_list_entry *bus_clock = host->host_priv;
diff --git a/drivers/ide/alim15x3.c b/drivers/ide/alim15x3.c
index 290204e89eaf..8d217430c997 100644
--- a/drivers/ide/alim15x3.c
+++ b/drivers/ide/alim15x3.c
@@ -68,7 +68,7 @@ static struct pci_dev *isa_dev;
 
 static void ali_set_pio_mode(ide_drive_t *drive, const u8 pio)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	struct pci_dev *dev = to_pci_dev(hwif->dev);
 	struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio);
 	int s_time = t->setup, a_time = t->active, c_time = t->cycle;
@@ -150,7 +150,7 @@ static u8 ali_udma_filter(ide_drive_t *drive)
 
 static void ali_set_dma_mode(ide_drive_t *drive, const u8 speed)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	u8 speed1		= speed;
 	u8 unit			= drive->dn & 1;
diff --git a/drivers/ide/au1xxx-ide.c b/drivers/ide/au1xxx-ide.c
index db412bc772d1..11f2c8f3db48 100644
--- a/drivers/ide/au1xxx-ide.c
+++ b/drivers/ide/au1xxx-ide.c
@@ -212,7 +212,7 @@ static void auide_set_dma_mode(ide_drive_t *drive, const u8 speed)
 static int auide_build_dmatable(ide_drive_t *drive)
 {
 	int i, iswrite, count = 0;
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	struct request *rq = hwif->rq;
 	_auide_hwif *ahwif = &auide_hwif;
 	struct scatterlist *sg;
@@ -286,7 +286,7 @@ static int auide_build_dmatable(ide_drive_t *drive)
 
 static int auide_dma_end(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 
 	if (hwif->sg_nents) {
 		ide_destroy_dmatable(drive);
diff --git a/drivers/ide/cmd64x.c b/drivers/ide/cmd64x.c
index 3623bf013bcf..d1fc198719b2 100644
--- a/drivers/ide/cmd64x.c
+++ b/drivers/ide/cmd64x.c
@@ -115,7 +115,7 @@ static void program_cycle_times (ide_drive_t *drive, int cycle_time, int active_
  */
 static void cmd64x_tune_pio(ide_drive_t *drive, const u8 pio)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	struct ide_timing *t	= ide_timing_find_mode(XFER_PIO_0 + pio);
 	unsigned int cycle_time;
@@ -180,7 +180,7 @@ static void cmd64x_set_pio_mode(ide_drive_t *drive, const u8 pio)
 
 static void cmd64x_set_dma_mode(ide_drive_t *drive, const u8 speed)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	u8 unit			= drive->dn & 0x01;
 	u8 regU = 0, pciU	= hwif->channel ? UDIDETCR1 : UDIDETCR0;
@@ -226,7 +226,7 @@ static void cmd64x_set_dma_mode(ide_drive_t *drive, const u8 speed)
 
 static int cmd648_dma_end(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	unsigned long base	= hwif->dma_base - (hwif->channel * 8);
 	int err			= ide_dma_end(drive);
 	u8  irq_mask		= hwif->channel ? MRDMODE_INTR_CH1 :
@@ -242,7 +242,7 @@ static int cmd648_dma_end(ide_drive_t *drive)
 
 static int cmd64x_dma_end(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	int irq_reg		= hwif->channel ? ARTTIM23 : CFR;
 	u8  irq_mask		= hwif->channel ? ARTTIM23_INTR_CH1 :
@@ -259,7 +259,7 @@ static int cmd64x_dma_end(ide_drive_t *drive)
 
 static int cmd648_dma_test_irq(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	unsigned long base	= hwif->dma_base - (hwif->channel * 8);
 	u8 irq_mask		= hwif->channel ? MRDMODE_INTR_CH1 :
 						  MRDMODE_INTR_CH0;
@@ -282,7 +282,7 @@ static int cmd648_dma_test_irq(ide_drive_t *drive)
 
 static int cmd64x_dma_test_irq(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	int irq_reg		= hwif->channel ? ARTTIM23 : CFR;
 	u8  irq_mask		= hwif->channel ? ARTTIM23_INTR_CH1 :
@@ -313,7 +313,7 @@ static int cmd64x_dma_test_irq(ide_drive_t *drive)
 
 static int cmd646_1_dma_end(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	u8 dma_stat = 0, dma_cmd = 0;
 
 	drive->waiting_for_dma = 0;
diff --git a/drivers/ide/cs5520.c b/drivers/ide/cs5520.c
index 5efb467f8fa0..d003bec56ff9 100644
--- a/drivers/ide/cs5520.c
+++ b/drivers/ide/cs5520.c
@@ -59,7 +59,7 @@ static struct pio_clocks cs5520_pio_clocks[]={
 
 static void cs5520_set_pio_mode(ide_drive_t *drive, const u8 pio)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	struct pci_dev *pdev = to_pci_dev(hwif->dev);
 	int controller = drive->dn > 1 ? 1 : 0;
 
diff --git a/drivers/ide/cy82c693.c b/drivers/ide/cy82c693.c
index d37baf8ecc5f..74fc5401f407 100644
--- a/drivers/ide/cy82c693.c
+++ b/drivers/ide/cy82c693.c
@@ -203,7 +203,7 @@ static void cy82c693_set_dma_mode(ide_drive_t *drive, const u8 mode)
 
 static void cy82c693_set_pio_mode(ide_drive_t *drive, const u8 pio)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	struct pci_dev *dev = to_pci_dev(hwif->dev);
 	pio_clocks_t pclk;
 	unsigned int addrCtrl;
diff --git a/drivers/ide/hpt366.c b/drivers/ide/hpt366.c
index b18e10d99d2e..a18a02a96977 100644
--- a/drivers/ide/hpt366.c
+++ b/drivers/ide/hpt366.c
@@ -626,7 +626,7 @@ static struct hpt_info *hpt3xx_get_info(struct device *dev)
 
 static u8 hpt3xx_udma_filter(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct hpt_info *info	= hpt3xx_get_info(hwif->dev);
 	u8 mask 		= hwif->ultra_mask;
 
@@ -665,7 +665,7 @@ static u8 hpt3xx_udma_filter(ide_drive_t *drive)
 
 static u8 hpt3xx_mdma_filter(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct hpt_info *info	= hpt3xx_get_info(hwif->dev);
 
 	switch (info->chip_type) {
@@ -743,7 +743,7 @@ static void hpt3xx_quirkproc(ide_drive_t *drive)
 
 static void hpt3xx_maskproc(ide_drive_t *drive, int mask)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev	*dev	= to_pci_dev(hwif->dev);
 	struct hpt_info *info	= hpt3xx_get_info(hwif->dev);
 
@@ -788,7 +788,7 @@ static void hpt366_dma_lost_irq(ide_drive_t *drive)
 
 static void hpt370_clear_engine(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	struct pci_dev *dev = to_pci_dev(hwif->dev);
 
 	pci_write_config_byte(dev, hwif->select_data, 0x37);
@@ -797,7 +797,7 @@ static void hpt370_clear_engine(ide_drive_t *drive)
 
 static void hpt370_irq_timeout(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	u16 bfifo		= 0;
 	u8  dma_cmd;
@@ -822,7 +822,7 @@ static void hpt370_dma_start(ide_drive_t *drive)
 
 static int hpt370_dma_end(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	u8  dma_stat		= inb(hwif->dma_base + ATA_DMA_STATUS);
 
 	if (dma_stat & 0x01) {
@@ -844,7 +844,7 @@ static void hpt370_dma_timeout(ide_drive_t *drive)
 /* returns 1 if DMA IRQ issued, 0 otherwise */
 static int hpt374_dma_test_irq(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	u16 bfifo		= 0;
 	u8  dma_stat;
@@ -865,7 +865,7 @@ static int hpt374_dma_test_irq(ide_drive_t *drive)
 
 static int hpt374_dma_end(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	u8 mcr	= 0, mcr_addr	= hwif->select_data;
 	u8 bwsr = 0, mask	= hwif->channel ? 0x02 : 0x01;
@@ -927,7 +927,7 @@ static void hpt3xxn_set_clock(ide_hwif_t *hwif, u8 mode)
 
 static void hpt3xxn_rw_disk(ide_drive_t *drive, struct request *rq)
 {
-	hpt3xxn_set_clock(HWIF(drive), rq_data_dir(rq) ? 0x23 : 0x21);
+	hpt3xxn_set_clock(drive->hwif, rq_data_dir(rq) ? 0x23 : 0x21);
 }
 
 /**
diff --git a/drivers/ide/icside.c b/drivers/ide/icside.c
index 72d95c99f147..97a35c667aee 100644
--- a/drivers/ide/icside.c
+++ b/drivers/ide/icside.c
@@ -166,7 +166,7 @@ static const expansioncard_ops_t icside_ops_arcin_v6 = {
  */
 static void icside_maskproc(ide_drive_t *drive, int mask)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	struct expansion_card *ec = ECARD_DEV(hwif->dev);
 	struct icside_state *state = ecard_get_drvdata(ec);
 	unsigned long flags;
@@ -284,7 +284,7 @@ static void icside_dma_host_set(ide_drive_t *drive, int on)
 
 static int icside_dma_end(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	struct expansion_card *ec = ECARD_DEV(hwif->dev);
 
 	drive->waiting_for_dma = 0;
@@ -299,7 +299,7 @@ static int icside_dma_end(ide_drive_t *drive)
 
 static void icside_dma_start(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	struct expansion_card *ec = ECARD_DEV(hwif->dev);
 
 	/* We can not enable DMA on both channels simultaneously. */
@@ -309,7 +309,7 @@ static void icside_dma_start(ide_drive_t *drive)
 
 static int icside_dma_setup(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	struct expansion_card *ec = ECARD_DEV(hwif->dev);
 	struct icside_state *state = ecard_get_drvdata(ec);
 	struct request *rq = hwif->rq;
@@ -362,7 +362,7 @@ static void icside_dma_exec_cmd(ide_drive_t *drive, u8 cmd)
 
 static int icside_dma_test_irq(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	struct expansion_card *ec = ECARD_DEV(hwif->dev);
 	struct icside_state *state = ecard_get_drvdata(ec);
 
diff --git a/drivers/ide/ide-acpi.c b/drivers/ide/ide-acpi.c
index fd4a36433050..f89b6ecf7d1a 100644
--- a/drivers/ide/ide-acpi.c
+++ b/drivers/ide/ide-acpi.c
@@ -218,7 +218,7 @@ static acpi_handle ide_acpi_hwif_get_handle(ide_hwif_t *hwif)
  */
 static acpi_handle ide_acpi_drive_get_handle(ide_drive_t *drive)
 {
-	ide_hwif_t	*hwif = HWIF(drive);
+	ide_hwif_t	*hwif = drive->hwif;
 	int		 port;
 	acpi_handle	 drive_handle;
 
@@ -263,7 +263,7 @@ static int do_drive_get_GTF(ide_drive_t *drive,
 	acpi_status			status;
 	struct acpi_buffer		output;
 	union acpi_object 		*out_obj;
-	ide_hwif_t			*hwif = HWIF(drive);
+	ide_hwif_t			*hwif = drive->hwif;
 	struct device			*dev = hwif->gendev.parent;
 	int				err = -ENODEV;
 	int				port;
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index eb9fac4d0f0c..4088a622873e 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -89,7 +89,7 @@ static void ide_tf_set_cmd(ide_drive_t *drive, ide_task_t *task, u8 dma)
 static ide_startstop_t __ide_do_rw_disk(ide_drive_t *drive, struct request *rq,
 					sector_t block)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	u16 nsectors		= (u16)rq->nr_sectors;
 	u8 lba48		= !!(drive->dev_flags & IDE_DFLAG_LBA48);
 	u8 dma			= !!(drive->dev_flags & IDE_DFLAG_USING_DMA);
@@ -187,7 +187,7 @@ static ide_startstop_t __ide_do_rw_disk(ide_drive_t *drive, struct request *rq,
 static ide_startstop_t ide_do_rw_disk(ide_drive_t *drive, struct request *rq,
 				      sector_t block)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 
 	BUG_ON(drive->dev_flags & IDE_DFLAG_BLOCKED);
 
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 6ff82d7055b9..0e2b95ec08a5 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -463,7 +463,7 @@ EXPORT_SYMBOL_GPL(ide_init_sg_cmd);
 static ide_startstop_t execute_drive_cmd (ide_drive_t *drive,
 		struct request *rq)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	ide_task_t *task = rq->special;
 
 	if (task) {
@@ -587,7 +587,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
 
 #ifdef DEBUG
 	printk("%s: start_request: current=0x%08lx\n",
-		HWIF(drive)->name, (unsigned long) rq);
+		drive->hwif->name, (unsigned long) rq);
 #endif
 
 	/* bail early if we've exceeded max_failures */
@@ -833,7 +833,7 @@ plug_device_2:
  */
 static ide_startstop_t ide_dma_timeout_retry(ide_drive_t *drive, int error)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	struct request *rq;
 	ide_startstop_t ret = ide_stopped;
 
@@ -955,7 +955,7 @@ void ide_timer_expiry (unsigned long data)
 			 * globally mask the specific IRQ:
 			 */
 			spin_unlock(&hwif->lock);
-			hwif  = HWIF(drive);
+			hwif = drive->hwif;
 			/* disable_irq_nosync ?? */
 			disable_irq(hwif->irq);
 			/* local CPU only,
diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c
index b92304d0e79a..386080ee4608 100644
--- a/drivers/ide/ide-iops.c
+++ b/drivers/ide/ide-iops.c
@@ -451,7 +451,7 @@ EXPORT_SYMBOL(ide_fixstring);
  */
 int drive_is_ready (ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	u8 stat			= 0;
 
 	if (drive->waiting_for_dma)
@@ -965,7 +965,7 @@ static void ide_reset_report_error(ide_hwif_t *hwif, u8 err)
  */
 static ide_startstop_t reset_pollfunc (ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	const struct ide_port_ops *port_ops = hwif->port_ops;
 	u8 tmp;
 	int err = 0;
diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c
index abb84a2dd821..0c206c68bbb1 100644
--- a/drivers/ide/ide-pm.c
+++ b/drivers/ide/ide-pm.c
@@ -5,7 +5,7 @@
 int generic_ide_suspend(struct device *dev, pm_message_t mesg)
 {
 	ide_drive_t *drive = dev->driver_data, *pair = ide_get_pair_dev(drive);
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	struct request *rq;
 	struct request_pm_state rqpm;
 	ide_task_t args;
@@ -39,7 +39,7 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
 int generic_ide_resume(struct device *dev)
 {
 	ide_drive_t *drive = dev->driver_data, *pair = ide_get_pair_dev(drive);
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	struct request *rq;
 	struct request_pm_state rqpm;
 	ide_task_t args;
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 09ea50118e63..9dfa99f062aa 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -189,7 +189,7 @@ static void ide_classify_atapi_dev(ide_drive_t *drive)
 
 static void do_identify(ide_drive_t *drive, u8 cmd)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	u16 *id = drive->id;
 	char *m = (char *)&id[ATA_ID_PROD];
 	unsigned long flags;
@@ -266,7 +266,7 @@ err_misc:
 
 static int actual_try_to_identify (ide_drive_t *drive, u8 cmd)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	struct ide_io_ports *io_ports = &hwif->io_ports;
 	const struct ide_tp_ops *tp_ops = hwif->tp_ops;
 	int use_altstatus = 0, rc;
@@ -341,7 +341,7 @@ static int actual_try_to_identify (ide_drive_t *drive, u8 cmd)
  
 static int try_to_identify (ide_drive_t *drive, u8 cmd)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	const struct ide_tp_ops *tp_ops = hwif->tp_ops;
 	int retval;
 	int autoprobe = 0;
@@ -438,7 +438,7 @@ static u8 ide_read_device(ide_drive_t *drive)
 
 static int do_probe (ide_drive_t *drive, u8 cmd)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	const struct ide_tp_ops *tp_ops = hwif->tp_ops;
 	int rc;
 	u8 present = !!(drive->dev_flags & IDE_DFLAG_PRESENT), stat;
@@ -522,7 +522,7 @@ static int do_probe (ide_drive_t *drive, u8 cmd)
  */
 static void enable_nest (ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	const struct ide_tp_ops *tp_ops = hwif->tp_ops;
 	u8 stat;
 
@@ -869,7 +869,7 @@ static void ide_port_tune_devices(ide_hwif_t *hwif)
 static int ide_init_queue(ide_drive_t *drive)
 {
 	struct request_queue *q;
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	int max_sectors = 256;
 	int max_sg_entries = PRD_ENTRIES;
 
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index 693e8d15fb78..55d451ce9b4f 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -58,7 +58,7 @@ static ide_startstop_t task_in_intr(ide_drive_t *);
 
 ide_startstop_t do_rw_taskfile (ide_drive_t *drive, ide_task_t *task)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	struct ide_taskfile *tf = &task->tf;
 	ide_handler_t *handler = NULL;
 	const struct ide_tp_ops *tp_ops = hwif->tp_ops;
diff --git a/drivers/ide/it8213.c b/drivers/ide/it8213.c
index 7c2feeb3c5ec..d7969b6d139e 100644
--- a/drivers/ide/it8213.c
+++ b/drivers/ide/it8213.c
@@ -25,7 +25,7 @@
 
 static void it8213_set_pio_mode(ide_drive_t *drive, const u8 pio)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	int is_slave		= drive->dn & 1;
 	int master_port		= 0x40;
@@ -82,7 +82,7 @@ static void it8213_set_pio_mode(ide_drive_t *drive, const u8 pio)
 
 static void it8213_set_dma_mode(ide_drive_t *drive, const u8 speed)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	u8 maslave		= 0x40;
 	int a_speed		= 3 << (drive->dn * 4);
diff --git a/drivers/ide/ns87415.c b/drivers/ide/ns87415.c
index 13789060f407..aceb2fcbe1d1 100644
--- a/drivers/ide/ns87415.c
+++ b/drivers/ide/ns87415.c
@@ -138,12 +138,12 @@ static unsigned int ns87415_count = 0, ns87415_control[MAX_HWIFS] = { 0 };
 
 /*
  * This routine either enables/disables (according to IDE_DFLAG_PRESENT)
- * the IRQ associated with the port (HWIF(drive)),
+ * the IRQ associated with the port,
  * and selects either PIO or DMA handshaking for the next I/O operation.
  */
 static void ns87415_prepare_drive (ide_drive_t *drive, unsigned int use_dma)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	struct pci_dev *dev = to_pci_dev(hwif->dev);
 	unsigned int bit, other, new, *old = (unsigned int *) hwif->select_data;
 	unsigned long flags;
@@ -197,7 +197,7 @@ static void ns87415_selectproc (ide_drive_t *drive)
 
 static int ns87415_dma_end(ide_drive_t *drive)
 {
-	ide_hwif_t      *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	u8 dma_stat = 0, dma_cmd = 0;
 
 	drive->waiting_for_dma = 0;
diff --git a/drivers/ide/pdc202xx_new.c b/drivers/ide/pdc202xx_new.c
index 211ae46e3e0c..f21290c4b447 100644
--- a/drivers/ide/pdc202xx_new.c
+++ b/drivers/ide/pdc202xx_new.c
@@ -143,7 +143,7 @@ static struct udma_timing {
 
 static void pdcnew_set_dma_mode(ide_drive_t *drive, const u8 speed)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	u8 adj			= (drive->dn & 1) ? 0x08 : 0x00;
 
@@ -219,7 +219,7 @@ static void pdcnew_reset(ide_drive_t *drive)
 	 * Deleted this because it is redundant from the caller.
 	 */
 	printk(KERN_WARNING "pdc202xx_new: %s channel reset.\n",
-		HWIF(drive)->channel ? "Secondary" : "Primary");
+		drive->hwif->channel ? "Secondary" : "Primary");
 }
 
 /**
diff --git a/drivers/ide/pdc202xx_old.c b/drivers/ide/pdc202xx_old.c
index 072ef70bf061..e8e6b29d9e41 100644
--- a/drivers/ide/pdc202xx_old.c
+++ b/drivers/ide/pdc202xx_old.c
@@ -39,7 +39,7 @@ static void pdc_old_disable_66MHz_clock(ide_hwif_t *);
 
 static void pdc202xx_set_mode(ide_drive_t *drive, const u8 speed)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	u8 drive_pci		= 0x60 + (drive->dn << 2);
 
@@ -169,7 +169,7 @@ static void pdc202xx_dma_start(ide_drive_t *drive)
 	if (drive->current_speed > XFER_UDMA_2)
 		pdc_old_enable_66MHz_clock(drive->hwif);
 	if (drive->media != ide_disk || (drive->dev_flags & IDE_DFLAG_LBA48)) {
-		ide_hwif_t *hwif	= HWIF(drive);
+		ide_hwif_t *hwif	= drive->hwif;
 		struct request *rq	= hwif->rq;
 		unsigned long high_16	= hwif->extra_base - 16;
 		unsigned long atapi_reg	= high_16 + (hwif->channel ? 0x24 : 0x20);
@@ -189,7 +189,7 @@ static void pdc202xx_dma_start(ide_drive_t *drive)
 static int pdc202xx_dma_end(ide_drive_t *drive)
 {
 	if (drive->media != ide_disk || (drive->dev_flags & IDE_DFLAG_LBA48)) {
-		ide_hwif_t *hwif	= HWIF(drive);
+		ide_hwif_t *hwif	= drive->hwif;
 		unsigned long high_16	= hwif->extra_base - 16;
 		unsigned long atapi_reg	= high_16 + (hwif->channel ? 0x24 : 0x20);
 		u8 clock		= 0;
@@ -205,7 +205,7 @@ static int pdc202xx_dma_end(ide_drive_t *drive)
 
 static int pdc202xx_dma_test_irq(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	unsigned long high_16	= hwif->extra_base - 16;
 	u8 dma_stat		= inb(hwif->dma_base + ATA_DMA_STATUS);
 	u8 sc1d			= inb(high_16 + 0x001d);
@@ -243,7 +243,7 @@ static void pdc202xx_reset_host (ide_hwif_t *hwif)
 
 static void pdc202xx_reset (ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	ide_hwif_t *mate	= hwif->mate;
 
 	pdc202xx_reset_host(hwif);
diff --git a/drivers/ide/piix.c b/drivers/ide/piix.c
index 61d2d920a5cd..e57b6c3ac78e 100644
--- a/drivers/ide/piix.c
+++ b/drivers/ide/piix.c
@@ -67,7 +67,7 @@ static int no_piix_dma;
 
 static void piix_set_pio_mode(ide_drive_t *drive, const u8 pio)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	int is_slave		= drive->dn & 1;
 	int master_port		= hwif->channel ? 0x42 : 0x40;
@@ -136,7 +136,7 @@ static void piix_set_pio_mode(ide_drive_t *drive, const u8 pio)
 
 static void piix_set_dma_mode(ide_drive_t *drive, const u8 speed)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	u8 maslave		= hwif->channel ? 0x42 : 0x40;
 	int a_speed		= 3 << (drive->dn * 4);
@@ -224,7 +224,7 @@ static unsigned int init_chipset_ich(struct pci_dev *dev)
  */
 static void ich_clear_irq(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	u8 dma_stat;
 
 	/*
diff --git a/drivers/ide/pmac.c b/drivers/ide/pmac.c
index 899b96baf215..ee52a21af1be 100644
--- a/drivers/ide/pmac.c
+++ b/drivers/ide/pmac.c
@@ -1513,7 +1513,7 @@ use_pio_instead:
 static int
 pmac_ide_dma_setup(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	pmac_ide_hwif_t *pmif =
 		(pmac_ide_hwif_t *)dev_get_drvdata(hwif->gendev.parent);
 	struct request *rq = hwif->rq;
@@ -1637,7 +1637,7 @@ pmac_ide_dma_test_irq (ide_drive_t *drive)
 			break;
 		if (++timeout > 100) {
 			printk(KERN_WARNING "ide%d, ide_dma_test_irq \
-			timeout flushing channel\n", HWIF(drive)->index);
+			timeout flushing channel\n", hwif->index);
 			break;
 		}
 	}	
diff --git a/drivers/ide/qd65xx.c b/drivers/ide/qd65xx.c
index bc27c7aba936..5b2e3af43c4b 100644
--- a/drivers/ide/qd65xx.c
+++ b/drivers/ide/qd65xx.c
@@ -202,7 +202,8 @@ static void qd6500_set_pio_mode(ide_drive_t *drive, const u8 pio)
 		recovery_time = drive->id[ATA_ID_EIDE_PIO] - 120;
 	}
 
-	qd_set_timing(drive, qd6500_compute_timing(HWIF(drive), active_time, recovery_time));
+	qd_set_timing(drive, qd6500_compute_timing(drive->hwif,
+				active_time, recovery_time));
 }
 
 static void qd6580_set_pio_mode(ide_drive_t *drive, const u8 pio)
@@ -245,11 +246,11 @@ static void qd6580_set_pio_mode(ide_drive_t *drive, const u8 pio)
 		printk(KERN_INFO "%s: PIO mode%d\n", drive->name,pio);
 	}
 
-	if (!HWIF(drive)->channel && drive->media != ide_disk) {
+	if (!hwif->channel && drive->media != ide_disk) {
 		outb(0x5f, QD_CONTROL_PORT);
 		printk(KERN_WARNING "%s: ATAPI: disabled read-ahead FIFO "
 			"and post-write buffer on %s.\n",
-			drive->name, HWIF(drive)->name);
+			drive->name, hwif->name);
 	}
 
 	qd_set_timing(drive, qd6580_compute_timing(active_time, recovery_time));
diff --git a/drivers/ide/sc1200.c b/drivers/ide/sc1200.c
index ec7f766ef5e4..6f68fe984bfb 100644
--- a/drivers/ide/sc1200.c
+++ b/drivers/ide/sc1200.c
@@ -125,7 +125,7 @@ out:
 
 static void sc1200_set_dma_mode(ide_drive_t *drive, const u8 mode)
 {
-	ide_hwif_t		*hwif = HWIF(drive);
+	ide_hwif_t		*hwif = drive->hwif;
 	struct pci_dev		*dev = to_pci_dev(hwif->dev);
 	unsigned int		reg, timings;
 	unsigned short		pci_clock;
@@ -170,7 +170,7 @@ static void sc1200_set_dma_mode(ide_drive_t *drive, const u8 mode)
  */
 static int sc1200_dma_end(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	unsigned long dma_base = hwif->dma_base;
 	byte dma_stat;
 
@@ -199,7 +199,7 @@ static int sc1200_dma_end(ide_drive_t *drive)
 
 static void sc1200_set_pio_mode(ide_drive_t *drive, const u8 pio)
 {
-	ide_hwif_t	*hwif = HWIF(drive);
+	ide_hwif_t	*hwif = drive->hwif;
 	int		mode = -1;
 
 	/*
diff --git a/drivers/ide/scc_pata.c b/drivers/ide/scc_pata.c
index e966113fd569..90574ab76345 100644
--- a/drivers/ide/scc_pata.c
+++ b/drivers/ide/scc_pata.c
@@ -217,7 +217,7 @@ scc_ide_outsl(unsigned long port, void *addr, u32 count)
 
 static void scc_set_pio_mode(ide_drive_t *drive, const u8 pio)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	struct scc_ports *ports = ide_get_hwifdata(hwif);
 	unsigned long ctl_base = ports->ctl;
 	unsigned long cckctrl_port = ctl_base + 0xff0;
@@ -249,7 +249,7 @@ static void scc_set_pio_mode(ide_drive_t *drive, const u8 pio)
 
 static void scc_set_dma_mode(ide_drive_t *drive, const u8 speed)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	struct scc_ports *ports = ide_get_hwifdata(hwif);
 	unsigned long ctl_base = ports->ctl;
 	unsigned long cckctrl_port = ctl_base + 0xff0;
@@ -387,7 +387,7 @@ static int __scc_dma_end(ide_drive_t *drive)
 
 static int scc_dma_end(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	void __iomem *dma_base = (void __iomem *)hwif->dma_base;
 	unsigned long intsts_port = hwif->dma_base + 0x014;
 	u32 reg;
@@ -496,7 +496,7 @@ static int scc_dma_end(ide_drive_t *drive)
 /* returns 1 if dma irq issued, 0 otherwise */
 static int scc_dma_test_irq(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	u32 int_stat = in_be32((void __iomem *)hwif->dma_base + 0x014);
 
 	/* SCC errata A252,A308 workaround: Step4 */
diff --git a/drivers/ide/serverworks.c b/drivers/ide/serverworks.c
index 437bc919dafd..382102ba467b 100644
--- a/drivers/ide/serverworks.c
+++ b/drivers/ide/serverworks.c
@@ -151,7 +151,7 @@ static void svwks_set_dma_mode(ide_drive_t *drive, const u8 speed)
 	static const u8 dma_modes[]		= { 0x77, 0x21, 0x20 };
 	static const u8 drive_pci2[]		= { 0x45, 0x44, 0x47, 0x46 };
 
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	u8 unit			= drive->dn & 1;
 
diff --git a/drivers/ide/sgiioc4.c b/drivers/ide/sgiioc4.c
index c68b71b1087b..8e1ffd57a86d 100644
--- a/drivers/ide/sgiioc4.c
+++ b/drivers/ide/sgiioc4.c
@@ -123,7 +123,7 @@ static int
 sgiioc4_clearirq(ide_drive_t * drive)
 {
 	u32 intr_reg;
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	struct ide_io_ports *io_ports = &hwif->io_ports;
 	unsigned long other_ir = io_ports->irq_addr + (IOC4_INTR_REG << 2);
 
@@ -181,7 +181,7 @@ sgiioc4_clearirq(ide_drive_t * drive)
 
 static void sgiioc4_dma_start(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	unsigned long ioc4_dma_addr = hwif->dma_base + IOC4_DMA_CTRL * 4;
 	unsigned int reg = readl((void __iomem *)ioc4_dma_addr);
 	unsigned int temp_reg = reg | IOC4_S_DMA_START;
@@ -209,7 +209,7 @@ sgiioc4_ide_dma_stop(ide_hwif_t *hwif, u64 dma_base)
 static int sgiioc4_dma_end(ide_drive_t *drive)
 {
 	u32 ioc4_dma, bc_dev, bc_mem, num, valid = 0, cnt = 0;
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	unsigned long dma_base = hwif->dma_base;
 	int dma_stat = 0;
 	unsigned long *ending_dma = ide_get_hwifdata(hwif);
@@ -271,7 +271,7 @@ static void sgiioc4_set_dma_mode(ide_drive_t *drive, const u8 speed)
 /* returns 1 if dma irq issued, 0 otherwise */
 static int sgiioc4_dma_test_irq(ide_drive_t *drive)
 {
-	return sgiioc4_checkirq(HWIF(drive));
+	return sgiioc4_checkirq(drive->hwif);
 }
 
 static void sgiioc4_dma_host_set(ide_drive_t *drive, int on)
@@ -367,7 +367,7 @@ static void
 sgiioc4_configure_for_dma(int dma_direction, ide_drive_t * drive)
 {
 	u32 ioc4_dma;
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	unsigned long dma_base = hwif->dma_base;
 	unsigned long ioc4_dma_addr = dma_base + IOC4_DMA_CTRL * 4;
 	u32 dma_addr, ending_dma_addr;
@@ -427,7 +427,7 @@ sgiioc4_configure_for_dma(int dma_direction, ide_drive_t * drive)
 static unsigned int
 sgiioc4_build_dma_table(ide_drive_t * drive, struct request *rq, int ddir)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	unsigned int *table = hwif->dmatable_cpu;
 	unsigned int count = 0, i = 1;
 	struct scatterlist *sg;
diff --git a/drivers/ide/siimage.c b/drivers/ide/siimage.c
index 7d622d20bc4c..652b3a04620f 100644
--- a/drivers/ide/siimage.c
+++ b/drivers/ide/siimage.c
@@ -114,7 +114,7 @@ static unsigned long siimage_selreg(ide_hwif_t *hwif, int r)
 
 static inline unsigned long siimage_seldev(ide_drive_t *drive, int r)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	unsigned long base	= (unsigned long)hwif->hwif_data;
 	u8 unit			= drive->dn & 1;
 
@@ -243,7 +243,7 @@ static void sil_set_pio_mode(ide_drive_t *drive, u8 pio)
 	static const u16 tf_speed[]   = { 0x328a, 0x2283, 0x1281, 0x10c3, 0x10c1 };
 	static const u16 data_speed[] = { 0x328a, 0x2283, 0x1104, 0x10c3, 0x10c1 };
 
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	ide_drive_t *pair	= ide_get_pair_dev(drive);
 	u32 speedt		= 0;
@@ -300,7 +300,7 @@ static void sil_set_dma_mode(ide_drive_t *drive, const u8 speed)
 	static const u8 ultra5[] = { 0x0C, 0x07, 0x05, 0x04, 0x02, 0x01 };
 	static const u16 dma[]	 = { 0x2208, 0x10C2, 0x10C1 };
 
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	unsigned long base	= (unsigned long)hwif->hwif_data;
 	u16 ultra = 0, multi	= 0;
@@ -340,7 +340,7 @@ static void sil_set_dma_mode(ide_drive_t *drive, const u8 speed)
 /* returns 1 if dma irq issued, 0 otherwise */
 static int siimage_io_dma_test_irq(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	u8 dma_altstat		= 0;
 	unsigned long addr	= siimage_selreg(hwif, 1);
@@ -367,7 +367,7 @@ static int siimage_io_dma_test_irq(ide_drive_t *drive)
 
 static int siimage_mmio_dma_test_irq(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	unsigned long addr	= siimage_selreg(hwif, 0x1);
 	void __iomem *sata_error_addr
 		= (void __iomem *)hwif->sata_scr[SATA_ERROR_OFFSET];
diff --git a/drivers/ide/sis5513.c b/drivers/ide/sis5513.c
index ad32e18c5ba3..9ec1a4a4432c 100644
--- a/drivers/ide/sis5513.c
+++ b/drivers/ide/sis5513.c
@@ -274,7 +274,7 @@ static void sis_program_timings(ide_drive_t *drive, const u8 mode)
 
 static void config_drive_art_rwp(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	u8 reg4bh		= 0;
 	u8 rw_prefetch		= 0;
diff --git a/drivers/ide/sl82c105.c b/drivers/ide/sl82c105.c
index 84dc33602ff8..1ded01d81ab3 100644
--- a/drivers/ide/sl82c105.c
+++ b/drivers/ide/sl82c105.c
@@ -140,7 +140,7 @@ static inline void sl82c105_reset_host(struct pci_dev *dev)
  */
 static void sl82c105_dma_lost_irq(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	u32 val, mask		= hwif->channel ? CTRL_IDE_IRQB : CTRL_IDE_IRQA;
 	u8 dma_cmd;
@@ -177,7 +177,7 @@ static void sl82c105_dma_lost_irq(ide_drive_t *drive)
  */
 static void sl82c105_dma_start(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	int reg 		= 0x44 + drive->dn * 4;
 
diff --git a/drivers/ide/slc90e66.c b/drivers/ide/slc90e66.c
index 0f759e4ed779..40b4b94a4288 100644
--- a/drivers/ide/slc90e66.c
+++ b/drivers/ide/slc90e66.c
@@ -20,7 +20,7 @@ static DEFINE_SPINLOCK(slc90e66_lock);
 
 static void slc90e66_set_pio_mode(ide_drive_t *drive, const u8 pio)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	int is_slave		= drive->dn & 1;
 	int master_port		= hwif->channel ? 0x42 : 0x40;
@@ -73,7 +73,7 @@ static void slc90e66_set_pio_mode(ide_drive_t *drive, const u8 pio)
 
 static void slc90e66_set_dma_mode(ide_drive_t *drive, const u8 speed)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	struct pci_dev *dev	= to_pci_dev(hwif->dev);
 	u8 maslave		= hwif->channel ? 0x42 : 0x40;
 	int sitre = 0, a_speed	= 7 << (drive->dn * 4);
diff --git a/drivers/ide/tc86c001.c b/drivers/ide/tc86c001.c
index accb379bcad6..d2c00fb928e4 100644
--- a/drivers/ide/tc86c001.c
+++ b/drivers/ide/tc86c001.c
@@ -15,7 +15,7 @@
 
 static void tc86c001_set_mode(ide_drive_t *drive, const u8 speed)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	unsigned long scr_port	= hwif->config_data + (drive->dn ? 0x02 : 0x00);
 	u16 mode, scr		= inw(scr_port);
 
@@ -62,7 +62,7 @@ static void tc86c001_set_pio_mode(ide_drive_t *drive, const u8 pio)
  */
 static int tc86c001_timer_expiry(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	ide_expiry_t *expiry	= ide_get_hwifdata(hwif);
 	u8 dma_stat		= inb(hwif->dma_base + ATA_DMA_STATUS);
 
@@ -109,7 +109,7 @@ static int tc86c001_timer_expiry(ide_drive_t *drive)
 
 static void tc86c001_dma_start(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif	= drive->hwif;
 	unsigned long sc_base	= hwif->config_data;
 	unsigned long twcr_port	= sc_base + (drive->dn ? 0x06 : 0x04);
 	unsigned long nsectors	= hwif->rq->nr_sectors;
diff --git a/drivers/ide/triflex.c b/drivers/ide/triflex.c
index b6ff40336aa9..8773c3ba7462 100644
--- a/drivers/ide/triflex.c
+++ b/drivers/ide/triflex.c
@@ -36,7 +36,7 @@
 
 static void triflex_set_mode(ide_drive_t *drive, const u8 speed)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	struct pci_dev *dev = to_pci_dev(hwif->dev);
 	u32 triflex_timings = 0;
 	u16 timing = 0;
diff --git a/drivers/ide/trm290.c b/drivers/ide/trm290.c
index 79a03df118d8..b6a1285a4021 100644
--- a/drivers/ide/trm290.c
+++ b/drivers/ide/trm290.c
@@ -144,7 +144,7 @@
 
 static void trm290_prepare_drive (ide_drive_t *drive, unsigned int use_dma)
 {
-	ide_hwif_t *hwif = HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	u16 reg = 0;
 	unsigned long flags;
 
@@ -222,15 +222,15 @@ static int trm290_dma_end(ide_drive_t *drive)
 	drive->waiting_for_dma = 0;
 	/* purge DMA mappings */
 	ide_destroy_dmatable(drive);
-	status = inw(HWIF(drive)->dma_base + 2);
+	status = inw(drive->hwif->dma_base + 2);
+
 	return status != 0x00ff;
 }
 
 static int trm290_dma_test_irq(ide_drive_t *drive)
 {
-	u16 status;
+	u16 status = inw(drive->hwif->dma_base + 2);
 
-	status = inw(HWIF(drive)->dma_base + 2);
 	return status == 0x00ff;
 }
 
diff --git a/drivers/ide/via82cxxx.c b/drivers/ide/via82cxxx.c
index 2a812d3207e9..fecc0e03c3fc 100644
--- a/drivers/ide/via82cxxx.c
+++ b/drivers/ide/via82cxxx.c
@@ -178,7 +178,7 @@ static void via_set_drive(ide_drive_t *drive, const u8 speed)
 		ide_timing_merge(&p, &t, &t, IDE_TIMING_8BIT);
 	}
 
-	via_set_speed(HWIF(drive), drive->dn, &t);
+	via_set_speed(hwif, drive->dn, &t);
 }
 
 /**
diff --git a/include/linux/ide.h b/include/linux/ide.h
index ee2f461882ad..58b9c99482cd 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -42,8 +42,6 @@ typedef unsigned char	byte;	/* used everywhere */
 #define ERROR_RESET	3	/* Reset controller every 4th retry */
 #define ERROR_RECAL	1	/* Recalibrate every 2nd retry */
 
-#define HWIF(drive)		((ide_hwif_t *)((drive)->hwif))
-
 /*
  * Definitions for accessing IDE controller registers
  */
-- 
cgit v1.2.3


From 54cc1428cfa619e16d75baae8cb041a2eff015f0 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Tue, 6 Jan 2009 17:20:52 +0100
Subject: ide: remove local_irq_set() macro

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-iops.c  | 3 ++-
 drivers/ide/ide-probe.c | 3 ++-
 include/linux/ide.h     | 2 --
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c
index 386080ee4608..44ad03bf9cb4 100644
--- a/drivers/ide/ide-iops.c
+++ b/drivers/ide/ide-iops.c
@@ -503,7 +503,8 @@ static int __ide_wait_stat(ide_drive_t *drive, u8 good, u8 bad, unsigned long ti
 	stat = tp_ops->read_status(hwif);
 
 	if (stat & ATA_BUSY) {
-		local_irq_set(flags);
+		local_irq_save(flags);
+		local_irq_enable_in_hardirq();
 		timeout += jiffies;
 		while ((stat = tp_ops->read_status(hwif)) & ATA_BUSY) {
 			if (time_after(jiffies, timeout)) {
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 9dfa99f062aa..2fca32ed576f 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -796,7 +796,8 @@ static int ide_probe_port(ide_hwif_t *hwif)
 	if (irqd)
 		disable_irq(hwif->irq);
 
-	local_irq_set(flags);
+	local_irq_save(flags);
+	local_irq_enable_in_hardirq();
 
 	if (ide_port_wait_ready(hwif) == -EBUSY)
 		printk(KERN_DEBUG "%s: Wait for ready failed before probe !\n", hwif->name);
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 58b9c99482cd..82d500c5a847 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1586,8 +1586,6 @@ static inline void ide_set_max_pio(ide_drive_t *drive)
 	ide_set_pio(drive, 255);
 }
 
-#define local_irq_set(flags)	do { local_save_flags((flags)); local_irq_enable_in_hardirq(); } while (0)
-
 char *ide_media_string(ide_drive_t *);
 
 extern struct device_attribute ide_dev_attrs[];
-- 
cgit v1.2.3


From c0ae50234771684ae0cbac5dfb70e1a09c22aa89 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Tue, 6 Jan 2009 17:20:52 +0100
Subject: ide: remove ide_pci_enablebit_t typedef

Remove needless parens while at it.

Acked-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/setup-pci.c | 2 +-
 include/linux/ide.h     | 7 ++++---
 2 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c
index 9f1f9163a136..09f820af379d 100644
--- a/drivers/ide/setup-pci.c
+++ b/drivers/ide/setup-pci.c
@@ -471,7 +471,7 @@ void ide_pci_setup_ports(struct pci_dev *dev, const struct ide_port_info *d,
 	 */
 
 	for (port = 0; port < channels; ++port) {
-		const ide_pci_enablebit_t *e = &(d->enablebits[port]);
+		const struct ide_pci_enablebit *e = &d->enablebits[port];
 
 		if (e->reg && (pci_read_config_byte(dev, e->reg, &tmp) ||
 		    (tmp & e->mask) != e->val)) {
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 82d500c5a847..cca6cfb299bc 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1300,11 +1300,11 @@ static inline int ide_hwif_setup_dma(ide_hwif_t *hwif,
 }
 #endif
 
-typedef struct ide_pci_enablebit_s {
+struct ide_pci_enablebit {
 	u8	reg;	/* byte pci reg holding the enable-bit */
 	u8	mask;	/* mask to isolate the enable-bit */
 	u8	val;	/* value of masked reg when "enabled" */
-} ide_pci_enablebit_t;
+};
 
 enum {
 	/* Uses ISA control ports not PCI ones. */
@@ -1393,7 +1393,8 @@ struct ide_port_info {
 	const struct ide_port_ops	*port_ops;
 	const struct ide_dma_ops	*dma_ops;
 
-	ide_pci_enablebit_t	enablebits[2];
+	struct ide_pci_enablebit	enablebits[2];
+
 	hwif_chipset_t		chipset;
 
 	u16			max_sectors;	/* if < than the default one */
-- 
cgit v1.2.3


From 9892ec5497af1ec38c46974b5a370ba11c636b19 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Tue, 6 Jan 2009 17:20:53 +0100
Subject: ide: remove 'byte' typedef

Just use u8 instead, also s/__u8/u8/ in ide-cd.h while at it.

Acked-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-cd.h | 26 +++++++++++++-------------
 drivers/ide/it821x.c |  4 ++--
 drivers/ide/qd65xx.h |  4 ++--
 drivers/ide/sc1200.c |  2 +-
 include/linux/ide.h  |  4 +---
 5 files changed, 19 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/ide-cd.h b/drivers/ide/ide-cd.h
index bf676b262181..d5ae036af00c 100644
--- a/drivers/ide/ide-cd.h
+++ b/drivers/ide/ide-cd.h
@@ -33,33 +33,33 @@
 
 /* Structure of a MSF cdrom address. */
 struct atapi_msf {
-	byte reserved;
-	byte minute;
-	byte second;
-	byte frame;
+	u8 reserved;
+	u8 minute;
+	u8 second;
+	u8 frame;
 };
 
 /* Space to hold the disk TOC. */
 #define MAX_TRACKS 99
 struct atapi_toc_header {
 	unsigned short toc_length;
-	byte first_track;
-	byte last_track;
+	u8 first_track;
+	u8 last_track;
 };
 
 struct atapi_toc_entry {
-	byte reserved1;
+	u8 reserved1;
 #if defined(__BIG_ENDIAN_BITFIELD)
-	__u8 adr     : 4;
-	__u8 control : 4;
+	u8 adr     : 4;
+	u8 control : 4;
 #elif defined(__LITTLE_ENDIAN_BITFIELD)
-	__u8 control : 4;
-	__u8 adr     : 4;
+	u8 control : 4;
+	u8 adr     : 4;
 #else
 #error "Please fix <asm/byteorder.h>"
 #endif
-	byte track;
-	byte reserved2;
+	u8 track;
+	u8 reserved2;
 	union {
 		unsigned lba;
 		struct atapi_msf msf;
diff --git a/drivers/ide/it821x.c b/drivers/ide/it821x.c
index ef004089761b..a34f3e19b4a4 100644
--- a/drivers/ide/it821x.c
+++ b/drivers/ide/it821x.c
@@ -279,7 +279,7 @@ static void it821x_set_pio_mode(ide_drive_t *drive, const u8 pio)
  *	the shared MWDMA/PIO timing register.
  */
 
-static void it821x_tune_mwdma (ide_drive_t *drive, byte mode_wanted)
+static void it821x_tune_mwdma(ide_drive_t *drive, u8 mode_wanted)
 {
 	ide_hwif_t *hwif = drive->hwif;
 	struct pci_dev *dev = to_pci_dev(hwif->dev);
@@ -316,7 +316,7 @@ static void it821x_tune_mwdma (ide_drive_t *drive, byte mode_wanted)
  *	controller when doing UDMA modes in pass through.
  */
 
-static void it821x_tune_udma (ide_drive_t *drive, byte mode_wanted)
+static void it821x_tune_udma(ide_drive_t *drive, u8 mode_wanted)
 {
 	ide_hwif_t *hwif = drive->hwif;
 	struct pci_dev *dev = to_pci_dev(hwif->dev);
diff --git a/drivers/ide/qd65xx.h b/drivers/ide/qd65xx.h
index c83dea85e621..6636f9665d16 100644
--- a/drivers/ide/qd65xx.h
+++ b/drivers/ide/qd65xx.h
@@ -31,8 +31,8 @@
 
 #define QD_CONFIG(hwif)		((hwif)->config_data & 0x00ff)
 
-#define QD_TIMING(drive)	(byte)(((drive)->drive_data) & 0x00ff)
-#define QD_TIMREG(drive)	(byte)((((drive)->drive_data) & 0xff00) >> 8)
+#define QD_TIMING(drive)	(u8)(((drive)->drive_data) & 0x00ff)
+#define QD_TIMREG(drive)	(u8)((((drive)->drive_data) & 0xff00) >> 8)
 
 #define QD6500_DEF_DATA		((QD_TIM1_PORT<<8) | (QD_ID3 ? 0x0c : 0x08))
 #define QD6580_DEF_DATA		((QD_TIM1_PORT<<8) | (QD_ID3 ? 0x0a : 0x00))
diff --git a/drivers/ide/sc1200.c b/drivers/ide/sc1200.c
index 6f68fe984bfb..1cf477aaae36 100644
--- a/drivers/ide/sc1200.c
+++ b/drivers/ide/sc1200.c
@@ -172,7 +172,7 @@ static int sc1200_dma_end(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = drive->hwif;
 	unsigned long dma_base = hwif->dma_base;
-	byte dma_stat;
+	u8 dma_stat;
 
 	dma_stat = inb(dma_base+2);		/* get DMA status */
 
diff --git a/include/linux/ide.h b/include/linux/ide.h
index cca6cfb299bc..545a67f1f6b5 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -32,8 +32,6 @@
 # define SUPPORT_VLB_SYNC 1
 #endif
 
-typedef unsigned char	byte;	/* used everywhere */
-
 /*
  * Probably not wise to fiddle with these
  */
@@ -1161,7 +1159,7 @@ void ide_pad_transfer(ide_drive_t *, int, int);
 
 ide_startstop_t __ide_error(ide_drive_t *, struct request *, u8, u8);
 
-ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, byte stat);
+ide_startstop_t ide_error(ide_drive_t *, const char *, u8);
 
 void ide_fix_driveid(u16 *);
 
-- 
cgit v1.2.3


From 7f3c868ba78e486bd9d7569f884dd46d8f59bb18 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Tue, 6 Jan 2009 17:20:53 +0100
Subject: ide: remove ide_driver_t typedef

While at it:
- s/struct ide_driver_s/struct ide_driver/
- use to_ide_driver() macro in ide-proc.c

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-cd.c       |  2 +-
 drivers/ide/ide-cd.h       |  8 ++++----
 drivers/ide/ide-gd.c       |  2 +-
 drivers/ide/ide-gd.h       | 10 +++++-----
 drivers/ide/ide-io.c       | 12 ++++++------
 drivers/ide/ide-pm.c       |  2 +-
 drivers/ide/ide-proc.c     | 16 ++++++++--------
 drivers/ide/ide-tape.c     | 10 +++++-----
 drivers/ide/ide-taskfile.c |  8 ++++----
 drivers/ide/ide.c          |  6 +++---
 include/linux/ide.h        | 18 +++++++++---------
 11 files changed, 47 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index c35b64d495f5..bc982dfacc36 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -1914,7 +1914,7 @@ static void ide_cd_release(struct kref *kref)
 
 static int ide_cd_probe(ide_drive_t *);
 
-static ide_driver_t ide_cdrom_driver = {
+static struct ide_driver ide_cdrom_driver = {
 	.gen_driver = {
 		.owner		= THIS_MODULE,
 		.name		= "ide-cdrom",
diff --git a/drivers/ide/ide-cd.h b/drivers/ide/ide-cd.h
index d5ae036af00c..ac40d6cb90a2 100644
--- a/drivers/ide/ide-cd.h
+++ b/drivers/ide/ide-cd.h
@@ -77,10 +77,10 @@ struct atapi_toc {
 
 /* Extra per-device info for cdrom drives. */
 struct cdrom_info {
-	ide_drive_t	*drive;
-	ide_driver_t	*driver;
-	struct gendisk	*disk;
-	struct kref	kref;
+	ide_drive_t		*drive;
+	struct ide_driver	*driver;
+	struct gendisk		*disk;
+	struct kref		kref;
 
 	/* Buffer for table of contents.  NULL if we haven't allocated
 	   a TOC buffer for this device yet. */
diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c
index b8078b3231f7..cae82b3c432a 100644
--- a/drivers/ide/ide-gd.c
+++ b/drivers/ide/ide-gd.c
@@ -149,7 +149,7 @@ static int ide_gd_end_request(ide_drive_t *drive, int uptodate, int nrsecs)
 	return drive->disk_ops->end_request(drive, uptodate, nrsecs);
 }
 
-static ide_driver_t ide_gd_driver = {
+static struct ide_driver ide_gd_driver = {
 	.gen_driver = {
 		.owner		= THIS_MODULE,
 		.name		= "ide-gd",
diff --git a/drivers/ide/ide-gd.h b/drivers/ide/ide-gd.h
index 7d3d101713e0..a86779f0756b 100644
--- a/drivers/ide/ide-gd.h
+++ b/drivers/ide/ide-gd.h
@@ -14,11 +14,11 @@
 #endif
 
 struct ide_disk_obj {
-	ide_drive_t	*drive;
-	ide_driver_t	*driver;
-	struct gendisk	*disk;
-	struct kref	kref;
-	unsigned int	openers;	/* protected by BKL for now */
+	ide_drive_t		*drive;
+	struct ide_driver	*driver;
+	struct gendisk		*disk;
+	struct kref		kref;
+	unsigned int		openers;	/* protected by BKL for now */
 
 	/* Last failed packet command */
 	struct ide_atapi_pc *failed_pc;
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 0e2b95ec08a5..e788b3622f2e 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -199,9 +199,9 @@ EXPORT_SYMBOL(ide_end_drive_cmd);
 static void ide_kill_rq(ide_drive_t *drive, struct request *rq)
 {
 	if (rq->rq_disk) {
-		ide_driver_t *drv;
+		struct ide_driver *drv;
 
-		drv = *(ide_driver_t **)rq->rq_disk->private_data;
+		drv = *(struct ide_driver **)rq->rq_disk->private_data;
 		drv->end_request(drive, 0, 0);
 	} else
 		ide_end_request(drive, 0, 0);
@@ -333,9 +333,9 @@ ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, u8 stat)
 	}
 
 	if (rq->rq_disk) {
-		ide_driver_t *drv;
+		struct ide_driver *drv;
 
-		drv = *(ide_driver_t **)rq->rq_disk->private_data;
+		drv = *(struct ide_driver **)rq->rq_disk->private_data;
 		return drv->error(drive, rq, stat, err);
 	} else
 		return __ide_error(drive, rq, stat, err);
@@ -606,7 +606,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
 		return startstop;
 	}
 	if (!drive->special.all) {
-		ide_driver_t *drv;
+		struct ide_driver *drv;
 
 		/*
 		 * We reset the drive so we need to issue a SETFEATURES.
@@ -639,7 +639,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
 			 */
 			return ide_special_rq(drive, rq);
 
-		drv = *(ide_driver_t **)rq->rq_disk->private_data;
+		drv = *(struct ide_driver **)rq->rq_disk->private_data;
 
 		return drv->do_request(drive, rq, rq->sector);
 	}
diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c
index 0c206c68bbb1..4b3bf6a06b70 100644
--- a/drivers/ide/ide-pm.c
+++ b/drivers/ide/ide-pm.c
@@ -67,7 +67,7 @@ int generic_ide_resume(struct device *dev)
 	blk_put_request(rq);
 
 	if (err == 0 && dev->driver) {
-		ide_driver_t *drv = to_ide_driver(dev->driver);
+		struct ide_driver *drv = to_ide_driver(dev->driver);
 
 		if (drv->resume)
 			drv->resume(drive);
diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c
index a14e2938e4f3..d985a9ec6bef 100644
--- a/drivers/ide/ide-proc.c
+++ b/drivers/ide/ide-proc.c
@@ -439,13 +439,13 @@ static int proc_ide_read_dmodel
 static int proc_ide_read_driver
 	(char *page, char **start, off_t off, int count, int *eof, void *data)
 {
-	ide_drive_t	*drive = (ide_drive_t *) data;
-	struct device	*dev = &drive->gendev;
-	ide_driver_t	*ide_drv;
-	int		len;
+	ide_drive_t		*drive = (ide_drive_t *)data;
+	struct device		*dev = &drive->gendev;
+	struct ide_driver	*ide_drv;
+	int			len;
 
 	if (dev->driver) {
-		ide_drv = container_of(dev->driver, ide_driver_t, gen_driver);
+		ide_drv = to_ide_driver(dev->driver);
 		len = sprintf(page, "%s version %s\n",
 				dev->driver->name, ide_drv->version);
 	} else
@@ -555,7 +555,7 @@ static void ide_remove_proc_entries(struct proc_dir_entry *dir, ide_proc_entry_t
 	}
 }
 
-void ide_proc_register_driver(ide_drive_t *drive, ide_driver_t *driver)
+void ide_proc_register_driver(ide_drive_t *drive, struct ide_driver *driver)
 {
 	mutex_lock(&ide_setting_mtx);
 	drive->settings = driver->proc_devsets(drive);
@@ -577,7 +577,7 @@ EXPORT_SYMBOL(ide_proc_register_driver);
  *	Takes ide_setting_mtx.
  */
 
-void ide_proc_unregister_driver(ide_drive_t *drive, ide_driver_t *driver)
+void ide_proc_unregister_driver(ide_drive_t *drive, struct ide_driver *driver)
 {
 	ide_remove_proc_entries(drive->proc, driver->proc_entries(drive));
 
@@ -653,7 +653,7 @@ void ide_proc_unregister_port(ide_hwif_t *hwif)
 
 static int proc_print_driver(struct device_driver *drv, void *data)
 {
-	ide_driver_t *ide_drv = container_of(drv, ide_driver_t, gen_driver);
+	struct ide_driver *ide_drv = to_ide_driver(drv);
 	struct seq_file *s = data;
 
 	seq_printf(s, "%s version %s\n", drv->name, ide_drv->version);
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index e39f2f46d982..fd865039bfdb 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -166,10 +166,10 @@ struct idetape_bh {
  * to an interrupt or a timer event is stored in the struct defined below.
  */
 typedef struct ide_tape_obj {
-	ide_drive_t	*drive;
-	ide_driver_t	*driver;
-	struct gendisk	*disk;
-	struct kref	kref;
+	ide_drive_t		*drive;
+	struct ide_driver	*driver;
+	struct gendisk		*disk;
+	struct kref		kref;
 
 	/*
 	 *	failed_pc points to the last failed packet command, or contains
@@ -2313,7 +2313,7 @@ static const struct ide_proc_devset *ide_tape_proc_devsets(ide_drive_t *drive)
 
 static int ide_tape_probe(ide_drive_t *);
 
-static ide_driver_t idetape_driver = {
+static struct ide_driver idetape_driver = {
 	.gen_driver = {
 		.owner		= THIS_MODULE,
 		.name		= "ide-tape",
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index 55d451ce9b4f..16138bce84a7 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -309,9 +309,9 @@ static ide_startstop_t task_error(ide_drive_t *drive, struct request *rq,
 		}
 
 		if (sectors > 0) {
-			ide_driver_t *drv;
+			struct ide_driver *drv;
 
-			drv = *(ide_driver_t **)rq->rq_disk->private_data;
+			drv = *(struct ide_driver **)rq->rq_disk->private_data;
 			drv->end_request(drive, 1, sectors);
 		}
 	}
@@ -328,9 +328,9 @@ void task_end_request(ide_drive_t *drive, struct request *rq, u8 stat)
 	}
 
 	if (rq->rq_disk) {
-		ide_driver_t *drv;
+		struct ide_driver *drv;
 
-		drv = *(ide_driver_t **)rq->rq_disk->private_data;;
+		drv = *(struct ide_driver **)rq->rq_disk->private_data;;
 		drv->end_request(drive, 1, rq->nr_sectors);
 	} else
 		ide_end_request(drive, 1, rq->nr_sectors);
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index c1bb0f6784a9..6971c285a212 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -281,7 +281,7 @@ static int ide_uevent(struct device *dev, struct kobj_uevent_env *env)
 static int generic_ide_probe(struct device *dev)
 {
 	ide_drive_t *drive = to_ide_device(dev);
-	ide_driver_t *drv = to_ide_driver(dev->driver);
+	struct ide_driver *drv = to_ide_driver(dev->driver);
 
 	return drv->probe ? drv->probe(drive) : -ENODEV;
 }
@@ -289,7 +289,7 @@ static int generic_ide_probe(struct device *dev)
 static int generic_ide_remove(struct device *dev)
 {
 	ide_drive_t *drive = to_ide_device(dev);
-	ide_driver_t *drv = to_ide_driver(dev->driver);
+	struct ide_driver *drv = to_ide_driver(dev->driver);
 
 	if (drv->remove)
 		drv->remove(drive);
@@ -300,7 +300,7 @@ static int generic_ide_remove(struct device *dev)
 static void generic_ide_shutdown(struct device *dev)
 {
 	ide_drive_t *drive = to_ide_device(dev);
-	ide_driver_t *drv = to_ide_driver(dev->driver);
+	struct ide_driver *drv = to_ide_driver(dev->driver);
 
 	if (dev->driver && drv->shutdown)
 		drv->shutdown(drive);
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 545a67f1f6b5..fcbcfa2cbe75 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -437,7 +437,7 @@ struct ide_atapi_pc {
 };
 
 struct ide_devset;
-struct ide_driver_s;
+struct ide_driver;
 
 #ifdef CONFIG_BLK_DEV_IDEACPI
 struct ide_acpi_drive_link;
@@ -884,8 +884,6 @@ typedef int (ide_expiry_t)(ide_drive_t *);
 /* used by ide-cd, ide-floppy, etc. */
 typedef void (xfer_func_t)(ide_drive_t *, struct request *rq, void *, unsigned);
 
-typedef struct ide_driver_s ide_driver_t;
-
 extern struct mutex ide_setting_mtx;
 
 /*
@@ -1011,8 +1009,8 @@ void ide_proc_register_port(ide_hwif_t *);
 void ide_proc_port_register_devices(ide_hwif_t *);
 void ide_proc_unregister_device(ide_drive_t *);
 void ide_proc_unregister_port(ide_hwif_t *);
-void ide_proc_register_driver(ide_drive_t *, ide_driver_t *);
-void ide_proc_unregister_driver(ide_drive_t *, ide_driver_t *);
+void ide_proc_register_driver(ide_drive_t *, struct ide_driver *);
+void ide_proc_unregister_driver(ide_drive_t *, struct ide_driver *);
 
 read_proc_t proc_ide_read_capacity;
 read_proc_t proc_ide_read_geometry;
@@ -1039,8 +1037,10 @@ static inline void ide_proc_register_port(ide_hwif_t *hwif) { ; }
 static inline void ide_proc_port_register_devices(ide_hwif_t *hwif) { ; }
 static inline void ide_proc_unregister_device(ide_drive_t *drive) { ; }
 static inline void ide_proc_unregister_port(ide_hwif_t *hwif) { ; }
-static inline void ide_proc_register_driver(ide_drive_t *drive, ide_driver_t *driver) { ; }
-static inline void ide_proc_unregister_driver(ide_drive_t *drive, ide_driver_t *driver) { ; }
+static inline void ide_proc_register_driver(ide_drive_t *drive,
+					    struct ide_driver *driver) { ; }
+static inline void ide_proc_unregister_driver(ide_drive_t *drive,
+					      struct ide_driver *driver) { ; }
 #define PROC_IDE_READ_RETURN(page,start,off,count,eof,len) return 0;
 #endif
 
@@ -1109,7 +1109,7 @@ void ide_check_pm_state(ide_drive_t *, struct request *);
  * The gendriver.owner field should be set to the module owner of this driver.
  * The gendriver.name field should be set to the name of this driver
  */
-struct ide_driver_s {
+struct ide_driver {
 	const char			*version;
 	ide_startstop_t	(*do_request)(ide_drive_t *, struct request *, sector_t);
 	int		(*end_request)(ide_drive_t *, int, int);
@@ -1125,7 +1125,7 @@ struct ide_driver_s {
 #endif
 };
 
-#define to_ide_driver(drv) container_of(drv, ide_driver_t, gen_driver)
+#define to_ide_driver(drv) container_of(drv, struct ide_driver, gen_driver)
 
 int ide_device_get(ide_drive_t *);
 void ide_device_put(ide_drive_t *);
-- 
cgit v1.2.3


From 627e05daa10896a8f012fa78e8434c07e9e55ea7 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Tue, 6 Jan 2009 17:20:54 +0100
Subject: ide: remove ->error method from struct ide_driver

* Remove (now superfluous) ->error method from struct ide_driver.

* Unexport __ide_error() and make it static.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-cd.c   |  1 -
 drivers/ide/ide-gd.c   |  1 -
 drivers/ide/ide-io.c   | 13 ++-----------
 drivers/ide/ide-tape.c |  1 -
 include/linux/ide.h    |  3 ---
 5 files changed, 2 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index bc982dfacc36..6c7dd8fd8638 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -1925,7 +1925,6 @@ static struct ide_driver ide_cdrom_driver = {
 	.version		= IDECD_VERSION,
 	.do_request		= ide_cd_do_request,
 	.end_request		= ide_end_request,
-	.error			= __ide_error,
 #ifdef CONFIG_IDE_PROC_FS
 	.proc_entries		= ide_cd_proc_entries,
 	.proc_devsets		= ide_cd_proc_devsets,
diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c
index cae82b3c432a..7857b209c6df 100644
--- a/drivers/ide/ide-gd.c
+++ b/drivers/ide/ide-gd.c
@@ -162,7 +162,6 @@ static struct ide_driver ide_gd_driver = {
 	.version		= IDE_GD_VERSION,
 	.do_request		= ide_gd_do_request,
 	.end_request		= ide_gd_end_request,
-	.error			= __ide_error,
 #ifdef CONFIG_IDE_PROC_FS
 	.proc_entries		= ide_disk_proc_entries,
 	.proc_devsets		= ide_disk_proc_devsets,
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index e788b3622f2e..9d363a1b5573 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -291,7 +291,7 @@ static ide_startstop_t ide_atapi_error(ide_drive_t *drive, struct request *rq, u
 	return ide_stopped;
 }
 
-ide_startstop_t
+static ide_startstop_t
 __ide_error(ide_drive_t *drive, struct request *rq, u8 stat, u8 err)
 {
 	if (drive->media == ide_disk)
@@ -299,8 +299,6 @@ __ide_error(ide_drive_t *drive, struct request *rq, u8 stat, u8 err)
 	return ide_atapi_error(drive, rq, stat, err);
 }
 
-EXPORT_SYMBOL_GPL(__ide_error);
-
 /**
  *	ide_error	-	handle an error on the IDE
  *	@drive: drive the error occurred on
@@ -332,15 +330,8 @@ ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, u8 stat)
 		return ide_stopped;
 	}
 
-	if (rq->rq_disk) {
-		struct ide_driver *drv;
-
-		drv = *(struct ide_driver **)rq->rq_disk->private_data;
-		return drv->error(drive, rq, stat, err);
-	} else
-		return __ide_error(drive, rq, stat, err);
+	return __ide_error(drive, rq, stat, err);
 }
-
 EXPORT_SYMBOL_GPL(ide_error);
 
 static void ide_tf_set_specify_cmd(ide_drive_t *drive, struct ide_taskfile *tf)
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index fd865039bfdb..d7ecd3c79757 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -2324,7 +2324,6 @@ static struct ide_driver idetape_driver = {
 	.version		= IDETAPE_VERSION,
 	.do_request		= idetape_do_request,
 	.end_request		= idetape_end_request,
-	.error			= __ide_error,
 #ifdef CONFIG_IDE_PROC_FS
 	.proc_entries		= ide_tape_proc_entries,
 	.proc_devsets		= ide_tape_proc_devsets,
diff --git a/include/linux/ide.h b/include/linux/ide.h
index fcbcfa2cbe75..9f6fe1fe7a6c 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1113,7 +1113,6 @@ struct ide_driver {
 	const char			*version;
 	ide_startstop_t	(*do_request)(ide_drive_t *, struct request *, sector_t);
 	int		(*end_request)(ide_drive_t *, int, int);
-	ide_startstop_t	(*error)(ide_drive_t *, struct request *rq, u8, u8);
 	struct device_driver	gen_driver;
 	int		(*probe)(ide_drive_t *);
 	void		(*remove)(ide_drive_t *);
@@ -1157,8 +1156,6 @@ void ide_execute_pkt_cmd(ide_drive_t *);
 
 void ide_pad_transfer(ide_drive_t *, int, int);
 
-ide_startstop_t __ide_error(ide_drive_t *, struct request *, u8, u8);
-
 ide_startstop_t ide_error(ide_drive_t *, const char *, u8);
 
 void ide_fix_driveid(u16 *);
-- 
cgit v1.2.3


From 5e7f3a46690f7f6c9f2781c700ab4370874aa0e8 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Tue, 6 Jan 2009 17:20:56 +0100
Subject: ide: dynamic allocation of device structures

Allocate device structures dynamically instead of having them embedded
in ide_hwif_t:

* Remove needless zeroing of port structure from ide_init_port_data().

* Add ide_hwif_t.devices[MAX_DRIVES] (table of pointers to the devices).

* Add ide_port_{alloc,free}_devices() helpers and use them respectively
  in ide_{host,free}_alloc().

* Convert all users of ->drives[] to use ->devices[] instead.

While at it:

* Use drive->dn for the slave device check in scc_pata.c.

As a nice side-effect this patch cuts ~1kB (x86-32) from the resulting
code size:

   text    data     bss     dec     hex filename
  53963    1244     237   55444    d894 drivers/ide/ide-core.o.before
  52981    1244     237   54462    d4be drivers/ide/ide-core.o.after

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-acpi.c  | 10 +++----
 drivers/ide/ide-iops.c  |  4 +--
 drivers/ide/ide-probe.c | 71 +++++++++++++++++++++++++++++++++++--------------
 drivers/ide/ide-proc.c  |  2 +-
 drivers/ide/ide.c       |  2 +-
 drivers/ide/scc_pata.c  |  5 ++--
 include/linux/ide.h     |  4 +--
 7 files changed, 65 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/ide-acpi.c b/drivers/ide/ide-acpi.c
index f89b6ecf7d1a..fd155b8a256c 100644
--- a/drivers/ide/ide-acpi.c
+++ b/drivers/ide/ide-acpi.c
@@ -656,7 +656,7 @@ void ide_acpi_set_state(ide_hwif_t *hwif, int on)
 	if (on)
 		acpi_bus_set_power(hwif->acpidata->obj_handle, ACPI_STATE_D0);
 	for (unit = 0; unit < MAX_DRIVES; ++unit) {
-		ide_drive_t *drive = &hwif->drives[unit];
+		ide_drive_t *drive = hwif->devices[unit];
 
 		if (!drive->acpidata->obj_handle)
 			drive->acpidata->obj_handle = ide_acpi_drive_get_handle(drive);
@@ -711,14 +711,14 @@ void ide_acpi_port_init_devices(ide_hwif_t *hwif)
 	 * for both drives, regardless whether they are connected
 	 * or not.
 	 */
-	hwif->drives[0].acpidata = &hwif->acpidata->master;
-	hwif->drives[1].acpidata = &hwif->acpidata->slave;
+	hwif->devices[0]->acpidata = &hwif->acpidata->master;
+	hwif->devices[1]->acpidata = &hwif->acpidata->slave;
 
 	/*
 	 * Send IDENTIFY for each drive
 	 */
 	for (i = 0; i < MAX_DRIVES; i++) {
-		drive = &hwif->drives[i];
+		drive = hwif->devices[i];
 
 		memset(drive->acpidata, 0, sizeof(*drive->acpidata));
 
@@ -745,7 +745,7 @@ void ide_acpi_port_init_devices(ide_hwif_t *hwif)
 	ide_acpi_push_timing(hwif);
 
 	for (i = 0; i < MAX_DRIVES; i++) {
-		drive = &hwif->drives[i];
+		drive = hwif->devices[i];
 
 		if (drive->dev_flags & IDE_DFLAG_PRESENT)
 			/* Execute ACPI startup code */
diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c
index 1bcb9484f49e..26b58d15c4e6 100644
--- a/drivers/ide/ide-iops.c
+++ b/drivers/ide/ide-iops.c
@@ -1111,7 +1111,7 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi)
 		prepare_to_wait(&ide_park_wq, &wait, TASK_UNINTERRUPTIBLE);
 		timeout = jiffies;
 		for (unit = 0; unit < MAX_DRIVES; unit++) {
-			ide_drive_t *tdrive = &hwif->drives[unit];
+			ide_drive_t *tdrive = hwif->devices[unit];
 
 			if (tdrive->dev_flags & IDE_DFLAG_PRESENT &&
 			    tdrive->dev_flags & IDE_DFLAG_PARKED &&
@@ -1134,7 +1134,7 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi)
 	 * for any of the drives on this interface.
 	 */
 	for (unit = 0; unit < MAX_DRIVES; ++unit)
-		pre_reset(&hwif->drives[unit]);
+		pre_reset(hwif->devices[unit]);
 
 	if (io_ports->ctl_addr == 0) {
 		spin_unlock_irqrestore(&hwif->lock, flags);
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 00c7e5a67bd1..006e601cafb8 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -463,7 +463,7 @@ static int do_probe (ide_drive_t *drive, u8 cmd)
 	if (ide_read_device(drive) != drive->select && present == 0) {
 		if (drive->dn & 1) {
 			/* exit with drive0 selected */
-			SELECT_DRIVE(&hwif->drives[0]);
+			SELECT_DRIVE(hwif->devices[0]);
 			/* allow ATA_BUSY to assert & clear */
 			msleep(50);
 		}
@@ -509,7 +509,7 @@ static int do_probe (ide_drive_t *drive, u8 cmd)
 	}
 	if (drive->dn & 1) {
 		/* exit with drive0 selected */
-		SELECT_DRIVE(&hwif->drives[0]);
+		SELECT_DRIVE(hwif->devices[0]);
 		msleep(50);
 		/* ensure drive irq is clear */
 		(void)tp_ops->read_status(hwif);
@@ -715,7 +715,7 @@ static int ide_port_wait_ready(ide_hwif_t *hwif)
 
 	/* Now make sure both master & slave are ready */
 	for (unit = 0; unit < MAX_DRIVES; unit++) {
-		ide_drive_t *drive = &hwif->drives[unit];
+		ide_drive_t *drive = hwif->devices[unit];
 
 		/* Ignore disks that we will not probe for later. */
 		if ((drive->dev_flags & IDE_DFLAG_NOPROBE) == 0 ||
@@ -733,7 +733,7 @@ static int ide_port_wait_ready(ide_hwif_t *hwif)
 out:
 	/* Exit function with master reselected (let's be sane) */
 	if (unit)
-		SELECT_DRIVE(&hwif->drives[0]);
+		SELECT_DRIVE(hwif->devices[0]);
 
 	return rc;
 }
@@ -749,7 +749,7 @@ out:
 
 void ide_undecoded_slave(ide_drive_t *dev1)
 {
-	ide_drive_t *dev0 = &dev1->hwif->drives[0];
+	ide_drive_t *dev0 = dev1->hwif->devices[0];
 
 	if ((dev1->dn & 1) == 0 || (dev0->dev_flags & IDE_DFLAG_PRESENT) == 0)
 		return;
@@ -784,8 +784,8 @@ static int ide_probe_port(ide_hwif_t *hwif)
 
 	BUG_ON(hwif->present);
 
-	if ((hwif->drives[0].dev_flags & IDE_DFLAG_NOPROBE) &&
-	    (hwif->drives[1].dev_flags & IDE_DFLAG_NOPROBE))
+	if ((hwif->devices[0]->dev_flags & IDE_DFLAG_NOPROBE) &&
+	    (hwif->devices[1]->dev_flags & IDE_DFLAG_NOPROBE))
 		return -EACCES;
 
 	/*
@@ -807,7 +807,7 @@ static int ide_probe_port(ide_hwif_t *hwif)
 	 * but a lot of cdrom drives are configured as single slaves.
 	 */
 	for (unit = 0; unit < MAX_DRIVES; ++unit) {
-		ide_drive_t *drive = &hwif->drives[unit];
+		ide_drive_t *drive = hwif->devices[unit];
 
 		(void) probe_for_drive(drive);
 		if (drive->dev_flags & IDE_DFLAG_PRESENT)
@@ -832,7 +832,7 @@ static void ide_port_tune_devices(ide_hwif_t *hwif)
 	int unit;
 
 	for (unit = 0; unit < MAX_DRIVES; unit++) {
-		ide_drive_t *drive = &hwif->drives[unit];
+		ide_drive_t *drive = hwif->devices[unit];
 
 		if (drive->dev_flags & IDE_DFLAG_PRESENT) {
 			if (port_ops && port_ops->quirkproc)
@@ -841,7 +841,7 @@ static void ide_port_tune_devices(ide_hwif_t *hwif)
 	}
 
 	for (unit = 0; unit < MAX_DRIVES; ++unit) {
-		ide_drive_t *drive = &hwif->drives[unit];
+		ide_drive_t *drive = hwif->devices[unit];
 
 		if (drive->dev_flags & IDE_DFLAG_PRESENT) {
 			ide_set_max_pio(drive);
@@ -854,7 +854,7 @@ static void ide_port_tune_devices(ide_hwif_t *hwif)
 	}
 
 	for (unit = 0; unit < MAX_DRIVES; ++unit) {
-		ide_drive_t *drive = &hwif->drives[unit];
+		ide_drive_t *drive = hwif->devices[unit];
 
 		if ((hwif->host_flags & IDE_HFLAG_NO_IO_32BIT) ||
 		    drive->id[ATA_ID_DWORD_IO])
@@ -931,7 +931,7 @@ static int ide_port_setup_devices(ide_hwif_t *hwif)
 
 	mutex_lock(&ide_cfg_mtx);
 	for (i = 0; i < MAX_DRIVES; i++) {
-		ide_drive_t *drive = &hwif->drives[i];
+		ide_drive_t *drive = hwif->devices[i];
 
 		if ((drive->dev_flags & IDE_DFLAG_PRESENT) == 0)
 			continue;
@@ -1017,7 +1017,7 @@ static struct kobject *ata_probe(dev_t dev, int *part, void *data)
 {
 	ide_hwif_t *hwif = data;
 	int unit = *part >> PARTN_BITS;
-	ide_drive_t *drive = &hwif->drives[unit];
+	ide_drive_t *drive = hwif->devices[unit];
 
 	if ((drive->dev_flags & IDE_DFLAG_PRESENT) == 0)
 		return NULL;
@@ -1164,7 +1164,7 @@ static void hwif_register_devices(ide_hwif_t *hwif)
 	unsigned int i;
 
 	for (i = 0; i < MAX_DRIVES; i++) {
-		ide_drive_t *drive = &hwif->drives[i];
+		ide_drive_t *drive = hwif->devices[i];
 		struct device *dev = &drive->gendev;
 		int ret;
 
@@ -1190,7 +1190,7 @@ static void ide_port_init_devices(ide_hwif_t *hwif)
 	int i;
 
 	for (i = 0; i < MAX_DRIVES; i++) {
-		ide_drive_t *drive = &hwif->drives[i];
+		ide_drive_t *drive = hwif->devices[i];
 
 		drive->dn = i + hwif->channel * 2;
 
@@ -1285,7 +1285,7 @@ static void ide_port_init_devices_data(ide_hwif_t *hwif)
 	int unit;
 
 	for (unit = 0; unit < MAX_DRIVES; ++unit) {
-		ide_drive_t *drive = &hwif->drives[unit];
+		ide_drive_t *drive = hwif->devices[unit];
 		u8 j = (hwif->index * MAX_DRIVES) + unit;
 
 		memset(drive, 0, sizeof(*drive));
@@ -1309,9 +1309,6 @@ static void ide_port_init_devices_data(ide_hwif_t *hwif)
 
 static void ide_init_port_data(ide_hwif_t *hwif, unsigned int index)
 {
-	/* bulk initialize hwif & drive info with zeros */
-	memset(hwif, 0, sizeof(ide_hwif_t));
-
 	/* fill in any non-zero initial values */
 	hwif->index	= index;
 	hwif->major	= ide_hwif_to_major[index];
@@ -1388,6 +1385,34 @@ static void ide_free_port_slot(int idx)
 	mutex_unlock(&ide_cfg_mtx);
 }
 
+static void ide_port_free_devices(ide_hwif_t *hwif)
+{
+	int i;
+
+	for (i = 0; i < MAX_DRIVES; i++)
+		kfree(hwif->devices[i]);
+}
+
+static int ide_port_alloc_devices(ide_hwif_t *hwif, int node)
+{
+	int i;
+
+	for (i = 0; i < MAX_DRIVES; i++) {
+		ide_drive_t *drive;
+
+		drive = kzalloc_node(sizeof(*drive), GFP_KERNEL, node);
+		if (drive == NULL)
+			goto out_nomem;
+
+		hwif->devices[i] = drive;
+	}
+	return 0;
+
+out_nomem:
+	ide_port_free_devices(hwif);
+	return -ENOMEM;
+}
+
 struct ide_host *ide_host_alloc(const struct ide_port_info *d, hw_regs_t **hws)
 {
 	struct ide_host *host;
@@ -1410,6 +1435,11 @@ struct ide_host *ide_host_alloc(const struct ide_port_info *d, hw_regs_t **hws)
 		if (hwif == NULL)
 			continue;
 
+		if (ide_port_alloc_devices(hwif, node) < 0) {
+			kfree(hwif);
+			continue;
+		}
+
 		idx = ide_find_port_slot(d);
 		if (idx < 0) {
 			printk(KERN_ERR "%s: no free slot for interface\n",
@@ -1575,7 +1605,7 @@ static void __ide_port_unregister_devices(ide_hwif_t *hwif)
 	int i;
 
 	for (i = 0; i < MAX_DRIVES; i++) {
-		ide_drive_t *drive = &hwif->drives[i];
+		ide_drive_t *drive = hwif->devices[i];
 
 		if (drive->dev_flags & IDE_DFLAG_PRESENT) {
 			device_unregister(&drive->gendev);
@@ -1651,6 +1681,7 @@ void ide_host_free(struct ide_host *host)
 		if (hwif == NULL)
 			continue;
 
+		ide_port_free_devices(hwif);
 		ide_free_port_slot(hwif->index);
 		kfree(hwif);
 	}
diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c
index d985a9ec6bef..1dc827fa7061 100644
--- a/drivers/ide/ide-proc.c
+++ b/drivers/ide/ide-proc.c
@@ -599,7 +599,7 @@ void ide_proc_port_register_devices(ide_hwif_t *hwif)
 	char name[64];
 
 	for (d = 0; d < MAX_DRIVES; d++) {
-		ide_drive_t *drive = &hwif->drives[d];
+		ide_drive_t *drive = hwif->devices[d];
 
 		if ((drive->dev_flags & IDE_DFLAG_PRESENT) == 0 || drive->proc)
 			continue;
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index 6971c285a212..8a6f893d127e 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -497,7 +497,7 @@ void ide_port_apply_params(ide_hwif_t *hwif)
 	}
 
 	for (i = 0; i < MAX_DRIVES; i++)
-		ide_dev_apply_params(&hwif->drives[i], i);
+		ide_dev_apply_params(hwif->devices[i], i);
 }
 
 /*
diff --git a/drivers/ide/scc_pata.c b/drivers/ide/scc_pata.c
index 90574ab76345..841164d7d3b9 100644
--- a/drivers/ide/scc_pata.c
+++ b/drivers/ide/scc_pata.c
@@ -259,7 +259,7 @@ static void scc_set_dma_mode(ide_drive_t *drive, const u8 speed)
 	unsigned long scrcst_port = ctl_base + 0x014;
 	unsigned long udenvt_port = ctl_base + 0x018;
 	unsigned long tdvhsel_port   = ctl_base + 0x020;
-	int is_slave = (&hwif->drives[1] == drive);
+	int is_slave = drive->dn & 1;
 	int offset, idx;
 	unsigned long reg;
 	unsigned long jcactsel;
@@ -413,7 +413,8 @@ static int scc_dma_end(ide_drive_t *drive)
 				if (rq)
 					rq->errors |= ERROR_RESET;
 				for (unit = 0; unit < MAX_DRIVES; unit++) {
-					ide_drive_t *drive = &hwif->drives[unit];
+					ide_drive_t *drive = hwif->devices[unit];
+
 					drive->crc_count++;
 				}
 			}
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 9f6fe1fe7a6c..f00086b10be3 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -753,7 +753,7 @@ typedef struct hwif_s {
 
 	unsigned long	sata_scr[SATA_NR_PORTS];
 
-	ide_drive_t	drives[MAX_DRIVES];	/* drive info */
+	ide_drive_t	*devices[MAX_DRIVES];
 
 	u8 major;	/* our major number */
 	u8 index;	/* 0 for ide0; 1 for ide1; ... */
@@ -1600,7 +1600,7 @@ static inline int hwif_to_node(ide_hwif_t *hwif)
 
 static inline ide_drive_t *ide_get_pair_dev(ide_drive_t *drive)
 {
-	ide_drive_t *peer = &drive->hwif->drives[(drive->dn ^ 1) & 1];
+	ide_drive_t *peer = drive->hwif->devices[(drive->dn ^ 1) & 1];
 
 	return (peer->dev_flags & IDE_DFLAG_PRESENT) ? peer : NULL;
 }
-- 
cgit v1.2.3


From 2bd24a1cfc99d242c2cff9a6b74ca49fcaac3fb6 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Tue, 6 Jan 2009 17:20:56 +0100
Subject: ide: add port and host iterators

Add ide_port_for_each_dev() / ide_host_for_each_port() iterators
and update IDE code to use them.

While at it:
- s/unit/i/ variable in ide_port_wait_ready(), ide_probe_port(),
  ide_port_tune_devices(), ide_port_init_devices_data(), do_reset1(),
  ide_acpi_set_state() and scc_dma_end()
- s/d/i/ variable in ide_proc_port_register_devices()

There should be no functional changes caused by this patch.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-acpi.c  | 14 +++-----
 drivers/ide/ide-iops.c  | 11 +++---
 drivers/ide/ide-probe.c | 96 ++++++++++++++++++++-----------------------------
 drivers/ide/ide-proc.c  |  7 ++--
 drivers/ide/ide.c       |  5 +--
 drivers/ide/scc_pata.c  |  8 ++---
 include/linux/ide.h     | 11 ++++--
 7 files changed, 67 insertions(+), 85 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/ide-acpi.c b/drivers/ide/ide-acpi.c
index fd155b8a256c..2f9e941968d6 100644
--- a/drivers/ide/ide-acpi.c
+++ b/drivers/ide/ide-acpi.c
@@ -641,7 +641,8 @@ void ide_acpi_push_timing(ide_hwif_t *hwif)
  */
 void ide_acpi_set_state(ide_hwif_t *hwif, int on)
 {
-	int unit;
+	ide_drive_t *drive;
+	int i;
 
 	if (ide_noacpi || ide_noacpi_psx)
 		return;
@@ -655,9 +656,8 @@ void ide_acpi_set_state(ide_hwif_t *hwif, int on)
 	/* channel first and then drives for power on and verse versa for power off */
 	if (on)
 		acpi_bus_set_power(hwif->acpidata->obj_handle, ACPI_STATE_D0);
-	for (unit = 0; unit < MAX_DRIVES; ++unit) {
-		ide_drive_t *drive = hwif->devices[unit];
 
+	ide_port_for_each_dev(i, drive, hwif) {
 		if (!drive->acpidata->obj_handle)
 			drive->acpidata->obj_handle = ide_acpi_drive_get_handle(drive);
 
@@ -717,9 +717,7 @@ void ide_acpi_port_init_devices(ide_hwif_t *hwif)
 	/*
 	 * Send IDENTIFY for each drive
 	 */
-	for (i = 0; i < MAX_DRIVES; i++) {
-		drive = hwif->devices[i];
-
+	ide_port_for_each_dev(i, drive, hwif) {
 		memset(drive->acpidata, 0, sizeof(*drive->acpidata));
 
 		if ((drive->dev_flags & IDE_DFLAG_PRESENT) == 0)
@@ -744,9 +742,7 @@ void ide_acpi_port_init_devices(ide_hwif_t *hwif)
 	ide_acpi_get_timing(hwif);
 	ide_acpi_push_timing(hwif);
 
-	for (i = 0; i < MAX_DRIVES; i++) {
-		drive = hwif->devices[i];
-
+	ide_port_for_each_dev(i, drive, hwif) {
 		if (drive->dev_flags & IDE_DFLAG_PRESENT)
 			/* Execute ACPI startup code */
 			ide_acpi_exec_tfs(drive);
diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c
index 26b58d15c4e6..1a22c31ce7ac 100644
--- a/drivers/ide/ide-iops.c
+++ b/drivers/ide/ide-iops.c
@@ -1081,8 +1081,9 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi)
 	struct ide_io_ports *io_ports = &hwif->io_ports;
 	const struct ide_tp_ops *tp_ops = hwif->tp_ops;
 	const struct ide_port_ops *port_ops;
+	ide_drive_t *tdrive;
 	unsigned long flags, timeout;
-	unsigned int unit;
+	int i;
 	DEFINE_WAIT(wait);
 
 	spin_lock_irqsave(&hwif->lock, flags);
@@ -1110,9 +1111,7 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi)
 
 		prepare_to_wait(&ide_park_wq, &wait, TASK_UNINTERRUPTIBLE);
 		timeout = jiffies;
-		for (unit = 0; unit < MAX_DRIVES; unit++) {
-			ide_drive_t *tdrive = hwif->devices[unit];
-
+		ide_port_for_each_dev(i, tdrive, hwif) {
 			if (tdrive->dev_flags & IDE_DFLAG_PRESENT &&
 			    tdrive->dev_flags & IDE_DFLAG_PARKED &&
 			    time_after(tdrive->sleep, timeout))
@@ -1133,8 +1132,8 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi)
 	 * First, reset any device state data we were maintaining
 	 * for any of the drives on this interface.
 	 */
-	for (unit = 0; unit < MAX_DRIVES; ++unit)
-		pre_reset(hwif->devices[unit]);
+	ide_port_for_each_dev(i, tdrive, hwif)
+		pre_reset(tdrive);
 
 	if (io_ports->ctl_addr == 0) {
 		spin_unlock_irqrestore(&hwif->lock, flags);
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 006e601cafb8..e688ca1c967c 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -697,7 +697,8 @@ out:
 
 static int ide_port_wait_ready(ide_hwif_t *hwif)
 {
-	int unit, rc;
+	ide_drive_t *drive;
+	int i, rc;
 
 	printk(KERN_DEBUG "Probing IDE interface %s...\n", hwif->name);
 
@@ -714,9 +715,7 @@ static int ide_port_wait_ready(ide_hwif_t *hwif)
 		return rc;
 
 	/* Now make sure both master & slave are ready */
-	for (unit = 0; unit < MAX_DRIVES; unit++) {
-		ide_drive_t *drive = hwif->devices[unit];
-
+	ide_port_for_each_dev(i, drive, hwif) {
 		/* Ignore disks that we will not probe for later. */
 		if ((drive->dev_flags & IDE_DFLAG_NOPROBE) == 0 ||
 		    (drive->dev_flags & IDE_DFLAG_PRESENT)) {
@@ -732,7 +731,7 @@ static int ide_port_wait_ready(ide_hwif_t *hwif)
 	}
 out:
 	/* Exit function with master reselected (let's be sane) */
-	if (unit)
+	if (i)
 		SELECT_DRIVE(hwif->devices[0]);
 
 	return rc;
@@ -778,9 +777,10 @@ EXPORT_SYMBOL_GPL(ide_undecoded_slave);
 
 static int ide_probe_port(ide_hwif_t *hwif)
 {
+	ide_drive_t *drive;
 	unsigned long flags;
 	unsigned int irqd;
-	int unit, rc = -ENODEV;
+	int i, rc = -ENODEV;
 
 	BUG_ON(hwif->present);
 
@@ -806,9 +806,7 @@ static int ide_probe_port(ide_hwif_t *hwif)
 	 * Second drive should only exist if first drive was found,
 	 * but a lot of cdrom drives are configured as single slaves.
 	 */
-	for (unit = 0; unit < MAX_DRIVES; ++unit) {
-		ide_drive_t *drive = hwif->devices[unit];
-
+	ide_port_for_each_dev(i, drive, hwif) {
 		(void) probe_for_drive(drive);
 		if (drive->dev_flags & IDE_DFLAG_PRESENT)
 			rc = 0;
@@ -829,20 +827,17 @@ static int ide_probe_port(ide_hwif_t *hwif)
 static void ide_port_tune_devices(ide_hwif_t *hwif)
 {
 	const struct ide_port_ops *port_ops = hwif->port_ops;
-	int unit;
-
-	for (unit = 0; unit < MAX_DRIVES; unit++) {
-		ide_drive_t *drive = hwif->devices[unit];
+	ide_drive_t *drive;
+	int i;
 
+	ide_port_for_each_dev(i, drive, hwif) {
 		if (drive->dev_flags & IDE_DFLAG_PRESENT) {
 			if (port_ops && port_ops->quirkproc)
 				port_ops->quirkproc(drive);
 		}
 	}
 
-	for (unit = 0; unit < MAX_DRIVES; ++unit) {
-		ide_drive_t *drive = hwif->devices[unit];
-
+	ide_port_for_each_dev(i, drive, hwif) {
 		if (drive->dev_flags & IDE_DFLAG_PRESENT) {
 			ide_set_max_pio(drive);
 
@@ -853,9 +848,7 @@ static void ide_port_tune_devices(ide_hwif_t *hwif)
 		}
 	}
 
-	for (unit = 0; unit < MAX_DRIVES; ++unit) {
-		ide_drive_t *drive = hwif->devices[unit];
-
+	ide_port_for_each_dev(i, drive, hwif) {
 		if ((hwif->host_flags & IDE_HFLAG_NO_IO_32BIT) ||
 		    drive->id[ATA_ID_DWORD_IO])
 			drive->dev_flags |= IDE_DFLAG_NO_IO_32BIT;
@@ -927,12 +920,11 @@ static DEFINE_MUTEX(ide_cfg_mtx);
  */
 static int ide_port_setup_devices(ide_hwif_t *hwif)
 {
+	ide_drive_t *drive;
 	int i, j = 0;
 
 	mutex_lock(&ide_cfg_mtx);
-	for (i = 0; i < MAX_DRIVES; i++) {
-		ide_drive_t *drive = hwif->devices[i];
-
+	ide_port_for_each_dev(i, drive, hwif) {
 		if ((drive->dev_flags & IDE_DFLAG_PRESENT) == 0)
 			continue;
 
@@ -1161,10 +1153,10 @@ out:
 
 static void hwif_register_devices(ide_hwif_t *hwif)
 {
+	ide_drive_t *drive;
 	unsigned int i;
 
-	for (i = 0; i < MAX_DRIVES; i++) {
-		ide_drive_t *drive = hwif->devices[i];
+	ide_port_for_each_dev(i, drive, hwif) {
 		struct device *dev = &drive->gendev;
 		int ret;
 
@@ -1187,11 +1179,10 @@ static void hwif_register_devices(ide_hwif_t *hwif)
 static void ide_port_init_devices(ide_hwif_t *hwif)
 {
 	const struct ide_port_ops *port_ops = hwif->port_ops;
+	ide_drive_t *drive;
 	int i;
 
-	for (i = 0; i < MAX_DRIVES; i++) {
-		ide_drive_t *drive = hwif->devices[i];
-
+	ide_port_for_each_dev(i, drive, hwif) {
 		drive->dn = i + hwif->channel * 2;
 
 		if (hwif->host_flags & IDE_HFLAG_IO_32BIT)
@@ -1282,16 +1273,16 @@ static const u8 ide_hwif_to_major[] =
 
 static void ide_port_init_devices_data(ide_hwif_t *hwif)
 {
-	int unit;
+	ide_drive_t *drive;
+	int i;
 
-	for (unit = 0; unit < MAX_DRIVES; ++unit) {
-		ide_drive_t *drive = hwif->devices[unit];
-		u8 j = (hwif->index * MAX_DRIVES) + unit;
+	ide_port_for_each_dev(i, drive, hwif) {
+		u8 j = (hwif->index * MAX_DRIVES) + i;
 
 		memset(drive, 0, sizeof(*drive));
 
 		drive->media			= ide_disk;
-		drive->select			= (unit << 4) | ATA_DEVICE_OBS;
+		drive->select			= (i << 4) | ATA_DEVICE_OBS;
 		drive->hwif			= hwif;
 		drive->ready_stat		= ATA_DRDY;
 		drive->bad_wstat		= BAD_W_STAT;
@@ -1387,10 +1378,11 @@ static void ide_free_port_slot(int idx)
 
 static void ide_port_free_devices(ide_hwif_t *hwif)
 {
+	ide_drive_t *drive;
 	int i;
 
-	for (i = 0; i < MAX_DRIVES; i++)
-		kfree(hwif->devices[i]);
+	ide_port_for_each_dev(i, drive, hwif)
+		kfree(drive);
 }
 
 static int ide_port_alloc_devices(ide_hwif_t *hwif, int node)
@@ -1478,9 +1470,7 @@ int ide_host_register(struct ide_host *host, const struct ide_port_info *d,
 	ide_hwif_t *hwif, *mate = NULL;
 	int i, j = 0;
 
-	for (i = 0; i < MAX_HOST_PORTS; i++) {
-		hwif = host->ports[i];
-
+	ide_host_for_each_port(i, hwif, host) {
 		if (hwif == NULL) {
 			mate = NULL;
 			continue;
@@ -1506,9 +1496,7 @@ int ide_host_register(struct ide_host *host, const struct ide_port_info *d,
 		ide_port_init_devices(hwif);
 	}
 
-	for (i = 0; i < MAX_HOST_PORTS; i++) {
-		hwif = host->ports[i];
-
+	ide_host_for_each_port(i, hwif, host) {
 		if (hwif == NULL)
 			continue;
 
@@ -1523,9 +1511,7 @@ int ide_host_register(struct ide_host *host, const struct ide_port_info *d,
 			ide_port_tune_devices(hwif);
 	}
 
-	for (i = 0; i < MAX_HOST_PORTS; i++) {
-		hwif = host->ports[i];
-
+	ide_host_for_each_port(i, hwif, host) {
 		if (hwif == NULL)
 			continue;
 
@@ -1550,9 +1536,7 @@ int ide_host_register(struct ide_host *host, const struct ide_port_info *d,
 			ide_acpi_port_init_devices(hwif);
 	}
 
-	for (i = 0; i < MAX_HOST_PORTS; i++) {
-		hwif = host->ports[i];
-
+	ide_host_for_each_port(i, hwif, host) {
 		if (hwif == NULL)
 			continue;
 
@@ -1560,9 +1544,7 @@ int ide_host_register(struct ide_host *host, const struct ide_port_info *d,
 			hwif_register_devices(hwif);
 	}
 
-	for (i = 0; i < MAX_HOST_PORTS; i++) {
-		hwif = host->ports[i];
-
+	ide_host_for_each_port(i, hwif, host) {
 		if (hwif == NULL)
 			continue;
 
@@ -1602,11 +1584,10 @@ EXPORT_SYMBOL_GPL(ide_host_add);
 
 static void __ide_port_unregister_devices(ide_hwif_t *hwif)
 {
+	ide_drive_t *drive;
 	int i;
 
-	for (i = 0; i < MAX_DRIVES; i++) {
-		ide_drive_t *drive = hwif->devices[i];
-
+	ide_port_for_each_dev(i, drive, hwif) {
 		if (drive->dev_flags & IDE_DFLAG_PRESENT) {
 			device_unregister(&drive->gendev);
 			wait_for_completion(&drive->gendev_rel_comp);
@@ -1675,9 +1656,7 @@ void ide_host_free(struct ide_host *host)
 	ide_hwif_t *hwif;
 	int i;
 
-	for (i = 0; i < MAX_HOST_PORTS; i++) {
-		hwif = host->ports[i];
-
+	ide_host_for_each_port(i, hwif, host) {
 		if (hwif == NULL)
 			continue;
 
@@ -1692,11 +1671,12 @@ EXPORT_SYMBOL_GPL(ide_host_free);
 
 void ide_host_remove(struct ide_host *host)
 {
+	ide_hwif_t *hwif;
 	int i;
 
-	for (i = 0; i < MAX_HOST_PORTS; i++) {
-		if (host->ports[i])
-			ide_unregister(host->ports[i]);
+	ide_host_for_each_port(i, hwif, host) {
+		if (hwif)
+			ide_unregister(hwif);
 	}
 
 	ide_host_free(host);
diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c
index 1dc827fa7061..1d8978b3314a 100644
--- a/drivers/ide/ide-proc.c
+++ b/drivers/ide/ide-proc.c
@@ -593,14 +593,13 @@ EXPORT_SYMBOL(ide_proc_unregister_driver);
 
 void ide_proc_port_register_devices(ide_hwif_t *hwif)
 {
-	int	d;
 	struct proc_dir_entry *ent;
 	struct proc_dir_entry *parent = hwif->proc;
+	ide_drive_t *drive;
 	char name[64];
+	int i;
 
-	for (d = 0; d < MAX_DRIVES; d++) {
-		ide_drive_t *drive = hwif->devices[d];
-
+	ide_port_for_each_dev(i, drive, hwif) {
 		if ((drive->dev_flags & IDE_DFLAG_PRESENT) == 0 || drive->proc)
 			continue;
 
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index 8a6f893d127e..258805da15c3 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -488,6 +488,7 @@ MODULE_PARM_DESC(ignore_cable, "ignore cable detection");
 
 void ide_port_apply_params(ide_hwif_t *hwif)
 {
+	ide_drive_t *drive;
 	int i;
 
 	if (ide_ignore_cable & (1 << hwif->index)) {
@@ -496,8 +497,8 @@ void ide_port_apply_params(ide_hwif_t *hwif)
 		hwif->cbl = ATA_CBL_PATA40_SHORT;
 	}
 
-	for (i = 0; i < MAX_DRIVES; i++)
-		ide_dev_apply_params(hwif->devices[i], i);
+	ide_port_for_each_dev(i, drive, hwif)
+		ide_dev_apply_params(drive, i);
 }
 
 /*
diff --git a/drivers/ide/scc_pata.c b/drivers/ide/scc_pata.c
index 841164d7d3b9..5d53850c79d7 100644
--- a/drivers/ide/scc_pata.c
+++ b/drivers/ide/scc_pata.c
@@ -406,17 +406,17 @@ static int scc_dma_end(ide_drive_t *drive)
 			data_loss = 1;
 			if (retry++) {
 				struct request *rq = hwif->rq;
-				int unit;
+				ide_drive_t *drive;
+				int i;
+
 				/* ERROR_RESET and drive->crc_count are needed
 				 * to reduce DMA transfer mode in retry process.
 				 */
 				if (rq)
 					rq->errors |= ERROR_RESET;
-				for (unit = 0; unit < MAX_DRIVES; unit++) {
-					ide_drive_t *drive = hwif->devices[unit];
 
+				ide_port_for_each_dev(i, drive, hwif)
 					drive->crc_count++;
-				}
 			}
 		}
 	}
diff --git a/include/linux/ide.h b/include/linux/ide.h
index f00086b10be3..4cecd923fc79 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -753,7 +753,7 @@ typedef struct hwif_s {
 
 	unsigned long	sata_scr[SATA_NR_PORTS];
 
-	ide_drive_t	*devices[MAX_DRIVES];
+	ide_drive_t	*devices[MAX_DRIVES + 1];
 
 	u8 major;	/* our major number */
 	u8 index;	/* 0 for ide0; 1 for ide1; ... */
@@ -861,7 +861,7 @@ typedef struct hwif_s {
 #define MAX_HOST_PORTS 4
 
 struct ide_host {
-	ide_hwif_t	*ports[MAX_HOST_PORTS];
+	ide_hwif_t	*ports[MAX_HOST_PORTS + 1];
 	unsigned int	n_ports;
 	struct device	*dev[2];
 	unsigned int	(*init_chipset)(struct pci_dev *);
@@ -1604,4 +1604,11 @@ static inline ide_drive_t *ide_get_pair_dev(ide_drive_t *drive)
 
 	return (peer->dev_flags & IDE_DFLAG_PRESENT) ? peer : NULL;
 }
+
+#define ide_port_for_each_dev(i, dev, port) \
+	for ((i) = 0; ((dev) = (port)->devices[i]) || (i) < MAX_DRIVES; (i)++)
+
+#define ide_host_for_each_port(i, port, host) \
+	for ((i) = 0; ((port) = (host)->ports[i]) || (i) < MAX_HOST_PORTS; (i)++)
+
 #endif /* _IDE_H */
-- 
cgit v1.2.3


From d6251d4488a361c93da2398818e1ec69cffb6073 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <petkovbb@gmail.com>
Date: Tue, 6 Jan 2009 17:20:58 +0100
Subject: ide-cd: convert to ide-atapi facilities

... and remove no longer needed cdrom_start_packet_command and
cdrom_transfer_packet_command.

Tested lightly with ide-cd and ide-floppy.

Signed-off-by: Borislav Petkov <petkovbb@gmail.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-atapi.c |  5 ++-
 drivers/ide/ide-cd.c    | 96 ++-----------------------------------------------
 include/linux/ide.h     |  2 ++
 3 files changed, 8 insertions(+), 95 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index d6a50d67b7db..e96c01260598 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -549,7 +549,10 @@ static ide_startstop_t ide_transfer_pc(ide_drive_t *drive)
 	}
 
 	/* Set the interrupt routine */
-	ide_set_handler(drive, ide_pc_intr, timeout, expiry);
+	ide_set_handler(drive,
+			(dev_is_idecd(drive) ? drive->irq_handler
+					     : ide_pc_intr),
+			timeout, expiry);
 
 	/* Begin DMA, if necessary */
 	if (dev_is_idecd(drive)) {
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 2cb301dccd21..cae69372cf45 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -510,99 +510,6 @@ end_request:
 	return 1;
 }
 
-static ide_startstop_t cdrom_transfer_packet_command(ide_drive_t *);
-static ide_startstop_t cdrom_newpc_intr(ide_drive_t *);
-
-/*
- * Set up the device registers for transferring a packet command on DEV,
- * expecting to later transfer XFERLEN bytes.  HANDLER is the routine
- * which actually transfers the command to the drive.  If this is a
- * drq_interrupt device, this routine will arrange for HANDLER to be
- * called when the interrupt from the drive arrives.  Otherwise, HANDLER
- * will be called immediately after the drive is prepared for the transfer.
- */
-static ide_startstop_t cdrom_start_packet_command(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	struct request *rq = hwif->rq;
-	int xferlen;
-
-	xferlen = ide_cd_get_xferlen(rq);
-
-	ide_debug_log(IDE_DBG_PC, "Call %s, xferlen: %d\n", __func__, xferlen);
-
-	/* FIXME: for Virtual DMA we must check harder */
-	if (drive->dma)
-		drive->dma = !hwif->dma_ops->dma_setup(drive);
-
-	/* set up the controller registers */
-	ide_pktcmd_tf_load(drive, IDE_TFLAG_OUT_NSECT | IDE_TFLAG_OUT_LBAL,
-			   xferlen, drive->dma);
-
-	if (drive->atapi_flags & IDE_AFLAG_DRQ_INTERRUPT) {
-		/* waiting for CDB interrupt, not DMA yet. */
-		if (drive->dma)
-			drive->waiting_for_dma = 0;
-
-		/* packet command */
-		ide_execute_command(drive, ATA_CMD_PACKET,
-				    cdrom_transfer_packet_command,
-				    ATAPI_WAIT_PC, ide_cd_expiry);
-		return ide_started;
-	} else {
-		ide_execute_pkt_cmd(drive);
-
-		return cdrom_transfer_packet_command(drive);
-	}
-}
-
-/*
- * Send a packet command to DRIVE described by CMD_BUF and CMD_LEN. The device
- * registers must have already been prepared by cdrom_start_packet_command.
- * HANDLER is the interrupt handler to call when the command completes or
- * there's data ready.
- */
-#define ATAPI_MIN_CDB_BYTES 12
-static ide_startstop_t cdrom_transfer_packet_command(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	struct request *rq = hwif->rq;
-	int cmd_len;
-	ide_startstop_t startstop;
-
-	ide_debug_log(IDE_DBG_PC, "Call %s\n", __func__);
-
-	/* we must wait for DRQ to get set */
-	if (ide_wait_stat(&startstop, drive, ATA_DRQ, ATA_BUSY, WAIT_READY)) {
-		printk(KERN_ERR "%s: timeout while waiting for DRQ to assert\n",
-				drive->name);
-		return startstop;
-	}
-
-	if (drive->atapi_flags & IDE_AFLAG_DRQ_INTERRUPT) {
-		/* ok, next interrupt will be DMA interrupt */
-		if (drive->dma)
-			drive->waiting_for_dma = 1;
-	}
-
-	/* arm the interrupt handler */
-	ide_set_handler(drive, cdrom_newpc_intr, rq->timeout, ide_cd_expiry);
-
-	/* ATAPI commands get padded out to 12 bytes minimum */
-	cmd_len = COMMAND_SIZE(rq->cmd[0]);
-	if (cmd_len < ATAPI_MIN_CDB_BYTES)
-		cmd_len = ATAPI_MIN_CDB_BYTES;
-
-	/* start the DMA if need be */
-	if (drive->dma)
-		hwif->dma_ops->dma_start(drive);
-
-	/* send the command to the device */
-	hwif->tp_ops->output_data(drive, NULL, rq->cmd, cmd_len);
-
-	return ide_started;
-}
-
 /*
  * Check the contents of the interrupt reason register from the cdrom
  * and attempt to recover if there are problems.  Returns  0 if everything's
@@ -1174,7 +1081,7 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
 		return ide_stopped;
 	}
 
-	return cdrom_start_packet_command(drive);
+	return ide_issue_pc(drive);
 }
 
 /*
@@ -2072,6 +1979,7 @@ static int ide_cd_probe(ide_drive_t *drive)
 	}
 
 	drive->debug_mask = debug_mask;
+	drive->irq_handler = cdrom_newpc_intr;
 
 	info = kzalloc(sizeof(struct cdrom_info), GFP_KERNEL);
 	if (info == NULL) {
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 4cecd923fc79..13deba5e0157 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -654,6 +654,8 @@ struct ide_drive_s {
 	int  (*pc_io_buffers)(struct ide_drive_s *, struct ide_atapi_pc *,
 			      unsigned int, int);
 
+	ide_startstop_t (*irq_handler)(struct ide_drive_s *);
+
 	unsigned long atapi_flags;
 
 	struct ide_atapi_pc request_sense_pc;
-- 
cgit v1.2.3


From 906ef986a71d541a726550fa40dcbc5c356f810e Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Tue, 6 Jan 2009 17:20:59 +0100
Subject: ide: struct ide_atapi_pc - remove unused fields and update
 documentation

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 include/linux/ide.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ide.h b/include/linux/ide.h
index 13deba5e0157..a7dbfd857115 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -426,13 +426,9 @@ struct ide_atapi_pc {
 	struct idetape_bh *bh;
 	char *b_data;
 
-	/* idescsi only for now */
 	struct scatterlist *sg;
 	unsigned int sg_cnt;
 
-	struct scsi_cmnd *scsi_cmd;
-	void (*done) (struct scsi_cmnd *);
-
 	unsigned long timeout;
 };
 
-- 
cgit v1.2.3


From 94c96445f32c16cfdc398b20b7e78945ab7e35f9 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Tue, 6 Jan 2009 17:20:59 +0100
Subject: ide: remove unused ide_hwif_t.sg_mapped field

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 include/linux/ide.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ide.h b/include/linux/ide.h
index a7dbfd857115..ebc22a836520 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -817,7 +817,6 @@ typedef struct hwif_s {
 	unsigned	extra_ports;	/* number of extra dma ports */
 
 	unsigned	present    : 1;	/* this interface exists */
-	unsigned	sg_mapped  : 1;	/* sg_table and sg_nents are ready */
 	unsigned	busy	   : 1; /* serializes devices on a port */
 
 	struct device		gendev;
-- 
cgit v1.2.3


From 391ad1908a9c13d457ea12ce1508d6b8a7ba72ad Mon Sep 17 00:00:00 2001
From: Shane McDonald <mcdonald.shane@gmail.com>
Date: Tue, 6 Jan 2009 17:21:01 +0100
Subject: Resurrect IT8172 IDE controller driver

Support for the IT8172 IDE controller was removed from the kernel
sometime after 2.6.18.  Support for the only boards that used the IT8172
was removed from the kernel after 2.6.18, as they had never compiled
since 2.6.0.  However, there are a couple of platforms that use this
chip: the PMC-Sierra Xiao Hu thin-client computer, which is no longer
in production, and the Linksys NSS4000 Network Attached Storage box,
which is based on the Xiao Hu board.  I am attempting to add support
for the Xiao Hu to the kernel, and this IT8172 IDE controller is the
first bit of code in this effort.

This patch resurrects the IT8172 IDE controller code.  I began with
the 2.6.18 version of the it8172.c file, and have moved it forward so
that it works with the latest version of the kernel.  I have run this
driver on a PMC-Sierra Xiao Hu board with the 2.6.28 kernel, and
I have had no problems with it in my configuration.  The attached patch
applies cleanly against 2.6.28.

Signed-off-by: Shane McDonald <mcdonald.shane@gmail.com>
Acked-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Cc: alan@lxorguk.ukuu.org.uk
[bart: s/HWIF(drive)/drive->hwif/]
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/Kconfig     |   7 ++
 drivers/ide/Makefile    |   1 +
 drivers/ide/it8172.c    | 166 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/pci_ids.h |   1 +
 4 files changed, 175 insertions(+)
 create mode 100644 drivers/ide/it8172.c

(limited to 'include/linux')

diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig
index 4ee85fcf9aaf..3f9503867e6b 100644
--- a/drivers/ide/Kconfig
+++ b/drivers/ide/Kconfig
@@ -511,6 +511,13 @@ config BLK_DEV_PIIX
 	  This allows the kernel to change PIO, DMA and UDMA speeds and to
 	  configure the chip to optimum performance.
 
+config BLK_DEV_IT8172
+	tristate "IT8172 IDE support"
+	select BLK_DEV_IDEDMA_PCI
+	help
+	  This driver adds support for the IDE controller on the
+	  IT8172 System Controller.
+
 config BLK_DEV_IT8213
 	tristate "IT8213 IDE support"
 	select BLK_DEV_IDEDMA_PCI
diff --git a/drivers/ide/Makefile b/drivers/ide/Makefile
index 410728992e6a..c2b9c93f0095 100644
--- a/drivers/ide/Makefile
+++ b/drivers/ide/Makefile
@@ -47,6 +47,7 @@ obj-$(CONFIG_BLK_DEV_SC1200)		+= sc1200.o
 obj-$(CONFIG_BLK_DEV_CY82C693)		+= cy82c693.o
 obj-$(CONFIG_BLK_DEV_DELKIN)		+= delkin_cb.o
 obj-$(CONFIG_BLK_DEV_HPT366)		+= hpt366.o
+obj-$(CONFIG_BLK_DEV_IT8172)		+= it8172.o
 obj-$(CONFIG_BLK_DEV_IT8213)		+= it8213.o
 obj-$(CONFIG_BLK_DEV_IT821X)		+= it821x.o
 obj-$(CONFIG_BLK_DEV_JMICRON)		+= jmicron.o
diff --git a/drivers/ide/it8172.c b/drivers/ide/it8172.c
new file mode 100644
index 000000000000..e021078cd06b
--- /dev/null
+++ b/drivers/ide/it8172.c
@@ -0,0 +1,166 @@
+/*
+ *
+ * BRIEF MODULE DESCRIPTION
+ *      IT8172 IDE controller support
+ *
+ * Copyright (C) 2000 MontaVista Software Inc.
+ * Copyright (C) 2008 Shane McDonald
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ *  THIS  SOFTWARE  IS PROVIDED   ``AS  IS'' AND   ANY  EXPRESS OR IMPLIED
+ *  WARRANTIES,   INCLUDING, BUT NOT  LIMITED  TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
+ *  NO  EVENT  SHALL   THE AUTHOR  BE    LIABLE FOR ANY   DIRECT, INDIRECT,
+ *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ *  NOT LIMITED   TO, PROCUREMENT OF  SUBSTITUTE GOODS  OR SERVICES; LOSS OF
+ *  USE, DATA,  OR PROFITS; OR  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ *  ANY THEORY OF LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *  You should have received a copy of the  GNU General Public License along
+ *  with this program; if not, write  to the Free Software Foundation, Inc.,
+ *  675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/ioport.h>
+#include <linux/pci.h>
+#include <linux/ide.h>
+#include <linux/init.h>
+
+#define DRV_NAME "IT8172"
+
+static void it8172_set_pio_mode(ide_drive_t *drive, const u8 pio)
+{
+	ide_hwif_t *hwif	= drive->hwif;
+	struct pci_dev *dev	= to_pci_dev(hwif->dev);
+	u16 drive_enables;
+	u32 drive_timing;
+
+	/*
+	 * The highest value of DIOR/DIOW pulse width and recovery time
+	 * that can be set in the IT8172 is 8 PCI clock cycles.  As a result,
+	 * it cannot be configured for PIO mode 0.  This table sets these
+	 * parameters to the maximum supported by the IT8172.
+	 */
+	static const u8 timings[] = { 0x3f, 0x3c, 0x1b, 0x12, 0x0a };
+
+	pci_read_config_word(dev, 0x40, &drive_enables);
+	pci_read_config_dword(dev, 0x44, &drive_timing);
+
+	/*
+	 * Enable port 0x44. The IT8172 spec is confused; it calls
+	 * this register the "Slave IDE Timing Register", but in fact,
+	 * it controls timing for both master and slave drives.
+	 */
+	drive_enables |= 0x4000;
+
+	drive_enables &= drive->dn ? 0xc006 : 0xc060;
+	if (drive->media == ide_disk)
+		/* enable prefetch */
+		drive_enables |= 0x0004 << (drive->dn * 4);
+	if (ata_id_has_iordy(drive->id))
+		/* enable IORDY sample-point */
+		drive_enables |= 0x0002 << (drive->dn * 4);
+
+	drive_timing &= drive->dn ? 0x00003f00 : 0x000fc000;
+	drive_timing |= timings[pio] << (drive->dn * 6 + 8);
+
+	pci_write_config_word(dev, 0x40, drive_enables);
+	pci_write_config_dword(dev, 0x44, drive_timing);
+}
+
+static void it8172_set_dma_mode(ide_drive_t *drive, const u8 speed)
+{
+	ide_hwif_t *hwif	= drive->hwif;
+	struct pci_dev *dev	= to_pci_dev(hwif->dev);
+	int a_speed		= 3 << (drive->dn * 4);
+	int u_flag		= 1 << drive->dn;
+	int u_speed		= 0;
+	u8 reg48, reg4a;
+
+	pci_read_config_byte(dev, 0x48, &reg48);
+	pci_read_config_byte(dev, 0x4a, &reg4a);
+
+	if (speed >= XFER_UDMA_0) {
+		u8 udma = speed - XFER_UDMA_0;
+		u_speed = udma << (drive->dn * 4);
+
+		pci_write_config_byte(dev, 0x48, reg48 | u_flag);
+		reg4a &= ~a_speed;
+		pci_write_config_byte(dev, 0x4a, reg4a | u_speed);
+	} else {
+		const u8 mwdma_to_pio[] = { 0, 3, 4 };
+		u8 pio;
+
+		pci_write_config_byte(dev, 0x48, reg48 & ~u_flag);
+		pci_write_config_byte(dev, 0x4a, reg4a & ~a_speed);
+
+		pio = mwdma_to_pio[speed - XFER_MW_DMA_0];
+
+		it8172_set_pio_mode(drive, pio);
+	}
+}
+
+
+static const struct ide_port_ops it8172_port_ops = {
+	.set_pio_mode	= it8172_set_pio_mode,
+	.set_dma_mode	= it8172_set_dma_mode,
+};
+
+static const struct ide_port_info it8172_port_info __devinitdata = {
+	.name		= DRV_NAME,
+	.port_ops	= &it8172_port_ops,
+	.enablebits	= { {0x41, 0x80, 0x80}, {0x00, 0x00, 0x00} },
+	.host_flags	= IDE_HFLAG_SINGLE,
+	.pio_mask	= ATA_PIO4 & ~ATA_PIO0,
+	.mwdma_mask	= ATA_MWDMA2,
+	.udma_mask	= ATA_UDMA2,
+};
+
+static int __devinit it8172_init_one(struct pci_dev *dev,
+					const struct pci_device_id *id)
+{
+	if ((dev->class >> 8) != PCI_CLASS_STORAGE_IDE)
+		return -ENODEV; /* IT8172 is more than an IDE controller */
+	return ide_pci_init_one(dev, &it8172_port_info, NULL);
+}
+
+static struct pci_device_id it8172_pci_tbl[] = {
+	{ PCI_VDEVICE(ITE, PCI_DEVICE_ID_ITE_8172), 0 },
+	{ 0, },
+};
+MODULE_DEVICE_TABLE(pci, it8172_pci_tbl);
+
+static struct pci_driver it8172_pci_driver = {
+	.name		= "IT8172_IDE",
+	.id_table	= it8172_pci_tbl,
+	.probe		= it8172_init_one,
+	.remove		= ide_pci_remove,
+	.suspend	= ide_pci_suspend,
+	.resume		= ide_pci_resume,
+};
+
+static int __init it8172_ide_init(void)
+{
+	return ide_pci_register_driver(&it8172_pci_driver);
+}
+
+static void __exit it8172_ide_exit(void)
+{
+	pci_unregister_driver(&it8172_pci_driver);
+}
+
+module_init(it8172_ide_init);
+module_exit(it8172_ide_exit);
+
+MODULE_AUTHOR("Steve Longerbeam");
+MODULE_DESCRIPTION("PCI driver module for ITE 8172 IDE");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 218c73b1e6d4..d543365518ab 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1658,6 +1658,7 @@
 #define PCI_VENDOR_ID_ROCKWELL		0x127A
 
 #define PCI_VENDOR_ID_ITE		0x1283
+#define PCI_DEVICE_ID_ITE_8172		0x8172
 #define PCI_DEVICE_ID_ITE_8211		0x8211
 #define PCI_DEVICE_ID_ITE_8212		0x8212
 #define PCI_DEVICE_ID_ITE_8213		0x8213
-- 
cgit v1.2.3


From 592b5315219881c6c0af4785f96456ad2043193a Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Date: Tue, 6 Jan 2009 17:21:02 +0100
Subject: ide: move read_sff_dma_status() method to 'struct ide_dma_ops'

Move apparently misplaced read_sff_dma_status() method from 'struct ide_tp_ops'
to 'struct ide_dma_ops', renaming it to dma_sff_read_status() and making only
required for SFF-8038i compatible IDE controller drivers (greatly cutting down
the number of initializers) as its only user (outside ide-dma-sff.c and such
drivers) appears to be ide_pci_check_simplex() which is only called for such
controllers...

Signed-off-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/alim15x3.c     |  1 +
 drivers/ide/au1xxx-ide.c   |  1 -
 drivers/ide/cmd64x.c       |  3 +++
 drivers/ide/falconide.c    |  1 -
 drivers/ide/hpt366.c       |  3 +++
 drivers/ide/ide-dma-sff.c  | 22 +++++++++++++++++-----
 drivers/ide/ide-h8300.c    |  1 -
 drivers/ide/ide-iops.c     | 10 ----------
 drivers/ide/ide-probe.c    |  7 +++++--
 drivers/ide/it821x.c       |  1 +
 drivers/ide/ns87415.c      |  8 +++++---
 drivers/ide/pdc202xx_old.c |  2 ++
 drivers/ide/pmac.c         |  1 -
 drivers/ide/q40ide.c       |  1 -
 drivers/ide/sc1200.c       |  1 +
 drivers/ide/scc_pata.c     |  4 ++--
 drivers/ide/setup-pci.c    |  7 ++++---
 drivers/ide/sgiioc4.c      |  1 -
 drivers/ide/siimage.c      |  1 +
 drivers/ide/sl82c105.c     |  1 +
 drivers/ide/tc86c001.c     |  1 +
 drivers/ide/tx4939ide.c    | 21 ++++++++++++---------
 include/linux/ide.h        |  8 ++++++--
 23 files changed, 65 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/alim15x3.c b/drivers/ide/alim15x3.c
index f1c817240501..66f43083408b 100644
--- a/drivers/ide/alim15x3.c
+++ b/drivers/ide/alim15x3.c
@@ -509,6 +509,7 @@ static const struct ide_dma_ops ali_dma_ops = {
 	.dma_test_irq		= ide_dma_test_irq,
 	.dma_lost_irq		= ide_dma_lost_irq,
 	.dma_timeout		= ide_dma_timeout,
+	.dma_sff_read_status	= ide_dma_sff_read_status,
 };
 
 static const struct ide_port_info ali15x3_chipset __devinitdata = {
diff --git a/drivers/ide/au1xxx-ide.c b/drivers/ide/au1xxx-ide.c
index 11f2c8f3db48..79a2dfed8eb7 100644
--- a/drivers/ide/au1xxx-ide.c
+++ b/drivers/ide/au1xxx-ide.c
@@ -502,7 +502,6 @@ static const struct ide_tp_ops au1xxx_tp_ops = {
 	.exec_command		= ide_exec_command,
 	.read_status		= ide_read_status,
 	.read_altstatus		= ide_read_altstatus,
-	.read_sff_dma_status	= ide_read_sff_dma_status,
 
 	.set_irq		= ide_set_irq,
 
diff --git a/drivers/ide/cmd64x.c b/drivers/ide/cmd64x.c
index 265cf9268c63..2f9688d87ecd 100644
--- a/drivers/ide/cmd64x.c
+++ b/drivers/ide/cmd64x.c
@@ -385,6 +385,7 @@ static const struct ide_dma_ops cmd64x_dma_ops = {
 	.dma_test_irq		= cmd64x_dma_test_irq,
 	.dma_lost_irq		= ide_dma_lost_irq,
 	.dma_timeout		= ide_dma_timeout,
+	.dma_sff_read_status	= ide_dma_sff_read_status,
 };
 
 static const struct ide_dma_ops cmd646_rev1_dma_ops = {
@@ -396,6 +397,7 @@ static const struct ide_dma_ops cmd646_rev1_dma_ops = {
 	.dma_test_irq		= ide_dma_test_irq,
 	.dma_lost_irq		= ide_dma_lost_irq,
 	.dma_timeout		= ide_dma_timeout,
+	.dma_sff_read_status	= ide_dma_sff_read_status,
 };
 
 static const struct ide_dma_ops cmd648_dma_ops = {
@@ -407,6 +409,7 @@ static const struct ide_dma_ops cmd648_dma_ops = {
 	.dma_test_irq		= cmd648_dma_test_irq,
 	.dma_lost_irq		= ide_dma_lost_irq,
 	.dma_timeout		= ide_dma_timeout,
+	.dma_sff_read_status	= ide_dma_sff_read_status,
 };
 
 static const struct ide_port_info cmd64x_chipsets[] __devinitdata = {
diff --git a/drivers/ide/falconide.c b/drivers/ide/falconide.c
index 39d500d84b07..a5ba820d69bb 100644
--- a/drivers/ide/falconide.c
+++ b/drivers/ide/falconide.c
@@ -70,7 +70,6 @@ static const struct ide_tp_ops falconide_tp_ops = {
 	.exec_command		= ide_exec_command,
 	.read_status		= ide_read_status,
 	.read_altstatus		= ide_read_altstatus,
-	.read_sff_dma_status	= ide_read_sff_dma_status,
 
 	.set_irq		= ide_set_irq,
 
diff --git a/drivers/ide/hpt366.c b/drivers/ide/hpt366.c
index 208614f4dc3c..3eb9b5c63a0f 100644
--- a/drivers/ide/hpt366.c
+++ b/drivers/ide/hpt366.c
@@ -1424,6 +1424,7 @@ static const struct ide_dma_ops hpt37x_dma_ops = {
 	.dma_test_irq		= hpt374_dma_test_irq,
 	.dma_lost_irq		= ide_dma_lost_irq,
 	.dma_timeout		= ide_dma_timeout,
+	.dma_sff_read_status	= ide_dma_sff_read_status,
 };
 
 static const struct ide_dma_ops hpt370_dma_ops = {
@@ -1435,6 +1436,7 @@ static const struct ide_dma_ops hpt370_dma_ops = {
 	.dma_test_irq		= ide_dma_test_irq,
 	.dma_lost_irq		= ide_dma_lost_irq,
 	.dma_timeout		= hpt370_dma_timeout,
+	.dma_sff_read_status	= ide_dma_sff_read_status,
 };
 
 static const struct ide_dma_ops hpt36x_dma_ops = {
@@ -1446,6 +1448,7 @@ static const struct ide_dma_ops hpt36x_dma_ops = {
 	.dma_test_irq		= ide_dma_test_irq,
 	.dma_lost_irq		= hpt366_dma_lost_irq,
 	.dma_timeout		= ide_dma_timeout,
+	.dma_sff_read_status	= ide_dma_sff_read_status,
 };
 
 static const struct ide_port_info hpt366_chipsets[] __devinitdata = {
diff --git a/drivers/ide/ide-dma-sff.c b/drivers/ide/ide-dma-sff.c
index 623a82d1535d..bcdadc777564 100644
--- a/drivers/ide/ide-dma-sff.c
+++ b/drivers/ide/ide-dma-sff.c
@@ -50,6 +50,17 @@ int config_drive_for_dma(ide_drive_t *drive)
 	return 0;
 }
 
+u8 ide_dma_sff_read_status(ide_hwif_t *hwif)
+{
+	unsigned long addr = hwif->dma_base + ATA_DMA_STATUS;
+
+	if (hwif->host_flags & IDE_HFLAG_MMIO)
+		return readb((void __iomem *)addr);
+	else
+		return inb(addr);
+}
+EXPORT_SYMBOL_GPL(ide_dma_sff_read_status);
+
 /**
  *	ide_dma_host_set	-	Enable/disable DMA on a host
  *	@drive: drive to control
@@ -62,7 +73,7 @@ void ide_dma_host_set(ide_drive_t *drive, int on)
 {
 	ide_hwif_t *hwif = drive->hwif;
 	u8 unit = drive->dn & 1;
-	u8 dma_stat = hwif->tp_ops->read_sff_dma_status(hwif);
+	u8 dma_stat = hwif->dma_ops->dma_sff_read_status(hwif);
 
 	if (on)
 		dma_stat |= (1 << (5 + unit));
@@ -200,7 +211,7 @@ int ide_dma_setup(ide_drive_t *drive)
 		outb(reading, hwif->dma_base + ATA_DMA_CMD);
 
 	/* read DMA status for INTR & ERROR flags */
-	dma_stat = hwif->tp_ops->read_sff_dma_status(hwif);
+	dma_stat = hwif->dma_ops->dma_sff_read_status(hwif);
 
 	/* clear INTR & ERROR flags */
 	if (mmio)
@@ -232,7 +243,7 @@ EXPORT_SYMBOL_GPL(ide_dma_setup);
 static int dma_timer_expiry(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = drive->hwif;
-	u8 dma_stat = hwif->tp_ops->read_sff_dma_status(hwif);
+	u8 dma_stat = hwif->dma_ops->dma_sff_read_status(hwif);
 
 	printk(KERN_WARNING "%s: %s: DMA status (0x%02x)\n",
 		drive->name, __func__, dma_stat);
@@ -305,7 +316,7 @@ int ide_dma_end(ide_drive_t *drive)
 	}
 
 	/* get DMA status */
-	dma_stat = hwif->tp_ops->read_sff_dma_status(hwif);
+	dma_stat = hwif->dma_ops->dma_sff_read_status(hwif);
 
 	if (mmio)
 		/* clear the INTR & ERROR bits */
@@ -331,7 +342,7 @@ EXPORT_SYMBOL_GPL(ide_dma_end);
 int ide_dma_test_irq(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = drive->hwif;
-	u8 dma_stat = hwif->tp_ops->read_sff_dma_status(hwif);
+	u8 dma_stat = hwif->dma_ops->dma_sff_read_status(hwif);
 
 	return (dma_stat & ATA_DMA_INTR) ? 1 : 0;
 }
@@ -346,5 +357,6 @@ const struct ide_dma_ops sff_dma_ops = {
 	.dma_test_irq		= ide_dma_test_irq,
 	.dma_timeout		= ide_dma_timeout,
 	.dma_lost_irq		= ide_dma_lost_irq,
+	.dma_sff_read_status	= ide_dma_sff_read_status,
 };
 EXPORT_SYMBOL_GPL(sff_dma_ops);
diff --git a/drivers/ide/ide-h8300.c b/drivers/ide/ide-h8300.c
index e2cdd2e9cdec..9270d3255ee0 100644
--- a/drivers/ide/ide-h8300.c
+++ b/drivers/ide/ide-h8300.c
@@ -159,7 +159,6 @@ static const struct ide_tp_ops h8300_tp_ops = {
 	.exec_command		= ide_exec_command,
 	.read_status		= ide_read_status,
 	.read_altstatus		= ide_read_altstatus,
-	.read_sff_dma_status	= ide_read_sff_dma_status,
 
 	.set_irq		= ide_set_irq,
 
diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c
index 1a22c31ce7ac..e728cfe7273f 100644
--- a/drivers/ide/ide-iops.c
+++ b/drivers/ide/ide-iops.c
@@ -105,15 +105,6 @@ u8 ide_read_altstatus(ide_hwif_t *hwif)
 }
 EXPORT_SYMBOL_GPL(ide_read_altstatus);
 
-u8 ide_read_sff_dma_status(ide_hwif_t *hwif)
-{
-	if (hwif->host_flags & IDE_HFLAG_MMIO)
-		return readb((void __iomem *)(hwif->dma_base + ATA_DMA_STATUS));
-	else
-		return inb(hwif->dma_base + ATA_DMA_STATUS);
-}
-EXPORT_SYMBOL_GPL(ide_read_sff_dma_status);
-
 void ide_set_irq(ide_hwif_t *hwif, int on)
 {
 	u8 ctl = ATA_DEVCTL_OBS;
@@ -388,7 +379,6 @@ const struct ide_tp_ops default_tp_ops = {
 	.exec_command		= ide_exec_command,
 	.read_status		= ide_read_status,
 	.read_altstatus		= ide_read_altstatus,
-	.read_sff_dma_status	= ide_read_sff_dma_status,
 
 	.set_irq		= ide_set_irq,
 
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index ebb1b7f863f4..0ccbb4459fb9 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -1229,6 +1229,8 @@ static void ide_init_port(ide_hwif_t *hwif, unsigned int port,
 	if ((d->host_flags & IDE_HFLAG_NO_DMA) == 0) {
 		int rc;
 
+		hwif->dma_ops = d->dma_ops;
+
 		if (d->init_dma)
 			rc = d->init_dma(hwif, d);
 		else
@@ -1236,12 +1238,13 @@ static void ide_init_port(ide_hwif_t *hwif, unsigned int port,
 
 		if (rc < 0) {
 			printk(KERN_INFO "%s: DMA disabled\n", hwif->name);
+
+			hwif->dma_ops = NULL;
 			hwif->dma_base = 0;
 			hwif->swdma_mask = 0;
 			hwif->mwdma_mask = 0;
 			hwif->ultra_mask = 0;
-		} else if (d->dma_ops)
-			hwif->dma_ops = d->dma_ops;
+		}
 	}
 
 	if ((d->host_flags & IDE_HFLAG_SERIALIZE) ||
diff --git a/drivers/ide/it821x.c b/drivers/ide/it821x.c
index 879056645352..0be27ac1f077 100644
--- a/drivers/ide/it821x.c
+++ b/drivers/ide/it821x.c
@@ -512,6 +512,7 @@ static struct ide_dma_ops it821x_pass_through_dma_ops = {
 	.dma_test_irq		= ide_dma_test_irq,
 	.dma_timeout		= ide_dma_timeout,
 	.dma_lost_irq		= ide_dma_lost_irq,
+	.dma_sff_read_status	= ide_dma_sff_read_status,
 };
 
 /**
diff --git a/drivers/ide/ns87415.c b/drivers/ide/ns87415.c
index aceb2fcbe1d1..83643ed9a426 100644
--- a/drivers/ide/ns87415.c
+++ b/drivers/ide/ns87415.c
@@ -56,7 +56,7 @@ static u8 superio_read_status(ide_hwif_t *hwif)
 	return superio_ide_inb(hwif->io_ports.status_addr);
 }
 
-static u8 superio_read_sff_dma_status(ide_hwif_t *hwif)
+static u8 superio_dma_sff_read_status(ide_hwif_t *hwif)
 {
 	return superio_ide_inb(hwif->dma_base + ATA_DMA_STATUS);
 }
@@ -109,7 +109,6 @@ static const struct ide_tp_ops superio_tp_ops = {
 	.exec_command		= ide_exec_command,
 	.read_status		= superio_read_status,
 	.read_altstatus		= ide_read_altstatus,
-	.read_sff_dma_status	= superio_read_sff_dma_status,
 
 	.set_irq		= ide_set_irq,
 
@@ -132,6 +131,8 @@ static void __devinit superio_init_iops(struct hwif_s *hwif)
 	tmp = superio_ide_inb(dma_stat);
 	outb(tmp | 0x66, dma_stat);
 }
+#else
+#define superio_dma_sff_read_status ide_dma_sff_read_status
 #endif
 
 static unsigned int ns87415_count = 0, ns87415_control[MAX_HWIFS] = { 0 };
@@ -201,7 +202,7 @@ static int ns87415_dma_end(ide_drive_t *drive)
 	u8 dma_stat = 0, dma_cmd = 0;
 
 	drive->waiting_for_dma = 0;
-	dma_stat = hwif->tp_ops->read_sff_dma_status(hwif);
+	dma_stat = hwif->dma_ops->dma_sff_read_status(hwif);
 	/* get DMA command mode */
 	dma_cmd = inb(hwif->dma_base + ATA_DMA_CMD);
 	/* stop DMA */
@@ -308,6 +309,7 @@ static const struct ide_dma_ops ns87415_dma_ops = {
 	.dma_test_irq		= ide_dma_test_irq,
 	.dma_lost_irq		= ide_dma_lost_irq,
 	.dma_timeout		= ide_dma_timeout,
+	.dma_sff_read_status	= superio_dma_sff_read_status,
 };
 
 static const struct ide_port_info ns87415_chipset __devinitdata = {
diff --git a/drivers/ide/pdc202xx_old.c b/drivers/ide/pdc202xx_old.c
index e8e6b29d9e41..97193323aebf 100644
--- a/drivers/ide/pdc202xx_old.c
+++ b/drivers/ide/pdc202xx_old.c
@@ -337,6 +337,7 @@ static const struct ide_dma_ops pdc20246_dma_ops = {
 	.dma_test_irq		= pdc202xx_dma_test_irq,
 	.dma_lost_irq		= pdc202xx_dma_lost_irq,
 	.dma_timeout		= pdc202xx_dma_timeout,
+	.dma_sff_read_status	= ide_dma_sff_read_status,
 };
 
 static const struct ide_dma_ops pdc2026x_dma_ops = {
@@ -348,6 +349,7 @@ static const struct ide_dma_ops pdc2026x_dma_ops = {
 	.dma_test_irq		= pdc202xx_dma_test_irq,
 	.dma_lost_irq		= pdc202xx_dma_lost_irq,
 	.dma_timeout		= pdc202xx_dma_timeout,
+	.dma_sff_read_status	= ide_dma_sff_read_status,
 };
 
 #define DECLARE_PDC2026X_DEV(udma, sectors) \
diff --git a/drivers/ide/pmac.c b/drivers/ide/pmac.c
index ee52a21af1be..74625e821a43 100644
--- a/drivers/ide/pmac.c
+++ b/drivers/ide/pmac.c
@@ -955,7 +955,6 @@ static const struct ide_tp_ops pmac_tp_ops = {
 	.exec_command		= pmac_exec_command,
 	.read_status		= ide_read_status,
 	.read_altstatus		= ide_read_altstatus,
-	.read_sff_dma_status	= ide_read_sff_dma_status,
 
 	.set_irq		= pmac_set_irq,
 
diff --git a/drivers/ide/q40ide.c b/drivers/ide/q40ide.c
index 4af4a8ce4cdf..9f9c0b3cc3a3 100644
--- a/drivers/ide/q40ide.c
+++ b/drivers/ide/q40ide.c
@@ -99,7 +99,6 @@ static const struct ide_tp_ops q40ide_tp_ops = {
 	.exec_command		= ide_exec_command,
 	.read_status		= ide_read_status,
 	.read_altstatus		= ide_read_altstatus,
-	.read_sff_dma_status	= ide_read_sff_dma_status,
 
 	.set_irq		= ide_set_irq,
 
diff --git a/drivers/ide/sc1200.c b/drivers/ide/sc1200.c
index 1cf477aaae36..dbdd2985a0d8 100644
--- a/drivers/ide/sc1200.c
+++ b/drivers/ide/sc1200.c
@@ -292,6 +292,7 @@ static const struct ide_dma_ops sc1200_dma_ops = {
 	.dma_test_irq		= ide_dma_test_irq,
 	.dma_lost_irq		= ide_dma_lost_irq,
 	.dma_timeout		= ide_dma_timeout,
+	.dma_sff_read_status	= ide_dma_sff_read_status,
 };
 
 static const struct ide_port_info sc1200_chipset __devinitdata = {
diff --git a/drivers/ide/scc_pata.c b/drivers/ide/scc_pata.c
index 5d53850c79d7..1cb43068455a 100644
--- a/drivers/ide/scc_pata.c
+++ b/drivers/ide/scc_pata.c
@@ -143,7 +143,7 @@ static u8 scc_read_altstatus(ide_hwif_t *hwif)
 	return (u8)in_be32((void *)hwif->io_ports.ctl_addr);
 }
 
-static u8 scc_read_sff_dma_status(ide_hwif_t *hwif)
+static u8 scc_dma_sff_read_status(ide_hwif_t *hwif)
 {
 	return (u8)in_be32((void *)(hwif->dma_base + 4));
 }
@@ -853,7 +853,6 @@ static const struct ide_tp_ops scc_tp_ops = {
 	.exec_command		= scc_exec_command,
 	.read_status		= scc_read_status,
 	.read_altstatus		= scc_read_altstatus,
-	.read_sff_dma_status	= scc_read_sff_dma_status,
 
 	.set_irq		= scc_set_irq,
 
@@ -880,6 +879,7 @@ static const struct ide_dma_ops scc_dma_ops = {
 	.dma_test_irq		= scc_dma_test_irq,
 	.dma_lost_irq		= ide_dma_lost_irq,
 	.dma_timeout		= ide_dma_timeout,
+	.dma_sff_read_status	= scc_dma_sff_read_status,
 };
 
 #define DECLARE_SCC_DEV(name_str)			\
diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c
index bc37dff8c675..e85d1ed29c2a 100644
--- a/drivers/ide/setup-pci.c
+++ b/drivers/ide/setup-pci.c
@@ -130,7 +130,7 @@ int ide_pci_check_simplex(ide_hwif_t *hwif, const struct ide_port_info *d)
 	 * we tune the drive then try to grab DMA ownership if we want to be
 	 * the DMA end.  This has to be become dynamic to handle hot-plug.
 	 */
-	dma_stat = hwif->tp_ops->read_sff_dma_status(hwif);
+	dma_stat = hwif->dma_ops->dma_sff_read_status(hwif);
 	if ((dma_stat & 0x80) && hwif->mate && hwif->mate->dma_base) {
 		printk(KERN_INFO "%s %s: simplex device: DMA disabled\n",
 			d->name, pci_name(dev));
@@ -377,6 +377,9 @@ int ide_hwif_setup_dma(ide_hwif_t *hwif, const struct ide_port_info *d)
 
 		hwif->dma_base = base;
 
+		if (hwif->dma_ops == NULL)
+			hwif->dma_ops = &sff_dma_ops;
+
 		if (ide_pci_check_simplex(hwif, d) < 0)
 			return -1;
 
@@ -393,8 +396,6 @@ int ide_hwif_setup_dma(ide_hwif_t *hwif, const struct ide_port_info *d)
 
 		if (ide_allocate_dma_engine(hwif))
 			return -1;
-
-		hwif->dma_ops = &sff_dma_ops;
 	}
 
 	return 0;
diff --git a/drivers/ide/sgiioc4.c b/drivers/ide/sgiioc4.c
index 8e1ffd57a86d..fdb9d7037694 100644
--- a/drivers/ide/sgiioc4.c
+++ b/drivers/ide/sgiioc4.c
@@ -523,7 +523,6 @@ static const struct ide_tp_ops sgiioc4_tp_ops = {
 	.exec_command		= ide_exec_command,
 	.read_status		= sgiioc4_read_status,
 	.read_altstatus		= ide_read_altstatus,
-	.read_sff_dma_status	= ide_read_sff_dma_status,
 
 	.set_irq		= ide_set_irq,
 
diff --git a/drivers/ide/siimage.c b/drivers/ide/siimage.c
index 652b3a04620f..cb2b352b876b 100644
--- a/drivers/ide/siimage.c
+++ b/drivers/ide/siimage.c
@@ -717,6 +717,7 @@ static const struct ide_dma_ops sil_dma_ops = {
 	.dma_test_irq		= siimage_dma_test_irq,
 	.dma_timeout		= ide_dma_timeout,
 	.dma_lost_irq		= ide_dma_lost_irq,
+	.dma_sff_read_status	= ide_dma_sff_read_status,
 };
 
 #define DECLARE_SII_DEV(p_ops)				\
diff --git a/drivers/ide/sl82c105.c b/drivers/ide/sl82c105.c
index 1ded01d81ab3..48cc748c5043 100644
--- a/drivers/ide/sl82c105.c
+++ b/drivers/ide/sl82c105.c
@@ -299,6 +299,7 @@ static const struct ide_dma_ops sl82c105_dma_ops = {
 	.dma_test_irq		= ide_dma_test_irq,
 	.dma_lost_irq		= sl82c105_dma_lost_irq,
 	.dma_timeout		= sl82c105_dma_timeout,
+	.dma_sff_read_status	= ide_dma_sff_read_status,
 };
 
 static const struct ide_port_info sl82c105_chipset __devinitdata = {
diff --git a/drivers/ide/tc86c001.c b/drivers/ide/tc86c001.c
index d2c00fb928e4..84109f5a1632 100644
--- a/drivers/ide/tc86c001.c
+++ b/drivers/ide/tc86c001.c
@@ -188,6 +188,7 @@ static const struct ide_dma_ops tc86c001_dma_ops = {
 	.dma_test_irq		= ide_dma_test_irq,
 	.dma_lost_irq		= ide_dma_lost_irq,
 	.dma_timeout		= ide_dma_timeout,
+	.dma_sff_read_status	= ide_dma_sff_read_status,
 };
 
 static const struct ide_port_info tc86c001_chipset __devinitdata = {
diff --git a/drivers/ide/tx4939ide.c b/drivers/ide/tx4939ide.c
index 1ac27ac7283b..882f6f07c476 100644
--- a/drivers/ide/tx4939ide.c
+++ b/drivers/ide/tx4939ide.c
@@ -397,6 +397,17 @@ static int tx4939ide_dma_test_irq(ide_drive_t *drive)
 	return found;
 }
 
+#ifdef __BIG_ENDIAN
+static u8 tx4939ide_dma_sff_read_status(ide_hwif_t *hwif)
+{
+	void __iomem *base = TX4939IDE_BASE(hwif);
+
+	return tx4939ide_readb(base, TX4939IDE_DMA_Stat);
+}
+#else
+#define tx4939ide_dma_sff_read_status ide_dma_sff_read_status
+#endif
+
 static void tx4939ide_init_hwif(ide_hwif_t *hwif)
 {
 	void __iomem *base = TX4939IDE_BASE(hwif);
@@ -443,13 +454,6 @@ static void tx4939ide_tf_load_fixup(ide_drive_t *drive, ide_task_t *task)
 
 #ifdef __BIG_ENDIAN
 
-static u8 tx4939ide_read_sff_dma_status(ide_hwif_t *hwif)
-{
-	void __iomem *base = TX4939IDE_BASE(hwif);
-
-	return tx4939ide_readb(base, TX4939IDE_DMA_Stat);
-}
-
 /* custom iops (independent from SWAP_IO_SPACE) */
 static u8 tx4939ide_inb(unsigned long port)
 {
@@ -585,7 +589,6 @@ static const struct ide_tp_ops tx4939ide_tp_ops = {
 	.exec_command		= ide_exec_command,
 	.read_status		= ide_read_status,
 	.read_altstatus		= ide_read_altstatus,
-	.read_sff_dma_status	= tx4939ide_read_sff_dma_status,
 
 	.set_irq		= ide_set_irq,
 
@@ -609,7 +612,6 @@ static const struct ide_tp_ops tx4939ide_tp_ops = {
 	.exec_command		= ide_exec_command,
 	.read_status		= ide_read_status,
 	.read_altstatus		= ide_read_altstatus,
-	.read_sff_dma_status	= ide_read_sff_dma_status,
 
 	.set_irq		= ide_set_irq,
 
@@ -638,6 +640,7 @@ static const struct ide_dma_ops tx4939ide_dma_ops = {
 	.dma_test_irq		= tx4939ide_dma_test_irq,
 	.dma_lost_irq		= ide_dma_lost_irq,
 	.dma_timeout		= ide_dma_timeout,
+	.dma_sff_read_status	= tx4939ide_dma_sff_read_status,
 };
 
 static const struct ide_port_info tx4939ide_port_info __initdata = {
diff --git a/include/linux/ide.h b/include/linux/ide.h
index ebc22a836520..3644f6323384 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -674,7 +674,6 @@ struct ide_tp_ops {
 	void	(*exec_command)(struct hwif_s *, u8);
 	u8	(*read_status)(struct hwif_s *);
 	u8	(*read_altstatus)(struct hwif_s *);
-	u8	(*read_sff_dma_status)(struct hwif_s *);
 
 	void	(*set_irq)(struct hwif_s *, int);
 
@@ -735,6 +734,11 @@ struct ide_dma_ops {
 	int	(*dma_test_irq)(struct ide_drive_s *);
 	void	(*dma_lost_irq)(struct ide_drive_s *);
 	void	(*dma_timeout)(struct ide_drive_s *);
+	/*
+	 * The following method is optional and only required to be
+	 * implemented for the SFF-8038i compatible controllers.
+	 */
+	u8	(*dma_sff_read_status)(struct hwif_s *);
 };
 
 struct ide_host;
@@ -1177,7 +1181,6 @@ void ide_tf_dump(const char *, struct ide_taskfile *);
 void ide_exec_command(ide_hwif_t *, u8);
 u8 ide_read_status(ide_hwif_t *);
 u8 ide_read_altstatus(ide_hwif_t *);
-u8 ide_read_sff_dma_status(ide_hwif_t *);
 
 void ide_set_irq(ide_hwif_t *, int);
 
@@ -1458,6 +1461,7 @@ void ide_dma_exec_cmd(ide_drive_t *, u8);
 extern void ide_dma_start(ide_drive_t *);
 int ide_dma_end(ide_drive_t *);
 int ide_dma_test_irq(ide_drive_t *);
+u8 ide_dma_sff_read_status(ide_hwif_t *);
 extern const struct ide_dma_ops sff_dma_ops;
 #else
 static inline int config_drive_for_dma(ide_drive_t *drive) { return 0; }
-- 
cgit v1.2.3


From 548eaca46b3cf4419b6c2be839a106d8641ffb70 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Mon, 20 Oct 2008 17:48:43 -0400
Subject: nfsd: document new filehandle fsid types

Descriptions taken from mountd code (in nfs-utils/utils/mountd/cache.c).

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/nfsd/nfsfh.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h
index d1941cb965e9..b2e093870bc6 100644
--- a/include/linux/nfsd/nfsfh.h
+++ b/include/linux/nfsd/nfsfh.h
@@ -68,6 +68,10 @@ struct nfs_fhbase_old {
  *     1  - 4 byte user specified identifier
  *     2  - 4 byte major, 4 byte minor, 4 byte inode number - DEPRECATED
  *     3  - 4 byte device id, encoded for user-space, 4 byte inode number
+ *     4  - 4 byte inode number and 4 byte uuid
+ *     5  - 8 byte uuid
+ *     6  - 16 byte uuid
+ *     7  - 8 byte inode number and 16 byte uuid
  *
  * The fileid_type identified how the file within the filesystem is encoded.
  * This is (will be) passed to, and set by, the underlying filesystem if it supports
-- 
cgit v1.2.3


From c9233eb7b0b11ef176d4bf68da2ce85464b6ec39 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Mon, 20 Oct 2008 11:51:57 -0400
Subject: sunrpc: add sv_maxconn field to svc_serv (try #3)

svc_check_conn_limits() attempts to prevent denial of service attacks
by having the service close old connections once it reaches a
threshold. This threshold is based on the number of threads in the
service:

	(serv->sv_nrthreads + 3) * 20

Once we reach this, we drop the oldest connections and a printk pops
to warn the admin that they should increase the number of threads.

Increasing the number of threads isn't an option however for services
like lockd. We don't want to eliminate this check entirely for such
services but we need some way to increase this limit.

This patch adds a sv_maxconn field to the svc_serv struct. When it's
set to 0, we use the current method to calculate the max number of
connections. RPC services can then set this on an as-needed basis.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Acked-by: Neil Brown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/sunrpc/svc.h |  5 ++++-
 net/sunrpc/svc_xprt.c      | 22 ++++++++++++++++------
 2 files changed, 20 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 3afe7fb403b2..3435d24bfe55 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -58,10 +58,13 @@ struct svc_serv {
 	struct svc_stat *	sv_stats;	/* RPC statistics */
 	spinlock_t		sv_lock;
 	unsigned int		sv_nrthreads;	/* # of server threads */
+	unsigned int		sv_maxconn;	/* max connections allowed or
+						 * '0' causing max to be based
+						 * on number of threads. */
+
 	unsigned int		sv_max_payload;	/* datagram payload size */
 	unsigned int		sv_max_mesg;	/* max_payload + 1 page for overheads */
 	unsigned int		sv_xdrsize;	/* XDR buffer size */
-
 	struct list_head	sv_permsocks;	/* all permanent sockets */
 	struct list_head	sv_tempsocks;	/* all temporary sockets */
 	int			sv_tmpcnt;	/* count of temporary sockets */
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index bf5b5cdafebf..3fe4f1004278 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -515,8 +515,10 @@ int svc_port_is_privileged(struct sockaddr *sin)
 }
 
 /*
- * Make sure that we don't have too many active connections.  If we
- * have, something must be dropped.
+ * Make sure that we don't have too many active connections. If we have,
+ * something must be dropped. It's not clear what will happen if we allow
+ * "too many" connections, but when dealing with network-facing software,
+ * we have to code defensively. Here we do that by imposing hard limits.
  *
  * There's no point in trying to do random drop here for DoS
  * prevention. The NFS clients does 1 reconnect in 15 seconds. An
@@ -525,19 +527,27 @@ int svc_port_is_privileged(struct sockaddr *sin)
  * The only somewhat efficient mechanism would be if drop old
  * connections from the same IP first. But right now we don't even
  * record the client IP in svc_sock.
+ *
+ * single-threaded services that expect a lot of clients will probably
+ * need to set sv_maxconn to override the default value which is based
+ * on the number of threads
  */
 static void svc_check_conn_limits(struct svc_serv *serv)
 {
-	if (serv->sv_tmpcnt > (serv->sv_nrthreads+3)*20) {
+	unsigned int limit = serv->sv_maxconn ? serv->sv_maxconn :
+				(serv->sv_nrthreads+3) * 20;
+
+	if (serv->sv_tmpcnt > limit) {
 		struct svc_xprt *xprt = NULL;
 		spin_lock_bh(&serv->sv_lock);
 		if (!list_empty(&serv->sv_tempsocks)) {
 			if (net_ratelimit()) {
 				/* Try to help the admin */
 				printk(KERN_NOTICE "%s: too many open  "
-				       "connections, consider increasing the "
-				       "number of nfsd threads\n",
-				       serv->sv_name);
+				       "connections, consider increasing %s\n",
+				       serv->sv_name, serv->sv_maxconn ?
+				       "the max number of connections." :
+				       "the number of threads.");
 			}
 			/*
 			 * Always select the oldest connection. It's not fair,
-- 
cgit v1.2.3


From 7538ce1eb656a1477bedd5b1c202226e7abf5e7b Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 4 Dec 2008 14:19:45 -0500
Subject: NLM: Use modern style for pointer fields in nlm_host

Clean up: I'm about to add another "char *" field to the nlm_host
structure.  The h_name field, for example, uses an older style of
declaring a "char *" field.  If I match that style for the new field,
checkpatch.pl will complain.

So, fix pointer fields to use the new style.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/lockd/lockd.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 23da3fa69efa..3dbdd353156c 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -43,8 +43,8 @@ struct nlm_host {
 	struct sockaddr_storage	h_addr;		/* peer address */
 	size_t			h_addrlen;
 	struct sockaddr_storage	h_srcaddr;	/* our address (optional) */
-	struct rpc_clnt	*	h_rpcclnt;	/* RPC client to talk to peer */
-	char *			h_name;		/* remote hostname */
+	struct rpc_clnt		*h_rpcclnt;	/* RPC client to talk to peer */
+	char			*h_name;		/* remote hostname */
 	u32			h_version;	/* interface version */
 	unsigned short		h_proto;	/* transport proto */
 	unsigned short		h_reclaiming : 1,
@@ -64,7 +64,7 @@ struct nlm_host {
 	spinlock_t		h_lock;
 	struct list_head	h_granted;	/* Locks in GRANTED state */
 	struct list_head	h_reclaim;	/* Locks in RECLAIM state */
-	struct nsm_handle *	h_nsmhandle;	/* NSM status handle */
+	struct nsm_handle	*h_nsmhandle;	/* NSM status handle */
 
 	char			h_addrbuf[48],	/* address eyecatchers */
 				h_srcaddrbuf[48];
-- 
cgit v1.2.3


From 1df40b609ad5a622904eb652109c287fe9c93ec5 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 4 Dec 2008 14:19:53 -0500
Subject: NLM: Remove address eye-catcher buffers from nlm_host

The h_name field in struct nlm_host is a just copy of
h_nsmhandle->sm_name.  Likewise, the contents of the h_addrbuf field
should be identical to the sm_addrbuf field.

The h_srcaddrbuf field is used only in one place for debugging.  We can
live without this until we get %pI formatting for printk().

Currently these buffers are 48 bytes, but we need to support scope IDs
in IPv6 presentation addresses, which means making the buffers even
larger.  Instead, let's find ways to eliminate them to save space.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/lockd/host.c             | 10 +++-------
 include/linux/lockd/lockd.h |  4 +---
 2 files changed, 4 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index abdebf76b820..33bf67af7aba 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -206,6 +206,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni)
 		goto out;
 	}
 	host->h_name	   = nsm->sm_name;
+	host->h_addrbuf    = nsm->sm_addrbuf;
 	memcpy(nlm_addr(host), ni->sap, ni->salen);
 	host->h_addrlen = ni->salen;
 	nlm_clear_port(nlm_addr(host));
@@ -232,11 +233,6 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni)
 
 	nrhosts++;
 
-	nlm_display_address((struct sockaddr *)&host->h_addr,
-				host->h_addrbuf, sizeof(host->h_addrbuf));
-	nlm_display_address((struct sockaddr *)&host->h_srcaddr,
-				host->h_srcaddrbuf, sizeof(host->h_srcaddrbuf));
-
 	dprintk("lockd: nlm_lookup_host created host %s\n",
 			host->h_name);
 
@@ -378,8 +374,8 @@ nlm_bind_host(struct nlm_host *host)
 {
 	struct rpc_clnt	*clnt;
 
-	dprintk("lockd: nlm_bind_host %s (%s), my addr=%s\n",
-			host->h_name, host->h_addrbuf, host->h_srcaddrbuf);
+	dprintk("lockd: nlm_bind_host %s (%s)\n",
+			host->h_name, host->h_addrbuf);
 
 	/* Lock host handle */
 	mutex_lock(&host->h_mutex);
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 3dbdd353156c..dae22cb4c38d 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -65,9 +65,7 @@ struct nlm_host {
 	struct list_head	h_granted;	/* Locks in GRANTED state */
 	struct list_head	h_reclaim;	/* Locks in RECLAIM state */
 	struct nsm_handle	*h_nsmhandle;	/* NSM status handle */
-
-	char			h_addrbuf[48],	/* address eyecatchers */
-				h_srcaddrbuf[48];
+	char			*h_addrbuf;	/* address eyecatcher */
 };
 
 struct nsm_handle {
-- 
cgit v1.2.3


From bc995801a09d1fead0bec1356bfd836911c8eed7 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 4 Dec 2008 14:20:08 -0500
Subject: NLM: Support IPv6 scope IDs in nlm_display_address()

Scope ID support is needed since the kernel's NSM implementation is
about to use these displayed addresses as a mon_name in some cases.

When nsm_use_hostnames is zero, without scope ID support NSM will fail
to handle peers that contact us via a link-local address.  Link-local
addresses do not work without an interface ID, which is stored in the
sockaddr's sin6_scope_id field.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/lockd/host.c             | 21 +++++++++++++++------
 include/linux/lockd/lockd.h | 10 +++++++++-
 2 files changed, 24 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index beb5da810167..012e49aaecd1 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -105,22 +105,31 @@ static void nlm_clear_port(struct sockaddr *sap)
 	}
 }
 
+static void nlm_display_ipv6_address(const struct sockaddr *sap, char *buf,
+				     const size_t len)
+{
+	const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
+
+	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
+		snprintf(buf, len, "%pI4", &sin6->sin6_addr.s6_addr32[3]);
+	else if (sin6->sin6_scope_id != 0)
+		snprintf(buf, len, "%pI6%%%u", &sin6->sin6_addr,
+				sin6->sin6_scope_id);
+	else
+		snprintf(buf, len, "%pI6", &sin6->sin6_addr);
+}
+
 static void nlm_display_address(const struct sockaddr *sap,
 				char *buf, const size_t len)
 {
 	const struct sockaddr_in *sin = (struct sockaddr_in *)sap;
-	const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
 
 	switch (sap->sa_family) {
 	case AF_INET:
 		snprintf(buf, len, "%pI4", &sin->sin_addr.s_addr);
 		break;
 	case AF_INET6:
-		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
-			snprintf(buf, len, "%pI4",
-				 &sin6->sin6_addr.s6_addr32[3]);
-		else
-			snprintf(buf, len, "%pI6", &sin6->sin6_addr);
+		nlm_display_ipv6_address(sap, buf, len);
 		break;
 	default:
 		snprintf(buf, len, "unsupported address family");
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index dae22cb4c38d..80a0a2cff2b8 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -68,6 +68,14 @@ struct nlm_host {
 	char			*h_addrbuf;	/* address eyecatcher */
 };
 
+/*
+ * The largest string sm_addrbuf should hold is a full-size IPv6 address
+ * (no "::" anywhere) with a scope ID.  The buffer size is computed to
+ * hold eight groups of colon-separated four-hex-digit numbers, a
+ * percent sign, a scope id (at most 32 bits, in decimal), and NUL.
+ */
+#define NSM_ADDRBUF		((8 * 4 + 7) + (1 + 10) + 1)
+
 struct nsm_handle {
 	struct list_head	sm_link;
 	atomic_t		sm_count;
@@ -76,7 +84,7 @@ struct nsm_handle {
 	size_t			sm_addrlen;
 	unsigned int		sm_monitored : 1,
 				sm_sticky : 1;	/* don't unmonitor */
-	char			sm_addrbuf[48];	/* address eyecatcher */
+	char			sm_addrbuf[NSM_ADDRBUF];
 };
 
 /*
-- 
cgit v1.2.3


From f47534f7f0ac7727e05ec4274b764b181df2cf7f Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 4 Dec 2008 14:20:38 -0500
Subject: NSM: Use modern style for sm_name field in nsm_handle

Clean up: I'm about to add another "char *" field to the nsm_handle
structure.  The sm_name field uses an older style of declaring a
"char *" field.  If I match that style for the new field, checkpatch.pl
will complain.

So, fix the sm_name field to use the new style.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/lockd/lockd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 80a0a2cff2b8..54dbb458e73c 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -79,7 +79,7 @@ struct nlm_host {
 struct nsm_handle {
 	struct list_head	sm_link;
 	atomic_t		sm_count;
-	char *			sm_name;
+	char			*sm_name;
 	struct sockaddr_storage	sm_addr;
 	size_t			sm_addrlen;
 	unsigned int		sm_monitored : 1,
-- 
cgit v1.2.3


From 29ed1407ed81086b778ebf12145b048ac3f7e10e Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 4 Dec 2008 14:20:46 -0500
Subject: NSM: Support IPv6 version of mon_name

The "mon_name" argument of the NSMPROC_MON and NSMPROC_UNMON upcalls
is a string that contains the hostname or IP address of the remote peer
to be notified when this host has rebooted.  The sm-notify command uses
this identifier to contact the peer when we reboot, so it must be
either a well-qualified DNS hostname or a presentation format IP
address string.

When the "nsm_use_hostnames" sysctl is set to zero, the kernel's NSM
provides a presentation format IP address in the "mon_name" argument.
Otherwise, the "caller_name" argument from NLM requests is used,
which is usually just the DNS hostname of the peer.

To support IPv6 addresses for the mon_name argument, we use the
nsm_handle's address eye-catcher, which already contains an appropriate
presentation format address string.  Using the eye-catcher string
obviates the need to use a large buffer on the stack to form the
presentation address string for the upcall.

This patch also addresses a subtle bug.

An NSMPROC_MON request and the subsequent NSMPROC_UNMON request for the
same peer are required to use the same value for the "mon_name"
argument.  Otherwise, rpc.statd's NSMPROC_UNMON processing cannot
locate the database entry for that peer and remove it.

If the setting of nsm_use_hostnames is changed between the time the
kernel sends an NSMPROC_MON request and the time it sends the
NSMPROC_UNMON request for the same peer, the "mon_name" argument for
these two requests may not be the same.  This is because the value of
"mon_name" is currently chosen at the moment the call is made based on
the setting of nsm_use_hostnames

To ensure both requests pass identical contents in the "mon_name"
argument, we now select which string to use for the argument in the
nsm_monitor() function.  A pointer to this string is saved in the
nsm_handle so it can be used for a subsequent NSMPROC_UNMON upcall.

NB: There are other potential problems, such as how nlm_host_rebooted()
might behave if nsm_use_hostnames were changed while hosts are still
being monitored.  This patch does not attempt to address those
problems.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/lockd/mon.c              | 27 ++++++++-------------------
 include/linux/lockd/lockd.h |  1 +
 2 files changed, 9 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 497dfea02e8a..a606fbbf804d 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -18,8 +18,6 @@
 
 #define NLMDBG_FACILITY		NLMDBG_MONITOR
 
-#define XDR_ADDRBUF_LEN		(20)
-
 static struct rpc_clnt *	nsm_create(void);
 
 static struct rpc_program	nsm_program;
@@ -42,7 +40,7 @@ nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res)
 		.prog		= NLM_PROGRAM,
 		.vers		= 3,
 		.proc		= NLMPROC_NSM_NOTIFY,
-		.mon_name	= nsm->sm_name,
+		.mon_name	= nsm->sm_mon_name,
 	};
 	struct rpc_message msg = {
 		.rpc_argp	= &args,
@@ -87,6 +85,12 @@ nsm_monitor(struct nlm_host *host)
 	if (nsm->sm_monitored)
 		return 0;
 
+	/*
+	 * Choose whether to record the caller_name or IP address of
+	 * this peer in the local rpc.statd's database.
+	 */
+	nsm->sm_mon_name = nsm_use_hostnames ? nsm->sm_name : nsm->sm_addrbuf;
+
 	status = nsm_mon_unmon(nsm, SM_MON, &res);
 
 	if (status < 0 || res.status != 0)
@@ -167,25 +171,10 @@ static __be32 *xdr_encode_nsm_string(__be32 *p, char *string)
 
 /*
  * "mon_name" specifies the host to be monitored.
- *
- * Linux uses a text version of the IP address of the remote
- * host as the host identifier (the "mon_name" argument).
- *
- * Linux statd always looks up the canonical hostname first for
- * whatever remote hostname it receives, so this works alright.
  */
 static __be32 *xdr_encode_mon_name(__be32 *p, struct nsm_args *argp)
 {
-	char	buffer[XDR_ADDRBUF_LEN + 1];
-	char	*name = argp->mon_name;
-
-	if (!nsm_use_hostnames) {
-		snprintf(buffer, XDR_ADDRBUF_LEN,
-			 "%pI4", &argp->addr);
-		name = buffer;
-	}
-
-	return xdr_encode_nsm_string(p, name);
+	return xdr_encode_nsm_string(p, argp->mon_name);
 }
 
 /*
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 54dbb458e73c..d3c7247d23e8 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -79,6 +79,7 @@ struct nlm_host {
 struct nsm_handle {
 	struct list_head	sm_link;
 	atomic_t		sm_count;
+	char			*sm_mon_name;
 	char			*sm_name;
 	struct sockaddr_storage	sm_addr;
 	size_t			sm_addrlen;
-- 
cgit v1.2.3


From 1e49323c4ab044d05bbc68cf13cadcbd4372468c Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 4 Dec 2008 14:21:24 -0500
Subject: NLM: Move the public declaration of nsm_monitor() to lockd.h

Clean up.

Make the nlm_host argument "const," and move the public declaration to
lockd.h with other NSM public function (nsm_release, eg) and global
variable declarations.

Add a documenting comment.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/lockd/mon.c                 | 15 +++++++++++----
 include/linux/lockd/lockd.h    |  4 ++++
 include/linux/lockd/sm_inter.h |  1 -
 3 files changed, 15 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 07e16b81498d..aaaa08e7ae7a 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -69,11 +69,18 @@ nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res)
 	return status;
 }
 
-/*
- * Set up monitoring of a remote host
+/**
+ * nsm_monitor - Notify a peer in case we reboot
+ * @host: pointer to nlm_host of peer to notify
+ *
+ * If this peer is not already monitored, this function sends an
+ * upcall to the local rpc.statd to record the name/address of
+ * the peer to notify in case we reboot.
+ *
+ * Returns zero if the peer is monitored by the local rpc.statd;
+ * otherwise a negative errno value is returned.
  */
-int
-nsm_monitor(struct nlm_host *host)
+int nsm_monitor(const struct nlm_host *host)
 {
 	struct nsm_handle *nsm = host->h_nsmhandle;
 	struct nsm_res	res;
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index d3c7247d23e8..f15a4f5ccbfb 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -242,6 +242,10 @@ extern void	  nlm_host_rebooted(const struct sockaddr_in *, const char *,
 					unsigned int, u32);
 void		  nsm_release(struct nsm_handle *);
 
+/*
+ * Host monitoring
+ */
+int		  nsm_monitor(const struct nlm_host *host);
 
 /*
  * This is used in garbage collection and resource reclaim
diff --git a/include/linux/lockd/sm_inter.h b/include/linux/lockd/sm_inter.h
index 5a5448bdb17d..546b6102b0d7 100644
--- a/include/linux/lockd/sm_inter.h
+++ b/include/linux/lockd/sm_inter.h
@@ -41,7 +41,6 @@ struct nsm_res {
 	u32		state;
 };
 
-int		nsm_monitor(struct nlm_host *);
 int		nsm_unmonitor(struct nlm_host *);
 extern int	nsm_local_state;
 
-- 
cgit v1.2.3


From c8c23c423dec49cb439697d3dc714e1500ff1610 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 4 Dec 2008 14:21:31 -0500
Subject: NSM: Release nsmhandle in nlm_destroy_host

The nsm_handle's reference count is bumped in nlm_lookup_host().  It
should be decremented in nlm_destroy_host() to make it easier to see
the balance of these two operations.

Move the nsm_release() call to fs/lockd/host.c.

The h_nsmhandle pointer is set in nlm_lookup_host(), and never cleared.
The nlm_destroy_host() function is never called for the same nlm_host
twice, so h_nsmhandle won't ever be NULL when nsm_unmonitor() is
called.

All references to the nlm_host are gone before it is freed.  We can
skip making h_nsmhandle NULL just before the nlm_host is deallocated.

It's also likely we can remove the h_nsmhandle NULL check in
nlmsvc_is_client() as well, but we can do that later when rearchitect-
ing the nlm_host cache.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/lockd/host.c             | 8 +++-----
 fs/lockd/mon.c              | 5 -----
 include/linux/lockd/lockd.h | 1 -
 3 files changed, 3 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 780918acd6f4..1d523c1a7b62 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -37,6 +37,7 @@ static struct nsm_handle	*nsm_find(const struct sockaddr *sap,
 						const char *hostname,
 						const size_t hostname_len,
 						const int create);
+static void			nsm_release(struct nsm_handle *nsm);
 
 struct nlm_lookup_host_info {
 	const int		server;		/* search for server|client */
@@ -263,10 +264,8 @@ nlm_destroy_host(struct nlm_host *host)
 	BUG_ON(!list_empty(&host->h_lockowners));
 	BUG_ON(atomic_read(&host->h_count));
 
-	/*
-	 * Release NSM handle and unmonitor host.
-	 */
 	nsm_unmonitor(host);
+	nsm_release(host->h_nsmhandle);
 
 	clnt = host->h_rpcclnt;
 	if (clnt != NULL)
@@ -711,8 +710,7 @@ found:
 /*
  * Release an NSM handle
  */
-void
-nsm_release(struct nsm_handle *nsm)
+static void nsm_release(struct nsm_handle *nsm)
 {
 	if (!nsm)
 		return;
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index aaaa08e7ae7a..15fab22db028 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -117,10 +117,6 @@ nsm_unmonitor(struct nlm_host *host)
 	struct nsm_res	res;
 	int		status = 0;
 
-	if (nsm == NULL)
-		return 0;
-	host->h_nsmhandle = NULL;
-
 	if (atomic_read(&nsm->sm_count) == 1
 	 && nsm->sm_monitored && !nsm->sm_sticky) {
 		dprintk("lockd: nsm_unmonitor(%s)\n", nsm->sm_name);
@@ -132,7 +128,6 @@ nsm_unmonitor(struct nlm_host *host)
 		else
 			nsm->sm_monitored = 0;
 	}
-	nsm_release(nsm);
 	return status;
 }
 
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index f15a4f5ccbfb..30a6a9c1ce42 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -240,7 +240,6 @@ void		  nlm_release_host(struct nlm_host *);
 void		  nlm_shutdown_hosts(void);
 extern void	  nlm_host_rebooted(const struct sockaddr_in *, const char *,
 					unsigned int, u32);
-void		  nsm_release(struct nsm_handle *);
 
 /*
  * Host monitoring
-- 
cgit v1.2.3


From 356c3eb466fd1a12afd6448d90fba3922836e5f1 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 4 Dec 2008 14:21:38 -0500
Subject: NLM: Move the public declaration of nsm_unmonitor() to lockd.h

Clean up.

Make the nlm_host argument "const," and move the public declaration to
lockd.h.  Add a documenting comment.

Bruce observed that nsm_unmonitor()'s only caller doesn't care about
its return code, so make nsm_unmonitor() return void.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/lockd/mon.c                 | 15 +++++++++------
 include/linux/lockd/lockd.h    |  1 +
 include/linux/lockd/sm_inter.h |  1 -
 3 files changed, 10 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 15fab22db028..d61cdc61cb50 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -107,15 +107,19 @@ int nsm_monitor(const struct nlm_host *host)
 	return status;
 }
 
-/*
- * Cease to monitor remote host
+/**
+ * nsm_unmonitor - Unregister peer notification
+ * @host: pointer to nlm_host of peer to stop monitoring
+ *
+ * If this peer is monitored, this function sends an upcall to
+ * tell the local rpc.statd not to send this peer a notification
+ * when we reboot.
  */
-int
-nsm_unmonitor(struct nlm_host *host)
+void nsm_unmonitor(const struct nlm_host *host)
 {
 	struct nsm_handle *nsm = host->h_nsmhandle;
 	struct nsm_res	res;
-	int		status = 0;
+	int status;
 
 	if (atomic_read(&nsm->sm_count) == 1
 	 && nsm->sm_monitored && !nsm->sm_sticky) {
@@ -128,7 +132,6 @@ nsm_unmonitor(struct nlm_host *host)
 		else
 			nsm->sm_monitored = 0;
 	}
-	return status;
 }
 
 /*
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 30a6a9c1ce42..38344bfb814a 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -245,6 +245,7 @@ extern void	  nlm_host_rebooted(const struct sockaddr_in *, const char *,
  * Host monitoring
  */
 int		  nsm_monitor(const struct nlm_host *host);
+void		  nsm_unmonitor(const struct nlm_host *host);
 
 /*
  * This is used in garbage collection and resource reclaim
diff --git a/include/linux/lockd/sm_inter.h b/include/linux/lockd/sm_inter.h
index 546b6102b0d7..896a5e303323 100644
--- a/include/linux/lockd/sm_inter.h
+++ b/include/linux/lockd/sm_inter.h
@@ -41,7 +41,6 @@ struct nsm_res {
 	u32		state;
 };
 
-int		nsm_unmonitor(struct nlm_host *);
 extern int	nsm_local_state;
 
 #endif /* LINUX_LOCKD_SM_INTER_H */
-- 
cgit v1.2.3


From 9c1bfd037f7ff8badaecb47418f109148d88bf45 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 5 Dec 2008 19:01:59 -0500
Subject: NSM: Move NSM-related XDR data structures to lockd's xdr.h

Clean up: NSM's XDR data structures are used only in fs/lockd/mon.c,
so move them there.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/lockd/mon.c                 | 14 ++++++++++++++
 include/linux/lockd/sm_inter.h | 20 --------------------
 2 files changed, 14 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 3bb71e1b1e1f..81308832e994 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -18,6 +18,20 @@
 
 #define NLMDBG_FACILITY		NLMDBG_MONITOR
 
+struct nsm_args {
+	__be32			addr;		/* remote address */
+	u32			prog;		/* RPC callback info */
+	u32			vers;
+	u32			proc;
+
+	char			*mon_name;
+};
+
+struct nsm_res {
+	u32			status;
+	u32			state;
+};
+
 static struct rpc_clnt *	nsm_create(void);
 
 static struct rpc_program	nsm_program;
diff --git a/include/linux/lockd/sm_inter.h b/include/linux/lockd/sm_inter.h
index 896a5e303323..dd9d8a5bb316 100644
--- a/include/linux/lockd/sm_inter.h
+++ b/include/linux/lockd/sm_inter.h
@@ -21,26 +21,6 @@
 #define SM_MAXSTRLEN	1024
 #define SM_PRIV_SIZE	16
 
-/*
- * Arguments for all calls to statd
- */
-struct nsm_args {
-	__be32		addr;		/* remote address */
-	u32		prog;		/* RPC callback info */
-	u32		vers;
-	u32		proc;
-
-	char *		mon_name;
-};
-
-/*
- * Result returned by statd
- */
-struct nsm_res {
-	u32		status;
-	u32		state;
-};
-
 extern int	nsm_local_state;
 
 #endif /* LINUX_LOCKD_SM_INTER_H */
-- 
cgit v1.2.3


From 36e8e668d3e6a61848a8921ddeb663b417299fa5 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 5 Dec 2008 19:02:07 -0500
Subject: NSM: Move NSM program and procedure numbers to fs/lockd/mon.c

Clean up: Move the RPC program and procedure numbers for NSM into the
one source file that needs them: fs/lockd/mon.c.

And, as with NLM, NFS, and rpcbind calls, use NSMPROC_FOO instead of
SM_FOO for NSM procedure numbers.

Finally, make a couple of comments more precise: what is referred to
here as SM_NOTIFY is really the NLM (lockd) NLMPROC_SM_NOTIFY downcall,
not NSMPROC_NOTIFY.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/lockd/mon.c                 | 42 +++++++++++++++++++++++++++---------------
 include/linux/lockd/sm_inter.h |  9 ---------
 2 files changed, 27 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 81308832e994..0fc9836db4e7 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -17,6 +17,18 @@
 
 
 #define NLMDBG_FACILITY		NLMDBG_MONITOR
+#define NSM_PROGRAM		100024
+#define NSM_VERSION		1
+
+enum {
+	NSMPROC_NULL,
+	NSMPROC_STAT,
+	NSMPROC_MON,
+	NSMPROC_UNMON,
+	NSMPROC_UNMON_ALL,
+	NSMPROC_SIMU_CRASH,
+	NSMPROC_NOTIFY,
+};
 
 struct nsm_args {
 	__be32			addr;		/* remote address */
@@ -42,7 +54,7 @@ static struct rpc_program	nsm_program;
 int				nsm_local_state;
 
 /*
- * Common procedure for SM_MON/SM_UNMON calls
+ * Common procedure for NSMPROC_MON/NSMPROC_UNMON calls
  */
 static int
 nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res)
@@ -111,7 +123,7 @@ int nsm_monitor(const struct nlm_host *host)
 	 */
 	nsm->sm_mon_name = nsm_use_hostnames ? nsm->sm_name : nsm->sm_addrbuf;
 
-	status = nsm_mon_unmon(nsm, SM_MON, &res);
+	status = nsm_mon_unmon(nsm, NSMPROC_MON, &res);
 	if (res.status != 0)
 		status = -EIO;
 	if (status < 0)
@@ -139,7 +151,7 @@ void nsm_unmonitor(const struct nlm_host *host)
 	 && nsm->sm_monitored && !nsm->sm_sticky) {
 		dprintk("lockd: nsm_unmonitor(%s)\n", nsm->sm_name);
 
-		status = nsm_mon_unmon(nsm, SM_UNMON, &res);
+		status = nsm_mon_unmon(nsm, NSMPROC_UNMON, &res);
 		if (res.status != 0)
 			status = -EIO;
 		if (status < 0)
@@ -167,7 +179,7 @@ nsm_create(void)
 		.addrsize	= sizeof(sin),
 		.servername	= "localhost",
 		.program	= &nsm_program,
-		.version	= SM_VERSION,
+		.version	= NSM_VERSION,
 		.authflavor	= RPC_AUTH_NULL,
 	};
 
@@ -201,7 +213,7 @@ static __be32 *xdr_encode_mon_name(__be32 *p, struct nsm_args *argp)
 /*
  * The "my_id" argument specifies the hostname and RPC procedure
  * to be called when the status manager receives notification
- * (via the SM_NOTIFY call) that the state of host "mon_name"
+ * (via the NLMPROC_SM_NOTIFY call) that the state of host "mon_name"
  * has changed.
  */
 static __be32 *xdr_encode_my_id(__be32 *p, struct nsm_args *argp)
@@ -219,7 +231,7 @@ static __be32 *xdr_encode_my_id(__be32 *p, struct nsm_args *argp)
 
 /*
  * The "mon_id" argument specifies the non-private arguments
- * of an SM_MON or SM_UNMON call.
+ * of an NSMPROC_MON or NSMPROC_UNMON call.
  */
 static __be32 *xdr_encode_mon_id(__be32 *p, struct nsm_args *argp)
 {
@@ -232,8 +244,8 @@ static __be32 *xdr_encode_mon_id(__be32 *p, struct nsm_args *argp)
 
 /*
  * The "priv" argument may contain private information required
- * by the SM_MON call. This information will be supplied in the
- * SM_NOTIFY call.
+ * by the NSMPROC_MON call. This information will be supplied in the
+ * NLMPROC_SM_NOTIFY call.
  *
  * Linux provides the raw IP address of the monitored host,
  * left in network byte order.
@@ -300,22 +312,22 @@ xdr_decode_stat(struct rpc_rqst *rqstp, __be32 *p, struct nsm_res *resp)
 #define SM_unmonres_sz	1
 
 static struct rpc_procinfo	nsm_procedures[] = {
-[SM_MON] = {
-		.p_proc		= SM_MON,
+[NSMPROC_MON] = {
+		.p_proc		= NSMPROC_MON,
 		.p_encode	= (kxdrproc_t) xdr_encode_mon,
 		.p_decode	= (kxdrproc_t) xdr_decode_stat_res,
 		.p_arglen	= SM_mon_sz,
 		.p_replen	= SM_monres_sz,
-		.p_statidx	= SM_MON,
+		.p_statidx	= NSMPROC_MON,
 		.p_name		= "MONITOR",
 	},
-[SM_UNMON] = {
-		.p_proc		= SM_UNMON,
+[NSMPROC_UNMON] = {
+		.p_proc		= NSMPROC_UNMON,
 		.p_encode	= (kxdrproc_t) xdr_encode_unmon,
 		.p_decode	= (kxdrproc_t) xdr_decode_stat,
 		.p_arglen	= SM_mon_id_sz,
 		.p_replen	= SM_unmonres_sz,
-		.p_statidx	= SM_UNMON,
+		.p_statidx	= NSMPROC_UNMON,
 		.p_name		= "UNMONITOR",
 	},
 };
@@ -334,7 +346,7 @@ static struct rpc_stat		nsm_stats;
 
 static struct rpc_program	nsm_program = {
 		.name		= "statd",
-		.number		= SM_PROGRAM,
+		.number		= NSM_PROGRAM,
 		.nrvers		= ARRAY_SIZE(nsm_version),
 		.version	= nsm_version,
 		.stats		= &nsm_stats
diff --git a/include/linux/lockd/sm_inter.h b/include/linux/lockd/sm_inter.h
index dd9d8a5bb316..116bf38535a0 100644
--- a/include/linux/lockd/sm_inter.h
+++ b/include/linux/lockd/sm_inter.h
@@ -9,15 +9,6 @@
 #ifndef LINUX_LOCKD_SM_INTER_H
 #define LINUX_LOCKD_SM_INTER_H
 
-#define SM_PROGRAM	100024
-#define SM_VERSION	1
-#define SM_STAT		1
-#define SM_MON		2
-#define SM_UNMON	3
-#define SM_UNMON_ALL	4
-#define SM_SIMU_CRASH	5
-#define SM_NOTIFY	6
-
 #define SM_MAXSTRLEN	1024
 #define SM_PRIV_SIZE	16
 
-- 
cgit v1.2.3


From 67c6d107a689243979a2b5f15244b5261634a924 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 5 Dec 2008 19:02:45 -0500
Subject: NSM: Move nsm_find() to fs/lockd/mon.c

The nsm_find() function sets up fresh nsm_handle entries.  This is
where we will store the "priv" cookie used to lookup nsm_handles during
reboot recovery.  The cookie will be constructed when nsm_find()
creates a new nsm_handle.

As much as possible, I would like to keep everything that handles a
"priv" cookie in fs/lockd/mon.c so that all the smarts are in one
source file.  That organization should make it pretty simple to see how
all this works.

To me, it makes more sense than the current arrangement to keep
nsm_find() with nsm_monitor() and nsm_unmonitor().

So, start reorganizing by moving nsm_find() into fs/lockd/mon.c.  The
nsm_release() function comes along too, since it shares the nsm_lock
global variable.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/lockd/host.c             | 128 ------------------------------------------
 fs/lockd/mon.c              | 133 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/lockd/lockd.h |   6 ++
 3 files changed, 139 insertions(+), 128 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 1d523c1a7b62..dbdeaa88d2fa 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -32,12 +32,6 @@ static int			nrhosts;
 static DEFINE_MUTEX(nlm_host_mutex);
 
 static void			nlm_gc_hosts(void);
-static struct nsm_handle	*nsm_find(const struct sockaddr *sap,
-						const size_t salen,
-						const char *hostname,
-						const size_t hostname_len,
-						const int create);
-static void			nsm_release(struct nsm_handle *nsm);
 
 struct nlm_lookup_host_info {
 	const int		server;		/* search for server|client */
@@ -106,43 +100,6 @@ static void nlm_clear_port(struct sockaddr *sap)
 	}
 }
 
-static void nlm_display_ipv4_address(const struct sockaddr *sap, char *buf,
-				     const size_t len)
-{
-	const struct sockaddr_in *sin = (struct sockaddr_in *)sap;
-	snprintf(buf, len, "%pI4", &sin->sin_addr.s_addr);
-}
-
-static void nlm_display_ipv6_address(const struct sockaddr *sap, char *buf,
-				     const size_t len)
-{
-	const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
-
-	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
-		snprintf(buf, len, "%pI4", &sin6->sin6_addr.s6_addr32[3]);
-	else if (sin6->sin6_scope_id != 0)
-		snprintf(buf, len, "%pI6%%%u", &sin6->sin6_addr,
-				sin6->sin6_scope_id);
-	else
-		snprintf(buf, len, "%pI6", &sin6->sin6_addr);
-}
-
-static void nlm_display_address(const struct sockaddr *sap,
-				char *buf, const size_t len)
-{
-	switch (sap->sa_family) {
-	case AF_INET:
-		nlm_display_ipv4_address(sap, buf, len);
-		break;
-	case AF_INET6:
-		nlm_display_ipv6_address(sap, buf, len);
-		break;
-	default:
-		snprintf(buf, len, "unsupported address family");
-		break;
-	}
-}
-
 /*
  * Common host lookup routine for server & client
  */
@@ -635,88 +592,3 @@ nlm_gc_hosts(void)
 
 	next_gc = jiffies + NLM_HOST_COLLECT;
 }
-
-
-/*
- * Manage NSM handles
- */
-static LIST_HEAD(nsm_handles);
-static DEFINE_SPINLOCK(nsm_lock);
-
-static struct nsm_handle *nsm_find(const struct sockaddr *sap,
-				   const size_t salen,
-				   const char *hostname,
-				   const size_t hostname_len,
-				   const int create)
-{
-	struct nsm_handle *nsm = NULL;
-	struct nsm_handle *pos;
-
-	if (!sap)
-		return NULL;
-
-	if (hostname && memchr(hostname, '/', hostname_len) != NULL) {
-		if (printk_ratelimit()) {
-			printk(KERN_WARNING "Invalid hostname \"%.*s\" "
-					    "in NFS lock request\n",
-				(int)hostname_len, hostname);
-		}
-		return NULL;
-	}
-
-retry:
-	spin_lock(&nsm_lock);
-	list_for_each_entry(pos, &nsm_handles, sm_link) {
-
-		if (hostname && nsm_use_hostnames) {
-			if (strlen(pos->sm_name) != hostname_len
-			 || memcmp(pos->sm_name, hostname, hostname_len))
-				continue;
-		} else if (!nlm_cmp_addr(nsm_addr(pos), sap))
-			continue;
-		atomic_inc(&pos->sm_count);
-		kfree(nsm);
-		nsm = pos;
-		goto found;
-	}
-	if (nsm) {
-		list_add(&nsm->sm_link, &nsm_handles);
-		goto found;
-	}
-	spin_unlock(&nsm_lock);
-
-	if (!create)
-		return NULL;
-
-	nsm = kzalloc(sizeof(*nsm) + hostname_len + 1, GFP_KERNEL);
-	if (nsm == NULL)
-		return NULL;
-
-	memcpy(nsm_addr(nsm), sap, salen);
-	nsm->sm_addrlen = salen;
-	nsm->sm_name = (char *) (nsm + 1);
-	memcpy(nsm->sm_name, hostname, hostname_len);
-	nsm->sm_name[hostname_len] = '\0';
-	nlm_display_address((struct sockaddr *)&nsm->sm_addr,
-				nsm->sm_addrbuf, sizeof(nsm->sm_addrbuf));
-	atomic_set(&nsm->sm_count, 1);
-	goto retry;
-
-found:
-	spin_unlock(&nsm_lock);
-	return nsm;
-}
-
-/*
- * Release an NSM handle
- */
-static void nsm_release(struct nsm_handle *nsm)
-{
-	if (!nsm)
-		return;
-	if (atomic_dec_and_lock(&nsm->sm_count, &nsm_lock)) {
-		list_del(&nsm->sm_link);
-		spin_unlock(&nsm_lock);
-		kfree(nsm);
-	}
-}
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 81e1cc14246f..8e68e799293c 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -47,12 +47,51 @@ struct nsm_res {
 static struct rpc_clnt *	nsm_create(void);
 
 static struct rpc_program	nsm_program;
+static				LIST_HEAD(nsm_handles);
+static				DEFINE_SPINLOCK(nsm_lock);
 
 /*
  * Local NSM state
  */
 int				nsm_local_state;
 
+static void nsm_display_ipv4_address(const struct sockaddr *sap, char *buf,
+				     const size_t len)
+{
+	const struct sockaddr_in *sin = (struct sockaddr_in *)sap;
+	snprintf(buf, len, "%pI4", &sin->sin_addr.s_addr);
+}
+
+static void nsm_display_ipv6_address(const struct sockaddr *sap, char *buf,
+				     const size_t len)
+{
+	const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
+
+	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
+		snprintf(buf, len, "%pI4", &sin6->sin6_addr.s6_addr32[3]);
+	else if (sin6->sin6_scope_id != 0)
+		snprintf(buf, len, "%pI6%%%u", &sin6->sin6_addr,
+				sin6->sin6_scope_id);
+	else
+		snprintf(buf, len, "%pI6", &sin6->sin6_addr);
+}
+
+static void nsm_display_address(const struct sockaddr *sap,
+				char *buf, const size_t len)
+{
+	switch (sap->sa_family) {
+	case AF_INET:
+		nsm_display_ipv4_address(sap, buf, len);
+		break;
+	case AF_INET6:
+		nsm_display_ipv6_address(sap, buf, len);
+		break;
+	default:
+		snprintf(buf, len, "unsupported address family");
+		break;
+	}
+}
+
 /*
  * Common procedure for NSMPROC_MON/NSMPROC_UNMON calls
  */
@@ -162,6 +201,100 @@ void nsm_unmonitor(const struct nlm_host *host)
 	}
 }
 
+/**
+ * nsm_find - Find or create a cached nsm_handle
+ * @sap: pointer to socket address of handle to find
+ * @salen: length of socket address
+ * @hostname: pointer to C string containing hostname to find
+ * @hostname_len: length of C string
+ * @create: one means create new handle if not found in cache
+ *
+ * Behavior is modulated by the global nsm_use_hostnames variable
+ * and by the @create argument.
+ *
+ * Returns a cached nsm_handle after bumping its ref count, or if
+ * @create is set, returns a fresh nsm_handle if a handle that
+ * matches @sap and/or @hostname cannot be found in the handle cache.
+ * Returns NULL if an error occurs.
+ */
+struct nsm_handle *nsm_find(const struct sockaddr *sap, const size_t salen,
+			    const char *hostname, const size_t hostname_len,
+			    const int create)
+{
+	struct nsm_handle *nsm = NULL;
+	struct nsm_handle *pos;
+
+	if (!sap)
+		return NULL;
+
+	if (hostname && memchr(hostname, '/', hostname_len) != NULL) {
+		if (printk_ratelimit()) {
+			printk(KERN_WARNING "Invalid hostname \"%.*s\" "
+					    "in NFS lock request\n",
+				(int)hostname_len, hostname);
+		}
+		return NULL;
+	}
+
+retry:
+	spin_lock(&nsm_lock);
+	list_for_each_entry(pos, &nsm_handles, sm_link) {
+
+		if (hostname && nsm_use_hostnames) {
+			if (strlen(pos->sm_name) != hostname_len
+			 || memcmp(pos->sm_name, hostname, hostname_len))
+				continue;
+		} else if (!nlm_cmp_addr(nsm_addr(pos), sap))
+			continue;
+		atomic_inc(&pos->sm_count);
+		kfree(nsm);
+		nsm = pos;
+		goto found;
+	}
+	if (nsm) {
+		list_add(&nsm->sm_link, &nsm_handles);
+		goto found;
+	}
+	spin_unlock(&nsm_lock);
+
+	if (!create)
+		return NULL;
+
+	nsm = kzalloc(sizeof(*nsm) + hostname_len + 1, GFP_KERNEL);
+	if (nsm == NULL)
+		return NULL;
+
+	memcpy(nsm_addr(nsm), sap, salen);
+	nsm->sm_addrlen = salen;
+	nsm->sm_name = (char *) (nsm + 1);
+	memcpy(nsm->sm_name, hostname, hostname_len);
+	nsm->sm_name[hostname_len] = '\0';
+	nsm_display_address((struct sockaddr *)&nsm->sm_addr,
+				nsm->sm_addrbuf, sizeof(nsm->sm_addrbuf));
+	atomic_set(&nsm->sm_count, 1);
+	goto retry;
+
+found:
+	spin_unlock(&nsm_lock);
+	return nsm;
+}
+
+/**
+ * nsm_release - Release an NSM handle
+ * @nsm: pointer to handle to be released
+ *
+ */
+void nsm_release(struct nsm_handle *nsm)
+{
+	if (!nsm)
+		return;
+	if (atomic_dec_and_lock(&nsm->sm_count, &nsm_lock)) {
+		list_del(&nsm->sm_link);
+		spin_unlock(&nsm_lock);
+		kfree(nsm);
+	}
+}
+
 /*
  * Create NSM client for the local host
  */
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 38344bfb814a..8d715363c6ac 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -247,6 +247,12 @@ extern void	  nlm_host_rebooted(const struct sockaddr_in *, const char *,
 int		  nsm_monitor(const struct nlm_host *host);
 void		  nsm_unmonitor(const struct nlm_host *host);
 
+struct nsm_handle *nsm_find(const struct sockaddr *sap, const size_t salen,
+					const char *hostname,
+					const size_t hostname_len,
+					const int create);
+void		  nsm_release(struct nsm_handle *nsm);
+
 /*
  * This is used in garbage collection and resource reclaim
  * A return value != 0 means destroy the lock/block/share
-- 
cgit v1.2.3


From 7e44d3bea21fbb9494930d1cd35ca92a9a4a3279 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 5 Dec 2008 19:03:16 -0500
Subject: NSM: Generate NSMPROC_MON's "priv" argument when nsm_handle is
 created

Introduce a new data type, used by both the in-kernel NLM and NSM
implementations, that is used to manage the opaque "priv" argument
for the NSMPROC_MON and NLMPROC_SM_NOTIFY calls.

Construct the "priv" cookie when the nsm_handle is created.

The nsm_init_private() function may look a little strange, but it is
roughly equivalent to how the XDR encoder formed the "priv" argument.
It's going to go away soon.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/lockd/mon.c                 | 16 ++++++++++++++++
 include/linux/lockd/lockd.h    |  1 +
 include/linux/lockd/sm_inter.h |  1 -
 include/linux/lockd/xdr.h      |  6 ++++++
 4 files changed, 23 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 0792900b6281..c8d18cd22b8a 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -201,6 +201,21 @@ void nsm_unmonitor(const struct nlm_host *host)
 	}
 }
 
+/*
+ * Construct a unique cookie to match this nsm_handle to this monitored
+ * host.  It is passed to the local rpc.statd via NSMPROC_MON, and
+ * returned via NLMPROC_SM_NOTIFY, in the "priv" field of these
+ * requests.
+ *
+ * Linux provides the raw IP address of the monitored host,
+ * left in network byte order.
+ */
+static void nsm_init_private(struct nsm_handle *nsm)
+{
+	__be32 *p = (__be32 *)&nsm->sm_priv.data;
+	*p = nsm_addr_in(nsm)->sin_addr.s_addr;
+}
+
 /**
  * nsm_find - Find or create a cached nsm_handle
  * @sap: pointer to socket address of handle to find
@@ -271,6 +286,7 @@ retry:
 	nsm->sm_name = (char *) (nsm + 1);
 	memcpy(nsm->sm_name, hostname, hostname_len);
 	nsm->sm_name[hostname_len] = '\0';
+	nsm_init_private(nsm);
 	nsm_display_address((struct sockaddr *)&nsm->sm_addr,
 				nsm->sm_addrbuf, sizeof(nsm->sm_addrbuf));
 	atomic_set(&nsm->sm_count, 1);
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 8d715363c6ac..194fa8a66398 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -85,6 +85,7 @@ struct nsm_handle {
 	size_t			sm_addrlen;
 	unsigned int		sm_monitored : 1,
 				sm_sticky : 1;	/* don't unmonitor */
+	struct nsm_private	sm_priv;
 	char			sm_addrbuf[NSM_ADDRBUF];
 };
 
diff --git a/include/linux/lockd/sm_inter.h b/include/linux/lockd/sm_inter.h
index 116bf38535a0..5cef5a79dd94 100644
--- a/include/linux/lockd/sm_inter.h
+++ b/include/linux/lockd/sm_inter.h
@@ -10,7 +10,6 @@
 #define LINUX_LOCKD_SM_INTER_H
 
 #define SM_MAXSTRLEN	1024
-#define SM_PRIV_SIZE	16
 
 extern int	nsm_local_state;
 
diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h
index d6b3a802c046..6b5199263858 100644
--- a/include/linux/lockd/xdr.h
+++ b/include/linux/lockd/xdr.h
@@ -13,6 +13,12 @@
 #include <linux/nfs.h>
 #include <linux/sunrpc/xdr.h>
 
+#define SM_PRIV_SIZE		16
+
+struct nsm_private {
+	unsigned char		data[SM_PRIV_SIZE];
+};
+
 struct svc_rqst;
 
 #define NLM_MAXCOOKIELEN    	32
-- 
cgit v1.2.3


From 7fefc9cb9d5f129c238d93166f705c96ca2e7e51 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 5 Dec 2008 19:03:31 -0500
Subject: NLM: Change nlm_host_rebooted() to take a single nlm_reboot argument

Pass the nlm_reboot data structure directly from the NLMPROC_SM_NOTIFY
XDR decoders to nlm_host_rebooted().  This eliminates some packing and
unpacking of the NLMPROC_SM_NOTIFY results, and prepares for passing
these results, including the "priv" cookie, directly to a lookup
routine in fs/lockd/mon.c.

This patch changes code organization but should not cause any
behavioral change.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/lockd/host.c             | 31 +++++++++++++++++--------------
 fs/lockd/svc4proc.c         | 11 +----------
 fs/lockd/svcproc.c          | 11 +----------
 include/linux/lockd/lockd.h |  3 +--
 4 files changed, 20 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index dbdeaa88d2fa..ed103387964d 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -444,31 +444,34 @@ void nlm_release_host(struct nlm_host *host)
 	}
 }
 
-/*
- * We were notified that the host indicated by address &sin
- * has rebooted.
- * Release all resources held by that peer.
+/**
+ * nlm_host_rebooted - Release all resources held by rebooted host
+ * @info: pointer to decoded results of NLM_SM_NOTIFY call
+ *
+ * We were notified that the specified host has rebooted.  Release
+ * all resources held by that peer.
  */
-void nlm_host_rebooted(const struct sockaddr_in *sin,
-				const char *hostname,
-				unsigned int hostname_len,
-				u32 new_state)
+void nlm_host_rebooted(const struct nlm_reboot *info)
 {
+	const struct sockaddr_in sin = {
+		.sin_family		= AF_INET,
+		.sin_addr.s_addr	= info->addr,
+	};
 	struct hlist_head *chain;
 	struct hlist_node *pos;
 	struct nsm_handle *nsm;
 	struct nlm_host	*host;
 
-	nsm = nsm_find((struct sockaddr *)sin, sizeof(*sin),
-			hostname, hostname_len, 0);
+	nsm = nsm_find((struct sockaddr *)&sin, sizeof(sin),
+			info->mon, info->len, 0);
 	if (nsm == NULL) {
 		dprintk("lockd: never saw rebooted peer '%.*s' before\n",
-				hostname_len, hostname);
+				info->len, info->mon);
 		return;
 	}
 
 	dprintk("lockd: nlm_host_rebooted(%.*s, %s)\n",
-			hostname_len, hostname, nsm->sm_addrbuf);
+			info->len, info->mon, nsm->sm_addrbuf);
 
 	/* When reclaiming locks on this peer, make sure that
 	 * we set up a new notification */
@@ -483,8 +486,8 @@ again:	mutex_lock(&nlm_host_mutex);
 	for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) {
 		hlist_for_each_entry(host, pos, chain, h_hash) {
 			if (host->h_nsmhandle == nsm
-			 && host->h_nsmstate != new_state) {
-				host->h_nsmstate = new_state;
+			 && host->h_nsmstate != info->state) {
+				host->h_nsmstate = info->state;
 				host->h_state++;
 
 				nlm_get_host(host);
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 4dfdcbc6bf68..bb79a53e0608 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -419,8 +419,6 @@ static __be32
 nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
 					      void	        *resp)
 {
-	struct sockaddr_in	saddr;
-
 	dprintk("lockd: SM_NOTIFY     called\n");
 
 	if (!nlm_privileged_requester(rqstp)) {
@@ -430,14 +428,7 @@ nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
 		return rpc_system_err;
 	}
 
-	/* Obtain the host pointer for this NFS server and try to
-	 * reclaim all locks we hold on this server.
-	 */
-	memset(&saddr, 0, sizeof(saddr));
-	saddr.sin_family = AF_INET;
-	saddr.sin_addr.s_addr = argp->addr;
-	nlm_host_rebooted(&saddr, argp->mon, argp->len, argp->state);
-
+	nlm_host_rebooted(argp);
 	return rpc_success;
 }
 
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 3ca89e2a9381..e44310c0211c 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -451,8 +451,6 @@ static __be32
 nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
 					      void	        *resp)
 {
-	struct sockaddr_in	saddr;
-
 	dprintk("lockd: SM_NOTIFY     called\n");
 
 	if (!nlm_privileged_requester(rqstp)) {
@@ -462,14 +460,7 @@ nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
 		return rpc_system_err;
 	}
 
-	/* Obtain the host pointer for this NFS server and try to
-	 * reclaim all locks we hold on this server.
-	 */
-	memset(&saddr, 0, sizeof(saddr));
-	saddr.sin_family = AF_INET;
-	saddr.sin_addr.s_addr = argp->addr;
-	nlm_host_rebooted(&saddr, argp->mon, argp->len, argp->state);
-
+	nlm_host_rebooted(argp);
 	return rpc_success;
 }
 
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 194fa8a66398..2a3533ea38dd 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -239,8 +239,7 @@ void		  nlm_rebind_host(struct nlm_host *);
 struct nlm_host * nlm_get_host(struct nlm_host *);
 void		  nlm_release_host(struct nlm_host *);
 void		  nlm_shutdown_hosts(void);
-extern void	  nlm_host_rebooted(const struct sockaddr_in *, const char *,
-					unsigned int, u32);
+void		  nlm_host_rebooted(const struct nlm_reboot *);
 
 /*
  * Host monitoring
-- 
cgit v1.2.3


From 576df4634e37e46b441fefb91915184edb13bb94 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 5 Dec 2008 19:03:39 -0500
Subject: NLM: Decode "priv" argument of NLMPROC_SM_NOTIFY as an opaque

The NLM XDR decoders for the NLMPROC_SM_NOTIFY procedure should treat
their "priv" argument truly as an opaque, as defined by the protocol,
and let the upper layers figure out what is in it.

This will make it easier to modify the contents and interpretation of
the "priv" argument, and keep knowledge about what's in "priv" local
to fs/lockd/mon.c.

For now, the NLM and NSM implementations should behave exactly as they
did before.

The formation of the address of the rebooted host in
nlm_host_rebooted() may look a little strange, but it is the inverse
of how nsm_init_private() forms the private cookie.  Plus, it's
going away soon anyway.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/lockd/host.c           | 3 ++-
 fs/lockd/xdr.c            | 4 ++--
 fs/lockd/xdr4.c           | 4 ++--
 include/linux/lockd/xdr.h | 8 ++++----
 4 files changed, 10 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index ed103387964d..dc41e46ef74c 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -453,9 +453,10 @@ void nlm_release_host(struct nlm_host *host)
  */
 void nlm_host_rebooted(const struct nlm_reboot *info)
 {
+	__be32 *p = (__be32 *)&info->priv.data;
 	const struct sockaddr_in sin = {
 		.sin_family		= AF_INET,
-		.sin_addr.s_addr	= info->addr,
+		.sin_addr.s_addr	= *p,
 	};
 	struct hlist_head *chain;
 	struct hlist_node *pos;
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 1f226290c67c..4cc7d01a1eb5 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -349,8 +349,8 @@ nlmsvc_decode_reboot(struct svc_rqst *rqstp, __be32 *p, struct nlm_reboot *argp)
 	if (!(p = xdr_decode_string_inplace(p, &argp->mon, &argp->len, SM_MAXSTRLEN)))
 		return 0;
 	argp->state = ntohl(*p++);
-	/* Preserve the address in network byte order */
-	argp->addr = *p++;
+	memcpy(&argp->priv.data, p, sizeof(argp->priv.data));
+	p += XDR_QUADLEN(SM_PRIV_SIZE);
 	return xdr_argsize_check(rqstp, p);
 }
 
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index 50c493a8ad8e..61d1714a470e 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -356,8 +356,8 @@ nlm4svc_decode_reboot(struct svc_rqst *rqstp, __be32 *p, struct nlm_reboot *argp
 	if (!(p = xdr_decode_string_inplace(p, &argp->mon, &argp->len, SM_MAXSTRLEN)))
 		return 0;
 	argp->state = ntohl(*p++);
-	/* Preserve the address in network byte order */
-	argp->addr  = *p++;
+	memcpy(&argp->priv.data, p, sizeof(argp->priv.data));
+	p += XDR_QUADLEN(SM_PRIV_SIZE);
 	return xdr_argsize_check(rqstp, p);
 }
 
diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h
index 6b5199263858..6338866222a8 100644
--- a/include/linux/lockd/xdr.h
+++ b/include/linux/lockd/xdr.h
@@ -83,10 +83,10 @@ struct nlm_res {
  * statd callback when client has rebooted
  */
 struct nlm_reboot {
-	char *		mon;
-	unsigned int	len;
-	u32		state;
-	__be32		addr;
+	char			*mon;
+	unsigned int		len;
+	u32			state;
+	struct nsm_private	priv;
 };
 
 /*
-- 
cgit v1.2.3


From 3420a8c4359a189f7d854ed7075d151257415447 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 5 Dec 2008 19:03:46 -0500
Subject: NSM: Add nsm_lookup() function

Introduce a new API to fs/lockd/mon.c that allows nlm_host_rebooted()
to lookup up nsm_handles via the contents of an nlm_reboot struct.

The new function is equivalent to calling nsm_find() with @create set
to zero, but it takes a struct nlm_reboot instead of separate
arguments.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/lockd/mon.c              | 64 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/lockd/lockd.h |  1 +
 2 files changed, 65 insertions(+)

(limited to 'include/linux')

diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 4424b0a5a51f..e46903995c99 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -201,6 +201,29 @@ void nsm_unmonitor(const struct nlm_host *host)
 	}
 }
 
+static struct nsm_handle *nsm_lookup_hostname(const char *hostname,
+					      const size_t len)
+{
+	struct nsm_handle *nsm;
+
+	list_for_each_entry(nsm, &nsm_handles, sm_link)
+		if (strlen(nsm->sm_name) == len &&
+		    memcmp(nsm->sm_name, hostname, len) == 0)
+			return nsm;
+	return NULL;
+}
+
+static struct nsm_handle *nsm_lookup_priv(const struct nsm_private *priv)
+{
+	struct nsm_handle *nsm;
+
+	list_for_each_entry(nsm, &nsm_handles, sm_link)
+		if (memcmp(nsm->sm_priv.data, priv->data,
+					sizeof(priv->data)) == 0)
+			return nsm;
+	return NULL;
+}
+
 /*
  * Construct a unique cookie to match this nsm_handle to this monitored
  * host.  It is passed to the local rpc.statd via NSMPROC_MON, and
@@ -297,6 +320,47 @@ found:
 	return nsm;
 }
 
+/**
+ * nsm_reboot_lookup - match NLMPROC_SM_NOTIFY arguments to an nsm_handle
+ * @info: pointer to NLMPROC_SM_NOTIFY arguments
+ *
+ * Returns a matching nsm_handle if found in the nsm cache; the returned
+ * nsm_handle's reference count is bumped and sm_monitored is cleared.
+ * Otherwise returns NULL if some error occurred.
+ */
+struct nsm_handle *nsm_reboot_lookup(const struct nlm_reboot *info)
+{
+	struct nsm_handle *cached;
+
+	spin_lock(&nsm_lock);
+
+	if (nsm_use_hostnames && info->mon != NULL)
+		cached = nsm_lookup_hostname(info->mon, info->len);
+	else
+		cached = nsm_lookup_priv(&info->priv);
+
+	if (unlikely(cached == NULL)) {
+		spin_unlock(&nsm_lock);
+		dprintk("lockd: never saw rebooted peer '%.*s' before\n",
+				info->len, info->mon);
+		return cached;
+	}
+
+	atomic_inc(&cached->sm_count);
+	spin_unlock(&nsm_lock);
+
+	/*
+	 * During subsequent lock activity, force a fresh
+	 * notification to be set up for this host.
+	 */
+	cached->sm_monitored = 0;
+
+	dprintk("lockd: host %s (%s) rebooted, cnt %d\n",
+			cached->sm_name, cached->sm_addrbuf,
+			atomic_read(&cached->sm_count));
+	return cached;
+}
+
 /**
  * nsm_release - Release an NSM handle
  * @nsm: pointer to handle to be released
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 2a3533ea38dd..5e3ad926de89 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -251,6 +251,7 @@ struct nsm_handle *nsm_find(const struct sockaddr *sap, const size_t salen,
 					const char *hostname,
 					const size_t hostname_len,
 					const int create);
+struct nsm_handle *nsm_reboot_lookup(const struct nlm_reboot *info);
 void		  nsm_release(struct nsm_handle *nsm);
 
 /*
-- 
cgit v1.2.3


From 92fd91b998a5216a6d6606704e71d541a180216c Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 5 Dec 2008 19:04:01 -0500
Subject: NLM: Remove "create" argument from nsm_find()

Clean up: nsm_find() now has only one caller, and that caller
unconditionally sets the @create argument. Thus the @create
argument is no longer needed.

Since nsm_find() now has a more specific purpose, pick a more
appropriate name for it.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/lockd/host.c             |  4 ++--
 fs/lockd/mon.c              | 23 +++++++++--------------
 include/linux/lockd/lockd.h |  6 +++---
 3 files changed, 14 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 230de93fc048..e5a65df4c0cd 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -159,8 +159,8 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni)
 		atomic_inc(&nsm->sm_count);
 	else {
 		host = NULL;
-		nsm = nsm_find(ni->sap, ni->salen,
-				ni->hostname, ni->hostname_len, 1);
+		nsm = nsm_get_handle(ni->sap, ni->salen,
+					ni->hostname, ni->hostname_len);
 		if (!nsm) {
 			dprintk("lockd: nlm_lookup_host failed; "
 				"no nsm handle\n");
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index e46903995c99..740702216042 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -240,24 +240,22 @@ static void nsm_init_private(struct nsm_handle *nsm)
 }
 
 /**
- * nsm_find - Find or create a cached nsm_handle
+ * nsm_get_handle - Find or create a cached nsm_handle
  * @sap: pointer to socket address of handle to find
  * @salen: length of socket address
  * @hostname: pointer to C string containing hostname to find
  * @hostname_len: length of C string
- * @create: one means create new handle if not found in cache
  *
- * Behavior is modulated by the global nsm_use_hostnames variable
- * and by the @create argument.
+ * Behavior is modulated by the global nsm_use_hostnames variable.
  *
- * Returns a cached nsm_handle after bumping its ref count, or if
- * @create is set, returns a fresh nsm_handle if a handle that
- * matches @sap and/or @hostname cannot be found in the handle cache.
- * Returns NULL if an error occurs.
+ * Returns a cached nsm_handle after bumping its ref count, or
+ * returns a fresh nsm_handle if a handle that matches @sap and/or
+ * @hostname cannot be found in the handle cache.  Returns NULL if
+ * an error occurs.
  */
-struct nsm_handle *nsm_find(const struct sockaddr *sap, const size_t salen,
-			    const char *hostname, const size_t hostname_len,
-			    const int create)
+struct nsm_handle *nsm_get_handle(const struct sockaddr *sap,
+				  const size_t salen, const char *hostname,
+				  const size_t hostname_len)
 {
 	struct nsm_handle *nsm = NULL;
 	struct nsm_handle *pos;
@@ -297,9 +295,6 @@ retry:
 	}
 	spin_unlock(&nsm_lock);
 
-	if (!create)
-		return NULL;
-
 	nsm = kzalloc(sizeof(*nsm) + hostname_len + 1, GFP_KERNEL);
 	if (nsm == NULL)
 		return NULL;
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 5e3ad926de89..1ccd49e97a7f 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -247,10 +247,10 @@ void		  nlm_host_rebooted(const struct nlm_reboot *);
 int		  nsm_monitor(const struct nlm_host *host);
 void		  nsm_unmonitor(const struct nlm_host *host);
 
-struct nsm_handle *nsm_find(const struct sockaddr *sap, const size_t salen,
+struct nsm_handle *nsm_get_handle(const struct sockaddr *sap,
+					const size_t salen,
 					const char *hostname,
-					const size_t hostname_len,
-					const int create);
+					const size_t hostname_len);
 struct nsm_handle *nsm_reboot_lookup(const struct nlm_reboot *info);
 void		  nsm_release(struct nsm_handle *nsm);
 
-- 
cgit v1.2.3


From e6765b83977f07983c7a10e6bbb19d6c7bbfc3a4 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 11 Dec 2008 17:56:14 -0500
Subject: NSM: Remove include/linux/lockd/sm_inter.h

Clean up: The include/linux/lockd/sm_inter.h header is nearly empty
now.  Remove it.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/lockd/clntproc.c            |  1 -
 fs/lockd/host.c                |  1 -
 fs/lockd/mon.c                 |  2 --
 fs/lockd/svc.c                 |  1 -
 fs/lockd/svc4proc.c            |  2 --
 fs/lockd/svcproc.c             |  2 --
 fs/lockd/svcsubs.c             |  1 -
 fs/lockd/xdr.c                 |  1 -
 fs/lockd/xdr4.c                |  1 -
 include/linux/lockd/lockd.h    |  1 +
 include/linux/lockd/sm_inter.h | 16 ----------------
 include/linux/lockd/xdr.h      |  1 +
 12 files changed, 2 insertions(+), 28 deletions(-)
 delete mode 100644 include/linux/lockd/sm_inter.h

(limited to 'include/linux')

diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 5ce42e0ed4a0..dd7957064a8c 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -16,7 +16,6 @@
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/lockd/lockd.h>
-#include <linux/lockd/sm_inter.h>
 
 #define NLMDBG_FACILITY		NLMDBG_CLIENT
 #define NLMCLNT_GRACE_WAIT	(5*HZ)
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index e5a65df4c0cd..99d737bd4325 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -15,7 +15,6 @@
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/lockd/lockd.h>
-#include <linux/lockd/sm_inter.h>
 #include <linux/mutex.h>
 
 #include <net/ipv6.h>
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 8ae4c02d7dfd..dfa9d80efcba 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -15,8 +15,6 @@
 #include <linux/sunrpc/xprtsock.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/lockd/lockd.h>
-#include <linux/lockd/sm_inter.h>
-
 
 #define NLMDBG_FACILITY		NLMDBG_MONITOR
 #define NSM_PROGRAM		100024
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index bc3c3cb62db5..0b13392931a6 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -35,7 +35,6 @@
 #include <linux/sunrpc/svcsock.h>
 #include <net/ip.h>
 #include <linux/lockd/lockd.h>
-#include <linux/lockd/sm_inter.h>
 #include <linux/nfs.h>
 
 #define NLMDBG_FACILITY		NLMDBG_SVC
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index bb79a53e0608..1725037374c5 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -16,8 +16,6 @@
 #include <linux/nfsd/nfsd.h>
 #include <linux/lockd/lockd.h>
 #include <linux/lockd/share.h>
-#include <linux/lockd/sm_inter.h>
-
 
 #define NLMDBG_FACILITY		NLMDBG_CLIENT
 
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index e44310c0211c..3688e55901fc 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -16,8 +16,6 @@
 #include <linux/nfsd/nfsd.h>
 #include <linux/lockd/lockd.h>
 #include <linux/lockd/share.h>
-#include <linux/lockd/sm_inter.h>
-
 
 #define NLMDBG_FACILITY		NLMDBG_CLIENT
 
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 34c2766e27c7..9e4d6aab611b 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -17,7 +17,6 @@
 #include <linux/nfsd/export.h>
 #include <linux/lockd/lockd.h>
 #include <linux/lockd/share.h>
-#include <linux/lockd/sm_inter.h>
 #include <linux/module.h>
 #include <linux/mount.h>
 
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 4cc7d01a1eb5..0336f2beacde 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -16,7 +16,6 @@
 #include <linux/sunrpc/svc.h>
 #include <linux/sunrpc/stats.h>
 #include <linux/lockd/lockd.h>
-#include <linux/lockd/sm_inter.h>
 
 #define NLMDBG_FACILITY		NLMDBG_XDR
 
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index 61d1714a470e..e1d528653192 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -17,7 +17,6 @@
 #include <linux/sunrpc/svc.h>
 #include <linux/sunrpc/stats.h>
 #include <linux/lockd/lockd.h>
-#include <linux/lockd/sm_inter.h>
 
 #define NLMDBG_FACILITY		NLMDBG_XDR
 
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 1ccd49e97a7f..8b57467375cc 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -205,6 +205,7 @@ extern struct svc_procedure	nlmsvc_procedures4[];
 extern int			nlmsvc_grace_period;
 extern unsigned long		nlmsvc_timeout;
 extern int			nsm_use_hostnames;
+extern int			nsm_local_state;
 
 /*
  * Lockd client functions
diff --git a/include/linux/lockd/sm_inter.h b/include/linux/lockd/sm_inter.h
deleted file mode 100644
index 5cef5a79dd94..000000000000
--- a/include/linux/lockd/sm_inter.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/*
- * linux/include/linux/lockd/sm_inter.h
- *
- * Declarations for the kernel statd client.
- *
- * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
- */
-
-#ifndef LINUX_LOCKD_SM_INTER_H
-#define LINUX_LOCKD_SM_INTER_H
-
-#define SM_MAXSTRLEN	1024
-
-extern int	nsm_local_state;
-
-#endif /* LINUX_LOCKD_SM_INTER_H */
diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h
index 6338866222a8..7dc5b6cb44cd 100644
--- a/include/linux/lockd/xdr.h
+++ b/include/linux/lockd/xdr.h
@@ -13,6 +13,7 @@
 #include <linux/nfs.h>
 #include <linux/sunrpc/xdr.h>
 
+#define SM_MAXSTRLEN		1024
 #define SM_PRIV_SIZE		16
 
 struct nsm_private {
-- 
cgit v1.2.3


From 8529bc51d30b8f001734b29b21a51b579c260f5b Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 11 Dec 2008 17:56:22 -0500
Subject: NSM: Move nsm_addr() to fs/lockd/mon.c

Clean up: nsm_addr_in() is no longer used, and nsm_addr() is used only in
fs/lockd/mon.c, so move it there.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/lockd/mon.c              |  5 +++++
 include/linux/lockd/lockd.h | 10 ----------
 2 files changed, 5 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index dfa9d80efcba..43be31c4a2de 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -55,6 +55,11 @@ static				DEFINE_SPINLOCK(nsm_lock);
  */
 int				nsm_local_state;
 
+static inline struct sockaddr *nsm_addr(const struct nsm_handle *nsm)
+{
+	return (struct sockaddr *)&nsm->sm_addr;
+}
+
 static void nsm_display_ipv4_address(const struct sockaddr *sap, char *buf,
 				     const size_t len)
 {
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 8b57467375cc..6ab0449bc828 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -112,16 +112,6 @@ static inline struct sockaddr *nlm_srcaddr(const struct nlm_host *host)
 	return (struct sockaddr *)&host->h_srcaddr;
 }
 
-static inline struct sockaddr_in *nsm_addr_in(const struct nsm_handle *handle)
-{
-	return (struct sockaddr_in *)&handle->sm_addr;
-}
-
-static inline struct sockaddr *nsm_addr(const struct nsm_handle *handle)
-{
-	return (struct sockaddr *)&handle->sm_addr;
-}
-
 /*
  * Map an fl_owner_t into a unique 32-bit "pid"
  */
-- 
cgit v1.2.3


From d1208f70738c91f13b4eadb1b7a694082e439da2 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 11 Dec 2008 17:56:44 -0500
Subject: NLM: nlm_privileged_requester() doesn't recognize mapped loopback
 address

Commit b85e4676 added the nlm_privileged_requester() helper to check
whether an RPC request was sent from a local privileged caller.  It
recognizes IPv4 privileged callers (from "127.0.0.1"), and IPv6
privileged callers (from "::1").

However, IPV6_ADDR_LOOPBACK is not set for the mapped IPv4 loopback
address (::ffff:7f00:0001), so the test breaks when the kernel's RPC
service is IPv6-enabled but user space is calling via the IPv4
loopback address.  This is actually the most common case for IPv6-
enabled RPC services on Linux.

Rewrite the IPv6 check to handle the mapped IPv4 loopback address as
well as a normal IPv6 loopback address.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/lockd/lockd.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 6ab0449bc828..80d7e8a8257d 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -299,8 +299,14 @@ static inline int __nlm_privileged_request4(const struct sockaddr *sap)
 static inline int __nlm_privileged_request6(const struct sockaddr *sap)
 {
 	const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
-	return (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LOOPBACK) &&
-			(ntohs(sin6->sin6_port) < 1024);
+
+	if (ntohs(sin6->sin6_port) > 1023)
+		return 0;
+
+	if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_MAPPED)
+		return ipv4_is_loopback(sin6->sin6_addr.s6_addr32[3]);
+
+	return ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LOOPBACK;
 }
 #else	/* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
 static inline int __nlm_privileged_request6(const struct sockaddr *sap)
-- 
cgit v1.2.3


From 57ef692588bc225853ca3267ca5b7cea2b07e058 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 11 Dec 2008 17:56:52 -0500
Subject: NLM: Rewrite IPv4 privileged requester's check

Clean up.

For consistency, rewrite the IPv4 check to match the same style as the
new IPv6 check.  Note that ipv4_is_loopback() is somewhat broader in
its interpretation of what is a loopback address than simply
"127.0.0.1".

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/lockd/lockd.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 80d7e8a8257d..aa6fe7026de7 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -291,8 +291,11 @@ static inline struct inode *nlmsvc_file_inode(struct nlm_file *file)
 static inline int __nlm_privileged_request4(const struct sockaddr *sap)
 {
 	const struct sockaddr_in *sin = (struct sockaddr_in *)sap;
-	return (sin->sin_addr.s_addr == htonl(INADDR_LOOPBACK)) &&
-			(ntohs(sin->sin_port) < 1024);
+
+	if (ntohs(sin->sin_port) > 1023)
+		return 0;
+
+	return ipv4_is_loopback(sin->sin_addr.s_addr);
 }
 
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-- 
cgit v1.2.3


From adf094931ffb25ef4b381559918f1a34181a5273 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Mon, 6 Oct 2008 22:46:05 +0200
Subject: PM: Simplify the new suspend/hibernation framework for devices

PM: Simplify the new suspend/hibernation framework for devices

Following the discussion at the Kernel Summit, simplify the new
device PM framework by merging 'struct pm_ops' and
'struct pm_ext_ops' and removing pointers to 'struct pm_ext_ops'
from 'struct platform_driver' and 'struct pci_driver'.

After this change, the suspend/hibernation callbacks will only
reside in 'struct device_driver' as well as at the bus type/
device class/device type level.  Accordingly, PCI and platform
device drivers are now expected to put their suspend/hibernation
callbacks into the 'struct device_driver' embedded in
'struct pci_driver' or 'struct platform_driver', respectively.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@suse.cz>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/platform.c         | 115 +++++++++++++++++++++-------------------
 drivers/base/power/main.c       |  19 +++----
 drivers/pci/pci-driver.c        |  46 +++++++---------
 drivers/usb/core/usb.c          |   4 +-
 include/linux/device.h          |   8 +--
 include/linux/pci.h             |   1 -
 include/linux/platform_device.h |   1 -
 include/linux/pm.h              |  76 +++++++++-----------------
 8 files changed, 119 insertions(+), 151 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index dfcbfe504867..6c743b6008d9 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -503,8 +503,6 @@ int platform_driver_register(struct platform_driver *drv)
 		drv->driver.suspend = platform_drv_suspend;
 	if (drv->resume)
 		drv->driver.resume = platform_drv_resume;
-	if (drv->pm)
-		drv->driver.pm = &drv->pm->base;
 	return driver_register(&drv->driver);
 }
 EXPORT_SYMBOL_GPL(platform_driver_register);
@@ -686,7 +684,10 @@ static int platform_pm_suspend(struct device *dev)
 	struct device_driver *drv = dev->driver;
 	int ret = 0;
 
-	if (drv && drv->pm) {
+	if (!drv)
+		return 0;
+
+	if (drv->pm) {
 		if (drv->pm->suspend)
 			ret = drv->pm->suspend(dev);
 	} else {
@@ -698,16 +699,15 @@ static int platform_pm_suspend(struct device *dev)
 
 static int platform_pm_suspend_noirq(struct device *dev)
 {
-	struct platform_driver *pdrv;
+	struct device_driver *drv = dev->driver;
 	int ret = 0;
 
-	if (!dev->driver)
+	if (!drv)
 		return 0;
 
-	pdrv = to_platform_driver(dev->driver);
-	if (pdrv->pm) {
-		if (pdrv->pm->suspend_noirq)
-			ret = pdrv->pm->suspend_noirq(dev);
+	if (drv->pm) {
+		if (drv->pm->suspend_noirq)
+			ret = drv->pm->suspend_noirq(dev);
 	} else {
 		ret = platform_legacy_suspend_late(dev, PMSG_SUSPEND);
 	}
@@ -720,7 +720,10 @@ static int platform_pm_resume(struct device *dev)
 	struct device_driver *drv = dev->driver;
 	int ret = 0;
 
-	if (drv && drv->pm) {
+	if (!drv)
+		return 0;
+
+	if (drv->pm) {
 		if (drv->pm->resume)
 			ret = drv->pm->resume(dev);
 	} else {
@@ -732,16 +735,15 @@ static int platform_pm_resume(struct device *dev)
 
 static int platform_pm_resume_noirq(struct device *dev)
 {
-	struct platform_driver *pdrv;
+	struct device_driver *drv = dev->driver;
 	int ret = 0;
 
-	if (!dev->driver)
+	if (!drv)
 		return 0;
 
-	pdrv = to_platform_driver(dev->driver);
-	if (pdrv->pm) {
-		if (pdrv->pm->resume_noirq)
-			ret = pdrv->pm->resume_noirq(dev);
+	if (drv->pm) {
+		if (drv->pm->resume_noirq)
+			ret = drv->pm->resume_noirq(dev);
 	} else {
 		ret = platform_legacy_resume_early(dev);
 	}
@@ -780,16 +782,15 @@ static int platform_pm_freeze(struct device *dev)
 
 static int platform_pm_freeze_noirq(struct device *dev)
 {
-	struct platform_driver *pdrv;
+	struct device_driver *drv = dev->driver;
 	int ret = 0;
 
-	if (!dev->driver)
+	if (!drv)
 		return 0;
 
-	pdrv = to_platform_driver(dev->driver);
-	if (pdrv->pm) {
-		if (pdrv->pm->freeze_noirq)
-			ret = pdrv->pm->freeze_noirq(dev);
+	if (drv->pm) {
+		if (drv->pm->freeze_noirq)
+			ret = drv->pm->freeze_noirq(dev);
 	} else {
 		ret = platform_legacy_suspend_late(dev, PMSG_FREEZE);
 	}
@@ -802,7 +803,10 @@ static int platform_pm_thaw(struct device *dev)
 	struct device_driver *drv = dev->driver;
 	int ret = 0;
 
-	if (drv && drv->pm) {
+	if (!drv)
+		return 0;
+
+	if (drv->pm) {
 		if (drv->pm->thaw)
 			ret = drv->pm->thaw(dev);
 	} else {
@@ -814,16 +818,15 @@ static int platform_pm_thaw(struct device *dev)
 
 static int platform_pm_thaw_noirq(struct device *dev)
 {
-	struct platform_driver *pdrv;
+	struct device_driver *drv = dev->driver;
 	int ret = 0;
 
-	if (!dev->driver)
+	if (!drv)
 		return 0;
 
-	pdrv = to_platform_driver(dev->driver);
-	if (pdrv->pm) {
-		if (pdrv->pm->thaw_noirq)
-			ret = pdrv->pm->thaw_noirq(dev);
+	if (drv->pm) {
+		if (drv->pm->thaw_noirq)
+			ret = drv->pm->thaw_noirq(dev);
 	} else {
 		ret = platform_legacy_resume_early(dev);
 	}
@@ -836,7 +839,10 @@ static int platform_pm_poweroff(struct device *dev)
 	struct device_driver *drv = dev->driver;
 	int ret = 0;
 
-	if (drv && drv->pm) {
+	if (!drv)
+		return 0;
+
+	if (drv->pm) {
 		if (drv->pm->poweroff)
 			ret = drv->pm->poweroff(dev);
 	} else {
@@ -848,16 +854,15 @@ static int platform_pm_poweroff(struct device *dev)
 
 static int platform_pm_poweroff_noirq(struct device *dev)
 {
-	struct platform_driver *pdrv;
+	struct device_driver *drv = dev->driver;
 	int ret = 0;
 
-	if (!dev->driver)
+	if (!drv)
 		return 0;
 
-	pdrv = to_platform_driver(dev->driver);
-	if (pdrv->pm) {
-		if (pdrv->pm->poweroff_noirq)
-			ret = pdrv->pm->poweroff_noirq(dev);
+	if (drv->pm) {
+		if (drv->pm->poweroff_noirq)
+			ret = drv->pm->poweroff_noirq(dev);
 	} else {
 		ret = platform_legacy_suspend_late(dev, PMSG_HIBERNATE);
 	}
@@ -870,7 +875,10 @@ static int platform_pm_restore(struct device *dev)
 	struct device_driver *drv = dev->driver;
 	int ret = 0;
 
-	if (drv && drv->pm) {
+	if (!drv)
+		return 0;
+
+	if (drv->pm) {
 		if (drv->pm->restore)
 			ret = drv->pm->restore(dev);
 	} else {
@@ -882,16 +890,15 @@ static int platform_pm_restore(struct device *dev)
 
 static int platform_pm_restore_noirq(struct device *dev)
 {
-	struct platform_driver *pdrv;
+	struct device_driver *drv = dev->driver;
 	int ret = 0;
 
-	if (!dev->driver)
+	if (!drv)
 		return 0;
 
-	pdrv = to_platform_driver(dev->driver);
-	if (pdrv->pm) {
-		if (pdrv->pm->restore_noirq)
-			ret = pdrv->pm->restore_noirq(dev);
+	if (drv->pm) {
+		if (drv->pm->restore_noirq)
+			ret = drv->pm->restore_noirq(dev);
 	} else {
 		ret = platform_legacy_resume_early(dev);
 	}
@@ -912,17 +919,15 @@ static int platform_pm_restore_noirq(struct device *dev)
 
 #endif /* !CONFIG_HIBERNATION */
 
-static struct pm_ext_ops platform_pm_ops = {
-	.base = {
-		.prepare = platform_pm_prepare,
-		.complete = platform_pm_complete,
-		.suspend = platform_pm_suspend,
-		.resume = platform_pm_resume,
-		.freeze = platform_pm_freeze,
-		.thaw = platform_pm_thaw,
-		.poweroff = platform_pm_poweroff,
-		.restore = platform_pm_restore,
-	},
+static struct dev_pm_ops platform_dev_pm_ops = {
+	.prepare = platform_pm_prepare,
+	.complete = platform_pm_complete,
+	.suspend = platform_pm_suspend,
+	.resume = platform_pm_resume,
+	.freeze = platform_pm_freeze,
+	.thaw = platform_pm_thaw,
+	.poweroff = platform_pm_poweroff,
+	.restore = platform_pm_restore,
 	.suspend_noirq = platform_pm_suspend_noirq,
 	.resume_noirq = platform_pm_resume_noirq,
 	.freeze_noirq = platform_pm_freeze_noirq,
@@ -931,7 +936,7 @@ static struct pm_ext_ops platform_pm_ops = {
 	.restore_noirq = platform_pm_restore_noirq,
 };
 
-#define PLATFORM_PM_OPS_PTR	&platform_pm_ops
+#define PLATFORM_PM_OPS_PTR	(&platform_dev_pm_ops)
 
 #else /* !CONFIG_PM_SLEEP */
 
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 692c20ba5144..a8e4dcbcaf7a 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -112,7 +112,8 @@ void device_pm_remove(struct device *dev)
  *	@ops:	PM operations to choose from.
  *	@state:	PM transition of the system being carried out.
  */
-static int pm_op(struct device *dev, struct pm_ops *ops, pm_message_t state)
+static int pm_op(struct device *dev, struct dev_pm_ops *ops,
+			pm_message_t state)
 {
 	int error = 0;
 
@@ -174,7 +175,7 @@ static int pm_op(struct device *dev, struct pm_ops *ops, pm_message_t state)
  *	The operation is executed with interrupts disabled by the only remaining
  *	functional CPU in the system.
  */
-static int pm_noirq_op(struct device *dev, struct pm_ext_ops *ops,
+static int pm_noirq_op(struct device *dev, struct dev_pm_ops *ops,
 			pm_message_t state)
 {
 	int error = 0;
@@ -354,7 +355,7 @@ static int resume_device(struct device *dev, pm_message_t state)
 	if (dev->bus) {
 		if (dev->bus->pm) {
 			pm_dev_dbg(dev, state, "");
-			error = pm_op(dev, &dev->bus->pm->base, state);
+			error = pm_op(dev, dev->bus->pm, state);
 		} else if (dev->bus->resume) {
 			pm_dev_dbg(dev, state, "legacy ");
 			error = dev->bus->resume(dev);
@@ -451,9 +452,9 @@ static void complete_device(struct device *dev, pm_message_t state)
 		dev->type->pm->complete(dev);
 	}
 
-	if (dev->bus && dev->bus->pm && dev->bus->pm->base.complete) {
+	if (dev->bus && dev->bus->pm && dev->bus->pm->complete) {
 		pm_dev_dbg(dev, state, "completing ");
-		dev->bus->pm->base.complete(dev);
+		dev->bus->pm->complete(dev);
 	}
 
 	up(&dev->sem);
@@ -624,7 +625,7 @@ static int suspend_device(struct device *dev, pm_message_t state)
 	if (dev->bus) {
 		if (dev->bus->pm) {
 			pm_dev_dbg(dev, state, "");
-			error = pm_op(dev, &dev->bus->pm->base, state);
+			error = pm_op(dev, dev->bus->pm, state);
 		} else if (dev->bus->suspend) {
 			pm_dev_dbg(dev, state, "legacy ");
 			error = dev->bus->suspend(dev, state);
@@ -685,10 +686,10 @@ static int prepare_device(struct device *dev, pm_message_t state)
 
 	down(&dev->sem);
 
-	if (dev->bus && dev->bus->pm && dev->bus->pm->base.prepare) {
+	if (dev->bus && dev->bus->pm && dev->bus->pm->prepare) {
 		pm_dev_dbg(dev, state, "preparing ");
-		error = dev->bus->pm->base.prepare(dev);
-		suspend_report_result(dev->bus->pm->base.prepare, error);
+		error = dev->bus->pm->prepare(dev);
+		suspend_report_result(dev->bus->pm->prepare, error);
 		if (error)
 			goto End;
 	}
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index b4cdd690ae71..4042d211c3e5 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -433,8 +433,7 @@ static int pci_pm_suspend(struct device *dev)
 
 static int pci_pm_suspend_noirq(struct device *dev)
 {
-	struct pci_dev *pci_dev = to_pci_dev(dev);
-	struct pci_driver *drv = pci_dev->driver;
+	struct device_driver *drv = dev->driver;
 	int error = 0;
 
 	if (drv && drv->pm) {
@@ -469,11 +468,10 @@ static int pci_pm_resume(struct device *dev)
 
 static int pci_pm_resume_noirq(struct device *dev)
 {
-	struct pci_dev *pci_dev = to_pci_dev(dev);
-	struct pci_driver *drv = pci_dev->driver;
+	struct device_driver *drv = dev->driver;
 	int error = 0;
 
-	pci_fixup_device(pci_fixup_resume_early, pci_dev);
+	pci_fixup_device(pci_fixup_resume_early, to_pci_dev(dev));
 
 	if (drv && drv->pm) {
 		if (drv->pm->resume_noirq)
@@ -519,8 +517,7 @@ static int pci_pm_freeze(struct device *dev)
 
 static int pci_pm_freeze_noirq(struct device *dev)
 {
-	struct pci_dev *pci_dev = to_pci_dev(dev);
-	struct pci_driver *drv = pci_dev->driver;
+	struct device_driver *drv = dev->driver;
 	int error = 0;
 
 	if (drv && drv->pm) {
@@ -553,15 +550,14 @@ static int pci_pm_thaw(struct device *dev)
 
 static int pci_pm_thaw_noirq(struct device *dev)
 {
-	struct pci_dev *pci_dev = to_pci_dev(dev);
-	struct pci_driver *drv = pci_dev->driver;
+	struct device_driver *drv = dev->driver;
 	int error = 0;
 
 	if (drv && drv->pm) {
 		if (drv->pm->thaw_noirq)
 			error = drv->pm->thaw_noirq(dev);
 	} else {
-		pci_fixup_device(pci_fixup_resume_early, pci_dev);
+		pci_fixup_device(pci_fixup_resume_early, to_pci_dev(dev));
 		error = pci_legacy_resume_early(dev);
 	}
 
@@ -589,8 +585,7 @@ static int pci_pm_poweroff(struct device *dev)
 
 static int pci_pm_poweroff_noirq(struct device *dev)
 {
-	struct pci_dev *pci_dev = to_pci_dev(dev);
-	struct pci_driver *drv = pci_dev->driver;
+	struct device_driver *drv = dev->driver;
 	int error = 0;
 
 	if (drv && drv->pm) {
@@ -625,7 +620,7 @@ static int pci_pm_restore(struct device *dev)
 static int pci_pm_restore_noirq(struct device *dev)
 {
 	struct pci_dev *pci_dev = to_pci_dev(dev);
-	struct pci_driver *drv = pci_dev->driver;
+	struct device_driver *drv = dev->driver;
 	int error = 0;
 
 	pci_fixup_device(pci_fixup_resume, pci_dev);
@@ -654,17 +649,15 @@ static int pci_pm_restore_noirq(struct device *dev)
 
 #endif /* !CONFIG_HIBERNATION */
 
-struct pm_ext_ops pci_pm_ops = {
-	.base = {
-		.prepare = pci_pm_prepare,
-		.complete = pci_pm_complete,
-		.suspend = pci_pm_suspend,
-		.resume = pci_pm_resume,
-		.freeze = pci_pm_freeze,
-		.thaw = pci_pm_thaw,
-		.poweroff = pci_pm_poweroff,
-		.restore = pci_pm_restore,
-	},
+struct dev_pm_ops pci_dev_pm_ops = {
+	.prepare = pci_pm_prepare,
+	.complete = pci_pm_complete,
+	.suspend = pci_pm_suspend,
+	.resume = pci_pm_resume,
+	.freeze = pci_pm_freeze,
+	.thaw = pci_pm_thaw,
+	.poweroff = pci_pm_poweroff,
+	.restore = pci_pm_restore,
 	.suspend_noirq = pci_pm_suspend_noirq,
 	.resume_noirq = pci_pm_resume_noirq,
 	.freeze_noirq = pci_pm_freeze_noirq,
@@ -673,7 +666,7 @@ struct pm_ext_ops pci_pm_ops = {
 	.restore_noirq = pci_pm_restore_noirq,
 };
 
-#define PCI_PM_OPS_PTR	&pci_pm_ops
+#define PCI_PM_OPS_PTR	(&pci_dev_pm_ops)
 
 #else /* !CONFIG_PM_SLEEP */
 
@@ -703,9 +696,6 @@ int __pci_register_driver(struct pci_driver *drv, struct module *owner,
 	drv->driver.owner = owner;
 	drv->driver.mod_name = mod_name;
 
-	if (drv->pm)
-		drv->driver.pm = &drv->pm->base;
-
 	spin_lock_init(&drv->dynids.lock);
 	INIT_LIST_HEAD(&drv->dynids.list);
 
diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index be1fa0723f2c..399e15fc5052 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -286,7 +286,7 @@ static int usb_dev_restore(struct device *dev)
 	return usb_resume(dev);
 }
 
-static struct pm_ops usb_device_pm_ops = {
+static struct dev_pm_ops usb_device_pm_ops = {
 	.prepare =	usb_dev_prepare,
 	.complete =	usb_dev_complete,
 	.suspend =	usb_dev_suspend,
@@ -301,7 +301,7 @@ static struct pm_ops usb_device_pm_ops = {
 
 #define ksuspend_usb_init()	0
 #define ksuspend_usb_cleanup()	do {} while (0)
-#define usb_device_pm_ops	(*(struct pm_ops *)0)
+#define usb_device_pm_ops	(*(struct dev_pm_ops *)0)
 
 #endif	/* CONFIG_PM */
 
diff --git a/include/linux/device.h b/include/linux/device.h
index 1a3686d15f98..4a520051c315 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -65,7 +65,7 @@ struct bus_type {
 	int (*resume_early)(struct device *dev);
 	int (*resume)(struct device *dev);
 
-	struct pm_ext_ops *pm;
+	struct dev_pm_ops *pm;
 
 	struct bus_type_private *p;
 };
@@ -133,7 +133,7 @@ struct device_driver {
 	int (*resume) (struct device *dev);
 	struct attribute_group **groups;
 
-	struct pm_ops *pm;
+	struct dev_pm_ops *pm;
 
 	struct driver_private *p;
 };
@@ -198,7 +198,7 @@ struct class {
 	int (*suspend)(struct device *dev, pm_message_t state);
 	int (*resume)(struct device *dev);
 
-	struct pm_ops *pm;
+	struct dev_pm_ops *pm;
 	struct class_private *p;
 };
 
@@ -291,7 +291,7 @@ struct device_type {
 	int (*suspend)(struct device *dev, pm_message_t state);
 	int (*resume)(struct device *dev);
 
-	struct pm_ops *pm;
+	struct dev_pm_ops *pm;
 };
 
 /* interface for exporting device attributes */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 03b0b8c3c81b..4bb156ba854a 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -421,7 +421,6 @@ struct pci_driver {
 	int  (*resume_early) (struct pci_dev *dev);
 	int  (*resume) (struct pci_dev *dev);	                /* Device woken up */
 	void (*shutdown) (struct pci_dev *dev);
-	struct pm_ext_ops *pm;
 	struct pci_error_handlers *err_handler;
 	struct device_driver	driver;
 	struct pci_dynids dynids;
diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index 4b8cc6a32479..9a342699c607 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -55,7 +55,6 @@ struct platform_driver {
 	int (*suspend_late)(struct platform_device *, pm_message_t state);
 	int (*resume_early)(struct platform_device *);
 	int (*resume)(struct platform_device *);
-	struct pm_ext_ops *pm;
 	struct device_driver driver;
 };
 
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 42de4003c4ee..5785666d0cc4 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -41,7 +41,7 @@ typedef struct pm_message {
 } pm_message_t;
 
 /**
- * struct pm_ops - device PM callbacks
+ * struct dev_pm_ops - device PM callbacks
  *
  * Several driver power state transitions are externally visible, affecting
  * the state of pending I/O queues and (for drivers that touch hardware)
@@ -126,46 +126,6 @@ typedef struct pm_message {
  *	On most platforms, there are no restrictions on availability of
  *	resources like clocks during @restore().
  *
- * All of the above callbacks, except for @complete(), return error codes.
- * However, the error codes returned by the resume operations, @resume(),
- * @thaw(), and @restore(), do not cause the PM core to abort the resume
- * transition during which they are returned.  The error codes returned in
- * that cases are only printed by the PM core to the system logs for debugging
- * purposes.  Still, it is recommended that drivers only return error codes
- * from their resume methods in case of an unrecoverable failure (i.e. when the
- * device being handled refuses to resume and becomes unusable) to allow us to
- * modify the PM core in the future, so that it can avoid attempting to handle
- * devices that failed to resume and their children.
- *
- * It is allowed to unregister devices while the above callbacks are being
- * executed.  However, it is not allowed to unregister a device from within any
- * of its own callbacks.
- */
-
-struct pm_ops {
-	int (*prepare)(struct device *dev);
-	void (*complete)(struct device *dev);
-	int (*suspend)(struct device *dev);
-	int (*resume)(struct device *dev);
-	int (*freeze)(struct device *dev);
-	int (*thaw)(struct device *dev);
-	int (*poweroff)(struct device *dev);
-	int (*restore)(struct device *dev);
-};
-
-/**
- * struct pm_ext_ops - extended device PM callbacks
- *
- * Some devices require certain operations related to suspend and hibernation
- * to be carried out with interrupts disabled.  Thus, 'struct pm_ext_ops' below
- * is defined, adding callbacks to be executed with interrupts disabled to
- * 'struct pm_ops'.
- *
- * The following callbacks included in 'struct pm_ext_ops' are executed with
- * the nonboot CPUs switched off and with interrupts disabled on the only
- * functional CPU.  They also are executed with the PM core list of devices
- * locked, so they must NOT unregister any devices.
- *
  * @suspend_noirq: Complete the operations of ->suspend() by carrying out any
  *	actions required for suspending the device that need interrupts to be
  *	disabled
@@ -190,18 +150,32 @@ struct pm_ops {
  *	actions required for restoring the operations of the device that need
  *	interrupts to be disabled
  *
- * All of the above callbacks return error codes, but the error codes returned
- * by the resume operations, @resume_noirq(), @thaw_noirq(), and
- * @restore_noirq(), do not cause the PM core to abort the resume transition
- * during which they are returned.  The error codes returned in that cases are
- * only printed by the PM core to the system logs for debugging purposes.
- * Still, as stated above, it is recommended that drivers only return error
- * codes from their resume methods if the device being handled fails to resume
- * and is not usable any more.
+ * All of the above callbacks, except for @complete(), return error codes.
+ * However, the error codes returned by the resume operations, @resume(),
+ * @thaw(), @restore(), @resume_noirq(), @thaw_noirq(), and @restore_noirq() do
+ * not cause the PM core to abort the resume transition during which they are
+ * returned.  The error codes returned in that cases are only printed by the PM
+ * core to the system logs for debugging purposes.  Still, it is recommended
+ * that drivers only return error codes from their resume methods in case of an
+ * unrecoverable failure (i.e. when the device being handled refuses to resume
+ * and becomes unusable) to allow us to modify the PM core in the future, so
+ * that it can avoid attempting to handle devices that failed to resume and
+ * their children.
+ *
+ * It is allowed to unregister devices while the above callbacks are being
+ * executed.  However, it is not allowed to unregister a device from within any
+ * of its own callbacks.
  */
 
-struct pm_ext_ops {
-	struct pm_ops base;
+struct dev_pm_ops {
+	int (*prepare)(struct device *dev);
+	void (*complete)(struct device *dev);
+	int (*suspend)(struct device *dev);
+	int (*resume)(struct device *dev);
+	int (*freeze)(struct device *dev);
+	int (*thaw)(struct device *dev);
+	int (*poweroff)(struct device *dev);
+	int (*restore)(struct device *dev);
 	int (*suspend_noirq)(struct device *dev);
 	int (*resume_noirq)(struct device *dev);
 	int (*freeze_noirq)(struct device *dev);
-- 
cgit v1.2.3


From 7f4f5d4516b441d712fa0ffe5380618fb7fc545e Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Mon, 17 Nov 2008 11:14:19 -0500
Subject: Fix misspellings in pm.h macros

This patch (as1167) fixes some misspellings in various recently-added
macros in pm.h.  Fortunately these macros are not yet used anywhere.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Acked-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 include/linux/pm.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pm.h b/include/linux/pm.h
index 5785666d0cc4..de2e0a8f6728 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -252,7 +252,7 @@ struct dev_pm_ops {
 #define PM_EVENT_SLEEP		(PM_EVENT_SUSPEND | PM_EVENT_HIBERNATE)
 #define PM_EVENT_USER_SUSPEND	(PM_EVENT_USER | PM_EVENT_SUSPEND)
 #define PM_EVENT_USER_RESUME	(PM_EVENT_USER | PM_EVENT_RESUME)
-#define PM_EVENT_REMOTE_WAKEUP	(PM_EVENT_REMOTE | PM_EVENT_RESUME)
+#define PM_EVENT_REMOTE_RESUME	(PM_EVENT_REMOTE | PM_EVENT_RESUME)
 #define PM_EVENT_AUTO_SUSPEND	(PM_EVENT_AUTO | PM_EVENT_SUSPEND)
 #define PM_EVENT_AUTO_RESUME	(PM_EVENT_AUTO | PM_EVENT_RESUME)
 
@@ -265,15 +265,15 @@ struct dev_pm_ops {
 #define PMSG_THAW	((struct pm_message){ .event = PM_EVENT_THAW, })
 #define PMSG_RESTORE	((struct pm_message){ .event = PM_EVENT_RESTORE, })
 #define PMSG_RECOVER	((struct pm_message){ .event = PM_EVENT_RECOVER, })
-#define PMSG_USER_SUSPEND	((struct pm_messge) \
+#define PMSG_USER_SUSPEND	((struct pm_message) \
 					{ .event = PM_EVENT_USER_SUSPEND, })
-#define PMSG_USER_RESUME	((struct pm_messge) \
+#define PMSG_USER_RESUME	((struct pm_message) \
 					{ .event = PM_EVENT_USER_RESUME, })
-#define PMSG_REMOTE_RESUME	((struct pm_messge) \
+#define PMSG_REMOTE_RESUME	((struct pm_message) \
 					{ .event = PM_EVENT_REMOTE_RESUME, })
-#define PMSG_AUTO_SUSPEND	((struct pm_messge) \
+#define PMSG_AUTO_SUSPEND	((struct pm_message) \
 					{ .event = PM_EVENT_AUTO_SUSPEND, })
-#define PMSG_AUTO_RESUME		((struct pm_messge) \
+#define PMSG_AUTO_RESUME	((struct pm_message) \
 					{ .event = PM_EVENT_AUTO_RESUME, })
 
 /**
-- 
cgit v1.2.3


From 929d2fa5955ab27aa21fac615b23e0e92e8dc3a0 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew@wil.cx>
Date: Thu, 16 Oct 2008 15:51:35 -0600
Subject: driver core: Rearrange struct device for better packing

This minor rearrangement saves 16 bytes from sizeof(struct device)
according to pahole.

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 4a520051c315..4e14fad41430 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -373,9 +373,9 @@ struct device {
 
 	struct kobject kobj;
 	char	bus_id[BUS_ID_SIZE];	/* position on parent bus */
+	unsigned		uevent_suppress:1;
 	const char		*init_name; /* initial name of the device */
 	struct device_type	*type;
-	unsigned		uevent_suppress:1;
 
 	struct semaphore	sem;	/* semaphore to synchronize calls to
 					 * its driver.
@@ -408,12 +408,13 @@ struct device {
 	/* arch specific additions */
 	struct dev_archdata	archdata;
 
+	dev_t			devt;	/* dev_t, creates the sysfs "dev" */
+
 	spinlock_t		devres_lock;
 	struct list_head	devres_head;
 
 	struct klist_node	knode_class;
 	struct class		*class;
-	dev_t			devt;	/* dev_t, creates the sysfs "dev" */
 	struct attribute_group	**groups;	/* optional groups */
 
 	void	(*release)(struct device *dev);
-- 
cgit v1.2.3


From 210272a28465a7a31bcd580d2f9529f924965aa5 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew@wil.cx>
Date: Thu, 16 Oct 2008 14:57:54 -0600
Subject: driver core: Remove completion from struct klist_node

Removing the completion from klist_node reduces its size from 64 bytes
to 28 on x86-64.  To maintain the semantics of klist_remove(), we add
a single list of klist nodes which are pending deletion and scan them.

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/klist.h |  2 --
 lib/klist.c           | 43 ++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 40 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/klist.h b/include/linux/klist.h
index 8ea98db223e5..d5a27af9dba5 100644
--- a/include/linux/klist.h
+++ b/include/linux/klist.h
@@ -13,7 +13,6 @@
 #define _LINUX_KLIST_H
 
 #include <linux/spinlock.h>
-#include <linux/completion.h>
 #include <linux/kref.h>
 #include <linux/list.h>
 
@@ -41,7 +40,6 @@ struct klist_node {
 	void			*n_klist;	/* never access directly */
 	struct list_head	n_node;
 	struct kref		n_ref;
-	struct completion	n_removed;
 };
 
 extern void klist_add_tail(struct klist_node *n, struct klist *k);
diff --git a/lib/klist.c b/lib/klist.c
index bbdd3015c2c7..573d6068a42e 100644
--- a/lib/klist.c
+++ b/lib/klist.c
@@ -36,6 +36,7 @@
 
 #include <linux/klist.h>
 #include <linux/module.h>
+#include <linux/sched.h>
 
 /*
  * Use the lowest bit of n_klist to mark deleted nodes and exclude
@@ -108,7 +109,6 @@ static void add_tail(struct klist *k, struct klist_node *n)
 static void klist_node_init(struct klist *k, struct klist_node *n)
 {
 	INIT_LIST_HEAD(&n->n_node);
-	init_completion(&n->n_removed);
 	kref_init(&n->n_ref);
 	knode_set_klist(n, k);
 	if (k->get)
@@ -171,13 +171,34 @@ void klist_add_before(struct klist_node *n, struct klist_node *pos)
 }
 EXPORT_SYMBOL_GPL(klist_add_before);
 
+struct klist_waiter {
+	struct list_head list;
+	struct klist_node *node;
+	struct task_struct *process;
+	int woken;
+};
+
+static DEFINE_SPINLOCK(klist_remove_lock);
+static LIST_HEAD(klist_remove_waiters);
+
 static void klist_release(struct kref *kref)
 {
+	struct klist_waiter *waiter, *tmp;
 	struct klist_node *n = container_of(kref, struct klist_node, n_ref);
 
 	WARN_ON(!knode_dead(n));
 	list_del(&n->n_node);
-	complete(&n->n_removed);
+	spin_lock(&klist_remove_lock);
+	list_for_each_entry_safe(waiter, tmp, &klist_remove_waiters, list) {
+		if (waiter->node != n)
+			continue;
+
+		waiter->woken = 1;
+		mb();
+		wake_up_process(waiter->process);
+		list_del(&waiter->list);
+	}
+	spin_unlock(&klist_remove_lock);
 	knode_set_klist(n, NULL);
 }
 
@@ -217,8 +238,24 @@ EXPORT_SYMBOL_GPL(klist_del);
  */
 void klist_remove(struct klist_node *n)
 {
+	struct klist_waiter waiter;
+
+	waiter.node = n;
+	waiter.process = current;
+	waiter.woken = 0;
+	spin_lock(&klist_remove_lock);
+	list_add(&waiter.list, &klist_remove_waiters);
+	spin_unlock(&klist_remove_lock);
+
 	klist_del(n);
-	wait_for_completion(&n->n_removed);
+
+	for (;;) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		if (waiter.woken)
+			break;
+		schedule();
+	}
+	__set_current_state(TASK_RUNNING);
 }
 EXPORT_SYMBOL_GPL(klist_remove);
 
-- 
cgit v1.2.3


From 2831fe6f9cc4e16c103504ee09a47a084297c0f3 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Tue, 16 Dec 2008 12:23:36 -0800
Subject: driver core: create a private portion of struct device

This is to be used to move things out of struct device that no code
outside of the driver core should ever touch.

Cc: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/base.h    | 12 ++++++++++++
 drivers/base/core.c    |  8 ++++++++
 include/linux/device.h |  3 +++
 3 files changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/base.h b/drivers/base/base.h
index 0a5f055dffba..6b20809b5fd4 100644
--- a/drivers/base/base.h
+++ b/drivers/base/base.h
@@ -63,6 +63,18 @@ struct class_private {
 #define to_class(obj)	\
 	container_of(obj, struct class_private, class_subsys.kobj)
 
+/**
+ * struct device_private - structure to hold the private to the driver core portions of the device structure.
+ *
+ * @device - pointer back to the struct class that this structure is
+ * associated with.
+ *
+ * Nothing outside of the driver core should ever touch these fields.
+ */
+struct device_private {
+	struct device *device;
+};
+
 /* initialisation functions */
 extern int devices_init(void);
 extern int buses_init(void);
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 14aa2b6953c9..6657787e64f8 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -109,6 +109,7 @@ static struct sysfs_ops dev_sysfs_ops = {
 static void device_release(struct kobject *kobj)
 {
 	struct device *dev = to_dev(kobj);
+	struct device_private *p = dev->p;
 
 	if (dev->release)
 		dev->release(dev);
@@ -120,6 +121,7 @@ static void device_release(struct kobject *kobj)
 		WARN(1, KERN_ERR "Device '%s' does not have a release() "
 			"function, it is broken and must be fixed.\n",
 			dev_name(dev));
+	kfree(p);
 }
 
 static struct kobj_type device_ktype = {
@@ -536,6 +538,12 @@ static void klist_children_put(struct klist_node *n)
  */
 void device_initialize(struct device *dev)
 {
+	dev->p = kzalloc(sizeof(*dev->p), GFP_KERNEL);
+	if (!dev->p) {
+		WARN_ON(1);
+		return;
+	}
+	dev->p->device = dev;
 	dev->kobj.kset = devices_kset;
 	kobject_init(&dev->kobj, &device_ktype);
 	klist_init(&dev->klist_children, klist_children_get,
diff --git a/include/linux/device.h b/include/linux/device.h
index 4e14fad41430..d6d34084fd37 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -28,6 +28,7 @@
 #define BUS_ID_SIZE		20
 
 struct device;
+struct device_private;
 struct device_driver;
 struct driver_private;
 struct class;
@@ -371,6 +372,8 @@ struct device {
 	struct klist_node	knode_bus;
 	struct device		*parent;
 
+	struct device_private	*p;
+
 	struct kobject kobj;
 	char	bus_id[BUS_ID_SIZE];	/* position on parent bus */
 	unsigned		uevent_suppress:1;
-- 
cgit v1.2.3


From 11c3b5c3e08f4d855cbef52883c266b9ab9df879 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Tue, 16 Dec 2008 12:24:56 -0800
Subject: driver core: move klist_children into private structure

Nothing outside of the driver core should ever touch klist_children, or
knode_parent, so move them out of the public eye.

Cc: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/base.h    |  6 ++++++
 drivers/base/core.c    | 37 ++++++++++++++++++++++++-------------
 include/linux/device.h |  2 --
 3 files changed, 30 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/base.h b/drivers/base/base.h
index 6b20809b5fd4..f5cf31c664d7 100644
--- a/drivers/base/base.h
+++ b/drivers/base/base.h
@@ -66,14 +66,20 @@ struct class_private {
 /**
  * struct device_private - structure to hold the private to the driver core portions of the device structure.
  *
+ * @klist_children - klist containing all children of this device
+ * @knode_parent - node in sibling list
  * @device - pointer back to the struct class that this structure is
  * associated with.
  *
  * Nothing outside of the driver core should ever touch these fields.
  */
 struct device_private {
+	struct klist klist_children;
+	struct klist_node knode_parent;
 	struct device *device;
 };
+#define to_device_private_parent(obj)	\
+	container_of(obj, struct device_private, knode_parent)
 
 /* initialisation functions */
 extern int devices_init(void);
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 6657787e64f8..180ff84ea26c 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -509,14 +509,16 @@ EXPORT_SYMBOL_GPL(device_schedule_callback_owner);
 
 static void klist_children_get(struct klist_node *n)
 {
-	struct device *dev = container_of(n, struct device, knode_parent);
+	struct device_private *p = to_device_private_parent(n);
+	struct device *dev = p->device;
 
 	get_device(dev);
 }
 
 static void klist_children_put(struct klist_node *n)
 {
-	struct device *dev = container_of(n, struct device, knode_parent);
+	struct device_private *p = to_device_private_parent(n);
+	struct device *dev = p->device;
 
 	put_device(dev);
 }
@@ -546,7 +548,7 @@ void device_initialize(struct device *dev)
 	dev->p->device = dev;
 	dev->kobj.kset = devices_kset;
 	kobject_init(&dev->kobj, &device_ktype);
-	klist_init(&dev->klist_children, klist_children_get,
+	klist_init(&dev->p->klist_children, klist_children_get,
 		   klist_children_put);
 	INIT_LIST_HEAD(&dev->dma_pools);
 	init_MUTEX(&dev->sem);
@@ -927,7 +929,8 @@ int device_add(struct device *dev)
 	kobject_uevent(&dev->kobj, KOBJ_ADD);
 	bus_attach_device(dev);
 	if (parent)
-		klist_add_tail(&dev->knode_parent, &parent->klist_children);
+		klist_add_tail(&dev->p->knode_parent,
+			       &parent->p->klist_children);
 
 	if (dev->class) {
 		mutex_lock(&dev->class->p->class_mutex);
@@ -1038,7 +1041,7 @@ void device_del(struct device *dev)
 	device_pm_remove(dev);
 	dpm_sysfs_remove(dev);
 	if (parent)
-		klist_del(&dev->knode_parent);
+		klist_del(&dev->p->knode_parent);
 	if (MAJOR(dev->devt)) {
 		device_remove_sys_dev_entry(dev);
 		device_remove_file(dev, &devt_attr);
@@ -1102,7 +1105,14 @@ void device_unregister(struct device *dev)
 static struct device *next_device(struct klist_iter *i)
 {
 	struct klist_node *n = klist_next(i);
-	return n ? container_of(n, struct device, knode_parent) : NULL;
+	struct device *dev = NULL;
+	struct device_private *p;
+
+	if (n) {
+		p = to_device_private_parent(n);
+		dev = p->device;
+	}
+	return dev;
 }
 
 /**
@@ -1124,7 +1134,7 @@ int device_for_each_child(struct device *parent, void *data,
 	struct device *child;
 	int error = 0;
 
-	klist_iter_init(&parent->klist_children, &i);
+	klist_iter_init(&parent->p->klist_children, &i);
 	while ((child = next_device(&i)) && !error)
 		error = fn(child, data);
 	klist_iter_exit(&i);
@@ -1155,7 +1165,7 @@ struct device *device_find_child(struct device *parent, void *data,
 	if (!parent)
 		return NULL;
 
-	klist_iter_init(&parent->klist_children, &i);
+	klist_iter_init(&parent->p->klist_children, &i);
 	while ((child = next_device(&i)))
 		if (match(child, data) && get_device(child))
 			break;
@@ -1478,9 +1488,10 @@ int device_move(struct device *dev, struct device *new_parent)
 	old_parent = dev->parent;
 	dev->parent = new_parent;
 	if (old_parent)
-		klist_remove(&dev->knode_parent);
+		klist_remove(&dev->p->knode_parent);
 	if (new_parent) {
-		klist_add_tail(&dev->knode_parent, &new_parent->klist_children);
+		klist_add_tail(&dev->p->knode_parent,
+			       &new_parent->p->klist_children);
 		set_dev_node(dev, dev_to_node(new_parent));
 	}
 
@@ -1492,11 +1503,11 @@ int device_move(struct device *dev, struct device *new_parent)
 		device_move_class_links(dev, new_parent, old_parent);
 		if (!kobject_move(&dev->kobj, &old_parent->kobj)) {
 			if (new_parent)
-				klist_remove(&dev->knode_parent);
+				klist_remove(&dev->p->knode_parent);
 			dev->parent = old_parent;
 			if (old_parent) {
-				klist_add_tail(&dev->knode_parent,
-					       &old_parent->klist_children);
+				klist_add_tail(&dev->p->knode_parent,
+					       &old_parent->p->klist_children);
 				set_dev_node(dev, dev_to_node(old_parent));
 			}
 		}
diff --git a/include/linux/device.h b/include/linux/device.h
index d6d34084fd37..60423e687205 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -366,8 +366,6 @@ struct device_dma_parameters {
 };
 
 struct device {
-	struct klist		klist_children;
-	struct klist_node	knode_parent;	/* node in sibling list */
 	struct klist_node	knode_driver;
 	struct klist_node	knode_bus;
 	struct device		*parent;
-- 
cgit v1.2.3


From 93e746db183b3bdbbda67900f79b5835f9cb388f Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Tue, 16 Dec 2008 12:25:49 -0800
Subject: driver core: move knode_driver into private structure

Nothing outside of the driver core should ever touch knode_driver, so
move it out of the public eye.

Cc: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/base.h    |  4 ++++
 drivers/base/dd.c      | 13 ++++++++-----
 drivers/base/driver.c  | 13 ++++++++++---
 include/linux/device.h |  1 -
 4 files changed, 22 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/base.h b/drivers/base/base.h
index f5cf31c664d7..8af0bb2c0aa8 100644
--- a/drivers/base/base.h
+++ b/drivers/base/base.h
@@ -68,6 +68,7 @@ struct class_private {
  *
  * @klist_children - klist containing all children of this device
  * @knode_parent - node in sibling list
+ * @knode_driver - node in driver list
  * @device - pointer back to the struct class that this structure is
  * associated with.
  *
@@ -76,10 +77,13 @@ struct class_private {
 struct device_private {
 	struct klist klist_children;
 	struct klist_node knode_parent;
+	struct klist_node knode_driver;
 	struct device *device;
 };
 #define to_device_private_parent(obj)	\
 	container_of(obj, struct device_private, knode_parent)
+#define to_device_private_driver(obj)	\
+	container_of(obj, struct device_private, knode_driver)
 
 /* initialisation functions */
 extern int devices_init(void);
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index 17a8e45cf9c6..bb5f1eb83c03 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -28,7 +28,7 @@
 
 static void driver_bound(struct device *dev)
 {
-	if (klist_node_attached(&dev->knode_driver)) {
+	if (klist_node_attached(&dev->p->knode_driver)) {
 		printk(KERN_WARNING "%s: device %s already bound\n",
 			__func__, kobject_name(&dev->kobj));
 		return;
@@ -41,7 +41,7 @@ static void driver_bound(struct device *dev)
 		blocking_notifier_call_chain(&dev->bus->p->bus_notifier,
 					     BUS_NOTIFY_BOUND_DRIVER, dev);
 
-	klist_add_tail(&dev->knode_driver, &dev->driver->p->klist_devices);
+	klist_add_tail(&dev->p->knode_driver, &dev->driver->p->klist_devices);
 }
 
 static int driver_sysfs_add(struct device *dev)
@@ -311,7 +311,7 @@ static void __device_release_driver(struct device *dev)
 			drv->remove(dev);
 		devres_release_all(dev);
 		dev->driver = NULL;
-		klist_remove(&dev->knode_driver);
+		klist_remove(&dev->p->knode_driver);
 	}
 }
 
@@ -341,6 +341,7 @@ EXPORT_SYMBOL_GPL(device_release_driver);
  */
 void driver_detach(struct device_driver *drv)
 {
+	struct device_private *dev_prv;
 	struct device *dev;
 
 	for (;;) {
@@ -349,8 +350,10 @@ void driver_detach(struct device_driver *drv)
 			spin_unlock(&drv->p->klist_devices.k_lock);
 			break;
 		}
-		dev = list_entry(drv->p->klist_devices.k_list.prev,
-				struct device, knode_driver.n_node);
+		dev_prv = list_entry(drv->p->klist_devices.k_list.prev,
+				     struct device_private,
+				     knode_driver.n_node);
+		dev = dev_prv->device;
 		get_device(dev);
 		spin_unlock(&drv->p->klist_devices.k_lock);
 
diff --git a/drivers/base/driver.c b/drivers/base/driver.c
index 1e2bda780e48..b76cc69f1106 100644
--- a/drivers/base/driver.c
+++ b/drivers/base/driver.c
@@ -19,7 +19,14 @@
 static struct device *next_device(struct klist_iter *i)
 {
 	struct klist_node *n = klist_next(i);
-	return n ? container_of(n, struct device, knode_driver) : NULL;
+	struct device *dev = NULL;
+	struct device_private *dev_prv;
+
+	if (n) {
+		dev_prv = to_device_private_driver(n);
+		dev = dev_prv->device;
+	}
+	return dev;
 }
 
 /**
@@ -42,7 +49,7 @@ int driver_for_each_device(struct device_driver *drv, struct device *start,
 		return -EINVAL;
 
 	klist_iter_init_node(&drv->p->klist_devices, &i,
-			     start ? &start->knode_driver : NULL);
+			     start ? &start->p->knode_driver : NULL);
 	while ((dev = next_device(&i)) && !error)
 		error = fn(dev, data);
 	klist_iter_exit(&i);
@@ -76,7 +83,7 @@ struct device *driver_find_device(struct device_driver *drv,
 		return NULL;
 
 	klist_iter_init_node(&drv->p->klist_devices, &i,
-			     (start ? &start->knode_driver : NULL));
+			     (start ? &start->p->knode_driver : NULL));
 	while ((dev = next_device(&i)))
 		if (match(dev, data) && get_device(dev))
 			break;
diff --git a/include/linux/device.h b/include/linux/device.h
index 60423e687205..e3630222c3c1 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -366,7 +366,6 @@ struct device_dma_parameters {
 };
 
 struct device {
-	struct klist_node	knode_driver;
 	struct klist_node	knode_bus;
 	struct device		*parent;
 
-- 
cgit v1.2.3


From b9daa99ee533578e3f88231e7a16784dcb44ec42 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Tue, 16 Dec 2008 12:26:21 -0800
Subject: driver core: move knode_bus into private structure

Nothing outside of the driver core should ever touch knode_bus, so
move it out of the public eye.

Cc: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/base.h    |  4 ++++
 drivers/base/bus.c     | 40 +++++++++++++++++++++++++++-------------
 include/linux/device.h |  1 -
 3 files changed, 31 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/base.h b/drivers/base/base.h
index 8af0bb2c0aa8..b676f8f801f8 100644
--- a/drivers/base/base.h
+++ b/drivers/base/base.h
@@ -69,6 +69,7 @@ struct class_private {
  * @klist_children - klist containing all children of this device
  * @knode_parent - node in sibling list
  * @knode_driver - node in driver list
+ * @knode_bus - node in bus list
  * @device - pointer back to the struct class that this structure is
  * associated with.
  *
@@ -78,12 +79,15 @@ struct device_private {
 	struct klist klist_children;
 	struct klist_node knode_parent;
 	struct klist_node knode_driver;
+	struct klist_node knode_bus;
 	struct device *device;
 };
 #define to_device_private_parent(obj)	\
 	container_of(obj, struct device_private, knode_parent)
 #define to_device_private_driver(obj)	\
 	container_of(obj, struct device_private, knode_driver)
+#define to_device_private_bus(obj)	\
+	container_of(obj, struct device_private, knode_bus)
 
 /* initialisation functions */
 extern int devices_init(void);
diff --git a/drivers/base/bus.c b/drivers/base/bus.c
index 83f32b891fa9..0f0a50444672 100644
--- a/drivers/base/bus.c
+++ b/drivers/base/bus.c
@@ -253,7 +253,14 @@ static ssize_t store_drivers_probe(struct bus_type *bus,
 static struct device *next_device(struct klist_iter *i)
 {
 	struct klist_node *n = klist_next(i);
-	return n ? container_of(n, struct device, knode_bus) : NULL;
+	struct device *dev = NULL;
+	struct device_private *dev_prv;
+
+	if (n) {
+		dev_prv = to_device_private_bus(n);
+		dev = dev_prv->device;
+	}
+	return dev;
 }
 
 /**
@@ -286,7 +293,7 @@ int bus_for_each_dev(struct bus_type *bus, struct device *start,
 		return -EINVAL;
 
 	klist_iter_init_node(&bus->p->klist_devices, &i,
-			     (start ? &start->knode_bus : NULL));
+			     (start ? &start->p->knode_bus : NULL));
 	while ((dev = next_device(&i)) && !error)
 		error = fn(dev, data);
 	klist_iter_exit(&i);
@@ -320,7 +327,7 @@ struct device *bus_find_device(struct bus_type *bus,
 		return NULL;
 
 	klist_iter_init_node(&bus->p->klist_devices, &i,
-			     (start ? &start->knode_bus : NULL));
+			     (start ? &start->p->knode_bus : NULL));
 	while ((dev = next_device(&i)))
 		if (match(dev, data) && get_device(dev))
 			break;
@@ -507,7 +514,8 @@ void bus_attach_device(struct device *dev)
 			ret = device_attach(dev);
 		WARN_ON(ret < 0);
 		if (ret >= 0)
-			klist_add_tail(&dev->knode_bus, &bus->p->klist_devices);
+			klist_add_tail(&dev->p->knode_bus,
+				       &bus->p->klist_devices);
 	}
 }
 
@@ -528,8 +536,8 @@ void bus_remove_device(struct device *dev)
 		sysfs_remove_link(&dev->bus->p->devices_kset->kobj,
 				  dev_name(dev));
 		device_remove_attrs(dev->bus, dev);
-		if (klist_node_attached(&dev->knode_bus))
-			klist_del(&dev->knode_bus);
+		if (klist_node_attached(&dev->p->knode_bus))
+			klist_del(&dev->p->knode_bus);
 
 		pr_debug("bus: '%s': remove device %s\n",
 			 dev->bus->name, dev_name(dev));
@@ -831,14 +839,16 @@ static void bus_remove_attrs(struct bus_type *bus)
 
 static void klist_devices_get(struct klist_node *n)
 {
-	struct device *dev = container_of(n, struct device, knode_bus);
+	struct device_private *dev_prv = to_device_private_bus(n);
+	struct device *dev = dev_prv->device;
 
 	get_device(dev);
 }
 
 static void klist_devices_put(struct klist_node *n)
 {
-	struct device *dev = container_of(n, struct device, knode_bus);
+	struct device_private *dev_prv = to_device_private_bus(n);
+	struct device *dev = dev_prv->device;
 
 	put_device(dev);
 }
@@ -993,18 +1003,20 @@ static void device_insertion_sort_klist(struct device *a, struct list_head *list
 {
 	struct list_head *pos;
 	struct klist_node *n;
+	struct device_private *dev_prv;
 	struct device *b;
 
 	list_for_each(pos, list) {
 		n = container_of(pos, struct klist_node, n_node);
-		b = container_of(n, struct device, knode_bus);
+		dev_prv = to_device_private_bus(n);
+		b = dev_prv->device;
 		if (compare(a, b) <= 0) {
-			list_move_tail(&a->knode_bus.n_node,
-				       &b->knode_bus.n_node);
+			list_move_tail(&a->p->knode_bus.n_node,
+				       &b->p->knode_bus.n_node);
 			return;
 		}
 	}
-	list_move_tail(&a->knode_bus.n_node, list);
+	list_move_tail(&a->p->knode_bus.n_node, list);
 }
 
 void bus_sort_breadthfirst(struct bus_type *bus,
@@ -1014,6 +1026,7 @@ void bus_sort_breadthfirst(struct bus_type *bus,
 	LIST_HEAD(sorted_devices);
 	struct list_head *pos, *tmp;
 	struct klist_node *n;
+	struct device_private *dev_prv;
 	struct device *dev;
 	struct klist *device_klist;
 
@@ -1022,7 +1035,8 @@ void bus_sort_breadthfirst(struct bus_type *bus,
 	spin_lock(&device_klist->k_lock);
 	list_for_each_safe(pos, tmp, &device_klist->k_list) {
 		n = container_of(pos, struct klist_node, n_node);
-		dev = container_of(n, struct device, knode_bus);
+		dev_prv = to_device_private_bus(n);
+		dev = dev_prv->device;
 		device_insertion_sort_klist(dev, &sorted_devices, compare);
 	}
 	list_splice(&sorted_devices, &device_klist->k_list);
diff --git a/include/linux/device.h b/include/linux/device.h
index e3630222c3c1..e21b5d69d67c 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -366,7 +366,6 @@ struct device_dma_parameters {
 };
 
 struct device {
-	struct klist_node	knode_bus;
 	struct device		*parent;
 
 	struct device_private	*p;
-- 
cgit v1.2.3


From d0d85ff989222f08dd1fa66321fef5567bbc4a7b Mon Sep 17 00:00:00 2001
From: Cornelia Huck <cornelia.huck@de.ibm.com>
Date: Thu, 4 Dec 2008 16:55:47 +0100
Subject: Make DEBUG take precedence over DYNAMIC_PRINTK_DEBUG

Statically defined DEBUG should take precedence over
dynamically enabled debugging; otherwise adding DEBUG
(like, for example, via CONFIG_DEBUG_KOBJECT) does not
have the expected result of printing pr_debug() and dev_dbg()
messages unconditionally.

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Acked-by: Jason Baron <jbaron@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 8 ++++----
 include/linux/kernel.h | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index e21b5d69d67c..b97a0cf1eb05 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -553,13 +553,13 @@ extern const char *dev_driver_string(const struct device *dev);
 #define dev_info(dev, format, arg...)		\
 	dev_printk(KERN_INFO , dev , format , ## arg)
 
-#if defined(CONFIG_DYNAMIC_PRINTK_DEBUG)
+#if defined(DEBUG)
+#define dev_dbg(dev, format, arg...)		\
+	dev_printk(KERN_DEBUG , dev , format , ## arg)
+#elif defined(CONFIG_DYNAMIC_PRINTK_DEBUG)
 #define dev_dbg(dev, format, ...) do { \
 	dynamic_dev_dbg(dev, format, ##__VA_ARGS__); \
 	} while (0)
-#elif defined(DEBUG)
-#define dev_dbg(dev, format, arg...)		\
-	dev_printk(KERN_DEBUG , dev , format , ## arg)
 #else
 #define dev_dbg(dev, format, arg...)		\
 	({ if (0) dev_printk(KERN_DEBUG, dev, format, ##arg); 0; })
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index ca9ff6411dfa..d242fe1381fd 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -349,13 +349,13 @@ static inline char *pack_hex_byte(char *buf, u8 byte)
         printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
 
 /* If you are writing a driver, please use dev_dbg instead */
-#if defined(CONFIG_DYNAMIC_PRINTK_DEBUG)
+#if defined(DEBUG)
+#define pr_debug(fmt, ...) \
+	printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
+#elif defined(CONFIG_DYNAMIC_PRINTK_DEBUG)
 #define pr_debug(fmt, ...) do { \
 	dynamic_pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \
 	} while (0)
-#elif defined(DEBUG)
-#define pr_debug(fmt, ...) \
-	printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
 #else
 #define pr_debug(fmt, ...) \
 	({ if (0) printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); 0; })
-- 
cgit v1.2.3


From 0aa0dc41bfd993491c2344870eee7a3b218551fb Mon Sep 17 00:00:00 2001
From: Mark McLoughlin <markmc@redhat.com>
Date: Mon, 15 Dec 2008 12:58:26 +0000
Subject: driver core: add root_device_register()

Add support for allocating root device objects which group
device objects under /sys/devices directories.

Also add a sysfs 'module' symlink which points to the owner
of the root device object. This symlink will be used in virtio
to allow userspace to determine which virtio bus implementation
a given device is associated with.

[Includes suggestions from Cornelia Huck]

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/core.c    | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/device.h | 11 ++++++
 2 files changed, 102 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index ee555d7d5c3f..61df508fa62b 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -1217,6 +1217,97 @@ EXPORT_SYMBOL_GPL(put_device);
 EXPORT_SYMBOL_GPL(device_create_file);
 EXPORT_SYMBOL_GPL(device_remove_file);
 
+struct root_device
+{
+	struct device dev;
+	struct module *owner;
+};
+
+#define to_root_device(dev) container_of(dev, struct root_device, dev)
+
+static void root_device_release(struct device *dev)
+{
+	kfree(to_root_device(dev));
+}
+
+/**
+ * __root_device_register - allocate and register a root device
+ * @name: root device name
+ * @owner: owner module of the root device, usually THIS_MODULE
+ *
+ * This function allocates a root device and registers it
+ * using device_register(). In order to free the returned
+ * device, use root_device_unregister().
+ *
+ * Root devices are dummy devices which allow other devices
+ * to be grouped under /sys/devices. Use this function to
+ * allocate a root device and then use it as the parent of
+ * any device which should appear under /sys/devices/{name}
+ *
+ * The /sys/devices/{name} directory will also contain a
+ * 'module' symlink which points to the @owner directory
+ * in sysfs.
+ *
+ * Note: You probably want to use root_device_register().
+ */
+struct device *__root_device_register(const char *name, struct module *owner)
+{
+	struct root_device *root;
+	int err = -ENOMEM;
+
+	root = kzalloc(sizeof(struct root_device), GFP_KERNEL);
+	if (!root)
+		return ERR_PTR(err);
+
+	err = dev_set_name(&root->dev, name);
+	if (err) {
+		kfree(root);
+		return ERR_PTR(err);
+	}
+
+	root->dev.release = root_device_release;
+
+	err = device_register(&root->dev);
+	if (err) {
+		put_device(&root->dev);
+		return ERR_PTR(err);
+	}
+
+#ifdef CONFIG_MODULE	/* gotta find a "cleaner" way to do this */
+	if (owner) {
+		struct module_kobject *mk = &owner->mkobj;
+
+		err = sysfs_create_link(&root->dev.kobj, &mk->kobj, "module");
+		if (err) {
+			device_unregister(&root->dev);
+			return ERR_PTR(err);
+		}
+		root->owner = owner;
+	}
+#endif
+
+	return &root->dev;
+}
+EXPORT_SYMBOL_GPL(__root_device_register);
+
+/**
+ * root_device_unregister - unregister and free a root device
+ * @root: device going away.
+ *
+ * This function unregisters and cleans up a device that was created by
+ * root_device_register().
+ */
+void root_device_unregister(struct device *dev)
+{
+	struct root_device *root = to_root_device(dev);
+
+	if (root->owner)
+		sysfs_remove_link(&root->dev.kobj, "module");
+
+	device_unregister(dev);
+}
+EXPORT_SYMBOL_GPL(root_device_unregister);
+
 
 static void device_create_release(struct device *dev)
 {
diff --git a/include/linux/device.h b/include/linux/device.h
index b97a0cf1eb05..7d9da4b4993f 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -482,6 +482,17 @@ extern struct device *device_find_child(struct device *dev, void *data,
 extern int device_rename(struct device *dev, char *new_name);
 extern int device_move(struct device *dev, struct device *new_parent);
 
+/*
+ * Root device objects for grouping under /sys/devices
+ */
+extern struct device *__root_device_register(const char *name,
+					     struct module *owner);
+static inline struct device *root_device_register(const char *name)
+{
+	return __root_device_register(name, THIS_MODULE);
+}
+extern void root_device_unregister(struct device *root);
+
 /*
  * Manual binding of a device to driver. See drivers/base/bus.c
  * for information on use.
-- 
cgit v1.2.3


From 475b44c19913b877537c8bc19799f75b0b142641 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Tue, 6 Jan 2009 10:44:38 -0800
Subject: mtd: struct device - replace bus_id with dev_name(), dev_set_name()

CC: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/mtd/devices/m25p80.c        | 16 ++++++++--------
 drivers/mtd/devices/mtd_dataflash.c | 30 +++++++++++++++---------------
 drivers/mtd/maps/integrator-flash.c |  2 +-
 drivers/mtd/maps/ixp2000.c          |  4 ++--
 drivers/mtd/maps/ixp4xx.c           |  2 +-
 drivers/mtd/maps/omap_nor.c         |  2 +-
 drivers/mtd/maps/physmap.c          |  6 +++---
 drivers/mtd/maps/physmap_of.c       |  4 ++--
 drivers/mtd/mtdconcat.c             |  2 +-
 drivers/mtd/nand/fsl_upm.c          |  2 +-
 drivers/mtd/nand/plat_nand.c        |  2 +-
 drivers/mtd/nand/tmio_nand.c        |  2 +-
 drivers/mtd/onenand/generic.c       |  2 +-
 drivers/mtd/onenand/omap2.c         |  2 +-
 drivers/mtd/ubi/build.c             |  2 +-
 drivers/mtd/ubi/vmt.c               |  4 ++--
 include/linux/mtd/concat.h          |  2 +-
 17 files changed, 43 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c
index 6659b2275c0c..5733f0643843 100644
--- a/drivers/mtd/devices/m25p80.c
+++ b/drivers/mtd/devices/m25p80.c
@@ -170,7 +170,7 @@ static int wait_till_ready(struct m25p *flash)
 static int erase_chip(struct m25p *flash)
 {
 	DEBUG(MTD_DEBUG_LEVEL3, "%s: %s %dKiB\n",
-			flash->spi->dev.bus_id, __func__,
+			dev_name(&flash->spi->dev), __func__,
 			flash->mtd.size / 1024);
 
 	/* Wait until finished previous write command. */
@@ -197,7 +197,7 @@ static int erase_chip(struct m25p *flash)
 static int erase_sector(struct m25p *flash, u32 offset)
 {
 	DEBUG(MTD_DEBUG_LEVEL3, "%s: %s %dKiB at 0x%08x\n",
-			flash->spi->dev.bus_id, __func__,
+			dev_name(&flash->spi->dev), __func__,
 			flash->mtd.erasesize / 1024, offset);
 
 	/* Wait until finished previous write command. */
@@ -234,7 +234,7 @@ static int m25p80_erase(struct mtd_info *mtd, struct erase_info *instr)
 	u32 addr,len;
 
 	DEBUG(MTD_DEBUG_LEVEL2, "%s: %s %s 0x%08x, len %d\n",
-			flash->spi->dev.bus_id, __func__, "at",
+			dev_name(&flash->spi->dev), __func__, "at",
 			(u32)instr->addr, instr->len);
 
 	/* sanity checks */
@@ -295,7 +295,7 @@ static int m25p80_read(struct mtd_info *mtd, loff_t from, size_t len,
 	struct spi_message m;
 
 	DEBUG(MTD_DEBUG_LEVEL2, "%s: %s %s 0x%08x, len %zd\n",
-			flash->spi->dev.bus_id, __func__, "from",
+			dev_name(&flash->spi->dev), __func__, "from",
 			(u32)from, len);
 
 	/* sanity checks */
@@ -367,7 +367,7 @@ static int m25p80_write(struct mtd_info *mtd, loff_t to, size_t len,
 	struct spi_message m;
 
 	DEBUG(MTD_DEBUG_LEVEL2, "%s: %s %s 0x%08x, len %zd\n",
-			flash->spi->dev.bus_id, __func__, "to",
+			dev_name(&flash->spi->dev), __func__, "to",
 			(u32)to, len);
 
 	if (retlen)
@@ -563,7 +563,7 @@ static struct flash_info *__devinit jedec_probe(struct spi_device *spi)
 	tmp = spi_write_then_read(spi, &code, 1, id, 5);
 	if (tmp < 0) {
 		DEBUG(MTD_DEBUG_LEVEL0, "%s: error %d reading JEDEC ID\n",
-			spi->dev.bus_id, tmp);
+			dev_name(&spi->dev), tmp);
 		return NULL;
 	}
 	jedec = id[0];
@@ -617,7 +617,7 @@ static int __devinit m25p_probe(struct spi_device *spi)
 		/* unrecognized chip? */
 		if (i == ARRAY_SIZE(m25p_data)) {
 			DEBUG(MTD_DEBUG_LEVEL0, "%s: unrecognized id %s\n",
-					spi->dev.bus_id, data->type);
+					dev_name(&spi->dev), data->type);
 			info = NULL;
 
 		/* recognized; is that chip really what's there? */
@@ -658,7 +658,7 @@ static int __devinit m25p_probe(struct spi_device *spi)
 	if (data && data->name)
 		flash->mtd.name = data->name;
 	else
-		flash->mtd.name = spi->dev.bus_id;
+		flash->mtd.name = dev_name(&spi->dev);
 
 	flash->mtd.type = MTD_NORFLASH;
 	flash->mtd.writesize = 1;
diff --git a/drivers/mtd/devices/mtd_dataflash.c b/drivers/mtd/devices/mtd_dataflash.c
index 6dd9aff8bb2d..65126cd668ff 100644
--- a/drivers/mtd/devices/mtd_dataflash.c
+++ b/drivers/mtd/devices/mtd_dataflash.c
@@ -128,7 +128,7 @@ static int dataflash_waitready(struct spi_device *spi)
 		status = dataflash_status(spi);
 		if (status < 0) {
 			DEBUG(MTD_DEBUG_LEVEL1, "%s: status %d?\n",
-					spi->dev.bus_id, status);
+					dev_name(&spi->dev), status);
 			status = 0;
 		}
 
@@ -154,7 +154,7 @@ static int dataflash_erase(struct mtd_info *mtd, struct erase_info *instr)
 	uint8_t			*command;
 
 	DEBUG(MTD_DEBUG_LEVEL2, "%s: erase addr=0x%x len 0x%x\n",
-			spi->dev.bus_id,
+			dev_name(&spi->dev),
 			instr->addr, instr->len);
 
 	/* Sanity checks */
@@ -197,7 +197,7 @@ static int dataflash_erase(struct mtd_info *mtd, struct erase_info *instr)
 
 		if (status < 0) {
 			printk(KERN_ERR "%s: erase %x, err %d\n",
-				spi->dev.bus_id, pageaddr, status);
+				dev_name(&spi->dev), pageaddr, status);
 			/* REVISIT:  can retry instr->retries times; or
 			 * giveup and instr->fail_addr = instr->addr;
 			 */
@@ -239,7 +239,7 @@ static int dataflash_read(struct mtd_info *mtd, loff_t from, size_t len,
 	int			status;
 
 	DEBUG(MTD_DEBUG_LEVEL2, "%s: read 0x%x..0x%x\n",
-		priv->spi->dev.bus_id, (unsigned)from, (unsigned)(from + len));
+		dev_name(&priv->spi->dev), (unsigned)from, (unsigned)(from + len));
 
 	*retlen = 0;
 
@@ -288,7 +288,7 @@ static int dataflash_read(struct mtd_info *mtd, loff_t from, size_t len,
 		status = 0;
 	} else
 		DEBUG(MTD_DEBUG_LEVEL1, "%s: read %x..%x --> %d\n",
-			priv->spi->dev.bus_id,
+			dev_name(&priv->spi->dev),
 			(unsigned)from, (unsigned)(from + len),
 			status);
 	return status;
@@ -315,7 +315,7 @@ static int dataflash_write(struct mtd_info *mtd, loff_t to, size_t len,
 	uint8_t			*command;
 
 	DEBUG(MTD_DEBUG_LEVEL2, "%s: write 0x%x..0x%x\n",
-		spi->dev.bus_id, (unsigned)to, (unsigned)(to + len));
+		dev_name(&spi->dev), (unsigned)to, (unsigned)(to + len));
 
 	*retlen = 0;
 
@@ -374,7 +374,7 @@ static int dataflash_write(struct mtd_info *mtd, loff_t to, size_t len,
 			status = spi_sync(spi, &msg);
 			if (status < 0)
 				DEBUG(MTD_DEBUG_LEVEL1, "%s: xfer %u -> %d \n",
-					spi->dev.bus_id, addr, status);
+					dev_name(&spi->dev), addr, status);
 
 			(void) dataflash_waitready(priv->spi);
 		}
@@ -396,7 +396,7 @@ static int dataflash_write(struct mtd_info *mtd, loff_t to, size_t len,
 		spi_transfer_del(x + 1);
 		if (status < 0)
 			DEBUG(MTD_DEBUG_LEVEL1, "%s: pgm %u/%u -> %d \n",
-				spi->dev.bus_id, addr, writelen, status);
+				dev_name(&spi->dev), addr, writelen, status);
 
 		(void) dataflash_waitready(priv->spi);
 
@@ -416,14 +416,14 @@ static int dataflash_write(struct mtd_info *mtd, loff_t to, size_t len,
 		status = spi_sync(spi, &msg);
 		if (status < 0)
 			DEBUG(MTD_DEBUG_LEVEL1, "%s: compare %u -> %d \n",
-				spi->dev.bus_id, addr, status);
+				dev_name(&spi->dev), addr, status);
 
 		status = dataflash_waitready(priv->spi);
 
 		/* Check result of the compare operation */
 		if (status & (1 << 6)) {
 			printk(KERN_ERR "%s: compare page %u, err %d\n",
-				spi->dev.bus_id, pageaddr, status);
+				dev_name(&spi->dev), pageaddr, status);
 			remaining = 0;
 			status = -EIO;
 			break;
@@ -779,7 +779,7 @@ static struct flash_info *__devinit jedec_probe(struct spi_device *spi)
 	tmp = spi_write_then_read(spi, &code, 1, id, 3);
 	if (tmp < 0) {
 		DEBUG(MTD_DEBUG_LEVEL0, "%s: error %d reading JEDEC ID\n",
-			spi->dev.bus_id, tmp);
+			dev_name(&spi->dev), tmp);
 		return ERR_PTR(tmp);
 	}
 	if (id[0] != 0x1f)
@@ -869,7 +869,7 @@ static int __devinit dataflash_probe(struct spi_device *spi)
 	status = dataflash_status(spi);
 	if (status <= 0 || status == 0xff) {
 		DEBUG(MTD_DEBUG_LEVEL1, "%s: status error %d\n",
-				spi->dev.bus_id, status);
+				dev_name(&spi->dev), status);
 		if (status == 0 || status == 0xff)
 			status = -ENODEV;
 		return status;
@@ -905,13 +905,13 @@ static int __devinit dataflash_probe(struct spi_device *spi)
 	/* obsolete AT45DB1282 not (yet?) supported */
 	default:
 		DEBUG(MTD_DEBUG_LEVEL1, "%s: unsupported device (%x)\n",
-				spi->dev.bus_id, status & 0x3c);
+				dev_name(&spi->dev), status & 0x3c);
 		status = -ENODEV;
 	}
 
 	if (status < 0)
 		DEBUG(MTD_DEBUG_LEVEL1, "%s: add_dataflash --> %d\n",
-				spi->dev.bus_id, status);
+				dev_name(&spi->dev), status);
 
 	return status;
 }
@@ -921,7 +921,7 @@ static int __devexit dataflash_remove(struct spi_device *spi)
 	struct dataflash	*flash = dev_get_drvdata(&spi->dev);
 	int			status;
 
-	DEBUG(MTD_DEBUG_LEVEL1, "%s: remove\n", spi->dev.bus_id);
+	DEBUG(MTD_DEBUG_LEVEL1, "%s: remove\n", dev_name(&spi->dev));
 
 	if (mtd_has_partitions() && flash->partitioned)
 		status = del_mtd_partitions(&flash->mtd);
diff --git a/drivers/mtd/maps/integrator-flash.c b/drivers/mtd/maps/integrator-flash.c
index 7100ee3c7b01..d2ec262666c7 100644
--- a/drivers/mtd/maps/integrator-flash.c
+++ b/drivers/mtd/maps/integrator-flash.c
@@ -105,7 +105,7 @@ static int armflash_probe(struct platform_device *dev)
 	info->map.bankwidth	= plat->width;
 	info->map.phys		= res->start;
 	info->map.virt		= base;
-	info->map.name		= dev->dev.bus_id;
+	info->map.name		= dev_name(&dev->dev);
 	info->map.set_vpp	= armflash_set_vpp;
 
 	simple_map_init(&info->map);
diff --git a/drivers/mtd/maps/ixp2000.c b/drivers/mtd/maps/ixp2000.c
index 3ea1de9be720..d4fb9a3ab4df 100644
--- a/drivers/mtd/maps/ixp2000.c
+++ b/drivers/mtd/maps/ixp2000.c
@@ -188,7 +188,7 @@ static int ixp2000_flash_probe(struct platform_device *dev)
  	 */
 	info->map.map_priv_2 = (unsigned long) ixp_data->bank_setup;
 
-	info->map.name = dev->dev.bus_id;
+	info->map.name = dev_name(&dev->dev);
 	info->map.read = ixp2000_flash_read8;
 	info->map.write = ixp2000_flash_write8;
 	info->map.copy_from = ixp2000_flash_copy_from;
@@ -196,7 +196,7 @@ static int ixp2000_flash_probe(struct platform_device *dev)
 
 	info->res = request_mem_region(dev->resource->start,
 			dev->resource->end - dev->resource->start + 1,
-			dev->dev.bus_id);
+			dev_name(&dev->dev));
 	if (!info->res) {
 		dev_err(&dev->dev, "Could not reserve memory region\n");
 		err = -ENOMEM;
diff --git a/drivers/mtd/maps/ixp4xx.c b/drivers/mtd/maps/ixp4xx.c
index 16555cbeaea4..7214b876feba 100644
--- a/drivers/mtd/maps/ixp4xx.c
+++ b/drivers/mtd/maps/ixp4xx.c
@@ -218,7 +218,7 @@ static int ixp4xx_flash_probe(struct platform_device *dev)
 	 * handle that.
 	 */
 	info->map.bankwidth = 2;
-	info->map.name = dev->dev.bus_id;
+	info->map.name = dev_name(&dev->dev);
 	info->map.read = ixp4xx_read16,
 	info->map.write = ixp4xx_probe_write16,
 	info->map.copy_from = ixp4xx_copy_from,
diff --git a/drivers/mtd/maps/omap_nor.c b/drivers/mtd/maps/omap_nor.c
index 05f276af15da..7e50e9b1b781 100644
--- a/drivers/mtd/maps/omap_nor.c
+++ b/drivers/mtd/maps/omap_nor.c
@@ -101,7 +101,7 @@ static int __init omapflash_probe(struct platform_device *pdev)
 		err = -ENOMEM;
 		goto out_release_mem_region;
 	}
-	info->map.name		= pdev->dev.bus_id;
+	info->map.name		= dev_name(&pdev->dev);
 	info->map.phys		= res->start;
 	info->map.size		= size;
 	info->map.bankwidth	= pdata->width;
diff --git a/drivers/mtd/maps/physmap.c b/drivers/mtd/maps/physmap.c
index dfbf3f270cea..1db16e549e38 100644
--- a/drivers/mtd/maps/physmap.c
+++ b/drivers/mtd/maps/physmap.c
@@ -108,13 +108,13 @@ static int physmap_flash_probe(struct platform_device *dev)
 		if (!devm_request_mem_region(&dev->dev,
 			dev->resource[i].start,
 			dev->resource[i].end - dev->resource[i].start + 1,
-			dev->dev.bus_id)) {
+			dev_name(&dev->dev))) {
 			dev_err(&dev->dev, "Could not reserve memory region\n");
 			err = -ENOMEM;
 			goto err_out;
 		}
 
-		info->map[i].name = dev->dev.bus_id;
+		info->map[i].name = dev_name(&dev->dev);
 		info->map[i].phys = dev->resource[i].start;
 		info->map[i].size = dev->resource[i].end - dev->resource[i].start + 1;
 		info->map[i].bankwidth = physmap_data->width;
@@ -150,7 +150,7 @@ static int physmap_flash_probe(struct platform_device *dev)
 		 * We detected multiple devices. Concatenate them together.
 		 */
 #ifdef CONFIG_MTD_CONCAT
-		info->cmtd = mtd_concat_create(info->mtd, devices_found, dev->dev.bus_id);
+		info->cmtd = mtd_concat_create(info->mtd, devices_found, dev_name(&dev->dev));
 		if (info->cmtd == NULL)
 			err = -ENXIO;
 #else
diff --git a/drivers/mtd/maps/physmap_of.c b/drivers/mtd/maps/physmap_of.c
index 5fcfec034a94..fbf0ca939d72 100644
--- a/drivers/mtd/maps/physmap_of.c
+++ b/drivers/mtd/maps/physmap_of.c
@@ -183,7 +183,7 @@ static int __devinit of_flash_probe(struct of_device *dev,
 
 	err = -EBUSY;
 	info->res = request_mem_region(res.start, res.end - res.start + 1,
-				       dev->dev.bus_id);
+				       dev_name(&dev->dev));
 	if (!info->res)
 		goto err_out;
 
@@ -194,7 +194,7 @@ static int __devinit of_flash_probe(struct of_device *dev,
 		goto err_out;
 	}
 
-	info->map.name = dev->dev.bus_id;
+	info->map.name = dev_name(&dev->dev);
 	info->map.phys = res.start;
 	info->map.size = res.end - res.start + 1;
 	info->map.bankwidth = *width;
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index 789842d0e6f2..1a05cf37851e 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -691,7 +691,7 @@ static int concat_block_markbad(struct mtd_info *mtd, loff_t ofs)
  */
 struct mtd_info *mtd_concat_create(struct mtd_info *subdev[],	/* subdevices to concatenate */
 				   int num_devs,	/* number of subdevices      */
-				   char *name)
+				   const char *name)
 {				/* name for the new device   */
 	int i;
 	size_t size;
diff --git a/drivers/mtd/nand/fsl_upm.c b/drivers/mtd/nand/fsl_upm.c
index a83192f80eba..7815a404a632 100644
--- a/drivers/mtd/nand/fsl_upm.c
+++ b/drivers/mtd/nand/fsl_upm.c
@@ -222,7 +222,7 @@ static int __devinit fun_probe(struct of_device *ofdev,
 
 	fun->rnb_gpio = of_get_gpio(ofdev->node, 0);
 	if (fun->rnb_gpio >= 0) {
-		ret = gpio_request(fun->rnb_gpio, ofdev->dev.bus_id);
+		ret = gpio_request(fun->rnb_gpio, dev_name(&ofdev->dev));
 		if (ret) {
 			dev_err(&ofdev->dev, "can't request RNB gpio\n");
 			goto err2;
diff --git a/drivers/mtd/nand/plat_nand.c b/drivers/mtd/nand/plat_nand.c
index f674c5427b17..75f9f4874ecf 100644
--- a/drivers/mtd/nand/plat_nand.c
+++ b/drivers/mtd/nand/plat_nand.c
@@ -54,7 +54,7 @@ static int __init plat_nand_probe(struct platform_device *pdev)
 	data->chip.priv = &data;
 	data->mtd.priv = &data->chip;
 	data->mtd.owner = THIS_MODULE;
-	data->mtd.name = pdev->dev.bus_id;
+	data->mtd.name = dev_name(&pdev->dev);
 
 	data->chip.IO_ADDR_R = data->io_base;
 	data->chip.IO_ADDR_W = data->io_base;
diff --git a/drivers/mtd/nand/tmio_nand.c b/drivers/mtd/nand/tmio_nand.c
index edb1e322113d..daa6a4c3b8ce 100644
--- a/drivers/mtd/nand/tmio_nand.c
+++ b/drivers/mtd/nand/tmio_nand.c
@@ -433,7 +433,7 @@ static int tmio_probe(struct platform_device *dev)
 	nand_chip->chip_delay = 15;
 
 	retval = request_irq(irq, &tmio_irq,
-				IRQF_DISABLED, dev->dev.bus_id, tmio);
+				IRQF_DISABLED, dev_name(&dev->dev), tmio);
 	if (retval) {
 		dev_err(&dev->dev, "request_irq error %d\n", retval);
 		goto err_irq;
diff --git a/drivers/mtd/onenand/generic.c b/drivers/mtd/onenand/generic.c
index ad81ab8e95e2..5b69e7773c6c 100644
--- a/drivers/mtd/onenand/generic.c
+++ b/drivers/mtd/onenand/generic.c
@@ -63,7 +63,7 @@ static int __devinit generic_onenand_probe(struct device *dev)
 	info->onenand.mmcontrol = pdata->mmcontrol;
 	info->onenand.irq = platform_get_irq(pdev, 0);
 
-	info->mtd.name = pdev->dev.bus_id;
+	info->mtd.name = dev_name(&pdev->dev);
 	info->mtd.priv = &info->onenand;
 	info->mtd.owner = THIS_MODULE;
 
diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c
index d1e0b8e7224b..96ecc1766fa8 100644
--- a/drivers/mtd/onenand/omap2.c
+++ b/drivers/mtd/onenand/omap2.c
@@ -668,7 +668,7 @@ static int __devinit omap2_onenand_probe(struct platform_device *pdev)
 		 c->onenand.base);
 
 	c->pdev = pdev;
-	c->mtd.name = pdev->dev.bus_id;
+	c->mtd.name = dev_name(&pdev->dev);
 	c->mtd.priv = &c->onenand;
 	c->mtd.owner = THIS_MODULE;
 
diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
index ba0bd3d5775b..7caf22cd5ad0 100644
--- a/drivers/mtd/ubi/build.c
+++ b/drivers/mtd/ubi/build.c
@@ -280,7 +280,7 @@ static int ubi_sysfs_init(struct ubi_device *ubi)
 	ubi->dev.release = dev_release;
 	ubi->dev.devt = ubi->cdev.dev;
 	ubi->dev.class = ubi_class;
-	sprintf(&ubi->dev.bus_id[0], UBI_NAME_STR"%d", ubi->ubi_num);
+	dev_set_name(&ubi->dev, UBI_NAME_STR"%d", ubi->ubi_num);
 	err = device_register(&ubi->dev);
 	if (err)
 		return err;
diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c
index 3531ca9a1e24..22e1d7398fce 100644
--- a/drivers/mtd/ubi/vmt.c
+++ b/drivers/mtd/ubi/vmt.c
@@ -329,7 +329,7 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req)
 	vol->dev.devt = dev;
 	vol->dev.class = ubi_class;
 
-	sprintf(&vol->dev.bus_id[0], "%s_%d", ubi->ubi_name, vol->vol_id);
+	dev_set_name(&vol->dev, "%s_%d", ubi->ubi_name, vol->vol_id);
 	err = device_register(&vol->dev);
 	if (err) {
 		ubi_err("cannot register device");
@@ -678,7 +678,7 @@ int ubi_add_volume(struct ubi_device *ubi, struct ubi_volume *vol)
 	vol->dev.parent = &ubi->dev;
 	vol->dev.devt = dev;
 	vol->dev.class = ubi_class;
-	sprintf(&vol->dev.bus_id[0], "%s_%d", ubi->ubi_name, vol->vol_id);
+	dev_set_name(&vol->dev, "%s_%d", ubi->ubi_name, vol->vol_id);
 	err = device_register(&vol->dev);
 	if (err)
 		goto out_gluebi;
diff --git a/include/linux/mtd/concat.h b/include/linux/mtd/concat.h
index c02f3d264ecf..e80c674daeb3 100644
--- a/include/linux/mtd/concat.h
+++ b/include/linux/mtd/concat.h
@@ -13,7 +13,7 @@
 struct mtd_info *mtd_concat_create(
     struct mtd_info *subdev[],  /* subdevices to concatenate */
     int num_devs,               /* number of subdevices      */
-    char *name);                /* name for the new device   */
+    const char *name);          /* name for the new device   */
 
 void mtd_concat_destroy(struct mtd_info *mtd);
 
-- 
cgit v1.2.3


From e70c412ee45332db2636a8f5a35a0685efb0e4aa Mon Sep 17 00:00:00 2001
From: "Hans J. Koch" <hjk@linutronix.de>
Date: Sat, 6 Dec 2008 02:23:13 +0100
Subject: UIO: Pass information about ioports to userspace (V2)

Devices sometimes have memory where all or parts of it can not be mapped to
userspace. But it might still be possible to access this memory from
userspace by other means. An example are PCI cards that advertise not only
mappable memory but also ioport ranges. On x86 architectures, these can be
accessed with ioperm, iopl, inb, outb, and friends. Mike Frysinger (CCed)
reported a similar problem on Blackfin arch where it doesn't seem to be easy
to mmap non-cached memory but it can still be accessed from userspace.

This patch allows kernel drivers to pass information about such ports to
userspace. Similar to the existing mem[] array, it adds a port[] array to
struct uio_info. Each port range is described by start, size, and porttype.

If a driver fills in at least one such port range, the UIO core will simply
pass this information to userspace by creating a new directory "portio"
underneath /sys/class/uio/uioN/. Similar to the "mem" directory, it will
contain a subdirectory (portX) for each port range given.

Note that UIO simply passes this information to userspace, it performs no
action whatsoever with this data. It's userspace's responsibility to obtain
access to these ports and to solve arch dependent issues. The "porttype"
attribute tells userspace what kind of port it is dealing with.

This mechanism could also be used to give userspace information about GPIOs
related to a device. You frequently find such hardware in embedded devices,
so I added a UIO_PORT_GPIO definition. I'm not really sure if this is a good
idea since there are other solutions to this problem, but it won't hurt much
anyway.

Signed-off-by: Hans J. Koch <hjk@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/uio/uio.c          | 159 ++++++++++++++++++++++++++++++++++++++++-----
 include/linux/uio_driver.h |  26 ++++++++
 2 files changed, 168 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
index 2d2440cd57a9..4ca85a113aa2 100644
--- a/drivers/uio/uio.c
+++ b/drivers/uio/uio.c
@@ -35,6 +35,7 @@ struct uio_device {
 	int			vma_count;
 	struct uio_info		*info;
 	struct kobject		*map_dir;
+	struct kobject		*portio_dir;
 };
 
 static int uio_major;
@@ -75,17 +76,17 @@ static ssize_t map_offset_show(struct uio_mem *mem, char *buf)
 	return sprintf(buf, "0x%lx\n", mem->addr & ~PAGE_MASK);
 }
 
-struct uio_sysfs_entry {
+struct map_sysfs_entry {
 	struct attribute attr;
 	ssize_t (*show)(struct uio_mem *, char *);
 	ssize_t (*store)(struct uio_mem *, const char *, size_t);
 };
 
-static struct uio_sysfs_entry addr_attribute =
+static struct map_sysfs_entry addr_attribute =
 	__ATTR(addr, S_IRUGO, map_addr_show, NULL);
-static struct uio_sysfs_entry size_attribute =
+static struct map_sysfs_entry size_attribute =
 	__ATTR(size, S_IRUGO, map_size_show, NULL);
-static struct uio_sysfs_entry offset_attribute =
+static struct map_sysfs_entry offset_attribute =
 	__ATTR(offset, S_IRUGO, map_offset_show, NULL);
 
 static struct attribute *attrs[] = {
@@ -106,9 +107,9 @@ static ssize_t map_type_show(struct kobject *kobj, struct attribute *attr,
 {
 	struct uio_map *map = to_map(kobj);
 	struct uio_mem *mem = map->mem;
-	struct uio_sysfs_entry *entry;
+	struct map_sysfs_entry *entry;
 
-	entry = container_of(attr, struct uio_sysfs_entry, attr);
+	entry = container_of(attr, struct map_sysfs_entry, attr);
 
 	if (!entry->show)
 		return -EIO;
@@ -116,16 +117,93 @@ static ssize_t map_type_show(struct kobject *kobj, struct attribute *attr,
 	return entry->show(mem, buf);
 }
 
-static struct sysfs_ops uio_sysfs_ops = {
+static struct sysfs_ops map_sysfs_ops = {
 	.show = map_type_show,
 };
 
 static struct kobj_type map_attr_type = {
 	.release	= map_release,
-	.sysfs_ops	= &uio_sysfs_ops,
+	.sysfs_ops	= &map_sysfs_ops,
 	.default_attrs	= attrs,
 };
 
+struct uio_portio {
+	struct kobject kobj;
+	struct uio_port *port;
+};
+#define to_portio(portio) container_of(portio, struct uio_portio, kobj)
+
+static ssize_t portio_start_show(struct uio_port *port, char *buf)
+{
+	return sprintf(buf, "0x%lx\n", port->start);
+}
+
+static ssize_t portio_size_show(struct uio_port *port, char *buf)
+{
+	return sprintf(buf, "0x%lx\n", port->size);
+}
+
+static ssize_t portio_porttype_show(struct uio_port *port, char *buf)
+{
+	const char *porttypes[] = {"none", "x86", "gpio", "other"};
+
+	if ((port->porttype < 0) || (port->porttype > UIO_PORT_OTHER))
+		return -EINVAL;
+
+	return sprintf(buf, "port_%s\n", porttypes[port->porttype]);
+}
+
+struct portio_sysfs_entry {
+	struct attribute attr;
+	ssize_t (*show)(struct uio_port *, char *);
+	ssize_t (*store)(struct uio_port *, const char *, size_t);
+};
+
+static struct portio_sysfs_entry portio_start_attribute =
+	__ATTR(start, S_IRUGO, portio_start_show, NULL);
+static struct portio_sysfs_entry portio_size_attribute =
+	__ATTR(size, S_IRUGO, portio_size_show, NULL);
+static struct portio_sysfs_entry portio_porttype_attribute =
+	__ATTR(porttype, S_IRUGO, portio_porttype_show, NULL);
+
+static struct attribute *portio_attrs[] = {
+	&portio_start_attribute.attr,
+	&portio_size_attribute.attr,
+	&portio_porttype_attribute.attr,
+	NULL,
+};
+
+static void portio_release(struct kobject *kobj)
+{
+	struct uio_portio *portio = to_portio(kobj);
+	kfree(portio);
+}
+
+static ssize_t portio_type_show(struct kobject *kobj, struct attribute *attr,
+			     char *buf)
+{
+	struct uio_portio *portio = to_portio(kobj);
+	struct uio_port *port = portio->port;
+	struct portio_sysfs_entry *entry;
+
+	entry = container_of(attr, struct portio_sysfs_entry, attr);
+
+	if (!entry->show)
+		return -EIO;
+
+	return entry->show(port, buf);
+}
+
+static struct sysfs_ops portio_sysfs_ops = {
+	.show = portio_type_show,
+};
+
+static struct kobj_type portio_attr_type = {
+	.release	= portio_release,
+	.sysfs_ops	= &portio_sysfs_ops,
+	.default_attrs	= portio_attrs,
+};
+
 static ssize_t show_name(struct device *dev,
 			 struct device_attribute *attr, char *buf)
 {
@@ -177,10 +255,13 @@ static struct attribute_group uio_attr_grp = {
 static int uio_dev_add_attributes(struct uio_device *idev)
 {
 	int ret;
-	int mi;
+	int mi, pi;
 	int map_found = 0;
+	int portio_found = 0;
 	struct uio_mem *mem;
 	struct uio_map *map;
+	struct uio_port *port;
+	struct uio_portio *portio;
 
 	ret = sysfs_create_group(&idev->dev->kobj, &uio_attr_grp);
 	if (ret)
@@ -195,25 +276,58 @@ static int uio_dev_add_attributes(struct uio_device *idev)
 			idev->map_dir = kobject_create_and_add("maps",
 							&idev->dev->kobj);
 			if (!idev->map_dir)
-				goto err;
+				goto err_map;
 		}
 		map = kzalloc(sizeof(*map), GFP_KERNEL);
 		if (!map)
-			goto err;
+			goto err_map;
 		kobject_init(&map->kobj, &map_attr_type);
 		map->mem = mem;
 		mem->map = map;
 		ret = kobject_add(&map->kobj, idev->map_dir, "map%d", mi);
 		if (ret)
-			goto err;
+			goto err_map;
 		ret = kobject_uevent(&map->kobj, KOBJ_ADD);
 		if (ret)
-			goto err;
+			goto err_map;
+	}
+
+	for (pi = 0; pi < MAX_UIO_PORT_REGIONS; pi++) {
+		port = &idev->info->port[pi];
+		if (port->size == 0)
+			break;
+		if (!portio_found) {
+			portio_found = 1;
+			idev->portio_dir = kobject_create_and_add("portio",
+							&idev->dev->kobj);
+			if (!idev->portio_dir)
+				goto err_portio;
+		}
+		portio = kzalloc(sizeof(*portio), GFP_KERNEL);
+		if (!portio)
+			goto err_portio;
+		kobject_init(&portio->kobj, &portio_attr_type);
+		portio->port = port;
+		port->portio = portio;
+		ret = kobject_add(&portio->kobj, idev->portio_dir,
+							"port%d", pi);
+		if (ret)
+			goto err_portio;
+		ret = kobject_uevent(&portio->kobj, KOBJ_ADD);
+		if (ret)
+			goto err_portio;
 	}
 
 	return 0;
 
-err:
+err_portio:
+	for (pi--; pi >= 0; pi--) {
+		port = &idev->info->port[pi];
+		portio = port->portio;
+		kobject_put(&portio->kobj);
+	}
+	kobject_put(idev->portio_dir);
+err_map:
 	for (mi--; mi>=0; mi--) {
 		mem = &idev->info->mem[mi];
 		map = mem->map;
@@ -228,15 +342,26 @@ err_group:
 
 static void uio_dev_del_attributes(struct uio_device *idev)
 {
-	int mi;
+	int i;
 	struct uio_mem *mem;
-	for (mi = 0; mi < MAX_UIO_MAPS; mi++) {
-		mem = &idev->info->mem[mi];
+	struct uio_port *port;
+
+	for (i = 0; i < MAX_UIO_MAPS; i++) {
+		mem = &idev->info->mem[i];
 		if (mem->size == 0)
 			break;
 		kobject_put(&mem->map->kobj);
 	}
 	kobject_put(idev->map_dir);
+
+	for (i = 0; i < MAX_UIO_PORT_REGIONS; i++) {
+		port = &idev->info->port[i];
+		if (port->size == 0)
+			break;
+		kobject_put(&port->portio->kobj);
+	}
+	kobject_put(idev->portio_dir);
+
 	sysfs_remove_group(&idev->dev->kobj, &uio_attr_grp);
 }
 
diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h
index cdf338d94b7f..20be327bfbb4 100644
--- a/include/linux/uio_driver.h
+++ b/include/linux/uio_driver.h
@@ -38,6 +38,24 @@ struct uio_mem {
 
 #define MAX_UIO_MAPS	5
 
+struct uio_portio;
+
+/**
+ * struct uio_port - description of a UIO port region
+ * @start:		start of port region
+ * @size:		size of port region
+ * @porttype:		type of port (see UIO_PORT_* below)
+ * @portio:		for use by the UIO core only.
+ */
+struct uio_port {
+	unsigned long		start;
+	unsigned long		size;
+	int			porttype;
+	struct uio_portio	*portio;
+};
+
+#define MAX_UIO_PORT_REGIONS	5
+
 struct uio_device;
 
 /**
@@ -46,6 +64,7 @@ struct uio_device;
  * @name:		device name
  * @version:		device driver version
  * @mem:		list of mappable memory regions, size==0 for end of list
+ * @port:		list of port regions, size==0 for end of list
  * @irq:		interrupt number or UIO_IRQ_CUSTOM
  * @irq_flags:		flags for request_irq()
  * @priv:		optional private data
@@ -60,6 +79,7 @@ struct uio_info {
 	char			*name;
 	char			*version;
 	struct uio_mem		mem[MAX_UIO_MAPS];
+	struct uio_port		port[MAX_UIO_PORT_REGIONS];
 	long			irq;
 	unsigned long		irq_flags;
 	void			*priv;
@@ -92,4 +112,10 @@ extern void uio_event_notify(struct uio_info *info);
 #define UIO_MEM_LOGICAL	2
 #define UIO_MEM_VIRTUAL 3
 
+/* defines for uio_port->porttype */
+#define UIO_PORT_NONE	0
+#define UIO_PORT_X86	1
+#define UIO_PORT_GPIO	2
+#define UIO_PORT_OTHER	3
+
 #endif /* _LINUX_UIO_DRIVER_H_ */
-- 
cgit v1.2.3


From b8ac9fc0e8cda9f9776019c5b0464b0c6d2d4c90 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Fri, 12 Dec 2008 11:44:21 +0100
Subject: uio: make uio_info's name and version const

These are only ever assigned constant strings and never modified.

This was noticed because Wolfram Sang needed to cast the result of
of_get_property() in order to assign it to the name field of a struct
uio_info.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Hans J. Koch <hjk@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 Documentation/DocBook/uio-howto.tmpl | 4 ++--
 include/linux/uio_driver.h           | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/DocBook/uio-howto.tmpl b/Documentation/DocBook/uio-howto.tmpl
index 6116b93608df..b787e4721c90 100644
--- a/Documentation/DocBook/uio-howto.tmpl
+++ b/Documentation/DocBook/uio-howto.tmpl
@@ -393,12 +393,12 @@ offset = N * getpagesize();
 
 <itemizedlist>
 <listitem><para>
-<varname>char *name</varname>: Required. The name of your driver as
+<varname>const char *name</varname>: Required. The name of your driver as
 it will appear in sysfs. I recommend using the name of your module for this.
 </para></listitem>
 
 <listitem><para>
-<varname>char *version</varname>: Required. This string appears in
+<varname>const char *version</varname>: Required. This string appears in
 <filename>/sys/class/uio/uioX/version</filename>.
 </para></listitem>
 
diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h
index 20be327bfbb4..a0bb6bd2e5c1 100644
--- a/include/linux/uio_driver.h
+++ b/include/linux/uio_driver.h
@@ -76,8 +76,8 @@ struct uio_device;
  */
 struct uio_info {
 	struct uio_device	*uio_dev;
-	char			*name;
-	char			*version;
+	const char		*name;
+	const char		*version;
 	struct uio_mem		mem[MAX_UIO_MAPS];
 	struct uio_port		port[MAX_UIO_PORT_REGIONS];
 	long			irq;
-- 
cgit v1.2.3


From 29881c4502ba05f46bc12ae8053d4e08d7e2615c Mon Sep 17 00:00:00 2001
From: James Morris <jmorris@namei.org>
Date: Wed, 7 Jan 2009 09:21:54 +1100
Subject: Revert "CRED: Fix regression in cap_capable() as shown up by
 sys_faccessat() [ver #2]"

This reverts commit 14eaddc967b16017d4a1a24d2be6c28ecbe06ed8.

David has a better version to come.
---
 include/linux/capability.h | 17 ++--------------
 include/linux/security.h   | 49 +++++++++-------------------------------------
 kernel/capability.c        |  2 +-
 security/capability.c      |  1 -
 security/commoncap.c       | 42 ++++++++++++---------------------------
 security/root_plug.c       |  1 -
 security/security.c        | 25 ++++-------------------
 security/selinux/hooks.c   | 26 ++++++------------------
 security/smack/smack_lsm.c |  1 -
 9 files changed, 35 insertions(+), 129 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/capability.h b/include/linux/capability.h
index 5b8a13214451..e22f48c2a46f 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -529,21 +529,8 @@ extern const kernel_cap_t __cap_init_eff_set;
  *
  * Note that this does not set PF_SUPERPRIV on the task.
  */
-#define has_capability(t, cap) (security_task_capable((t), (cap)) == 0)
-
-/**
- * has_capability_noaudit - Determine if a task has a superior capability available (unaudited)
- * @t: The task in question
- * @cap: The capability to be tested for
- *
- * Return true if the specified task has the given superior capability
- * currently in effect, false if not, but don't write an audit message for the
- * check.
- *
- * Note that this does not set PF_SUPERPRIV on the task.
- */
-#define has_capability_noaudit(t, cap) \
-	(security_task_capable_noaudit((t), (cap)) == 0)
+#define has_capability(t, cap) (security_capable((t), (cap)) == 0)
+#define has_capability_noaudit(t, cap) (security_capable_noaudit((t), (cap)) == 0)
 
 extern int capable(int cap);
 
diff --git a/include/linux/security.h b/include/linux/security.h
index 76989b8bc34f..3416cb85e77b 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -48,9 +48,7 @@ struct audit_krule;
  * These functions are in security/capability.c and are used
  * as the default capabilities functions
  */
-extern int cap_capable(int cap, int audit);
-extern int cap_task_capable(struct task_struct *tsk, const struct cred *cred,
-			    int cap, int audit);
+extern int cap_capable(struct task_struct *tsk, int cap, int audit);
 extern int cap_settime(struct timespec *ts, struct timezone *tz);
 extern int cap_ptrace_may_access(struct task_struct *child, unsigned int mode);
 extern int cap_ptrace_traceme(struct task_struct *parent);
@@ -1197,18 +1195,9 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@permitted contains the permitted capability set.
  *	Return 0 and update @new if permission is granted.
  * @capable:
- *	Check whether the current process has the @cap capability in its
- *      subjective/effective credentials.
- *	@cap contains the capability <include/linux/capability.h>.
- *	@audit: Whether to write an audit message or not
- *	Return 0 if the capability is granted for @tsk.
- * @task_capable:
- *	Check whether the @tsk process has the @cap capability in its
- *      objective/real credentials.
+ *	Check whether the @tsk process has the @cap capability.
  *	@tsk contains the task_struct for the process.
- *	@cred contains the credentials to use.
  *	@cap contains the capability <include/linux/capability.h>.
- *	@audit: Whether to write an audit message or not
  *	Return 0 if the capability is granted for @tsk.
  * @acct:
  *	Check permission before enabling or disabling process accounting.  If
@@ -1301,9 +1290,7 @@ struct security_operations {
 		       const kernel_cap_t *effective,
 		       const kernel_cap_t *inheritable,
 		       const kernel_cap_t *permitted);
-	int (*capable) (int cap, int audit);
-	int (*task_capable) (struct task_struct *tsk, const struct cred *cred,
-			     int cap, int audit);
+	int (*capable) (struct task_struct *tsk, int cap, int audit);
 	int (*acct) (struct file *file);
 	int (*sysctl) (struct ctl_table *table, int op);
 	int (*quotactl) (int cmds, int type, int id, struct super_block *sb);
@@ -1569,9 +1556,8 @@ int security_capset(struct cred *new, const struct cred *old,
 		    const kernel_cap_t *effective,
 		    const kernel_cap_t *inheritable,
 		    const kernel_cap_t *permitted);
-int security_capable(int cap);
-int security_task_capable(struct task_struct *tsk, int cap);
-int security_task_capable_noaudit(struct task_struct *tsk, int cap);
+int security_capable(struct task_struct *tsk, int cap);
+int security_capable_noaudit(struct task_struct *tsk, int cap);
 int security_acct(struct file *file);
 int security_sysctl(struct ctl_table *table, int op);
 int security_quotactl(int cmds, int type, int id, struct super_block *sb);
@@ -1768,31 +1754,14 @@ static inline int security_capset(struct cred *new,
 	return cap_capset(new, old, effective, inheritable, permitted);
 }
 
-static inline int security_capable(int cap)
+static inline int security_capable(struct task_struct *tsk, int cap)
 {
-	return cap_capable(cap, SECURITY_CAP_AUDIT);
+	return cap_capable(tsk, cap, SECURITY_CAP_AUDIT);
 }
 
-static inline int security_task_capable(struct task_struct *tsk, int cap)
+static inline int security_capable_noaudit(struct task_struct *tsk, int cap)
 {
-	int ret;
-
-	rcu_read_lock();
-	ret = cap_task_capable(tsk, __task_cred(tsk), cap, SECURITY_CAP_AUDIT);
-	rcu_read_unlock();
-	return ret;
-}
-
-static inline
-int security_task_capable_noaudit(struct task_struct *tsk, int cap)
-{
-	int ret;
-
-	rcu_read_lock();
-	ret = cap_task_capable(tsk, __task_cred(tsk), cap,
-			       SECURITY_CAP_NOAUDIT);
-	rcu_read_unlock();
-	return ret;
+	return cap_capable(tsk, cap, SECURITY_CAP_NOAUDIT);
 }
 
 static inline int security_acct(struct file *file)
diff --git a/kernel/capability.c b/kernel/capability.c
index df62f53f84ac..36b4b4daebec 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -308,7 +308,7 @@ int capable(int cap)
 		BUG();
 	}
 
-	if (security_capable(cap) == 0) {
+	if (has_capability(current, cap)) {
 		current->flags |= PF_SUPERPRIV;
 		return 1;
 	}
diff --git a/security/capability.c b/security/capability.c
index fd1493da4f8d..2dce66fcb992 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -826,7 +826,6 @@ void security_fixup_ops(struct security_operations *ops)
 	set_to_cap_if_null(ops, capset);
 	set_to_cap_if_null(ops, acct);
 	set_to_cap_if_null(ops, capable);
-	set_to_cap_if_null(ops, task_capable);
 	set_to_cap_if_null(ops, quotactl);
 	set_to_cap_if_null(ops, quota_on);
 	set_to_cap_if_null(ops, sysctl);
diff --git a/security/commoncap.c b/security/commoncap.c
index 7f0b2a68717d..79713545cd63 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -43,44 +43,28 @@ int cap_netlink_recv(struct sk_buff *skb, int cap)
 EXPORT_SYMBOL(cap_netlink_recv);
 
 /**
- * cap_capable - Determine whether current has a particular effective capability
+ * cap_capable - Determine whether a task has a particular effective capability
+ * @tsk: The task to query
  * @cap: The capability to check for
  * @audit: Whether to write an audit message or not
  *
  * Determine whether the nominated task has the specified capability amongst
- * its effective set, returning 0 if it does, -ve if it does not.  Note that
- * this uses current's subjective/effective credentials.
+ * its effective set, returning 0 if it does, -ve if it does not.
  *
  * NOTE WELL: cap_capable() cannot be used like the kernel's capable()
  * function.  That is, it has the reverse semantics: cap_capable() returns 0
  * when a task has a capability, but the kernel's capable() returns 1 for this
  * case.
  */
-int cap_capable(int cap, int audit)
+int cap_capable(struct task_struct *tsk, int cap, int audit)
 {
-	return cap_raised(current_cap(), cap) ? 0 : -EPERM;
-}
+	__u32 cap_raised;
 
-/**
- * cap_has_capability - Determine whether a task has a particular effective capability
- * @tsk: The task to query
- * @cred: The credentials to use
- * @cap: The capability to check for
- * @audit: Whether to write an audit message or not
- *
- * Determine whether the nominated task has the specified capability amongst
- * its effective set, returning 0 if it does, -ve if it does not.  Note that
- * this uses the task's objective/real credentials.
- *
- * NOTE WELL: cap_has_capability() cannot be used like the kernel's
- * has_capability() function.  That is, it has the reverse semantics:
- * cap_has_capability() returns 0 when a task has a capability, but the
- * kernel's has_capability() returns 1 for this case.
- */
-int cap_task_capable(struct task_struct *tsk, const struct cred *cred, int cap,
-		     int audit)
-{
-	return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM;
+	/* Derived from include/linux/sched.h:capable. */
+	rcu_read_lock();
+	cap_raised = cap_raised(__task_cred(tsk)->cap_effective, cap);
+	rcu_read_unlock();
+	return cap_raised ? 0 : -EPERM;
 }
 
 /**
@@ -176,7 +160,7 @@ static inline int cap_inh_is_capped(void)
 	/* they are so limited unless the current task has the CAP_SETPCAP
 	 * capability
 	 */
-	if (cap_capable(CAP_SETPCAP, SECURITY_CAP_AUDIT) == 0)
+	if (cap_capable(current, CAP_SETPCAP, SECURITY_CAP_AUDIT) == 0)
 		return 0;
 #endif
 	return 1;
@@ -885,7 +869,7 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
 		     & (new->securebits ^ arg2))			/*[1]*/
 		    || ((new->securebits & SECURE_ALL_LOCKS & ~arg2))	/*[2]*/
 		    || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS))	/*[3]*/
-		    || (cap_capable(CAP_SETPCAP, SECURITY_CAP_AUDIT) != 0) /*[4]*/
+		    || (cap_capable(current, CAP_SETPCAP, SECURITY_CAP_AUDIT) != 0) /*[4]*/
 			/*
 			 * [1] no changing of bits that are locked
 			 * [2] no unlocking of locks
@@ -966,7 +950,7 @@ int cap_vm_enough_memory(struct mm_struct *mm, long pages)
 {
 	int cap_sys_admin = 0;
 
-	if (cap_capable(CAP_SYS_ADMIN, SECURITY_CAP_NOAUDIT) == 0)
+	if (cap_capable(current, CAP_SYS_ADMIN, SECURITY_CAP_NOAUDIT) == 0)
 		cap_sys_admin = 1;
 	return __vm_enough_memory(mm, pages, cap_sys_admin);
 }
diff --git a/security/root_plug.c b/security/root_plug.c
index 559578f8ac66..40fb4f15e27b 100644
--- a/security/root_plug.c
+++ b/security/root_plug.c
@@ -77,7 +77,6 @@ static struct security_operations rootplug_security_ops = {
 	.capget =			cap_capget,
 	.capset =			cap_capset,
 	.capable =			cap_capable,
-	.task_capable =			cap_task_capable,
 
 	.bprm_set_creds =		cap_bprm_set_creds,
 
diff --git a/security/security.c b/security/security.c
index 9bbc8e57b8c6..d85dbb37c972 100644
--- a/security/security.c
+++ b/security/security.c
@@ -154,31 +154,14 @@ int security_capset(struct cred *new, const struct cred *old,
 				    effective, inheritable, permitted);
 }
 
-int security_capable(int cap)
+int security_capable(struct task_struct *tsk, int cap)
 {
-	return security_ops->capable(cap, SECURITY_CAP_AUDIT);
+	return security_ops->capable(tsk, cap, SECURITY_CAP_AUDIT);
 }
 
-int security_task_capable(struct task_struct *tsk, int cap)
+int security_capable_noaudit(struct task_struct *tsk, int cap)
 {
-	const struct cred *cred;
-	int ret;
-
-	cred = get_task_cred(tsk);
-	ret = security_ops->task_capable(tsk, cred, cap, SECURITY_CAP_AUDIT);
-	put_cred(cred);
-	return ret;
-}
-
-int security_task_capable_noaudit(struct task_struct *tsk, int cap)
-{
-	const struct cred *cred;
-	int ret;
-
-	cred = get_task_cred(tsk);
-	ret = security_ops->task_capable(tsk, cred, cap, SECURITY_CAP_NOAUDIT);
-	put_cred(cred);
-	return ret;
+	return security_ops->capable(tsk, cap, SECURITY_CAP_NOAUDIT);
 }
 
 int security_acct(struct file *file)
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index eb6c45107a05..df30a7555d8a 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1433,13 +1433,12 @@ static int current_has_perm(const struct task_struct *tsk,
 
 /* Check whether a task is allowed to use a capability. */
 static int task_has_capability(struct task_struct *tsk,
-			       const struct cred *cred,
 			       int cap, int audit)
 {
 	struct avc_audit_data ad;
 	struct av_decision avd;
 	u16 sclass;
-	u32 sid = cred_sid(cred);
+	u32 sid = task_sid(tsk);
 	u32 av = CAP_TO_MASK(cap);
 	int rc;
 
@@ -1866,27 +1865,15 @@ static int selinux_capset(struct cred *new, const struct cred *old,
 	return cred_has_perm(old, new, PROCESS__SETCAP);
 }
 
-static int selinux_capable(int cap, int audit)
-{
-	int rc;
-
-	rc = secondary_ops->capable(cap, audit);
-	if (rc)
-		return rc;
-
-	return task_has_capability(current, current_cred(), cap, audit);
-}
-
-static int selinux_task_capable(struct task_struct *tsk,
-				const struct cred *cred, int cap, int audit)
+static int selinux_capable(struct task_struct *tsk, int cap, int audit)
 {
 	int rc;
 
-	rc = secondary_ops->task_capable(tsk, cred, cap, audit);
+	rc = secondary_ops->capable(tsk, cap, audit);
 	if (rc)
 		return rc;
 
-	return task_has_capability(tsk, cred, cap, audit);
+	return task_has_capability(tsk, cap, audit);
 }
 
 static int selinux_sysctl_get_sid(ctl_table *table, u16 tclass, u32 *sid)
@@ -2050,7 +2037,7 @@ static int selinux_vm_enough_memory(struct mm_struct *mm, long pages)
 {
 	int rc, cap_sys_admin = 0;
 
-	rc = selinux_capable(CAP_SYS_ADMIN, SECURITY_CAP_NOAUDIT);
+	rc = selinux_capable(current, CAP_SYS_ADMIN, SECURITY_CAP_NOAUDIT);
 	if (rc == 0)
 		cap_sys_admin = 1;
 
@@ -2893,7 +2880,7 @@ static int selinux_inode_getsecurity(const struct inode *inode, const char *name
 	 * and lack of permission just means that we fall back to the
 	 * in-core context value, not a denial.
 	 */
-	error = selinux_capable(CAP_MAC_ADMIN, SECURITY_CAP_NOAUDIT);
+	error = selinux_capable(current, CAP_MAC_ADMIN, SECURITY_CAP_NOAUDIT);
 	if (!error)
 		error = security_sid_to_context_force(isec->sid, &context,
 						      &size);
@@ -5581,7 +5568,6 @@ static struct security_operations selinux_ops = {
 	.capset =			selinux_capset,
 	.sysctl =			selinux_sysctl,
 	.capable =			selinux_capable,
-	.task_capable =			selinux_task_capable,
 	.quotactl =			selinux_quotactl,
 	.quota_on =			selinux_quota_on,
 	.syslog =			selinux_syslog,
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 7f12cc7015b6..6bfaba6177c2 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -2827,7 +2827,6 @@ struct security_operations smack_ops = {
 	.capget = 			cap_capget,
 	.capset = 			cap_capset,
 	.capable = 			cap_capable,
-	.task_capable = 		cap_task_capable,
 	.syslog = 			smack_syslog,
 	.settime = 			cap_settime,
 	.vm_enough_memory = 		cap_vm_enough_memory,
-- 
cgit v1.2.3


From 3699c53c485bf0168e6500d0ed18bf931584dd7c Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 6 Jan 2009 22:27:01 +0000
Subject: CRED: Fix regression in cap_capable() as shown up by sys_faccessat()
 [ver #3]

Fix a regression in cap_capable() due to:

	commit 3b11a1decef07c19443d24ae926982bc8ec9f4c0
	Author: David Howells <dhowells@redhat.com>
	Date:   Fri Nov 14 10:39:26 2008 +1100

	    CRED: Differentiate objective and effective subjective credentials on a task

The problem is that the above patch allows a process to have two sets of
credentials, and for the most part uses the subjective credentials when
accessing current's creds.

There is, however, one exception: cap_capable(), and thus capable(), uses the
real/objective credentials of the target task, whether or not it is the current
task.

Ordinarily this doesn't matter, since usually the two cred pointers in current
point to the same set of creds.  However, sys_faccessat() makes use of this
facility to override the credentials of the calling process to make its test,
without affecting the creds as seen from other processes.

One of the things sys_faccessat() does is to make an adjustment to the
effective capabilities mask, which cap_capable(), as it stands, then ignores.

The affected capability check is in generic_permission():

	if (!(mask & MAY_EXEC) || execute_ok(inode))
		if (capable(CAP_DAC_OVERRIDE))
			return 0;

This change passes the set of credentials to be tested down into the commoncap
and SELinux code.  The security functions called by capable() and
has_capability() select the appropriate set of credentials from the process
being checked.

This can be tested by compiling the following program from the XFS testsuite:

/*
 *  t_access_root.c - trivial test program to show permission bug.
 *
 *  Written by Michael Kerrisk - copyright ownership not pursued.
 *  Sourced from: http://linux.derkeiler.com/Mailing-Lists/Kernel/2003-10/6030.html
 */
#include <limits.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <sys/stat.h>

#define UID 500
#define GID 100
#define PERM 0
#define TESTPATH "/tmp/t_access"

static void
errExit(char *msg)
{
    perror(msg);
    exit(EXIT_FAILURE);
} /* errExit */

static void
accessTest(char *file, int mask, char *mstr)
{
    printf("access(%s, %s) returns %d\n", file, mstr, access(file, mask));
} /* accessTest */

int
main(int argc, char *argv[])
{
    int fd, perm, uid, gid;
    char *testpath;
    char cmd[PATH_MAX + 20];

    testpath = (argc > 1) ? argv[1] : TESTPATH;
    perm = (argc > 2) ? strtoul(argv[2], NULL, 8) : PERM;
    uid = (argc > 3) ? atoi(argv[3]) : UID;
    gid = (argc > 4) ? atoi(argv[4]) : GID;

    unlink(testpath);

    fd = open(testpath, O_RDWR | O_CREAT, 0);
    if (fd == -1) errExit("open");

    if (fchown(fd, uid, gid) == -1) errExit("fchown");
    if (fchmod(fd, perm) == -1) errExit("fchmod");
    close(fd);

    snprintf(cmd, sizeof(cmd), "ls -l %s", testpath);
    system(cmd);

    if (seteuid(uid) == -1) errExit("seteuid");

    accessTest(testpath, 0, "0");
    accessTest(testpath, R_OK, "R_OK");
    accessTest(testpath, W_OK, "W_OK");
    accessTest(testpath, X_OK, "X_OK");
    accessTest(testpath, R_OK | W_OK, "R_OK | W_OK");
    accessTest(testpath, R_OK | X_OK, "R_OK | X_OK");
    accessTest(testpath, W_OK | X_OK, "W_OK | X_OK");
    accessTest(testpath, R_OK | W_OK | X_OK, "R_OK | W_OK | X_OK");

    exit(EXIT_SUCCESS);
} /* main */

This can be run against an Ext3 filesystem as well as against an XFS
filesystem.  If successful, it will show:

	[root@andromeda src]# ./t_access_root /tmp/xxx 0 4043 4043
	---------- 1 dhowells dhowells 0 2008-12-31 03:00 /tmp/xxx
	access(/tmp/xxx, 0) returns 0
	access(/tmp/xxx, R_OK) returns 0
	access(/tmp/xxx, W_OK) returns 0
	access(/tmp/xxx, X_OK) returns -1
	access(/tmp/xxx, R_OK | W_OK) returns 0
	access(/tmp/xxx, R_OK | X_OK) returns -1
	access(/tmp/xxx, W_OK | X_OK) returns -1
	access(/tmp/xxx, R_OK | W_OK | X_OK) returns -1

If unsuccessful, it will show:

	[root@andromeda src]# ./t_access_root /tmp/xxx 0 4043 4043
	---------- 1 dhowells dhowells 0 2008-12-31 02:56 /tmp/xxx
	access(/tmp/xxx, 0) returns 0
	access(/tmp/xxx, R_OK) returns -1
	access(/tmp/xxx, W_OK) returns -1
	access(/tmp/xxx, X_OK) returns -1
	access(/tmp/xxx, R_OK | W_OK) returns -1
	access(/tmp/xxx, R_OK | X_OK) returns -1
	access(/tmp/xxx, W_OK | X_OK) returns -1
	access(/tmp/xxx, R_OK | W_OK | X_OK) returns -1

I've also tested the fix with the SELinux and syscalls LTP testsuites.

Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: J. Bruce Fields <bfields@citi.umich.edu>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/capability.h | 17 +++++++++++++++--
 include/linux/security.h   | 41 ++++++++++++++++++++++++++++++++---------
 kernel/capability.c        |  2 +-
 security/commoncap.c       | 29 ++++++++++++++---------------
 security/security.c        | 26 ++++++++++++++++++++++----
 security/selinux/hooks.c   | 16 ++++++++++------
 6 files changed, 94 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/capability.h b/include/linux/capability.h
index e22f48c2a46f..02bdb768d43b 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -529,8 +529,21 @@ extern const kernel_cap_t __cap_init_eff_set;
  *
  * Note that this does not set PF_SUPERPRIV on the task.
  */
-#define has_capability(t, cap) (security_capable((t), (cap)) == 0)
-#define has_capability_noaudit(t, cap) (security_capable_noaudit((t), (cap)) == 0)
+#define has_capability(t, cap) (security_real_capable((t), (cap)) == 0)
+
+/**
+ * has_capability_noaudit - Determine if a task has a superior capability available (unaudited)
+ * @t: The task in question
+ * @cap: The capability to be tested for
+ *
+ * Return true if the specified task has the given superior capability
+ * currently in effect, false if not, but don't write an audit message for the
+ * check.
+ *
+ * Note that this does not set PF_SUPERPRIV on the task.
+ */
+#define has_capability_noaudit(t, cap) \
+	(security_real_capable_noaudit((t), (cap)) == 0)
 
 extern int capable(int cap);
 
diff --git a/include/linux/security.h b/include/linux/security.h
index 3416cb85e77b..f9c390494f18 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -48,7 +48,8 @@ struct audit_krule;
  * These functions are in security/capability.c and are used
  * as the default capabilities functions
  */
-extern int cap_capable(struct task_struct *tsk, int cap, int audit);
+extern int cap_capable(struct task_struct *tsk, const struct cred *cred,
+		       int cap, int audit);
 extern int cap_settime(struct timespec *ts, struct timezone *tz);
 extern int cap_ptrace_may_access(struct task_struct *child, unsigned int mode);
 extern int cap_ptrace_traceme(struct task_struct *parent);
@@ -1195,9 +1196,12 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@permitted contains the permitted capability set.
  *	Return 0 and update @new if permission is granted.
  * @capable:
- *	Check whether the @tsk process has the @cap capability.
+ *	Check whether the @tsk process has the @cap capability in the indicated
+ *	credentials.
  *	@tsk contains the task_struct for the process.
+ *	@cred contains the credentials to use.
  *	@cap contains the capability <include/linux/capability.h>.
+ *	@audit: Whether to write an audit message or not
  *	Return 0 if the capability is granted for @tsk.
  * @acct:
  *	Check permission before enabling or disabling process accounting.  If
@@ -1290,7 +1294,8 @@ struct security_operations {
 		       const kernel_cap_t *effective,
 		       const kernel_cap_t *inheritable,
 		       const kernel_cap_t *permitted);
-	int (*capable) (struct task_struct *tsk, int cap, int audit);
+	int (*capable) (struct task_struct *tsk, const struct cred *cred,
+			int cap, int audit);
 	int (*acct) (struct file *file);
 	int (*sysctl) (struct ctl_table *table, int op);
 	int (*quotactl) (int cmds, int type, int id, struct super_block *sb);
@@ -1556,8 +1561,9 @@ int security_capset(struct cred *new, const struct cred *old,
 		    const kernel_cap_t *effective,
 		    const kernel_cap_t *inheritable,
 		    const kernel_cap_t *permitted);
-int security_capable(struct task_struct *tsk, int cap);
-int security_capable_noaudit(struct task_struct *tsk, int cap);
+int security_capable(int cap);
+int security_real_capable(struct task_struct *tsk, int cap);
+int security_real_capable_noaudit(struct task_struct *tsk, int cap);
 int security_acct(struct file *file);
 int security_sysctl(struct ctl_table *table, int op);
 int security_quotactl(int cmds, int type, int id, struct super_block *sb);
@@ -1754,14 +1760,31 @@ static inline int security_capset(struct cred *new,
 	return cap_capset(new, old, effective, inheritable, permitted);
 }
 
-static inline int security_capable(struct task_struct *tsk, int cap)
+static inline int security_capable(int cap)
 {
-	return cap_capable(tsk, cap, SECURITY_CAP_AUDIT);
+	return cap_capable(current, current_cred(), cap, SECURITY_CAP_AUDIT);
 }
 
-static inline int security_capable_noaudit(struct task_struct *tsk, int cap)
+static inline int security_real_capable(struct task_struct *tsk, int cap)
 {
-	return cap_capable(tsk, cap, SECURITY_CAP_NOAUDIT);
+	int ret;
+
+	rcu_read_lock();
+	ret = cap_capable(tsk, __task_cred(tsk), cap, SECURITY_CAP_AUDIT);
+	rcu_read_unlock();
+	return ret;
+}
+
+static inline
+int security_real_capable_noaudit(struct task_struct *tsk, int cap)
+{
+	int ret;
+
+	rcu_read_lock();
+	ret = cap_capable(tsk, __task_cred(tsk), cap,
+			       SECURITY_CAP_NOAUDIT);
+	rcu_read_unlock();
+	return ret;
 }
 
 static inline int security_acct(struct file *file)
diff --git a/kernel/capability.c b/kernel/capability.c
index 36b4b4daebec..df62f53f84ac 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -308,7 +308,7 @@ int capable(int cap)
 		BUG();
 	}
 
-	if (has_capability(current, cap)) {
+	if (security_capable(cap) == 0) {
 		current->flags |= PF_SUPERPRIV;
 		return 1;
 	}
diff --git a/security/commoncap.c b/security/commoncap.c
index 79713545cd63..f0e671dcfff0 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -45,26 +45,22 @@ EXPORT_SYMBOL(cap_netlink_recv);
 /**
  * cap_capable - Determine whether a task has a particular effective capability
  * @tsk: The task to query
+ * @cred: The credentials to use
  * @cap: The capability to check for
  * @audit: Whether to write an audit message or not
  *
  * Determine whether the nominated task has the specified capability amongst
  * its effective set, returning 0 if it does, -ve if it does not.
  *
- * NOTE WELL: cap_capable() cannot be used like the kernel's capable()
- * function.  That is, it has the reverse semantics: cap_capable() returns 0
- * when a task has a capability, but the kernel's capable() returns 1 for this
- * case.
+ * NOTE WELL: cap_has_capability() cannot be used like the kernel's capable()
+ * and has_capability() functions.  That is, it has the reverse semantics:
+ * cap_has_capability() returns 0 when a task has a capability, but the
+ * kernel's capable() and has_capability() returns 1 for this case.
  */
-int cap_capable(struct task_struct *tsk, int cap, int audit)
+int cap_capable(struct task_struct *tsk, const struct cred *cred, int cap,
+		int audit)
 {
-	__u32 cap_raised;
-
-	/* Derived from include/linux/sched.h:capable. */
-	rcu_read_lock();
-	cap_raised = cap_raised(__task_cred(tsk)->cap_effective, cap);
-	rcu_read_unlock();
-	return cap_raised ? 0 : -EPERM;
+	return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM;
 }
 
 /**
@@ -160,7 +156,8 @@ static inline int cap_inh_is_capped(void)
 	/* they are so limited unless the current task has the CAP_SETPCAP
 	 * capability
 	 */
-	if (cap_capable(current, CAP_SETPCAP, SECURITY_CAP_AUDIT) == 0)
+	if (cap_capable(current, current_cred(), CAP_SETPCAP,
+			SECURITY_CAP_AUDIT) == 0)
 		return 0;
 #endif
 	return 1;
@@ -869,7 +866,8 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
 		     & (new->securebits ^ arg2))			/*[1]*/
 		    || ((new->securebits & SECURE_ALL_LOCKS & ~arg2))	/*[2]*/
 		    || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS))	/*[3]*/
-		    || (cap_capable(current, CAP_SETPCAP, SECURITY_CAP_AUDIT) != 0) /*[4]*/
+		    || (cap_capable(current, current_cred(), CAP_SETPCAP,
+				    SECURITY_CAP_AUDIT) != 0)		/*[4]*/
 			/*
 			 * [1] no changing of bits that are locked
 			 * [2] no unlocking of locks
@@ -950,7 +948,8 @@ int cap_vm_enough_memory(struct mm_struct *mm, long pages)
 {
 	int cap_sys_admin = 0;
 
-	if (cap_capable(current, CAP_SYS_ADMIN, SECURITY_CAP_NOAUDIT) == 0)
+	if (cap_capable(current, current_cred(), CAP_SYS_ADMIN,
+			SECURITY_CAP_NOAUDIT) == 0)
 		cap_sys_admin = 1;
 	return __vm_enough_memory(mm, pages, cap_sys_admin);
 }
diff --git a/security/security.c b/security/security.c
index d85dbb37c972..a02f243f09c0 100644
--- a/security/security.c
+++ b/security/security.c
@@ -154,14 +154,32 @@ int security_capset(struct cred *new, const struct cred *old,
 				    effective, inheritable, permitted);
 }
 
-int security_capable(struct task_struct *tsk, int cap)
+int security_capable(int cap)
 {
-	return security_ops->capable(tsk, cap, SECURITY_CAP_AUDIT);
+	return security_ops->capable(current, current_cred(), cap,
+				     SECURITY_CAP_AUDIT);
 }
 
-int security_capable_noaudit(struct task_struct *tsk, int cap)
+int security_real_capable(struct task_struct *tsk, int cap)
 {
-	return security_ops->capable(tsk, cap, SECURITY_CAP_NOAUDIT);
+	const struct cred *cred;
+	int ret;
+
+	cred = get_task_cred(tsk);
+	ret = security_ops->capable(tsk, cred, cap, SECURITY_CAP_AUDIT);
+	put_cred(cred);
+	return ret;
+}
+
+int security_real_capable_noaudit(struct task_struct *tsk, int cap)
+{
+	const struct cred *cred;
+	int ret;
+
+	cred = get_task_cred(tsk);
+	ret = security_ops->capable(tsk, cred, cap, SECURITY_CAP_NOAUDIT);
+	put_cred(cred);
+	return ret;
 }
 
 int security_acct(struct file *file)
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index df30a7555d8a..00815973d412 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1433,12 +1433,13 @@ static int current_has_perm(const struct task_struct *tsk,
 
 /* Check whether a task is allowed to use a capability. */
 static int task_has_capability(struct task_struct *tsk,
+			       const struct cred *cred,
 			       int cap, int audit)
 {
 	struct avc_audit_data ad;
 	struct av_decision avd;
 	u16 sclass;
-	u32 sid = task_sid(tsk);
+	u32 sid = cred_sid(cred);
 	u32 av = CAP_TO_MASK(cap);
 	int rc;
 
@@ -1865,15 +1866,16 @@ static int selinux_capset(struct cred *new, const struct cred *old,
 	return cred_has_perm(old, new, PROCESS__SETCAP);
 }
 
-static int selinux_capable(struct task_struct *tsk, int cap, int audit)
+static int selinux_capable(struct task_struct *tsk, const struct cred *cred,
+			   int cap, int audit)
 {
 	int rc;
 
-	rc = secondary_ops->capable(tsk, cap, audit);
+	rc = secondary_ops->capable(tsk, cred, cap, audit);
 	if (rc)
 		return rc;
 
-	return task_has_capability(tsk, cap, audit);
+	return task_has_capability(tsk, cred, cap, audit);
 }
 
 static int selinux_sysctl_get_sid(ctl_table *table, u16 tclass, u32 *sid)
@@ -2037,7 +2039,8 @@ static int selinux_vm_enough_memory(struct mm_struct *mm, long pages)
 {
 	int rc, cap_sys_admin = 0;
 
-	rc = selinux_capable(current, CAP_SYS_ADMIN, SECURITY_CAP_NOAUDIT);
+	rc = selinux_capable(current, current_cred(), CAP_SYS_ADMIN,
+			     SECURITY_CAP_NOAUDIT);
 	if (rc == 0)
 		cap_sys_admin = 1;
 
@@ -2880,7 +2883,8 @@ static int selinux_inode_getsecurity(const struct inode *inode, const char *name
 	 * and lack of permission just means that we fall back to the
 	 * in-core context value, not a denial.
 	 */
-	error = selinux_capable(current, CAP_MAC_ADMIN, SECURITY_CAP_NOAUDIT);
+	error = selinux_capable(current, current_cred(), CAP_MAC_ADMIN,
+				SECURITY_CAP_NOAUDIT);
 	if (!error)
 		error = security_sid_to_context_force(isec->sid, &context,
 						      &size);
-- 
cgit v1.2.3


From 08fba69986e20c1c9e5fe2e6064d146cc4f42480 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Tue, 6 Jan 2009 14:38:53 -0800
Subject: mm: report the pagesize backing a VMA in /proc/pid/smaps

It is useful to verify a hugepage-aware application is using the expected
pagesizes for its memory regions. This patch creates an entry called
KernelPageSize in /proc/pid/smaps that is the size of page used by the
kernel to back a VMA. The entry is not called PageSize as it is possible
the MMU uses a different size. This extension should not break any sensible
parser that skips lines containing unrecognised information.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: "KOSAKI Motohiro" <kosaki.motohiro@jp.fujitsu.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/task_mmu.c      |  6 ++++--
 include/linux/hugetlb.h |  3 +++
 mm/hugetlb.c            | 16 ++++++++++++++++
 3 files changed, 23 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 3a8bdd7f5756..41ef5f23e779 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -396,7 +396,8 @@ static int show_smap(struct seq_file *m, void *v)
 		   "Private_Clean:  %8lu kB\n"
 		   "Private_Dirty:  %8lu kB\n"
 		   "Referenced:     %8lu kB\n"
-		   "Swap:           %8lu kB\n",
+		   "Swap:           %8lu kB\n"
+		   "KernelPageSize: %8lu kB\n",
 		   (vma->vm_end - vma->vm_start) >> 10,
 		   mss.resident >> 10,
 		   (unsigned long)(mss.pss >> (10 + PSS_SHIFT)),
@@ -405,7 +406,8 @@ static int show_smap(struct seq_file *m, void *v)
 		   mss.private_clean >> 10,
 		   mss.private_dirty >> 10,
 		   mss.referenced >> 10,
-		   mss.swap >> 10);
+		   mss.swap >> 10,
+		   vma_kernel_pagesize(vma) >> 10);
 
 	if (m->count < m->size)  /* vma is copied successfully */
 		m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0;
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index e1c8afc002c0..648e1e25979e 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -233,6 +233,8 @@ static inline unsigned long huge_page_size(struct hstate *h)
 	return (unsigned long)PAGE_SIZE << h->order;
 }
 
+extern unsigned long vma_kernel_pagesize(struct vm_area_struct *vma);
+
 static inline unsigned long huge_page_mask(struct hstate *h)
 {
 	return h->mask;
@@ -273,6 +275,7 @@ struct hstate {};
 #define hstate_inode(i) NULL
 #define huge_page_size(h) PAGE_SIZE
 #define huge_page_mask(h) PAGE_MASK
+#define vma_kernel_pagesize(v) PAGE_SIZE
 #define huge_page_order(h) 0
 #define huge_page_shift(h) PAGE_SHIFT
 static inline unsigned int pages_per_huge_page(struct hstate *h)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6058b53dcb89..5cb8bc7c80f7 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -219,6 +219,22 @@ static pgoff_t vma_hugecache_offset(struct hstate *h,
 			(vma->vm_pgoff >> huge_page_order(h));
 }
 
+/*
+ * Return the size of the pages allocated when backing a VMA. In the majority
+ * cases this will be same size as used by the page table entries.
+ */
+unsigned long vma_kernel_pagesize(struct vm_area_struct *vma)
+{
+	struct hstate *hstate;
+
+	if (!is_vm_hugetlb_page(vma))
+		return PAGE_SIZE;
+
+	hstate = hstate_vma(vma);
+
+	return 1UL << (hstate->order + PAGE_SHIFT);
+}
+
 /*
  * Flags for MAP_PRIVATE reservations.  These are stored in the bottom
  * bits of the reservation map pointer, which are always clear due to
-- 
cgit v1.2.3


From 3340289ddf29ca75c3acfb3a6b72f234b2f74d5c Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Tue, 6 Jan 2009 14:38:54 -0800
Subject: mm: report the MMU pagesize in /proc/pid/smaps

The KernelPageSize entry in /proc/pid/smaps is the pagesize used by the
kernel to back a VMA.  This matches the size used by the MMU in the
majority of cases.  However, one counter-example occurs on PPC64 kernels
whereby a kernel using 64K as a base pagesize may still use 4K pages for
the MMU on older processor.  To distinguish, this patch reports
MMUPageSize as the pagesize used by the MMU in /proc/pid/smaps.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Cc: "KOSAKI Motohiro" <kosaki.motohiro@jp.fujitsu.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/include/asm/hugetlb.h |  6 ++++++
 arch/powerpc/mm/hugetlbpage.c      |  7 +++++++
 fs/proc/task_mmu.c                 |  6 ++++--
 include/linux/hugetlb.h            |  3 +++
 mm/hugetlb.c                       | 13 +++++++++++++
 5 files changed, 33 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 26f0d0ab27a5..b1dafb6a9743 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -17,6 +17,12 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
 			      pte_t *ptep);
 
+/*
+ * The version of vma_mmu_pagesize() in arch/powerpc/mm/hugetlbpage.c needs
+ * to override the version in mm/hugetlb.c
+ */
+#define vma_mmu_pagesize vma_mmu_pagesize
+
 /*
  * If the arch doesn't supply something else, assume that hugepage
  * size aligned regions are ok without further preparation.
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 201c7a5486cb..9920d6a7cf29 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -512,6 +512,13 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 	return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0);
 }
 
+unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
+{
+	unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start);
+
+	return 1UL << mmu_psize_to_shift(psize);
+}
+
 /*
  * Called by asm hashtable.S for doing lazy icache flush
  */
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 41ef5f23e779..94063840832a 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -397,7 +397,8 @@ static int show_smap(struct seq_file *m, void *v)
 		   "Private_Dirty:  %8lu kB\n"
 		   "Referenced:     %8lu kB\n"
 		   "Swap:           %8lu kB\n"
-		   "KernelPageSize: %8lu kB\n",
+		   "KernelPageSize: %8lu kB\n"
+		   "MMUPageSize:    %8lu kB\n",
 		   (vma->vm_end - vma->vm_start) >> 10,
 		   mss.resident >> 10,
 		   (unsigned long)(mss.pss >> (10 + PSS_SHIFT)),
@@ -407,7 +408,8 @@ static int show_smap(struct seq_file *m, void *v)
 		   mss.private_dirty >> 10,
 		   mss.referenced >> 10,
 		   mss.swap >> 10,
-		   vma_kernel_pagesize(vma) >> 10);
+		   vma_kernel_pagesize(vma) >> 10,
+		   vma_mmu_pagesize(vma) >> 10);
 
 	if (m->count < m->size)  /* vma is copied successfully */
 		m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0;
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 648e1e25979e..f1d2fba19ea0 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -235,6 +235,8 @@ static inline unsigned long huge_page_size(struct hstate *h)
 
 extern unsigned long vma_kernel_pagesize(struct vm_area_struct *vma);
 
+extern unsigned long vma_mmu_pagesize(struct vm_area_struct *vma);
+
 static inline unsigned long huge_page_mask(struct hstate *h)
 {
 	return h->mask;
@@ -276,6 +278,7 @@ struct hstate {};
 #define huge_page_size(h) PAGE_SIZE
 #define huge_page_mask(h) PAGE_MASK
 #define vma_kernel_pagesize(v) PAGE_SIZE
+#define vma_mmu_pagesize(v) PAGE_SIZE
 #define huge_page_order(h) 0
 #define huge_page_shift(h) PAGE_SHIFT
 static inline unsigned int pages_per_huge_page(struct hstate *h)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 5cb8bc7c80f7..9595278b5ab4 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -235,6 +235,19 @@ unsigned long vma_kernel_pagesize(struct vm_area_struct *vma)
 	return 1UL << (hstate->order + PAGE_SHIFT);
 }
 
+/*
+ * Return the page size being used by the MMU to back a VMA. In the majority
+ * of cases, the page size used by the kernel matches the MMU size. On
+ * architectures where it differs, an architecture-specific version of this
+ * function is required.
+ */
+#ifndef vma_mmu_pagesize
+unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
+{
+	return vma_kernel_pagesize(vma);
+}
+#endif
+
 /*
  * Flags for MAP_PRIVATE reservations.  These are stored in the bottom
  * bits of the reservation map pointer, which are always clear due to
-- 
cgit v1.2.3


From 1c0fe6e3bda0464728c23c8d84aa47567e8b716c Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Tue, 6 Jan 2009 14:38:59 -0800
Subject: mm: invoke oom-killer from page fault

Rather than have the pagefault handler kill a process directly if it gets
a VM_FAULT_OOM, have it call into the OOM killer.

With increasingly sophisticated oom behaviour (cpusets, memory cgroups,
oom killing throttling, oom priority adjustment or selective disabling,
panic on oom, etc), it's silly to unconditionally kill the faulting
process at page fault time.  Create a hook for pagefault oom path to call
into instead.

Only converted x86 and uml so far.

[akpm@linux-foundation.org: make __out_of_memory() static]
[akpm@linux-foundation.org: fix comment]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Jeff Dike <jdike@addtoit.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/um/kernel/trap.c | 24 +++++--------
 arch/x86/mm/fault.c   | 24 ++++---------
 include/linux/mm.h    |  5 +++
 mm/oom_kill.c         | 94 +++++++++++++++++++++++++++++++++++----------------
 4 files changed, 84 insertions(+), 63 deletions(-)

(limited to 'include/linux')

diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 44e490419495..7384d8accfe7 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -64,11 +64,10 @@ good_area:
 
 	do {
 		int fault;
-survive:
+
 		fault = handle_mm_fault(mm, vma, address, is_write);
 		if (unlikely(fault & VM_FAULT_ERROR)) {
 			if (fault & VM_FAULT_OOM) {
-				err = -ENOMEM;
 				goto out_of_memory;
 			} else if (fault & VM_FAULT_SIGBUS) {
 				err = -EACCES;
@@ -104,18 +103,14 @@ out:
 out_nosemaphore:
 	return err;
 
-/*
- * We ran out of memory, or some other thing happened to us that made
- * us unable to handle the page fault gracefully.
- */
 out_of_memory:
-	if (is_global_init(current)) {
-		up_read(&mm->mmap_sem);
-		yield();
-		down_read(&mm->mmap_sem);
-		goto survive;
-	}
-	goto out;
+	/*
+	 * We ran out of memory, call the OOM killer, and return the userspace
+	 * (which will retry the fault, or kill us if we got oom-killed).
+	 */
+	up_read(&mm->mmap_sem);
+	pagefault_out_of_memory();
+	return 0;
 }
 
 static void bad_segv(struct faultinfo fi, unsigned long ip)
@@ -214,9 +209,6 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
 		si.si_addr = (void __user *)address;
 		current->thread.arch.faultinfo = fi;
 		force_sig_info(SIGBUS, &si, current);
-	} else if (err == -ENOMEM) {
-		printk(KERN_INFO "VM: killing process %s\n", current->comm);
-		do_exit(SIGKILL);
 	} else {
 		BUG_ON(err != -EFAULT);
 		si.si_signo = SIGSEGV;
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 57ec8c86a877..9e268b6b204e 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -667,7 +667,6 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	if (unlikely(in_atomic() || !mm))
 		goto bad_area_nosemaphore;
 
-again:
 	/*
 	 * When running in the kernel we expect faults to occur only to
 	 * addresses in user space.  All other faults represent errors in the
@@ -859,25 +858,14 @@ no_context:
 	oops_end(flags, regs, sig);
 #endif
 
-/*
- * We ran out of memory, or some other thing happened to us that made
- * us unable to handle the page fault gracefully.
- */
 out_of_memory:
+	/*
+	 * We ran out of memory, call the OOM killer, and return the userspace
+	 * (which will retry the fault, or kill us if we got oom-killed).
+	 */
 	up_read(&mm->mmap_sem);
-	if (is_global_init(tsk)) {
-		yield();
-		/*
-		 * Re-lookup the vma - in theory the vma tree might
-		 * have changed:
-		 */
-		goto again;
-	}
-
-	printk("VM: killing process %s\n", tsk->comm);
-	if (error_code & PF_USER)
-		do_group_exit(SIGKILL);
-	goto no_context;
+	pagefault_out_of_memory();
+	return;
 
 do_sigbus:
 	up_read(&mm->mmap_sem);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index aaa8b843be28..4a3d28c86443 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -717,6 +717,11 @@ static inline int page_mapped(struct page *page)
 
 #define VM_FAULT_ERROR	(VM_FAULT_OOM | VM_FAULT_SIGBUS)
 
+/*
+ * Can be called by the pagefault handler when it gets a VM_FAULT_OOM.
+ */
+extern void pagefault_out_of_memory(void);
+
 #define offset_in_page(p)	((unsigned long)(p) & ~PAGE_MASK)
 
 extern void show_free_areas(void);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 558f9afe6e4e..c592965dab2f 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -509,6 +509,69 @@ void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask)
 	spin_unlock(&zone_scan_mutex);
 }
 
+/*
+ * Must be called with tasklist_lock held for read.
+ */
+static void __out_of_memory(gfp_t gfp_mask, int order)
+{
+	if (sysctl_oom_kill_allocating_task) {
+		oom_kill_process(current, gfp_mask, order, 0, NULL,
+				"Out of memory (oom_kill_allocating_task)");
+
+	} else {
+		unsigned long points;
+		struct task_struct *p;
+
+retry:
+		/*
+		 * Rambo mode: Shoot down a process and hope it solves whatever
+		 * issues we may have.
+		 */
+		p = select_bad_process(&points, NULL);
+
+		if (PTR_ERR(p) == -1UL)
+			return;
+
+		/* Found nothing?!?! Either we hang forever, or we panic. */
+		if (!p) {
+			read_unlock(&tasklist_lock);
+			panic("Out of memory and no killable processes...\n");
+		}
+
+		if (oom_kill_process(p, gfp_mask, order, points, NULL,
+				     "Out of memory"))
+			goto retry;
+	}
+}
+
+/*
+ * pagefault handler calls into here because it is out of memory but
+ * doesn't know exactly how or why.
+ */
+void pagefault_out_of_memory(void)
+{
+	unsigned long freed = 0;
+
+	blocking_notifier_call_chain(&oom_notify_list, 0, &freed);
+	if (freed > 0)
+		/* Got some memory back in the last second. */
+		return;
+
+	if (sysctl_panic_on_oom)
+		panic("out of memory from page fault. panic_on_oom is selected.\n");
+
+	read_lock(&tasklist_lock);
+	__out_of_memory(0, 0); /* unknown gfp_mask and order */
+	read_unlock(&tasklist_lock);
+
+	/*
+	 * Give "p" a good chance of killing itself before we
+	 * retry to allocate memory.
+	 */
+	if (!test_thread_flag(TIF_MEMDIE))
+		schedule_timeout_uninterruptible(1);
+}
+
 /**
  * out_of_memory - kill the "best" process when we run out of memory
  * @zonelist: zonelist pointer
@@ -522,8 +585,6 @@ void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask)
  */
 void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
 {
-	struct task_struct *p;
-	unsigned long points = 0;
 	unsigned long freed = 0;
 	enum oom_constraint constraint;
 
@@ -544,7 +605,7 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
 
 	switch (constraint) {
 	case CONSTRAINT_MEMORY_POLICY:
-		oom_kill_process(current, gfp_mask, order, points, NULL,
+		oom_kill_process(current, gfp_mask, order, 0, NULL,
 				"No available memory (MPOL_BIND)");
 		break;
 
@@ -553,35 +614,10 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
 			panic("out of memory. panic_on_oom is selected\n");
 		/* Fall-through */
 	case CONSTRAINT_CPUSET:
-		if (sysctl_oom_kill_allocating_task) {
-			oom_kill_process(current, gfp_mask, order, points, NULL,
-					"Out of memory (oom_kill_allocating_task)");
-			break;
-		}
-retry:
-		/*
-		 * Rambo mode: Shoot down a process and hope it solves whatever
-		 * issues we may have.
-		 */
-		p = select_bad_process(&points, NULL);
-
-		if (PTR_ERR(p) == -1UL)
-			goto out;
-
-		/* Found nothing?!?! Either we hang forever, or we panic. */
-		if (!p) {
-			read_unlock(&tasklist_lock);
-			panic("Out of memory and no killable processes...\n");
-		}
-
-		if (oom_kill_process(p, gfp_mask, order, points, NULL,
-				     "Out of memory"))
-			goto retry;
-
+		__out_of_memory(gfp_mask, order);
 		break;
 	}
 
-out:
 	read_unlock(&tasklist_lock);
 
 	/*
-- 
cgit v1.2.3


From 75aa199410359dc5fbcf9025ff7af98a9d20f0d5 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Tue, 6 Jan 2009 14:39:01 -0800
Subject: oom: print triggering task's cpuset and mems allowed

When cpusets are enabled, it's necessary to print the triggering task's
set of allowable nodes so the subsequently printed meminfo can be
interpreted correctly.

We also print the task's cpuset name for informational purposes.

[rientjes@google.com: task lock current before dereferencing cpuset]
Cc: Paul Menage <menage@google.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cpuset.h |  6 ++++++
 kernel/cpuset.c        | 34 ++++++++++++++++++++++++++++++++++
 mm/oom_kill.c          |  3 +++
 3 files changed, 43 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 8e540d32c9fe..51ea2bdea0f9 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -78,6 +78,8 @@ extern int current_cpuset_is_being_rebound(void);
 
 extern void rebuild_sched_domains(void);
 
+extern void cpuset_print_task_mems_allowed(struct task_struct *p);
+
 #else /* !CONFIG_CPUSETS */
 
 static inline int cpuset_init_early(void) { return 0; }
@@ -159,6 +161,10 @@ static inline void rebuild_sched_domains(void)
 	partition_sched_domains(1, NULL, NULL);
 }
 
+static inline void cpuset_print_task_mems_allowed(struct task_struct *p)
+{
+}
+
 #endif /* !CONFIG_CPUSETS */
 
 #endif /* _LINUX_CPUSET_H */
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 39c1a4c1c5a9..345ace5117de 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -239,6 +239,17 @@ static struct cpuset top_cpuset = {
 
 static DEFINE_MUTEX(callback_mutex);
 
+/*
+ * cpuset_buffer_lock protects both the cpuset_name and cpuset_nodelist
+ * buffers.  They are statically allocated to prevent using excess stack
+ * when calling cpuset_print_task_mems_allowed().
+ */
+#define CPUSET_NAME_LEN		(128)
+#define	CPUSET_NODELIST_LEN	(256)
+static char cpuset_name[CPUSET_NAME_LEN];
+static char cpuset_nodelist[CPUSET_NODELIST_LEN];
+static DEFINE_SPINLOCK(cpuset_buffer_lock);
+
 /*
  * This is ugly, but preserves the userspace API for existing cpuset
  * users. If someone tries to mount the "cpuset" filesystem, we
@@ -2356,6 +2367,29 @@ int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
 	return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed);
 }
 
+/**
+ * cpuset_print_task_mems_allowed - prints task's cpuset and mems_allowed
+ * @task: pointer to task_struct of some task.
+ *
+ * Description: Prints @task's name, cpuset name, and cached copy of its
+ * mems_allowed to the kernel log.  Must hold task_lock(task) to allow
+ * dereferencing task_cs(task).
+ */
+void cpuset_print_task_mems_allowed(struct task_struct *tsk)
+{
+	struct dentry *dentry;
+
+	dentry = task_cs(tsk)->css.cgroup->dentry;
+	spin_lock(&cpuset_buffer_lock);
+	snprintf(cpuset_name, CPUSET_NAME_LEN,
+		 dentry ? (const char *)dentry->d_name.name : "/");
+	nodelist_scnprintf(cpuset_nodelist, CPUSET_NODELIST_LEN,
+			   tsk->mems_allowed);
+	printk(KERN_INFO "%s cpuset=%s mems_allowed=%s\n",
+	       tsk->comm, cpuset_name, cpuset_nodelist);
+	spin_unlock(&cpuset_buffer_lock);
+}
+
 /*
  * Collection of memory_pressure is suppressed unless
  * this flag is enabled by writing "1" to the special
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index e5f50cfdca4d..6b9e758c98a5 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -392,6 +392,9 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 		printk(KERN_WARNING "%s invoked oom-killer: "
 			"gfp_mask=0x%x, order=%d, oomkilladj=%d\n",
 			current->comm, gfp_mask, order, current->oomkilladj);
+		task_lock(current);
+		cpuset_print_task_mems_allowed(current);
+		task_unlock(current);
 		dump_stack();
 		show_mem();
 		if (sysctl_oom_dump_tasks)
-- 
cgit v1.2.3


From c04fc586c1a480ba198f03ae7b6cbd7b57380b91 Mon Sep 17 00:00:00 2001
From: Gary Hade <garyhade@us.ibm.com>
Date: Tue, 6 Jan 2009 14:39:14 -0800
Subject: mm: show node to memory section relationship with symlinks in sysfs

Show node to memory section relationship with symlinks in sysfs

Add /sys/devices/system/node/nodeX/memoryY symlinks for all
the memory sections located on nodeX.  For example:
/sys/devices/system/node/node1/memory135 -> ../../memory/memory135
indicates that memory section 135 resides on node1.

Also revises documentation to cover this change as well as updating
Documentation/ABI/testing/sysfs-devices-memory to include descriptions
of memory hotremove files 'phys_device', 'phys_index', and 'state'
that were previously not described there.

In addition to it always being a good policy to provide users with
the maximum possible amount of physical location information for
resources that can be hot-added and/or hot-removed, the following
are some (but likely not all) of the user benefits provided by
this change.
Immediate:
  - Provides information needed to determine the specific node
    on which a defective DIMM is located.  This will reduce system
    downtime when the node or defective DIMM is swapped out.
  - Prevents unintended onlining of a memory section that was
    previously offlined due to a defective DIMM.  This could happen
    during node hot-add when the user or node hot-add assist script
    onlines _all_ offlined sections due to user or script inability
    to identify the specific memory sections located on the hot-added
    node.  The consequences of reintroducing the defective memory
    could be ugly.
  - Provides information needed to vary the amount and distribution
    of memory on specific nodes for testing or debugging purposes.
Future:
  - Will provide information needed to identify the memory
    sections that need to be offlined prior to physical removal
    of a specific node.

Symlink creation during boot was tested on 2-node x86_64, 2-node
ppc64, and 2-node ia64 systems.  Symlink creation during physical
memory hot-add tested on a 2-node x86_64 system.

Signed-off-by: Gary Hade <garyhade@us.ibm.com>
Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/ABI/testing/sysfs-devices-memory |  51 +++++++++++-
 Documentation/memory-hotplug.txt               |  16 +++-
 arch/ia64/mm/init.c                            |   2 +-
 arch/powerpc/mm/mem.c                          |   2 +-
 arch/s390/mm/init.c                            |   2 +-
 arch/sh/mm/init.c                              |   3 +-
 arch/x86/mm/init_32.c                          |   2 +-
 arch/x86/mm/init_64.c                          |   2 +-
 drivers/base/memory.c                          |  19 +++--
 drivers/base/node.c                            | 103 +++++++++++++++++++++++++
 include/linux/memory.h                         |   6 +-
 include/linux/memory_hotplug.h                 |   2 +-
 include/linux/node.h                           |  13 ++++
 mm/memory_hotplug.c                            |  11 +--
 14 files changed, 209 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-devices-memory b/Documentation/ABI/testing/sysfs-devices-memory
index 7a16fe1e2270..9fe91c02ee40 100644
--- a/Documentation/ABI/testing/sysfs-devices-memory
+++ b/Documentation/ABI/testing/sysfs-devices-memory
@@ -6,7 +6,6 @@ Description:
 		internal state of the kernel memory blocks. Files could be
 		added or removed dynamically to represent hot-add/remove
 		operations.
-
 Users:		hotplug memory add/remove tools
 		https://w3.opensource.ibm.com/projects/powerpc-utils/
 
@@ -19,6 +18,56 @@ Description:
 		This is useful for a user-level agent to determine
 		identify removable sections of the memory before attempting
 		potentially expensive hot-remove memory operation
+Users:		hotplug memory remove tools
+		https://w3.opensource.ibm.com/projects/powerpc-utils/
+
+What:		/sys/devices/system/memory/memoryX/phys_device
+Date:		September 2008
+Contact:	Badari Pulavarty <pbadari@us.ibm.com>
+Description:
+		The file /sys/devices/system/memory/memoryX/phys_device
+		is read-only and is designed to show the name of physical
+		memory device.  Implementation is currently incomplete.
 
+What:		/sys/devices/system/memory/memoryX/phys_index
+Date:		September 2008
+Contact:	Badari Pulavarty <pbadari@us.ibm.com>
+Description:
+		The file /sys/devices/system/memory/memoryX/phys_index
+		is read-only and contains the section ID in hexadecimal
+		which is equivalent to decimal X contained in the
+		memory section directory name.
+
+What:		/sys/devices/system/memory/memoryX/state
+Date:		September 2008
+Contact:	Badari Pulavarty <pbadari@us.ibm.com>
+Description:
+		The file /sys/devices/system/memory/memoryX/state
+		is read-write.  When read, it's contents show the
+		online/offline state of the memory section.  When written,
+		root can toggle the the online/offline state of a removable
+		memory section (see removable file description above)
+		using the following commands.
+		# echo online > /sys/devices/system/memory/memoryX/state
+		# echo offline > /sys/devices/system/memory/memoryX/state
+
+		For example, if /sys/devices/system/memory/memory22/removable
+		contains a value of 1 and
+		/sys/devices/system/memory/memory22/state contains the
+		string "online" the following command can be executed by
+		by root to offline that section.
+		# echo offline > /sys/devices/system/memory/memory22/state
 Users:		hotplug memory remove tools
 		https://w3.opensource.ibm.com/projects/powerpc-utils/
+
+What:		/sys/devices/system/node/nodeX/memoryY
+Date:		September 2008
+Contact:	Gary Hade <garyhade@us.ibm.com>
+Description:
+		When CONFIG_NUMA is enabled
+		/sys/devices/system/node/nodeX/memoryY is a symbolic link that
+		points to the corresponding /sys/devices/system/memory/memoryY
+		memory section directory.  For example, the following symbolic
+		link is created for memory section 9 on node0.
+		/sys/devices/system/node/node0/memory9 -> ../../memory/memory9
+
diff --git a/Documentation/memory-hotplug.txt b/Documentation/memory-hotplug.txt
index 168117bd6ee8..4c2ecf537a4a 100644
--- a/Documentation/memory-hotplug.txt
+++ b/Documentation/memory-hotplug.txt
@@ -124,7 +124,7 @@ config options.
     This option can be kernel module too.
 
 --------------------------------
-3 sysfs files for memory hotplug
+4 sysfs files for memory hotplug
 --------------------------------
 All sections have their device information under /sys/devices/system/memory as
 
@@ -138,11 +138,12 @@ For example, assume 1GiB section size. A device for a memory starting at
 (0x100000000 / 1Gib = 4)
 This device covers address range [0x100000000 ... 0x140000000)
 
-Under each section, you can see 3 files.
+Under each section, you can see 4 files.
 
 /sys/devices/system/memory/memoryXXX/phys_index
 /sys/devices/system/memory/memoryXXX/phys_device
 /sys/devices/system/memory/memoryXXX/state
+/sys/devices/system/memory/memoryXXX/removable
 
 'phys_index' : read-only and contains section id, same as XXX.
 'state'      : read-write
@@ -150,10 +151,20 @@ Under each section, you can see 3 files.
                at write: user can specify "online", "offline" command
 'phys_device': read-only: designed to show the name of physical memory device.
                This is not well implemented now.
+'removable'  : read-only: contains an integer value indicating
+               whether the memory section is removable or not
+               removable.  A value of 1 indicates that the memory
+               section is removable and a value of 0 indicates that
+               it is not removable.
 
 NOTE:
   These directories/files appear after physical memory hotplug phase.
 
+If CONFIG_NUMA is enabled the
+/sys/devices/system/memory/memoryXXX memory section
+directories can also be accessed via symbolic links located in
+the /sys/devices/system/node/node* directories.  For example:
+/sys/devices/system/node/node0/memory9 -> ../../memory/memory9
 
 --------------------------------
 4. Physical memory hot-add phase
@@ -365,7 +376,6 @@ node if necessary.
   - allowing memory hot-add to ZONE_MOVABLE. maybe we need some switch like
     sysctl or new control file.
   - showing memory section and physical device relationship.
-  - showing memory section and node relationship (maybe good for NUMA)
   - showing memory section is under ZONE_MOVABLE or not
   - test and make it better memory offlining.
   - support HugeTLB page migration and offlining.
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 054bcd9439aa..56e12903973c 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -692,7 +692,7 @@ int arch_add_memory(int nid, u64 start, u64 size)
 	pgdat = NODE_DATA(nid);
 
 	zone = pgdat->node_zones + ZONE_NORMAL;
-	ret = __add_pages(zone, start_pfn, nr_pages);
+	ret = __add_pages(nid, zone, start_pfn, nr_pages);
 
 	if (ret)
 		printk("%s: Problem encountered in __add_pages() as ret=%d\n",
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 53b06ebb3f2f..f00f09a77f12 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -132,7 +132,7 @@ int arch_add_memory(int nid, u64 start, u64 size)
 	/* this should work for most non-highmem platforms */
 	zone = pgdata->node_zones;
 
-	return __add_pages(zone, start_pfn, nr_pages);
+	return __add_pages(nid, zone, start_pfn, nr_pages);
 }
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 158b0d6d7046..f0258ca3b17e 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -183,7 +183,7 @@ int arch_add_memory(int nid, u64 start, u64 size)
 	rc = vmem_add_mapping(start, size);
 	if (rc)
 		return rc;
-	rc = __add_pages(zone, PFN_DOWN(start), PFN_DOWN(size));
+	rc = __add_pages(nid, zone, PFN_DOWN(start), PFN_DOWN(size));
 	if (rc)
 		vmem_remove_mapping(start, size);
 	return rc;
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 6cbef8caeb56..3edf297c829b 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -311,7 +311,8 @@ int arch_add_memory(int nid, u64 start, u64 size)
 	pgdat = NODE_DATA(nid);
 
 	/* We only have ZONE_NORMAL, so this is easy.. */
-	ret = __add_pages(pgdat->node_zones + ZONE_NORMAL, start_pfn, nr_pages);
+	ret = __add_pages(nid, pgdat->node_zones + ZONE_NORMAL,
+				start_pfn, nr_pages);
 	if (unlikely(ret))
 		printk("%s: Failed, __add_pages() == %d\n", __func__, ret);
 
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index f99a6c6c432e..544d724caeee 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -1079,7 +1079,7 @@ int arch_add_memory(int nid, u64 start, u64 size)
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
 
-	return __add_pages(zone, start_pfn, nr_pages);
+	return __add_pages(nid, zone, start_pfn, nr_pages);
 }
 #endif
 
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 9f7a0d24d42a..54c437e96541 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -857,7 +857,7 @@ int arch_add_memory(int nid, u64 start, u64 size)
 	if (last_mapped_pfn > max_pfn_mapped)
 		max_pfn_mapped = last_mapped_pfn;
 
-	ret = __add_pages(zone, start_pfn, nr_pages);
+	ret = __add_pages(nid, zone, start_pfn, nr_pages);
 	WARN_ON_ONCE(ret);
 
 	return ret;
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 5260e9e0df48..989429cfed88 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -347,8 +347,9 @@ static inline int memory_probe_init(void)
  * section belongs to...
  */
 
-static int add_memory_block(unsigned long node_id, struct mem_section *section,
-		     unsigned long state, int phys_device)
+static int add_memory_block(int nid, struct mem_section *section,
+			unsigned long state, int phys_device,
+			enum mem_add_context context)
 {
 	struct memory_block *mem = kzalloc(sizeof(*mem), GFP_KERNEL);
 	int ret = 0;
@@ -370,6 +371,10 @@ static int add_memory_block(unsigned long node_id, struct mem_section *section,
 		ret = mem_create_simple_file(mem, phys_device);
 	if (!ret)
 		ret = mem_create_simple_file(mem, removable);
+	if (!ret) {
+		if (context == HOTPLUG)
+			ret = register_mem_sect_under_node(mem, nid);
+	}
 
 	return ret;
 }
@@ -382,7 +387,7 @@ static int add_memory_block(unsigned long node_id, struct mem_section *section,
  *
  * This could be made generic for all sysdev classes.
  */
-static struct memory_block *find_memory_block(struct mem_section *section)
+struct memory_block *find_memory_block(struct mem_section *section)
 {
 	struct kobject *kobj;
 	struct sys_device *sysdev;
@@ -411,6 +416,7 @@ int remove_memory_block(unsigned long node_id, struct mem_section *section,
 	struct memory_block *mem;
 
 	mem = find_memory_block(section);
+	unregister_mem_sect_under_nodes(mem);
 	mem_remove_simple_file(mem, phys_index);
 	mem_remove_simple_file(mem, state);
 	mem_remove_simple_file(mem, phys_device);
@@ -424,9 +430,9 @@ int remove_memory_block(unsigned long node_id, struct mem_section *section,
  * need an interface for the VM to add new memory regions,
  * but without onlining it.
  */
-int register_new_memory(struct mem_section *section)
+int register_new_memory(int nid, struct mem_section *section)
 {
-	return add_memory_block(0, section, MEM_OFFLINE, 0);
+	return add_memory_block(nid, section, MEM_OFFLINE, 0, HOTPLUG);
 }
 
 int unregister_memory_section(struct mem_section *section)
@@ -458,7 +464,8 @@ int __init memory_dev_init(void)
 	for (i = 0; i < NR_MEM_SECTIONS; i++) {
 		if (!present_section_nr(i))
 			continue;
-		err = add_memory_block(0, __nr_to_section(i), MEM_ONLINE, 0);
+		err = add_memory_block(0, __nr_to_section(i), MEM_ONLINE,
+					0, BOOT);
 		if (!ret)
 			ret = err;
 	}
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 91636cd8b6c9..43fa90b837ee 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -6,6 +6,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/mm.h>
+#include <linux/memory.h>
 #include <linux/node.h>
 #include <linux/hugetlb.h>
 #include <linux/cpumask.h>
@@ -248,6 +249,105 @@ int unregister_cpu_under_node(unsigned int cpu, unsigned int nid)
 	return 0;
 }
 
+#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
+#define page_initialized(page)  (page->lru.next)
+
+static int get_nid_for_pfn(unsigned long pfn)
+{
+	struct page *page;
+
+	if (!pfn_valid_within(pfn))
+		return -1;
+	page = pfn_to_page(pfn);
+	if (!page_initialized(page))
+		return -1;
+	return pfn_to_nid(pfn);
+}
+
+/* register memory section under specified node if it spans that node */
+int register_mem_sect_under_node(struct memory_block *mem_blk, int nid)
+{
+	unsigned long pfn, sect_start_pfn, sect_end_pfn;
+
+	if (!mem_blk)
+		return -EFAULT;
+	if (!node_online(nid))
+		return 0;
+	sect_start_pfn = section_nr_to_pfn(mem_blk->phys_index);
+	sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1;
+	for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) {
+		int page_nid;
+
+		page_nid = get_nid_for_pfn(pfn);
+		if (page_nid < 0)
+			continue;
+		if (page_nid != nid)
+			continue;
+		return sysfs_create_link_nowarn(&node_devices[nid].sysdev.kobj,
+					&mem_blk->sysdev.kobj,
+					kobject_name(&mem_blk->sysdev.kobj));
+	}
+	/* mem section does not span the specified node */
+	return 0;
+}
+
+/* unregister memory section under all nodes that it spans */
+int unregister_mem_sect_under_nodes(struct memory_block *mem_blk)
+{
+	nodemask_t unlinked_nodes;
+	unsigned long pfn, sect_start_pfn, sect_end_pfn;
+
+	if (!mem_blk)
+		return -EFAULT;
+	nodes_clear(unlinked_nodes);
+	sect_start_pfn = section_nr_to_pfn(mem_blk->phys_index);
+	sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1;
+	for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) {
+		unsigned int nid;
+
+		nid = get_nid_for_pfn(pfn);
+		if (nid < 0)
+			continue;
+		if (!node_online(nid))
+			continue;
+		if (node_test_and_set(nid, unlinked_nodes))
+			continue;
+		sysfs_remove_link(&node_devices[nid].sysdev.kobj,
+			 kobject_name(&mem_blk->sysdev.kobj));
+	}
+	return 0;
+}
+
+static int link_mem_sections(int nid)
+{
+	unsigned long start_pfn = NODE_DATA(nid)->node_start_pfn;
+	unsigned long end_pfn = start_pfn + NODE_DATA(nid)->node_spanned_pages;
+	unsigned long pfn;
+	int err = 0;
+
+	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
+		unsigned long section_nr = pfn_to_section_nr(pfn);
+		struct mem_section *mem_sect;
+		struct memory_block *mem_blk;
+		int ret;
+
+		if (!present_section_nr(section_nr))
+			continue;
+		mem_sect = __nr_to_section(section_nr);
+		mem_blk = find_memory_block(mem_sect);
+		ret = register_mem_sect_under_node(mem_blk, nid);
+		if (!err)
+			err = ret;
+
+		/* discard ref obtained in find_memory_block() */
+		kobject_put(&mem_blk->sysdev.kobj);
+	}
+	return err;
+}
+#else
+static int link_mem_sections(int nid) { return 0; }
+#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
+
 int register_one_node(int nid)
 {
 	int error = 0;
@@ -267,6 +367,9 @@ int register_one_node(int nid)
 			if (cpu_to_node(cpu) == nid)
 				register_cpu_under_node(cpu, nid);
 		}
+
+		/* link memory sections under this node */
+		error = link_mem_sections(nid);
 	}
 
 	return error;
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 36c82c9e6ea7..3fdc10806d31 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -79,14 +79,14 @@ static inline int memory_notify(unsigned long val, void *v)
 #else
 extern int register_memory_notifier(struct notifier_block *nb);
 extern void unregister_memory_notifier(struct notifier_block *nb);
-extern int register_new_memory(struct mem_section *);
+extern int register_new_memory(int, struct mem_section *);
 extern int unregister_memory_section(struct mem_section *);
 extern int memory_dev_init(void);
 extern int remove_memory_block(unsigned long, struct mem_section *, int);
 extern int memory_notify(unsigned long val, void *v);
+extern struct memory_block *find_memory_block(struct mem_section *);
 #define CONFIG_MEM_BLOCK_SIZE	(PAGES_PER_SECTION<<PAGE_SHIFT)
-
-
+enum mem_add_context { BOOT, HOTPLUG };
 #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
 
 #ifdef CONFIG_MEMORY_HOTPLUG
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 763ba81fc0f0..d95f72e79b82 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -72,7 +72,7 @@ extern void __offline_isolated_pages(unsigned long, unsigned long);
 extern int offline_pages(unsigned long, unsigned long, unsigned long);
 
 /* reasonably generic interface to expand the physical pages in a zone  */
-extern int __add_pages(struct zone *zone, unsigned long start_pfn,
+extern int __add_pages(int nid, struct zone *zone, unsigned long start_pfn,
 	unsigned long nr_pages);
 extern int __remove_pages(struct zone *zone, unsigned long start_pfn,
 	unsigned long nr_pages);
diff --git a/include/linux/node.h b/include/linux/node.h
index bc001bc225c3..681a697b9a86 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -26,6 +26,7 @@ struct node {
 	struct sys_device	sysdev;
 };
 
+struct memory_block;
 extern struct node node_devices[];
 
 extern int register_node(struct node *, int, struct node *);
@@ -35,6 +36,9 @@ extern int register_one_node(int nid);
 extern void unregister_one_node(int nid);
 extern int register_cpu_under_node(unsigned int cpu, unsigned int nid);
 extern int unregister_cpu_under_node(unsigned int cpu, unsigned int nid);
+extern int register_mem_sect_under_node(struct memory_block *mem_blk,
+						int nid);
+extern int unregister_mem_sect_under_nodes(struct memory_block *mem_blk);
 #else
 static inline int register_one_node(int nid)
 {
@@ -52,6 +56,15 @@ static inline int unregister_cpu_under_node(unsigned int cpu, unsigned int nid)
 {
 	return 0;
 }
+static inline int register_mem_sect_under_node(struct memory_block *mem_blk,
+							int nid)
+{
+	return 0;
+}
+static inline int unregister_mem_sect_under_nodes(struct memory_block *mem_blk)
+{
+	return 0;
+}
 #endif
 
 #define to_node(sys_device) container_of(sys_device, struct node, sysdev)
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index b17371185468..2ba38bc07b47 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -216,7 +216,8 @@ static int __meminit __add_zone(struct zone *zone, unsigned long phys_start_pfn)
 	return 0;
 }
 
-static int __meminit __add_section(struct zone *zone, unsigned long phys_start_pfn)
+static int __meminit __add_section(int nid, struct zone *zone,
+					unsigned long phys_start_pfn)
 {
 	int nr_pages = PAGES_PER_SECTION;
 	int ret;
@@ -234,7 +235,7 @@ static int __meminit __add_section(struct zone *zone, unsigned long phys_start_p
 	if (ret < 0)
 		return ret;
 
-	return register_new_memory(__pfn_to_section(phys_start_pfn));
+	return register_new_memory(nid, __pfn_to_section(phys_start_pfn));
 }
 
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
@@ -273,8 +274,8 @@ static int __remove_section(struct zone *zone, struct mem_section *ms)
  * call this function after deciding the zone to which to
  * add the new pages.
  */
-int __ref __add_pages(struct zone *zone, unsigned long phys_start_pfn,
-		 unsigned long nr_pages)
+int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn,
+			unsigned long nr_pages)
 {
 	unsigned long i;
 	int err = 0;
@@ -284,7 +285,7 @@ int __ref __add_pages(struct zone *zone, unsigned long phys_start_pfn,
 	end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1);
 
 	for (i = start_sec; i <= end_sec; i++) {
-		err = __add_section(zone, i << PFN_SECTION_SHIFT);
+		err = __add_section(nid, zone, i << PFN_SECTION_SHIFT);
 
 		/*
 		 * EEXIST is finally dealt with by ioresource collision
-- 
cgit v1.2.3


From 1b0bd118862cd9fe9ac2872137a1b8107e83ff9d Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Tue, 6 Jan 2009 14:39:15 -0800
Subject: mm: get rid of pagevec_release_nonlru()

speculative page references patch (commit:
e286781d5f2e9c846e012a39653a166e9d31777d) removed last
pagevec_release_nonlru() caller.

So this function can be removed now.

This patch doesn't have any functional change.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pagevec.h |  7 -------
 mm/swap.c               | 22 ----------------------
 2 files changed, 29 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index e90a2cb02915..7b2886fa7fdc 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -21,7 +21,6 @@ struct pagevec {
 };
 
 void __pagevec_release(struct pagevec *pvec);
-void __pagevec_release_nonlru(struct pagevec *pvec);
 void __pagevec_free(struct pagevec *pvec);
 void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru);
 void pagevec_strip(struct pagevec *pvec);
@@ -69,12 +68,6 @@ static inline void pagevec_release(struct pagevec *pvec)
 		__pagevec_release(pvec);
 }
 
-static inline void pagevec_release_nonlru(struct pagevec *pvec)
-{
-	if (pagevec_count(pvec))
-		__pagevec_release_nonlru(pvec);
-}
-
 static inline void pagevec_free(struct pagevec *pvec)
 {
 	if (pagevec_count(pvec))
diff --git a/mm/swap.c b/mm/swap.c
index b135ec90cdeb..21a566fc4570 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -397,28 +397,6 @@ void __pagevec_release(struct pagevec *pvec)
 
 EXPORT_SYMBOL(__pagevec_release);
 
-/*
- * pagevec_release() for pages which are known to not be on the LRU
- *
- * This function reinitialises the caller's pagevec.
- */
-void __pagevec_release_nonlru(struct pagevec *pvec)
-{
-	int i;
-	struct pagevec pages_to_free;
-
-	pagevec_init(&pages_to_free, pvec->cold);
-	for (i = 0; i < pagevec_count(pvec); i++) {
-		struct page *page = pvec->pages[i];
-
-		VM_BUG_ON(PageLRU(page));
-		if (put_page_testzero(page))
-			pagevec_add(&pages_to_free, page);
-	}
-	pagevec_free(&pages_to_free);
-	pagevec_reinit(pvec);
-}
-
 /*
  * Add the passed pages to the LRU, then drop the caller's refcount
  * on them.  Reinitialises the caller's pagevec.
-- 
cgit v1.2.3


From 64cdd548ffe26849d4cd113ac640f60606063b14 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Tue, 6 Jan 2009 14:39:16 -0800
Subject: mm: cleanup: remove #ifdef CONFIG_MIGRATION

#ifdef in *.c file decrease source readability a bit.  removing is better.

This patch doesn't have any functional change.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/migrate.h |  4 ++++
 mm/mprotect.c           |  6 ++----
 mm/rmap.c               | 10 +++-------
 3 files changed, 9 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 3f34005068d4..527602cdea1c 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -7,6 +7,8 @@
 typedef struct page *new_page_t(struct page *, unsigned long private, int **);
 
 #ifdef CONFIG_MIGRATION
+#define PAGE_MIGRATION 1
+
 extern int putback_lru_pages(struct list_head *l);
 extern int migrate_page(struct address_space *,
 			struct page *, struct page *);
@@ -20,6 +22,8 @@ extern int migrate_vmas(struct mm_struct *mm,
 		const nodemask_t *from, const nodemask_t *to,
 		unsigned long flags);
 #else
+#define PAGE_MIGRATION 0
+
 static inline int putback_lru_pages(struct list_head *l) { return 0; }
 static inline int migrate_pages(struct list_head *l, new_page_t x,
 		unsigned long private) { return -ENOSYS; }
diff --git a/mm/mprotect.c b/mm/mprotect.c
index cfb4c4852062..d0f6e7ce09f1 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -22,6 +22,7 @@
 #include <linux/swap.h>
 #include <linux/swapops.h>
 #include <linux/mmu_notifier.h>
+#include <linux/migrate.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/cacheflush.h>
@@ -59,8 +60,7 @@ static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
 				ptent = pte_mkwrite(ptent);
 
 			ptep_modify_prot_commit(mm, addr, pte, ptent);
-#ifdef CONFIG_MIGRATION
-		} else if (!pte_file(oldpte)) {
+		} else if (PAGE_MIGRATION && !pte_file(oldpte)) {
 			swp_entry_t entry = pte_to_swp_entry(oldpte);
 
 			if (is_write_migration_entry(entry)) {
@@ -72,9 +72,7 @@ static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
 				set_pte_at(mm, addr, pte,
 					swp_entry_to_pte(entry));
 			}
-#endif
 		}
-
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 	arch_leave_lazy_mmu_mode();
 	pte_unmap_unlock(pte - 1, ptl);
diff --git a/mm/rmap.c b/mm/rmap.c
index 10993942d6c9..53c56dacd725 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -50,6 +50,7 @@
 #include <linux/kallsyms.h>
 #include <linux/memcontrol.h>
 #include <linux/mmu_notifier.h>
+#include <linux/migrate.h>
 
 #include <asm/tlbflush.h>
 
@@ -818,8 +819,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 				spin_unlock(&mmlist_lock);
 			}
 			dec_mm_counter(mm, anon_rss);
-#ifdef CONFIG_MIGRATION
-		} else {
+		} else if (PAGE_MIGRATION) {
 			/*
 			 * Store the pfn of the page in a special migration
 			 * pte. do_swap_page() will wait until the migration
@@ -827,19 +827,15 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			 */
 			BUG_ON(!migration);
 			entry = make_migration_entry(page, pte_write(pteval));
-#endif
 		}
 		set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
 		BUG_ON(pte_file(*pte));
-	} else
-#ifdef CONFIG_MIGRATION
-	if (migration) {
+	} else if (PAGE_MIGRATION && migration) {
 		/* Establish migration entry for a file page */
 		swp_entry_t entry;
 		entry = make_migration_entry(page, pte_write(pteval));
 		set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
 	} else
-#endif
 		dec_mm_counter(mm, file_rss);
 
 
-- 
cgit v1.2.3


From e5991371ee0d1c0ce19e133c6f9075b49c5b4ae8 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Tue, 6 Jan 2009 14:39:22 -0800
Subject: mm: remove cgroup_mm_owner_callbacks

cgroup_mm_owner_callbacks() was brought in to support the memrlimit
controller, but sneaked into mainline ahead of it.  That controller has
now been shelved, and the mm_owner_changed() args were inadequate for it
anyway (they needed an mm pointer instead of a task pointer).

Remove the dead code, and restore mm_update_next_owner() locking to how it
was before: taking mmap_sem there does nothing for memcontrol.c, now the
only user of mm->owner.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: Paul Menage <menage@google.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cgroup.h | 14 +-------------
 kernel/cgroup.c        | 33 ---------------------------------
 kernel/exit.c          | 16 ++++++----------
 3 files changed, 7 insertions(+), 56 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 1164963c3a85..08b78c09b09a 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -329,13 +329,7 @@ struct cgroup_subsys {
 			struct cgroup *cgrp);
 	void (*post_clone)(struct cgroup_subsys *ss, struct cgroup *cgrp);
 	void (*bind)(struct cgroup_subsys *ss, struct cgroup *root);
-	/*
-	 * This routine is called with the task_lock of mm->owner held
-	 */
-	void (*mm_owner_changed)(struct cgroup_subsys *ss,
-					struct cgroup *old,
-					struct cgroup *new,
-					struct task_struct *p);
+
 	int subsys_id;
 	int active;
 	int disabled;
@@ -400,9 +394,6 @@ void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it);
 int cgroup_scan_tasks(struct cgroup_scanner *scan);
 int cgroup_attach_task(struct cgroup *, struct task_struct *);
 
-void cgroup_mm_owner_callbacks(struct task_struct *old,
-			       struct task_struct *new);
-
 #else /* !CONFIG_CGROUPS */
 
 static inline int cgroup_init_early(void) { return 0; }
@@ -420,9 +411,6 @@ static inline int cgroupstats_build(struct cgroupstats *stats,
 	return -EINVAL;
 }
 
-static inline void cgroup_mm_owner_callbacks(struct task_struct *old,
-					     struct task_struct *new) {}
-
 #endif /* !CONFIG_CGROUPS */
 
 #endif /* _LINUX_CGROUP_H */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 87bb0258fd27..f221446aa02d 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -116,7 +116,6 @@ static int root_count;
  * be called.
  */
 static int need_forkexit_callback __read_mostly;
-static int need_mm_owner_callback __read_mostly;
 
 /* convenient tests for these bits */
 inline int cgroup_is_removed(const struct cgroup *cgrp)
@@ -2539,7 +2538,6 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
 	init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id];
 
 	need_forkexit_callback |= ss->fork || ss->exit;
-	need_mm_owner_callback |= !!ss->mm_owner_changed;
 
 	/* At system boot, before all subsystems have been
 	 * registered, no tasks have been forked, so we don't
@@ -2789,37 +2787,6 @@ void cgroup_fork_callbacks(struct task_struct *child)
 	}
 }
 
-#ifdef CONFIG_MM_OWNER
-/**
- * cgroup_mm_owner_callbacks - run callbacks when the mm->owner changes
- * @p: the new owner
- *
- * Called on every change to mm->owner. mm_init_owner() does not
- * invoke this routine, since it assigns the mm->owner the first time
- * and does not change it.
- *
- * The callbacks are invoked with mmap_sem held in read mode.
- */
-void cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new)
-{
-	struct cgroup *oldcgrp, *newcgrp = NULL;
-
-	if (need_mm_owner_callback) {
-		int i;
-		for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
-			struct cgroup_subsys *ss = subsys[i];
-			oldcgrp = task_cgroup(old, ss->subsys_id);
-			if (new)
-				newcgrp = task_cgroup(new, ss->subsys_id);
-			if (oldcgrp == newcgrp)
-				continue;
-			if (ss->mm_owner_changed)
-				ss->mm_owner_changed(ss, oldcgrp, newcgrp, new);
-		}
-	}
-}
-#endif /* CONFIG_MM_OWNER */
-
 /**
  * cgroup_post_fork - called on a new task after adding it to the task list
  * @child: the task in question
diff --git a/kernel/exit.c b/kernel/exit.c
index c9e5a1c14e08..f923724ab3c9 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -642,35 +642,31 @@ retry:
 	/*
 	 * We found no owner yet mm_users > 1: this implies that we are
 	 * most likely racing with swapoff (try_to_unuse()) or /proc or
-	 * ptrace or page migration (get_task_mm()).  Mark owner as NULL,
-	 * so that subsystems can understand the callback and take action.
+	 * ptrace or page migration (get_task_mm()).  Mark owner as NULL.
 	 */
-	down_write(&mm->mmap_sem);
-	cgroup_mm_owner_callbacks(mm->owner, NULL);
 	mm->owner = NULL;
-	up_write(&mm->mmap_sem);
 	return;
 
 assign_new_owner:
 	BUG_ON(c == p);
 	get_task_struct(c);
-	read_unlock(&tasklist_lock);
-	down_write(&mm->mmap_sem);
 	/*
 	 * The task_lock protects c->mm from changing.
 	 * We always want mm->owner->mm == mm
 	 */
 	task_lock(c);
+	/*
+	 * Delay read_unlock() till we have the task_lock()
+	 * to ensure that c does not slip away underneath us
+	 */
+	read_unlock(&tasklist_lock);
 	if (c->mm != mm) {
 		task_unlock(c);
-		up_write(&mm->mmap_sem);
 		put_task_struct(c);
 		goto retry;
 	}
-	cgroup_mm_owner_callbacks(mm->owner, c);
 	mm->owner = c;
 	task_unlock(c);
-	up_write(&mm->mmap_sem);
 	put_task_struct(c);
 }
 #endif /* CONFIG_MM_OWNER */
-- 
cgit v1.2.3


From 3c1d43787b48c798f44dc32a6e6deb5ca2da3e68 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Tue, 6 Jan 2009 14:39:23 -0800
Subject: mm: remove GFP_HIGHUSER_PAGECACHE

GFP_HIGHUSER_PAGECACHE is just an alias for GFP_HIGHUSER_MOVABLE, making
that harder to track down: remove it, and its out-of-work brothers
GFP_NOFS_PAGECACHE and GFP_USER_PAGECACHE.

Since we're making that improvement to hotremove_migrate_alloc(), I think
we can now also remove one of the "o"s from its comment.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/inode.c          | 4 ++--
 include/linux/gfp.h | 6 ------
 mm/memory_hotplug.c | 9 +++------
 3 files changed, 5 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/fs/inode.c b/fs/inode.c
index bd48e5e6d3e8..a903860bc5ac 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -166,7 +166,7 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode)
 	mapping->a_ops = &empty_aops;
 	mapping->host = inode;
 	mapping->flags = 0;
-	mapping_set_gfp_mask(mapping, GFP_HIGHUSER_PAGECACHE);
+	mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
 	mapping->assoc_mapping = NULL;
 	mapping->backing_dev_info = &default_backing_dev_info;
 	mapping->writeback_index = 0;
@@ -601,7 +601,7 @@ EXPORT_SYMBOL_GPL(inode_add_to_lists);
  *	@sb: superblock
  *
  *	Allocates a new inode for given superblock. The default gfp_mask
- *	for allocations related to inode->i_mapping is GFP_HIGHUSER_PAGECACHE.
+ *	for allocations related to inode->i_mapping is GFP_HIGHUSER_MOVABLE.
  *	If HIGHMEM pages are unsuitable or it is known that pages allocated
  *	for the page cache are not reclaimable or migratable,
  *	mapping_set_gfp_mask() must be called with suitable flags on the
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index e8003afeffba..dd20cd78faa8 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -69,12 +69,6 @@ struct vm_area_struct;
 #define GFP_HIGHUSER_MOVABLE	(__GFP_WAIT | __GFP_IO | __GFP_FS | \
 				 __GFP_HARDWALL | __GFP_HIGHMEM | \
 				 __GFP_MOVABLE)
-#define GFP_NOFS_PAGECACHE	(__GFP_WAIT | __GFP_IO | __GFP_MOVABLE)
-#define GFP_USER_PAGECACHE	(__GFP_WAIT | __GFP_IO | __GFP_FS | \
-				 __GFP_HARDWALL | __GFP_MOVABLE)
-#define GFP_HIGHUSER_PAGECACHE	(__GFP_WAIT | __GFP_IO | __GFP_FS | \
-				 __GFP_HARDWALL | __GFP_HIGHMEM | \
-				 __GFP_MOVABLE)
 
 #ifdef CONFIG_NUMA
 #define GFP_THISNODE	(__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY)
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 2ba38bc07b47..c083cf5fd6df 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -627,15 +627,12 @@ int scan_lru_pages(unsigned long start, unsigned long end)
 }
 
 static struct page *
-hotremove_migrate_alloc(struct page *page,
-			unsigned long private,
-			int **x)
+hotremove_migrate_alloc(struct page *page, unsigned long private, int **x)
 {
-	/* This should be improoooooved!! */
-	return alloc_page(GFP_HIGHUSER_PAGECACHE);
+	/* This should be improooooved!! */
+	return alloc_page(GFP_HIGHUSER_MOVABLE);
 }
 
-
 #define NR_OFFLINE_AT_ONCE_PAGES	(256)
 static int
 do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
-- 
cgit v1.2.3


From 6d91add09f4bad5f4d4233b13faa392f0c4b16be Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Tue, 6 Jan 2009 14:39:24 -0800
Subject: mm: add Set,ClearPageSwapCache stubs

If we add NOOP stubs for SetPageSwapCache() and ClearPageSwapCache(), then
we can remove the #ifdef CONFIG_SWAPs from mm/migrate.c.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Acked-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page-flags.h | 1 +
 mm/migrate.c               | 4 ----
 2 files changed, 1 insertion(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index b12f93a3c345..628ec0802492 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -228,6 +228,7 @@ PAGEFLAG_FALSE(HighMem)
 PAGEFLAG(SwapCache, swapcache)
 #else
 PAGEFLAG_FALSE(SwapCache)
+	SETPAGEFLAG_NOOP(SwapCache) CLEARPAGEFLAG_NOOP(SwapCache)
 #endif
 
 #ifdef CONFIG_UNEVICTABLE_LRU
diff --git a/mm/migrate.c b/mm/migrate.c
index 60510306c44a..55373983c9c6 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -300,12 +300,10 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 	 * Now we know that no one else is looking at the page.
 	 */
 	get_page(newpage);	/* add cache reference */
-#ifdef CONFIG_SWAP
 	if (PageSwapCache(page)) {
 		SetPageSwapCache(newpage);
 		set_page_private(newpage, page_private(page));
 	}
-#endif
 
 	radix_tree_replace_slot(pslot, newpage);
 
@@ -373,9 +371,7 @@ static void migrate_page_copy(struct page *newpage, struct page *page)
 
 	mlock_migrate_page(newpage, page);
 
-#ifdef CONFIG_SWAP
 	ClearPageSwapCache(page);
-#endif
 	ClearPagePrivate(page);
 	set_page_private(page, 0);
 	/* page->mapping contains a flag for PageAnon() */
-- 
cgit v1.2.3


From b5934c531849ff4a51ce0f290141efe564290e40 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Tue, 6 Jan 2009 14:39:25 -0800
Subject: mm: add_active_or_unevictable into rmap

lru_cache_add_active_or_unevictable() and page_add_new_anon_rmap() always
appear together.  Save some symbol table space and some jumping around by
removing lru_cache_add_active_or_unevictable(), folding its code into
page_add_new_anon_rmap(): like how we add file pages to lru just after
adding them to page cache.

Remove the nearby "TODO: is this safe?" comments (yes, it is safe), and
change page_add_new_anon_rmap()'s address BUG_ON to VM_BUG_ON as
originally intended.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h |  2 --
 mm/memory.c          |  6 ------
 mm/rmap.c            |  7 ++++++-
 mm/swap.c            | 19 -------------------
 4 files changed, 6 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index a3af95b2cb6d..48f309dc5a0c 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -174,8 +174,6 @@ extern unsigned int nr_free_pagecache_pages(void);
 /* linux/mm/swap.c */
 extern void __lru_cache_add(struct page *, enum lru_list lru);
 extern void lru_cache_add_lru(struct page *, enum lru_list lru);
-extern void lru_cache_add_active_or_unevictable(struct page *,
-					struct vm_area_struct *);
 extern void activate_page(struct page *);
 extern void mark_page_accessed(struct page *);
 extern void lru_add_drain(void);
diff --git a/mm/memory.c b/mm/memory.c
index b5af358b8b22..a138c50dc39a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1949,10 +1949,7 @@ gotten:
 		 */
 		ptep_clear_flush_notify(vma, address, page_table);
 		SetPageSwapBacked(new_page);
-		lru_cache_add_active_or_unevictable(new_page, vma);
 		page_add_new_anon_rmap(new_page, vma, address);
-
-//TODO:  is this safe?  do_anonymous_page() does it this way.
 		set_pte_at(mm, address, page_table, entry);
 		update_mmu_cache(vma, address, entry);
 		if (old_page) {
@@ -2448,7 +2445,6 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		goto release;
 	inc_mm_counter(mm, anon_rss);
 	SetPageSwapBacked(page);
-	lru_cache_add_active_or_unevictable(page, vma);
 	page_add_new_anon_rmap(page, vma, address);
 	set_pte_at(mm, address, page_table, entry);
 
@@ -2597,7 +2593,6 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		if (anon) {
 			inc_mm_counter(mm, anon_rss);
 			SetPageSwapBacked(page);
-			lru_cache_add_active_or_unevictable(page, vma);
 			page_add_new_anon_rmap(page, vma, address);
 		} else {
 			inc_mm_counter(mm, file_rss);
@@ -2607,7 +2602,6 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 				get_page(dirty_page);
 			}
 		}
-//TODO:  is this safe?  do_anonymous_page() does it this way.
 		set_pte_at(mm, address, page_table, entry);
 
 		/* no need to invalidate: a not-present page won't be cached */
diff --git a/mm/rmap.c b/mm/rmap.c
index f01e92244c53..10da68202b10 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -47,6 +47,7 @@
 #include <linux/rmap.h>
 #include <linux/rcupdate.h>
 #include <linux/module.h>
+#include <linux/mm_inline.h>
 #include <linux/kallsyms.h>
 #include <linux/memcontrol.h>
 #include <linux/mmu_notifier.h>
@@ -671,9 +672,13 @@ void page_add_anon_rmap(struct page *page,
 void page_add_new_anon_rmap(struct page *page,
 	struct vm_area_struct *vma, unsigned long address)
 {
-	BUG_ON(address < vma->vm_start || address >= vma->vm_end);
+	VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
 	atomic_set(&page->_mapcount, 0); /* elevate count by 1 (starts at -1) */
 	__page_set_anon_rmap(page, vma, address);
+	if (page_evictable(page, vma))
+		lru_cache_add_lru(page, LRU_ACTIVE + page_is_file_cache(page));
+	else
+		add_page_to_unevictable_list(page);
 }
 
 /**
diff --git a/mm/swap.c b/mm/swap.c
index 21a566fc4570..ff0b290475fd 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -246,25 +246,6 @@ void add_page_to_unevictable_list(struct page *page)
 	spin_unlock_irq(&zone->lru_lock);
 }
 
-/**
- * lru_cache_add_active_or_unevictable
- * @page:  the page to be added to LRU
- * @vma:   vma in which page is mapped for determining reclaimability
- *
- * place @page on active or unevictable LRU list, depending on
- * page_evictable().  Note that if the page is not evictable,
- * it goes directly back onto it's zone's unevictable list.  It does
- * NOT use a per cpu pagevec.
- */
-void lru_cache_add_active_or_unevictable(struct page *page,
-					struct vm_area_struct *vma)
-{
-	if (page_evictable(page, vma))
-		lru_cache_add_lru(page, LRU_ACTIVE + page_is_file_cache(page));
-	else
-		add_page_to_unevictable_list(page);
-}
-
 /*
  * Drain pages out of the cpu's pagevecs.
  * Either "cpu" is the current CPU, and preemption has already been
-- 
cgit v1.2.3


From 2afd1c928f1132b8d0099866e75ce8ad713a1180 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Tue, 6 Jan 2009 14:39:26 -0800
Subject: mm: make page_lock_anon_vma() static

page_lock_anon_vma() and page_unlock_anon_vma() were made available to
show_page_path() in vmscan.c; but now that has been removed, make them
static in rmap.c again, they're better kept private if possible.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rmap.h | 3 ---
 mm/rmap.c            | 4 ++--
 2 files changed, 2 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 89f0564b10c8..3593b18a07dd 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -63,9 +63,6 @@ void anon_vma_unlink(struct vm_area_struct *);
 void anon_vma_link(struct vm_area_struct *);
 void __anon_vma_link(struct vm_area_struct *);
 
-extern struct anon_vma *page_lock_anon_vma(struct page *page);
-extern void page_unlock_anon_vma(struct anon_vma *anon_vma);
-
 /*
  * rmap interfaces called when adding or removing pte of page
  */
diff --git a/mm/rmap.c b/mm/rmap.c
index 10da68202b10..892e1877366b 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -193,7 +193,7 @@ void __init anon_vma_init(void)
  * Getting a lock on a stable anon_vma from a page off the LRU is
  * tricky: page_lock_anon_vma rely on RCU to guard against the races.
  */
-struct anon_vma *page_lock_anon_vma(struct page *page)
+static struct anon_vma *page_lock_anon_vma(struct page *page)
 {
 	struct anon_vma *anon_vma;
 	unsigned long anon_mapping;
@@ -213,7 +213,7 @@ out:
 	return NULL;
 }
 
-void page_unlock_anon_vma(struct anon_vma *anon_vma)
+static void page_unlock_anon_vma(struct anon_vma *anon_vma)
 {
 	spin_unlock(&anon_vma->lock);
 	rcu_read_unlock();
-- 
cgit v1.2.3


From 364aeb2849789b51bf4b9af2ddd02fee7285c54e Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Tue, 6 Jan 2009 14:39:29 -0800
Subject: mm: change dirty limit type specifiers to unsigned long

The background dirty and dirty limits are better defined with type
specifiers of unsigned long since negative writeback thresholds are not
possible.

These values, as returned by get_dirty_limits(), are normally compared
with ZVC values to determine whether writeback shall commence or be
throttled.  Such page counts cannot be negative, so declaring the page
limits as signed is unnecessary.

Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: David Rientjes <rientjes@google.com>
Cc: Andrea Righi <righi.andrea@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/writeback.h |  4 ++--
 mm/backing-dev.c          |  6 +++---
 mm/page-writeback.c       | 22 +++++++++++-----------
 3 files changed, 16 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index e585657e9831..259e9ea58cab 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -125,8 +125,8 @@ struct file;
 int dirty_writeback_centisecs_handler(struct ctl_table *, int, struct file *,
 				      void __user *, size_t *, loff_t *);
 
-void get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty,
-		 struct backing_dev_info *bdi);
+void get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty,
+		      unsigned long *pbdi_dirty, struct backing_dev_info *bdi);
 
 void page_writeback_init(void);
 void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 801c08b046e6..6f80beddd8a4 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -24,9 +24,9 @@ static void bdi_debug_init(void)
 static int bdi_debug_stats_show(struct seq_file *m, void *v)
 {
 	struct backing_dev_info *bdi = m->private;
-	long background_thresh;
-	long dirty_thresh;
-	long bdi_thresh;
+	unsigned long background_thresh;
+	unsigned long dirty_thresh;
+	unsigned long bdi_thresh;
 
 	get_dirty_limits(&background_thresh, &dirty_thresh, &bdi_thresh, bdi);
 
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 08d2b960b294..4d4074cff300 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -362,13 +362,13 @@ unsigned long determine_dirtyable_memory(void)
 }
 
 void
-get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty,
-		 struct backing_dev_info *bdi)
+get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty,
+		 unsigned long *pbdi_dirty, struct backing_dev_info *bdi)
 {
 	int background_ratio;		/* Percentages */
 	int dirty_ratio;
-	long background;
-	long dirty;
+	unsigned long background;
+	unsigned long dirty;
 	unsigned long available_memory = determine_dirtyable_memory();
 	struct task_struct *tsk;
 
@@ -423,9 +423,9 @@ static void balance_dirty_pages(struct address_space *mapping)
 {
 	long nr_reclaimable, bdi_nr_reclaimable;
 	long nr_writeback, bdi_nr_writeback;
-	long background_thresh;
-	long dirty_thresh;
-	long bdi_thresh;
+	unsigned long background_thresh;
+	unsigned long dirty_thresh;
+	unsigned long bdi_thresh;
 	unsigned long pages_written = 0;
 	unsigned long write_chunk = sync_writeback_pages();
 
@@ -580,8 +580,8 @@ EXPORT_SYMBOL(balance_dirty_pages_ratelimited_nr);
 
 void throttle_vm_writeout(gfp_t gfp_mask)
 {
-	long background_thresh;
-	long dirty_thresh;
+	unsigned long background_thresh;
+	unsigned long dirty_thresh;
 
         for ( ; ; ) {
 		get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL);
@@ -624,8 +624,8 @@ static void background_writeout(unsigned long _min_pages)
 	};
 
 	for ( ; ; ) {
-		long background_thresh;
-		long dirty_thresh;
+		unsigned long background_thresh;
+		unsigned long dirty_thresh;
 
 		get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL);
 		if (global_page_state(NR_FILE_DIRTY) +
-- 
cgit v1.2.3


From 2da02997e08d3efe8174c7a47696e6f7cbe69ba9 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Tue, 6 Jan 2009 14:39:31 -0800
Subject: mm: add dirty_background_bytes and dirty_bytes sysctls

This change introduces two new sysctls to /proc/sys/vm:
dirty_background_bytes and dirty_bytes.

dirty_background_bytes is the counterpart to dirty_background_ratio and
dirty_bytes is the counterpart to dirty_ratio.

With growing memory capacities of individual machines, it's no longer
sufficient to specify dirty thresholds as a percentage of the amount of
dirtyable memory over the entire system.

dirty_background_bytes and dirty_bytes specify quantities of memory, in
bytes, that represent the dirty limits for the entire system.  If either
of these values is set, its value represents the amount of dirty memory
that is needed to commence either background or direct writeback.

When a `bytes' or `ratio' file is written, its counterpart becomes a
function of the written value.  For example, if dirty_bytes is written to
be 8096, 8K of memory is required to commence direct writeback.
dirty_ratio is then functionally equivalent to 8K / the amount of
dirtyable memory:

	dirtyable_memory = free pages + mapped pages + file cache

	dirty_background_bytes = dirty_background_ratio * dirtyable_memory
		-or-
	dirty_background_ratio = dirty_background_bytes / dirtyable_memory

		AND

	dirty_bytes = dirty_ratio * dirtyable_memory
		-or-
	dirty_ratio = dirty_bytes / dirtyable_memory

Only one of dirty_background_bytes and dirty_background_ratio may be
specified at a time, and only one of dirty_bytes and dirty_ratio may be
specified.  When one sysctl is written, the other appears as 0 when read.

The `bytes' files operate on a page size granularity since dirty limits
are compared with ZVC values, which are in page units.

Prior to this change, the minimum dirty_ratio was 5 as implemented by
get_dirty_limits() although /proc/sys/vm/dirty_ratio would show any user
written value between 0 and 100.  This restriction is maintained, but
dirty_bytes has a lower limit of only one page.

Also prior to this change, the dirty_background_ratio could not equal or
exceed dirty_ratio.  This restriction is maintained in addition to
restricting dirty_background_bytes.  If either background threshold equals
or exceeds that of the dirty threshold, it is implicitly set to half the
dirty threshold.

Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: David Rientjes <rientjes@google.com>
Cc: Andrea Righi <righi.andrea@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/filesystems/proc.txt |  26 +++++++++-
 Documentation/sysctl/vm.txt        |   3 +-
 include/linux/writeback.h          |  11 ++++
 kernel/sysctl.c                    |  27 ++++++++--
 mm/page-writeback.c                | 102 +++++++++++++++++++++++++++++++------
 5 files changed, 146 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 71df353e367c..32e94635484f 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -1385,6 +1385,15 @@ swapcache reclaim.  Decreasing vfs_cache_pressure causes the kernel to prefer
 to retain dentry and inode caches.  Increasing vfs_cache_pressure beyond 100
 causes the kernel to prefer to reclaim dentries and inodes.
 
+dirty_background_bytes
+----------------------
+
+Contains the amount of dirty memory at which the pdflush background writeback
+daemon will start writeback.
+
+If dirty_background_bytes is written, dirty_background_ratio becomes a function
+of its value (dirty_background_bytes / the amount of dirtyable system memory).
+
 dirty_background_ratio
 ----------------------
 
@@ -1393,14 +1402,29 @@ pages + file cache, not including locked pages and HugePages), the number of
 pages at which the pdflush background writeback daemon will start writing out
 dirty data.
 
+If dirty_background_ratio is written, dirty_background_bytes becomes a function
+of its value (dirty_background_ratio * the amount of dirtyable system memory).
+
+dirty_bytes
+-----------
+
+Contains the amount of dirty memory at which a process generating disk writes
+will itself start writeback.
+
+If dirty_bytes is written, dirty_ratio becomes a function of its value
+(dirty_bytes / the amount of dirtyable system memory).
+
 dirty_ratio
------------------
+-----------
 
 Contains, as a percentage of the dirtyable system memory (free pages + mapped
 pages + file cache, not including locked pages and HugePages), the number of
 pages at which a process which is generating disk writes will itself start
 writing out dirty data.
 
+If dirty_ratio is written, dirty_bytes becomes a function of its value
+(dirty_ratio * the amount of dirtyable system memory).
+
 dirty_writeback_centisecs
 -------------------------
 
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index d79eeda7a699..cd05994a49e6 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -41,7 +41,8 @@ Currently, these files are in /proc/sys/vm:
 
 ==============================================================
 
-dirty_ratio, dirty_background_ratio, dirty_expire_centisecs,
+dirty_bytes, dirty_ratio, dirty_background_bytes,
+dirty_background_ratio, dirty_expire_centisecs,
 dirty_writeback_centisecs, highmem_is_dirtyable,
 vfs_cache_pressure, laptop_mode, block_dump, swap_token_timeout,
 drop-caches, hugepages_treat_as_movable:
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 259e9ea58cab..bb28c975c1d7 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -107,7 +107,9 @@ void throttle_vm_writeout(gfp_t gfp_mask);
 
 /* These are exported to sysctl. */
 extern int dirty_background_ratio;
+extern unsigned long dirty_background_bytes;
 extern int vm_dirty_ratio;
+extern unsigned long vm_dirty_bytes;
 extern int dirty_writeback_interval;
 extern int dirty_expire_interval;
 extern int vm_highmem_is_dirtyable;
@@ -116,9 +118,18 @@ extern int laptop_mode;
 
 extern unsigned long determine_dirtyable_memory(void);
 
+extern int dirty_background_ratio_handler(struct ctl_table *table, int write,
+		struct file *filp, void __user *buffer, size_t *lenp,
+		loff_t *ppos);
+extern int dirty_background_bytes_handler(struct ctl_table *table, int write,
+		struct file *filp, void __user *buffer, size_t *lenp,
+		loff_t *ppos);
 extern int dirty_ratio_handler(struct ctl_table *table, int write,
 		struct file *filp, void __user *buffer, size_t *lenp,
 		loff_t *ppos);
+extern int dirty_bytes_handler(struct ctl_table *table, int write,
+		struct file *filp, void __user *buffer, size_t *lenp,
+		loff_t *ppos);
 
 struct ctl_table;
 struct file;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index ff6d45c7626f..92f6e5bc3c24 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -87,10 +87,6 @@ extern int rcutorture_runnable;
 #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
 
 /* Constants used for minimum and  maximum */
-#if defined(CONFIG_HIGHMEM) || defined(CONFIG_DETECT_SOFTLOCKUP)
-static int one = 1;
-#endif
-
 #ifdef CONFIG_DETECT_SOFTLOCKUP
 static int sixty = 60;
 static int neg_one = -1;
@@ -101,6 +97,7 @@ static int two = 2;
 #endif
 
 static int zero;
+static int one = 1;
 static int one_hundred = 100;
 
 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
@@ -952,11 +949,21 @@ static struct ctl_table vm_table[] = {
 		.data		= &dirty_background_ratio,
 		.maxlen		= sizeof(dirty_background_ratio),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_minmax,
+		.proc_handler	= &dirty_background_ratio_handler,
 		.strategy	= &sysctl_intvec,
 		.extra1		= &zero,
 		.extra2		= &one_hundred,
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "dirty_background_bytes",
+		.data		= &dirty_background_bytes,
+		.maxlen		= sizeof(dirty_background_bytes),
+		.mode		= 0644,
+		.proc_handler	= &dirty_background_bytes_handler,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &one,
+	},
 	{
 		.ctl_name	= VM_DIRTY_RATIO,
 		.procname	= "dirty_ratio",
@@ -968,6 +975,16 @@ static struct ctl_table vm_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one_hundred,
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "dirty_bytes",
+		.data		= &vm_dirty_bytes,
+		.maxlen		= sizeof(vm_dirty_bytes),
+		.mode		= 0644,
+		.proc_handler	= &dirty_bytes_handler,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &one,
+	},
 	{
 		.procname	= "dirty_writeback_centisecs",
 		.data		= &dirty_writeback_interval,
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 4d4074cff300..b493db7841dc 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -68,6 +68,12 @@ static inline long sync_writeback_pages(void)
  */
 int dirty_background_ratio = 5;
 
+/*
+ * dirty_background_bytes starts at 0 (disabled) so that it is a function of
+ * dirty_background_ratio * the amount of dirtyable memory
+ */
+unsigned long dirty_background_bytes;
+
 /*
  * free highmem will not be subtracted from the total free memory
  * for calculating free ratios if vm_highmem_is_dirtyable is true
@@ -79,6 +85,12 @@ int vm_highmem_is_dirtyable;
  */
 int vm_dirty_ratio = 10;
 
+/*
+ * vm_dirty_bytes starts at 0 (disabled) so that it is a function of
+ * vm_dirty_ratio * the amount of dirtyable memory
+ */
+unsigned long vm_dirty_bytes;
+
 /*
  * The interval between `kupdate'-style writebacks, in jiffies
  */
@@ -135,23 +147,75 @@ static int calc_period_shift(void)
 {
 	unsigned long dirty_total;
 
-	dirty_total = (vm_dirty_ratio * determine_dirtyable_memory()) / 100;
+	if (vm_dirty_bytes)
+		dirty_total = vm_dirty_bytes / PAGE_SIZE;
+	else
+		dirty_total = (vm_dirty_ratio * determine_dirtyable_memory()) /
+				100;
 	return 2 + ilog2(dirty_total - 1);
 }
 
 /*
- * update the period when the dirty ratio changes.
+ * update the period when the dirty threshold changes.
  */
+static void update_completion_period(void)
+{
+	int shift = calc_period_shift();
+	prop_change_shift(&vm_completions, shift);
+	prop_change_shift(&vm_dirties, shift);
+}
+
+int dirty_background_ratio_handler(struct ctl_table *table, int write,
+		struct file *filp, void __user *buffer, size_t *lenp,
+		loff_t *ppos)
+{
+	int ret;
+
+	ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+	if (ret == 0 && write)
+		dirty_background_bytes = 0;
+	return ret;
+}
+
+int dirty_background_bytes_handler(struct ctl_table *table, int write,
+		struct file *filp, void __user *buffer, size_t *lenp,
+		loff_t *ppos)
+{
+	int ret;
+
+	ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
+	if (ret == 0 && write)
+		dirty_background_ratio = 0;
+	return ret;
+}
+
 int dirty_ratio_handler(struct ctl_table *table, int write,
 		struct file *filp, void __user *buffer, size_t *lenp,
 		loff_t *ppos)
 {
 	int old_ratio = vm_dirty_ratio;
-	int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+	int ret;
+
+	ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
 	if (ret == 0 && write && vm_dirty_ratio != old_ratio) {
-		int shift = calc_period_shift();
-		prop_change_shift(&vm_completions, shift);
-		prop_change_shift(&vm_dirties, shift);
+		update_completion_period();
+		vm_dirty_bytes = 0;
+	}
+	return ret;
+}
+
+
+int dirty_bytes_handler(struct ctl_table *table, int write,
+		struct file *filp, void __user *buffer, size_t *lenp,
+		loff_t *ppos)
+{
+	int old_bytes = vm_dirty_bytes;
+	int ret;
+
+	ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
+	if (ret == 0 && write && vm_dirty_bytes != old_bytes) {
+		update_completion_period();
+		vm_dirty_ratio = 0;
 	}
 	return ret;
 }
@@ -365,23 +429,29 @@ void
 get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty,
 		 unsigned long *pbdi_dirty, struct backing_dev_info *bdi)
 {
-	int background_ratio;		/* Percentages */
-	int dirty_ratio;
 	unsigned long background;
 	unsigned long dirty;
 	unsigned long available_memory = determine_dirtyable_memory();
 	struct task_struct *tsk;
 
-	dirty_ratio = vm_dirty_ratio;
-	if (dirty_ratio < 5)
-		dirty_ratio = 5;
+	if (vm_dirty_bytes)
+		dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE);
+	else {
+		int dirty_ratio;
 
-	background_ratio = dirty_background_ratio;
-	if (background_ratio >= dirty_ratio)
-		background_ratio = dirty_ratio / 2;
+		dirty_ratio = vm_dirty_ratio;
+		if (dirty_ratio < 5)
+			dirty_ratio = 5;
+		dirty = (dirty_ratio * available_memory) / 100;
+	}
+
+	if (dirty_background_bytes)
+		background = DIV_ROUND_UP(dirty_background_bytes, PAGE_SIZE);
+	else
+		background = (dirty_background_ratio * available_memory) / 100;
 
-	background = (background_ratio * available_memory) / 100;
-	dirty = (dirty_ratio * available_memory) / 100;
+	if (background >= dirty)
+		background = dirty / 2;
 	tsk = current;
 	if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) {
 		background += background / 4;
-- 
cgit v1.2.3


From 7b1fe59793e61f826bef053107b57b23954833bb Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Tue, 6 Jan 2009 14:39:34 -0800
Subject: mm: reuse_swap_page replaces can_share_swap_page

A good place to free up old swap is where do_wp_page(), or do_swap_page(),
is about to redirty the page: the data on disk is then stale and won't be
read again; and if we do decide to write the page out later, using the
previous swap location makes an unnecessary disk seek very likely.

So give can_share_swap_page() the side-effect of delete_from_swap_cache()
when it safely can.  And can_share_swap_page() was always a misleading
name, the more so if it has a side-effect: rename it reuse_swap_page().

Irrelevant cleanup nearby: remove swap_token_default_timeout definition
from swap.h: it's used nowhere.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Robin Holt <holt@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h |  6 ++----
 mm/memory.c          |  4 ++--
 mm/swapfile.c        | 15 +++++++++++----
 3 files changed, 15 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 48f309dc5a0c..366556c5b148 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -304,7 +304,7 @@ extern unsigned int count_swap_pages(int, int);
 extern sector_t map_swap_page(struct swap_info_struct *, pgoff_t);
 extern sector_t swapdev_block(int, pgoff_t);
 extern struct swap_info_struct *get_swap_info_struct(unsigned);
-extern int can_share_swap_page(struct page *);
+extern int reuse_swap_page(struct page *);
 extern int remove_exclusive_swap_page(struct page *);
 extern int remove_exclusive_swap_page_ref(struct page *);
 struct backing_dev_info;
@@ -372,8 +372,6 @@ static inline struct page *lookup_swap_cache(swp_entry_t swp)
 	return NULL;
 }
 
-#define can_share_swap_page(p)			(page_mapcount(p) == 1)
-
 static inline int add_to_swap_cache(struct page *page, swp_entry_t entry,
 							gfp_t gfp_mask)
 {
@@ -388,7 +386,7 @@ static inline void delete_from_swap_cache(struct page *page)
 {
 }
 
-#define swap_token_default_timeout		0
+#define reuse_swap_page(page)	(page_mapcount(page) == 1)
 
 static inline int remove_exclusive_swap_page(struct page *p)
 {
diff --git a/mm/memory.c b/mm/memory.c
index 3922ffcf3dff..8f471edcb985 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1861,7 +1861,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			}
 			page_cache_release(old_page);
 		}
-		reuse = can_share_swap_page(old_page);
+		reuse = reuse_swap_page(old_page);
 		unlock_page(old_page);
 	} else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
 					(VM_WRITE|VM_SHARED))) {
@@ -2392,7 +2392,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	inc_mm_counter(mm, anon_rss);
 	pte = mk_pte(page, vma->vm_page_prot);
-	if (write_access && can_share_swap_page(page)) {
+	if (write_access && reuse_swap_page(page)) {
 		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
 		write_access = 0;
 	}
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 214e90b94946..bfd4ee59cb88 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -326,17 +326,24 @@ static inline int page_swapcount(struct page *page)
 }
 
 /*
- * We can use this swap cache entry directly
- * if there are no other references to it.
+ * We can write to an anon page without COW if there are no other references
+ * to it.  And as a side-effect, free up its swap: because the old content
+ * on disk will never be read, and seeking back there to write new content
+ * later would only waste time away from clustering.
  */
-int can_share_swap_page(struct page *page)
+int reuse_swap_page(struct page *page)
 {
 	int count;
 
 	VM_BUG_ON(!PageLocked(page));
 	count = page_mapcount(page);
-	if (count <= 1 && PageSwapCache(page))
+	if (count <= 1 && PageSwapCache(page)) {
 		count += page_swapcount(page);
+		if (count == 1 && !PageWriteback(page)) {
+			delete_from_swap_cache(page);
+			SetPageDirty(page);
+		}
+	}
 	return count == 1;
 }
 
-- 
cgit v1.2.3


From a2c43eed8334e878702fca713b212ae2a11d84b9 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Tue, 6 Jan 2009 14:39:36 -0800
Subject: mm: try_to_free_swap replaces remove_exclusive_swap_page

remove_exclusive_swap_page(): its problem is in living up to its name.

It doesn't matter if someone else has a reference to the page (raised
page_count); it doesn't matter if the page is mapped into userspace
(raised page_mapcount - though that hints it may be worth keeping the
swap): all that matters is that there be no more references to the swap
(and no writeback in progress).

swapoff (try_to_unuse) has been removing pages from swapcache for years,
with no concern for page count or page mapcount, and we used to have a
comment in lookup_swap_cache() recognizing that: if you go for a page of
swapcache, you'll get the right page, but it could have been removed from
swapcache by the time you get page lock.

So, give up asking for exclusivity: get rid of
remove_exclusive_swap_page(), and remove_exclusive_swap_page_ref() and
remove_exclusive_swap_page_count() which were spawned for the recent LRU
work: replace them by the simpler try_to_free_swap() which just checks
page_swapcount().

Similarly, remove the page_count limitation from free_swap_and_count(),
but assume that it's worth holding on to the swap if page is mapped and
swap nowhere near full.  Add a vm_swap_full() test in free_swap_cache()?
It would be consistent, but I think we probably have enough for now.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Robin Holt <holt@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h | 10 ++------
 mm/memory.c          |  2 +-
 mm/page_io.c         |  2 +-
 mm/swap.c            |  3 +--
 mm/swap_state.c      |  8 +++---
 mm/swapfile.c        | 70 +++++++++-------------------------------------------
 mm/vmscan.c          |  2 +-
 7 files changed, 22 insertions(+), 75 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 366556c5b148..c3ecd478840e 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -305,8 +305,7 @@ extern sector_t map_swap_page(struct swap_info_struct *, pgoff_t);
 extern sector_t swapdev_block(int, pgoff_t);
 extern struct swap_info_struct *get_swap_info_struct(unsigned);
 extern int reuse_swap_page(struct page *);
-extern int remove_exclusive_swap_page(struct page *);
-extern int remove_exclusive_swap_page_ref(struct page *);
+extern int try_to_free_swap(struct page *);
 struct backing_dev_info;
 
 /* linux/mm/thrash.c */
@@ -388,12 +387,7 @@ static inline void delete_from_swap_cache(struct page *page)
 
 #define reuse_swap_page(page)	(page_mapcount(page) == 1)
 
-static inline int remove_exclusive_swap_page(struct page *p)
-{
-	return 0;
-}
-
-static inline int remove_exclusive_swap_page_ref(struct page *page)
+static inline int try_to_free_swap(struct page *page)
 {
 	return 0;
 }
diff --git a/mm/memory.c b/mm/memory.c
index 8f471edcb985..1a83fe5339a9 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2403,7 +2403,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	swap_free(entry);
 	if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
-		remove_exclusive_swap_page(page);
+		try_to_free_swap(page);
 	unlock_page(page);
 
 	if (write_access) {
diff --git a/mm/page_io.c b/mm/page_io.c
index d277a80efa71..dc6ce0afbded 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -98,7 +98,7 @@ int swap_writepage(struct page *page, struct writeback_control *wbc)
 	struct bio *bio;
 	int ret = 0, rw = WRITE;
 
-	if (remove_exclusive_swap_page(page)) {
+	if (try_to_free_swap(page)) {
 		unlock_page(page);
 		goto out;
 	}
diff --git a/mm/swap.c b/mm/swap.c
index ff0b290475fd..ba2c0e8b8b54 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -454,8 +454,7 @@ void pagevec_swap_free(struct pagevec *pvec)
 		struct page *page = pvec->pages[i];
 
 		if (PageSwapCache(page) && trylock_page(page)) {
-			if (PageSwapCache(page))
-				remove_exclusive_swap_page_ref(page);
+			try_to_free_swap(page);
 			unlock_page(page);
 		}
 	}
diff --git a/mm/swap_state.c b/mm/swap_state.c
index e793fdea275d..bcb472769299 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -195,14 +195,14 @@ void delete_from_swap_cache(struct page *page)
  * If we are the only user, then try to free up the swap cache. 
  * 
  * Its ok to check for PageSwapCache without the page lock
- * here because we are going to recheck again inside 
- * exclusive_swap_page() _with_ the lock. 
+ * here because we are going to recheck again inside
+ * try_to_free_swap() _with_ the lock.
  * 					- Marcelo
  */
 static inline void free_swap_cache(struct page *page)
 {
-	if (PageSwapCache(page) && trylock_page(page)) {
-		remove_exclusive_swap_page(page);
+	if (PageSwapCache(page) && !page_mapped(page) && trylock_page(page)) {
+		try_to_free_swap(page);
 		unlock_page(page);
 	}
 }
diff --git a/mm/swapfile.c b/mm/swapfile.c
index bfd4ee59cb88..f43601827607 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -348,68 +348,23 @@ int reuse_swap_page(struct page *page)
 }
 
 /*
- * Work out if there are any other processes sharing this
- * swap cache page. Free it if you can. Return success.
+ * If swap is getting full, or if there are no more mappings of this page,
+ * then try_to_free_swap is called to free its swap space.
  */
-static int remove_exclusive_swap_page_count(struct page *page, int count)
+int try_to_free_swap(struct page *page)
 {
-	int retval;
-	struct swap_info_struct * p;
-	swp_entry_t entry;
-
 	VM_BUG_ON(!PageLocked(page));
 
 	if (!PageSwapCache(page))
 		return 0;
 	if (PageWriteback(page))
 		return 0;
-	if (page_count(page) != count) /* us + cache + ptes */
-		return 0;
-
-	entry.val = page_private(page);
-	p = swap_info_get(entry);
-	if (!p)
+	if (page_swapcount(page))
 		return 0;
 
-	/* Is the only swap cache user the cache itself? */
-	retval = 0;
-	if (p->swap_map[swp_offset(entry)] == 1) {
-		/* Recheck the page count with the swapcache lock held.. */
-		spin_lock_irq(&swapper_space.tree_lock);
-		if ((page_count(page) == count) && !PageWriteback(page)) {
-			__delete_from_swap_cache(page);
-			SetPageDirty(page);
-			retval = 1;
-		}
-		spin_unlock_irq(&swapper_space.tree_lock);
-	}
-	spin_unlock(&swap_lock);
-
-	if (retval) {
-		swap_free(entry);
-		page_cache_release(page);
-	}
-
-	return retval;
-}
-
-/*
- * Most of the time the page should have two references: one for the
- * process and one for the swap cache.
- */
-int remove_exclusive_swap_page(struct page *page)
-{
-	return remove_exclusive_swap_page_count(page, 2);
-}
-
-/*
- * The pageout code holds an extra reference to the page.  That raises
- * the reference count to test for to 2 for a page that is only in the
- * swap cache plus 1 for each process that maps the page.
- */
-int remove_exclusive_swap_page_ref(struct page *page)
-{
-	return remove_exclusive_swap_page_count(page, 2 + page_mapcount(page));
+	delete_from_swap_cache(page);
+	SetPageDirty(page);
+	return 1;
 }
 
 /*
@@ -436,13 +391,12 @@ void free_swap_and_cache(swp_entry_t entry)
 		spin_unlock(&swap_lock);
 	}
 	if (page) {
-		int one_user;
-
-		one_user = (page_count(page) == 2);
-		/* Only cache user (+us), or swap space full? Free it! */
-		/* Also recheck PageSwapCache after page is locked (above) */
+		/*
+		 * Not mapped elsewhere, or swap space full? Free it!
+		 * Also recheck PageSwapCache now page is locked (above).
+		 */
 		if (PageSwapCache(page) && !PageWriteback(page) &&
-					(one_user || vm_swap_full())) {
+				(!page_mapped(page) || vm_swap_full())) {
 			delete_from_swap_cache(page);
 			SetPageDirty(page);
 		}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d196f46c8808..c8601dd36603 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -759,7 +759,7 @@ cull_mlocked:
 activate_locked:
 		/* Not a candidate for swapping, so reclaim swap space. */
 		if (PageSwapCache(page) && vm_swap_full())
-			remove_exclusive_swap_page_ref(page);
+			try_to_free_swap(page);
 		VM_BUG_ON(PageActive(page));
 		SetPageActive(page);
 		pgactivate++;
-- 
cgit v1.2.3


From ac47b003d03c2a4f28aef1d505b66d24ad191c4f Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Tue, 6 Jan 2009 14:39:39 -0800
Subject: mm: remove gfp_mask from add_to_swap

Remove gfp_mask argument from add_to_swap(): it's misleading because its
only caller, shrink_page_list(), is not atomic at that point; and in due
course (implementing discard) we'll sometimes want to allocate some memory
with GFP_NOIO (as is used in swap_writepage) when allocating swap.

No change to the gfp_mask passed down to add_to_swap_cache(): still use
__GFP_HIGH without __GFP_WAIT (with nomemalloc and nowarn as before):
though it's not obvious if that's the best combination to ask for here.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Robin Holt <holt@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h | 2 +-
 mm/swap_state.c      | 4 ++--
 mm/vmscan.c          | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index c3ecd478840e..c38bd157695b 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -278,7 +278,7 @@ extern void end_swap_bio_read(struct bio *bio, int err);
 extern struct address_space swapper_space;
 #define total_swapcache_pages  swapper_space.nrpages
 extern void show_swap_cache_info(void);
-extern int add_to_swap(struct page *, gfp_t);
+extern int add_to_swap(struct page *);
 extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t);
 extern void __delete_from_swap_cache(struct page *);
 extern void delete_from_swap_cache(struct page *);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index bcb472769299..81c825f67a7f 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -128,7 +128,7 @@ void __delete_from_swap_cache(struct page *page)
  * Allocate swap space for the page and add the page to the
  * swap cache.  Caller needs to hold the page lock. 
  */
-int add_to_swap(struct page * page, gfp_t gfp_mask)
+int add_to_swap(struct page *page)
 {
 	swp_entry_t entry;
 	int err;
@@ -153,7 +153,7 @@ int add_to_swap(struct page * page, gfp_t gfp_mask)
 		 * Add it to the swap cache and mark it dirty
 		 */
 		err = add_to_swap_cache(page, entry,
-				gfp_mask|__GFP_NOMEMALLOC|__GFP_NOWARN);
+				__GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN);
 
 		switch (err) {
 		case 0:				/* Success */
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 74f875733e2b..cc7401571cba 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -625,7 +625,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		if (PageAnon(page) && !PageSwapCache(page)) {
 			if (!(sc->gfp_mask & __GFP_IO))
 				goto keep_locked;
-			if (!add_to_swap(page, GFP_ATOMIC))
+			if (!add_to_swap(page))
 				goto activate_locked;
 			may_enter_fs = 1;
 		}
-- 
cgit v1.2.3


From 60371d971a3d01afd102f0bbf2681f32ecc31d78 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Tue, 6 Jan 2009 14:39:40 -0800
Subject: mm: add add_to_swap stub

If we add a failing stub for add_to_swap(), then we can remove the #ifdef
CONFIG_SWAP from mm/vmscan.c.

This was intended as a source cleanup, but looking more closely, it turns
out that the !CONFIG_SWAP case was going to keep_locked for an anonymous
page, whereas now it goes to the more suitable activate_locked, like the
CONFIG_SWAP nr_swap_pages 0 case.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Robin Holt <holt@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h | 5 +++++
 mm/vmscan.c          | 2 --
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index c38bd157695b..c0d23ac710d5 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -371,6 +371,11 @@ static inline struct page *lookup_swap_cache(swp_entry_t swp)
 	return NULL;
 }
 
+static inline int add_to_swap(struct page *page)
+{
+	return 0;
+}
+
 static inline int add_to_swap_cache(struct page *page, swp_entry_t entry,
 							gfp_t gfp_mask)
 {
diff --git a/mm/vmscan.c b/mm/vmscan.c
index cc7401571cba..f350523a8eee 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -617,7 +617,6 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 					referenced && page_mapping_inuse(page))
 			goto activate_locked;
 
-#ifdef CONFIG_SWAP
 		/*
 		 * Anonymous process memory has backing store?
 		 * Try to allocate it some swap space here.
@@ -629,7 +628,6 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 				goto activate_locked;
 			may_enter_fs = 1;
 		}
-#endif /* CONFIG_SWAP */
 
 		mapping = page_mapping(page);
 
-- 
cgit v1.2.3


From b962716b459505a8d83aea313fea0abe76749f42 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Tue, 6 Jan 2009 14:39:41 -0800
Subject: mm: optimize get_scan_ratio for no swap

Rik suggests a simplified get_scan_ratio() for !CONFIG_SWAP.  Yes, the gcc
optimizer gives us that, when nr_swap_pages is #defined as 0L.  Move usual
declaration to swapfile.c: it never belonged in page_alloc.c.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Robin Holt <holt@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h |  5 +++--
 mm/page_alloc.c      |  1 -
 mm/swapfile.c        |  1 +
 mm/vmscan.c          | 12 ++++++------
 4 files changed, 10 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index c0d23ac710d5..3a31cc25bd2c 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -163,7 +163,6 @@ struct swap_list_t {
 /* linux/mm/page_alloc.c */
 extern unsigned long totalram_pages;
 extern unsigned long totalreserve_pages;
-extern long nr_swap_pages;
 extern unsigned int nr_free_buffer_pages(void);
 extern unsigned int nr_free_pagecache_pages(void);
 
@@ -291,6 +290,7 @@ extern struct page *swapin_readahead(swp_entry_t, gfp_t,
 			struct vm_area_struct *vma, unsigned long addr);
 
 /* linux/mm/swapfile.c */
+extern long nr_swap_pages;
 extern long total_swap_pages;
 extern void si_swapinfo(struct sysinfo *);
 extern swp_entry_t get_swap_page(void);
@@ -331,7 +331,8 @@ static inline void disable_swap_token(void)
 
 #else /* CONFIG_SWAP */
 
-#define total_swap_pages			0
+#define nr_swap_pages				0L
+#define total_swap_pages			0L
 #define total_swapcache_pages			0UL
 
 #define si_swapinfo(val) \
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ce2a026219bc..65133436fe3d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -69,7 +69,6 @@ EXPORT_SYMBOL(node_states);
 
 unsigned long totalram_pages __read_mostly;
 unsigned long totalreserve_pages __read_mostly;
-long nr_swap_pages;
 int percpu_pagelist_fraction;
 
 #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 9ce7f81c8abc..725e56c362de 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -35,6 +35,7 @@
 
 static DEFINE_SPINLOCK(swap_lock);
 static unsigned int nr_swapfiles;
+long nr_swap_pages;
 long total_swap_pages;
 static int swap_overflow;
 static int least_priority;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index f350523a8eee..d500b906746c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1327,12 +1327,6 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
 	unsigned long anon_prio, file_prio;
 	unsigned long ap, fp;
 
-	anon  = zone_page_state(zone, NR_ACTIVE_ANON) +
-		zone_page_state(zone, NR_INACTIVE_ANON);
-	file  = zone_page_state(zone, NR_ACTIVE_FILE) +
-		zone_page_state(zone, NR_INACTIVE_FILE);
-	free  = zone_page_state(zone, NR_FREE_PAGES);
-
 	/* If we have no swap space, do not bother scanning anon pages. */
 	if (nr_swap_pages <= 0) {
 		percent[0] = 0;
@@ -1340,6 +1334,12 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
 		return;
 	}
 
+	anon  = zone_page_state(zone, NR_ACTIVE_ANON) +
+		zone_page_state(zone, NR_INACTIVE_ANON);
+	file  = zone_page_state(zone, NR_ACTIVE_FILE) +
+		zone_page_state(zone, NR_INACTIVE_FILE);
+	free  = zone_page_state(zone, NR_FREE_PAGES);
+
 	/* If we have very few page cache pages, force-scan anon pages. */
 	if (unlikely(file + free <= zone->pages_high)) {
 		percent[0] = 100;
-- 
cgit v1.2.3


From 69beeb1d3428424fbc7546f85e5cd7ac4119c09d Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Tue, 6 Jan 2009 14:39:46 -0800
Subject: mm: make vread() and vwrite() declaration

Sparse output following warnings.

mm/vmalloc.c:1436:6: warning: symbol 'vread' was not declared. Should it be static?
mm/vmalloc.c:1474:6: warning: symbol 'vwrite' was not declared. Should it be static?

However, it is used by /dev/kmem. fixed here.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/mem.c      | 3 ---
 include/linux/vmalloc.h | 4 ++++
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 6431f6921a67..3586b3b3df3f 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -425,9 +425,6 @@ static ssize_t read_oldmem(struct file *file, char __user *buf,
 }
 #endif
 
-extern long vread(char *buf, char *addr, unsigned long count);
-extern long vwrite(char *buf, char *addr, unsigned long count);
-
 #ifdef CONFIG_DEVKMEM
 /*
  * This function reads the *virtual* memory as seen by the kernel.
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 307b88577eaa..506e7620a986 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -97,6 +97,10 @@ extern void unmap_kernel_range(unsigned long addr, unsigned long size);
 extern struct vm_struct *alloc_vm_area(size_t size);
 extern void free_vm_area(struct vm_struct *area);
 
+/* for /dev/kmem */
+extern long vread(char *buf, char *addr, unsigned long count);
+extern long vwrite(char *buf, char *addr, unsigned long count);
+
 /*
  *	Internals.  Dont't use..
  */
-- 
cgit v1.2.3


From 22c6f8fdb31993cf49bdd4a47b64a7002391e1c7 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Tue, 6 Jan 2009 14:39:48 -0800
Subject: swapfile: remove SWP_ACTIVE mask

Remove the SWP_ACTIVE mask: it just obscures the SWP_WRITEOK flag.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h | 1 -
 mm/swapfile.c        | 4 ++--
 2 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 3a31cc25bd2c..410c8e473727 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -120,7 +120,6 @@ struct swap_extent {
 enum {
 	SWP_USED	= (1 << 0),	/* is slot in swap_info[] used? */
 	SWP_WRITEOK	= (1 << 1),	/* ok to write to this swap?	*/
-	SWP_ACTIVE	= (SWP_USED | SWP_WRITEOK),
 					/* add others here before... */
 	SWP_SCANNING	= (1 << 8),	/* refcount in scan_swap_map */
 };
diff --git a/mm/swapfile.c b/mm/swapfile.c
index e2adc8eb9317..915cb3fc43d7 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1222,7 +1222,7 @@ asmlinkage long sys_swapoff(const char __user * specialfile)
 	spin_lock(&swap_lock);
 	for (type = swap_list.head; type >= 0; type = swap_info[type].next) {
 		p = swap_info + type;
-		if ((p->flags & SWP_ACTIVE) == SWP_ACTIVE) {
+		if (p->flags & SWP_WRITEOK) {
 			if (p->swap_file->f_mapping == mapping)
 				break;
 		}
@@ -1674,7 +1674,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
 	else
 		p->prio = --least_priority;
 	p->swap_map = swap_map;
-	p->flags = SWP_ACTIVE;
+	p->flags |= SWP_WRITEOK;
 	nr_swap_pages += nr_good_pages;
 	total_swap_pages += nr_good_pages;
 
-- 
cgit v1.2.3


From ebebbbe904634b0ca1c674457b399f68db5e05b1 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Tue, 6 Jan 2009 14:39:50 -0800
Subject: swapfile: rearrange scan and swap_info

Before making functional changes, rearrange scan_swap_map() to simplify
subsequent diffs.  Actually, there is one functional change in there:
leave cluster_nr negative while scanning for a new cluster - resetting it
early increased the likelihood that when we have difficulty finding a free
cluster, another task may come in and try doing exactly the same - just a
waste of cpu.

Before making functional changes, rearrange struct swap_info_struct
slightly: flags will be needed as an unsigned long (for wait_on_bit), next
is a good int to pair with prio, old_block_size is uninteresting so shift
it to the end.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h |  8 +++----
 mm/swapfile.c        | 66 +++++++++++++++++++++++++++++-----------------------
 2 files changed, 41 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 410c8e473727..9cabb8b21aba 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -133,14 +133,14 @@ enum {
  * The in-memory structure used to track swap areas.
  */
 struct swap_info_struct {
-	unsigned int flags;
+	unsigned long flags;
 	int prio;			/* swap priority */
+	int next;			/* next entry on swap list */
 	struct file *swap_file;
 	struct block_device *bdev;
 	struct list_head extent_list;
 	struct swap_extent *curr_swap_extent;
-	unsigned old_block_size;
-	unsigned short * swap_map;
+	unsigned short *swap_map;
 	unsigned int lowest_bit;
 	unsigned int highest_bit;
 	unsigned int cluster_next;
@@ -148,7 +148,7 @@ struct swap_info_struct {
 	unsigned int pages;
 	unsigned int max;
 	unsigned int inuse_pages;
-	int next;			/* next entry on swap list */
+	unsigned int old_block_size;
 };
 
 struct swap_list_t {
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 85ff603385c3..4d9855f86e7d 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -89,7 +89,8 @@ void swap_unplug_io_fn(struct backing_dev_info *unused_bdi, struct page *page)
 
 static inline unsigned long scan_swap_map(struct swap_info_struct *si)
 {
-	unsigned long offset, last_in_cluster;
+	unsigned long offset;
+	unsigned long last_in_cluster;
 	int latency_ration = LATENCY_LIMIT;
 
 	/*
@@ -103,10 +104,13 @@ static inline unsigned long scan_swap_map(struct swap_info_struct *si)
 	 */
 
 	si->flags += SWP_SCANNING;
-	if (unlikely(!si->cluster_nr)) {
-		si->cluster_nr = SWAPFILE_CLUSTER - 1;
-		if (si->pages - si->inuse_pages < SWAPFILE_CLUSTER)
-			goto lowest;
+	offset = si->cluster_next;
+
+	if (unlikely(!si->cluster_nr--)) {
+		if (si->pages - si->inuse_pages < SWAPFILE_CLUSTER) {
+			si->cluster_nr = SWAPFILE_CLUSTER - 1;
+			goto checks;
+		}
 		spin_unlock(&swap_lock);
 
 		offset = si->lowest_bit;
@@ -118,43 +122,47 @@ static inline unsigned long scan_swap_map(struct swap_info_struct *si)
 				last_in_cluster = offset + SWAPFILE_CLUSTER;
 			else if (offset == last_in_cluster) {
 				spin_lock(&swap_lock);
-				si->cluster_next = offset-SWAPFILE_CLUSTER+1;
-				goto cluster;
+				offset -= SWAPFILE_CLUSTER - 1;
+				si->cluster_next = offset;
+				si->cluster_nr = SWAPFILE_CLUSTER - 1;
+				goto checks;
 			}
 			if (unlikely(--latency_ration < 0)) {
 				cond_resched();
 				latency_ration = LATENCY_LIMIT;
 			}
 		}
+
+		offset = si->lowest_bit;
 		spin_lock(&swap_lock);
-		goto lowest;
+		si->cluster_nr = SWAPFILE_CLUSTER - 1;
 	}
 
-	si->cluster_nr--;
-cluster:
-	offset = si->cluster_next;
-	if (offset > si->highest_bit)
-lowest:		offset = si->lowest_bit;
-checks:	if (!(si->flags & SWP_WRITEOK))
+checks:
+	if (!(si->flags & SWP_WRITEOK))
 		goto no_page;
 	if (!si->highest_bit)
 		goto no_page;
-	if (!si->swap_map[offset]) {
-		if (offset == si->lowest_bit)
-			si->lowest_bit++;
-		if (offset == si->highest_bit)
-			si->highest_bit--;
-		si->inuse_pages++;
-		if (si->inuse_pages == si->pages) {
-			si->lowest_bit = si->max;
-			si->highest_bit = 0;
-		}
-		si->swap_map[offset] = 1;
-		si->cluster_next = offset + 1;
-		si->flags -= SWP_SCANNING;
-		return offset;
+	if (offset > si->highest_bit)
+		offset = si->lowest_bit;
+	if (si->swap_map[offset])
+		goto scan;
+
+	if (offset == si->lowest_bit)
+		si->lowest_bit++;
+	if (offset == si->highest_bit)
+		si->highest_bit--;
+	si->inuse_pages++;
+	if (si->inuse_pages == si->pages) {
+		si->lowest_bit = si->max;
+		si->highest_bit = 0;
 	}
+	si->swap_map[offset] = 1;
+	si->cluster_next = offset + 1;
+	si->flags -= SWP_SCANNING;
+	return offset;
 
+scan:
 	spin_unlock(&swap_lock);
 	while (++offset <= si->highest_bit) {
 		if (!si->swap_map[offset]) {
@@ -167,7 +175,7 @@ checks:	if (!(si->flags & SWP_WRITEOK))
 		}
 	}
 	spin_lock(&swap_lock);
-	goto lowest;
+	goto checks;
 
 no_page:
 	si->flags -= SWP_SCANNING;
-- 
cgit v1.2.3


From 6a6ba83175c029c7820765bae44692266b29e67a Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Tue, 6 Jan 2009 14:39:51 -0800
Subject: swapfile: swapon use discard (trim)

When adding swap, all the old data on swap can be forgotten: sys_swapon()
discard all but the header page of the swap partition (or every extent but
the header of the swap file), to give a solidstate swap device the
opportunity to optimize its wear-levelling.

If that succeeds, note SWP_DISCARDABLE for later use, and report it with a
"D" at the right end of the kernel's "Adding ...  swap" message.  Perhaps
something should be shown in /proc/swaps (swapon -s), but we have to be
more cautious before making any addition to that format.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Jens Axboe <jens.axboe@oracle.com>
Cc: Matthew Wilcox <matthew@wil.cx>
Cc: Joern Engel <joern@logfs.org>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Donjun Shin <djshin90@gmail.com>
Cc: Tejun Heo <teheo@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h |  1 +
 mm/swapfile.c        | 39 +++++++++++++++++++++++++++++++++++++--
 2 files changed, 38 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 9cabb8b21aba..0b9210ea96c7 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -120,6 +120,7 @@ struct swap_extent {
 enum {
 	SWP_USED	= (1 << 0),	/* is slot in swap_info[] used? */
 	SWP_WRITEOK	= (1 << 1),	/* ok to write to this swap?	*/
+	SWP_DISCARDABLE = (1 << 2),	/* blkdev supports discard */
 					/* add others here before... */
 	SWP_SCANNING	= (1 << 8),	/* refcount in scan_swap_map */
 };
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 4d9855f86e7d..fbeb4bb8eb50 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -84,6 +84,37 @@ void swap_unplug_io_fn(struct backing_dev_info *unused_bdi, struct page *page)
 	up_read(&swap_unplug_sem);
 }
 
+/*
+ * swapon tell device that all the old swap contents can be discarded,
+ * to allow the swap device to optimize its wear-levelling.
+ */
+static int discard_swap(struct swap_info_struct *si)
+{
+	struct swap_extent *se;
+	int err = 0;
+
+	list_for_each_entry(se, &si->extent_list, list) {
+		sector_t start_block = se->start_block << (PAGE_SHIFT - 9);
+		pgoff_t nr_blocks = se->nr_pages << (PAGE_SHIFT - 9);
+
+		if (se->start_page == 0) {
+			/* Do not discard the swap header page! */
+			start_block += 1 << (PAGE_SHIFT - 9);
+			nr_blocks -= 1 << (PAGE_SHIFT - 9);
+			if (!nr_blocks)
+				continue;
+		}
+
+		err = blkdev_issue_discard(si->bdev, start_block,
+						nr_blocks, GFP_KERNEL);
+		if (err)
+			break;
+
+		cond_resched();
+	}
+	return err;		/* That will often be -EOPNOTSUPP */
+}
+
 #define SWAPFILE_CLUSTER	256
 #define LATENCY_LIMIT		256
 
@@ -1658,6 +1689,9 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
 		goto bad_swap;
 	}
 
+	if (discard_swap(p) == 0)
+		p->flags |= SWP_DISCARDABLE;
+
 	mutex_lock(&swapon_mutex);
 	spin_lock(&swap_lock);
 	if (swap_flags & SWAP_FLAG_PREFER)
@@ -1671,9 +1705,10 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
 	total_swap_pages += nr_good_pages;
 
 	printk(KERN_INFO "Adding %uk swap on %s.  "
-			"Priority:%d extents:%d across:%lluk\n",
+			"Priority:%d extents:%d across:%lluk%s\n",
 		nr_good_pages<<(PAGE_SHIFT-10), name, p->prio,
-		nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10));
+		nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10),
+		(p->flags & SWP_DISCARDABLE) ? " D" : "");
 
 	/* insert swap space into swap_list: */
 	prev = -1;
-- 
cgit v1.2.3


From 7992fde72ce06c73280a1939b7a1e903bc95ef85 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Tue, 6 Jan 2009 14:39:53 -0800
Subject: swapfile: swap allocation use discard

When scan_swap_map() finds a free cluster of swap pages to allocate,
discard the old contents of the cluster if the device supports discard.
But don't bother when swap is so fragmented that we allocate single pages.

Be careful about racing allocations made while we're scanning for a
cluster; and hold up allocations made while we're discarding.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Jens Axboe <jens.axboe@oracle.com>
Cc: Matthew Wilcox <matthew@wil.cx>
Cc: Joern Engel <joern@logfs.org>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Donjun Shin <djshin90@gmail.com>
Cc: Tejun Heo <teheo@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h |   3 ++
 mm/swapfile.c        | 119 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 121 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 0b9210ea96c7..fe79f44c858e 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -121,6 +121,7 @@ enum {
 	SWP_USED	= (1 << 0),	/* is slot in swap_info[] used? */
 	SWP_WRITEOK	= (1 << 1),	/* ok to write to this swap?	*/
 	SWP_DISCARDABLE = (1 << 2),	/* blkdev supports discard */
+	SWP_DISCARDING	= (1 << 3),	/* now discarding a free cluster */
 					/* add others here before... */
 	SWP_SCANNING	= (1 << 8),	/* refcount in scan_swap_map */
 };
@@ -144,6 +145,8 @@ struct swap_info_struct {
 	unsigned short *swap_map;
 	unsigned int lowest_bit;
 	unsigned int highest_bit;
+	unsigned int lowest_alloc;	/* while preparing discard cluster */
+	unsigned int highest_alloc;	/* while preparing discard cluster */
 	unsigned int cluster_next;
 	unsigned int cluster_nr;
 	unsigned int pages;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index fbeb4bb8eb50..ca75b9e7c09f 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -115,14 +115,62 @@ static int discard_swap(struct swap_info_struct *si)
 	return err;		/* That will often be -EOPNOTSUPP */
 }
 
+/*
+ * swap allocation tell device that a cluster of swap can now be discarded,
+ * to allow the swap device to optimize its wear-levelling.
+ */
+static void discard_swap_cluster(struct swap_info_struct *si,
+				 pgoff_t start_page, pgoff_t nr_pages)
+{
+	struct swap_extent *se = si->curr_swap_extent;
+	int found_extent = 0;
+
+	while (nr_pages) {
+		struct list_head *lh;
+
+		if (se->start_page <= start_page &&
+		    start_page < se->start_page + se->nr_pages) {
+			pgoff_t offset = start_page - se->start_page;
+			sector_t start_block = se->start_block + offset;
+			pgoff_t nr_blocks = se->nr_pages - offset;
+
+			if (nr_blocks > nr_pages)
+				nr_blocks = nr_pages;
+			start_page += nr_blocks;
+			nr_pages -= nr_blocks;
+
+			if (!found_extent++)
+				si->curr_swap_extent = se;
+
+			start_block <<= PAGE_SHIFT - 9;
+			nr_blocks <<= PAGE_SHIFT - 9;
+			if (blkdev_issue_discard(si->bdev, start_block,
+							nr_blocks, GFP_NOIO))
+				break;
+		}
+
+		lh = se->list.next;
+		if (lh == &si->extent_list)
+			lh = lh->next;
+		se = list_entry(lh, struct swap_extent, list);
+	}
+}
+
+static int wait_for_discard(void *word)
+{
+	schedule();
+	return 0;
+}
+
 #define SWAPFILE_CLUSTER	256
 #define LATENCY_LIMIT		256
 
 static inline unsigned long scan_swap_map(struct swap_info_struct *si)
 {
 	unsigned long offset;
-	unsigned long last_in_cluster;
+	unsigned long last_in_cluster = 0;
 	int latency_ration = LATENCY_LIMIT;
+	int found_free_cluster = 0;
 
 	/*
 	 * We try to cluster swap pages by allocating them sequentially
@@ -142,6 +190,19 @@ static inline unsigned long scan_swap_map(struct swap_info_struct *si)
 			si->cluster_nr = SWAPFILE_CLUSTER - 1;
 			goto checks;
 		}
+		if (si->flags & SWP_DISCARDABLE) {
+			/*
+			 * Start range check on racing allocations, in case
+			 * they overlap the cluster we eventually decide on
+			 * (we scan without swap_lock to allow preemption).
+			 * It's hardly conceivable that cluster_nr could be
+			 * wrapped during our scan, but don't depend on it.
+			 */
+			if (si->lowest_alloc)
+				goto checks;
+			si->lowest_alloc = si->max;
+			si->highest_alloc = 0;
+		}
 		spin_unlock(&swap_lock);
 
 		offset = si->lowest_bit;
@@ -156,6 +217,7 @@ static inline unsigned long scan_swap_map(struct swap_info_struct *si)
 				offset -= SWAPFILE_CLUSTER - 1;
 				si->cluster_next = offset;
 				si->cluster_nr = SWAPFILE_CLUSTER - 1;
+				found_free_cluster = 1;
 				goto checks;
 			}
 			if (unlikely(--latency_ration < 0)) {
@@ -167,6 +229,7 @@ static inline unsigned long scan_swap_map(struct swap_info_struct *si)
 		offset = si->lowest_bit;
 		spin_lock(&swap_lock);
 		si->cluster_nr = SWAPFILE_CLUSTER - 1;
+		si->lowest_alloc = 0;
 	}
 
 checks:
@@ -191,6 +254,60 @@ checks:
 	si->swap_map[offset] = 1;
 	si->cluster_next = offset + 1;
 	si->flags -= SWP_SCANNING;
+
+	if (si->lowest_alloc) {
+		/*
+		 * Only set when SWP_DISCARDABLE, and there's a scan
+		 * for a free cluster in progress or just completed.
+		 */
+		if (found_free_cluster) {
+			/*
+			 * To optimize wear-levelling, discard the
+			 * old data of the cluster, taking care not to
+			 * discard any of its pages that have already
+			 * been allocated by racing tasks (offset has
+			 * already stepped over any at the beginning).
+			 */
+			if (offset < si->highest_alloc &&
+			    si->lowest_alloc <= last_in_cluster)
+				last_in_cluster = si->lowest_alloc - 1;
+			si->flags |= SWP_DISCARDING;
+			spin_unlock(&swap_lock);
+
+			if (offset < last_in_cluster)
+				discard_swap_cluster(si, offset,
+					last_in_cluster - offset + 1);
+
+			spin_lock(&swap_lock);
+			si->lowest_alloc = 0;
+			si->flags &= ~SWP_DISCARDING;
+
+			smp_mb();	/* wake_up_bit advises this */
+			wake_up_bit(&si->flags, ilog2(SWP_DISCARDING));
+
+		} else if (si->flags & SWP_DISCARDING) {
+			/*
+			 * Delay using pages allocated by racing tasks
+			 * until the whole discard has been issued. We
+			 * could defer that delay until swap_writepage,
+			 * but it's easier to keep this self-contained.
+			 */
+			spin_unlock(&swap_lock);
+			wait_on_bit(&si->flags, ilog2(SWP_DISCARDING),
+				wait_for_discard, TASK_UNINTERRUPTIBLE);
+			spin_lock(&swap_lock);
+		} else {
+			/*
+			 * Note pages allocated by racing tasks while
+			 * scan for a free cluster is in progress, so
+			 * that its final discard can exclude them.
+			 */
+			if (offset < si->lowest_alloc)
+				si->lowest_alloc = offset;
+			if (offset > si->highest_alloc)
+				si->highest_alloc = offset;
+		}
+	}
 	return offset;
 
 scan:
-- 
cgit v1.2.3


From 20137a490f397d9c01fc9fadd83a8d198bda4477 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Tue, 6 Jan 2009 14:39:54 -0800
Subject: swapfile: swapon randomize if nonrot

Swap allocation has always started from the beginning of the swap area;
but if we're dealing with a solidstate swap device which can only remap
blocks within limited zones, that would sooner wear out the first zone.

Therefore sys_swapon() test whether blk_queue is non-rotational, and if so
randomize the cluster_next starting position for allocation.

If blk_queue is nonrot, note SWP_SOLIDSTATE for later use, and report it
with an "SS" at the right end of the kernel's "Adding ...  swap" message
(so that if it's both nonrot and discardable, "SSD" will be shown there).
Perhaps something should be shown in /proc/swaps (swapon -s), but we have
to be more cautious before making any addition to that format.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Jens Axboe <jens.axboe@oracle.com>
Cc: Matthew Wilcox <matthew@wil.cx>
Cc: Joern Engel <joern@logfs.org>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Donjun Shin <djshin90@gmail.com>
Cc: Tejun Heo <teheo@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h |  1 +
 mm/swapfile.c        | 11 +++++++++--
 2 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index fe79f44c858e..cbf7fbed3dfd 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -122,6 +122,7 @@ enum {
 	SWP_WRITEOK	= (1 << 1),	/* ok to write to this swap?	*/
 	SWP_DISCARDABLE = (1 << 2),	/* blkdev supports discard */
 	SWP_DISCARDING	= (1 << 3),	/* now discarding a free cluster */
+	SWP_SOLIDSTATE	= (1 << 4),	/* blkdev seeks are cheap */
 					/* add others here before... */
 	SWP_SCANNING	= (1 << 8),	/* refcount in scan_swap_map */
 };
diff --git a/mm/swapfile.c b/mm/swapfile.c
index ca75b9e7c09f..b0f56603b9be 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -16,6 +16,7 @@
 #include <linux/namei.h>
 #include <linux/shm.h>
 #include <linux/blkdev.h>
+#include <linux/random.h>
 #include <linux/writeback.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
@@ -1806,6 +1807,11 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
 		goto bad_swap;
 	}
 
+	if (blk_queue_nonrot(bdev_get_queue(p->bdev))) {
+		p->flags |= SWP_SOLIDSTATE;
+		srandom32((u32)get_seconds());
+		p->cluster_next = 1 + (random32() % p->highest_bit);
+	}
 	if (discard_swap(p) == 0)
 		p->flags |= SWP_DISCARDABLE;
 
@@ -1822,10 +1828,11 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
 	total_swap_pages += nr_good_pages;
 
 	printk(KERN_INFO "Adding %uk swap on %s.  "
-			"Priority:%d extents:%d across:%lluk%s\n",
+			"Priority:%d extents:%d across:%lluk %s%s\n",
 		nr_good_pages<<(PAGE_SHIFT-10), name, p->prio,
 		nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10),
-		(p->flags & SWP_DISCARDABLE) ? " D" : "");
+		(p->flags & SWP_SOLIDSTATE) ? "SS" : "",
+		(p->flags & SWP_DISCARDABLE) ? "D" : "");
 
 	/* insert swap space into swap_list: */
 	prev = -1;
-- 
cgit v1.2.3


From 79f4b7bf393e67bbffec807cc68caaefc72b82ee Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Tue, 6 Jan 2009 14:40:05 -0800
Subject: badpage: simplify page_alloc flag check+clear

Simplify the PAGE_FLAGS checking and clearing when freeing and allocating
a page: check the same flags as before when freeing, clear ALL the flags
(unless PageReserved) when freeing, check ALL flags off when allocating.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page-flags.h | 25 ++++++++-----------------
 mm/page_alloc.c            | 19 ++++++-------------
 2 files changed, 14 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 628ec0802492..219a523ecdb0 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -373,31 +373,22 @@ static inline void __ClearPageTail(struct page *page)
 #define __PG_MLOCKED		0
 #endif
 
-#define PAGE_FLAGS	(1 << PG_lru   | 1 << PG_private   | 1 << PG_locked | \
-			 1 << PG_buddy | 1 << PG_writeback | \
-			 1 << PG_slab  | 1 << PG_swapcache | 1 << PG_active | \
-			 __PG_UNEVICTABLE | __PG_MLOCKED)
-
-/*
- * Flags checked in bad_page().  Pages on the free list should not have
- * these flags set.  It they are, there is a problem.
- */
-#define PAGE_FLAGS_CLEAR_WHEN_BAD (PAGE_FLAGS | \
-		1 << PG_reclaim | 1 << PG_dirty | 1 << PG_swapbacked)
-
 /*
  * Flags checked when a page is freed.  Pages being freed should not have
  * these flags set.  It they are, there is a problem.
  */
-#define PAGE_FLAGS_CHECK_AT_FREE (PAGE_FLAGS | 1 << PG_reserved)
+#define PAGE_FLAGS_CHECK_AT_FREE \
+	(1 << PG_lru   | 1 << PG_private   | 1 << PG_locked | \
+	 1 << PG_buddy | 1 << PG_writeback | 1 << PG_reserved | \
+	 1 << PG_slab  | 1 << PG_swapcache | 1 << PG_active | \
+	 __PG_UNEVICTABLE | __PG_MLOCKED)
 
 /*
  * Flags checked when a page is prepped for return by the page allocator.
- * Pages being prepped should not have these flags set.  It they are, there
- * is a problem.
+ * Pages being prepped should not have any flags set.  It they are set,
+ * there has been a kernel bug or struct page corruption.
  */
-#define PAGE_FLAGS_CHECK_AT_PREP (PAGE_FLAGS | \
-		1 << PG_reserved | 1 << PG_dirty | 1 << PG_swapbacked)
+#define PAGE_FLAGS_CHECK_AT_PREP	((1 << NR_PAGEFLAGS) - 1)
 
 #endif /* !__GENERATING_BOUNDS_H */
 #endif	/* PAGE_FLAGS_H */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 31c512410a99..b90a74d28485 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -231,7 +231,6 @@ static void bad_page(struct page *page)
 	printk(KERN_EMERG "Trying to fix it up, but a reboot is needed\n"
 		KERN_EMERG "Backtrace:\n");
 	dump_stack();
-	page->flags &= ~PAGE_FLAGS_CLEAR_WHEN_BAD;
 	set_page_count(page, 0);
 	reset_page_mapcount(page);
 	page->mapping = NULL;
@@ -468,16 +467,16 @@ static inline int free_pages_check(struct page *page)
 		(page_count(page) != 0)  |
 		(page->flags & PAGE_FLAGS_CHECK_AT_FREE)))
 		bad_page(page);
-	if (PageDirty(page))
-		__ClearPageDirty(page);
-	if (PageSwapBacked(page))
-		__ClearPageSwapBacked(page);
 	/*
 	 * For now, we report if PG_reserved was found set, but do not
 	 * clear it, and do not free the page.  But we shall soon need
 	 * to do more, for when the ZERO_PAGE count wraps negative.
 	 */
-	return PageReserved(page);
+	if (PageReserved(page))
+		return 1;
+	if (page->flags & PAGE_FLAGS_CHECK_AT_PREP)
+		page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
+	return 0;
 }
 
 /*
@@ -621,13 +620,7 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
 	if (PageReserved(page))
 		return 1;
 
-	page->flags &= ~(1 << PG_uptodate | 1 << PG_error | 1 << PG_reclaim |
-			1 << PG_referenced | 1 << PG_arch_1 |
-			1 << PG_owner_priv_1 | 1 << PG_mappedtodisk
-#ifdef CONFIG_UNEVICTABLE_LRU
-			| 1 << PG_mlocked
-#endif
-			);
+	page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
 	set_page_private(page, 0);
 	set_page_refcounted(page);
 
-- 
cgit v1.2.3


From 2509ef26db4699a5d9fa876e90ddfc107afcab84 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Tue, 6 Jan 2009 14:40:10 -0800
Subject: badpage: zap print_bad_pte on swap and file

Complete zap_pte_range()'s coverage of bad pagetable entries by calling
print_bad_pte() on a pte_file in a linear vma and on a bad swap entry.
That needs free_swap_and_cache() to tell it, which will also have shown
one of those "swap_free" errors (but with much less information).

Similar checks in fork's copy_one_pte()?  No, that would be more noisy
than helpful: we'll see them when parent and child exec or exit.

Where do_nonlinear_fault() calls print_bad_pte(): omit !VM_CAN_NONLINEAR
case, that could only be a bug in sys_remap_file_pages(), not a bad pte.
VM_FAULT_OOM rather than VM_FAULT_SIGBUS?  Well, okay, that is consistent
with what happens if do_swap_page() operates a bad swap entry; but don't
we have patches to be more careful about killing when VM_FAULT_OOM?

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h | 12 +++---------
 mm/memory.c          | 11 +++++++----
 mm/swapfile.c        |  7 ++++---
 3 files changed, 14 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index cbf7fbed3dfd..91dee50fe260 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -302,7 +302,7 @@ extern swp_entry_t get_swap_page_of_type(int);
 extern int swap_duplicate(swp_entry_t);
 extern int valid_swaphandles(swp_entry_t, unsigned long *);
 extern void swap_free(swp_entry_t);
-extern void free_swap_and_cache(swp_entry_t);
+extern int free_swap_and_cache(swp_entry_t);
 extern int swap_type_of(dev_t, sector_t, struct block_device **);
 extern unsigned int count_swap_pages(int, int);
 extern sector_t map_swap_page(struct swap_info_struct *, pgoff_t);
@@ -352,14 +352,8 @@ static inline void show_swap_cache_info(void)
 {
 }
 
-static inline void free_swap_and_cache(swp_entry_t swp)
-{
-}
-
-static inline int swap_duplicate(swp_entry_t swp)
-{
-	return 0;
-}
+#define free_swap_and_cache(swp)	is_migration_entry(swp)
+#define swap_duplicate(swp)		is_migration_entry(swp)
 
 static inline void swap_free(swp_entry_t swp)
 {
diff --git a/mm/memory.c b/mm/memory.c
index 890095f5f36d..b273cc12b15d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -810,8 +810,12 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 		 */
 		if (unlikely(details))
 			continue;
-		if (!pte_file(ptent))
-			free_swap_and_cache(pte_to_swp_entry(ptent));
+		if (pte_file(ptent)) {
+			if (unlikely(!(vma->vm_flags & VM_NONLINEAR)))
+				print_bad_pte(vma, addr, ptent, NULL);
+		} else if
+		  (unlikely(!free_swap_and_cache(pte_to_swp_entry(ptent))))
+			print_bad_pte(vma, addr, ptent, NULL);
 		pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
 	} while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));
 
@@ -2707,8 +2711,7 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
 		return 0;
 
-	if (unlikely(!(vma->vm_flags & VM_NONLINEAR) ||
-			!(vma->vm_flags & VM_CAN_NONLINEAR))) {
+	if (unlikely(!(vma->vm_flags & VM_NONLINEAR))) {
 		/*
 		 * Page table corrupted: show pte and kill process.
 		 */
diff --git a/mm/swapfile.c b/mm/swapfile.c
index d00523601913..f28745855772 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -571,13 +571,13 @@ int try_to_free_swap(struct page *page)
  * Free the swap entry like above, but also try to
  * free the page cache entry if it is the last user.
  */
-void free_swap_and_cache(swp_entry_t entry)
+int free_swap_and_cache(swp_entry_t entry)
 {
-	struct swap_info_struct * p;
+	struct swap_info_struct *p;
 	struct page *page = NULL;
 
 	if (is_migration_entry(entry))
-		return;
+		return 1;
 
 	p = swap_info_get(entry);
 	if (p) {
@@ -603,6 +603,7 @@ void free_swap_and_cache(swp_entry_t entry)
 		unlock_page(page);
 		page_cache_release(page);
 	}
+	return p != NULL;
 }
 
 #ifdef CONFIG_HIBERNATION
-- 
cgit v1.2.3


From edc315fd222497ae4f4b959a9e31ada1e68a4755 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Tue, 6 Jan 2009 14:40:11 -0800
Subject: badpage: remove vma from page_remove_rmap

Remove page_remove_rmap()'s vma arg, which was only for the Eeek message.
And remove the BUG_ON(page_mapcount(page) == 0) from CONFIG_DEBUG_VM's
page_dup_rmap(): we're trying to be more resilient about that than BUGs.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rmap.h | 2 +-
 mm/filemap_xip.c     | 2 +-
 mm/fremap.c          | 2 +-
 mm/memory.c          | 4 ++--
 mm/rmap.c            | 8 +++-----
 5 files changed, 8 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 3593b18a07dd..b35bc0e19cd9 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -69,7 +69,7 @@ void __anon_vma_link(struct vm_area_struct *);
 void page_add_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
 void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
 void page_add_file_rmap(struct page *);
-void page_remove_rmap(struct page *, struct vm_area_struct *);
+void page_remove_rmap(struct page *);
 
 #ifdef CONFIG_DEBUG_VM
 void page_dup_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address);
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index b5167dfb2f2d..0c04615651b7 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -193,7 +193,7 @@ retry:
 			/* Nuke the page table entry. */
 			flush_cache_page(vma, address, pte_pfn(*pte));
 			pteval = ptep_clear_flush_notify(vma, address, pte);
-			page_remove_rmap(page, vma);
+			page_remove_rmap(page);
 			dec_mm_counter(mm, file_rss);
 			BUG_ON(pte_dirty(pteval));
 			pte_unmap_unlock(pte, ptl);
diff --git a/mm/fremap.c b/mm/fremap.c
index 7d12ca70ef7b..62d5bbda921a 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -37,7 +37,7 @@ static void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
 		if (page) {
 			if (pte_dirty(pte))
 				set_page_dirty(page);
-			page_remove_rmap(page, vma);
+			page_remove_rmap(page);
 			page_cache_release(page);
 			update_hiwater_rss(mm);
 			dec_mm_counter(mm, file_rss);
diff --git a/mm/memory.c b/mm/memory.c
index b273cc12b15d..0f9abbaf618c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -798,7 +798,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 					mark_page_accessed(page);
 				file_rss--;
 			}
-			page_remove_rmap(page, vma);
+			page_remove_rmap(page);
 			if (unlikely(page_mapcount(page) < 0))
 				print_bad_pte(vma, addr, ptent, page);
 			tlb_remove_page(tlb, page);
@@ -2023,7 +2023,7 @@ gotten:
 			 * mapcount is visible. So transitively, TLBs to
 			 * old page will be flushed before it can be reused.
 			 */
-			page_remove_rmap(old_page, vma);
+			page_remove_rmap(old_page);
 		}
 
 		/* Free the old page.. */
diff --git a/mm/rmap.c b/mm/rmap.c
index 32098255082e..ac4af8cffbf9 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -707,7 +707,6 @@ void page_add_file_rmap(struct page *page)
  */
 void page_dup_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address)
 {
-	BUG_ON(page_mapcount(page) == 0);
 	if (PageAnon(page))
 		__page_check_anon_rmap(page, vma, address);
 	atomic_inc(&page->_mapcount);
@@ -717,11 +716,10 @@ void page_dup_rmap(struct page *page, struct vm_area_struct *vma, unsigned long
 /**
  * page_remove_rmap - take down pte mapping from a page
  * @page: page to remove mapping from
- * @vma: the vm area in which the mapping is removed
  *
  * The caller needs to hold the pte lock.
  */
-void page_remove_rmap(struct page *page, struct vm_area_struct *vma)
+void page_remove_rmap(struct page *page)
 {
 	if (atomic_add_negative(-1, &page->_mapcount)) {
 		/*
@@ -837,7 +835,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 		dec_mm_counter(mm, file_rss);
 
 
-	page_remove_rmap(page, vma);
+	page_remove_rmap(page);
 	page_cache_release(page);
 
 out_unmap:
@@ -952,7 +950,7 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
 		if (pte_dirty(pteval))
 			set_page_dirty(page);
 
-		page_remove_rmap(page, vma);
+		page_remove_rmap(page);
 		page_cache_release(page);
 		dec_mm_counter(mm, file_rss);
 		(*mapcount)--;
-- 
cgit v1.2.3


From 4f5a99d64c17470a784a6c68064207d82e3e74a5 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Tue, 6 Jan 2009 14:40:25 -0800
Subject: fs: remove WB_SYNC_HOLD

Remove WB_SYNC_HOLD.  The primary motiviation is the design of my
anti-starvation code for fsync.  It requires taking an inode lock over the
sync operation, so we could run into lock ordering problems with multiple
inodes.  It is possible to take a single global lock to solve the ordering
problem, but then that would prevent a future nice implementation of "sync
multiple inodes" based on lock order via inode address.

Seems like a backward step to remove this, but actually it is busted
anyway: we can't use the inode lists for data integrity wait: an inode can
be taken off the dirty lists but still be under writeback.  In order to
satisfy data integrity semantics, we should wait for it to finish
writeback, but if we only search the dirty lists, we'll miss it.

It would be possible to have a "writeback" list, for sys_sync, I suppose.
But why complicate things by prematurely optimise?  For unmounting, we
could avoid the "livelock avoidance" code, which would be easier, but
again premature IMO.

Fixing the existing data integrity problem will come next.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fs-writeback.c         | 12 ++----------
 include/linux/writeback.h |  1 -
 2 files changed, 2 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index d0ff0b8cf309..d99601af9e48 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -421,9 +421,6 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
  * If we're a pdlfush thread, then implement pdflush collision avoidance
  * against the entire list.
  *
- * WB_SYNC_HOLD is a hack for sys_sync(): reattach the inode to sb->s_dirty so
- * that it can be located for waiting on in __writeback_single_inode().
- *
  * If `bdi' is non-zero then we're being asked to writeback a specific queue.
  * This function assumes that the blockdev superblock's inodes are backed by
  * a variety of queues, so all inodes are searched.  For other superblocks,
@@ -499,10 +496,6 @@ void generic_sync_sb_inodes(struct super_block *sb,
 		__iget(inode);
 		pages_skipped = wbc->pages_skipped;
 		__writeback_single_inode(inode, wbc);
-		if (wbc->sync_mode == WB_SYNC_HOLD) {
-			inode->dirtied_when = jiffies;
-			list_move(&inode->i_list, &sb->s_dirty);
-		}
 		if (current_is_pdflush())
 			writeback_release(bdi);
 		if (wbc->pages_skipped != pages_skipped) {
@@ -588,8 +581,7 @@ restart:
 
 /*
  * writeback and wait upon the filesystem's dirty inodes.  The caller will
- * do this in two passes - one to write, and one to wait.  WB_SYNC_HOLD is
- * used to park the written inodes on sb->s_dirty for the wait pass.
+ * do this in two passes - one to write, and one to wait.
  *
  * A finite limit is set on the number of pages which will be written.
  * To prevent infinite livelock of sys_sync().
@@ -600,7 +592,7 @@ restart:
 void sync_inodes_sb(struct super_block *sb, int wait)
 {
 	struct writeback_control wbc = {
-		.sync_mode	= wait ? WB_SYNC_ALL : WB_SYNC_HOLD,
+		.sync_mode	= wait ? WB_SYNC_ALL : WB_SYNC_NONE,
 		.range_start	= 0,
 		.range_end	= LLONG_MAX,
 	};
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index bb28c975c1d7..7300ecdc480c 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -30,7 +30,6 @@ static inline int task_is_pdflush(struct task_struct *task)
 enum writeback_sync_modes {
 	WB_SYNC_NONE,	/* Don't wait on anything */
 	WB_SYNC_ALL,	/* Wait on every mapping */
-	WB_SYNC_HOLD,	/* Hold the inode on sb_dirty for sys_sync() */
 };
 
 /*
-- 
cgit v1.2.3


From 856bf4d717feb8c55d4e2f817b71ebb70cfbc67b Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Tue, 6 Jan 2009 14:40:26 -0800
Subject: fs: sys_sync fix

s_syncing livelock avoidance was breaking data integrity guarantee of
sys_sync, by allowing sys_sync to skip writing or waiting for superblocks
if there is a concurrent sys_sync happening.

This livelock avoidance is much less important now that we don't have the
get_super_to_sync() call after every sb that we sync.  This was replaced
by __put_super_and_need_restart.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fs-writeback.c  | 20 +-------------------
 include/linux/fs.h |  1 -
 2 files changed, 1 insertion(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index a9ee474f9691..e5eaa62fd17f 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -652,18 +652,6 @@ void sync_inodes_sb(struct super_block *sb, int wait)
 	sync_sb_inodes(sb, &wbc);
 }
 
-/*
- * Rather lame livelock avoidance.
- */
-static void set_sb_syncing(int val)
-{
-	struct super_block *sb;
-	spin_lock(&sb_lock);
-	list_for_each_entry_reverse(sb, &super_blocks, s_list)
-		sb->s_syncing = val;
-	spin_unlock(&sb_lock);
-}
-
 /**
  * sync_inodes - writes all inodes to disk
  * @wait: wait for completion
@@ -690,9 +678,6 @@ static void __sync_inodes(int wait)
 	spin_lock(&sb_lock);
 restart:
 	list_for_each_entry(sb, &super_blocks, s_list) {
-		if (sb->s_syncing)
-			continue;
-		sb->s_syncing = 1;
 		sb->s_count++;
 		spin_unlock(&sb_lock);
 		down_read(&sb->s_umount);
@@ -710,13 +695,10 @@ restart:
 
 void sync_inodes(int wait)
 {
-	set_sb_syncing(0);
 	__sync_inodes(0);
 
-	if (wait) {
-		set_sb_syncing(0);
+	if (wait)
 		__sync_inodes(1);
-	}
 }
 
 /**
diff --git a/include/linux/fs.h b/include/linux/fs.h
index fb59673c60b1..d7eba77f666e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1133,7 +1133,6 @@ struct super_block {
 	struct rw_semaphore	s_umount;
 	struct mutex		s_lock;
 	int			s_count;
-	int			s_syncing;
 	int			s_need_sync_fs;
 	atomic_t		s_active;
 #ifdef CONFIG_SECURITY
-- 
cgit v1.2.3


From 901608d9045146aec6f14a7777ea4b1501c379f0 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 6 Jan 2009 14:40:29 -0800
Subject: mm: introduce get_mm_hiwater_xxx(), fix taskstats->hiwater_xxx
 accounting

xacct_add_tsk() relies on do_exit()->update_hiwater_xxx() and uses
mm->hiwater_xxx directly, this leads to 2 problems:

- taskstats_user_cmd() can call fill_pid()->xacct_add_tsk() at any
  moment before the task exits, so we should check the current values of
  rss/vm anyway.

- do_exit()->update_hiwater_xxx() calls are racy.  An exiting thread can
  be preempted right before mm->hiwater_xxx = new_val, and another thread
  can use A_LOT of memory and exit in between.  When the first thread
  resumes it can be the last thread in the thread group, in that case we
  report the wrong hiwater_xxx values which do not take A_LOT into
  account.

Introduce get_mm_hiwater_rss() and get_mm_hiwater_vm() helpers and change
xacct_add_tsk() to use them.  The first helper will also be used by
rusage->ru_maxrss accounting.

Kill do_exit()->update_hiwater_xxx() calls.  Unless we are going to
decrease rss/vm there is no point to update mm->hiwater_xxx, and nobody
can look at this mm_struct when exit_mmap() actually unmaps the memory.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Hugh Dickins <hugh@veritas.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 3 +++
 kernel/exit.c         | 5 +----
 kernel/tsacct.c       | 4 ++--
 mm/mmap.c             | 2 +-
 4 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 38a3f4b15394..ea415136ac9e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -386,6 +386,9 @@ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
 		(mm)->hiwater_vm = (mm)->total_vm;	\
 } while (0)
 
+#define get_mm_hiwater_rss(mm)	max((mm)->hiwater_rss, get_mm_rss(mm))
+#define get_mm_hiwater_vm(mm)	max((mm)->hiwater_vm, (mm)->total_vm)
+
 extern void set_dumpable(struct mm_struct *mm, int value);
 extern int get_dumpable(struct mm_struct *mm);
 
diff --git a/kernel/exit.c b/kernel/exit.c
index f923724ab3c9..c7740fa3252c 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1051,10 +1051,7 @@ NORET_TYPE void do_exit(long code)
 				preempt_count());
 
 	acct_update_integrals(tsk);
-	if (tsk->mm) {
-		update_hiwater_rss(tsk->mm);
-		update_hiwater_vm(tsk->mm);
-	}
+
 	group_dead = atomic_dec_and_test(&tsk->signal->live);
 	if (group_dead) {
 		hrtimer_cancel(&tsk->signal->real_timer);
diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index 2dc06ab35716..43f891b05a4b 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -92,8 +92,8 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
 	mm = get_task_mm(p);
 	if (mm) {
 		/* adjust to KB unit */
-		stats->hiwater_rss   = mm->hiwater_rss * PAGE_SIZE / KB;
-		stats->hiwater_vm    = mm->hiwater_vm * PAGE_SIZE / KB;
+		stats->hiwater_rss   = get_mm_hiwater_rss(mm) * PAGE_SIZE / KB;
+		stats->hiwater_vm    = get_mm_hiwater_vm(mm)  * PAGE_SIZE / KB;
 		mmput(mm);
 	}
 	stats->read_char	= p->ioac.rchar;
diff --git a/mm/mmap.c b/mm/mmap.c
index e4507b23e620..1f97d8aa9b05 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2102,7 +2102,7 @@ void exit_mmap(struct mm_struct *mm)
 	lru_add_drain();
 	flush_cache_mm(mm);
 	tlb = tlb_gather_mmu(mm, 1);
-	/* Don't update_hiwater_rss(mm) here, do_exit already did */
+	/* update_hiwater_rss(mm) here? but nobody should be looking */
 	/* Use -1 here to ensure all VMAs in the mm are unmapped */
 	end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
 	vm_unacct_memory(nr_accounted);
-- 
cgit v1.2.3


From ea435467500612636f8f4fb639ff6e76b2496e4b Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew@wil.cx>
Date: Tue, 6 Jan 2009 14:40:39 -0800
Subject: atomic_t: unify all arch definitions

The atomic_t type cannot currently be used in some header files because it
would create an include loop with asm/atomic.h.  Move the type definition
to linux/types.h to break the loop.

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/include/asm/atomic.h     |  9 +--------
 arch/arm/include/asm/atomic.h       |  3 +--
 arch/avr32/include/asm/atomic.h     |  2 +-
 arch/blackfin/include/asm/atomic.h  |  4 +---
 arch/cris/include/asm/atomic.h      |  4 +---
 arch/h8300/include/asm/atomic.h     |  3 ++-
 arch/ia64/include/asm/atomic.h      |  6 ------
 arch/m68knommu/include/asm/atomic.h |  2 +-
 arch/mips/include/asm/atomic.h      |  5 +----
 arch/parisc/include/asm/atomic.h    | 11 +++--------
 arch/powerpc/include/asm/atomic.h   |  4 +---
 arch/s390/include/asm/atomic.h      |  7 +------
 arch/sh/include/asm/atomic.h        |  7 +++----
 arch/sparc/include/asm/atomic_32.h  |  2 --
 arch/sparc/include/asm/atomic_64.h  |  3 ---
 arch/x86/include/asm/atomic_32.h    | 10 +---------
 arch/x86/include/asm/atomic_64.h    | 18 ++----------------
 include/asm-frv/atomic.h            |  4 ----
 include/asm-m32r/atomic.h           |  8 +-------
 include/asm-m68k/atomic.h           |  3 +--
 include/asm-mn10300/atomic.h        |  9 ---------
 include/asm-xtensa/atomic.h         |  3 +--
 include/linux/types.h               | 10 ++++++++++
 23 files changed, 33 insertions(+), 104 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h
index ca88e54dec93..62b363584b2b 100644
--- a/arch/alpha/include/asm/atomic.h
+++ b/arch/alpha/include/asm/atomic.h
@@ -1,6 +1,7 @@
 #ifndef _ALPHA_ATOMIC_H
 #define _ALPHA_ATOMIC_H
 
+#include <linux/types.h>
 #include <asm/barrier.h>
 #include <asm/system.h>
 
@@ -13,14 +14,6 @@
  */
 
 
-/*
- * Counter is volatile to make sure gcc doesn't try to be clever
- * and move things around on us. We need to use _exactly_ the address
- * the user gave us, not some alias that contains the same information.
- */
-typedef struct { volatile int counter; } atomic_t;
-typedef struct { volatile long counter; } atomic64_t;
-
 #define ATOMIC_INIT(i)		( (atomic_t) { (i) } )
 #define ATOMIC64_INIT(i)	( (atomic64_t) { (i) } )
 
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index 325f881ccb50..ee99723b3a6c 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -12,10 +12,9 @@
 #define __ASM_ARM_ATOMIC_H
 
 #include <linux/compiler.h>
+#include <linux/types.h>
 #include <asm/system.h>
 
-typedef struct { volatile int counter; } atomic_t;
-
 #define ATOMIC_INIT(i)	{ (i) }
 
 #ifdef __KERNEL__
diff --git a/arch/avr32/include/asm/atomic.h b/arch/avr32/include/asm/atomic.h
index 7ef3862a73d0..318815107748 100644
--- a/arch/avr32/include/asm/atomic.h
+++ b/arch/avr32/include/asm/atomic.h
@@ -14,9 +14,9 @@
 #ifndef __ASM_AVR32_ATOMIC_H
 #define __ASM_AVR32_ATOMIC_H
 
+#include <linux/types.h>
 #include <asm/system.h>
 
-typedef struct { volatile int counter; } atomic_t;
 #define ATOMIC_INIT(i)  { (i) }
 
 #define atomic_read(v)		((v)->counter)
diff --git a/arch/blackfin/include/asm/atomic.h b/arch/blackfin/include/asm/atomic.h
index 7cf508718605..25776c19064b 100644
--- a/arch/blackfin/include/asm/atomic.h
+++ b/arch/blackfin/include/asm/atomic.h
@@ -1,6 +1,7 @@
 #ifndef __ARCH_BLACKFIN_ATOMIC__
 #define __ARCH_BLACKFIN_ATOMIC__
 
+#include <linux/types.h>
 #include <asm/system.h>	/* local_irq_XXX() */
 
 /*
@@ -13,9 +14,6 @@
  * Tony Kou (tonyko@lineo.ca)   Lineo Inc.   2001
  */
 
-typedef struct {
-	int counter;
-} atomic_t;
 #define ATOMIC_INIT(i)	{ (i) }
 
 #define atomic_read(v)		((v)->counter)
diff --git a/arch/cris/include/asm/atomic.h b/arch/cris/include/asm/atomic.h
index f71ea686a2ea..5718dd8902a1 100644
--- a/arch/cris/include/asm/atomic.h
+++ b/arch/cris/include/asm/atomic.h
@@ -4,7 +4,7 @@
 #define __ASM_CRIS_ATOMIC__
 
 #include <linux/compiler.h>
-
+#include <linux/types.h>
 #include <asm/system.h>
 #include <arch/atomic.h>
 
@@ -13,8 +13,6 @@
  * resource counting etc..
  */
 
-typedef struct { volatile int counter; } atomic_t;
-
 #define ATOMIC_INIT(i)  { (i) }
 
 #define atomic_read(v) ((v)->counter)
diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h
index b4cf0ea97ede..833186c8dc3b 100644
--- a/arch/h8300/include/asm/atomic.h
+++ b/arch/h8300/include/asm/atomic.h
@@ -1,12 +1,13 @@
 #ifndef __ARCH_H8300_ATOMIC__
 #define __ARCH_H8300_ATOMIC__
 
+#include <linux/types.h>
+
 /*
  * Atomic operations that C can't guarantee us.  Useful for
  * resource counting etc..
  */
 
-typedef struct { int counter; } atomic_t;
 #define ATOMIC_INIT(i)	{ (i) }
 
 #define atomic_read(v)		((v)->counter)
diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h
index 50c2b83fd5a0..d37292bd9875 100644
--- a/arch/ia64/include/asm/atomic.h
+++ b/arch/ia64/include/asm/atomic.h
@@ -17,12 +17,6 @@
 #include <asm/intrinsics.h>
 #include <asm/system.h>
 
-/*
- * On IA-64, counter must always be volatile to ensure that that the
- * memory accesses are ordered.
- */
-typedef struct { volatile __s32 counter; } atomic_t;
-typedef struct { volatile __s64 counter; } atomic64_t;
 
 #define ATOMIC_INIT(i)		((atomic_t) { (i) })
 #define ATOMIC64_INIT(i)	((atomic64_t) { (i) })
diff --git a/arch/m68knommu/include/asm/atomic.h b/arch/m68knommu/include/asm/atomic.h
index d5632a305dae..6bb674855a3f 100644
--- a/arch/m68knommu/include/asm/atomic.h
+++ b/arch/m68knommu/include/asm/atomic.h
@@ -1,6 +1,7 @@
 #ifndef __ARCH_M68KNOMMU_ATOMIC__
 #define __ARCH_M68KNOMMU_ATOMIC__
 
+#include <linux/types.h>
 #include <asm/system.h>
 
 /*
@@ -12,7 +13,6 @@
  * We do not have SMP m68k systems, so we don't have to deal with that.
  */
 
-typedef struct { int counter; } atomic_t;
 #define ATOMIC_INIT(i)	{ (i) }
 
 #define atomic_read(v)		((v)->counter)
diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index 1232be3885b0..c996c3b4d074 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -15,13 +15,12 @@
 #define _ASM_ATOMIC_H
 
 #include <linux/irqflags.h>
+#include <linux/types.h>
 #include <asm/barrier.h>
 #include <asm/cpu-features.h>
 #include <asm/war.h>
 #include <asm/system.h>
 
-typedef struct { volatile int counter; } atomic_t;
-
 #define ATOMIC_INIT(i)    { (i) }
 
 /*
@@ -404,8 +403,6 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u)
 
 #ifdef CONFIG_64BIT
 
-typedef struct { volatile long counter; } atomic64_t;
-
 #define ATOMIC64_INIT(i)    { (i) }
 
 /*
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
index 57fcc4a5ebb4..edbfe25c5fc1 100644
--- a/arch/parisc/include/asm/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
@@ -155,14 +155,11 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr,
 #define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
 #endif
 
-/* Note that we need not lock read accesses - aligned word writes/reads
- * are atomic, so a reader never sees unconsistent values.
- *
- * Cache-line alignment would conflict with, for example, linux/module.h
+/*
+ * Note that we need not lock read accesses - aligned word writes/reads
+ * are atomic, so a reader never sees inconsistent values.
  */
 
-typedef struct { volatile int counter; } atomic_t;
-
 /* It's possible to reduce all atomic operations to either
  * __atomic_add_return, atomic_set and atomic_read (the latter
  * is there only for consistency).
@@ -260,8 +257,6 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u)
 
 #ifdef CONFIG_64BIT
 
-typedef struct { volatile s64 counter; } atomic64_t;
-
 #define ATOMIC64_INIT(i) ((atomic64_t) { (i) })
 
 static __inline__ int
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 499be5bdd6fa..b401950f5259 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -5,7 +5,7 @@
  * PowerPC atomic operations
  */
 
-typedef struct { int counter; } atomic_t;
+#include <linux/types.h>
 
 #ifdef __KERNEL__
 #include <linux/compiler.h>
@@ -251,8 +251,6 @@ static __inline__ int atomic_dec_if_positive(atomic_t *v)
 
 #ifdef __powerpc64__
 
-typedef struct { long counter; } atomic64_t;
-
 #define ATOMIC64_INIT(i)	{ (i) }
 
 static __inline__ long atomic64_read(const atomic64_t *v)
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index 2d184655bc5d..de432f2de2d2 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -2,6 +2,7 @@
 #define __ARCH_S390_ATOMIC__
 
 #include <linux/compiler.h>
+#include <linux/types.h>
 
 /*
  *  include/asm-s390/atomic.h
@@ -23,9 +24,6 @@
  * S390 uses 'Compare And Swap' for atomicity in SMP enviroment
  */
 
-typedef struct {
-	int counter;
-} __attribute__ ((aligned (4))) atomic_t;
 #define ATOMIC_INIT(i)  { (i) }
 
 #ifdef __KERNEL__
@@ -149,9 +147,6 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u)
 #undef __CS_LOOP
 
 #ifdef __s390x__
-typedef struct {
-	long long counter;
-} __attribute__ ((aligned (8))) atomic64_t;
 #define ATOMIC64_INIT(i)  { (i) }
 
 #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2)
diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h
index c043ef003028..6327ffbb1992 100644
--- a/arch/sh/include/asm/atomic.h
+++ b/arch/sh/include/asm/atomic.h
@@ -7,16 +7,15 @@
  *
  */
 
-typedef struct { volatile int counter; } atomic_t;
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <asm/system.h>
 
 #define ATOMIC_INIT(i)	( (atomic_t) { (i) } )
 
 #define atomic_read(v)		((v)->counter)
 #define atomic_set(v,i)		((v)->counter = (i))
 
-#include <linux/compiler.h>
-#include <asm/system.h>
-
 #if defined(CONFIG_GUSA_RB)
 #include <asm/atomic-grb.h>
 #elif defined(CONFIG_CPU_SH4A)
diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h
index 5c944b5a8040..ce465975a6a5 100644
--- a/arch/sparc/include/asm/atomic_32.h
+++ b/arch/sparc/include/asm/atomic_32.h
@@ -13,8 +13,6 @@
 
 #include <linux/types.h>
 
-typedef struct { volatile int counter; } atomic_t;
-
 #ifdef __KERNEL__
 
 #define ATOMIC_INIT(i)  { (i) }
diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h
index 5982c5ae7f07..a0a706492696 100644
--- a/arch/sparc/include/asm/atomic_64.h
+++ b/arch/sparc/include/asm/atomic_64.h
@@ -10,9 +10,6 @@
 #include <linux/types.h>
 #include <asm/system.h>
 
-typedef struct { volatile int counter; } atomic_t;
-typedef struct { volatile __s64 counter; } atomic64_t;
-
 #define ATOMIC_INIT(i)		{ (i) }
 #define ATOMIC64_INIT(i)	{ (i) }
 
diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h
index ad5b9f6ecddf..85b46fba4229 100644
--- a/arch/x86/include/asm/atomic_32.h
+++ b/arch/x86/include/asm/atomic_32.h
@@ -2,6 +2,7 @@
 #define _ASM_X86_ATOMIC_32_H
 
 #include <linux/compiler.h>
+#include <linux/types.h>
 #include <asm/processor.h>
 #include <asm/cmpxchg.h>
 
@@ -10,15 +11,6 @@
  * resource counting etc..
  */
 
-/*
- * Make sure gcc doesn't try to be clever and move things around
- * on us. We need to use _exactly_ the address the user gave us,
- * not some alias that contains the same information.
- */
-typedef struct {
-	int counter;
-} atomic_t;
-
 #define ATOMIC_INIT(i)	{ (i) }
 
 /**
diff --git a/arch/x86/include/asm/atomic_64.h b/arch/x86/include/asm/atomic_64.h
index 279d2a731f3f..8c21731984da 100644
--- a/arch/x86/include/asm/atomic_64.h
+++ b/arch/x86/include/asm/atomic_64.h
@@ -1,25 +1,15 @@
 #ifndef _ASM_X86_ATOMIC_64_H
 #define _ASM_X86_ATOMIC_64_H
 
+#include <linux/types.h>
 #include <asm/alternative.h>
 #include <asm/cmpxchg.h>
 
-/* atomic_t should be 32 bit signed type */
-
 /*
  * Atomic operations that C can't guarantee us.  Useful for
  * resource counting etc..
  */
 
-/*
- * Make sure gcc doesn't try to be clever and move things around
- * on us. We need to use _exactly_ the address the user gave us,
- * not some alias that contains the same information.
- */
-typedef struct {
-	int counter;
-} atomic_t;
-
 #define ATOMIC_INIT(i)	{ (i) }
 
 /**
@@ -191,11 +181,7 @@ static inline int atomic_sub_return(int i, atomic_t *v)
 #define atomic_inc_return(v)  (atomic_add_return(1, v))
 #define atomic_dec_return(v)  (atomic_sub_return(1, v))
 
-/* An 64bit atomic type */
-
-typedef struct {
-	long counter;
-} atomic64_t;
+/* The 64-bit atomic type */
 
 #define ATOMIC64_INIT(i)	{ (i) }
 
diff --git a/include/asm-frv/atomic.h b/include/asm-frv/atomic.h
index 46d696b331e7..296c35cfb207 100644
--- a/include/asm-frv/atomic.h
+++ b/include/asm-frv/atomic.h
@@ -35,10 +35,6 @@
 #define smp_mb__before_atomic_inc()	barrier()
 #define smp_mb__after_atomic_inc()	barrier()
 
-typedef struct {
-	int counter;
-} atomic_t;
-
 #define ATOMIC_INIT(i)		{ (i) }
 #define atomic_read(v)		((v)->counter)
 #define atomic_set(v, i)	(((v)->counter) = (i))
diff --git a/include/asm-m32r/atomic.h b/include/asm-m32r/atomic.h
index 3a38ffe4a4f4..2eed30f84080 100644
--- a/include/asm-m32r/atomic.h
+++ b/include/asm-m32r/atomic.h
@@ -9,6 +9,7 @@
  *    Copyright (C) 2004  Hirokazu Takata <takata at linux-m32r.org>
  */
 
+#include <linux/types.h>
 #include <asm/assembler.h>
 #include <asm/system.h>
 
@@ -17,13 +18,6 @@
  * resource counting etc..
  */
 
-/*
- * Make sure gcc doesn't try to be clever and move things around
- * on us. We need to use _exactly_ the address the user gave us,
- * not some alias that contains the same information.
- */
-typedef struct { volatile int counter; } atomic_t;
-
 #define ATOMIC_INIT(i)	{ (i) }
 
 /**
diff --git a/include/asm-m68k/atomic.h b/include/asm-m68k/atomic.h
index 4915294fea63..eb0ab9d4ee77 100644
--- a/include/asm-m68k/atomic.h
+++ b/include/asm-m68k/atomic.h
@@ -1,7 +1,7 @@
 #ifndef __ARCH_M68K_ATOMIC__
 #define __ARCH_M68K_ATOMIC__
 
-
+#include <linux/types.h>
 #include <asm/system.h>
 
 /*
@@ -13,7 +13,6 @@
  * We do not have SMP m68k systems, so we don't have to deal with that.
  */
 
-typedef struct { int counter; } atomic_t;
 #define ATOMIC_INIT(i)	{ (i) }
 
 #define atomic_read(v)		((v)->counter)
diff --git a/include/asm-mn10300/atomic.h b/include/asm-mn10300/atomic.h
index 27c9690b9574..bc064825f9b1 100644
--- a/include/asm-mn10300/atomic.h
+++ b/include/asm-mn10300/atomic.h
@@ -20,15 +20,6 @@
  * resource counting etc..
  */
 
-/*
- * Make sure gcc doesn't try to be clever and move things around
- * on us. We need to use _exactly_ the address the user gave us,
- * not some alias that contains the same information.
- */
-typedef struct {
-	int	counter;
-} atomic_t;
-
 #define ATOMIC_INIT(i)	{ (i) }
 
 #ifdef __KERNEL__
diff --git a/include/asm-xtensa/atomic.h b/include/asm-xtensa/atomic.h
index b3b23540f14d..67ad67bed8c1 100644
--- a/include/asm-xtensa/atomic.h
+++ b/include/asm-xtensa/atomic.h
@@ -14,8 +14,7 @@
 #define _XTENSA_ATOMIC_H
 
 #include <linux/stringify.h>
-
-typedef struct { volatile int counter; } atomic_t;
+#include <linux/types.h>
 
 #ifdef __KERNEL__
 #include <asm/processor.h>
diff --git a/include/linux/types.h b/include/linux/types.h
index 121f349cb7ec..3b864f2d9560 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -195,6 +195,16 @@ typedef u32 phys_addr_t;
 
 typedef phys_addr_t resource_size_t;
 
+typedef struct {
+	volatile int counter;
+} atomic_t;
+
+#ifdef CONFIG_64BIT
+typedef struct {
+	volatile long counter;
+} atomic64_t;
+#endif
+
 struct ustat {
 	__kernel_daddr_t	f_tfree;
 	__kernel_ino_t		f_tinode;
-- 
cgit v1.2.3


From f1883f86dea84fe47a71a39fc1afccc005915ed8 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 6 Jan 2009 14:40:45 -0800
Subject: Remove remaining unwinder code

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Gabor Gombas <gombasg@sztaki.hu>
Cc: Jan Beulich <jbeulich@novell.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Ingo Molnar <mingo@elte.hu>,
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/unwind.h | 13 ---------
 arch/x86/kernel/traps.c       |  2 --
 include/linux/module.h        |  3 --
 include/linux/unwind.h        | 68 -------------------------------------------
 init/main.c                   |  3 --
 kernel/module.c               | 15 ----------
 lib/fault-inject.c            |  1 -
 7 files changed, 105 deletions(-)
 delete mode 100644 arch/x86/include/asm/unwind.h
 delete mode 100644 include/linux/unwind.h

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
deleted file mode 100644
index 8b064bd9c553..000000000000
--- a/arch/x86/include/asm/unwind.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef _ASM_X86_UNWIND_H
-#define _ASM_X86_UNWIND_H
-
-#define UNW_PC(frame) ((void)(frame), 0UL)
-#define UNW_SP(frame) ((void)(frame), 0UL)
-#define UNW_FP(frame) ((void)(frame), 0UL)
-
-static inline int arch_unw_user_mode(const void *info)
-{
-	return 0;
-}
-
-#endif /* _ASM_X86_UNWIND_H */
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index ce6650eb64e9..c9a666cdd3db 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -20,7 +20,6 @@
 #include <linux/module.h>
 #include <linux/ptrace.h>
 #include <linux/string.h>
-#include <linux/unwind.h>
 #include <linux/delay.h>
 #include <linux/errno.h>
 #include <linux/kexec.h>
@@ -51,7 +50,6 @@
 #include <asm/debugreg.h>
 #include <asm/atomic.h>
 #include <asm/system.h>
-#include <asm/unwind.h>
 #include <asm/traps.h>
 #include <asm/desc.h>
 #include <asm/i387.h>
diff --git a/include/linux/module.h b/include/linux/module.h
index 3bfed013350b..03cb93d1865a 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -294,9 +294,6 @@ struct module
 	/* The size of the executable code in each section.  */
 	unsigned int init_text_size, core_text_size;
 
-	/* The handle returned from unwind_add_table. */
-	void *unwind_info;
-
 	/* Arch-specific module values */
 	struct mod_arch_specific arch;
 
diff --git a/include/linux/unwind.h b/include/linux/unwind.h
deleted file mode 100644
index 7760860fa170..000000000000
--- a/include/linux/unwind.h
+++ /dev/null
@@ -1,68 +0,0 @@
-#ifndef _LINUX_UNWIND_H
-#define _LINUX_UNWIND_H
-
-/*
- * Copyright (C) 2002-2006 Novell, Inc.
- *	Jan Beulich <jbeulich@novell.com>
- * This code is released under version 2 of the GNU GPL.
- *
- * A simple API for unwinding kernel stacks.  This is used for
- * debugging and error reporting purposes.  The kernel doesn't need
- * full-blown stack unwinding with all the bells and whistles, so there
- * is not much point in implementing the full Dwarf2 unwind API.
- */
-
-struct module;
-
-struct unwind_frame_info {};
-
-static inline void unwind_init(void) {}
-static inline void unwind_setup(void) {}
-
-#ifdef CONFIG_MODULES
-
-static inline void *unwind_add_table(struct module *mod,
-                                     const void *table_start,
-                                     unsigned long table_size)
-{
-	return NULL;
-}
-
-static inline void unwind_remove_table(void *handle, int init_only)
-{
-}
-
-#endif
-
-static inline int unwind_init_frame_info(struct unwind_frame_info *info,
-                                         struct task_struct *tsk,
-                                         const struct pt_regs *regs)
-{
-	return -ENOSYS;
-}
-
-static inline int unwind_init_blocked(struct unwind_frame_info *info,
-                                      struct task_struct *tsk)
-{
-	return -ENOSYS;
-}
-
-static inline int unwind_init_running(struct unwind_frame_info *info,
-                                      asmlinkage int (*cb)(struct unwind_frame_info *,
-                                                           void *arg),
-                                      void *arg)
-{
-	return -ENOSYS;
-}
-
-static inline int unwind(struct unwind_frame_info *info)
-{
-	return -ENOSYS;
-}
-
-static inline int unwind_to_user(struct unwind_frame_info *info)
-{
-	return -ENOSYS;
-}
-
-#endif /* _LINUX_UNWIND_H */
diff --git a/init/main.c b/init/main.c
index 90926dadc20d..e119dd28dd7d 100644
--- a/init/main.c
+++ b/init/main.c
@@ -50,7 +50,6 @@
 #include <linux/rmap.h>
 #include <linux/mempolicy.h>
 #include <linux/key.h>
-#include <linux/unwind.h>
 #include <linux/buffer_head.h>
 #include <linux/page_cgroup.h>
 #include <linux/debug_locks.h>
@@ -537,7 +536,6 @@ asmlinkage void __init start_kernel(void)
 	 * Need to run as early as possible, to initialize the
 	 * lockdep hash:
 	 */
-	unwind_init();
 	lockdep_init();
 	debug_objects_early_init();
 	cgroup_init_early();
@@ -559,7 +557,6 @@ asmlinkage void __init start_kernel(void)
 	setup_arch(&command_line);
 	mm_init_owner(&init_mm, &init_task);
 	setup_command_line(command_line);
-	unwind_setup();
 	setup_per_cpu_areas();
 	setup_nr_cpu_ids();
 	smp_prepare_boot_cpu();	/* arch-specific boot-cpu hooks */
diff --git a/kernel/module.c b/kernel/module.c
index f47cce910f25..34b56cf06615 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -43,7 +43,6 @@
 #include <linux/device.h>
 #include <linux/string.h>
 #include <linux/mutex.h>
-#include <linux/unwind.h>
 #include <linux/rculist.h>
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
@@ -1449,8 +1448,6 @@ static void free_module(struct module *mod)
 	remove_sect_attrs(mod);
 	mod_kobject_remove(mod);
 
-	unwind_remove_table(mod->unwind_info, 0);
-
 	/* Arch-specific cleanup. */
 	module_arch_cleanup(mod);
 
@@ -1867,7 +1864,6 @@ static noinline struct module *load_module(void __user *umod,
 	unsigned int symindex = 0;
 	unsigned int strindex = 0;
 	unsigned int modindex, versindex, infoindex, pcpuindex;
-	unsigned int unwindex = 0;
 	unsigned int num_kp, num_mcount;
 	struct kernel_param *kp;
 	struct module *mod;
@@ -1957,9 +1953,6 @@ static noinline struct module *load_module(void __user *umod,
 	versindex = find_sec(hdr, sechdrs, secstrings, "__versions");
 	infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo");
 	pcpuindex = find_pcpusec(hdr, sechdrs, secstrings);
-#ifdef ARCH_UNWIND_SECTION_NAME
-	unwindex = find_sec(hdr, sechdrs, secstrings, ARCH_UNWIND_SECTION_NAME);
-#endif
 
 	/* Don't keep modinfo and version sections. */
 	sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
@@ -1969,8 +1962,6 @@ static noinline struct module *load_module(void __user *umod,
 	sechdrs[symindex].sh_flags |= SHF_ALLOC;
 	sechdrs[strindex].sh_flags |= SHF_ALLOC;
 #endif
-	if (unwindex)
-		sechdrs[unwindex].sh_flags |= SHF_ALLOC;
 
 	/* Check module struct version now, before we try to use module. */
 	if (!check_modstruct_version(sechdrs, versindex, mod)) {
@@ -2267,11 +2258,6 @@ static noinline struct module *load_module(void __user *umod,
 	add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs);
 	add_notes_attrs(mod, hdr->e_shnum, secstrings, sechdrs);
 
-	/* Size of section 0 is 0, so this works well if no unwind info. */
-	mod->unwind_info = unwind_add_table(mod,
-					    (void *)sechdrs[unwindex].sh_addr,
-					    sechdrs[unwindex].sh_size);
-
 	/* Get rid of temporary copy */
 	vfree(hdr);
 
@@ -2370,7 +2356,6 @@ sys_init_module(void __user *umod,
 	mutex_lock(&module_mutex);
 	/* Drop initial reference. */
 	module_put(mod);
-	unwind_remove_table(mod->unwind_info, 1);
 	module_free(mod, mod->module_init);
 	mod->module_init = NULL;
 	mod->init_size = 0;
diff --git a/lib/fault-inject.c b/lib/fault-inject.c
index a50a311554cc..f97af55bdd96 100644
--- a/lib/fault-inject.c
+++ b/lib/fault-inject.c
@@ -6,7 +6,6 @@
 #include <linux/fs.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
-#include <linux/unwind.h>
 #include <linux/stacktrace.h>
 #include <linux/kallsyms.h>
 #include <linux/fault-inject.h>
-- 
cgit v1.2.3


From 9fe06081ef145d6582c39e18203b5fffe6f3abc2 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@us.ibm.com>
Date: Tue, 6 Jan 2009 14:40:51 -0800
Subject: Create a DIV_ROUND_CLOSEST macro to do division with rounding

Create a helper macro to divide two numbers and round the result to the
nearest whole number.  This is a helper macro for hwmon drivers that want
to convert incoming sysfs values per standard hwmon practice, though the
macro itself can be used by anyone.

Signed-off-by: Darrick J. Wong <djwong@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index ca9ff6411dfa..721984844c94 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -48,6 +48,12 @@ extern const char linux_proc_banner[];
 #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
 #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
 #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y))
+#define DIV_ROUND_CLOSEST(x, divisor)(			\
+{							\
+	typeof(divisor) __divisor = divisor;		\
+	(((x) + ((__divisor) / 2)) / (__divisor));	\
+}							\
+)
 
 #define _RET_IP_		(unsigned long)__builtin_return_address(0)
 #define _THIS_IP_  ({ __label__ __here; __here: (unsigned long)&&__here; })
-- 
cgit v1.2.3


From 5f820f648c92a5ecc771a96b3c29aa6e90013bba Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Tue, 6 Jan 2009 14:40:59 -0800
Subject: poll: allow f_op->poll to sleep

f_op->poll is the only vfs operation which is not allowed to sleep.  It's
because poll and select implementation used task state to synchronize
against wake ups, which doesn't have to be the case anymore as wait/wake
interface can now use custom wake up functions.  The non-sleep restriction
can be a bit tricky because ->poll is not called from an atomic context
and the result of accidentally sleeping in ->poll only shows up as
temporary busy looping when the timing is right or rather wrong.

This patch converts poll/select to use custom wake up function and use
separate triggered variable to synchronize against wake up events.  The
only added overhead is an extra function call during wake up and
negligible.

This patch removes the one non-sleep exception from vfs locking rules and
is beneficial to userland filesystem implementations like FUSE, 9p or
peculiar fs like spufs as it's very difficult for those to implement
non-sleeping poll method.

While at it, make the following cosmetic changes to make poll.h and
select.c checkpatch friendly.

* s/type * symbol/type *symbol/		   : three places in poll.h
* remove blank line before EXPORT_SYMBOL() : two places in select.c

Oleg: spotted missing barrier in poll_schedule_timeout()
Davide: spotted missing write barrier in pollwake()

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Eric Van Hensbergen <ericvh@gmail.com>
Cc: Ron Minnich <rminnich@sandia.gov>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Cc: Davide Libenzi <davidel@xmailserver.org>
Cc: Brad Boyer <flar@allandria.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Roland McGrath <roland@redhat.com>
Cc: Mauro Carvalho Chehab <mchehab@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Davide Libenzi <davidel@xmailserver.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/filesystems/Locking |  2 +-
 drivers/media/video/v4l1-compat.c |  4 +--
 fs/select.c                       | 76 +++++++++++++++++++++++++++++++--------
 include/linux/poll.h              | 15 ++++++--
 4 files changed, 76 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index ccec55394380..cfbfa15a46ba 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -397,7 +397,7 @@ prototypes:
 };
 
 locking rules:
-	All except ->poll() may block.
+	All may block.
 			BKL
 llseek:			no	(see below)
 read:			no
diff --git a/drivers/media/video/v4l1-compat.c b/drivers/media/video/v4l1-compat.c
index d450cab20be4..b617bf05e2d7 100644
--- a/drivers/media/video/v4l1-compat.c
+++ b/drivers/media/video/v4l1-compat.c
@@ -203,7 +203,6 @@ static int poll_one(struct file *file, struct poll_wqueues *pwq)
 	table = &pwq->pt;
 	for (;;) {
 		int mask;
-		set_current_state(TASK_INTERRUPTIBLE);
 		mask = file->f_op->poll(file, table);
 		if (mask & POLLIN)
 			break;
@@ -212,9 +211,8 @@ static int poll_one(struct file *file, struct poll_wqueues *pwq)
 			retval = -ERESTARTSYS;
 			break;
 		}
-		schedule();
+		poll_schedule(pwq, TASK_INTERRUPTIBLE);
 	}
-	set_current_state(TASK_RUNNING);
 	poll_freewait(pwq);
 	return retval;
 }
diff --git a/fs/select.c b/fs/select.c
index 87df51eadcf2..08b91beed806 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -109,11 +109,11 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
 void poll_initwait(struct poll_wqueues *pwq)
 {
 	init_poll_funcptr(&pwq->pt, __pollwait);
+	pwq->polling_task = current;
 	pwq->error = 0;
 	pwq->table = NULL;
 	pwq->inline_index = 0;
 }
-
 EXPORT_SYMBOL(poll_initwait);
 
 static void free_poll_entry(struct poll_table_entry *entry)
@@ -142,12 +142,10 @@ void poll_freewait(struct poll_wqueues *pwq)
 		free_page((unsigned long) old);
 	}
 }
-
 EXPORT_SYMBOL(poll_freewait);
 
-static struct poll_table_entry *poll_get_entry(poll_table *_p)
+static struct poll_table_entry *poll_get_entry(struct poll_wqueues *p)
 {
-	struct poll_wqueues *p = container_of(_p, struct poll_wqueues, pt);
 	struct poll_table_page *table = p->table;
 
 	if (p->inline_index < N_INLINE_POLL_ENTRIES)
@@ -159,7 +157,6 @@ static struct poll_table_entry *poll_get_entry(poll_table *_p)
 		new_table = (struct poll_table_page *) __get_free_page(GFP_KERNEL);
 		if (!new_table) {
 			p->error = -ENOMEM;
-			__set_current_state(TASK_RUNNING);
 			return NULL;
 		}
 		new_table->entry = new_table->entries;
@@ -171,20 +168,75 @@ static struct poll_table_entry *poll_get_entry(poll_table *_p)
 	return table->entry++;
 }
 
+static int pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
+{
+	struct poll_wqueues *pwq = wait->private;
+	DECLARE_WAITQUEUE(dummy_wait, pwq->polling_task);
+
+	/*
+	 * Although this function is called under waitqueue lock, LOCK
+	 * doesn't imply write barrier and the users expect write
+	 * barrier semantics on wakeup functions.  The following
+	 * smp_wmb() is equivalent to smp_wmb() in try_to_wake_up()
+	 * and is paired with set_mb() in poll_schedule_timeout.
+	 */
+	smp_wmb();
+	pwq->triggered = 1;
+
+	/*
+	 * Perform the default wake up operation using a dummy
+	 * waitqueue.
+	 *
+	 * TODO: This is hacky but there currently is no interface to
+	 * pass in @sync.  @sync is scheduled to be removed and once
+	 * that happens, wake_up_process() can be used directly.
+	 */
+	return default_wake_function(&dummy_wait, mode, sync, key);
+}
+
 /* Add a new entry */
 static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
 				poll_table *p)
 {
-	struct poll_table_entry *entry = poll_get_entry(p);
+	struct poll_wqueues *pwq = container_of(p, struct poll_wqueues, pt);
+	struct poll_table_entry *entry = poll_get_entry(pwq);
 	if (!entry)
 		return;
 	get_file(filp);
 	entry->filp = filp;
 	entry->wait_address = wait_address;
-	init_waitqueue_entry(&entry->wait, current);
+	init_waitqueue_func_entry(&entry->wait, pollwake);
+	entry->wait.private = pwq;
 	add_wait_queue(wait_address, &entry->wait);
 }
 
+int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
+			  ktime_t *expires, unsigned long slack)
+{
+	int rc = -EINTR;
+
+	set_current_state(state);
+	if (!pwq->triggered)
+		rc = schedule_hrtimeout_range(expires, slack, HRTIMER_MODE_ABS);
+	__set_current_state(TASK_RUNNING);
+
+	/*
+	 * Prepare for the next iteration.
+	 *
+	 * The following set_mb() serves two purposes.  First, it's
+	 * the counterpart rmb of the wmb in pollwake() such that data
+	 * written before wake up is always visible after wake up.
+	 * Second, the full barrier guarantees that triggered clearing
+	 * doesn't pass event check of the next iteration.  Note that
+	 * this problem doesn't exist for the first iteration as
+	 * add_wait_queue() has full barrier semantics.
+	 */
+	set_mb(pwq->triggered, 0);
+
+	return rc;
+}
+EXPORT_SYMBOL(poll_schedule_timeout);
+
 /**
  * poll_select_set_timeout - helper function to setup the timeout value
  * @to:		pointer to timespec variable for the final timeout
@@ -340,8 +392,6 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
 	for (;;) {
 		unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp;
 
-		set_current_state(TASK_INTERRUPTIBLE);
-
 		inp = fds->in; outp = fds->out; exp = fds->ex;
 		rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex;
 
@@ -411,10 +461,10 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
 			to = &expire;
 		}
 
-		if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
+		if (!poll_schedule_timeout(&table, TASK_INTERRUPTIBLE,
+					   to, slack))
 			timed_out = 1;
 	}
-	__set_current_state(TASK_RUNNING);
 
 	poll_freewait(&table);
 
@@ -666,7 +716,6 @@ static int do_poll(unsigned int nfds,  struct poll_list *list,
 	for (;;) {
 		struct poll_list *walk;
 
-		set_current_state(TASK_INTERRUPTIBLE);
 		for (walk = list; walk != NULL; walk = walk->next) {
 			struct pollfd * pfd, * pfd_end;
 
@@ -709,10 +758,9 @@ static int do_poll(unsigned int nfds,  struct poll_list *list,
 			to = &expire;
 		}
 
-		if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
+		if (!poll_schedule_timeout(wait, TASK_INTERRUPTIBLE, to, slack))
 			timed_out = 1;
 	}
-	__set_current_state(TASK_RUNNING);
 	return count;
 }
 
diff --git a/include/linux/poll.h b/include/linux/poll.h
index badd98ab06f6..8c24ef8d9976 100644
--- a/include/linux/poll.h
+++ b/include/linux/poll.h
@@ -46,9 +46,9 @@ static inline void init_poll_funcptr(poll_table *pt, poll_queue_proc qproc)
 }
 
 struct poll_table_entry {
-	struct file * filp;
+	struct file *filp;
 	wait_queue_t wait;
-	wait_queue_head_t * wait_address;
+	wait_queue_head_t *wait_address;
 };
 
 /*
@@ -56,7 +56,9 @@ struct poll_table_entry {
  */
 struct poll_wqueues {
 	poll_table pt;
-	struct poll_table_page * table;
+	struct poll_table_page *table;
+	struct task_struct *polling_task;
+	int triggered;
 	int error;
 	int inline_index;
 	struct poll_table_entry inline_entries[N_INLINE_POLL_ENTRIES];
@@ -64,6 +66,13 @@ struct poll_wqueues {
 
 extern void poll_initwait(struct poll_wqueues *pwq);
 extern void poll_freewait(struct poll_wqueues *pwq);
+extern int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
+				 ktime_t *expires, unsigned long slack);
+
+static inline int poll_schedule(struct poll_wqueues *pwq, int state)
+{
+	return poll_schedule_timeout(pwq, state, NULL, 0);
+}
 
 /*
  * Scaleable version of the fd_set.
-- 
cgit v1.2.3


From 179f7ebff6be45738c6e2fa68c8d2cc5c2c6308e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Tue, 6 Jan 2009 14:41:04 -0800
Subject: percpu_counter: FBC_BATCH should be a variable

For NR_CPUS >= 16 values, FBC_BATCH is 2*NR_CPUS

Considering more and more distros are using high NR_CPUS values, it makes
sense to use a more sensible value for FBC_BATCH, and get rid of NR_CPUS.

A sensible value is 2*num_online_cpus(), with a minimum value of 32 (This
minimum value helps branch prediction in __percpu_counter_add())

We already have a hotcpu notifier, so we can adjust FBC_BATCH dynamically.

We rename FBC_BATCH to percpu_counter_batch since its not a constant
anymore.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ext4/ext4.h                 |  6 +++---
 fs/ext4/inode.c                |  2 +-
 include/linux/percpu_counter.h |  8 ++------
 lib/percpu_counter.c           | 18 ++++++++++++++----
 4 files changed, 20 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index b0537c827024..6c46c648430d 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1225,11 +1225,11 @@ do {								\
 } while (0)
 
 #ifdef CONFIG_SMP
-/* Each CPU can accumulate FBC_BATCH blocks in their local
+/* Each CPU can accumulate percpu_counter_batch blocks in their local
  * counters. So we need to make sure we have free blocks more
- * than FBC_BATCH  * nr_cpu_ids. Also add a window of 4 times.
+ * than percpu_counter_batch  * nr_cpu_ids. Also add a window of 4 times.
  */
-#define EXT4_FREEBLOCKS_WATERMARK (4 * (FBC_BATCH * nr_cpu_ids))
+#define EXT4_FREEBLOCKS_WATERMARK (4 * (percpu_counter_batch * nr_cpu_ids))
 #else
 #define EXT4_FREEBLOCKS_WATERMARK 0
 #endif
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 6702a49992a6..98d3fe7057ef 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2498,7 +2498,7 @@ static int ext4_nonda_switch(struct super_block *sb)
 	/*
 	 * switch to non delalloc mode if we are running low
 	 * on free block. The free block accounting via percpu
-	 * counters can get slightly wrong with FBC_BATCH getting
+	 * counters can get slightly wrong with percpu_counter_batch getting
 	 * accumulated on each CPU without updating global counters
 	 * Delalloc need an accurate free block accounting. So switch
 	 * to non delalloc when we are near to error range.
diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index 9007ccdfc112..99de7a31bab8 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -24,11 +24,7 @@ struct percpu_counter {
 	s32 *counters;
 };
 
-#if NR_CPUS >= 16
-#define FBC_BATCH	(NR_CPUS*2)
-#else
-#define FBC_BATCH	(NR_CPUS*4)
-#endif
+extern int percpu_counter_batch;
 
 int percpu_counter_init(struct percpu_counter *fbc, s64 amount);
 int percpu_counter_init_irq(struct percpu_counter *fbc, s64 amount);
@@ -39,7 +35,7 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc);
 
 static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
 {
-	__percpu_counter_add(fbc, amount, FBC_BATCH);
+	__percpu_counter_add(fbc, amount, percpu_counter_batch);
 }
 
 static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc)
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index b255b939bc1b..a60bd8046095 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -9,10 +9,8 @@
 #include <linux/cpu.h>
 #include <linux/module.h>
 
-#ifdef CONFIG_HOTPLUG_CPU
 static LIST_HEAD(percpu_counters);
 static DEFINE_MUTEX(percpu_counters_lock);
-#endif
 
 void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
 {
@@ -111,13 +109,24 @@ void percpu_counter_destroy(struct percpu_counter *fbc)
 }
 EXPORT_SYMBOL(percpu_counter_destroy);
 
-#ifdef CONFIG_HOTPLUG_CPU
+int percpu_counter_batch __read_mostly = 32;
+EXPORT_SYMBOL(percpu_counter_batch);
+
+static void compute_batch_value(void)
+{
+	int nr = num_online_cpus();
+
+	percpu_counter_batch = max(32, nr*2);
+}
+
 static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
 					unsigned long action, void *hcpu)
 {
+#ifdef CONFIG_HOTPLUG_CPU
 	unsigned int cpu;
 	struct percpu_counter *fbc;
 
+	compute_batch_value();
 	if (action != CPU_DEAD)
 		return NOTIFY_OK;
 
@@ -134,13 +143,14 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
 		spin_unlock_irqrestore(&fbc->lock, flags);
 	}
 	mutex_unlock(&percpu_counters_lock);
+#endif
 	return NOTIFY_OK;
 }
 
 static int __init percpu_counter_startup(void)
 {
+	compute_batch_value();
 	hotcpu_notifier(percpu_counter_hotcpu_callback, 0);
 	return 0;
 }
 module_init(percpu_counter_startup);
-#endif
-- 
cgit v1.2.3


From 8c3659347efb43857b2c2d7bc63a9c7d68d1a608 Mon Sep 17 00:00:00 2001
From: Jesper Juhl <jj@chaosbits.net>
Date: Tue, 6 Jan 2009 14:41:14 -0800
Subject: include/linux/interrupt.h: do not include linux/irqnr.h twice

Signed-off-by: Jesper Juhl <jj@chaosbits.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/interrupt.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 0702c4d7bdf0..af886b26c9d1 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -14,7 +14,6 @@
 #include <linux/irqflags.h>
 #include <linux/smp.h>
 #include <linux/percpu.h>
-#include <linux/irqnr.h>
 
 #include <asm/atomic.h>
 #include <asm/ptrace.h>
-- 
cgit v1.2.3


From 5cf0cc4e670b8da2231a3375db87ec3b6cb84432 Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Tue, 6 Jan 2009 14:41:38 -0800
Subject: binfmts.h: include list.h

linux_binfmt uses list_head, so list.h is needed.

[akpm@linux-foundation.org: fix `make headerscheck']
Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/binfmts.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 6cbfbe297180..0d0150b4901e 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -18,6 +18,7 @@ struct pt_regs;
 #define BINPRM_BUF_SIZE 128
 
 #ifdef __KERNEL__
+#include <linux/list.h>
 
 #define CORENAME_MAX_SIZE 128
 
-- 
cgit v1.2.3


From d29389de0b0ee1715333bafc6ac3f22a75aa4313 Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Tue, 6 Jan 2009 14:41:41 -0800
Subject: spi_gpio driver

Generalize the old at91rm9200 "bootstrap" bitbanging SPI master driver as
"spi_gpio", so it works with arbitrary GPIOs and can be configured through
platform_data.  Such SPI masters support:

 - any number of bus instances (bus_num is the platform_device.id)
 - any number of chipselects (one GPIO per spi_device)
 - all four SPI_MODE values, and SPI_CS_HIGH
 - i/o word sizes from 1 to 32 bits;
 - devices configured as with any other spi_master controller

When configured using platform_data, this provides relatively low clock
rates.  On platforms that support inlined GPIO calls, significantly
improved transfer speeds are also possible with a semi-custom driver.
(It's still painful when accessing flash memory, but less so.)

Sanity checked by using this version to replace both native controllers on
a board with six different SPI slaves, relying on three different
SPI_MODE_* values and both SPI_CS_HIGH settings for correct operation.

[akpm@linux-foundation.org: cleanups]
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Acked-by: Magnus Damm <damm@igel.co.jp>
Tested-by: Magnus Damm <damm@igel.co.jp>
Cc: Torgil Svensson <torgil.svensson@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/spi/Kconfig          |  18 ++-
 drivers/spi/Makefile         |   1 +
 drivers/spi/spi_gpio.c       | 360 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/spi/spi_gpio.h |  60 ++++++++
 4 files changed, 438 insertions(+), 1 deletion(-)
 create mode 100644 drivers/spi/spi_gpio.c
 create mode 100644 include/linux/spi/spi_gpio.h

(limited to 'include/linux')

diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index b9d0efb6803f..4a6fe01831a8 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -78,7 +78,7 @@ config SPI_AU1550
 	  will be called au1550_spi.
 
 config SPI_BITBANG
-	tristate "Bitbanging SPI master"
+	tristate "Utilities for Bitbanging SPI masters"
 	help
 	  With a few GPIO pins, your system can bitbang the SPI protocol.
 	  Select this to get SPI support through I/O pins (GPIO, parallel
@@ -100,6 +100,22 @@ config SPI_BUTTERFLY
 	  inexpensive battery powered microcontroller evaluation board.
 	  This same cable can be used to flash new firmware.
 
+config SPI_GPIO
+	tristate "GPIO-based bitbanging SPI Master"
+	depends on GENERIC_GPIO
+	select SPI_BITBANG
+	help
+	  This simple GPIO bitbanging SPI master uses the arch-neutral GPIO
+	  interface to manage MOSI, MISO, SCK, and chipselect signals.  SPI
+	  slaves connected to a bus using this driver are configured as usual,
+	  except that the spi_board_info.controller_data holds the GPIO number
+	  for the chipselect used by this controller driver.
+
+	  Note that this driver often won't achieve even 1 Mbit/sec speeds,
+	  making it unusually slow for SPI.  If your platform can inline
+	  GPIO operations, you should be able to leverage that for better
+	  speed with a custom version of this driver; see the source code.
+
 config SPI_IMX
 	tristate "Freescale iMX SPI controller"
 	depends on ARCH_IMX && EXPERIMENTAL
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index ccf18de34e1e..5e9f521b8844 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_SPI_BFIN)			+= spi_bfin5xx.o
 obj-$(CONFIG_SPI_BITBANG)		+= spi_bitbang.o
 obj-$(CONFIG_SPI_AU1550)		+= au1550_spi.o
 obj-$(CONFIG_SPI_BUTTERFLY)		+= spi_butterfly.o
+obj-$(CONFIG_SPI_GPIO)			+= spi_gpio.o
 obj-$(CONFIG_SPI_IMX)			+= spi_imx.o
 obj-$(CONFIG_SPI_LM70_LLP)		+= spi_lm70llp.o
 obj-$(CONFIG_SPI_PXA2XX)		+= pxa2xx_spi.o
diff --git a/drivers/spi/spi_gpio.c b/drivers/spi/spi_gpio.c
new file mode 100644
index 000000000000..49698cabc30d
--- /dev/null
+++ b/drivers/spi/spi_gpio.c
@@ -0,0 +1,360 @@
+/*
+ * spi_gpio.c - SPI master driver using generic bitbanged GPIO
+ *
+ * Copyright (C) 2006,2008 David Brownell
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/gpio.h>
+
+#include <linux/spi/spi.h>
+#include <linux/spi/spi_bitbang.h>
+#include <linux/spi/spi_gpio.h>
+
+
+/*
+ * This bitbanging SPI master driver should help make systems usable
+ * when a native hardware SPI engine is not available, perhaps because
+ * its driver isn't yet working or because the I/O pins it requires
+ * are used for other purposes.
+ *
+ * platform_device->driver_data ... points to spi_gpio
+ *
+ * spi->controller_state ... reserved for bitbang framework code
+ * spi->controller_data ... holds chipselect GPIO
+ *
+ * spi->master->dev.driver_data ... points to spi_gpio->bitbang
+ */
+
+struct spi_gpio {
+	struct spi_bitbang		bitbang;
+	struct spi_gpio_platform_data	pdata;
+	struct platform_device		*pdev;
+};
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Because the overhead of going through four GPIO procedure calls
+ * per transferred bit can make performance a problem, this code
+ * is set up so that you can use it in either of two ways:
+ *
+ *   - The slow generic way:  set up platform_data to hold the GPIO
+ *     numbers used for MISO/MOSI/SCK, and issue procedure calls for
+ *     each of them.  This driver can handle several such busses.
+ *
+ *   - The quicker inlined way:  only helps with platform GPIO code
+ *     that inlines operations for constant GPIOs.  This can give
+ *     you tight (fast!) inner loops, but each such bus needs a
+ *     new driver.  You'll define a new C file, with Makefile and
+ *     Kconfig support; the C code can be a total of six lines:
+ *
+ *		#define DRIVER_NAME	"myboard_spi2"
+ *		#define	SPI_MISO_GPIO	119
+ *		#define	SPI_MOSI_GPIO	120
+ *		#define	SPI_SCK_GPIO	121
+ *		#define	SPI_N_CHIPSEL	4
+ *		#include "spi_gpio.c"
+ */
+
+#ifndef DRIVER_NAME
+#define DRIVER_NAME	"spi_gpio"
+
+#define GENERIC_BITBANG	/* vs tight inlines */
+
+/* all functions referencing these symbols must define pdata */
+#define SPI_MISO_GPIO	((pdata)->miso)
+#define SPI_MOSI_GPIO	((pdata)->mosi)
+#define SPI_SCK_GPIO	((pdata)->sck)
+
+#define SPI_N_CHIPSEL	((pdata)->num_chipselect)
+
+#endif
+
+/*----------------------------------------------------------------------*/
+
+static inline const struct spi_gpio_platform_data * __pure
+spi_to_pdata(const struct spi_device *spi)
+{
+	const struct spi_bitbang	*bang;
+	const struct spi_gpio		*spi_gpio;
+
+	bang = spi_master_get_devdata(spi->master);
+	spi_gpio = container_of(bang, struct spi_gpio, bitbang);
+	return &spi_gpio->pdata;
+}
+
+/* this is #defined to avoid unused-variable warnings when inlining */
+#define pdata		spi_to_pdata(spi)
+
+static inline void setsck(const struct spi_device *spi, int is_on)
+{
+	gpio_set_value(SPI_SCK_GPIO, is_on);
+}
+
+static inline void setmosi(const struct spi_device *spi, int is_on)
+{
+	gpio_set_value(SPI_MOSI_GPIO, is_on);
+}
+
+static inline int getmiso(const struct spi_device *spi)
+{
+	return gpio_get_value(SPI_MISO_GPIO);
+}
+
+#undef pdata
+
+/*
+ * NOTE:  this clocks "as fast as we can".  It "should" be a function of the
+ * requested device clock.  Software overhead means we usually have trouble
+ * reaching even one Mbit/sec (except when we can inline bitops), so for now
+ * we'll just assume we never need additional per-bit slowdowns.
+ */
+#define spidelay(nsecs)	do {} while (0)
+
+#define	EXPAND_BITBANG_TXRX
+#include <linux/spi/spi_bitbang.h>
+
+/*
+ * These functions can leverage inline expansion of GPIO calls to shrink
+ * costs for a txrx bit, often by factors of around ten (by instruction
+ * count).  That is particularly visible for larger word sizes, but helps
+ * even with default 8-bit words.
+ *
+ * REVISIT overheads calling these functions for each word also have
+ * significant performance costs.  Having txrx_bufs() calls that inline
+ * the txrx_word() logic would help performance, e.g. on larger blocks
+ * used with flash storage or MMC/SD.  There should also be ways to make
+ * GCC be less stupid about reloading registers inside the I/O loops,
+ * even without inlined GPIO calls; __attribute__((hot)) on GCC 4.3?
+ */
+
+static u32 spi_gpio_txrx_word_mode0(struct spi_device *spi,
+		unsigned nsecs, u32 word, u8 bits)
+{
+	return bitbang_txrx_be_cpha0(spi, nsecs, 0, word, bits);
+}
+
+static u32 spi_gpio_txrx_word_mode1(struct spi_device *spi,
+		unsigned nsecs, u32 word, u8 bits)
+{
+	return bitbang_txrx_be_cpha1(spi, nsecs, 0, word, bits);
+}
+
+static u32 spi_gpio_txrx_word_mode2(struct spi_device *spi,
+		unsigned nsecs, u32 word, u8 bits)
+{
+	return bitbang_txrx_be_cpha0(spi, nsecs, 1, word, bits);
+}
+
+static u32 spi_gpio_txrx_word_mode3(struct spi_device *spi,
+		unsigned nsecs, u32 word, u8 bits)
+{
+	return bitbang_txrx_be_cpha1(spi, nsecs, 1, word, bits);
+}
+
+/*----------------------------------------------------------------------*/
+
+static void spi_gpio_chipselect(struct spi_device *spi, int is_active)
+{
+	unsigned long cs = (unsigned long) spi->controller_data;
+
+	/* set initial clock polarity */
+	if (is_active)
+		setsck(spi, spi->mode & SPI_CPOL);
+
+	/* SPI is normally active-low */
+	gpio_set_value(cs, (spi->mode & SPI_CS_HIGH) ? is_active : !is_active);
+}
+
+static int spi_gpio_setup(struct spi_device *spi)
+{
+	unsigned long	cs = (unsigned long) spi->controller_data;
+	int		status = 0;
+
+	if (spi->bits_per_word > 32)
+		return -EINVAL;
+
+	if (!spi->controller_state) {
+		status = gpio_request(cs, spi->dev.bus_id);
+		if (status)
+			return status;
+		status = gpio_direction_output(cs, spi->mode & SPI_CS_HIGH);
+	}
+	if (!status)
+		status = spi_bitbang_setup(spi);
+	if (status) {
+		if (!spi->controller_state)
+			gpio_free(cs);
+	}
+	return status;
+}
+
+static void spi_gpio_cleanup(struct spi_device *spi)
+{
+	unsigned long	cs = (unsigned long) spi->controller_data;
+
+	gpio_free(cs);
+	spi_bitbang_cleanup(spi);
+}
+
+static int __init spi_gpio_alloc(unsigned pin, const char *label, bool is_in)
+{
+	int value;
+
+	value = gpio_request(pin, label);
+	if (value == 0) {
+		if (is_in)
+			value = gpio_direction_input(pin);
+		else
+			value = gpio_direction_output(pin, 0);
+	}
+	return value;
+}
+
+static int __init
+spi_gpio_request(struct spi_gpio_platform_data *pdata, const char *label)
+{
+	int value;
+
+	/* NOTE:  SPI_*_GPIO symbols may reference "pdata" */
+
+	value = spi_gpio_alloc(SPI_MOSI_GPIO, label, false);
+	if (value)
+		goto done;
+
+	value = spi_gpio_alloc(SPI_MISO_GPIO, label, true);
+	if (value)
+		goto free_mosi;
+
+	value = spi_gpio_alloc(SPI_SCK_GPIO, label, false);
+	if (value)
+		goto free_miso;
+
+	goto done;
+
+free_miso:
+	gpio_free(SPI_MISO_GPIO);
+free_mosi:
+	gpio_free(SPI_MOSI_GPIO);
+done:
+	return value;
+}
+
+static int __init spi_gpio_probe(struct platform_device *pdev)
+{
+	int				status;
+	struct spi_master		*master;
+	struct spi_gpio			*spi_gpio;
+	struct spi_gpio_platform_data	*pdata;
+
+	pdata = pdev->dev.platform_data;
+#ifdef GENERIC_BITBANG
+	if (!pdata || !pdata->num_chipselect)
+		return -ENODEV;
+#endif
+
+	status = spi_gpio_request(pdata, dev_name(&pdev->dev));
+	if (status < 0)
+		return status;
+
+	master = spi_alloc_master(&pdev->dev, sizeof *spi_gpio);
+	if (!master) {
+		status = -ENOMEM;
+		goto gpio_free;
+	}
+	spi_gpio = spi_master_get_devdata(master);
+	platform_set_drvdata(pdev, spi_gpio);
+
+	spi_gpio->pdev = pdev;
+	if (pdata)
+		spi_gpio->pdata = *pdata;
+
+	master->bus_num = pdev->id;
+	master->num_chipselect = SPI_N_CHIPSEL;
+	master->setup = spi_gpio_setup;
+	master->cleanup = spi_gpio_cleanup;
+
+	spi_gpio->bitbang.master = spi_master_get(master);
+	spi_gpio->bitbang.chipselect = spi_gpio_chipselect;
+	spi_gpio->bitbang.txrx_word[SPI_MODE_0] = spi_gpio_txrx_word_mode0;
+	spi_gpio->bitbang.txrx_word[SPI_MODE_1] = spi_gpio_txrx_word_mode1;
+	spi_gpio->bitbang.txrx_word[SPI_MODE_2] = spi_gpio_txrx_word_mode2;
+	spi_gpio->bitbang.txrx_word[SPI_MODE_3] = spi_gpio_txrx_word_mode3;
+	spi_gpio->bitbang.setup_transfer = spi_bitbang_setup_transfer;
+	spi_gpio->bitbang.flags = SPI_CS_HIGH;
+
+	status = spi_bitbang_start(&spi_gpio->bitbang);
+	if (status < 0) {
+		spi_master_put(spi_gpio->bitbang.master);
+gpio_free:
+		gpio_free(SPI_MISO_GPIO);
+		gpio_free(SPI_MOSI_GPIO);
+		gpio_free(SPI_SCK_GPIO);
+		spi_master_put(master);
+	}
+
+	return status;
+}
+
+static int __exit spi_gpio_remove(struct platform_device *pdev)
+{
+	struct spi_gpio			*spi_gpio;
+	struct spi_gpio_platform_data	*pdata;
+	int				status;
+
+	spi_gpio = platform_get_drvdata(pdev);
+	pdata = pdev->dev.platform_data;
+
+	/* stop() unregisters child devices too */
+	status = spi_bitbang_stop(&spi_gpio->bitbang);
+	spi_master_put(spi_gpio->bitbang.master);
+
+	platform_set_drvdata(pdev, NULL);
+
+	gpio_free(SPI_MISO_GPIO);
+	gpio_free(SPI_MOSI_GPIO);
+	gpio_free(SPI_SCK_GPIO);
+
+	return status;
+}
+
+MODULE_ALIAS("platform:" DRIVER_NAME);
+
+static struct platform_driver spi_gpio_driver = {
+	.driver.name	= DRIVER_NAME,
+	.driver.owner	= THIS_MODULE,
+	.remove		= __exit_p(spi_gpio_remove),
+};
+
+static int __init spi_gpio_init(void)
+{
+	return platform_driver_probe(&spi_gpio_driver, spi_gpio_probe);
+}
+module_init(spi_gpio_init);
+
+static void __exit spi_gpio_exit(void)
+{
+	platform_driver_unregister(&spi_gpio_driver);
+}
+module_exit(spi_gpio_exit);
+
+
+MODULE_DESCRIPTION("SPI master driver using generic bitbanged GPIO ");
+MODULE_AUTHOR("David Brownell");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/spi/spi_gpio.h b/include/linux/spi/spi_gpio.h
new file mode 100644
index 000000000000..0f01a0f1f40c
--- /dev/null
+++ b/include/linux/spi/spi_gpio.h
@@ -0,0 +1,60 @@
+#ifndef __LINUX_SPI_GPIO_H
+#define __LINUX_SPI_GPIO_H
+
+/*
+ * For each bitbanged SPI bus, set up a platform_device node with:
+ *   - name "spi_gpio"
+ *   - id the same as the SPI bus number it implements
+ *   - dev.platform data pointing to a struct spi_gpio_platform_data
+ *
+ * Or, see the driver code for information about speedups that are
+ * possible on platforms that support inlined access for GPIOs (no
+ * spi_gpio_platform_data is used).
+ *
+ * Use spi_board_info with these busses in the usual way, being sure
+ * that the controller_data being the GPIO used for each device's
+ * chipselect:
+ *
+ *	static struct spi_board_info ... [] = {
+ *	...
+ *		// this slave uses GPIO 42 for its chipselect
+ *		.controller_data = (void *) 42,
+ *	...
+ *		// this one uses GPIO 86 for its chipselect
+ *		.controller_data = (void *) 86,
+ *	...
+ *	};
+ *
+ * If the bitbanged bus is later switched to a "native" controller,
+ * that platform_device and controller_data should be removed.
+ */
+
+/**
+ * struct spi_gpio_platform_data - parameter for bitbanged SPI master
+ * @sck: number of the GPIO used for clock output
+ * @mosi: number of the GPIO used for Master Output, Slave In (MOSI) data
+ * @miso: number of the GPIO used for Master Input, Slave Output (MISO) data
+ * @num_chipselect: how many slaves to allow
+ *
+ * All GPIO signals used with the SPI bus managed through this driver
+ * (chipselects, MOSI, MISO, SCK) must be configured as GPIOs, instead
+ * of some alternate function.
+ *
+ * It can be convenient to use this driver with pins that have alternate
+ * functions associated with a "native" SPI controller if a driver for that
+ * controller is not available, or is missing important functionality.
+ *
+ * On platforms which can do so, configure MISO with a weak pullup unless
+ * there's an external pullup on that signal.  That saves power by avoiding
+ * floating signals.  (A weak pulldown would save power too, but many
+ * drivers expect to see all-ones data as the no slave "response".)
+ */
+struct spi_gpio_platform_data {
+	unsigned	sck;
+	unsigned	mosi;
+	unsigned	miso;
+
+	u16		num_chipselect;
+};
+
+#endif /* __LINUX_SPI_GPIO_H */
-- 
cgit v1.2.3


From a06f6211ef9b1785922f9d0e8766d63ac4e66de1 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@redhat.com>
Date: Tue, 6 Jan 2009 14:41:49 -0800
Subject: module: add within_module_core() and within_module_init()

This series of patches allows kprobes to probe module's __init and __exit
functions.  This means, you can probe driver initialization and
terminating.

Currently, kprobes can't probe __init function because these functions are
freed after module initialization.  And it also can't probe module __exit
functions because kprobe increments reference count of target module and
user can't unload it.  this means __exit functions never be called unless
removing probes from the module.

To solve both cases, this series of patches introduces GONE flag and sets
it when the target code is freed(for this purpose, kprobes hooks
MODULE_STATE_* events).  This also removes refcount incrementing for
allowing user to unload target module.  Users can check which probes are
GONE by debugfs interface.  For taking timing of freeing module's .init
text, these also include a patch which adds module's notifier of
MODULE_STATE_LIVE event.

This patch:

Add within_module_core() and within_module_init() for checking whether an
address is in the module .init.text section or .text section, and replace
within() local inline functions in kernel/module.c with them.

kprobes uses these functions to check where the kprobe is inserted.

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/module.h | 12 ++++++++++++
 kernel/module.c        | 16 ++++++++--------
 2 files changed, 20 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/module.h b/include/linux/module.h
index 03cb93d1865a..4f7ea12463d3 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -365,6 +365,18 @@ struct module *module_text_address(unsigned long addr);
 struct module *__module_text_address(unsigned long addr);
 int is_module_address(unsigned long addr);
 
+static inline int within_module_core(unsigned long addr, struct module *mod)
+{
+	return (unsigned long)mod->module_core <= addr &&
+	       addr < (unsigned long)mod->module_core + mod->core_size;
+}
+
+static inline int within_module_init(unsigned long addr, struct module *mod)
+{
+	return (unsigned long)mod->module_init <= addr &&
+	       addr < (unsigned long)mod->module_init + mod->init_size;
+}
+
 /* Returns 0 and fills in value, defined and namebuf, or -ERANGE if
    symnum out of range. */
 int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
diff --git a/kernel/module.c b/kernel/module.c
index 34b56cf06615..cc79c942c572 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2390,7 +2390,7 @@ static const char *get_ksymbol(struct module *mod,
 	unsigned long nextval;
 
 	/* At worse, next value is at end of module */
-	if (within(addr, mod->module_init, mod->init_size))
+	if (within_module_init(addr, mod))
 		nextval = (unsigned long)mod->module_init+mod->init_text_size;
 	else
 		nextval = (unsigned long)mod->module_core+mod->core_text_size;
@@ -2438,8 +2438,8 @@ const char *module_address_lookup(unsigned long addr,
 
 	preempt_disable();
 	list_for_each_entry_rcu(mod, &modules, list) {
-		if (within(addr, mod->module_init, mod->init_size)
-		    || within(addr, mod->module_core, mod->core_size)) {
+		if (within_module_init(addr, mod) ||
+		    within_module_core(addr, mod)) {
 			if (modname)
 				*modname = mod->name;
 			ret = get_ksymbol(mod, addr, size, offset);
@@ -2461,8 +2461,8 @@ int lookup_module_symbol_name(unsigned long addr, char *symname)
 
 	preempt_disable();
 	list_for_each_entry_rcu(mod, &modules, list) {
-		if (within(addr, mod->module_init, mod->init_size) ||
-		    within(addr, mod->module_core, mod->core_size)) {
+		if (within_module_init(addr, mod) ||
+		    within_module_core(addr, mod)) {
 			const char *sym;
 
 			sym = get_ksymbol(mod, addr, NULL, NULL);
@@ -2485,8 +2485,8 @@ int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size,
 
 	preempt_disable();
 	list_for_each_entry_rcu(mod, &modules, list) {
-		if (within(addr, mod->module_init, mod->init_size) ||
-		    within(addr, mod->module_core, mod->core_size)) {
+		if (within_module_init(addr, mod) ||
+		    within_module_core(addr, mod)) {
 			const char *sym;
 
 			sym = get_ksymbol(mod, addr, size, offset);
@@ -2705,7 +2705,7 @@ int is_module_address(unsigned long addr)
 	preempt_disable();
 
 	list_for_each_entry_rcu(mod, &modules, list) {
-		if (within(addr, mod->module_core, mod->core_size)) {
+		if (within_module_core(addr, mod)) {
 			preempt_enable();
 			return 1;
 		}
-- 
cgit v1.2.3


From 129415607845d4daea11ddcba706005c69dcb942 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@redhat.com>
Date: Tue, 6 Jan 2009 14:41:50 -0800
Subject: kprobes: add kprobe_insn_mutex and cleanup arch_remove_kprobe()

Add kprobe_insn_mutex for protecting kprobe_insn_pages hlist, and remove
kprobe_mutex from architecture dependent code.

This allows us to call arch_remove_kprobe() (and free_insn_slot) while
holding kprobe_mutex.

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Acked-by: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/kernel/kprobes.c     |  2 --
 arch/ia64/kernel/kprobes.c    |  8 +++++---
 arch/powerpc/kernel/kprobes.c |  7 ++++---
 arch/s390/kernel/kprobes.c    |  7 ++++---
 arch/x86/kernel/kprobes.c     |  7 ++++---
 include/linux/kprobes.h       |  1 -
 kernel/kprobes.c              | 25 +++++++++++++++++++++----
 7 files changed, 38 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/kernel/kprobes.c b/arch/arm/kernel/kprobes.c
index 3f9abe0e9aff..f692efddd449 100644
--- a/arch/arm/kernel/kprobes.c
+++ b/arch/arm/kernel/kprobes.c
@@ -92,9 +92,7 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
 void __kprobes arch_remove_kprobe(struct kprobe *p)
 {
 	if (p->ainsn.insn) {
-		mutex_lock(&kprobe_mutex);
 		free_insn_slot(p->ainsn.insn, 0);
-		mutex_unlock(&kprobe_mutex);
 		p->ainsn.insn = NULL;
 	}
 }
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index f07688da947c..097b84d54e73 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -670,9 +670,11 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
 
 void __kprobes arch_remove_kprobe(struct kprobe *p)
 {
-	mutex_lock(&kprobe_mutex);
-	free_insn_slot(p->ainsn.insn, p->ainsn.inst_flag & INST_FLAG_BOOSTABLE);
-	mutex_unlock(&kprobe_mutex);
+	if (p->ainsn.insn) {
+		free_insn_slot(p->ainsn.insn,
+			       p->ainsn.inst_flag & INST_FLAG_BOOSTABLE);
+		p->ainsn.insn = NULL;
+	}
 }
 /*
  * We are resuming execution after a single step fault, so the pt_regs
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index de79915452c8..989edcdf0297 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -96,9 +96,10 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
 
 void __kprobes arch_remove_kprobe(struct kprobe *p)
 {
-	mutex_lock(&kprobe_mutex);
-	free_insn_slot(p->ainsn.insn, 0);
-	mutex_unlock(&kprobe_mutex);
+	if (p->ainsn.insn) {
+		free_insn_slot(p->ainsn.insn, 0);
+		p->ainsn.insn = NULL;
+	}
 }
 
 static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 569079ec4ff0..9b92856632cf 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -218,9 +218,10 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
 
 void __kprobes arch_remove_kprobe(struct kprobe *p)
 {
-	mutex_lock(&kprobe_mutex);
-	free_insn_slot(p->ainsn.insn, 0);
-	mutex_unlock(&kprobe_mutex);
+	if (p->ainsn.insn) {
+		free_insn_slot(p->ainsn.insn, 0);
+		p->ainsn.insn = NULL;
+	}
 }
 
 static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 6c27679ec6aa..eead6f8f9218 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -376,9 +376,10 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
 
 void __kprobes arch_remove_kprobe(struct kprobe *p)
 {
-	mutex_lock(&kprobe_mutex);
-	free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1));
-	mutex_unlock(&kprobe_mutex);
+	if (p->ainsn.insn) {
+		free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1));
+		p->ainsn.insn = NULL;
+	}
 }
 
 static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 497b1d1f7a05..b93e44ce2284 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -201,7 +201,6 @@ static inline int init_test_probes(void)
 }
 #endif /* CONFIG_KPROBES_SANITY_TEST */
 
-extern struct mutex kprobe_mutex;
 extern int arch_prepare_kprobe(struct kprobe *p);
 extern void arch_arm_kprobe(struct kprobe *p);
 extern void arch_disarm_kprobe(struct kprobe *p);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 3afd354c46f1..29e87921437d 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -69,7 +69,7 @@ static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
 /* NOTE: change this value only with kprobe_mutex held */
 static bool kprobe_enabled;
 
-DEFINE_MUTEX(kprobe_mutex);		/* Protects kprobe_table */
+static DEFINE_MUTEX(kprobe_mutex);	/* Protects kprobe_table */
 static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
 static struct {
 	spinlock_t lock ____cacheline_aligned_in_smp;
@@ -115,6 +115,7 @@ enum kprobe_slot_state {
 	SLOT_USED = 2,
 };
 
+static DEFINE_MUTEX(kprobe_insn_mutex);	/* Protects kprobe_insn_pages */
 static struct hlist_head kprobe_insn_pages;
 static int kprobe_garbage_slots;
 static int collect_garbage_slots(void);
@@ -144,10 +145,10 @@ loop_end:
 }
 
 /**
- * get_insn_slot() - Find a slot on an executable page for an instruction.
+ * __get_insn_slot() - Find a slot on an executable page for an instruction.
  * We allocate an executable page if there's no room on existing ones.
  */
-kprobe_opcode_t __kprobes *get_insn_slot(void)
+static kprobe_opcode_t __kprobes *__get_insn_slot(void)
 {
 	struct kprobe_insn_page *kip;
 	struct hlist_node *pos;
@@ -196,6 +197,15 @@ kprobe_opcode_t __kprobes *get_insn_slot(void)
 	return kip->insns;
 }
 
+kprobe_opcode_t __kprobes *get_insn_slot(void)
+{
+	kprobe_opcode_t *ret;
+	mutex_lock(&kprobe_insn_mutex);
+	ret = __get_insn_slot();
+	mutex_unlock(&kprobe_insn_mutex);
+	return ret;
+}
+
 /* Return 1 if all garbages are collected, otherwise 0. */
 static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx)
 {
@@ -226,9 +236,13 @@ static int __kprobes collect_garbage_slots(void)
 {
 	struct kprobe_insn_page *kip;
 	struct hlist_node *pos, *next;
+	int safety;
 
 	/* Ensure no-one is preepmted on the garbages */
-	if (check_safety() != 0)
+	mutex_unlock(&kprobe_insn_mutex);
+	safety = check_safety();
+	mutex_lock(&kprobe_insn_mutex);
+	if (safety != 0)
 		return -EAGAIN;
 
 	hlist_for_each_entry_safe(kip, pos, next, &kprobe_insn_pages, hlist) {
@@ -251,6 +265,7 @@ void __kprobes free_insn_slot(kprobe_opcode_t * slot, int dirty)
 	struct kprobe_insn_page *kip;
 	struct hlist_node *pos;
 
+	mutex_lock(&kprobe_insn_mutex);
 	hlist_for_each_entry(kip, pos, &kprobe_insn_pages, hlist) {
 		if (kip->insns <= slot &&
 		    slot < kip->insns + (INSNS_PER_PAGE * MAX_INSN_SIZE)) {
@@ -267,6 +282,8 @@ void __kprobes free_insn_slot(kprobe_opcode_t * slot, int dirty)
 
 	if (dirty && ++kprobe_garbage_slots > INSNS_PER_PAGE)
 		collect_garbage_slots();
+
+	mutex_unlock(&kprobe_insn_mutex);
 }
 #endif
 
-- 
cgit v1.2.3


From e8386a0cb22f4a2d439384212c494ad0bda848fe Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@redhat.com>
Date: Tue, 6 Jan 2009 14:41:52 -0800
Subject: kprobes: support probing module __exit function

Allows kprobes to probe __exit routine.  This adds flags member to struct
kprobe.  When module is freed(kprobes hooks module_notifier to get this
event), kprobes which probe the functions in that module are set to "Gone"
flag to the flags member.  These "Gone" probes are never be enabled.
Users can check the GONE flag through debugfs.

This also removes mod_refcounted, because we couldn't free a module if
kprobe incremented the refcount of that module.

[akpm@linux-foundation.org: document some locking]
[mhiramat@redhat.com: bugfix: pass aggr_kprobe to arch_remove_kprobe]
[mhiramat@redhat.com: bugfix: release old_p's insn_slot before error return]
Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Acked-by: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/kprobes.txt |   5 +-
 include/linux/kprobes.h   |  14 +++-
 kernel/kprobes.c          | 159 ++++++++++++++++++++++++++++++++++------------
 3 files changed, 134 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt
index a79633d702bf..48b3de90eb1e 100644
--- a/Documentation/kprobes.txt
+++ b/Documentation/kprobes.txt
@@ -497,7 +497,10 @@ The first column provides the kernel address where the probe is inserted.
 The second column identifies the type of probe (k - kprobe, r - kretprobe
 and j - jprobe), while the third column specifies the symbol+offset of
 the probe. If the probed function belongs to a module, the module name
-is also specified.
+is also specified. Following columns show probe status. If the probe is on
+a virtual address that is no longer valid (module init sections, module
+virtual addresses that correspond to modules that've been unloaded),
+such probes are marked with [GONE].
 
 /debug/kprobes/enabled: Turn kprobes ON/OFF
 
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index b93e44ce2284..d6ea19e314bb 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -69,9 +69,6 @@ struct kprobe {
 	/* list of kprobes for multi-handler support */
 	struct list_head list;
 
-	/* Indicates that the corresponding module has been ref counted */
-	unsigned int mod_refcounted;
-
 	/*count the number of times this probe was temporarily disarmed */
 	unsigned long nmissed;
 
@@ -103,8 +100,19 @@ struct kprobe {
 
 	/* copy of the original instruction */
 	struct arch_specific_insn ainsn;
+
+	/* Indicates various status flags.  Protected by kprobe_mutex. */
+	u32 flags;
 };
 
+/* Kprobe status flags */
+#define KPROBE_FLAG_GONE	1 /* breakpoint has already gone */
+
+static inline int kprobe_gone(struct kprobe *p)
+{
+	return p->flags & KPROBE_FLAG_GONE;
+}
+
 /*
  * Special probe type that uses setjmp-longjmp type tricks to resume
  * execution at a specified entry with a matching prototype corresponding
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index a1e233a19586..cb732a9aa55f 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -327,7 +327,7 @@ static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
 	struct kprobe *kp;
 
 	list_for_each_entry_rcu(kp, &p->list, list) {
-		if (kp->pre_handler) {
+		if (kp->pre_handler && !kprobe_gone(kp)) {
 			set_kprobe_instance(kp);
 			if (kp->pre_handler(kp, regs))
 				return 1;
@@ -343,7 +343,7 @@ static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
 	struct kprobe *kp;
 
 	list_for_each_entry_rcu(kp, &p->list, list) {
-		if (kp->post_handler) {
+		if (kp->post_handler && !kprobe_gone(kp)) {
 			set_kprobe_instance(kp);
 			kp->post_handler(kp, regs, flags);
 			reset_kprobe_instance();
@@ -545,9 +545,10 @@ static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
 	ap->addr = p->addr;
 	ap->pre_handler = aggr_pre_handler;
 	ap->fault_handler = aggr_fault_handler;
-	if (p->post_handler)
+	/* We don't care the kprobe which has gone. */
+	if (p->post_handler && !kprobe_gone(p))
 		ap->post_handler = aggr_post_handler;
-	if (p->break_handler)
+	if (p->break_handler && !kprobe_gone(p))
 		ap->break_handler = aggr_break_handler;
 
 	INIT_LIST_HEAD(&ap->list);
@@ -566,17 +567,41 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p,
 	int ret = 0;
 	struct kprobe *ap;
 
+	if (kprobe_gone(old_p)) {
+		/*
+		 * Attempting to insert new probe at the same location that
+		 * had a probe in the module vaddr area which already
+		 * freed. So, the instruction slot has already been
+		 * released. We need a new slot for the new probe.
+		 */
+		ret = arch_prepare_kprobe(old_p);
+		if (ret)
+			return ret;
+	}
 	if (old_p->pre_handler == aggr_pre_handler) {
 		copy_kprobe(old_p, p);
 		ret = add_new_kprobe(old_p, p);
+		ap = old_p;
 	} else {
 		ap = kzalloc(sizeof(struct kprobe), GFP_KERNEL);
-		if (!ap)
+		if (!ap) {
+			if (kprobe_gone(old_p))
+				arch_remove_kprobe(old_p);
 			return -ENOMEM;
+		}
 		add_aggr_kprobe(ap, old_p);
 		copy_kprobe(ap, p);
 		ret = add_new_kprobe(ap, p);
 	}
+	if (kprobe_gone(old_p)) {
+		/*
+		 * If the old_p has gone, its breakpoint has been disarmed.
+		 * We have to arm it again after preparing real kprobes.
+		 */
+		ap->flags &= ~KPROBE_FLAG_GONE;
+		if (kprobe_enabled)
+			arch_arm_kprobe(ap);
+	}
 	return ret;
 }
 
@@ -639,8 +664,7 @@ static int __kprobes __register_kprobe(struct kprobe *p,
 		return -EINVAL;
 	}
 
-	p->mod_refcounted = 0;
-
+	p->flags = 0;
 	/*
 	 * Check if are we probing a module.
 	 */
@@ -649,16 +673,14 @@ static int __kprobes __register_kprobe(struct kprobe *p,
 		struct module *calling_mod;
 		calling_mod = __module_text_address(called_from);
 		/*
-		 * We must allow modules to probe themself and in this case
-		 * avoid incrementing the module refcount, so as to allow
-		 * unloading of self probing modules.
+		 * We must hold a refcount of the probed module while updating
+		 * its code to prohibit unexpected unloading.
 		 */
 		if (calling_mod != probed_mod) {
 			if (unlikely(!try_module_get(probed_mod))) {
 				preempt_enable();
 				return -EINVAL;
 			}
-			p->mod_refcounted = 1;
 		} else
 			probed_mod = NULL;
 	}
@@ -687,8 +709,9 @@ static int __kprobes __register_kprobe(struct kprobe *p,
 out:
 	mutex_unlock(&kprobe_mutex);
 
-	if (ret && probed_mod)
+	if (probed_mod)
 		module_put(probed_mod);
+
 	return ret;
 }
 
@@ -716,16 +739,16 @@ valid_p:
 	     list_is_singular(&old_p->list))) {
 		/*
 		 * Only probe on the hash list. Disarm only if kprobes are
-		 * enabled - otherwise, the breakpoint would already have
-		 * been removed. We save on flushing icache.
+		 * enabled and not gone - otherwise, the breakpoint would
+		 * already have been removed. We save on flushing icache.
 		 */
-		if (kprobe_enabled)
+		if (kprobe_enabled && !kprobe_gone(old_p))
 			arch_disarm_kprobe(p);
 		hlist_del_rcu(&old_p->hlist);
 	} else {
-		if (p->break_handler)
+		if (p->break_handler && !kprobe_gone(p))
 			old_p->break_handler = NULL;
-		if (p->post_handler) {
+		if (p->post_handler && !kprobe_gone(p)) {
 			list_for_each_entry_rcu(list_p, &old_p->list, list) {
 				if ((list_p != p) && (list_p->post_handler))
 					goto noclean;
@@ -740,27 +763,16 @@ noclean:
 
 static void __kprobes __unregister_kprobe_bottom(struct kprobe *p)
 {
-	struct module *mod;
 	struct kprobe *old_p;
 
-	if (p->mod_refcounted) {
-		/*
-		 * Since we've already incremented refcount,
-		 * we don't need to disable preemption.
-		 */
-		mod = module_text_address((unsigned long)p->addr);
-		if (mod)
-			module_put(mod);
-	}
-
-	if (list_empty(&p->list) || list_is_singular(&p->list)) {
-		if (!list_empty(&p->list)) {
-			/* "p" is the last child of an aggr_kprobe */
-			old_p = list_entry(p->list.next, struct kprobe, list);
-			list_del(&p->list);
-			kfree(old_p);
-		}
+	if (list_empty(&p->list))
 		arch_remove_kprobe(p);
+	else if (list_is_singular(&p->list)) {
+		/* "p" is the last child of an aggr_kprobe */
+		old_p = list_entry(p->list.next, struct kprobe, list);
+		list_del(&p->list);
+		arch_remove_kprobe(old_p);
+		kfree(old_p);
 	}
 }
 
@@ -1074,6 +1086,67 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p,
 
 #endif /* CONFIG_KRETPROBES */
 
+/* Set the kprobe gone and remove its instruction buffer. */
+static void __kprobes kill_kprobe(struct kprobe *p)
+{
+	struct kprobe *kp;
+	p->flags |= KPROBE_FLAG_GONE;
+	if (p->pre_handler == aggr_pre_handler) {
+		/*
+		 * If this is an aggr_kprobe, we have to list all the
+		 * chained probes and mark them GONE.
+		 */
+		list_for_each_entry_rcu(kp, &p->list, list)
+			kp->flags |= KPROBE_FLAG_GONE;
+		p->post_handler = NULL;
+		p->break_handler = NULL;
+	}
+	/*
+	 * Here, we can remove insn_slot safely, because no thread calls
+	 * the original probed function (which will be freed soon) any more.
+	 */
+	arch_remove_kprobe(p);
+}
+
+/* Module notifier call back, checking kprobes on the module */
+static int __kprobes kprobes_module_callback(struct notifier_block *nb,
+					     unsigned long val, void *data)
+{
+	struct module *mod = data;
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct kprobe *p;
+	unsigned int i;
+
+	if (val != MODULE_STATE_GOING)
+		return NOTIFY_DONE;
+
+	/*
+	 * module .text section will be freed. We need to
+	 * disable kprobes which have been inserted in the section.
+	 */
+	mutex_lock(&kprobe_mutex);
+	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
+		head = &kprobe_table[i];
+		hlist_for_each_entry_rcu(p, node, head, hlist)
+			if (within_module_core((unsigned long)p->addr, mod)) {
+				/*
+				 * The vaddr this probe is installed will soon
+				 * be vfreed buy not synced to disk. Hence,
+				 * disarming the breakpoint isn't needed.
+				 */
+				kill_kprobe(p);
+			}
+	}
+	mutex_unlock(&kprobe_mutex);
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block kprobe_module_nb = {
+	.notifier_call = kprobes_module_callback,
+	.priority = 0
+};
+
 static int __init init_kprobes(void)
 {
 	int i, err = 0;
@@ -1130,6 +1203,9 @@ static int __init init_kprobes(void)
 	err = arch_init_kprobes();
 	if (!err)
 		err = register_die_notifier(&kprobe_exceptions_nb);
+	if (!err)
+		err = register_module_notifier(&kprobe_module_nb);
+
 	kprobes_initialized = (err == 0);
 
 	if (!err)
@@ -1150,10 +1226,12 @@ static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p,
 	else
 		kprobe_type = "k";
 	if (sym)
-		seq_printf(pi, "%p  %s  %s+0x%x  %s\n", p->addr, kprobe_type,
-			sym, offset, (modname ? modname : " "));
+		seq_printf(pi, "%p  %s  %s+0x%x  %s %s\n", p->addr, kprobe_type,
+			sym, offset, (modname ? modname : " "),
+			(kprobe_gone(p) ? "[GONE]" : ""));
 	else
-		seq_printf(pi, "%p  %s  %p\n", p->addr, kprobe_type, p->addr);
+		seq_printf(pi, "%p  %s  %p %s\n", p->addr, kprobe_type, p->addr,
+			(kprobe_gone(p) ? "[GONE]" : ""));
 }
 
 static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos)
@@ -1234,7 +1312,8 @@ static void __kprobes enable_all_kprobes(void)
 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
 		head = &kprobe_table[i];
 		hlist_for_each_entry_rcu(p, node, head, hlist)
-			arch_arm_kprobe(p);
+			if (!kprobe_gone(p))
+				arch_arm_kprobe(p);
 	}
 
 	kprobe_enabled = true;
@@ -1263,7 +1342,7 @@ static void __kprobes disable_all_kprobes(void)
 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
 		head = &kprobe_table[i];
 		hlist_for_each_entry_rcu(p, node, head, hlist) {
-			if (!arch_trampoline_kprobe(p))
+			if (!arch_trampoline_kprobe(p) && !kprobe_gone(p))
 				arch_disarm_kprobe(p);
 		}
 	}
-- 
cgit v1.2.3


From 730c9eeca9808fc2cfb506cc68c90aa330da17b0 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Tue, 6 Jan 2009 14:42:06 -0800
Subject: autofs4: improve parameter usage

The parameter usage in the device node ioctl code uses arg1 and arg2 as
parameter names.  This patch redefines the parameter names to reflect what
they actually are in an effort to make the code more readable.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/autofs4/dev-ioctl.c         | 54 ++++++++++++++++--------------
 include/linux/auto_dev-ioctl.h | 75 +++++++++++++++++++++++++++++++++++++++---
 2 files changed, 100 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 63b7c7afe8df..054d6d9ad9ba 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -180,7 +180,7 @@ static int autofs_dev_ioctl_protover(struct file *fp,
 				     struct autofs_sb_info *sbi,
 				     struct autofs_dev_ioctl *param)
 {
-	param->arg1 = sbi->version;
+	param->protover.version = sbi->version;
 	return 0;
 }
 
@@ -189,7 +189,7 @@ static int autofs_dev_ioctl_protosubver(struct file *fp,
 					struct autofs_sb_info *sbi,
 					struct autofs_dev_ioctl *param)
 {
-	param->arg1 = sbi->sub_version;
+	param->protosubver.sub_version = sbi->sub_version;
 	return 0;
 }
 
@@ -335,13 +335,13 @@ static int autofs_dev_ioctl_openmount(struct file *fp,
 	int err, fd;
 
 	/* param->path has already been checked */
-	if (!param->arg1)
+	if (!param->openmount.devid)
 		return -EINVAL;
 
 	param->ioctlfd = -1;
 
 	path = param->path;
-	devid = param->arg1;
+	devid = param->openmount.devid;
 
 	err = 0;
 	fd = autofs_dev_ioctl_open_mountpoint(path, devid);
@@ -373,7 +373,7 @@ static int autofs_dev_ioctl_ready(struct file *fp,
 {
 	autofs_wqt_t token;
 
-	token = (autofs_wqt_t) param->arg1;
+	token = (autofs_wqt_t) param->ready.token;
 	return autofs4_wait_release(sbi, token, 0);
 }
 
@@ -388,8 +388,8 @@ static int autofs_dev_ioctl_fail(struct file *fp,
 	autofs_wqt_t token;
 	int status;
 
-	token = (autofs_wqt_t) param->arg1;
-	status = param->arg2 ? param->arg2 : -ENOENT;
+	token = (autofs_wqt_t) param->fail.token;
+	status = param->fail.status ? param->fail.status : -ENOENT;
 	return autofs4_wait_release(sbi, token, status);
 }
 
@@ -412,10 +412,10 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp,
 	int pipefd;
 	int err = 0;
 
-	if (param->arg1 == -1)
+	if (param->setpipefd.pipefd == -1)
 		return -EINVAL;
 
-	pipefd = param->arg1;
+	pipefd = param->setpipefd.pipefd;
 
 	mutex_lock(&sbi->wq_mutex);
 	if (!sbi->catatonic) {
@@ -457,8 +457,8 @@ static int autofs_dev_ioctl_timeout(struct file *fp,
 {
 	unsigned long timeout;
 
-	timeout = param->arg1;
-	param->arg1 = sbi->exp_timeout / HZ;
+	timeout = param->timeout.timeout;
+	param->timeout.timeout = sbi->exp_timeout / HZ;
 	sbi->exp_timeout = timeout * HZ;
 	return 0;
 }
@@ -489,7 +489,7 @@ static int autofs_dev_ioctl_requester(struct file *fp,
 	path = param->path;
 	devid = sbi->sb->s_dev;
 
-	param->arg1 = param->arg2 = -1;
+	param->requester.uid = param->requester.gid = -1;
 
 	/* Get nameidata of the parent directory */
 	err = path_lookup(path, LOOKUP_PARENT, &nd);
@@ -505,8 +505,8 @@ static int autofs_dev_ioctl_requester(struct file *fp,
 		err = 0;
 		autofs4_expire_wait(nd.path.dentry);
 		spin_lock(&sbi->fs_lock);
-		param->arg1 = ino->uid;
-		param->arg2 = ino->gid;
+		param->requester.uid = ino->uid;
+		param->requester.gid = ino->gid;
 		spin_unlock(&sbi->fs_lock);
 	}
 
@@ -529,7 +529,7 @@ static int autofs_dev_ioctl_expire(struct file *fp,
 	int err = -EAGAIN;
 	int how;
 
-	how = param->arg1;
+	how = param->expire.how;
 	mnt = fp->f_path.mnt;
 
 	if (sbi->type & AUTOFS_TYPE_TRIGGER)
@@ -565,9 +565,9 @@ static int autofs_dev_ioctl_askumount(struct file *fp,
 				      struct autofs_sb_info *sbi,
 				      struct autofs_dev_ioctl *param)
 {
-	param->arg1 = 0;
+	param->askumount.may_umount = 0;
 	if (may_umount(fp->f_path.mnt))
-		param->arg1 = 1;
+		param->askumount.may_umount = 1;
 	return 0;
 }
 
@@ -600,6 +600,7 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp,
 	struct nameidata nd;
 	const char *path;
 	unsigned int type;
+	unsigned int devid, magic;
 	int err = -ENOENT;
 
 	if (param->size <= sizeof(*param)) {
@@ -608,10 +609,10 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp,
 	}
 
 	path = param->path;
-	type = param->arg1;
+	type = param->ismountpoint.in.type;
 
-	param->arg1 = 0;
-	param->arg2 = 0;
+	param->ismountpoint.out.devid = devid = 0;
+	param->ismountpoint.out.magic = magic = 0;
 
 	if (!fp || param->ioctlfd == -1) {
 		if (type == AUTOFS_TYPE_ANY) {
@@ -622,7 +623,7 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp,
 				goto out;
 
 			sb = nd.path.dentry->d_sb;
-			param->arg1 = new_encode_dev(sb->s_dev);
+			devid = new_encode_dev(sb->s_dev);
 		} else {
 			struct autofs_info *ino;
 
@@ -635,14 +636,14 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp,
 				goto out_release;
 
 			ino = autofs4_dentry_ino(nd.path.dentry);
-			param->arg1 = autofs4_get_dev(ino->sbi);
+			devid = autofs4_get_dev(ino->sbi);
 		}
 
 		err = 0;
 		if (nd.path.dentry->d_inode &&
 		    nd.path.mnt->mnt_root == nd.path.dentry) {
 			err = 1;
-			param->arg2 = nd.path.dentry->d_inode->i_sb->s_magic;
+			magic = nd.path.dentry->d_inode->i_sb->s_magic;
 		}
 	} else {
 		dev_t devid = new_encode_dev(sbi->sb->s_dev);
@@ -655,18 +656,21 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp,
 		if (err)
 			goto out_release;
 
-		param->arg1 = autofs4_get_dev(sbi);
+		devid = autofs4_get_dev(sbi);
 
 		err = have_submounts(nd.path.dentry);
 
 		if (nd.path.mnt->mnt_mountpoint != nd.path.mnt->mnt_root) {
 			if (follow_down(&nd.path.mnt, &nd.path.dentry)) {
 				struct inode *inode = nd.path.dentry->d_inode;
-				param->arg2 = inode->i_sb->s_magic;
+				magic = inode->i_sb->s_magic;
 			}
 		}
 	}
 
+	param->ismountpoint.out.devid = devid;
+	param->ismountpoint.out.magic = magic;
+
 out_release:
 	path_put(&nd.path);
 out:
diff --git a/include/linux/auto_dev-ioctl.h b/include/linux/auto_dev-ioctl.h
index f4d05ccd731f..91a773993a5c 100644
--- a/include/linux/auto_dev-ioctl.h
+++ b/include/linux/auto_dev-ioctl.h
@@ -10,6 +10,7 @@
 #ifndef _LINUX_AUTO_DEV_IOCTL_H
 #define _LINUX_AUTO_DEV_IOCTL_H
 
+#include <linux/string.h>
 #include <linux/types.h>
 
 #define AUTOFS_DEVICE_NAME		"autofs"
@@ -25,6 +26,60 @@
  * An ioctl interface for autofs mount point control.
  */
 
+struct args_protover {
+	__u32	version;
+};
+
+struct args_protosubver {
+	__u32	sub_version;
+};
+
+struct args_openmount {
+	__u32	devid;
+};
+
+struct args_ready {
+	__u32	token;
+};
+
+struct args_fail {
+	__u32	token;
+	__s32	status;
+};
+
+struct args_setpipefd {
+	__s32	pipefd;
+};
+
+struct args_timeout {
+	__u64	timeout;
+};
+
+struct args_requester {
+	__u32	uid;
+	__u32	gid;
+};
+
+struct args_expire {
+	__u32	how;
+};
+
+struct args_askumount {
+	__u32	may_umount;
+};
+
+struct args_ismountpoint {
+	union {
+		struct args_in {
+			__u32	type;
+		} in;
+		struct args_out {
+			__u32	devid;
+			__u32	magic;
+		} out;
+	};
+};
+
 /*
  * All the ioctls use this structure.
  * When sending a path size must account for the total length
@@ -39,20 +94,32 @@ struct autofs_dev_ioctl {
 				 * including this struct */
 	__s32 ioctlfd;		/* automount command fd */
 
-	__u32 arg1;		/* Command parameters */
-	__u32 arg2;
+	/* Command parameters */
+
+	union {
+		struct args_protover		protover;
+		struct args_protosubver		protosubver;
+		struct args_openmount		openmount;
+		struct args_ready		ready;
+		struct args_fail		fail;
+		struct args_setpipefd		setpipefd;
+		struct args_timeout		timeout;
+		struct args_requester		requester;
+		struct args_expire		expire;
+		struct args_askumount		askumount;
+		struct args_ismountpoint	ismountpoint;
+	};
 
 	char path[0];
 };
 
 static inline void init_autofs_dev_ioctl(struct autofs_dev_ioctl *in)
 {
+	memset(in, 0, sizeof(struct autofs_dev_ioctl));
 	in->ver_major = AUTOFS_DEV_IOCTL_VERSION_MAJOR;
 	in->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR;
 	in->size = sizeof(struct autofs_dev_ioctl);
 	in->ioctlfd = -1;
-	in->arg1 = 0;
-	in->arg2 = 0;
 	return;
 }
 
-- 
cgit v1.2.3


From a92daf6ba1f9ace8584edc8eb557a77aa7c2c71d Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Tue, 6 Jan 2009 14:42:08 -0800
Subject: autofs4: make autofs type usage explicit

- the type assigned at mount when no type is given is changed
  from 0 to AUTOFS_TYPE_INDIRECT. This was done because 0 and
  AUTOFS_TYPE_INDIRECT were being treated implicitly as the same
  type.

- previously, an offset mount had it's type set to
  AUTOFS_TYPE_DIRECT|AUTOFS_TYPE_OFFSET but the mount control
  re-implementation needs to be able distinguish all three types.
  So this was changed to make the type setting explicit.

- a type AUTOFS_TYPE_ANY was added for use by the re-implementation
  when checking if a given path is a mountpoint. It's not really a
  type as we use this to ask if a given path is a mountpoint in the
  autofs_dev_ioctl_ismountpoint() function.

- functions to set and test the autofs mount types have been added to
  improve readability and make the type usage explicit.

- the mount type is used from user space for the mount control
  re-implementtion so, for consistency, all the definitions have
  been moved to the user space include file include/linux/auto_fs4.h.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/autofs4/autofs_i.h    |  2 --
 fs/autofs4/dev-ioctl.c   |  4 ++--
 fs/autofs4/expire.c      |  4 ++--
 fs/autofs4/inode.c       | 14 +++++------
 fs/autofs4/waitq.c       |  8 +++----
 include/linux/auto_fs4.h | 62 ++++++++++++++++++++++++++++++++++++++++++++----
 6 files changed, 73 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index e0f16da00e54..a76803108d06 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -25,8 +25,6 @@
 #define AUTOFS_DEV_IOCTL_IOC_FIRST	(AUTOFS_DEV_IOCTL_VERSION)
 #define AUTOFS_DEV_IOCTL_IOC_COUNT	(AUTOFS_IOC_COUNT - 11)
 
-#define AUTOFS_TYPE_TRIGGER	(AUTOFS_TYPE_DIRECT|AUTOFS_TYPE_OFFSET)
-
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/time.h>
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 0566ff8db4cd..aa3f47293bb8 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -532,7 +532,7 @@ static int autofs_dev_ioctl_expire(struct file *fp,
 	how = param->expire.how;
 	mnt = fp->f_path.mnt;
 
-	if (sbi->type & AUTOFS_TYPE_TRIGGER)
+	if (autofs_type_trigger(sbi->type))
 		dentry = autofs4_expire_direct(sbi->sb, mnt, sbi, how);
 	else
 		dentry = autofs4_expire_indirect(sbi->sb, mnt, sbi, how);
@@ -615,7 +615,7 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp,
 	param->ismountpoint.out.magic = magic = 0;
 
 	if (!fp || param->ioctlfd == -1) {
-		if (type == AUTOFS_TYPE_ANY) {
+		if (autofs_type_any(type)) {
 			struct super_block *sb;
 
 			err = path_lookup(path, LOOKUP_FOLLOW, &nd);
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 4b6fb3f628c0..e3bd50776f9e 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -63,7 +63,7 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
 		struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
 
 		/* This is an autofs submount, we can't expire it */
-		if (sbi->type == AUTOFS_TYPE_INDIRECT)
+		if (autofs_type_indirect(sbi->type))
 			goto done;
 
 		/*
@@ -490,7 +490,7 @@ int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt,
 	if (arg && get_user(do_now, arg))
 		return -EFAULT;
 
-	if (sbi->type & AUTOFS_TYPE_TRIGGER)
+	if (autofs_type_trigger(sbi->type))
 		dentry = autofs4_expire_direct(sb, mnt, sbi, do_now);
 	else
 		dentry = autofs4_expire_indirect(sb, mnt, sbi, do_now);
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index cfc23e53b6f4..716e12b627b2 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -197,9 +197,9 @@ static int autofs4_show_options(struct seq_file *m, struct vfsmount *mnt)
 	seq_printf(m, ",minproto=%d", sbi->min_proto);
 	seq_printf(m, ",maxproto=%d", sbi->max_proto);
 
-	if (sbi->type & AUTOFS_TYPE_OFFSET)
+	if (autofs_type_offset(sbi->type))
 		seq_printf(m, ",offset");
-	else if (sbi->type & AUTOFS_TYPE_DIRECT)
+	else if (autofs_type_direct(sbi->type))
 		seq_printf(m, ",direct");
 	else
 		seq_printf(m, ",indirect");
@@ -284,13 +284,13 @@ static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid,
 			*maxproto = option;
 			break;
 		case Opt_indirect:
-			*type = AUTOFS_TYPE_INDIRECT;
+			set_autofs_type_indirect(type);
 			break;
 		case Opt_direct:
-			*type = AUTOFS_TYPE_DIRECT;
+			set_autofs_type_direct(type);
 			break;
 		case Opt_offset:
-			*type = AUTOFS_TYPE_OFFSET;
+			set_autofs_type_offset(type);
 			break;
 		default:
 			return 1;
@@ -338,7 +338,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 	sbi->sb = s;
 	sbi->version = 0;
 	sbi->sub_version = 0;
-	sbi->type = AUTOFS_TYPE_INDIRECT;
+	set_autofs_type_indirect(&sbi->type);
 	sbi->min_proto = 0;
 	sbi->max_proto = 0;
 	mutex_init(&sbi->wq_mutex);
@@ -380,7 +380,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 	}
 
 	root_inode->i_fop = &autofs4_root_operations;
-	root_inode->i_op = sbi->type & AUTOFS_TYPE_TRIGGER ?
+	root_inode->i_op = autofs_type_trigger(sbi->type) ?
 			&autofs4_direct_root_inode_operations :
 			&autofs4_indirect_root_inode_operations;
 
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index e02cc8ae5eb3..eeb246845909 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -337,7 +337,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 		 * is very similar for indirect mounts except only dentrys
 		 * in the root of the autofs file system may be negative.
 		 */
-		if (sbi->type & AUTOFS_TYPE_TRIGGER)
+		if (autofs_type_trigger(sbi->type))
 			return -ENOENT;
 		else if (!IS_ROOT(dentry->d_parent))
 			return -ENOENT;
@@ -348,7 +348,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 		return -ENOMEM;
 
 	/* If this is a direct mount request create a dummy name */
-	if (IS_ROOT(dentry) && sbi->type & AUTOFS_TYPE_TRIGGER)
+	if (IS_ROOT(dentry) && autofs_type_trigger(sbi->type))
 		qstr.len = sprintf(name, "%p", dentry);
 	else {
 		qstr.len = autofs4_getpath(sbi, dentry, &name);
@@ -406,11 +406,11 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 				type = autofs_ptype_expire_multi;
 		} else {
 			if (notify == NFY_MOUNT)
-				type = (sbi->type & AUTOFS_TYPE_TRIGGER) ?
+				type = autofs_type_trigger(sbi->type) ?
 					autofs_ptype_missing_direct :
 					 autofs_ptype_missing_indirect;
 			else
-				type = (sbi->type & AUTOFS_TYPE_TRIGGER) ?
+				type = autofs_type_trigger(sbi->type) ?
 					autofs_ptype_expire_direct :
 					autofs_ptype_expire_indirect;
 		}
diff --git a/include/linux/auto_fs4.h b/include/linux/auto_fs4.h
index 2253716d4b92..55fa478bd639 100644
--- a/include/linux/auto_fs4.h
+++ b/include/linux/auto_fs4.h
@@ -29,10 +29,64 @@
 #define AUTOFS_EXP_IMMEDIATE		1
 #define AUTOFS_EXP_LEAVES		2
 
-#define AUTOFS_TYPE_ANY			0x0000
-#define AUTOFS_TYPE_INDIRECT		0x0001
-#define AUTOFS_TYPE_DIRECT		0x0002
-#define AUTOFS_TYPE_OFFSET		0x0004
+#define AUTOFS_TYPE_ANY			0U
+#define AUTOFS_TYPE_INDIRECT		1U
+#define AUTOFS_TYPE_DIRECT		2U
+#define AUTOFS_TYPE_OFFSET		4U
+
+static inline void set_autofs_type_indirect(unsigned int *type)
+{
+	*type = AUTOFS_TYPE_INDIRECT;
+	return;
+}
+
+static inline unsigned int autofs_type_indirect(unsigned int type)
+{
+	return (type == AUTOFS_TYPE_INDIRECT);
+}
+
+static inline void set_autofs_type_direct(unsigned int *type)
+{
+	*type = AUTOFS_TYPE_DIRECT;
+	return;
+}
+
+static inline unsigned int autofs_type_direct(unsigned int type)
+{
+	return (type == AUTOFS_TYPE_DIRECT);
+}
+
+static inline void set_autofs_type_offset(unsigned int *type)
+{
+	*type = AUTOFS_TYPE_OFFSET;
+	return;
+}
+
+static inline unsigned int autofs_type_offset(unsigned int type)
+{
+	return (type == AUTOFS_TYPE_OFFSET);
+}
+
+static inline unsigned int autofs_type_trigger(unsigned int type)
+{
+	return (type == AUTOFS_TYPE_DIRECT || type == AUTOFS_TYPE_OFFSET);
+}
+
+/*
+ * This isn't really a type as we use it to say "no type set" to
+ * indicate we want to search for "any" mount in the
+ * autofs_dev_ioctl_ismountpoint() device ioctl function.
+ */
+static inline void set_autofs_type_any(unsigned int *type)
+{
+	*type = AUTOFS_TYPE_ANY;
+	return;
+}
+
+static inline unsigned int autofs_type_any(unsigned int type)
+{
+	return (type == AUTOFS_TYPE_ANY);
+}
 
 /* Daemon notification packet types */
 enum autofs_notify {
-- 
cgit v1.2.3


From cabb3fc4bd1628c37c37e054960eb3e4bf30dc26 Mon Sep 17 00:00:00 2001
From: David Brownell <dbrownell@users.sourceforge.net>
Date: Tue, 6 Jan 2009 14:42:26 -0800
Subject: twl4030-gpio: cleanup debounce

Provide a static debounce configuration mechanism for twl4030 GPIOs,
replacing the previous dynamic one.  The single user of that mechanism was
for MMC card detect debouncing.

Boards can provide a bitmask saying which GPIOs to debounce (30 msec).
It's always enabled for pins with the MMC card-detect/VMMCx link active,
so most boards won't need to set the debounce mask.

This is a net code shrink, including runtime footprint.

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpio/twl4030-gpio.c | 54 +++++++++++++++++++--------------------------
 include/linux/i2c/twl4030.h |  9 +++-----
 2 files changed, 26 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/twl4030-gpio.c b/drivers/gpio/twl4030-gpio.c
index 37d3eec8730a..afad14792141 100644
--- a/drivers/gpio/twl4030-gpio.c
+++ b/drivers/gpio/twl4030-gpio.c
@@ -202,37 +202,6 @@ static int twl4030_get_gpio_datain(int gpio)
 	return ret;
 }
 
-/*
- * Configure debounce timing value for a GPIO pin on TWL4030
- */
-int twl4030_set_gpio_debounce(int gpio, int enable)
-{
-	u8 d_bnk = gpio >> 3;
-	u8 d_msk = BIT(gpio & 0x7);
-	u8 reg = 0;
-	u8 base = 0;
-	int ret = 0;
-
-	if (unlikely((gpio >= TWL4030_GPIO_MAX)
-		|| !(gpio_usage_count & BIT(gpio))))
-		return -EPERM;
-
-	base = REG_GPIO_DEBEN1 + d_bnk;
-	mutex_lock(&gpio_lock);
-	ret = gpio_twl4030_read(base);
-	if (ret >= 0) {
-		if (enable)
-			reg = ret | d_msk;
-		else
-			reg = ret & ~d_msk;
-
-		ret = gpio_twl4030_write(base, reg);
-	}
-	mutex_unlock(&gpio_lock);
-	return ret;
-}
-EXPORT_SYMBOL(twl4030_set_gpio_debounce);
-
 /*----------------------------------------------------------------------*/
 
 static int twl_request(struct gpio_chip *chip, unsigned offset)
@@ -405,6 +374,23 @@ static int __devinit gpio_twl4030_pulls(u32 ups, u32 downs)
 				REG_GPIOPUPDCTR1, 5);
 }
 
+static int __devinit gpio_twl4030_debounce(u32 debounce, u8 mmc_cd)
+{
+	u8		message[4];
+
+	/* 30 msec of debouncing is always used for MMC card detect,
+	 * and is optional for everything else.
+	 */
+	message[1] = (debounce & 0xff) | (mmc_cd & 0x03);
+	debounce >>= 8;
+	message[2] = (debounce & 0xff);
+	debounce >>= 8;
+	message[3] = (debounce & 0x03);
+
+	return twl4030_i2c_write(TWL4030_MODULE_GPIO, message,
+				REG_GPIO_DEBEN1, 3);
+}
+
 static int gpio_twl4030_remove(struct platform_device *pdev);
 
 static int __devinit gpio_twl4030_probe(struct platform_device *pdev)
@@ -439,6 +425,12 @@ no_irqs:
 				pdata->pullups, pdata->pulldowns,
 				ret);
 
+	ret = gpio_twl4030_debounce(pdata->debounce, pdata->mmc_cd);
+	if (ret)
+		dev_dbg(&pdev->dev, "debounce %.03x %.01x --> %d\n",
+				pdata->debounce, pdata->mmc_cd,
+				ret);
+
 	twl_gpiochip.base = pdata->gpio_base;
 	twl_gpiochip.ngpio = TWL4030_GPIO_MAX;
 	twl_gpiochip.dev = &pdev->dev;
diff --git a/include/linux/i2c/twl4030.h b/include/linux/i2c/twl4030.h
index a8f84c01f82e..8137f660a5cc 100644
--- a/include/linux/i2c/twl4030.h
+++ b/include/linux/i2c/twl4030.h
@@ -234,6 +234,9 @@ struct twl4030_gpio_platform_data {
 	/* gpio-n should control VMMC(n+1) if BIT(n) in mmc_cd is set */
 	u8		mmc_cd;
 
+	/* if BIT(N) is set, or VMMC(n+1) is linked, debounce GPIO-N */
+	u32		debounce;
+
 	/* For gpio-N, bit (1 << N) in "pullups" is set if that pullup
 	 * should be enabled.  Else, if that bit is set in "pulldowns",
 	 * that pulldown is enabled.  Don't waste power by letting any
@@ -307,12 +310,6 @@ int twl4030_sih_setup(int module);
 #define TWL4030_VAUX3_DEV_GRP		0x1F
 #define TWL4030_VAUX3_DEDICATED		0x22
 
-/*
- * Exported TWL4030 GPIO APIs
- *
- * WARNING -- use standard GPIO and IRQ calls instead; these will vanish.
- */
-int twl4030_set_gpio_debounce(int gpio, int enable);
 
 #if defined(CONFIG_TWL4030_BCI_BATTERY) || \
 	defined(CONFIG_TWL4030_BCI_BATTERY_MODULE)
-- 
cgit v1.2.3


From d3635abfee0c55ad9dcd6b6172a0c6a5455900c9 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Tue, 6 Jan 2009 14:42:41 -0800
Subject: rapidio: remove excess kernel-doc notation

Remove excess kernel-doc notation from rio header and driver:

Warning(include/linux/rio_drv.h:399): Excess function parameter or struct member 'buffer' description in 'rio_get_inb_message'

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Matt Porter <mporter@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/rapidio/rio-driver.c | 1 -
 include/linux/rio_drv.h      | 1 -
 2 files changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/rapidio/rio-driver.c b/drivers/rapidio/rio-driver.c
index 956d3e79f6aa..addb87cf44d9 100644
--- a/drivers/rapidio/rio-driver.c
+++ b/drivers/rapidio/rio-driver.c
@@ -79,7 +79,6 @@ void rio_dev_put(struct rio_dev *rdev)
 
 /**
  *  rio_device_probe - Tell if a RIO device structure has a matching RIO device id structure
- *  @id: the RIO device id structure to match against
  *  @dev: the RIO device structure to match against
  *
  * return 0 and set rio_dev->driver when drv claims rio_dev, else error
diff --git a/include/linux/rio_drv.h b/include/linux/rio_drv.h
index 32c0547ffafc..c93a58a40033 100644
--- a/include/linux/rio_drv.h
+++ b/include/linux/rio_drv.h
@@ -391,7 +391,6 @@ static inline int rio_add_inb_buffer(struct rio_mport *mport, int mbox,
  * rio_get_inb_message - Get A RIO message from an inbound mailbox queue
  * @mport: Master port containing the inbound mailbox
  * @mbox: The inbound mailbox number
- * @buffer: Pointer to the message buffer
  *
  * Get a RIO message from an inbound mailbox queue. Returns 0 on success.
  */
-- 
cgit v1.2.3


From 8cd3ac3aca3f2afe8570708066d64d893da468e8 Mon Sep 17 00:00:00 2001
From: WANG Cong <wangcong@zeuux.org>
Date: Tue, 6 Jan 2009 14:42:48 -0800
Subject: fs/exec.c: make do_coredump() void

No one cares do_coredump()'s return value, and also it seems that it
is also not necessary. So make it void.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: WANG Cong <wangcong@zeuux.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exec.c               | 4 ++--
 include/linux/binfmts.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/exec.c b/fs/exec.c
index 6b09d6fa4f7e..71a6efe5d8bd 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1686,7 +1686,7 @@ int get_dumpable(struct mm_struct *mm)
 	return (ret >= 2) ? 2 : ret;
 }
 
-int do_coredump(long signr, int exit_code, struct pt_regs * regs)
+void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 {
 	struct core_state core_state;
 	char corename[CORENAME_MAX_SIZE + 1];
@@ -1842,5 +1842,5 @@ fail_unlock:
 	put_cred(cred);
 	coredump_finish(mm);
 fail:
-	return retval;
+	return;
 }
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 0d0150b4901e..77b4a9e46004 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -107,7 +107,7 @@ extern int setup_arg_pages(struct linux_binprm * bprm,
 extern int bprm_mm_init(struct linux_binprm *bprm);
 extern int copy_strings_kernel(int argc,char ** argv,struct linux_binprm *bprm);
 extern void install_exec_creds(struct linux_binprm *bprm);
-extern int do_coredump(long signr, int exit_code, struct pt_regs * regs);
+extern void do_coredump(long signr, int exit_code, struct pt_regs *regs);
 extern int set_binfmt(struct linux_binfmt *new);
 extern void free_bprm(struct linux_binprm *);
 
-- 
cgit v1.2.3


From 991c0e6d1ae3df59f0ddfe05edecec8319e35a1b Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Tue, 6 Jan 2009 14:56:21 -0800
Subject: byteorder: only use linux/swab.h

The first step to make swab.h a regular header that will
include an asm/swab.h with arch overrides.

Avoid the gratuitous differences introduced in the new
linux/swab.h by naming the ___constant_swabXX bits and
__fswabXX bits exactly as found in the old implementation
in byteorder/swab[b].h

Use this new swab.h in byteorder/[big|little]_endian.h and
remove the two old swab headers.

Although the inclusion of asm/byteorder.h looks strange in
linux/swab.h, this will allow each arch to move the actual
arch overrides for the swab bits in an asm file and then
the includes can be cleaned up without requiring a flag day
for all arches at once.

Keep providing __fswabXX in case some userspace was using them
directly, but the revised __swabXX should be used instead in
any new code and will always do constant folding not dependent
on the optimization level, which means the __constant versions
can be phased out in-kernel.

Arches that use the old-style arch macros will lose their
optimized versions until they move to the new style, but at
least they will still compile.  Many arches have already moved
and the patches to move the remaining arches are trivial.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/byteorder/Kbuild          |   2 -
 include/linux/byteorder/big_endian.h    |   3 +-
 include/linux/byteorder/little_endian.h |   3 +-
 include/linux/byteorder/swab.h          | 222 --------------------------------
 include/linux/byteorder/swabb.h         | 135 -------------------
 include/linux/swab.h                    |  50 +++----
 6 files changed, 27 insertions(+), 388 deletions(-)
 delete mode 100644 include/linux/byteorder/swab.h
 delete mode 100644 include/linux/byteorder/swabb.h

(limited to 'include/linux')

diff --git a/include/linux/byteorder/Kbuild b/include/linux/byteorder/Kbuild
index fbaa7f9cee32..38437225b092 100644
--- a/include/linux/byteorder/Kbuild
+++ b/include/linux/byteorder/Kbuild
@@ -1,4 +1,2 @@
 unifdef-y += big_endian.h
 unifdef-y += little_endian.h
-unifdef-y += swab.h
-unifdef-y += swabb.h
diff --git a/include/linux/byteorder/big_endian.h b/include/linux/byteorder/big_endian.h
index 1cba3f3efe5f..3c80fd7e8b56 100644
--- a/include/linux/byteorder/big_endian.h
+++ b/include/linux/byteorder/big_endian.h
@@ -9,8 +9,7 @@
 #endif
 
 #include <linux/types.h>
-#include <linux/byteorder/swab.h>
-#include <linux/byteorder/swabb.h>
+#include <linux/swab.h>
 
 #define __constant_htonl(x) ((__force __be32)(__u32)(x))
 #define __constant_ntohl(x) ((__force __u32)(__be32)(x))
diff --git a/include/linux/byteorder/little_endian.h b/include/linux/byteorder/little_endian.h
index cedc1b5a289c..83195fb82962 100644
--- a/include/linux/byteorder/little_endian.h
+++ b/include/linux/byteorder/little_endian.h
@@ -9,8 +9,7 @@
 #endif
 
 #include <linux/types.h>
-#include <linux/byteorder/swab.h>
-#include <linux/byteorder/swabb.h>
+#include <linux/swab.h>
 
 #define __constant_htonl(x) ((__force __be32)___constant_swab32((x)))
 #define __constant_ntohl(x) ___constant_swab32((__force __be32)(x))
diff --git a/include/linux/byteorder/swab.h b/include/linux/byteorder/swab.h
deleted file mode 100644
index 142134ff1645..000000000000
--- a/include/linux/byteorder/swab.h
+++ /dev/null
@@ -1,222 +0,0 @@
-#ifndef _LINUX_BYTEORDER_SWAB_H
-#define _LINUX_BYTEORDER_SWAB_H
-
-/*
- * linux/byteorder/swab.h
- * Byte-swapping, independently from CPU endianness
- *	swabXX[ps]?(foo)
- *
- * Francois-Rene Rideau <fare@tunes.org> 19971205
- *    separated swab functions from cpu_to_XX,
- *    to clean up support for bizarre-endian architectures.
- *
- * Trent Piepho <xyzzy@speakeasy.org> 2007114
- *    make constant-folding work, provide C versions that
- *    gcc can optimize better, explain different versions
- *
- * See asm-i386/byteorder.h and suches for examples of how to provide
- * architecture-dependent optimized versions
- *
- */
-
-#include <linux/compiler.h>
-
-/* Functions/macros defined, there are a lot:
- *
- * ___swabXX
- *    Generic C versions of the swab functions.
- *
- * ___constant_swabXX
- *    C versions that gcc can fold into a compile-time constant when
- *    the argument is a compile-time constant.
- *
- * __arch__swabXX[sp]?
- *    Architecture optimized versions of all the swab functions
- *    (including the s and p versions).  These can be defined in
- *    asm-arch/byteorder.h.  Any which are not, are defined here.
- *    __arch__swabXXs() is defined in terms of __arch__swabXXp(), which
- *    is defined in terms of __arch__swabXX(), which is in turn defined
- *    in terms of ___swabXX(x).
- *    These must be macros.  They may be unsafe for arguments with
- *    side-effects.
- *
- * __fswabXX
- *    Inline function versions of the __arch__ macros.  These _are_ safe
- *    if the arguments have side-effects.  Note there are no s and p
- *    versions of these.
- *
- * __swabXX[sb]
- *    There are the ones you should actually use.  The __swabXX versions
- *    will be a constant given a constant argument and use the arch
- *    specific code (if any) for non-constant arguments.  The s and p
- *    versions always use the arch specific code (constant folding
- *    doesn't apply).  They are safe to use with arguments with
- *    side-effects.
- *
- * swabXX[sb]
- *    Nicknames for __swabXX[sb] to use in the kernel.
- */
-
-/* casts are necessary for constants, because we never know how for sure
- * how U/UL/ULL map to __u16, __u32, __u64. At least not in a portable way.
- */
-
-static __inline__ __attribute_const__ __u16 ___swab16(__u16 x)
-{
-	return x<<8 | x>>8;
-}
-static __inline__ __attribute_const__ __u32 ___swab32(__u32 x)
-{
-	return x<<24 | x>>24 |
-		(x & (__u32)0x0000ff00UL)<<8 |
-		(x & (__u32)0x00ff0000UL)>>8;
-}
-static __inline__ __attribute_const__ __u64 ___swab64(__u64 x)
-{
-	return x<<56 | x>>56 |
-		(x & (__u64)0x000000000000ff00ULL)<<40 |
-		(x & (__u64)0x0000000000ff0000ULL)<<24 |
-		(x & (__u64)0x00000000ff000000ULL)<< 8 |
-	        (x & (__u64)0x000000ff00000000ULL)>> 8 |
-		(x & (__u64)0x0000ff0000000000ULL)>>24 |
-		(x & (__u64)0x00ff000000000000ULL)>>40;
-}
-
-#define ___constant_swab16(x) \
-	((__u16)( \
-		(((__u16)(x) & (__u16)0x00ffU) << 8) | \
-		(((__u16)(x) & (__u16)0xff00U) >> 8) ))
-#define ___constant_swab32(x) \
-	((__u32)( \
-		(((__u32)(x) & (__u32)0x000000ffUL) << 24) | \
-		(((__u32)(x) & (__u32)0x0000ff00UL) <<  8) | \
-		(((__u32)(x) & (__u32)0x00ff0000UL) >>  8) | \
-		(((__u32)(x) & (__u32)0xff000000UL) >> 24) ))
-#define ___constant_swab64(x) \
-	((__u64)( \
-		(__u64)(((__u64)(x) & (__u64)0x00000000000000ffULL) << 56) | \
-		(__u64)(((__u64)(x) & (__u64)0x000000000000ff00ULL) << 40) | \
-		(__u64)(((__u64)(x) & (__u64)0x0000000000ff0000ULL) << 24) | \
-		(__u64)(((__u64)(x) & (__u64)0x00000000ff000000ULL) <<  8) | \
-	        (__u64)(((__u64)(x) & (__u64)0x000000ff00000000ULL) >>  8) | \
-		(__u64)(((__u64)(x) & (__u64)0x0000ff0000000000ULL) >> 24) | \
-		(__u64)(((__u64)(x) & (__u64)0x00ff000000000000ULL) >> 40) | \
-		(__u64)(((__u64)(x) & (__u64)0xff00000000000000ULL) >> 56) ))
-
-/*
- * provide defaults when no architecture-specific optimization is detected
- */
-#ifndef __arch__swab16
-#  define __arch__swab16(x) ___swab16(x)
-#endif
-#ifndef __arch__swab32
-#  define __arch__swab32(x) ___swab32(x)
-#endif
-#ifndef __arch__swab64
-#  define __arch__swab64(x) ___swab64(x)
-#endif
-
-#ifndef __arch__swab16p
-#  define __arch__swab16p(x) __arch__swab16(*(x))
-#endif
-#ifndef __arch__swab32p
-#  define __arch__swab32p(x) __arch__swab32(*(x))
-#endif
-#ifndef __arch__swab64p
-#  define __arch__swab64p(x) __arch__swab64(*(x))
-#endif
-
-#ifndef __arch__swab16s
-#  define __arch__swab16s(x) ((void)(*(x) = __arch__swab16p(x)))
-#endif
-#ifndef __arch__swab32s
-#  define __arch__swab32s(x) ((void)(*(x) = __arch__swab32p(x)))
-#endif
-#ifndef __arch__swab64s
-#  define __arch__swab64s(x) ((void)(*(x) = __arch__swab64p(x)))
-#endif
-
-
-/*
- * Allow constant folding
- */
-#if defined(__GNUC__) && defined(__OPTIMIZE__)
-#  define __swab16(x) \
-(__builtin_constant_p((__u16)(x)) ? \
- ___constant_swab16((x)) : \
- __fswab16((x)))
-#  define __swab32(x) \
-(__builtin_constant_p((__u32)(x)) ? \
- ___constant_swab32((x)) : \
- __fswab32((x)))
-#  define __swab64(x) \
-(__builtin_constant_p((__u64)(x)) ? \
- ___constant_swab64((x)) : \
- __fswab64((x)))
-#else
-#  define __swab16(x) __fswab16(x)
-#  define __swab32(x) __fswab32(x)
-#  define __swab64(x) __fswab64(x)
-#endif /* OPTIMIZE */
-
-
-static __inline__ __attribute_const__ __u16 __fswab16(__u16 x)
-{
-	return __arch__swab16(x);
-}
-static __inline__ __u16 __swab16p(const __u16 *x)
-{
-	return __arch__swab16p(x);
-}
-static __inline__ void __swab16s(__u16 *addr)
-{
-	__arch__swab16s(addr);
-}
-
-static __inline__ __attribute_const__ __u32 __fswab32(__u32 x)
-{
-	return __arch__swab32(x);
-}
-static __inline__ __u32 __swab32p(const __u32 *x)
-{
-	return __arch__swab32p(x);
-}
-static __inline__ void __swab32s(__u32 *addr)
-{
-	__arch__swab32s(addr);
-}
-
-#ifdef __BYTEORDER_HAS_U64__
-static __inline__ __attribute_const__ __u64 __fswab64(__u64 x)
-{
-#  ifdef __SWAB_64_THRU_32__
-	__u32 h = x >> 32;
-        __u32 l = x & ((1ULL<<32)-1);
-        return (((__u64)__swab32(l)) << 32) | ((__u64)(__swab32(h)));
-#  else
-	return __arch__swab64(x);
-#  endif
-}
-static __inline__ __u64 __swab64p(const __u64 *x)
-{
-	return __arch__swab64p(x);
-}
-static __inline__ void __swab64s(__u64 *addr)
-{
-	__arch__swab64s(addr);
-}
-#endif /* __BYTEORDER_HAS_U64__ */
-
-#if defined(__KERNEL__)
-#define swab16 __swab16
-#define swab32 __swab32
-#define swab64 __swab64
-#define swab16p __swab16p
-#define swab32p __swab32p
-#define swab64p __swab64p
-#define swab16s __swab16s
-#define swab32s __swab32s
-#define swab64s __swab64s
-#endif
-
-#endif /* _LINUX_BYTEORDER_SWAB_H */
diff --git a/include/linux/byteorder/swabb.h b/include/linux/byteorder/swabb.h
deleted file mode 100644
index 8c780c7d779e..000000000000
--- a/include/linux/byteorder/swabb.h
+++ /dev/null
@@ -1,135 +0,0 @@
-#ifndef _LINUX_BYTEORDER_SWABB_H
-#define _LINUX_BYTEORDER_SWABB_H
-
-/*
- * linux/byteorder/swabb.h
- * SWAp Bytes Bizarrely
- *	swaHHXX[ps]?(foo)
- *
- * Support for obNUXIous pdp-endian and other bizarre architectures.
- * Will Linux ever run on such ancient beasts? if not, this file
- * will be but a programming pearl. Still, it's a reminder that we
- * shouldn't be making too many assumptions when trying to be portable.
- *
- */
-
-/*
- * Meaning of the names I chose (vaxlinux people feel free to correct them):
- * swahw32	swap 16-bit half-words in a 32-bit word
- * swahb32	swap 8-bit halves of each 16-bit half-word in a 32-bit word
- *
- * No 64-bit support yet. I don't know NUXI conventions for long longs.
- * I guarantee it will be a mess when it's there, though :->
- * It will be even worse if there are conflicting 64-bit conventions.
- * Hopefully, no one ever used 64-bit objects on NUXI machines.
- *
- */
-
-#include <linux/types.h>
-
-#define ___swahw32(x) \
-({ \
-	__u32 __x = (x); \
-	((__u32)( \
-		(((__u32)(__x) & (__u32)0x0000ffffUL) << 16) | \
-		(((__u32)(__x) & (__u32)0xffff0000UL) >> 16) )); \
-})
-#define ___swahb32(x) \
-({ \
-	__u32 __x = (x); \
-	((__u32)( \
-		(((__u32)(__x) & (__u32)0x00ff00ffUL) << 8) | \
-		(((__u32)(__x) & (__u32)0xff00ff00UL) >> 8) )); \
-})
-
-#define ___constant_swahw32(x) \
-	((__u32)( \
-		(((__u32)(x) & (__u32)0x0000ffffUL) << 16) | \
-		(((__u32)(x) & (__u32)0xffff0000UL) >> 16) ))
-#define ___constant_swahb32(x) \
-	((__u32)( \
-		(((__u32)(x) & (__u32)0x00ff00ffUL) << 8) | \
-		(((__u32)(x) & (__u32)0xff00ff00UL) >> 8) ))
-
-/*
- * provide defaults when no architecture-specific optimization is detected
- */
-#ifndef __arch__swahw32
-#  define __arch__swahw32(x) ___swahw32(x)
-#endif
-#ifndef __arch__swahb32
-#  define __arch__swahb32(x) ___swahb32(x)
-#endif
-
-#ifndef __arch__swahw32p
-#  define __arch__swahw32p(x) __swahw32(*(x))
-#endif
-#ifndef __arch__swahb32p
-#  define __arch__swahb32p(x) __swahb32(*(x))
-#endif
-
-#ifndef __arch__swahw32s
-#  define __arch__swahw32s(x) do { *(x) = __swahw32p((x)); } while (0)
-#endif
-#ifndef __arch__swahb32s
-#  define __arch__swahb32s(x) do { *(x) = __swahb32p((x)); } while (0)
-#endif
-
-
-/*
- * Allow constant folding
- */
-#define __swahw32(x) \
-(__builtin_constant_p((__u32)(x)) ? \
- ___swahw32((x)) : \
- __fswahw32((x)))
-#define __swahb32(x) \
-(__builtin_constant_p((__u32)(x)) ? \
- ___swahb32((x)) : \
- __fswahb32((x)))
-
-
-static inline __u32 __fswahw32(__u32 x)
-{
-	return __arch__swahw32(x);
-}
-
-static inline __u32 __swahw32p(__u32 *x)
-{
-	return __arch__swahw32p(x);
-}
-
-static inline void __swahw32s(__u32 *addr)
-{
-	__arch__swahw32s(addr);
-}
-
-static inline __u32 __fswahb32(__u32 x)
-{
-	return __arch__swahb32(x);
-}
-
-static inline __u32 __swahb32p(__u32 *x)
-{
-	return __arch__swahb32p(x);
-}
-
-static inline void __swahb32s(__u32 *addr)
-{
-	__arch__swahb32s(addr);
-}
-
-#ifdef __BYTEORDER_HAS_U64__
-/*
- * Not supported yet
- */
-#endif /* __BYTEORDER_HAS_U64__ */
-
-#define swahw32 __swahw32
-#define swahb32 __swahb32
-#define swahw32p __swahw32p
-#define swahb32p __swahb32p
-#define swahw32s __swahw32s
-#define swahb32s __swahb32s
-
-#endif /* _LINUX_BYTEORDER_SWABB_H */
diff --git a/include/linux/swab.h b/include/linux/swab.h
index bbed279f3b32..9a2d33e0a98a 100644
--- a/include/linux/swab.h
+++ b/include/linux/swab.h
@@ -9,17 +9,17 @@
  * casts are necessary for constants, because we never know how for sure
  * how U/UL/ULL map to __u16, __u32, __u64. At least not in a portable way.
  */
-#define __const_swab16(x) ((__u16)(				\
+#define ___constant_swab16(x) ((__u16)(				\
 	(((__u16)(x) & (__u16)0x00ffU) << 8) |			\
 	(((__u16)(x) & (__u16)0xff00U) >> 8)))
 
-#define __const_swab32(x) ((__u32)(				\
+#define ___constant_swab32(x) ((__u32)(				\
 	(((__u32)(x) & (__u32)0x000000ffUL) << 24) |		\
 	(((__u32)(x) & (__u32)0x0000ff00UL) <<  8) |		\
 	(((__u32)(x) & (__u32)0x00ff0000UL) >>  8) |		\
 	(((__u32)(x) & (__u32)0xff000000UL) >> 24)))
 
-#define __const_swab64(x) ((__u64)(				\
+#define ___constant_swab64(x) ((__u64)(				\
 	(((__u64)(x) & (__u64)0x00000000000000ffULL) << 56) |	\
 	(((__u64)(x) & (__u64)0x000000000000ff00ULL) << 40) |	\
 	(((__u64)(x) & (__u64)0x0000000000ff0000ULL) << 24) |	\
@@ -29,11 +29,11 @@
 	(((__u64)(x) & (__u64)0x00ff000000000000ULL) >> 40) |	\
 	(((__u64)(x) & (__u64)0xff00000000000000ULL) >> 56)))
 
-#define __const_swahw32(x) ((__u32)(				\
+#define ___constant_swahw32(x) ((__u32)(			\
 	(((__u32)(x) & (__u32)0x0000ffffUL) << 16) |		\
 	(((__u32)(x) & (__u32)0xffff0000UL) >> 16)))
 
-#define __const_swahb32(x) ((__u32)(				\
+#define ___constant_swahb32(x) ((__u32)(			\
 	(((__u32)(x) & (__u32)0x00ff00ffUL) << 8) |		\
 	(((__u32)(x) & (__u32)0xff00ff00UL) >> 8)))
 
@@ -43,25 +43,25 @@
  * ___swab16, ___swab32, ___swab64, ___swahw32, ___swahb32
  */
 
-static inline __attribute_const__ __u16 ___swab16(__u16 val)
+static inline __attribute_const__ __u16 __fswab16(__u16 val)
 {
 #ifdef __arch_swab16
 	return __arch_swab16(val);
 #else
-	return __const_swab16(val);
+	return ___constant_swab16(val);
 #endif
 }
 
-static inline __attribute_const__ __u32 ___swab32(__u32 val)
+static inline __attribute_const__ __u32 __fswab32(__u32 val)
 {
 #ifdef __arch_swab32
 	return __arch_swab32(val);
 #else
-	return __const_swab32(val);
+	return ___constant_swab32(val);
 #endif
 }
 
-static inline __attribute_const__ __u64 ___swab64(__u64 val)
+static inline __attribute_const__ __u64 __fswab64(__u64 val)
 {
 #ifdef __arch_swab64
 	return __arch_swab64(val);
@@ -70,25 +70,25 @@ static inline __attribute_const__ __u64 ___swab64(__u64 val)
 	__u32 l = val & ((1ULL << 32) - 1);
 	return (((__u64)___swab32(l)) << 32) | ((__u64)(___swab32(h)));
 #else
-	return __const_swab64(val);
+	return ___constant_swab64(val);
 #endif
 }
 
-static inline __attribute_const__ __u32 ___swahw32(__u32 val)
+static inline __attribute_const__ __u32 __fswahw32(__u32 val)
 {
 #ifdef __arch_swahw32
 	return __arch_swahw32(val);
 #else
-	return __const_swahw32(val);
+	return ___constant_swahw32(val);
 #endif
 }
 
-static inline __attribute_const__ __u32 ___swahb32(__u32 val)
+static inline __attribute_const__ __u32 __fswahb32(__u32 val)
 {
 #ifdef __arch_swahb32
 	return __arch_swahb32(val);
 #else
-	return __const_swahb32(val);
+	return ___constant_swahb32(val);
 #endif
 }
 
@@ -98,8 +98,8 @@ static inline __attribute_const__ __u32 ___swahb32(__u32 val)
  */
 #define __swab16(x)				\
 	(__builtin_constant_p((__u16)(x)) ?	\
-	__const_swab16((x)) :			\
-	___swab16((x)))
+	___constant_swab16(x) :			\
+	__fswab16(x))
 
 /**
  * __swab32 - return a byteswapped 32-bit value
@@ -107,8 +107,8 @@ static inline __attribute_const__ __u32 ___swahb32(__u32 val)
  */
 #define __swab32(x)				\
 	(__builtin_constant_p((__u32)(x)) ?	\
-	__const_swab32((x)) :			\
-	___swab32((x)))
+	___constant_swab32(x) :			\
+	__fswab32(x))
 
 /**
  * __swab64 - return a byteswapped 64-bit value
@@ -116,8 +116,8 @@ static inline __attribute_const__ __u32 ___swahb32(__u32 val)
  */
 #define __swab64(x)				\
 	(__builtin_constant_p((__u64)(x)) ?	\
-	__const_swab64((x)) :			\
-	___swab64((x)))
+	___constant_swab64(x) :			\
+	__fswab64(x))
 
 /**
  * __swahw32 - return a word-swapped 32-bit value
@@ -127,8 +127,8 @@ static inline __attribute_const__ __u32 ___swahb32(__u32 val)
  */
 #define __swahw32(x)				\
 	(__builtin_constant_p((__u32)(x)) ?	\
-	__const_swahw32((x)) :			\
-	___swahw32((x)))
+	___constant_swahw32(x) :		\
+	__fswahw32(x))
 
 /**
  * __swahb32 - return a high and low byte-swapped 32-bit value
@@ -138,8 +138,8 @@ static inline __attribute_const__ __u32 ___swahb32(__u32 val)
  */
 #define __swahb32(x)				\
 	(__builtin_constant_p((__u32)(x)) ?	\
-	__const_swahb32((x)) :			\
-	___swahb32((x)))
+	___constant_swahb32(x) :		\
+	__fswahb32(x))
 
 /**
  * __swab16p - return a byteswapped 16-bit value from a pointer
-- 
cgit v1.2.3


From 637b180c23313f2964e0ef20f1ee375203866968 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Tue, 6 Jan 2009 13:30:58 -0800
Subject: byteorder: remove the now unused byteorder.h

This implementation caused problems in userspace which can, and does
define _both_ __LITTLE_ENDIAN and __BIG_ENDIAN.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/Kbuild      |   1 -
 include/linux/byteorder.h | 372 ----------------------------------------------
 2 files changed, 373 deletions(-)
 delete mode 100644 include/linux/byteorder.h

(limited to 'include/linux')

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 39da666067b9..a3323f337e4d 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -179,7 +179,6 @@ unifdef-y += auto_fs.h
 unifdef-y += auxvec.h
 unifdef-y += binfmts.h
 unifdef-y += blktrace_api.h
-unifdef-y += byteorder.h
 unifdef-y += capability.h
 unifdef-y += capi.h
 unifdef-y += cciss_ioctl.h
diff --git a/include/linux/byteorder.h b/include/linux/byteorder.h
deleted file mode 100644
index 29f002d73d98..000000000000
--- a/include/linux/byteorder.h
+++ /dev/null
@@ -1,372 +0,0 @@
-#ifndef _LINUX_BYTEORDER_H
-#define _LINUX_BYTEORDER_H
-
-#include <linux/types.h>
-#include <linux/swab.h>
-
-#if defined(__LITTLE_ENDIAN) && defined(__BIG_ENDIAN)
-# error Fix asm/byteorder.h to define one endianness
-#endif
-
-#if !defined(__LITTLE_ENDIAN) && !defined(__BIG_ENDIAN)
-# error Fix asm/byteorder.h to define arch endianness
-#endif
-
-#ifdef __LITTLE_ENDIAN
-# undef __LITTLE_ENDIAN
-# define __LITTLE_ENDIAN 1234
-#endif
-
-#ifdef __BIG_ENDIAN
-# undef __BIG_ENDIAN
-# define __BIG_ENDIAN 4321
-#endif
-
-#if defined(__LITTLE_ENDIAN) && !defined(__LITTLE_ENDIAN_BITFIELD)
-# define __LITTLE_ENDIAN_BITFIELD
-#endif
-
-#if defined(__BIG_ENDIAN) && !defined(__BIG_ENDIAN_BITFIELD)
-# define __BIG_ENDIAN_BITFIELD
-#endif
-
-#ifdef __LITTLE_ENDIAN
-# define __le16_to_cpu(x) ((__force __u16)(__le16)(x))
-# define __le32_to_cpu(x) ((__force __u32)(__le32)(x))
-# define __le64_to_cpu(x) ((__force __u64)(__le64)(x))
-# define __cpu_to_le16(x) ((__force __le16)(__u16)(x))
-# define __cpu_to_le32(x) ((__force __le32)(__u32)(x))
-# define __cpu_to_le64(x) ((__force __le64)(__u64)(x))
-
-# define __be16_to_cpu(x) __swab16((__force __u16)(__be16)(x))
-# define __be32_to_cpu(x) __swab32((__force __u32)(__be32)(x))
-# define __be64_to_cpu(x) __swab64((__force __u64)(__be64)(x))
-# define __cpu_to_be16(x) ((__force __be16)__swab16(x))
-# define __cpu_to_be32(x) ((__force __be32)__swab32(x))
-# define __cpu_to_be64(x) ((__force __be64)__swab64(x))
-#endif
-
-#ifdef __BIG_ENDIAN
-# define __be16_to_cpu(x) ((__force __u16)(__be16)(x))
-# define __be32_to_cpu(x) ((__force __u32)(__be32)(x))
-# define __be64_to_cpu(x) ((__force __u64)(__be64)(x))
-# define __cpu_to_be16(x) ((__force __be16)(__u16)(x))
-# define __cpu_to_be32(x) ((__force __be32)(__u32)(x))
-# define __cpu_to_be64(x) ((__force __be64)(__u64)(x))
-
-# define __le16_to_cpu(x) __swab16((__force __u16)(__le16)(x))
-# define __le32_to_cpu(x) __swab32((__force __u32)(__le32)(x))
-# define __le64_to_cpu(x) __swab64((__force __u64)(__le64)(x))
-# define __cpu_to_le16(x) ((__force __le16)__swab16(x))
-# define __cpu_to_le32(x) ((__force __le32)__swab32(x))
-# define __cpu_to_le64(x) ((__force __le64)__swab64(x))
-#endif
-
-/*
- * These helpers could be phased out over time as the base version
- * handles constant folding.
- */
-#define __constant_htonl(x) __cpu_to_be32(x)
-#define __constant_ntohl(x) __be32_to_cpu(x)
-#define __constant_htons(x) __cpu_to_be16(x)
-#define __constant_ntohs(x) __be16_to_cpu(x)
-
-#define __constant_le16_to_cpu(x) __le16_to_cpu(x)
-#define __constant_le32_to_cpu(x) __le32_to_cpu(x)
-#define __constant_le64_to_cpu(x) __le64_to_cpu(x)
-#define __constant_be16_to_cpu(x) __be16_to_cpu(x)
-#define __constant_be32_to_cpu(x) __be32_to_cpu(x)
-#define __constant_be64_to_cpu(x) __be64_to_cpu(x)
-
-#define __constant_cpu_to_le16(x) __cpu_to_le16(x)
-#define __constant_cpu_to_le32(x) __cpu_to_le32(x)
-#define __constant_cpu_to_le64(x) __cpu_to_le64(x)
-#define __constant_cpu_to_be16(x) __cpu_to_be16(x)
-#define __constant_cpu_to_be32(x) __cpu_to_be32(x)
-#define __constant_cpu_to_be64(x) __cpu_to_be64(x)
-
-static inline void __le16_to_cpus(__u16 *p)
-{
-#ifdef __BIG_ENDIAN
-	__swab16s(p);
-#endif
-}
-
-static inline void __cpu_to_le16s(__u16 *p)
-{
-#ifdef __BIG_ENDIAN
-	__swab16s(p);
-#endif
-}
-
-static inline void __le32_to_cpus(__u32 *p)
-{
-#ifdef __BIG_ENDIAN
-	__swab32s(p);
-#endif
-}
-
-static inline void __cpu_to_le32s(__u32 *p)
-{
-#ifdef __BIG_ENDIAN
-	__swab32s(p);
-#endif
-}
-
-static inline void __le64_to_cpus(__u64 *p)
-{
-#ifdef __BIG_ENDIAN
-	__swab64s(p);
-#endif
-}
-
-static inline void __cpu_to_le64s(__u64 *p)
-{
-#ifdef __BIG_ENDIAN
-	__swab64s(p);
-#endif
-}
-
-static inline void __be16_to_cpus(__u16 *p)
-{
-#ifdef __LITTLE_ENDIAN
-	__swab16s(p);
-#endif
-}
-
-static inline void __cpu_to_be16s(__u16 *p)
-{
-#ifdef __LITTLE_ENDIAN
-	__swab16s(p);
-#endif
-}
-
-static inline void __be32_to_cpus(__u32 *p)
-{
-#ifdef __LITTLE_ENDIAN
-	__swab32s(p);
-#endif
-}
-
-static inline void __cpu_to_be32s(__u32 *p)
-{
-#ifdef __LITTLE_ENDIAN
-	__swab32s(p);
-#endif
-}
-
-static inline void __be64_to_cpus(__u64 *p)
-{
-#ifdef __LITTLE_ENDIAN
-	__swab64s(p);
-#endif
-}
-
-static inline void __cpu_to_be64s(__u64 *p)
-{
-#ifdef __LITTLE_ENDIAN
-	__swab64s(p);
-#endif
-}
-
-static inline __u16 __le16_to_cpup(const __le16 *p)
-{
-#ifdef __LITTLE_ENDIAN
-	return (__force __u16)*p;
-#else
-	return __swab16p((__force __u16 *)p);
-#endif
-}
-
-static inline __u32 __le32_to_cpup(const __le32 *p)
-{
-#ifdef __LITTLE_ENDIAN
-	return (__force __u32)*p;
-#else
-	return __swab32p((__force __u32 *)p);
-#endif
-}
-
-static inline __u64 __le64_to_cpup(const __le64 *p)
-{
-#ifdef __LITTLE_ENDIAN
-	return (__force __u64)*p;
-#else
-	return __swab64p((__force __u64 *)p);
-#endif
-}
-
-static inline __le16 __cpu_to_le16p(const __u16 *p)
-{
-#ifdef __LITTLE_ENDIAN
-	return (__force __le16)*p;
-#else
-	return (__force __le16)__swab16p(p);
-#endif
-}
-
-static inline __le32 __cpu_to_le32p(const __u32 *p)
-{
-#ifdef __LITTLE_ENDIAN
-	return (__force __le32)*p;
-#else
-	return (__force __le32)__swab32p(p);
-#endif
-}
-
-static inline __le64 __cpu_to_le64p(const __u64 *p)
-{
-#ifdef __LITTLE_ENDIAN
-	return (__force __le64)*p;
-#else
-	return (__force __le64)__swab64p(p);
-#endif
-}
-
-static inline __u16 __be16_to_cpup(const __be16 *p)
-{
-#ifdef __BIG_ENDIAN
-	return (__force __u16)*p;
-#else
-	return __swab16p((__force __u16 *)p);
-#endif
-}
-
-static inline __u32 __be32_to_cpup(const __be32 *p)
-{
-#ifdef __BIG_ENDIAN
-	return (__force __u32)*p;
-#else
-	return __swab32p((__force __u32 *)p);
-#endif
-}
-
-static inline __u64 __be64_to_cpup(const __be64 *p)
-{
-#ifdef __BIG_ENDIAN
-	return (__force __u64)*p;
-#else
-	return __swab64p((__force __u64 *)p);
-#endif
-}
-
-static inline __be16 __cpu_to_be16p(const __u16 *p)
-{
-#ifdef __BIG_ENDIAN
-	return (__force __be16)*p;
-#else
-	return (__force __be16)__swab16p(p);
-#endif
-}
-
-static inline __be32 __cpu_to_be32p(const __u32 *p)
-{
-#ifdef __BIG_ENDIAN
-	return (__force __be32)*p;
-#else
-	return (__force __be32)__swab32p(p);
-#endif
-}
-
-static inline __be64 __cpu_to_be64p(const __u64 *p)
-{
-#ifdef __BIG_ENDIAN
-	return (__force __be64)*p;
-#else
-	return (__force __be64)__swab64p(p);
-#endif
-}
-
-#ifdef __KERNEL__
-
-# define le16_to_cpu __le16_to_cpu
-# define le32_to_cpu __le32_to_cpu
-# define le64_to_cpu __le64_to_cpu
-# define be16_to_cpu __be16_to_cpu
-# define be32_to_cpu __be32_to_cpu
-# define be64_to_cpu __be64_to_cpu
-# define cpu_to_le16 __cpu_to_le16
-# define cpu_to_le32 __cpu_to_le32
-# define cpu_to_le64 __cpu_to_le64
-# define cpu_to_be16 __cpu_to_be16
-# define cpu_to_be32 __cpu_to_be32
-# define cpu_to_be64 __cpu_to_be64
-
-# define le16_to_cpup __le16_to_cpup
-# define le32_to_cpup __le32_to_cpup
-# define le64_to_cpup __le64_to_cpup
-# define be16_to_cpup __be16_to_cpup
-# define be32_to_cpup __be32_to_cpup
-# define be64_to_cpup __be64_to_cpup
-# define cpu_to_le16p __cpu_to_le16p
-# define cpu_to_le32p __cpu_to_le32p
-# define cpu_to_le64p __cpu_to_le64p
-# define cpu_to_be16p __cpu_to_be16p
-# define cpu_to_be32p __cpu_to_be32p
-# define cpu_to_be64p __cpu_to_be64p
-
-# define le16_to_cpus __le16_to_cpus
-# define le32_to_cpus __le32_to_cpus
-# define le64_to_cpus __le64_to_cpus
-# define be16_to_cpus __be16_to_cpus
-# define be32_to_cpus __be32_to_cpus
-# define be64_to_cpus __be64_to_cpus
-# define cpu_to_le16s __cpu_to_le16s
-# define cpu_to_le32s __cpu_to_le32s
-# define cpu_to_le64s __cpu_to_le64s
-# define cpu_to_be16s __cpu_to_be16s
-# define cpu_to_be32s __cpu_to_be32s
-# define cpu_to_be64s __cpu_to_be64s
-
-/*
- * They have to be macros in order to do the constant folding
- * correctly - if the argument passed into a inline function
- * it is no longer constant according to gcc..
- */
-# undef ntohl
-# undef ntohs
-# undef htonl
-# undef htons
-
-# define ___htonl(x) __cpu_to_be32(x)
-# define ___htons(x) __cpu_to_be16(x)
-# define ___ntohl(x) __be32_to_cpu(x)
-# define ___ntohs(x) __be16_to_cpu(x)
-
-# define htonl(x) ___htonl(x)
-# define ntohl(x) ___ntohl(x)
-# define htons(x) ___htons(x)
-# define ntohs(x) ___ntohs(x)
-
-static inline void le16_add_cpu(__le16 *var, u16 val)
-{
-	*var = cpu_to_le16(le16_to_cpup(var) + val);
-}
-
-static inline void le32_add_cpu(__le32 *var, u32 val)
-{
-	*var = cpu_to_le32(le32_to_cpup(var) + val);
-}
-
-static inline void le64_add_cpu(__le64 *var, u64 val)
-{
-	*var = cpu_to_le64(le64_to_cpup(var) + val);
-}
-
-static inline void be16_add_cpu(__be16 *var, u16 val)
-{
-	*var = cpu_to_be16(be16_to_cpup(var) + val);
-}
-
-static inline void be32_add_cpu(__be32 *var, u32 val)
-{
-	*var = cpu_to_be32(be32_to_cpup(var) + val);
-}
-
-static inline void be64_add_cpu(__be64 *var, u64 val)
-{
-	*var = cpu_to_be64(be64_to_cpup(var) + val);
-}
-
-#endif /* __KERNEL__ */
-#endif /* _LINUX_BYTEORDER_H */
-- 
cgit v1.2.3


From ede6f5aea054d3fb67c78857f7abdee602302043 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 6 Jan 2009 21:17:57 -0800
Subject: Fix up 64-bit byte swaps for most 32-bit architectures

The __SWAB_64_THRU_32__ case of a 64-bit byte swap was depending on the
no-longer-existant ___swab32() method (three underscores).  We got rid
of some of the worst indirection and complexity, and now it should just
use the 32-bit swab function that was defined right above it.

Reported-and-tested-by: Nicolas Pitre <nico@cam.org>
Reported-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swab.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/swab.h b/include/linux/swab.h
index 9a2d33e0a98a..be5284d4a053 100644
--- a/include/linux/swab.h
+++ b/include/linux/swab.h
@@ -68,7 +68,7 @@ static inline __attribute_const__ __u64 __fswab64(__u64 val)
 #elif defined(__SWAB_64_THRU_32__)
 	__u32 h = val >> 32;
 	__u32 l = val & ((1ULL << 32) - 1);
-	return (((__u64)___swab32(l)) << 32) | ((__u64)(___swab32(h)));
+	return (((__u64)__fswab32(l)) << 32) | ((__u64)(__fswab32(h)));
 #else
 	return ___constant_swab64(val);
 #endif
-- 
cgit v1.2.3


From e1995f65be0786ca201f466f049dad1e2e4c3421 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Wed, 7 Jan 2009 14:29:16 +0100
Subject: i2c: Drop I2C_CLASS_ALL

I2C_CLASS_ALL is almost never what bus driver authors really want.
These i2c classes are really only about which devices must be probed,
not what devices can be present. As device drivers get converted to the
new i2c device driver model, only a few device types will keep relying
on probing.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Acked-by: Sonic Zhang <sonic.zhang@analog.com>
---
 drivers/i2c/busses/i2c-bfin-twi.c | 2 +-
 drivers/i2c/busses/i2c-sh7760.c   | 2 +-
 include/linux/i2c.h               | 1 -
 3 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/i2c/busses/i2c-bfin-twi.c b/drivers/i2c/busses/i2c-bfin-twi.c
index 3c855ff2992f..3fd2c417c1e0 100644
--- a/drivers/i2c/busses/i2c-bfin-twi.c
+++ b/drivers/i2c/busses/i2c-bfin-twi.c
@@ -656,7 +656,7 @@ static int i2c_bfin_twi_probe(struct platform_device *pdev)
 	strlcpy(p_adap->name, pdev->name, sizeof(p_adap->name));
 	p_adap->algo = &bfin_twi_algorithm;
 	p_adap->algo_data = iface;
-	p_adap->class = I2C_CLASS_ALL;
+	p_adap->class = I2C_CLASS_HWMON | I2C_CLASS_SPD;
 	p_adap->dev.parent = &pdev->dev;
 
 	rc = peripheral_request_list(pin_req[pdev->id], "i2c-bfin-twi");
diff --git a/drivers/i2c/busses/i2c-sh7760.c b/drivers/i2c/busses/i2c-sh7760.c
index 5e0e254976de..baa28b73ae42 100644
--- a/drivers/i2c/busses/i2c-sh7760.c
+++ b/drivers/i2c/busses/i2c-sh7760.c
@@ -475,7 +475,7 @@ static int __devinit sh7760_i2c_probe(struct platform_device *pdev)
 
 	id->adap.nr = pdev->id;
 	id->adap.algo = &sh7760_i2c_algo;
-	id->adap.class = I2C_CLASS_ALL;
+	id->adap.class = I2C_CLASS_HWMON | I2C_CLASS_SPD;
 	id->adap.retries = 3;
 	id->adap.algo_data = id;
 	id->adap.dev.parent = &pdev->dev;
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 33a5992d4936..0184de4050b8 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -397,7 +397,6 @@ static inline void i2c_set_adapdata(struct i2c_adapter *dev, void *data)
 #define I2C_CLASS_CAM_DIGITAL	(1<<5)	/* most webcams */
 #define I2C_CLASS_SOUND		(1<<6)	/* sound devices */
 #define I2C_CLASS_SPD		(1<<7)	/* SPD EEPROMs and similar */
-#define I2C_CLASS_ALL		(UINT_MAX) /* all of the above */
 
 /* i2c_client_address_data is the struct for holding default client
  * addresses for a driver and for the parameters supplied on the
-- 
cgit v1.2.3


From 994a075f0f2e8cdb919d8e495f98211651e3c461 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Wed, 7 Jan 2009 14:29:17 +0100
Subject: i2c: Drop I2C_CLASS_CAM_ANALOG and I2C_CLASS_SOUND

There are no users left of these two i2c probe class flags so we can
drop the now.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
---
 include/linux/i2c.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 0184de4050b8..4fd79d2e4021 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -393,9 +393,7 @@ static inline void i2c_set_adapdata(struct i2c_adapter *dev, void *data)
 #define I2C_CLASS_TV_ANALOG	(1<<1)	/* bttv + friends */
 #define I2C_CLASS_TV_DIGITAL	(1<<2)	/* dvb cards */
 #define I2C_CLASS_DDC		(1<<3)	/* DDC bus on graphics adapters */
-#define I2C_CLASS_CAM_ANALOG	(1<<4)	/* camera with analog CCD */
 #define I2C_CLASS_CAM_DIGITAL	(1<<5)	/* most webcams */
-#define I2C_CLASS_SOUND		(1<<6)	/* sound devices */
 #define I2C_CLASS_SPD		(1<<7)	/* SPD EEPROMs and similar */
 
 /* i2c_client_address_data is the struct for holding default client
-- 
cgit v1.2.3


From b305271861219f0ce162eb565f0f28f4c781299d Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Wed, 7 Jan 2009 14:29:17 +0100
Subject: i2c: Drop I2C_CLASS_CAM_DIGITAL

There are a number of drivers which set their i2c bus class to
I2C_CLASS_CAM_DIGITAL, however no chip driver actually checks for this
flag, so we might as well drop it now.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
---
 drivers/media/video/cafe_ccic.c                | 1 -
 drivers/media/video/ov7670.c                   | 1 -
 drivers/media/video/ovcamchip/ovcamchip_core.c | 1 -
 drivers/media/video/w9968cf.c                  | 1 -
 include/linux/i2c.h                            | 1 -
 5 files changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/media/video/cafe_ccic.c b/drivers/media/video/cafe_ccic.c
index 34a39d2e4703..46fd573a4f15 100644
--- a/drivers/media/video/cafe_ccic.c
+++ b/drivers/media/video/cafe_ccic.c
@@ -569,7 +569,6 @@ static int cafe_smbus_setup(struct cafe_camera *cam)
 
 	cafe_smbus_enable_irq(cam);
 	adap->id = I2C_HW_SMBUS_CAFE;
-	adap->class = I2C_CLASS_CAM_DIGITAL;
 	adap->owner = THIS_MODULE;
 	adap->client_register = cafe_smbus_attach;
 	adap->client_unregister = cafe_smbus_detach;
diff --git a/drivers/media/video/ov7670.c b/drivers/media/video/ov7670.c
index ca26b0c50cf2..05c14a29375a 100644
--- a/drivers/media/video/ov7670.c
+++ b/drivers/media/video/ov7670.c
@@ -1347,7 +1347,6 @@ static struct i2c_driver ov7670_driver = {
 		.name = "ov7670",
 	},
 	.id 		= I2C_DRIVERID_OV7670,
-	.class 		= I2C_CLASS_CAM_DIGITAL,
 	.attach_adapter = ov7670_attach,
 	.detach_client	= ov7670_detach,
 	.command	= ov7670_command,
diff --git a/drivers/media/video/ovcamchip/ovcamchip_core.c b/drivers/media/video/ovcamchip/ovcamchip_core.c
index 2c4acbf5a4fe..c841f4e4fbe4 100644
--- a/drivers/media/video/ovcamchip/ovcamchip_core.c
+++ b/drivers/media/video/ovcamchip/ovcamchip_core.c
@@ -405,7 +405,6 @@ static struct i2c_driver driver = {
 		.name =		"ovcamchip",
 	},
 	.id =			I2C_DRIVERID_OVCAMCHIP,
-	.class =		I2C_CLASS_CAM_DIGITAL,
 	.attach_adapter =	ovcamchip_attach,
 	.detach_client =	ovcamchip_detach,
 	.command =		ovcamchip_command,
diff --git a/drivers/media/video/w9968cf.c b/drivers/media/video/w9968cf.c
index a3997b7d4366..105a832531f2 100644
--- a/drivers/media/video/w9968cf.c
+++ b/drivers/media/video/w9968cf.c
@@ -1553,7 +1553,6 @@ static int w9968cf_i2c_init(struct w9968cf_device* cam)
 
 	static struct i2c_adapter adap = {
 		.id =                I2C_HW_SMBUS_W9968CF,
-		.class =             I2C_CLASS_CAM_DIGITAL,
 		.owner =             THIS_MODULE,
 		.client_register =   w9968cf_i2c_attach_inform,
 		.client_unregister = w9968cf_i2c_detach_inform,
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 4fd79d2e4021..20873d402467 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -393,7 +393,6 @@ static inline void i2c_set_adapdata(struct i2c_adapter *dev, void *data)
 #define I2C_CLASS_TV_ANALOG	(1<<1)	/* bttv + friends */
 #define I2C_CLASS_TV_DIGITAL	(1<<2)	/* dvb cards */
 #define I2C_CLASS_DDC		(1<<3)	/* DDC bus on graphics adapters */
-#define I2C_CLASS_CAM_DIGITAL	(1<<5)	/* most webcams */
 #define I2C_CLASS_SPD		(1<<7)	/* SPD EEPROMs and similar */
 
 /* i2c_client_address_data is the struct for holding default client
-- 
cgit v1.2.3


From 22a9d645677feefd402befd02edd59b122289ef1 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Wed, 7 Jan 2009 08:45:46 -0800
Subject: async: Asynchronous function calls to speed up kernel boot

Right now, most of the kernel boot is strictly synchronous, such that
various hardware delays are done sequentially.

In order to make the kernel boot faster, this patch introduces
infrastructure to allow doing some of the initialization steps
asynchronously, which will hide significant portions of the hardware delays
in practice.

In order to not change device order and other similar observables, this
patch does NOT do full parallel initialization.

Rather, it operates more in the way an out of order CPU does; the work may
be done out of order and asynchronous, but the observable effects
(instruction retiring for the CPU) are still done in the original sequence.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
---
 include/linux/async.h  |  25 ++++
 init/do_mounts.c       |   2 +
 init/main.c            |   5 +-
 kernel/Makefile        |   3 +-
 kernel/async.c         | 321 +++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/irq/autoprobe.c |   5 +
 kernel/module.c        |   2 +
 7 files changed, 361 insertions(+), 2 deletions(-)
 create mode 100644 include/linux/async.h
 create mode 100644 kernel/async.c

(limited to 'include/linux')

diff --git a/include/linux/async.h b/include/linux/async.h
new file mode 100644
index 000000000000..c4ecacd0b327
--- /dev/null
+++ b/include/linux/async.h
@@ -0,0 +1,25 @@
+/*
+ * async.h: Asynchronous function calls for boot performance
+ *
+ * (C) Copyright 2009 Intel Corporation
+ * Author: Arjan van de Ven <arjan@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/types.h>
+#include <linux/list.h>
+
+typedef u64 async_cookie_t;
+typedef void (async_func_ptr) (void *data, async_cookie_t cookie);
+
+extern async_cookie_t async_schedule(async_func_ptr *ptr, void *data);
+extern async_cookie_t async_schedule_special(async_func_ptr *ptr, void *data, struct list_head *list);
+extern void async_synchronize_full(void);
+extern void async_synchronize_full_special(struct list_head *list);
+extern void async_synchronize_cookie(async_cookie_t cookie);
+extern void async_synchronize_cookie_special(async_cookie_t cookie, struct list_head *list);
+
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 5efca73b39f9..708105e163df 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -13,6 +13,7 @@
 #include <linux/init.h>
 #include <linux/fs.h>
 #include <linux/initrd.h>
+#include <linux/async.h>
 
 #include <linux/nfs_fs.h>
 #include <linux/nfs_fs_sb.h>
@@ -372,6 +373,7 @@ void __init prepare_namespace(void)
 	/* wait for the known devices to complete their probing */
 	while (driver_probe_done() != 0)
 		msleep(100);
+	async_synchronize_full();
 
 	md_run_setup();
 
diff --git a/init/main.c b/init/main.c
index b5a892c68375..f66715d8a853 100644
--- a/init/main.c
+++ b/init/main.c
@@ -62,6 +62,7 @@
 #include <linux/signal.h>
 #include <linux/idr.h>
 #include <linux/ftrace.h>
+#include <linux/async.h>
 #include <trace/boot.h>
 
 #include <asm/io.h>
@@ -684,7 +685,7 @@ asmlinkage void __init start_kernel(void)
 	rest_init();
 }
 
-static int initcall_debug;
+int initcall_debug;
 core_param(initcall_debug, initcall_debug, bool, 0644);
 
 int do_one_initcall(initcall_t fn)
@@ -785,6 +786,8 @@ static void run_init_process(char *init_filename)
  */
 static noinline int init_post(void)
 {
+	/* need to finish all async __init code before freeing the memory */
+	async_synchronize_full();
 	free_initmem();
 	unlock_kernel();
 	mark_rodata_ro();
diff --git a/kernel/Makefile b/kernel/Makefile
index e1c5bf3365c0..2921d90ce32f 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -9,7 +9,8 @@ obj-y     = sched.o fork.o exec_domain.o panic.o printk.o \
 	    rcupdate.o extable.o params.o posix-timers.o \
 	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
 	    hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
-	    notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o
+	    notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \
+	    async.o
 
 ifdef CONFIG_FUNCTION_TRACER
 # Do not trace debug files and internal ftrace files
diff --git a/kernel/async.c b/kernel/async.c
new file mode 100644
index 000000000000..afaa8a653d5a
--- /dev/null
+++ b/kernel/async.c
@@ -0,0 +1,321 @@
+/*
+ * async.c: Asynchronous function calls for boot performance
+ *
+ * (C) Copyright 2009 Intel Corporation
+ * Author: Arjan van de Ven <arjan@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+
+/*
+
+Goals and Theory of Operation
+
+The primary goal of this feature is to reduce the kernel boot time,
+by doing various independent hardware delays and discovery operations
+decoupled and not strictly serialized.
+
+More specifically, the asynchronous function call concept allows
+certain operations (primarily during system boot) to happen
+asynchronously, out of order, while these operations still
+have their externally visible parts happen sequentially and in-order.
+(not unlike how out-of-order CPUs retire their instructions in order)
+
+Key to the asynchronous function call implementation is the concept of
+a "sequence cookie" (which, although it has an abstracted type, can be
+thought of as a monotonically incrementing number).
+
+The async core will assign each scheduled event such a sequence cookie and
+pass this to the called functions.
+
+The asynchronously called function should before doing a globally visible
+operation, such as registering device numbers, call the
+async_synchronize_cookie() function and pass in its own cookie. The
+async_synchronize_cookie() function will make sure that all asynchronous
+operations that were scheduled prior to the operation corresponding with the
+cookie have completed.
+
+Subsystem/driver initialization code that scheduled asynchronous probe
+functions, but which shares global resources with other drivers/subsystems
+that do not use the asynchronous call feature, need to do a full
+synchronization with the async_synchronize_full() function, before returning
+from their init function. This is to maintain strict ordering between the
+asynchronous and synchronous parts of the kernel.
+
+*/
+
+#include <linux/async.h>
+#include <linux/module.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/kthread.h>
+#include <asm/atomic.h>
+
+static async_cookie_t next_cookie = 1;
+
+#define MAX_THREADS	256
+#define MAX_WORK	32768
+
+static LIST_HEAD(async_pending);
+static LIST_HEAD(async_running);
+static DEFINE_SPINLOCK(async_lock);
+
+struct async_entry {
+	struct list_head list;
+	async_cookie_t   cookie;
+	async_func_ptr	 *func;
+	void             *data;
+	struct list_head *running;
+};
+
+static DECLARE_WAIT_QUEUE_HEAD(async_done);
+static DECLARE_WAIT_QUEUE_HEAD(async_new);
+
+static atomic_t entry_count;
+static atomic_t thread_count;
+
+extern int initcall_debug;
+
+
+/*
+ * MUST be called with the lock held!
+ */
+static async_cookie_t  __lowest_in_progress(struct list_head *running)
+{
+	struct async_entry *entry;
+	if (!list_empty(&async_pending)) {
+		entry = list_first_entry(&async_pending,
+			struct async_entry, list);
+		return entry->cookie;
+	} else if (!list_empty(running)) {
+		entry = list_first_entry(running,
+			struct async_entry, list);
+		return entry->cookie;
+	} else {
+		/* nothing in progress... next_cookie is "infinity" */
+		return next_cookie;
+	}
+
+}
+/*
+ * pick the first pending entry and run it
+ */
+static void run_one_entry(void)
+{
+	unsigned long flags;
+	struct async_entry *entry;
+	ktime_t calltime, delta, rettime;
+
+	/* 1) pick one task from the pending queue */
+
+	spin_lock_irqsave(&async_lock, flags);
+	if (list_empty(&async_pending))
+		goto out;
+	entry = list_first_entry(&async_pending, struct async_entry, list);
+
+	/* 2) move it to the running queue */
+	list_del(&entry->list);
+	list_add_tail(&entry->list, &async_running);
+	spin_unlock_irqrestore(&async_lock, flags);
+
+	/* 3) run it (and print duration)*/
+	if (initcall_debug) {
+		printk("calling  %lli_%pF @ %i\n", entry->cookie, entry->func, task_pid_nr(current));
+		calltime = ktime_get();
+	}
+	entry->func(entry->data, entry->cookie);
+	if (initcall_debug) {
+		rettime = ktime_get();
+		delta = ktime_sub(rettime, calltime);
+		printk("initcall %lli_%pF returned 0 after %lld usecs\n", entry->cookie,
+			entry->func, ktime_to_ns(delta) >> 10);
+	}
+
+	/* 4) remove it from the running queue */
+	spin_lock_irqsave(&async_lock, flags);
+	list_del(&entry->list);
+
+	/* 5) free the entry  */
+	kfree(entry);
+	atomic_dec(&entry_count);
+
+	spin_unlock_irqrestore(&async_lock, flags);
+
+	/* 6) wake up any waiters. */
+	wake_up(&async_done);
+	return;
+
+out:
+	spin_unlock_irqrestore(&async_lock, flags);
+}
+
+
+static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct list_head *running)
+{
+	struct async_entry *entry;
+	unsigned long flags;
+	async_cookie_t newcookie;
+	
+
+	/* allow irq-off callers */
+	entry = kzalloc(sizeof(struct async_entry), GFP_ATOMIC);
+
+	/*
+	 * If we're out of memory or if there's too much work
+	 * pending already, we execute synchronously.
+	 */
+	if (!entry || atomic_read(&entry_count) > MAX_WORK) {
+		kfree(entry);
+		spin_lock_irqsave(&async_lock, flags);
+		newcookie = next_cookie++;
+		spin_unlock_irqrestore(&async_lock, flags);
+
+		/* low on memory.. run synchronously */
+		ptr(data, newcookie);
+		return newcookie;
+	}
+	entry->func = ptr;
+	entry->data = data;
+	entry->running = running;
+
+	spin_lock_irqsave(&async_lock, flags);
+	newcookie = entry->cookie = next_cookie++;
+	list_add_tail(&entry->list, &async_pending);
+	atomic_inc(&entry_count);
+	spin_unlock_irqrestore(&async_lock, flags);
+	wake_up(&async_new);
+	return newcookie;
+}
+
+async_cookie_t async_schedule(async_func_ptr *ptr, void *data)
+{
+	return __async_schedule(ptr, data, &async_pending);
+}
+EXPORT_SYMBOL_GPL(async_schedule);
+
+async_cookie_t async_schedule_special(async_func_ptr *ptr, void *data, struct list_head *running)
+{
+	return __async_schedule(ptr, data, running);
+}
+EXPORT_SYMBOL_GPL(async_schedule_special);
+
+void async_synchronize_full(void)
+{
+	async_synchronize_cookie(next_cookie);
+}
+EXPORT_SYMBOL_GPL(async_synchronize_full);
+
+void async_synchronize_full_special(struct list_head *list)
+{
+	async_synchronize_cookie_special(next_cookie, list);
+}
+EXPORT_SYMBOL_GPL(async_synchronize_full_special);
+
+void async_synchronize_cookie_special(async_cookie_t cookie, struct list_head *running)
+{
+	ktime_t starttime, delta, endtime;
+
+	if (initcall_debug) {
+		printk("async_waiting @ %i\n", task_pid_nr(current));
+		starttime = ktime_get();
+	}
+
+	wait_event(async_done, __lowest_in_progress(running) >= cookie);
+
+	if (initcall_debug) {
+		endtime = ktime_get();
+		delta = ktime_sub(endtime, starttime);
+
+		printk("async_continuing @ %i after %lli usec\n",
+			task_pid_nr(current), ktime_to_ns(delta) >> 10);
+	}
+}
+EXPORT_SYMBOL_GPL(async_synchronize_cookie_special);
+
+void async_synchronize_cookie(async_cookie_t cookie)
+{
+	async_synchronize_cookie_special(cookie, &async_running);
+}
+EXPORT_SYMBOL_GPL(async_synchronize_cookie);
+
+
+static int async_thread(void *unused)
+{
+	DECLARE_WAITQUEUE(wq, current);
+	add_wait_queue(&async_new, &wq);
+
+	while (!kthread_should_stop()) {
+		int ret = HZ;
+		set_current_state(TASK_INTERRUPTIBLE);
+		/*
+		 * check the list head without lock.. false positives
+		 * are dealt with inside run_one_entry() while holding
+		 * the lock.
+		 */
+		rmb();
+		if (!list_empty(&async_pending))
+			run_one_entry();
+		else
+			ret = schedule_timeout(HZ);
+
+		if (ret == 0) {
+			/*
+			 * we timed out, this means we as thread are redundant.
+			 * we sign off and die, but we to avoid any races there
+			 * is a last-straw check to see if work snuck in.
+			 */
+			atomic_dec(&thread_count);
+			wmb(); /* manager must see our departure first */
+			if (list_empty(&async_pending))
+				break;
+			/*
+			 * woops work came in between us timing out and us
+			 * signing off; we need to stay alive and keep working.
+			 */
+			atomic_inc(&thread_count);
+		}
+	}
+	remove_wait_queue(&async_new, &wq);
+
+	return 0;
+}
+
+static int async_manager_thread(void *unused)
+{
+	DECLARE_WAITQUEUE(wq, current);
+	add_wait_queue(&async_new, &wq);
+
+	while (!kthread_should_stop()) {
+		int tc, ec;
+
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		tc = atomic_read(&thread_count);
+		rmb();
+		ec = atomic_read(&entry_count);
+
+		while (tc < ec && tc < MAX_THREADS) {
+			kthread_run(async_thread, NULL, "async/%i", tc);
+			atomic_inc(&thread_count);
+			tc++;
+		}
+
+		schedule();
+	}
+	remove_wait_queue(&async_new, &wq);
+
+	return 0;
+}
+
+static int __init async_init(void)
+{
+	kthread_run(async_manager_thread, NULL, "async/mgr");
+	return 0;
+}
+
+core_initcall(async_init);
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index cc0f7321b8ce..1de9700f416e 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/interrupt.h>
 #include <linux/delay.h>
+#include <linux/async.h>
 
 #include "internals.h"
 
@@ -34,6 +35,10 @@ unsigned long probe_irq_on(void)
 	unsigned int status;
 	int i;
 
+	/*
+	 * quiesce the kernel, or at least the asynchronous portion
+	 */
+	async_synchronize_full();
 	mutex_lock(&probing_active);
 	/*
 	 * something may have generated an irq long ago and we want to
diff --git a/kernel/module.c b/kernel/module.c
index 496dcb57b608..c9332c90d5a0 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -50,6 +50,7 @@
 #include <asm/sections.h>
 #include <linux/tracepoint.h>
 #include <linux/ftrace.h>
+#include <linux/async.h>
 
 #if 0
 #define DEBUGP printk
@@ -816,6 +817,7 @@ sys_delete_module(const char __user *name_user, unsigned int flags)
 		mod->exit();
 	blocking_notifier_call_chain(&module_notify_list,
 				     MODULE_STATE_GOING, mod);
+	async_synchronize_full();
 	mutex_lock(&module_mutex);
 	/* Store the name of the last unloaded module for diagnostic purposes */
 	strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module));
-- 
cgit v1.2.3


From efaee192063a54749c56b7383803e16fe553630e Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Tue, 6 Jan 2009 07:20:54 -0800
Subject: async: make the final inode deletion an asynchronous event

this makes "rm -rf" on a (names cached) kernel tree go from
11.6 to 8.6 seconds on an ext3 filesystem

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
---
 fs/inode.c         | 20 +++++++++++++-------
 fs/super.c         | 10 ++++++++++
 include/linux/fs.h |  5 +++++
 3 files changed, 28 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/fs/inode.c b/fs/inode.c
index 7a6e8c2ff7b1..0013ac1af8e7 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -22,6 +22,7 @@
 #include <linux/bootmem.h>
 #include <linux/inotify.h>
 #include <linux/mount.h>
+#include <linux/async.h>
 
 /*
  * This is needed for the following functions:
@@ -1138,16 +1139,11 @@ EXPORT_SYMBOL(remove_inode_hash);
  * I_FREEING is set so that no-one will take a new reference to the inode while
  * it is being deleted.
  */
-void generic_delete_inode(struct inode *inode)
+static void generic_delete_inode_async(void *data, async_cookie_t cookie)
 {
+	struct inode *inode = data;
 	const struct super_operations *op = inode->i_sb->s_op;
 
-	list_del_init(&inode->i_list);
-	list_del_init(&inode->i_sb_list);
-	inode->i_state |= I_FREEING;
-	inodes_stat.nr_inodes--;
-	spin_unlock(&inode_lock);
-
 	security_inode_delete(inode);
 
 	if (op->delete_inode) {
@@ -1171,6 +1167,16 @@ void generic_delete_inode(struct inode *inode)
 	destroy_inode(inode);
 }
 
+void generic_delete_inode(struct inode *inode)
+{
+	list_del_init(&inode->i_list);
+	list_del_init(&inode->i_sb_list);
+	inode->i_state |= I_FREEING;
+	inodes_stat.nr_inodes--;
+	spin_unlock(&inode_lock);
+	async_schedule_special(generic_delete_inode_async, inode, &inode->i_sb->s_async_list);
+}
+
 EXPORT_SYMBOL(generic_delete_inode);
 
 static void generic_forget_inode(struct inode *inode)
diff --git a/fs/super.c b/fs/super.c
index ddba069d7a99..cb20744ec789 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -38,6 +38,7 @@
 #include <linux/kobject.h>
 #include <linux/mutex.h>
 #include <linux/file.h>
+#include <linux/async.h>
 #include <asm/uaccess.h>
 #include "internal.h"
 
@@ -71,6 +72,7 @@ static struct super_block *alloc_super(struct file_system_type *type)
 		INIT_HLIST_HEAD(&s->s_anon);
 		INIT_LIST_HEAD(&s->s_inodes);
 		INIT_LIST_HEAD(&s->s_dentry_lru);
+		INIT_LIST_HEAD(&s->s_async_list);
 		init_rwsem(&s->s_umount);
 		mutex_init(&s->s_lock);
 		lockdep_set_class(&s->s_umount, &type->s_umount_key);
@@ -289,11 +291,18 @@ void generic_shutdown_super(struct super_block *sb)
 {
 	const struct super_operations *sop = sb->s_op;
 
+
 	if (sb->s_root) {
 		shrink_dcache_for_umount(sb);
 		fsync_super(sb);
 		lock_super(sb);
 		sb->s_flags &= ~MS_ACTIVE;
+
+		/*
+		 * wait for asynchronous fs operations to finish before going further
+		 */
+		async_synchronize_full_special(&sb->s_async_list);
+
 		/* bad name - it should be evict_inodes() */
 		invalidate_inodes(sb);
 		lock_kernel();
@@ -449,6 +458,7 @@ void sync_filesystems(int wait)
 		if (sb->s_flags & MS_RDONLY)
 			continue;
 		sb->s_need_sync_fs = 1;
+		async_synchronize_full_special(&sb->s_async_list);
 	}
 
 restart:
diff --git a/include/linux/fs.h b/include/linux/fs.h
index d7eba77f666e..e38a64d71eff 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1184,6 +1184,11 @@ struct super_block {
 	 * generic_show_options()
 	 */
 	char *s_options;
+
+	/*
+	 * storage for asynchronous operations
+	 */
+	struct list_head s_async_list;
 };
 
 extern struct timespec current_fs_time(struct super_block *sb);
-- 
cgit v1.2.3


From b92a78e582b1a45649143dc86e526f5824092478 Mon Sep 17 00:00:00 2001
From: Rodolfo Giometti <giometti@linux.it>
Date: Thu, 23 Oct 2008 10:08:07 +0200
Subject: usb host: Oxford OXU210HP HCD driver.

This driver implements the support for Oxford OXU210HP USB high-speed host,
no peripheral nor OTG.

Signed-off-by: Rodolfo Giometti <giometti@linux.it>
Cc: Kan Liu <kan.k.liu@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/host/Kconfig        |   13 +
 drivers/usb/host/Makefile       |    1 +
 drivers/usb/host/oxu210hp-hcd.c | 3985 +++++++++++++++++++++++++++++++++++++++
 drivers/usb/host/oxu210hp.h     |  447 +++++
 include/linux/oxu210hp.h        |    7 +
 5 files changed, 4453 insertions(+)
 create mode 100644 drivers/usb/host/oxu210hp-hcd.c
 create mode 100644 drivers/usb/host/oxu210hp.h
 create mode 100644 include/linux/oxu210hp.h

(limited to 'include/linux')

diff --git a/drivers/usb/host/Kconfig b/drivers/usb/host/Kconfig
index f3a75a929e0a..2b476b6b3d4d 100644
--- a/drivers/usb/host/Kconfig
+++ b/drivers/usb/host/Kconfig
@@ -96,6 +96,19 @@ config USB_EHCI_HCD_PPC_OF
 	  Enables support for the USB controller present on the PowerPC
 	  OpenFirmware platform bus.
 
+config USB_OXU210HP_HCD
+	tristate "OXU210HP HCD support"
+	depends on USB
+	---help---
+	  The OXU210HP is an USB host/OTG/device controller. Enable this
+	  option if your board has this chip. If unsure, say N.
+
+	  This driver does not support isochronous transfers and doesn't
+	  implement OTG nor USB device controllers.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called oxu210hp-hcd.
+
 config USB_ISP116X_HCD
 	tristate "ISP116X HCD support"
 	depends on USB
diff --git a/drivers/usb/host/Makefile b/drivers/usb/host/Makefile
index 23be22224044..e5f3f20787e4 100644
--- a/drivers/usb/host/Makefile
+++ b/drivers/usb/host/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_USB_WHCI_HCD)	+= whci/
 obj-$(CONFIG_PCI)		+= pci-quirks.o
 
 obj-$(CONFIG_USB_EHCI_HCD)	+= ehci-hcd.o
+obj-$(CONFIG_USB_OXU210HP_HCD)	+= oxu210hp-hcd.o
 obj-$(CONFIG_USB_ISP116X_HCD)	+= isp116x-hcd.o
 obj-$(CONFIG_USB_OHCI_HCD)	+= ohci-hcd.o
 obj-$(CONFIG_USB_UHCI_HCD)	+= uhci-hcd.o
diff --git a/drivers/usb/host/oxu210hp-hcd.c b/drivers/usb/host/oxu210hp-hcd.c
new file mode 100644
index 000000000000..251123c29d81
--- /dev/null
+++ b/drivers/usb/host/oxu210hp-hcd.c
@@ -0,0 +1,3985 @@
+/*
+ * Copyright (c) 2008 Rodolfo Giometti <giometti@linux.it>
+ * Copyright (c) 2008 Eurotech S.p.A. <info@eurtech.it>
+ *
+ * This code is *strongly* based on EHCI-HCD code by David Brownell since
+ * the chip is a quasi-EHCI compatible.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/dmapool.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/ioport.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/timer.h>
+#include <linux/list.h>
+#include <linux/interrupt.h>
+#include <linux/reboot.h>
+#include <linux/usb.h>
+#include <linux/moduleparam.h>
+#include <linux/dma-mapping.h>
+#include <linux/io.h>
+
+#include "../core/hcd.h"
+
+#include <asm/irq.h>
+#include <asm/system.h>
+#include <asm/unaligned.h>
+
+#include <linux/irq.h>
+#include <linux/platform_device.h>
+
+#include "oxu210hp.h"
+
+#define DRIVER_VERSION "0.0.50"
+
+/*
+ * Main defines
+ */
+
+#define oxu_dbg(oxu, fmt, args...) \
+		dev_dbg(oxu_to_hcd(oxu)->self.controller , fmt , ## args)
+#define oxu_err(oxu, fmt, args...) \
+		dev_err(oxu_to_hcd(oxu)->self.controller , fmt , ## args)
+#define oxu_info(oxu, fmt, args...) \
+		dev_info(oxu_to_hcd(oxu)->self.controller , fmt , ## args)
+
+static inline struct usb_hcd *oxu_to_hcd(struct oxu_hcd *oxu)
+{
+	return container_of((void *) oxu, struct usb_hcd, hcd_priv);
+}
+
+static inline struct oxu_hcd *hcd_to_oxu(struct usb_hcd *hcd)
+{
+	return (struct oxu_hcd *) (hcd->hcd_priv);
+}
+
+/*
+ * Debug stuff
+ */
+
+#undef OXU_URB_TRACE
+#undef OXU_VERBOSE_DEBUG
+
+#ifdef OXU_VERBOSE_DEBUG
+#define oxu_vdbg			oxu_dbg
+#else
+#define oxu_vdbg(oxu, fmt, args...)	/* Nop */
+#endif
+
+#ifdef DEBUG
+
+static int __attribute__((__unused__))
+dbg_status_buf(char *buf, unsigned len, const char *label, u32 status)
+{
+	return scnprintf(buf, len, "%s%sstatus %04x%s%s%s%s%s%s%s%s%s%s",
+		label, label[0] ? " " : "", status,
+		(status & STS_ASS) ? " Async" : "",
+		(status & STS_PSS) ? " Periodic" : "",
+		(status & STS_RECL) ? " Recl" : "",
+		(status & STS_HALT) ? " Halt" : "",
+		(status & STS_IAA) ? " IAA" : "",
+		(status & STS_FATAL) ? " FATAL" : "",
+		(status & STS_FLR) ? " FLR" : "",
+		(status & STS_PCD) ? " PCD" : "",
+		(status & STS_ERR) ? " ERR" : "",
+		(status & STS_INT) ? " INT" : ""
+		);
+}
+
+static int __attribute__((__unused__))
+dbg_intr_buf(char *buf, unsigned len, const char *label, u32 enable)
+{
+	return scnprintf(buf, len, "%s%sintrenable %02x%s%s%s%s%s%s",
+		label, label[0] ? " " : "", enable,
+		(enable & STS_IAA) ? " IAA" : "",
+		(enable & STS_FATAL) ? " FATAL" : "",
+		(enable & STS_FLR) ? " FLR" : "",
+		(enable & STS_PCD) ? " PCD" : "",
+		(enable & STS_ERR) ? " ERR" : "",
+		(enable & STS_INT) ? " INT" : ""
+		);
+}
+
+static const char *const fls_strings[] =
+    { "1024", "512", "256", "??" };
+
+static int dbg_command_buf(char *buf, unsigned len,
+				const char *label, u32 command)
+{
+	return scnprintf(buf, len,
+		"%s%scommand %06x %s=%d ithresh=%d%s%s%s%s period=%s%s %s",
+		label, label[0] ? " " : "", command,
+		(command & CMD_PARK) ? "park" : "(park)",
+		CMD_PARK_CNT(command),
+		(command >> 16) & 0x3f,
+		(command & CMD_LRESET) ? " LReset" : "",
+		(command & CMD_IAAD) ? " IAAD" : "",
+		(command & CMD_ASE) ? " Async" : "",
+		(command & CMD_PSE) ? " Periodic" : "",
+		fls_strings[(command >> 2) & 0x3],
+		(command & CMD_RESET) ? " Reset" : "",
+		(command & CMD_RUN) ? "RUN" : "HALT"
+		);
+}
+
+static int dbg_port_buf(char *buf, unsigned len, const char *label,
+				int port, u32 status)
+{
+	char	*sig;
+
+	/* signaling state */
+	switch (status & (3 << 10)) {
+	case 0 << 10:
+		sig = "se0";
+		break;
+	case 1 << 10:
+		sig = "k";	/* low speed */
+		break;
+	case 2 << 10:
+		sig = "j";
+		break;
+	default:
+		sig = "?";
+		break;
+	}
+
+	return scnprintf(buf, len,
+		"%s%sport %d status %06x%s%s sig=%s%s%s%s%s%s%s%s%s%s",
+		label, label[0] ? " " : "", port, status,
+		(status & PORT_POWER) ? " POWER" : "",
+		(status & PORT_OWNER) ? " OWNER" : "",
+		sig,
+		(status & PORT_RESET) ? " RESET" : "",
+		(status & PORT_SUSPEND) ? " SUSPEND" : "",
+		(status & PORT_RESUME) ? " RESUME" : "",
+		(status & PORT_OCC) ? " OCC" : "",
+		(status & PORT_OC) ? " OC" : "",
+		(status & PORT_PEC) ? " PEC" : "",
+		(status & PORT_PE) ? " PE" : "",
+		(status & PORT_CSC) ? " CSC" : "",
+		(status & PORT_CONNECT) ? " CONNECT" : ""
+	    );
+}
+
+#else
+
+static inline int __attribute__((__unused__))
+dbg_status_buf(char *buf, unsigned len, const char *label, u32 status)
+{ return 0; }
+
+static inline int __attribute__((__unused__))
+dbg_command_buf(char *buf, unsigned len, const char *label, u32 command)
+{ return 0; }
+
+static inline int __attribute__((__unused__))
+dbg_intr_buf(char *buf, unsigned len, const char *label, u32 enable)
+{ return 0; }
+
+static inline int __attribute__((__unused__))
+dbg_port_buf(char *buf, unsigned len, const char *label, int port, u32 status)
+{ return 0; }
+
+#endif /* DEBUG */
+
+/* functions have the "wrong" filename when they're output... */
+#define dbg_status(oxu, label, status) { \
+	char _buf[80]; \
+	dbg_status_buf(_buf, sizeof _buf, label, status); \
+	oxu_dbg(oxu, "%s\n", _buf); \
+}
+
+#define dbg_cmd(oxu, label, command) { \
+	char _buf[80]; \
+	dbg_command_buf(_buf, sizeof _buf, label, command); \
+	oxu_dbg(oxu, "%s\n", _buf); \
+}
+
+#define dbg_port(oxu, label, port, status) { \
+	char _buf[80]; \
+	dbg_port_buf(_buf, sizeof _buf, label, port, status); \
+	oxu_dbg(oxu, "%s\n", _buf); \
+}
+
+/*
+ * Module parameters
+ */
+
+/* Initial IRQ latency: faster than hw default */
+static int log2_irq_thresh;			/* 0 to 6 */
+module_param(log2_irq_thresh, int, S_IRUGO);
+MODULE_PARM_DESC(log2_irq_thresh, "log2 IRQ latency, 1-64 microframes");
+
+/* Initial park setting: slower than hw default */
+static unsigned park;
+module_param(park, uint, S_IRUGO);
+MODULE_PARM_DESC(park, "park setting; 1-3 back-to-back async packets");
+
+/* For flakey hardware, ignore overcurrent indicators */
+static int ignore_oc;
+module_param(ignore_oc, bool, S_IRUGO);
+MODULE_PARM_DESC(ignore_oc, "ignore bogus hardware overcurrent indications");
+
+
+static void ehci_work(struct oxu_hcd *oxu);
+static int oxu_hub_control(struct usb_hcd *hcd,
+				u16 typeReq, u16 wValue, u16 wIndex,
+				char *buf, u16 wLength);
+
+/*
+ * Local functions
+ */
+
+/* Low level read/write registers functions */
+static inline u32 oxu_readl(void *base, u32 reg)
+{
+	return readl(base + reg);
+}
+
+static inline void oxu_writel(void *base, u32 reg, u32 val)
+{
+	writel(val, base + reg);
+}
+
+static inline void timer_action_done(struct oxu_hcd *oxu,
+					enum ehci_timer_action action)
+{
+	clear_bit(action, &oxu->actions);
+}
+
+static inline void timer_action(struct oxu_hcd *oxu,
+					enum ehci_timer_action action)
+{
+	if (!test_and_set_bit(action, &oxu->actions)) {
+		unsigned long t;
+
+		switch (action) {
+		case TIMER_IAA_WATCHDOG:
+			t = EHCI_IAA_JIFFIES;
+			break;
+		case TIMER_IO_WATCHDOG:
+			t = EHCI_IO_JIFFIES;
+			break;
+		case TIMER_ASYNC_OFF:
+			t = EHCI_ASYNC_JIFFIES;
+			break;
+		case TIMER_ASYNC_SHRINK:
+		default:
+			t = EHCI_SHRINK_JIFFIES;
+			break;
+		}
+		t += jiffies;
+		/* all timings except IAA watchdog can be overridden.
+		 * async queue SHRINK often precedes IAA.  while it's ready
+		 * to go OFF neither can matter, and afterwards the IO
+		 * watchdog stops unless there's still periodic traffic.
+		 */
+		if (action != TIMER_IAA_WATCHDOG
+				&& t > oxu->watchdog.expires
+				&& timer_pending(&oxu->watchdog))
+			return;
+		mod_timer(&oxu->watchdog, t);
+	}
+}
+
+/*
+ * handshake - spin reading hc until handshake completes or fails
+ * @ptr: address of hc register to be read
+ * @mask: bits to look at in result of read
+ * @done: value of those bits when handshake succeeds
+ * @usec: timeout in microseconds
+ *
+ * Returns negative errno, or zero on success
+ *
+ * Success happens when the "mask" bits have the specified value (hardware
+ * handshake done).  There are two failure modes:  "usec" have passed (major
+ * hardware flakeout), or the register reads as all-ones (hardware removed).
+ *
+ * That last failure should_only happen in cases like physical cardbus eject
+ * before driver shutdown. But it also seems to be caused by bugs in cardbus
+ * bridge shutdown:  shutting down the bridge before the devices using it.
+ */
+static int handshake(struct oxu_hcd *oxu, void __iomem *ptr,
+					u32 mask, u32 done, int usec)
+{
+	u32 result;
+
+	do {
+		result = readl(ptr);
+		if (result == ~(u32)0)		/* card removed */
+			return -ENODEV;
+		result &= mask;
+		if (result == done)
+			return 0;
+		udelay(1);
+		usec--;
+	} while (usec > 0);
+	return -ETIMEDOUT;
+}
+
+/* Force HC to halt state from unknown (EHCI spec section 2.3) */
+static int ehci_halt(struct oxu_hcd *oxu)
+{
+	u32	temp = readl(&oxu->regs->status);
+
+	/* disable any irqs left enabled by previous code */
+	writel(0, &oxu->regs->intr_enable);
+
+	if ((temp & STS_HALT) != 0)
+		return 0;
+
+	temp = readl(&oxu->regs->command);
+	temp &= ~CMD_RUN;
+	writel(temp, &oxu->regs->command);
+	return handshake(oxu, &oxu->regs->status,
+			  STS_HALT, STS_HALT, 16 * 125);
+}
+
+/* Put TDI/ARC silicon into EHCI mode */
+static void tdi_reset(struct oxu_hcd *oxu)
+{
+	u32 __iomem *reg_ptr;
+	u32 tmp;
+
+	reg_ptr = (u32 __iomem *)(((u8 __iomem *)oxu->regs) + 0x68);
+	tmp = readl(reg_ptr);
+	tmp |= 0x3;
+	writel(tmp, reg_ptr);
+}
+
+/* Reset a non-running (STS_HALT == 1) controller */
+static int ehci_reset(struct oxu_hcd *oxu)
+{
+	int	retval;
+	u32	command = readl(&oxu->regs->command);
+
+	command |= CMD_RESET;
+	dbg_cmd(oxu, "reset", command);
+	writel(command, &oxu->regs->command);
+	oxu_to_hcd(oxu)->state = HC_STATE_HALT;
+	oxu->next_statechange = jiffies;
+	retval = handshake(oxu, &oxu->regs->command,
+			    CMD_RESET, 0, 250 * 1000);
+
+	if (retval)
+		return retval;
+
+	tdi_reset(oxu);
+
+	return retval;
+}
+
+/* Idle the controller (from running) */
+static void ehci_quiesce(struct oxu_hcd *oxu)
+{
+	u32	temp;
+
+#ifdef DEBUG
+	if (!HC_IS_RUNNING(oxu_to_hcd(oxu)->state))
+		BUG();
+#endif
+
+	/* wait for any schedule enables/disables to take effect */
+	temp = readl(&oxu->regs->command) << 10;
+	temp &= STS_ASS | STS_PSS;
+	if (handshake(oxu, &oxu->regs->status, STS_ASS | STS_PSS,
+				temp, 16 * 125) != 0) {
+		oxu_to_hcd(oxu)->state = HC_STATE_HALT;
+		return;
+	}
+
+	/* then disable anything that's still active */
+	temp = readl(&oxu->regs->command);
+	temp &= ~(CMD_ASE | CMD_IAAD | CMD_PSE);
+	writel(temp, &oxu->regs->command);
+
+	/* hardware can take 16 microframes to turn off ... */
+	if (handshake(oxu, &oxu->regs->status, STS_ASS | STS_PSS,
+				0, 16 * 125) != 0) {
+		oxu_to_hcd(oxu)->state = HC_STATE_HALT;
+		return;
+	}
+}
+
+static int check_reset_complete(struct oxu_hcd *oxu, int index,
+				u32 __iomem *status_reg, int port_status)
+{
+	if (!(port_status & PORT_CONNECT)) {
+		oxu->reset_done[index] = 0;
+		return port_status;
+	}
+
+	/* if reset finished and it's still not enabled -- handoff */
+	if (!(port_status & PORT_PE)) {
+		oxu_dbg(oxu, "Failed to enable port %d on root hub TT\n",
+				index+1);
+		return port_status;
+	} else
+		oxu_dbg(oxu, "port %d high speed\n", index + 1);
+
+	return port_status;
+}
+
+static void ehci_hub_descriptor(struct oxu_hcd *oxu,
+				struct usb_hub_descriptor *desc)
+{
+	int ports = HCS_N_PORTS(oxu->hcs_params);
+	u16 temp;
+
+	desc->bDescriptorType = 0x29;
+	desc->bPwrOn2PwrGood = 10;	/* oxu 1.0, 2.3.9 says 20ms max */
+	desc->bHubContrCurrent = 0;
+
+	desc->bNbrPorts = ports;
+	temp = 1 + (ports / 8);
+	desc->bDescLength = 7 + 2 * temp;
+
+	/* two bitmaps:  ports removable, and usb 1.0 legacy PortPwrCtrlMask */
+	memset(&desc->bitmap[0], 0, temp);
+	memset(&desc->bitmap[temp], 0xff, temp);
+
+	temp = 0x0008;			/* per-port overcurrent reporting */
+	if (HCS_PPC(oxu->hcs_params))
+		temp |= 0x0001;		/* per-port power control */
+	else
+		temp |= 0x0002;		/* no power switching */
+	desc->wHubCharacteristics = (__force __u16)cpu_to_le16(temp);
+}
+
+
+/* Allocate an OXU210HP on-chip memory data buffer
+ *
+ * An on-chip memory data buffer is required for each OXU210HP USB transfer.
+ * Each transfer descriptor has one or more on-chip memory data buffers.
+ *
+ * Data buffers are allocated from a fix sized pool of data blocks.
+ * To minimise fragmentation and give reasonable memory utlisation,
+ * data buffers are allocated with sizes the power of 2 multiples of
+ * the block size, starting on an address a multiple of the allocated size.
+ *
+ * FIXME: callers of this function require a buffer to be allocated for
+ * len=0. This is a waste of on-chip memory and should be fix. Then this
+ * function should be changed to not allocate a buffer for len=0.
+ */
+static int oxu_buf_alloc(struct oxu_hcd *oxu, struct ehci_qtd *qtd, int len)
+{
+	int n_blocks;	/* minium blocks needed to hold len */
+	int a_blocks;	/* blocks allocated */
+	int i, j;
+
+	/* Don't allocte bigger than supported */
+	if (len > BUFFER_SIZE * BUFFER_NUM) {
+		oxu_err(oxu, "buffer too big (%d)\n", len);
+		return -ENOMEM;
+	}
+
+	spin_lock(&oxu->mem_lock);
+
+	/* Number of blocks needed to hold len */
+	n_blocks = (len + BUFFER_SIZE - 1) / BUFFER_SIZE;
+
+	/* Round the number of blocks up to the power of 2 */
+	for (a_blocks = 1; a_blocks < n_blocks; a_blocks <<= 1)
+		;
+
+	/* Find a suitable available data buffer */
+	for (i = 0; i < BUFFER_NUM;
+			i += max(a_blocks, (int)oxu->db_used[i])) {
+
+		/* Check all the required blocks are available */
+		for (j = 0; j < a_blocks; j++)
+			if (oxu->db_used[i + j])
+				break;
+
+		if (j != a_blocks)
+			continue;
+
+		/* Allocate blocks found! */
+		qtd->buffer = (void *) &oxu->mem->db_pool[i];
+		qtd->buffer_dma = virt_to_phys(qtd->buffer);
+
+		qtd->qtd_buffer_len = BUFFER_SIZE * a_blocks;
+		oxu->db_used[i] = a_blocks;
+
+		spin_unlock(&oxu->mem_lock);
+
+		return 0;
+	}
+
+	/* Failed */
+
+	spin_unlock(&oxu->mem_lock);
+
+	return -ENOMEM;
+}
+
+static void oxu_buf_free(struct oxu_hcd *oxu, struct ehci_qtd *qtd)
+{
+	int index;
+
+	spin_lock(&oxu->mem_lock);
+
+	index = (qtd->buffer - (void *) &oxu->mem->db_pool[0])
+							 / BUFFER_SIZE;
+	oxu->db_used[index] = 0;
+	qtd->qtd_buffer_len = 0;
+	qtd->buffer_dma = 0;
+	qtd->buffer = NULL;
+
+	spin_unlock(&oxu->mem_lock);
+
+	return;
+}
+
+static inline void ehci_qtd_init(struct ehci_qtd *qtd, dma_addr_t dma)
+{
+	memset(qtd, 0, sizeof *qtd);
+	qtd->qtd_dma = dma;
+	qtd->hw_token = cpu_to_le32(QTD_STS_HALT);
+	qtd->hw_next = EHCI_LIST_END;
+	qtd->hw_alt_next = EHCI_LIST_END;
+	INIT_LIST_HEAD(&qtd->qtd_list);
+}
+
+static inline void oxu_qtd_free(struct oxu_hcd *oxu, struct ehci_qtd *qtd)
+{
+	int index;
+
+	if (qtd->buffer)
+		oxu_buf_free(oxu, qtd);
+
+	spin_lock(&oxu->mem_lock);
+
+	index = qtd - &oxu->mem->qtd_pool[0];
+	oxu->qtd_used[index] = 0;
+
+	spin_unlock(&oxu->mem_lock);
+
+	return;
+}
+
+static struct ehci_qtd *ehci_qtd_alloc(struct oxu_hcd *oxu)
+{
+	int i;
+	struct ehci_qtd *qtd = NULL;
+
+	spin_lock(&oxu->mem_lock);
+
+	for (i = 0; i < QTD_NUM; i++)
+		if (!oxu->qtd_used[i])
+			break;
+
+	if (i < QTD_NUM) {
+		qtd = (struct ehci_qtd *) &oxu->mem->qtd_pool[i];
+		memset(qtd, 0, sizeof *qtd);
+
+		qtd->hw_token = cpu_to_le32(QTD_STS_HALT);
+		qtd->hw_next = EHCI_LIST_END;
+		qtd->hw_alt_next = EHCI_LIST_END;
+		INIT_LIST_HEAD(&qtd->qtd_list);
+
+		qtd->qtd_dma = virt_to_phys(qtd);
+
+		oxu->qtd_used[i] = 1;
+	}
+
+	spin_unlock(&oxu->mem_lock);
+
+	return qtd;
+}
+
+static void oxu_qh_free(struct oxu_hcd *oxu, struct ehci_qh *qh)
+{
+	int index;
+
+	spin_lock(&oxu->mem_lock);
+
+	index = qh - &oxu->mem->qh_pool[0];
+	oxu->qh_used[index] = 0;
+
+	spin_unlock(&oxu->mem_lock);
+
+	return;
+}
+
+static void qh_destroy(struct kref *kref)
+{
+	struct ehci_qh *qh = container_of(kref, struct ehci_qh, kref);
+	struct oxu_hcd *oxu = qh->oxu;
+
+	/* clean qtds first, and know this is not linked */
+	if (!list_empty(&qh->qtd_list) || qh->qh_next.ptr) {
+		oxu_dbg(oxu, "unused qh not empty!\n");
+		BUG();
+	}
+	if (qh->dummy)
+		oxu_qtd_free(oxu, qh->dummy);
+	oxu_qh_free(oxu, qh);
+}
+
+static struct ehci_qh *oxu_qh_alloc(struct oxu_hcd *oxu)
+{
+	int i;
+	struct ehci_qh *qh = NULL;
+
+	spin_lock(&oxu->mem_lock);
+
+	for (i = 0; i < QHEAD_NUM; i++)
+		if (!oxu->qh_used[i])
+			break;
+
+	if (i < QHEAD_NUM) {
+		qh = (struct ehci_qh *) &oxu->mem->qh_pool[i];
+		memset(qh, 0, sizeof *qh);
+
+		kref_init(&qh->kref);
+		qh->oxu = oxu;
+		qh->qh_dma = virt_to_phys(qh);
+		INIT_LIST_HEAD(&qh->qtd_list);
+
+		/* dummy td enables safe urb queuing */
+		qh->dummy = ehci_qtd_alloc(oxu);
+		if (qh->dummy == NULL) {
+			oxu_dbg(oxu, "no dummy td\n");
+			oxu->qh_used[i] = 0;
+
+			return NULL;
+		}
+
+		oxu->qh_used[i] = 1;
+	}
+
+	spin_unlock(&oxu->mem_lock);
+
+	return qh;
+}
+
+/* to share a qh (cpu threads, or hc) */
+static inline struct ehci_qh *qh_get(struct ehci_qh *qh)
+{
+	kref_get(&qh->kref);
+	return qh;
+}
+
+static inline void qh_put(struct ehci_qh *qh)
+{
+	kref_put(&qh->kref, qh_destroy);
+}
+
+static void oxu_murb_free(struct oxu_hcd *oxu, struct oxu_murb *murb)
+{
+	int index;
+
+	spin_lock(&oxu->mem_lock);
+
+	index = murb - &oxu->murb_pool[0];
+	oxu->murb_used[index] = 0;
+
+	spin_unlock(&oxu->mem_lock);
+
+	return;
+}
+
+static struct oxu_murb *oxu_murb_alloc(struct oxu_hcd *oxu)
+
+{
+	int i;
+	struct oxu_murb *murb = NULL;
+
+	spin_lock(&oxu->mem_lock);
+
+	for (i = 0; i < MURB_NUM; i++)
+		if (!oxu->murb_used[i])
+			break;
+
+	if (i < MURB_NUM) {
+		murb = &(oxu->murb_pool)[i];
+
+		oxu->murb_used[i] = 1;
+	}
+
+	spin_unlock(&oxu->mem_lock);
+
+	return murb;
+}
+
+/* The queue heads and transfer descriptors are managed from pools tied
+ * to each of the "per device" structures.
+ * This is the initialisation and cleanup code.
+ */
+static void ehci_mem_cleanup(struct oxu_hcd *oxu)
+{
+	kfree(oxu->murb_pool);
+	oxu->murb_pool = NULL;
+
+	if (oxu->async)
+		qh_put(oxu->async);
+	oxu->async = NULL;
+
+	del_timer(&oxu->urb_timer);
+
+	oxu->periodic = NULL;
+
+	/* shadow periodic table */
+	kfree(oxu->pshadow);
+	oxu->pshadow = NULL;
+}
+
+/* Remember to add cleanup code (above) if you add anything here.
+ */
+static int ehci_mem_init(struct oxu_hcd *oxu, gfp_t flags)
+{
+	int i;
+
+	for (i = 0; i < oxu->periodic_size; i++)
+		oxu->mem->frame_list[i] = EHCI_LIST_END;
+	for (i = 0; i < QHEAD_NUM; i++)
+		oxu->qh_used[i] = 0;
+	for (i = 0; i < QTD_NUM; i++)
+		oxu->qtd_used[i] = 0;
+
+	oxu->murb_pool = kcalloc(MURB_NUM, sizeof(struct oxu_murb), flags);
+	if (!oxu->murb_pool)
+		goto fail;
+
+	for (i = 0; i < MURB_NUM; i++)
+		oxu->murb_used[i] = 0;
+
+	oxu->async = oxu_qh_alloc(oxu);
+	if (!oxu->async)
+		goto fail;
+
+	oxu->periodic = (__le32 *) &oxu->mem->frame_list;
+	oxu->periodic_dma = virt_to_phys(oxu->periodic);
+
+	for (i = 0; i < oxu->periodic_size; i++)
+		oxu->periodic[i] = EHCI_LIST_END;
+
+	/* software shadow of hardware table */
+	oxu->pshadow = kcalloc(oxu->periodic_size, sizeof(void *), flags);
+	if (oxu->pshadow != NULL)
+		return 0;
+
+fail:
+	oxu_dbg(oxu, "couldn't init memory\n");
+	ehci_mem_cleanup(oxu);
+	return -ENOMEM;
+}
+
+/* Fill a qtd, returning how much of the buffer we were able to queue up.
+ */
+static int qtd_fill(struct ehci_qtd *qtd, dma_addr_t buf, size_t len,
+				int token, int maxpacket)
+{
+	int i, count;
+	u64 addr = buf;
+
+	/* one buffer entry per 4K ... first might be short or unaligned */
+	qtd->hw_buf[0] = cpu_to_le32((u32)addr);
+	qtd->hw_buf_hi[0] = cpu_to_le32((u32)(addr >> 32));
+	count = 0x1000 - (buf & 0x0fff);	/* rest of that page */
+	if (likely(len < count))		/* ... iff needed */
+		count = len;
+	else {
+		buf +=  0x1000;
+		buf &= ~0x0fff;
+
+		/* per-qtd limit: from 16K to 20K (best alignment) */
+		for (i = 1; count < len && i < 5; i++) {
+			addr = buf;
+			qtd->hw_buf[i] = cpu_to_le32((u32)addr);
+			qtd->hw_buf_hi[i] = cpu_to_le32((u32)(addr >> 32));
+			buf += 0x1000;
+			if ((count + 0x1000) < len)
+				count += 0x1000;
+			else
+				count = len;
+		}
+
+		/* short packets may only terminate transfers */
+		if (count != len)
+			count -= (count % maxpacket);
+	}
+	qtd->hw_token = cpu_to_le32((count << 16) | token);
+	qtd->length = count;
+
+	return count;
+}
+
+static inline void qh_update(struct oxu_hcd *oxu,
+				struct ehci_qh *qh, struct ehci_qtd *qtd)
+{
+	/* writes to an active overlay are unsafe */
+	BUG_ON(qh->qh_state != QH_STATE_IDLE);
+
+	qh->hw_qtd_next = QTD_NEXT(qtd->qtd_dma);
+	qh->hw_alt_next = EHCI_LIST_END;
+
+	/* Except for control endpoints, we make hardware maintain data
+	 * toggle (like OHCI) ... here (re)initialize the toggle in the QH,
+	 * and set the pseudo-toggle in udev. Only usb_clear_halt() will
+	 * ever clear it.
+	 */
+	if (!(qh->hw_info1 & cpu_to_le32(1 << 14))) {
+		unsigned	is_out, epnum;
+
+		is_out = !(qtd->hw_token & cpu_to_le32(1 << 8));
+		epnum = (le32_to_cpup(&qh->hw_info1) >> 8) & 0x0f;
+		if (unlikely(!usb_gettoggle(qh->dev, epnum, is_out))) {
+			qh->hw_token &= ~__constant_cpu_to_le32(QTD_TOGGLE);
+			usb_settoggle(qh->dev, epnum, is_out, 1);
+		}
+	}
+
+	/* HC must see latest qtd and qh data before we clear ACTIVE+HALT */
+	wmb();
+	qh->hw_token &= __constant_cpu_to_le32(QTD_TOGGLE | QTD_STS_PING);
+}
+
+/* If it weren't for a common silicon quirk (writing the dummy into the qh
+ * overlay, so qh->hw_token wrongly becomes inactive/halted), only fault
+ * recovery (including urb dequeue) would need software changes to a QH...
+ */
+static void qh_refresh(struct oxu_hcd *oxu, struct ehci_qh *qh)
+{
+	struct ehci_qtd *qtd;
+
+	if (list_empty(&qh->qtd_list))
+		qtd = qh->dummy;
+	else {
+		qtd = list_entry(qh->qtd_list.next,
+				struct ehci_qtd, qtd_list);
+		/* first qtd may already be partially processed */
+		if (cpu_to_le32(qtd->qtd_dma) == qh->hw_current)
+			qtd = NULL;
+	}
+
+	if (qtd)
+		qh_update(oxu, qh, qtd);
+}
+
+static void qtd_copy_status(struct oxu_hcd *oxu, struct urb *urb,
+				size_t length, u32 token)
+{
+	/* count IN/OUT bytes, not SETUP (even short packets) */
+	if (likely(QTD_PID(token) != 2))
+		urb->actual_length += length - QTD_LENGTH(token);
+
+	/* don't modify error codes */
+	if (unlikely(urb->status != -EINPROGRESS))
+		return;
+
+	/* force cleanup after short read; not always an error */
+	if (unlikely(IS_SHORT_READ(token)))
+		urb->status = -EREMOTEIO;
+
+	/* serious "can't proceed" faults reported by the hardware */
+	if (token & QTD_STS_HALT) {
+		if (token & QTD_STS_BABBLE) {
+			/* FIXME "must" disable babbling device's port too */
+			urb->status = -EOVERFLOW;
+		} else if (token & QTD_STS_MMF) {
+			/* fs/ls interrupt xfer missed the complete-split */
+			urb->status = -EPROTO;
+		} else if (token & QTD_STS_DBE) {
+			urb->status = (QTD_PID(token) == 1) /* IN ? */
+				? -ENOSR  /* hc couldn't read data */
+				: -ECOMM; /* hc couldn't write data */
+		} else if (token & QTD_STS_XACT) {
+			/* timeout, bad crc, wrong PID, etc; retried */
+			if (QTD_CERR(token))
+				urb->status = -EPIPE;
+			else {
+				oxu_dbg(oxu, "devpath %s ep%d%s 3strikes\n",
+					urb->dev->devpath,
+					usb_pipeendpoint(urb->pipe),
+					usb_pipein(urb->pipe) ? "in" : "out");
+				urb->status = -EPROTO;
+			}
+		/* CERR nonzero + no errors + halt --> stall */
+		} else if (QTD_CERR(token))
+			urb->status = -EPIPE;
+		else	/* unknown */
+			urb->status = -EPROTO;
+
+		oxu_vdbg(oxu, "dev%d ep%d%s qtd token %08x --> status %d\n",
+			usb_pipedevice(urb->pipe),
+			usb_pipeendpoint(urb->pipe),
+			usb_pipein(urb->pipe) ? "in" : "out",
+			token, urb->status);
+	}
+}
+
+static void ehci_urb_done(struct oxu_hcd *oxu, struct urb *urb)
+__releases(oxu->lock)
+__acquires(oxu->lock)
+{
+	if (likely(urb->hcpriv != NULL)) {
+		struct ehci_qh	*qh = (struct ehci_qh *) urb->hcpriv;
+
+		/* S-mask in a QH means it's an interrupt urb */
+		if ((qh->hw_info2 & __constant_cpu_to_le32(QH_SMASK)) != 0) {
+
+			/* ... update hc-wide periodic stats (for usbfs) */
+			oxu_to_hcd(oxu)->self.bandwidth_int_reqs--;
+		}
+		qh_put(qh);
+	}
+
+	urb->hcpriv = NULL;
+	switch (urb->status) {
+	case -EINPROGRESS:		/* success */
+		urb->status = 0;
+	default:			/* fault */
+		break;
+	case -EREMOTEIO:		/* fault or normal */
+		if (!(urb->transfer_flags & URB_SHORT_NOT_OK))
+			urb->status = 0;
+		break;
+	case -ECONNRESET:		/* canceled */
+	case -ENOENT:
+		break;
+	}
+
+#ifdef OXU_URB_TRACE
+	oxu_dbg(oxu, "%s %s urb %p ep%d%s status %d len %d/%d\n",
+		__func__, urb->dev->devpath, urb,
+		usb_pipeendpoint(urb->pipe),
+		usb_pipein(urb->pipe) ? "in" : "out",
+		urb->status,
+		urb->actual_length, urb->transfer_buffer_length);
+#endif
+
+	/* complete() can reenter this HCD */
+	spin_unlock(&oxu->lock);
+	usb_hcd_giveback_urb(oxu_to_hcd(oxu), urb, urb->status);
+	spin_lock(&oxu->lock);
+}
+
+static void start_unlink_async(struct oxu_hcd *oxu, struct ehci_qh *qh);
+static void unlink_async(struct oxu_hcd *oxu, struct ehci_qh *qh);
+
+static void intr_deschedule(struct oxu_hcd *oxu, struct ehci_qh *qh);
+static int qh_schedule(struct oxu_hcd *oxu, struct ehci_qh *qh);
+
+#define HALT_BIT __constant_cpu_to_le32(QTD_STS_HALT)
+
+/* Process and free completed qtds for a qh, returning URBs to drivers.
+ * Chases up to qh->hw_current.  Returns number of completions called,
+ * indicating how much "real" work we did.
+ */
+static unsigned qh_completions(struct oxu_hcd *oxu, struct ehci_qh *qh)
+{
+	struct ehci_qtd *last = NULL, *end = qh->dummy;
+	struct list_head *entry, *tmp;
+	int stopped;
+	unsigned count = 0;
+	int do_status = 0;
+	u8 state;
+	struct oxu_murb *murb = NULL;
+
+	if (unlikely(list_empty(&qh->qtd_list)))
+		return count;
+
+	/* completions (or tasks on other cpus) must never clobber HALT
+	 * till we've gone through and cleaned everything up, even when
+	 * they add urbs to this qh's queue or mark them for unlinking.
+	 *
+	 * NOTE:  unlinking expects to be done in queue order.
+	 */
+	state = qh->qh_state;
+	qh->qh_state = QH_STATE_COMPLETING;
+	stopped = (state == QH_STATE_IDLE);
+
+	/* remove de-activated QTDs from front of queue.
+	 * after faults (including short reads), cleanup this urb
+	 * then let the queue advance.
+	 * if queue is stopped, handles unlinks.
+	 */
+	list_for_each_safe(entry, tmp, &qh->qtd_list) {
+		struct ehci_qtd	*qtd;
+		struct urb *urb;
+		u32 token = 0;
+
+		qtd = list_entry(entry, struct ehci_qtd, qtd_list);
+		urb = qtd->urb;
+
+		/* Clean up any state from previous QTD ...*/
+		if (last) {
+			if (likely(last->urb != urb)) {
+				if (last->urb->complete == NULL) {
+					murb = (struct oxu_murb *) last->urb;
+					last->urb = murb->main;
+					if (murb->last) {
+						ehci_urb_done(oxu, last->urb);
+						count++;
+					}
+					oxu_murb_free(oxu, murb);
+				} else {
+					ehci_urb_done(oxu, last->urb);
+					count++;
+				}
+			}
+			oxu_qtd_free(oxu, last);
+			last = NULL;
+		}
+
+		/* ignore urbs submitted during completions we reported */
+		if (qtd == end)
+			break;
+
+		/* hardware copies qtd out of qh overlay */
+		rmb();
+		token = le32_to_cpu(qtd->hw_token);
+
+		/* always clean up qtds the hc de-activated */
+		if ((token & QTD_STS_ACTIVE) == 0) {
+
+			if ((token & QTD_STS_HALT) != 0) {
+				stopped = 1;
+
+			/* magic dummy for some short reads; qh won't advance.
+			 * that silicon quirk can kick in with this dummy too.
+			 */
+			} else if (IS_SHORT_READ(token) &&
+					!(qtd->hw_alt_next & EHCI_LIST_END)) {
+				stopped = 1;
+				goto halt;
+			}
+
+		/* stop scanning when we reach qtds the hc is using */
+		} else if (likely(!stopped &&
+				HC_IS_RUNNING(oxu_to_hcd(oxu)->state))) {
+			break;
+
+		} else {
+			stopped = 1;
+
+			if (unlikely(!HC_IS_RUNNING(oxu_to_hcd(oxu)->state)))
+				urb->status = -ESHUTDOWN;
+
+			/* ignore active urbs unless some previous qtd
+			 * for the urb faulted (including short read) or
+			 * its urb was canceled.  we may patch qh or qtds.
+			 */
+			if (likely(urb->status == -EINPROGRESS))
+				continue;
+
+			/* issue status after short control reads */
+			if (unlikely(do_status != 0)
+					&& QTD_PID(token) == 0 /* OUT */) {
+				do_status = 0;
+				continue;
+			}
+
+			/* token in overlay may be most current */
+			if (state == QH_STATE_IDLE
+					&& cpu_to_le32(qtd->qtd_dma)
+						== qh->hw_current)
+				token = le32_to_cpu(qh->hw_token);
+
+			/* force halt for unlinked or blocked qh, so we'll
+			 * patch the qh later and so that completions can't
+			 * activate it while we "know" it's stopped.
+			 */
+			if ((HALT_BIT & qh->hw_token) == 0) {
+halt:
+				qh->hw_token |= HALT_BIT;
+				wmb();
+			}
+		}
+
+		/* Remove it from the queue */
+		qtd_copy_status(oxu, urb->complete ?
+					urb : ((struct oxu_murb *) urb)->main,
+				qtd->length, token);
+		if ((usb_pipein(qtd->urb->pipe)) &&
+				(NULL != qtd->transfer_buffer))
+			memcpy(qtd->transfer_buffer, qtd->buffer, qtd->length);
+		do_status = (urb->status == -EREMOTEIO)
+				&& usb_pipecontrol(urb->pipe);
+
+		if (stopped && qtd->qtd_list.prev != &qh->qtd_list) {
+			last = list_entry(qtd->qtd_list.prev,
+					struct ehci_qtd, qtd_list);
+			last->hw_next = qtd->hw_next;
+		}
+		list_del(&qtd->qtd_list);
+		last = qtd;
+	}
+
+	/* last urb's completion might still need calling */
+	if (likely(last != NULL)) {
+		if (last->urb->complete == NULL) {
+			murb = (struct oxu_murb *) last->urb;
+			last->urb = murb->main;
+			if (murb->last) {
+				ehci_urb_done(oxu, last->urb);
+				count++;
+			}
+			oxu_murb_free(oxu, murb);
+		} else {
+			ehci_urb_done(oxu, last->urb);
+			count++;
+		}
+		oxu_qtd_free(oxu, last);
+	}
+
+	/* restore original state; caller must unlink or relink */
+	qh->qh_state = state;
+
+	/* be sure the hardware's done with the qh before refreshing
+	 * it after fault cleanup, or recovering from silicon wrongly
+	 * overlaying the dummy qtd (which reduces DMA chatter).
+	 */
+	if (stopped != 0 || qh->hw_qtd_next == EHCI_LIST_END) {
+		switch (state) {
+		case QH_STATE_IDLE:
+			qh_refresh(oxu, qh);
+			break;
+		case QH_STATE_LINKED:
+			/* should be rare for periodic transfers,
+			 * except maybe high bandwidth ...
+			 */
+			if ((__constant_cpu_to_le32(QH_SMASK)
+					& qh->hw_info2) != 0) {
+				intr_deschedule(oxu, qh);
+				(void) qh_schedule(oxu, qh);
+			} else
+				unlink_async(oxu, qh);
+			break;
+		/* otherwise, unlink already started */
+		}
+	}
+
+	return count;
+}
+
+/* High bandwidth multiplier, as encoded in highspeed endpoint descriptors */
+#define hb_mult(wMaxPacketSize)		(1 + (((wMaxPacketSize) >> 11) & 0x03))
+/* ... and packet size, for any kind of endpoint descriptor */
+#define max_packet(wMaxPacketSize)	((wMaxPacketSize) & 0x07ff)
+
+/* Reverse of qh_urb_transaction: free a list of TDs.
+ * used for cleanup after errors, before HC sees an URB's TDs.
+ */
+static void qtd_list_free(struct oxu_hcd *oxu,
+				struct urb *urb, struct list_head *qtd_list)
+{
+	struct list_head *entry, *temp;
+
+	list_for_each_safe(entry, temp, qtd_list) {
+		struct ehci_qtd	*qtd;
+
+		qtd = list_entry(entry, struct ehci_qtd, qtd_list);
+		list_del(&qtd->qtd_list);
+		oxu_qtd_free(oxu, qtd);
+	}
+}
+
+/* Create a list of filled qtds for this URB; won't link into qh.
+ */
+static struct list_head *qh_urb_transaction(struct oxu_hcd *oxu,
+						struct urb *urb,
+						struct list_head *head,
+						gfp_t flags)
+{
+	struct ehci_qtd	*qtd, *qtd_prev;
+	dma_addr_t buf;
+	int len, maxpacket;
+	int is_input;
+	u32 token;
+	void *transfer_buf = NULL;
+	int ret;
+
+	/*
+	 * URBs map to sequences of QTDs: one logical transaction
+	 */
+	qtd = ehci_qtd_alloc(oxu);
+	if (unlikely(!qtd))
+		return NULL;
+	list_add_tail(&qtd->qtd_list, head);
+	qtd->urb = urb;
+
+	token = QTD_STS_ACTIVE;
+	token |= (EHCI_TUNE_CERR << 10);
+	/* for split transactions, SplitXState initialized to zero */
+
+	len = urb->transfer_buffer_length;
+	is_input = usb_pipein(urb->pipe);
+	if (!urb->transfer_buffer && urb->transfer_buffer_length && is_input)
+		urb->transfer_buffer = phys_to_virt(urb->transfer_dma);
+
+	if (usb_pipecontrol(urb->pipe)) {
+		/* SETUP pid */
+		ret = oxu_buf_alloc(oxu, qtd, sizeof(struct usb_ctrlrequest));
+		if (ret)
+			goto cleanup;
+
+		qtd_fill(qtd, qtd->buffer_dma, sizeof(struct usb_ctrlrequest),
+				token | (2 /* "setup" */ << 8), 8);
+		memcpy(qtd->buffer, qtd->urb->setup_packet,
+				sizeof(struct usb_ctrlrequest));
+
+		/* ... and always at least one more pid */
+		token ^= QTD_TOGGLE;
+		qtd_prev = qtd;
+		qtd = ehci_qtd_alloc(oxu);
+		if (unlikely(!qtd))
+			goto cleanup;
+		qtd->urb = urb;
+		qtd_prev->hw_next = QTD_NEXT(qtd->qtd_dma);
+		list_add_tail(&qtd->qtd_list, head);
+
+		/* for zero length DATA stages, STATUS is always IN */
+		if (len == 0)
+			token |= (1 /* "in" */ << 8);
+	}
+
+	/*
+	 * Data transfer stage: buffer setup
+	 */
+
+	ret = oxu_buf_alloc(oxu, qtd, len);
+	if (ret)
+		goto cleanup;
+
+	buf = qtd->buffer_dma;
+	transfer_buf = urb->transfer_buffer;
+
+	if (!is_input)
+		memcpy(qtd->buffer, qtd->urb->transfer_buffer, len);
+
+	if (is_input)
+		token |= (1 /* "in" */ << 8);
+	/* else it's already initted to "out" pid (0 << 8) */
+
+	maxpacket = max_packet(usb_maxpacket(urb->dev, urb->pipe, !is_input));
+
+	/*
+	 * buffer gets wrapped in one or more qtds;
+	 * last one may be "short" (including zero len)
+	 * and may serve as a control status ack
+	 */
+	for (;;) {
+		int this_qtd_len;
+
+		this_qtd_len = qtd_fill(qtd, buf, len, token, maxpacket);
+		qtd->transfer_buffer = transfer_buf;
+		len -= this_qtd_len;
+		buf += this_qtd_len;
+		transfer_buf += this_qtd_len;
+		if (is_input)
+			qtd->hw_alt_next = oxu->async->hw_alt_next;
+
+		/* qh makes control packets use qtd toggle; maybe switch it */
+		if ((maxpacket & (this_qtd_len + (maxpacket - 1))) == 0)
+			token ^= QTD_TOGGLE;
+
+		if (likely(len <= 0))
+			break;
+
+		qtd_prev = qtd;
+		qtd = ehci_qtd_alloc(oxu);
+		if (unlikely(!qtd))
+			goto cleanup;
+		if (likely(len > 0)) {
+			ret = oxu_buf_alloc(oxu, qtd, len);
+			if (ret)
+				goto cleanup;
+		}
+		qtd->urb = urb;
+		qtd_prev->hw_next = QTD_NEXT(qtd->qtd_dma);
+		list_add_tail(&qtd->qtd_list, head);
+	}
+
+	/* unless the bulk/interrupt caller wants a chance to clean
+	 * up after short reads, hc should advance qh past this urb
+	 */
+	if (likely((urb->transfer_flags & URB_SHORT_NOT_OK) == 0
+				|| usb_pipecontrol(urb->pipe)))
+		qtd->hw_alt_next = EHCI_LIST_END;
+
+	/*
+	 * control requests may need a terminating data "status" ack;
+	 * bulk ones may need a terminating short packet (zero length).
+	 */
+	if (likely(urb->transfer_buffer_length != 0)) {
+		int	one_more = 0;
+
+		if (usb_pipecontrol(urb->pipe)) {
+			one_more = 1;
+			token ^= 0x0100;	/* "in" <--> "out"  */
+			token |= QTD_TOGGLE;	/* force DATA1 */
+		} else if (usb_pipebulk(urb->pipe)
+				&& (urb->transfer_flags & URB_ZERO_PACKET)
+				&& !(urb->transfer_buffer_length % maxpacket)) {
+			one_more = 1;
+		}
+		if (one_more) {
+			qtd_prev = qtd;
+			qtd = ehci_qtd_alloc(oxu);
+			if (unlikely(!qtd))
+				goto cleanup;
+			qtd->urb = urb;
+			qtd_prev->hw_next = QTD_NEXT(qtd->qtd_dma);
+			list_add_tail(&qtd->qtd_list, head);
+
+			/* never any data in such packets */
+			qtd_fill(qtd, 0, 0, token, 0);
+		}
+	}
+
+	/* by default, enable interrupt on urb completion */
+		qtd->hw_token |= __constant_cpu_to_le32(QTD_IOC);
+	return head;
+
+cleanup:
+	qtd_list_free(oxu, urb, head);
+	return NULL;
+}
+
+/* Each QH holds a qtd list; a QH is used for everything except iso.
+ *
+ * For interrupt urbs, the scheduler must set the microframe scheduling
+ * mask(s) each time the QH gets scheduled.  For highspeed, that's
+ * just one microframe in the s-mask.  For split interrupt transactions
+ * there are additional complications: c-mask, maybe FSTNs.
+ */
+static struct ehci_qh *qh_make(struct oxu_hcd *oxu,
+				struct urb *urb, gfp_t flags)
+{
+	struct ehci_qh *qh = oxu_qh_alloc(oxu);
+	u32 info1 = 0, info2 = 0;
+	int is_input, type;
+	int maxp = 0;
+
+	if (!qh)
+		return qh;
+
+	/*
+	 * init endpoint/device data for this QH
+	 */
+	info1 |= usb_pipeendpoint(urb->pipe) << 8;
+	info1 |= usb_pipedevice(urb->pipe) << 0;
+
+	is_input = usb_pipein(urb->pipe);
+	type = usb_pipetype(urb->pipe);
+	maxp = usb_maxpacket(urb->dev, urb->pipe, !is_input);
+
+	/* Compute interrupt scheduling parameters just once, and save.
+	 * - allowing for high bandwidth, how many nsec/uframe are used?
+	 * - split transactions need a second CSPLIT uframe; same question
+	 * - splits also need a schedule gap (for full/low speed I/O)
+	 * - qh has a polling interval
+	 *
+	 * For control/bulk requests, the HC or TT handles these.
+	 */
+	if (type == PIPE_INTERRUPT) {
+		qh->usecs = NS_TO_US(usb_calc_bus_time(USB_SPEED_HIGH,
+								is_input, 0,
+				hb_mult(maxp) * max_packet(maxp)));
+		qh->start = NO_FRAME;
+
+		if (urb->dev->speed == USB_SPEED_HIGH) {
+			qh->c_usecs = 0;
+			qh->gap_uf = 0;
+
+			qh->period = urb->interval >> 3;
+			if (qh->period == 0 && urb->interval != 1) {
+				/* NOTE interval 2 or 4 uframes could work.
+				 * But interval 1 scheduling is simpler, and
+				 * includes high bandwidth.
+				 */
+				dbg("intr period %d uframes, NYET!",
+						urb->interval);
+				goto done;
+			}
+		} else {
+			struct usb_tt	*tt = urb->dev->tt;
+			int		think_time;
+
+			/* gap is f(FS/LS transfer times) */
+			qh->gap_uf = 1 + usb_calc_bus_time(urb->dev->speed,
+					is_input, 0, maxp) / (125 * 1000);
+
+			/* FIXME this just approximates SPLIT/CSPLIT times */
+			if (is_input) {		/* SPLIT, gap, CSPLIT+DATA */
+				qh->c_usecs = qh->usecs + HS_USECS(0);
+				qh->usecs = HS_USECS(1);
+			} else {		/* SPLIT+DATA, gap, CSPLIT */
+				qh->usecs += HS_USECS(1);
+				qh->c_usecs = HS_USECS(0);
+			}
+
+			think_time = tt ? tt->think_time : 0;
+			qh->tt_usecs = NS_TO_US(think_time +
+					usb_calc_bus_time(urb->dev->speed,
+					is_input, 0, max_packet(maxp)));
+			qh->period = urb->interval;
+		}
+	}
+
+	/* support for tt scheduling, and access to toggles */
+	qh->dev = urb->dev;
+
+	/* using TT? */
+	switch (urb->dev->speed) {
+	case USB_SPEED_LOW:
+		info1 |= (1 << 12);	/* EPS "low" */
+		/* FALL THROUGH */
+
+	case USB_SPEED_FULL:
+		/* EPS 0 means "full" */
+		if (type != PIPE_INTERRUPT)
+			info1 |= (EHCI_TUNE_RL_TT << 28);
+		if (type == PIPE_CONTROL) {
+			info1 |= (1 << 27);	/* for TT */
+			info1 |= 1 << 14;	/* toggle from qtd */
+		}
+		info1 |= maxp << 16;
+
+		info2 |= (EHCI_TUNE_MULT_TT << 30);
+		info2 |= urb->dev->ttport << 23;
+
+		/* NOTE:  if (PIPE_INTERRUPT) { scheduler sets c-mask } */
+
+		break;
+
+	case USB_SPEED_HIGH:		/* no TT involved */
+		info1 |= (2 << 12);	/* EPS "high" */
+		if (type == PIPE_CONTROL) {
+			info1 |= (EHCI_TUNE_RL_HS << 28);
+			info1 |= 64 << 16;	/* usb2 fixed maxpacket */
+			info1 |= 1 << 14;	/* toggle from qtd */
+			info2 |= (EHCI_TUNE_MULT_HS << 30);
+		} else if (type == PIPE_BULK) {
+			info1 |= (EHCI_TUNE_RL_HS << 28);
+			info1 |= 512 << 16;	/* usb2 fixed maxpacket */
+			info2 |= (EHCI_TUNE_MULT_HS << 30);
+		} else {		/* PIPE_INTERRUPT */
+			info1 |= max_packet(maxp) << 16;
+			info2 |= hb_mult(maxp) << 30;
+		}
+		break;
+	default:
+		dbg("bogus dev %p speed %d", urb->dev, urb->dev->speed);
+done:
+		qh_put(qh);
+		return NULL;
+	}
+
+	/* NOTE:  if (PIPE_INTERRUPT) { scheduler sets s-mask } */
+
+	/* init as live, toggle clear, advance to dummy */
+	qh->qh_state = QH_STATE_IDLE;
+	qh->hw_info1 = cpu_to_le32(info1);
+	qh->hw_info2 = cpu_to_le32(info2);
+	usb_settoggle(urb->dev, usb_pipeendpoint(urb->pipe), !is_input, 1);
+	qh_refresh(oxu, qh);
+	return qh;
+}
+
+/* Move qh (and its qtds) onto async queue; maybe enable queue.
+ */
+static void qh_link_async(struct oxu_hcd *oxu, struct ehci_qh *qh)
+{
+	__le32 dma = QH_NEXT(qh->qh_dma);
+	struct ehci_qh *head;
+
+	/* (re)start the async schedule? */
+	head = oxu->async;
+	timer_action_done(oxu, TIMER_ASYNC_OFF);
+	if (!head->qh_next.qh) {
+		u32	cmd = readl(&oxu->regs->command);
+
+		if (!(cmd & CMD_ASE)) {
+			/* in case a clear of CMD_ASE didn't take yet */
+			(void)handshake(oxu, &oxu->regs->status,
+					STS_ASS, 0, 150);
+			cmd |= CMD_ASE | CMD_RUN;
+			writel(cmd, &oxu->regs->command);
+			oxu_to_hcd(oxu)->state = HC_STATE_RUNNING;
+			/* posted write need not be known to HC yet ... */
+		}
+	}
+
+	/* clear halt and/or toggle; and maybe recover from silicon quirk */
+	if (qh->qh_state == QH_STATE_IDLE)
+		qh_refresh(oxu, qh);
+
+	/* splice right after start */
+	qh->qh_next = head->qh_next;
+	qh->hw_next = head->hw_next;
+	wmb();
+
+	head->qh_next.qh = qh;
+	head->hw_next = dma;
+
+	qh->qh_state = QH_STATE_LINKED;
+	/* qtd completions reported later by interrupt */
+}
+
+#define	QH_ADDR_MASK	__constant_cpu_to_le32(0x7f)
+
+/*
+ * For control/bulk/interrupt, return QH with these TDs appended.
+ * Allocates and initializes the QH if necessary.
+ * Returns null if it can't allocate a QH it needs to.
+ * If the QH has TDs (urbs) already, that's great.
+ */
+static struct ehci_qh *qh_append_tds(struct oxu_hcd *oxu,
+				struct urb *urb, struct list_head *qtd_list,
+				int epnum, void	**ptr)
+{
+	struct ehci_qh *qh = NULL;
+
+	qh = (struct ehci_qh *) *ptr;
+	if (unlikely(qh == NULL)) {
+		/* can't sleep here, we have oxu->lock... */
+		qh = qh_make(oxu, urb, GFP_ATOMIC);
+		*ptr = qh;
+	}
+	if (likely(qh != NULL)) {
+		struct ehci_qtd	*qtd;
+
+		if (unlikely(list_empty(qtd_list)))
+			qtd = NULL;
+		else
+			qtd = list_entry(qtd_list->next, struct ehci_qtd,
+					qtd_list);
+
+		/* control qh may need patching ... */
+		if (unlikely(epnum == 0)) {
+
+			/* usb_reset_device() briefly reverts to address 0 */
+			if (usb_pipedevice(urb->pipe) == 0)
+				qh->hw_info1 &= ~QH_ADDR_MASK;
+		}
+
+		/* just one way to queue requests: swap with the dummy qtd.
+		 * only hc or qh_refresh() ever modify the overlay.
+		 */
+		if (likely(qtd != NULL)) {
+			struct ehci_qtd	*dummy;
+			dma_addr_t dma;
+			__le32 token;
+
+			/* to avoid racing the HC, use the dummy td instead of
+			 * the first td of our list (becomes new dummy).  both
+			 * tds stay deactivated until we're done, when the
+			 * HC is allowed to fetch the old dummy (4.10.2).
+			 */
+			token = qtd->hw_token;
+			qtd->hw_token = HALT_BIT;
+			wmb();
+			dummy = qh->dummy;
+
+			dma = dummy->qtd_dma;
+			*dummy = *qtd;
+			dummy->qtd_dma = dma;
+
+			list_del(&qtd->qtd_list);
+			list_add(&dummy->qtd_list, qtd_list);
+			list_splice(qtd_list, qh->qtd_list.prev);
+
+			ehci_qtd_init(qtd, qtd->qtd_dma);
+			qh->dummy = qtd;
+
+			/* hc must see the new dummy at list end */
+			dma = qtd->qtd_dma;
+			qtd = list_entry(qh->qtd_list.prev,
+					struct ehci_qtd, qtd_list);
+			qtd->hw_next = QTD_NEXT(dma);
+
+			/* let the hc process these next qtds */
+			dummy->hw_token = (token & ~(0x80));
+			wmb();
+			dummy->hw_token = token;
+
+			urb->hcpriv = qh_get(qh);
+		}
+	}
+	return qh;
+}
+
+static int submit_async(struct oxu_hcd	*oxu, struct urb *urb,
+			struct list_head *qtd_list, gfp_t mem_flags)
+{
+	struct ehci_qtd	*qtd;
+	int epnum;
+	unsigned long flags;
+	struct ehci_qh *qh = NULL;
+	int rc = 0;
+
+	qtd = list_entry(qtd_list->next, struct ehci_qtd, qtd_list);
+	epnum = urb->ep->desc.bEndpointAddress;
+
+#ifdef OXU_URB_TRACE
+	oxu_dbg(oxu, "%s %s urb %p ep%d%s len %d, qtd %p [qh %p]\n",
+		__func__, urb->dev->devpath, urb,
+		epnum & 0x0f, (epnum & USB_DIR_IN) ? "in" : "out",
+		urb->transfer_buffer_length,
+		qtd, urb->ep->hcpriv);
+#endif
+
+	spin_lock_irqsave(&oxu->lock, flags);
+	if (unlikely(!test_bit(HCD_FLAG_HW_ACCESSIBLE,
+			       &oxu_to_hcd(oxu)->flags))) {
+		rc = -ESHUTDOWN;
+		goto done;
+	}
+
+	qh = qh_append_tds(oxu, urb, qtd_list, epnum, &urb->ep->hcpriv);
+	if (unlikely(qh == NULL)) {
+		rc = -ENOMEM;
+		goto done;
+	}
+
+	/* Control/bulk operations through TTs don't need scheduling,
+	 * the HC and TT handle it when the TT has a buffer ready.
+	 */
+	if (likely(qh->qh_state == QH_STATE_IDLE))
+		qh_link_async(oxu, qh_get(qh));
+done:
+	spin_unlock_irqrestore(&oxu->lock, flags);
+	if (unlikely(qh == NULL))
+		qtd_list_free(oxu, urb, qtd_list);
+	return rc;
+}
+
+/* The async qh for the qtds being reclaimed are now unlinked from the HC */
+
+static void end_unlink_async(struct oxu_hcd *oxu)
+{
+	struct ehci_qh *qh = oxu->reclaim;
+	struct ehci_qh *next;
+
+	timer_action_done(oxu, TIMER_IAA_WATCHDOG);
+
+	qh->qh_state = QH_STATE_IDLE;
+	qh->qh_next.qh = NULL;
+	qh_put(qh);			/* refcount from reclaim */
+
+	/* other unlink(s) may be pending (in QH_STATE_UNLINK_WAIT) */
+	next = qh->reclaim;
+	oxu->reclaim = next;
+	oxu->reclaim_ready = 0;
+	qh->reclaim = NULL;
+
+	qh_completions(oxu, qh);
+
+	if (!list_empty(&qh->qtd_list)
+			&& HC_IS_RUNNING(oxu_to_hcd(oxu)->state))
+		qh_link_async(oxu, qh);
+	else {
+		qh_put(qh);		/* refcount from async list */
+
+		/* it's not free to turn the async schedule on/off; leave it
+		 * active but idle for a while once it empties.
+		 */
+		if (HC_IS_RUNNING(oxu_to_hcd(oxu)->state)
+				&& oxu->async->qh_next.qh == NULL)
+			timer_action(oxu, TIMER_ASYNC_OFF);
+	}
+
+	if (next) {
+		oxu->reclaim = NULL;
+		start_unlink_async(oxu, next);
+	}
+}
+
+/* makes sure the async qh will become idle */
+/* caller must own oxu->lock */
+
+static void start_unlink_async(struct oxu_hcd *oxu, struct ehci_qh *qh)
+{
+	int cmd = readl(&oxu->regs->command);
+	struct ehci_qh *prev;
+
+#ifdef DEBUG
+	assert_spin_locked(&oxu->lock);
+	if (oxu->reclaim || (qh->qh_state != QH_STATE_LINKED
+				&& qh->qh_state != QH_STATE_UNLINK_WAIT))
+		BUG();
+#endif
+
+	/* stop async schedule right now? */
+	if (unlikely(qh == oxu->async)) {
+		/* can't get here without STS_ASS set */
+		if (oxu_to_hcd(oxu)->state != HC_STATE_HALT
+				&& !oxu->reclaim) {
+			/* ... and CMD_IAAD clear */
+			writel(cmd & ~CMD_ASE, &oxu->regs->command);
+			wmb();
+			/* handshake later, if we need to */
+			timer_action_done(oxu, TIMER_ASYNC_OFF);
+		}
+		return;
+	}
+
+	qh->qh_state = QH_STATE_UNLINK;
+	oxu->reclaim = qh = qh_get(qh);
+
+	prev = oxu->async;
+	while (prev->qh_next.qh != qh)
+		prev = prev->qh_next.qh;
+
+	prev->hw_next = qh->hw_next;
+	prev->qh_next = qh->qh_next;
+	wmb();
+
+	if (unlikely(oxu_to_hcd(oxu)->state == HC_STATE_HALT)) {
+		/* if (unlikely(qh->reclaim != 0))
+		 *	this will recurse, probably not much
+		 */
+		end_unlink_async(oxu);
+		return;
+	}
+
+	oxu->reclaim_ready = 0;
+	cmd |= CMD_IAAD;
+	writel(cmd, &oxu->regs->command);
+	(void) readl(&oxu->regs->command);
+	timer_action(oxu, TIMER_IAA_WATCHDOG);
+}
+
+static void scan_async(struct oxu_hcd *oxu)
+{
+	struct ehci_qh *qh;
+	enum ehci_timer_action action = TIMER_IO_WATCHDOG;
+
+	if (!++(oxu->stamp))
+		oxu->stamp++;
+	timer_action_done(oxu, TIMER_ASYNC_SHRINK);
+rescan:
+	qh = oxu->async->qh_next.qh;
+	if (likely(qh != NULL)) {
+		do {
+			/* clean any finished work for this qh */
+			if (!list_empty(&qh->qtd_list)
+					&& qh->stamp != oxu->stamp) {
+				int temp;
+
+				/* unlinks could happen here; completion
+				 * reporting drops the lock.  rescan using
+				 * the latest schedule, but don't rescan
+				 * qhs we already finished (no looping).
+				 */
+				qh = qh_get(qh);
+				qh->stamp = oxu->stamp;
+				temp = qh_completions(oxu, qh);
+				qh_put(qh);
+				if (temp != 0)
+					goto rescan;
+			}
+
+			/* unlink idle entries, reducing HC PCI usage as well
+			 * as HCD schedule-scanning costs.  delay for any qh
+			 * we just scanned, there's a not-unusual case that it
+			 * doesn't stay idle for long.
+			 * (plus, avoids some kind of re-activation race.)
+			 */
+			if (list_empty(&qh->qtd_list)) {
+				if (qh->stamp == oxu->stamp)
+					action = TIMER_ASYNC_SHRINK;
+				else if (!oxu->reclaim
+					    && qh->qh_state == QH_STATE_LINKED)
+					start_unlink_async(oxu, qh);
+			}
+
+			qh = qh->qh_next.qh;
+		} while (qh);
+	}
+	if (action == TIMER_ASYNC_SHRINK)
+		timer_action(oxu, TIMER_ASYNC_SHRINK);
+}
+
+/*
+ * periodic_next_shadow - return "next" pointer on shadow list
+ * @periodic: host pointer to qh/itd/sitd
+ * @tag: hardware tag for type of this record
+ */
+static union ehci_shadow *periodic_next_shadow(union ehci_shadow *periodic,
+						__le32 tag)
+{
+	switch (tag) {
+	default:
+	case Q_TYPE_QH:
+		return &periodic->qh->qh_next;
+	}
+}
+
+/* caller must hold oxu->lock */
+static void periodic_unlink(struct oxu_hcd *oxu, unsigned frame, void *ptr)
+{
+	union ehci_shadow *prev_p = &oxu->pshadow[frame];
+	__le32 *hw_p = &oxu->periodic[frame];
+	union ehci_shadow here = *prev_p;
+
+	/* find predecessor of "ptr"; hw and shadow lists are in sync */
+	while (here.ptr && here.ptr != ptr) {
+		prev_p = periodic_next_shadow(prev_p, Q_NEXT_TYPE(*hw_p));
+		hw_p = here.hw_next;
+		here = *prev_p;
+	}
+	/* an interrupt entry (at list end) could have been shared */
+	if (!here.ptr)
+		return;
+
+	/* update shadow and hardware lists ... the old "next" pointers
+	 * from ptr may still be in use, the caller updates them.
+	 */
+	*prev_p = *periodic_next_shadow(&here, Q_NEXT_TYPE(*hw_p));
+	*hw_p = *here.hw_next;
+}
+
+/* how many of the uframe's 125 usecs are allocated? */
+static unsigned short periodic_usecs(struct oxu_hcd *oxu,
+					unsigned frame, unsigned uframe)
+{
+	__le32 *hw_p = &oxu->periodic[frame];
+	union ehci_shadow *q = &oxu->pshadow[frame];
+	unsigned usecs = 0;
+
+	while (q->ptr) {
+		switch (Q_NEXT_TYPE(*hw_p)) {
+		case Q_TYPE_QH:
+		default:
+			/* is it in the S-mask? */
+			if (q->qh->hw_info2 & cpu_to_le32(1 << uframe))
+				usecs += q->qh->usecs;
+			/* ... or C-mask? */
+			if (q->qh->hw_info2 & cpu_to_le32(1 << (8 + uframe)))
+				usecs += q->qh->c_usecs;
+			hw_p = &q->qh->hw_next;
+			q = &q->qh->qh_next;
+			break;
+		}
+	}
+#ifdef DEBUG
+	if (usecs > 100)
+		oxu_err(oxu, "uframe %d sched overrun: %d usecs\n",
+						frame * 8 + uframe, usecs);
+#endif
+	return usecs;
+}
+
+static int enable_periodic(struct oxu_hcd *oxu)
+{
+	u32 cmd;
+	int status;
+
+	/* did clearing PSE did take effect yet?
+	 * takes effect only at frame boundaries...
+	 */
+	status = handshake(oxu, &oxu->regs->status, STS_PSS, 0, 9 * 125);
+	if (status != 0) {
+		oxu_to_hcd(oxu)->state = HC_STATE_HALT;
+		return status;
+	}
+
+	cmd = readl(&oxu->regs->command) | CMD_PSE;
+	writel(cmd, &oxu->regs->command);
+	/* posted write ... PSS happens later */
+	oxu_to_hcd(oxu)->state = HC_STATE_RUNNING;
+
+	/* make sure ehci_work scans these */
+	oxu->next_uframe = readl(&oxu->regs->frame_index)
+		% (oxu->periodic_size << 3);
+	return 0;
+}
+
+static int disable_periodic(struct oxu_hcd *oxu)
+{
+	u32 cmd;
+	int status;
+
+	/* did setting PSE not take effect yet?
+	 * takes effect only at frame boundaries...
+	 */
+	status = handshake(oxu, &oxu->regs->status, STS_PSS, STS_PSS, 9 * 125);
+	if (status != 0) {
+		oxu_to_hcd(oxu)->state = HC_STATE_HALT;
+		return status;
+	}
+
+	cmd = readl(&oxu->regs->command) & ~CMD_PSE;
+	writel(cmd, &oxu->regs->command);
+	/* posted write ... */
+
+	oxu->next_uframe = -1;
+	return 0;
+}
+
+/* periodic schedule slots have iso tds (normal or split) first, then a
+ * sparse tree for active interrupt transfers.
+ *
+ * this just links in a qh; caller guarantees uframe masks are set right.
+ * no FSTN support (yet; oxu 0.96+)
+ */
+static int qh_link_periodic(struct oxu_hcd *oxu, struct ehci_qh *qh)
+{
+	unsigned i;
+	unsigned period = qh->period;
+
+	dev_dbg(&qh->dev->dev,
+		"link qh%d-%04x/%p start %d [%d/%d us]\n",
+		period, le32_to_cpup(&qh->hw_info2) & (QH_CMASK | QH_SMASK),
+		qh, qh->start, qh->usecs, qh->c_usecs);
+
+	/* high bandwidth, or otherwise every microframe */
+	if (period == 0)
+		period = 1;
+
+	for (i = qh->start; i < oxu->periodic_size; i += period) {
+		union ehci_shadow	*prev = &oxu->pshadow[i];
+		__le32			*hw_p = &oxu->periodic[i];
+		union ehci_shadow	here = *prev;
+		__le32			type = 0;
+
+		/* skip the iso nodes at list head */
+		while (here.ptr) {
+			type = Q_NEXT_TYPE(*hw_p);
+			if (type == Q_TYPE_QH)
+				break;
+			prev = periodic_next_shadow(prev, type);
+			hw_p = &here.qh->hw_next;
+			here = *prev;
+		}
+
+		/* sorting each branch by period (slow-->fast)
+		 * enables sharing interior tree nodes
+		 */
+		while (here.ptr && qh != here.qh) {
+			if (qh->period > here.qh->period)
+				break;
+			prev = &here.qh->qh_next;
+			hw_p = &here.qh->hw_next;
+			here = *prev;
+		}
+		/* link in this qh, unless some earlier pass did that */
+		if (qh != here.qh) {
+			qh->qh_next = here;
+			if (here.qh)
+				qh->hw_next = *hw_p;
+			wmb();
+			prev->qh = qh;
+			*hw_p = QH_NEXT(qh->qh_dma);
+		}
+	}
+	qh->qh_state = QH_STATE_LINKED;
+	qh_get(qh);
+
+	/* update per-qh bandwidth for usbfs */
+	oxu_to_hcd(oxu)->self.bandwidth_allocated += qh->period
+		? ((qh->usecs + qh->c_usecs) / qh->period)
+		: (qh->usecs * 8);
+
+	/* maybe enable periodic schedule processing */
+	if (!oxu->periodic_sched++)
+		return enable_periodic(oxu);
+
+	return 0;
+}
+
+static void qh_unlink_periodic(struct oxu_hcd *oxu, struct ehci_qh *qh)
+{
+	unsigned i;
+	unsigned period;
+
+	/* FIXME:
+	 *   IF this isn't high speed
+	 *   and this qh is active in the current uframe
+	 *   (and overlay token SplitXstate is false?)
+	 * THEN
+	 *   qh->hw_info1 |= __constant_cpu_to_le32(1 << 7 "ignore");
+	 */
+
+	/* high bandwidth, or otherwise part of every microframe */
+	period = qh->period;
+	if (period == 0)
+		period = 1;
+
+	for (i = qh->start; i < oxu->periodic_size; i += period)
+		periodic_unlink(oxu, i, qh);
+
+	/* update per-qh bandwidth for usbfs */
+	oxu_to_hcd(oxu)->self.bandwidth_allocated -= qh->period
+		? ((qh->usecs + qh->c_usecs) / qh->period)
+		: (qh->usecs * 8);
+
+	dev_dbg(&qh->dev->dev,
+		"unlink qh%d-%04x/%p start %d [%d/%d us]\n",
+		qh->period,
+		le32_to_cpup(&qh->hw_info2) & (QH_CMASK | QH_SMASK),
+		qh, qh->start, qh->usecs, qh->c_usecs);
+
+	/* qh->qh_next still "live" to HC */
+	qh->qh_state = QH_STATE_UNLINK;
+	qh->qh_next.ptr = NULL;
+	qh_put(qh);
+
+	/* maybe turn off periodic schedule */
+	oxu->periodic_sched--;
+	if (!oxu->periodic_sched)
+		(void) disable_periodic(oxu);
+}
+
+static void intr_deschedule(struct oxu_hcd *oxu, struct ehci_qh *qh)
+{
+	unsigned wait;
+
+	qh_unlink_periodic(oxu, qh);
+
+	/* simple/paranoid:  always delay, expecting the HC needs to read
+	 * qh->hw_next or finish a writeback after SPLIT/CSPLIT ... and
+	 * expect khubd to clean up after any CSPLITs we won't issue.
+	 * active high speed queues may need bigger delays...
+	 */
+	if (list_empty(&qh->qtd_list)
+		|| (__constant_cpu_to_le32(QH_CMASK) & qh->hw_info2) != 0)
+		wait = 2;
+	else
+		wait = 55;	/* worst case: 3 * 1024 */
+
+	udelay(wait);
+	qh->qh_state = QH_STATE_IDLE;
+	qh->hw_next = EHCI_LIST_END;
+	wmb();
+}
+
+static int check_period(struct oxu_hcd *oxu,
+			unsigned frame, unsigned uframe,
+			unsigned period, unsigned usecs)
+{
+	int claimed;
+
+	/* complete split running into next frame?
+	 * given FSTN support, we could sometimes check...
+	 */
+	if (uframe >= 8)
+		return 0;
+
+	/*
+	 * 80% periodic == 100 usec/uframe available
+	 * convert "usecs we need" to "max already claimed"
+	 */
+	usecs = 100 - usecs;
+
+	/* we "know" 2 and 4 uframe intervals were rejected; so
+	 * for period 0, check _every_ microframe in the schedule.
+	 */
+	if (unlikely(period == 0)) {
+		do {
+			for (uframe = 0; uframe < 7; uframe++) {
+				claimed = periodic_usecs(oxu, frame, uframe);
+				if (claimed > usecs)
+					return 0;
+			}
+		} while ((frame += 1) < oxu->periodic_size);
+
+	/* just check the specified uframe, at that period */
+	} else {
+		do {
+			claimed = periodic_usecs(oxu, frame, uframe);
+			if (claimed > usecs)
+				return 0;
+		} while ((frame += period) < oxu->periodic_size);
+	}
+
+	return 1;
+}
+
+static int check_intr_schedule(struct oxu_hcd	*oxu,
+				unsigned frame, unsigned uframe,
+				const struct ehci_qh *qh, __le32 *c_maskp)
+{
+	int retval = -ENOSPC;
+
+	if (qh->c_usecs && uframe >= 6)		/* FSTN territory? */
+		goto done;
+
+	if (!check_period(oxu, frame, uframe, qh->period, qh->usecs))
+		goto done;
+	if (!qh->c_usecs) {
+		retval = 0;
+		*c_maskp = 0;
+		goto done;
+	}
+
+done:
+	return retval;
+}
+
+/* "first fit" scheduling policy used the first time through,
+ * or when the previous schedule slot can't be re-used.
+ */
+static int qh_schedule(struct oxu_hcd *oxu, struct ehci_qh *qh)
+{
+	int		status;
+	unsigned	uframe;
+	__le32		c_mask;
+	unsigned	frame;		/* 0..(qh->period - 1), or NO_FRAME */
+
+	qh_refresh(oxu, qh);
+	qh->hw_next = EHCI_LIST_END;
+	frame = qh->start;
+
+	/* reuse the previous schedule slots, if we can */
+	if (frame < qh->period) {
+		uframe = ffs(le32_to_cpup(&qh->hw_info2) & QH_SMASK);
+		status = check_intr_schedule(oxu, frame, --uframe,
+				qh, &c_mask);
+	} else {
+		uframe = 0;
+		c_mask = 0;
+		status = -ENOSPC;
+	}
+
+	/* else scan the schedule to find a group of slots such that all
+	 * uframes have enough periodic bandwidth available.
+	 */
+	if (status) {
+		/* "normal" case, uframing flexible except with splits */
+		if (qh->period) {
+			frame = qh->period - 1;
+			do {
+				for (uframe = 0; uframe < 8; uframe++) {
+					status = check_intr_schedule(oxu,
+							frame, uframe, qh,
+							&c_mask);
+					if (status == 0)
+						break;
+				}
+			} while (status && frame--);
+
+		/* qh->period == 0 means every uframe */
+		} else {
+			frame = 0;
+			status = check_intr_schedule(oxu, 0, 0, qh, &c_mask);
+		}
+		if (status)
+			goto done;
+		qh->start = frame;
+
+		/* reset S-frame and (maybe) C-frame masks */
+		qh->hw_info2 &= __constant_cpu_to_le32(~(QH_CMASK | QH_SMASK));
+		qh->hw_info2 |= qh->period
+			? cpu_to_le32(1 << uframe)
+			: __constant_cpu_to_le32(QH_SMASK);
+		qh->hw_info2 |= c_mask;
+	} else
+		oxu_dbg(oxu, "reused qh %p schedule\n", qh);
+
+	/* stuff into the periodic schedule */
+	status = qh_link_periodic(oxu, qh);
+done:
+	return status;
+}
+
+static int intr_submit(struct oxu_hcd *oxu, struct urb *urb,
+			struct list_head *qtd_list, gfp_t mem_flags)
+{
+	unsigned epnum;
+	unsigned long flags;
+	struct ehci_qh *qh;
+	int status = 0;
+	struct list_head	empty;
+
+	/* get endpoint and transfer/schedule data */
+	epnum = urb->ep->desc.bEndpointAddress;
+
+	spin_lock_irqsave(&oxu->lock, flags);
+
+	if (unlikely(!test_bit(HCD_FLAG_HW_ACCESSIBLE,
+			       &oxu_to_hcd(oxu)->flags))) {
+		status = -ESHUTDOWN;
+		goto done;
+	}
+
+	/* get qh and force any scheduling errors */
+	INIT_LIST_HEAD(&empty);
+	qh = qh_append_tds(oxu, urb, &empty, epnum, &urb->ep->hcpriv);
+	if (qh == NULL) {
+		status = -ENOMEM;
+		goto done;
+	}
+	if (qh->qh_state == QH_STATE_IDLE) {
+		status = qh_schedule(oxu, qh);
+		if (status != 0)
+			goto done;
+	}
+
+	/* then queue the urb's tds to the qh */
+	qh = qh_append_tds(oxu, urb, qtd_list, epnum, &urb->ep->hcpriv);
+	BUG_ON(qh == NULL);
+
+	/* ... update usbfs periodic stats */
+	oxu_to_hcd(oxu)->self.bandwidth_int_reqs++;
+
+done:
+	spin_unlock_irqrestore(&oxu->lock, flags);
+	if (status)
+		qtd_list_free(oxu, urb, qtd_list);
+
+	return status;
+}
+
+static inline int itd_submit(struct oxu_hcd *oxu, struct urb *urb,
+						gfp_t mem_flags)
+{
+	oxu_dbg(oxu, "iso support is missing!\n");
+	return -ENOSYS;
+}
+
+static inline int sitd_submit(struct oxu_hcd *oxu, struct urb *urb,
+						gfp_t mem_flags)
+{
+	oxu_dbg(oxu, "split iso support is missing!\n");
+	return -ENOSYS;
+}
+
+static void scan_periodic(struct oxu_hcd *oxu)
+{
+	unsigned frame, clock, now_uframe, mod;
+	unsigned modified;
+
+	mod = oxu->periodic_size << 3;
+
+	/*
+	 * When running, scan from last scan point up to "now"
+	 * else clean up by scanning everything that's left.
+	 * Touches as few pages as possible:  cache-friendly.
+	 */
+	now_uframe = oxu->next_uframe;
+	if (HC_IS_RUNNING(oxu_to_hcd(oxu)->state))
+		clock = readl(&oxu->regs->frame_index);
+	else
+		clock = now_uframe + mod - 1;
+	clock %= mod;
+
+	for (;;) {
+		union ehci_shadow	q, *q_p;
+		__le32			type, *hw_p;
+		unsigned		uframes;
+
+		/* don't scan past the live uframe */
+		frame = now_uframe >> 3;
+		if (frame == (clock >> 3))
+			uframes = now_uframe & 0x07;
+		else {
+			/* safe to scan the whole frame at once */
+			now_uframe |= 0x07;
+			uframes = 8;
+		}
+
+restart:
+		/* scan each element in frame's queue for completions */
+		q_p = &oxu->pshadow[frame];
+		hw_p = &oxu->periodic[frame];
+		q.ptr = q_p->ptr;
+		type = Q_NEXT_TYPE(*hw_p);
+		modified = 0;
+
+		while (q.ptr != NULL) {
+			union ehci_shadow temp;
+			int live;
+
+			live = HC_IS_RUNNING(oxu_to_hcd(oxu)->state);
+			switch (type) {
+			case Q_TYPE_QH:
+				/* handle any completions */
+				temp.qh = qh_get(q.qh);
+				type = Q_NEXT_TYPE(q.qh->hw_next);
+				q = q.qh->qh_next;
+				modified = qh_completions(oxu, temp.qh);
+				if (unlikely(list_empty(&temp.qh->qtd_list)))
+					intr_deschedule(oxu, temp.qh);
+				qh_put(temp.qh);
+				break;
+			default:
+				dbg("corrupt type %d frame %d shadow %p",
+					type, frame, q.ptr);
+				q.ptr = NULL;
+			}
+
+			/* assume completion callbacks modify the queue */
+			if (unlikely(modified))
+				goto restart;
+		}
+
+		/* Stop when we catch up to the HC */
+
+		/* FIXME:  this assumes we won't get lapped when
+		 * latencies climb; that should be rare, but...
+		 * detect it, and just go all the way around.
+		 * FLR might help detect this case, so long as latencies
+		 * don't exceed periodic_size msec (default 1.024 sec).
+		 */
+
+		/* FIXME: likewise assumes HC doesn't halt mid-scan */
+
+		if (now_uframe == clock) {
+			unsigned	now;
+
+			if (!HC_IS_RUNNING(oxu_to_hcd(oxu)->state))
+				break;
+			oxu->next_uframe = now_uframe;
+			now = readl(&oxu->regs->frame_index) % mod;
+			if (now_uframe == now)
+				break;
+
+			/* rescan the rest of this frame, then ... */
+			clock = now;
+		} else {
+			now_uframe++;
+			now_uframe %= mod;
+		}
+	}
+}
+
+/* On some systems, leaving remote wakeup enabled prevents system shutdown.
+ * The firmware seems to think that powering off is a wakeup event!
+ * This routine turns off remote wakeup and everything else, on all ports.
+ */
+static void ehci_turn_off_all_ports(struct oxu_hcd *oxu)
+{
+	int port = HCS_N_PORTS(oxu->hcs_params);
+
+	while (port--)
+		writel(PORT_RWC_BITS, &oxu->regs->port_status[port]);
+}
+
+static void ehci_port_power(struct oxu_hcd *oxu, int is_on)
+{
+	unsigned port;
+
+	if (!HCS_PPC(oxu->hcs_params))
+		return;
+
+	oxu_dbg(oxu, "...power%s ports...\n", is_on ? "up" : "down");
+	for (port = HCS_N_PORTS(oxu->hcs_params); port > 0; )
+		(void) oxu_hub_control(oxu_to_hcd(oxu),
+				is_on ? SetPortFeature : ClearPortFeature,
+				USB_PORT_FEAT_POWER,
+				port--, NULL, 0);
+	msleep(20);
+}
+
+/* Called from some interrupts, timers, and so on.
+ * It calls driver completion functions, after dropping oxu->lock.
+ */
+static void ehci_work(struct oxu_hcd *oxu)
+{
+	timer_action_done(oxu, TIMER_IO_WATCHDOG);
+	if (oxu->reclaim_ready)
+		end_unlink_async(oxu);
+
+	/* another CPU may drop oxu->lock during a schedule scan while
+	 * it reports urb completions.  this flag guards against bogus
+	 * attempts at re-entrant schedule scanning.
+	 */
+	if (oxu->scanning)
+		return;
+	oxu->scanning = 1;
+	scan_async(oxu);
+	if (oxu->next_uframe != -1)
+		scan_periodic(oxu);
+	oxu->scanning = 0;
+
+	/* the IO watchdog guards against hardware or driver bugs that
+	 * misplace IRQs, and should let us run completely without IRQs.
+	 * such lossage has been observed on both VT6202 and VT8235.
+	 */
+	if (HC_IS_RUNNING(oxu_to_hcd(oxu)->state) &&
+			(oxu->async->qh_next.ptr != NULL ||
+			 oxu->periodic_sched != 0))
+		timer_action(oxu, TIMER_IO_WATCHDOG);
+}
+
+static void unlink_async(struct oxu_hcd *oxu, struct ehci_qh *qh)
+{
+	/* if we need to use IAA and it's busy, defer */
+	if (qh->qh_state == QH_STATE_LINKED
+			&& oxu->reclaim
+			&& HC_IS_RUNNING(oxu_to_hcd(oxu)->state)) {
+		struct ehci_qh		*last;
+
+		for (last = oxu->reclaim;
+				last->reclaim;
+				last = last->reclaim)
+			continue;
+		qh->qh_state = QH_STATE_UNLINK_WAIT;
+		last->reclaim = qh;
+
+	/* bypass IAA if the hc can't care */
+	} else if (!HC_IS_RUNNING(oxu_to_hcd(oxu)->state) && oxu->reclaim)
+		end_unlink_async(oxu);
+
+	/* something else might have unlinked the qh by now */
+	if (qh->qh_state == QH_STATE_LINKED)
+		start_unlink_async(oxu, qh);
+}
+
+/*
+ * USB host controller methods
+ */
+
+static irqreturn_t oxu210_hcd_irq(struct usb_hcd *hcd)
+{
+	struct oxu_hcd *oxu = hcd_to_oxu(hcd);
+	u32 status, pcd_status = 0;
+	int bh;
+
+	spin_lock(&oxu->lock);
+
+	status = readl(&oxu->regs->status);
+
+	/* e.g. cardbus physical eject */
+	if (status == ~(u32) 0) {
+		oxu_dbg(oxu, "device removed\n");
+		goto dead;
+	}
+
+	status &= INTR_MASK;
+	if (!status) {			/* irq sharing? */
+		spin_unlock(&oxu->lock);
+		return IRQ_NONE;
+	}
+
+	/* clear (just) interrupts */
+	writel(status, &oxu->regs->status);
+	readl(&oxu->regs->command);	/* unblock posted write */
+	bh = 0;
+
+#ifdef OXU_VERBOSE_DEBUG
+	/* unrequested/ignored: Frame List Rollover */
+	dbg_status(oxu, "irq", status);
+#endif
+
+	/* INT, ERR, and IAA interrupt rates can be throttled */
+
+	/* normal [4.15.1.2] or error [4.15.1.1] completion */
+	if (likely((status & (STS_INT|STS_ERR)) != 0))
+		bh = 1;
+
+	/* complete the unlinking of some qh [4.15.2.3] */
+	if (status & STS_IAA) {
+		oxu->reclaim_ready = 1;
+		bh = 1;
+	}
+
+	/* remote wakeup [4.3.1] */
+	if (status & STS_PCD) {
+		unsigned i = HCS_N_PORTS(oxu->hcs_params);
+		pcd_status = status;
+
+		/* resume root hub? */
+		if (!(readl(&oxu->regs->command) & CMD_RUN))
+			usb_hcd_resume_root_hub(hcd);
+
+		while (i--) {
+			int pstatus = readl(&oxu->regs->port_status[i]);
+
+			if (pstatus & PORT_OWNER)
+				continue;
+			if (!(pstatus & PORT_RESUME)
+					|| oxu->reset_done[i] != 0)
+				continue;
+
+			/* start 20 msec resume signaling from this port,
+			 * and make khubd collect PORT_STAT_C_SUSPEND to
+			 * stop that signaling.
+			 */
+			oxu->reset_done[i] = jiffies + msecs_to_jiffies(20);
+			oxu_dbg(oxu, "port %d remote wakeup\n", i + 1);
+			mod_timer(&hcd->rh_timer, oxu->reset_done[i]);
+		}
+	}
+
+	/* PCI errors [4.15.2.4] */
+	if (unlikely((status & STS_FATAL) != 0)) {
+		/* bogus "fatal" IRQs appear on some chips... why?  */
+		status = readl(&oxu->regs->status);
+		dbg_cmd(oxu, "fatal", readl(&oxu->regs->command));
+		dbg_status(oxu, "fatal", status);
+		if (status & STS_HALT) {
+			oxu_err(oxu, "fatal error\n");
+dead:
+			ehci_reset(oxu);
+			writel(0, &oxu->regs->configured_flag);
+			/* generic layer kills/unlinks all urbs, then
+			 * uses oxu_stop to clean up the rest
+			 */
+			bh = 1;
+		}
+	}
+
+	if (bh)
+		ehci_work(oxu);
+	spin_unlock(&oxu->lock);
+	if (pcd_status & STS_PCD)
+		usb_hcd_poll_rh_status(hcd);
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t oxu_irq(struct usb_hcd *hcd)
+{
+	struct oxu_hcd *oxu = hcd_to_oxu(hcd);
+	int ret = IRQ_HANDLED;
+
+	u32 status = oxu_readl(hcd->regs, OXU_CHIPIRQSTATUS);
+	u32 enable = oxu_readl(hcd->regs, OXU_CHIPIRQEN_SET);
+
+	/* Disable all interrupt */
+	oxu_writel(hcd->regs, OXU_CHIPIRQEN_CLR, enable);
+
+	if ((oxu->is_otg && (status & OXU_USBOTGI)) ||
+		(!oxu->is_otg && (status & OXU_USBSPHI)))
+		oxu210_hcd_irq(hcd);
+	else
+		ret = IRQ_NONE;
+
+	/* Enable all interrupt back */
+	oxu_writel(hcd->regs, OXU_CHIPIRQEN_SET, enable);
+
+	return ret;
+}
+
+static void oxu_watchdog(unsigned long param)
+{
+	struct oxu_hcd	*oxu = (struct oxu_hcd *) param;
+	unsigned long flags;
+
+	spin_lock_irqsave(&oxu->lock, flags);
+
+	/* lost IAA irqs wedge things badly; seen with a vt8235 */
+	if (oxu->reclaim) {
+		u32 status = readl(&oxu->regs->status);
+		if (status & STS_IAA) {
+			oxu_vdbg(oxu, "lost IAA\n");
+			writel(STS_IAA, &oxu->regs->status);
+			oxu->reclaim_ready = 1;
+		}
+	}
+
+	/* stop async processing after it's idled a bit */
+	if (test_bit(TIMER_ASYNC_OFF, &oxu->actions))
+		start_unlink_async(oxu, oxu->async);
+
+	/* oxu could run by timer, without IRQs ... */
+	ehci_work(oxu);
+
+	spin_unlock_irqrestore(&oxu->lock, flags);
+}
+
+/* One-time init, only for memory state.
+ */
+static int oxu_hcd_init(struct usb_hcd *hcd)
+{
+	struct oxu_hcd *oxu = hcd_to_oxu(hcd);
+	u32 temp;
+	int retval;
+	u32 hcc_params;
+
+	spin_lock_init(&oxu->lock);
+
+	init_timer(&oxu->watchdog);
+	oxu->watchdog.function = oxu_watchdog;
+	oxu->watchdog.data = (unsigned long) oxu;
+
+	/*
+	 * hw default: 1K periodic list heads, one per frame.
+	 * periodic_size can shrink by USBCMD update if hcc_params allows.
+	 */
+	oxu->periodic_size = DEFAULT_I_TDPS;
+	retval = ehci_mem_init(oxu, GFP_KERNEL);
+	if (retval < 0)
+		return retval;
+
+	/* controllers may cache some of the periodic schedule ... */
+	hcc_params = readl(&oxu->caps->hcc_params);
+	if (HCC_ISOC_CACHE(hcc_params))		/* full frame cache */
+		oxu->i_thresh = 8;
+	else					/* N microframes cached */
+		oxu->i_thresh = 2 + HCC_ISOC_THRES(hcc_params);
+
+	oxu->reclaim = NULL;
+	oxu->reclaim_ready = 0;
+	oxu->next_uframe = -1;
+
+	/*
+	 * dedicate a qh for the async ring head, since we couldn't unlink
+	 * a 'real' qh without stopping the async schedule [4.8].  use it
+	 * as the 'reclamation list head' too.
+	 * its dummy is used in hw_alt_next of many tds, to prevent the qh
+	 * from automatically advancing to the next td after short reads.
+	 */
+	oxu->async->qh_next.qh = NULL;
+	oxu->async->hw_next = QH_NEXT(oxu->async->qh_dma);
+	oxu->async->hw_info1 = cpu_to_le32(QH_HEAD);
+	oxu->async->hw_token = cpu_to_le32(QTD_STS_HALT);
+	oxu->async->hw_qtd_next = EHCI_LIST_END;
+	oxu->async->qh_state = QH_STATE_LINKED;
+	oxu->async->hw_alt_next = QTD_NEXT(oxu->async->dummy->qtd_dma);
+
+	/* clear interrupt enables, set irq latency */
+	if (log2_irq_thresh < 0 || log2_irq_thresh > 6)
+		log2_irq_thresh = 0;
+	temp = 1 << (16 + log2_irq_thresh);
+	if (HCC_CANPARK(hcc_params)) {
+		/* HW default park == 3, on hardware that supports it (like
+		 * NVidia and ALI silicon), maximizes throughput on the async
+		 * schedule by avoiding QH fetches between transfers.
+		 *
+		 * With fast usb storage devices and NForce2, "park" seems to
+		 * make problems:  throughput reduction (!), data errors...
+		 */
+		if (park) {
+			park = min(park, (unsigned) 3);
+			temp |= CMD_PARK;
+			temp |= park << 8;
+		}
+		oxu_dbg(oxu, "park %d\n", park);
+	}
+	if (HCC_PGM_FRAMELISTLEN(hcc_params)) {
+		/* periodic schedule size can be smaller than default */
+		temp &= ~(3 << 2);
+		temp |= (EHCI_TUNE_FLS << 2);
+	}
+	oxu->command = temp;
+
+	return 0;
+}
+
+/* Called during probe() after chip reset completes.
+ */
+static int oxu_reset(struct usb_hcd *hcd)
+{
+	struct oxu_hcd *oxu = hcd_to_oxu(hcd);
+	int ret;
+
+	spin_lock_init(&oxu->mem_lock);
+	INIT_LIST_HEAD(&oxu->urb_list);
+	oxu->urb_len = 0;
+
+	/* FIMXE */
+	hcd->self.controller->dma_mask = 0UL;
+
+	if (oxu->is_otg) {
+		oxu->caps = hcd->regs + OXU_OTG_CAP_OFFSET;
+		oxu->regs = hcd->regs + OXU_OTG_CAP_OFFSET + \
+			HC_LENGTH(readl(&oxu->caps->hc_capbase));
+
+		oxu->mem = hcd->regs + OXU_SPH_MEM;
+	} else {
+		oxu->caps = hcd->regs + OXU_SPH_CAP_OFFSET;
+		oxu->regs = hcd->regs + OXU_SPH_CAP_OFFSET + \
+			HC_LENGTH(readl(&oxu->caps->hc_capbase));
+
+		oxu->mem = hcd->regs + OXU_OTG_MEM;
+	}
+
+	oxu->hcs_params = readl(&oxu->caps->hcs_params);
+	oxu->sbrn = 0x20;
+
+	ret = oxu_hcd_init(hcd);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int oxu_run(struct usb_hcd *hcd)
+{
+	struct oxu_hcd *oxu = hcd_to_oxu(hcd);
+	int retval;
+	u32 temp, hcc_params;
+
+	hcd->uses_new_polling = 1;
+	hcd->poll_rh = 0;
+
+	/* EHCI spec section 4.1 */
+	retval = ehci_reset(oxu);
+	if (retval != 0) {
+		ehci_mem_cleanup(oxu);
+		return retval;
+	}
+	writel(oxu->periodic_dma, &oxu->regs->frame_list);
+	writel((u32) oxu->async->qh_dma, &oxu->regs->async_next);
+
+	/* hcc_params controls whether oxu->regs->segment must (!!!)
+	 * be used; it constrains QH/ITD/SITD and QTD locations.
+	 * pci_pool consistent memory always uses segment zero.
+	 * streaming mappings for I/O buffers, like pci_map_single(),
+	 * can return segments above 4GB, if the device allows.
+	 *
+	 * NOTE:  the dma mask is visible through dma_supported(), so
+	 * drivers can pass this info along ... like NETIF_F_HIGHDMA,
+	 * Scsi_Host.highmem_io, and so forth.  It's readonly to all
+	 * host side drivers though.
+	 */
+	hcc_params = readl(&oxu->caps->hcc_params);
+	if (HCC_64BIT_ADDR(hcc_params))
+		writel(0, &oxu->regs->segment);
+
+	oxu->command &= ~(CMD_LRESET | CMD_IAAD | CMD_PSE |
+				CMD_ASE | CMD_RESET);
+	oxu->command |= CMD_RUN;
+	writel(oxu->command, &oxu->regs->command);
+	dbg_cmd(oxu, "init", oxu->command);
+
+	/*
+	 * Start, enabling full USB 2.0 functionality ... usb 1.1 devices
+	 * are explicitly handed to companion controller(s), so no TT is
+	 * involved with the root hub.  (Except where one is integrated,
+	 * and there's no companion controller unless maybe for USB OTG.)
+	 */
+	hcd->state = HC_STATE_RUNNING;
+	writel(FLAG_CF, &oxu->regs->configured_flag);
+	readl(&oxu->regs->command);	/* unblock posted writes */
+
+	temp = HC_VERSION(readl(&oxu->caps->hc_capbase));
+	oxu_info(oxu, "USB %x.%x started, quasi-EHCI %x.%02x, driver %s%s\n",
+		((oxu->sbrn & 0xf0)>>4), (oxu->sbrn & 0x0f),
+		temp >> 8, temp & 0xff, DRIVER_VERSION,
+		ignore_oc ? ", overcurrent ignored" : "");
+
+	writel(INTR_MASK, &oxu->regs->intr_enable); /* Turn On Interrupts */
+
+	return 0;
+}
+
+static void oxu_stop(struct usb_hcd *hcd)
+{
+	struct oxu_hcd *oxu = hcd_to_oxu(hcd);
+
+	/* Turn off port power on all root hub ports. */
+	ehci_port_power(oxu, 0);
+
+	/* no more interrupts ... */
+	del_timer_sync(&oxu->watchdog);
+
+	spin_lock_irq(&oxu->lock);
+	if (HC_IS_RUNNING(hcd->state))
+		ehci_quiesce(oxu);
+
+	ehci_reset(oxu);
+	writel(0, &oxu->regs->intr_enable);
+	spin_unlock_irq(&oxu->lock);
+
+	/* let companion controllers work when we aren't */
+	writel(0, &oxu->regs->configured_flag);
+
+	/* root hub is shut down separately (first, when possible) */
+	spin_lock_irq(&oxu->lock);
+	if (oxu->async)
+		ehci_work(oxu);
+	spin_unlock_irq(&oxu->lock);
+	ehci_mem_cleanup(oxu);
+
+	dbg_status(oxu, "oxu_stop completed", readl(&oxu->regs->status));
+}
+
+/* Kick in for silicon on any bus (not just pci, etc).
+ * This forcibly disables dma and IRQs, helping kexec and other cases
+ * where the next system software may expect clean state.
+ */
+static void oxu_shutdown(struct usb_hcd *hcd)
+{
+	struct oxu_hcd *oxu = hcd_to_oxu(hcd);
+
+	(void) ehci_halt(oxu);
+	ehci_turn_off_all_ports(oxu);
+
+	/* make BIOS/etc use companion controller during reboot */
+	writel(0, &oxu->regs->configured_flag);
+
+	/* unblock posted writes */
+	readl(&oxu->regs->configured_flag);
+}
+
+/* Non-error returns are a promise to giveback() the urb later
+ * we drop ownership so next owner (or urb unlink) can get it
+ *
+ * urb + dev is in hcd.self.controller.urb_list
+ * we're queueing TDs onto software and hardware lists
+ *
+ * hcd-specific init for hcpriv hasn't been done yet
+ *
+ * NOTE:  control, bulk, and interrupt share the same code to append TDs
+ * to a (possibly active) QH, and the same QH scanning code.
+ */
+static int __oxu_urb_enqueue(struct usb_hcd *hcd, struct urb *urb,
+				gfp_t mem_flags)
+{
+	struct oxu_hcd *oxu = hcd_to_oxu(hcd);
+	struct list_head qtd_list;
+
+	INIT_LIST_HEAD(&qtd_list);
+
+	switch (usb_pipetype(urb->pipe)) {
+	case PIPE_CONTROL:
+	case PIPE_BULK:
+	default:
+		if (!qh_urb_transaction(oxu, urb, &qtd_list, mem_flags))
+			return -ENOMEM;
+		return submit_async(oxu, urb, &qtd_list, mem_flags);
+
+	case PIPE_INTERRUPT:
+		if (!qh_urb_transaction(oxu, urb, &qtd_list, mem_flags))
+			return -ENOMEM;
+		return intr_submit(oxu, urb, &qtd_list, mem_flags);
+
+	case PIPE_ISOCHRONOUS:
+		if (urb->dev->speed == USB_SPEED_HIGH)
+			return itd_submit(oxu, urb, mem_flags);
+		else
+			return sitd_submit(oxu, urb, mem_flags);
+	}
+}
+
+/* This function is responsible for breaking URBs with big data size
+ * into smaller size and processing small urbs in sequence.
+ */
+static int oxu_urb_enqueue(struct usb_hcd *hcd, struct urb *urb,
+				gfp_t mem_flags)
+{
+	struct oxu_hcd *oxu = hcd_to_oxu(hcd);
+	int num, rem;
+	int transfer_buffer_length;
+	void *transfer_buffer;
+	struct urb *murb;
+	int i, ret;
+
+	/* If not bulk pipe just enqueue the URB */
+	if (!usb_pipebulk(urb->pipe))
+		return __oxu_urb_enqueue(hcd, urb, mem_flags);
+
+	/* Otherwise we should verify the USB transfer buffer size! */
+	transfer_buffer = urb->transfer_buffer;
+	transfer_buffer_length = urb->transfer_buffer_length;
+
+	num = urb->transfer_buffer_length / 4096;
+	rem = urb->transfer_buffer_length % 4096;
+	if (rem != 0)
+		num++;
+
+	/* If URB is smaller than 4096 bytes just enqueue it! */
+	if (num == 1)
+		return __oxu_urb_enqueue(hcd, urb, mem_flags);
+
+	/* Ok, we have more job to do! :) */
+
+	for (i = 0; i < num - 1; i++) {
+		/* Get free micro URB poll till a free urb is recieved */
+
+		do {
+			murb = (struct urb *) oxu_murb_alloc(oxu);
+			if (!murb)
+				schedule();
+		} while (!murb);
+
+		/* Coping the urb */
+		memcpy(murb, urb, sizeof(struct urb));
+
+		murb->transfer_buffer_length = 4096;
+		murb->transfer_buffer = transfer_buffer + i * 4096;
+
+		/* Null pointer for the encodes that this is a micro urb */
+		murb->complete = NULL;
+
+		((struct oxu_murb *) murb)->main = urb;
+		((struct oxu_murb *) murb)->last = 0;
+
+		/* This loop is to guarantee urb to be processed when there's
+		 * not enough resources at a particular time by retrying.
+		 */
+		do {
+			ret  = __oxu_urb_enqueue(hcd, murb, mem_flags);
+			if (ret)
+				schedule();
+		} while (ret);
+	}
+
+	/* Last urb requires special handling  */
+
+	/* Get free micro URB poll till a free urb is recieved */
+	do {
+		murb = (struct urb *) oxu_murb_alloc(oxu);
+		if (!murb)
+			schedule();
+	} while (!murb);
+
+	/* Coping the urb */
+	memcpy(murb, urb, sizeof(struct urb));
+
+	murb->transfer_buffer_length = rem > 0 ? rem : 4096;
+	murb->transfer_buffer = transfer_buffer + (num - 1) * 4096;
+
+	/* Null pointer for the encodes that this is a micro urb */
+	murb->complete = NULL;
+
+	((struct oxu_murb *) murb)->main = urb;
+	((struct oxu_murb *) murb)->last = 1;
+
+	do {
+		ret = __oxu_urb_enqueue(hcd, murb, mem_flags);
+		if (ret)
+			schedule();
+	} while (ret);
+
+	return ret;
+}
+
+/* Remove from hardware lists.
+ * Completions normally happen asynchronously
+ */
+static int oxu_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
+{
+	struct oxu_hcd *oxu = hcd_to_oxu(hcd);
+	struct ehci_qh *qh;
+	unsigned long flags;
+
+	spin_lock_irqsave(&oxu->lock, flags);
+	switch (usb_pipetype(urb->pipe)) {
+	case PIPE_CONTROL:
+	case PIPE_BULK:
+	default:
+		qh = (struct ehci_qh *) urb->hcpriv;
+		if (!qh)
+			break;
+		unlink_async(oxu, qh);
+		break;
+
+	case PIPE_INTERRUPT:
+		qh = (struct ehci_qh *) urb->hcpriv;
+		if (!qh)
+			break;
+		switch (qh->qh_state) {
+		case QH_STATE_LINKED:
+			intr_deschedule(oxu, qh);
+			/* FALL THROUGH */
+		case QH_STATE_IDLE:
+			qh_completions(oxu, qh);
+			break;
+		default:
+			oxu_dbg(oxu, "bogus qh %p state %d\n",
+					qh, qh->qh_state);
+			goto done;
+		}
+
+		/* reschedule QH iff another request is queued */
+		if (!list_empty(&qh->qtd_list)
+				&& HC_IS_RUNNING(hcd->state)) {
+			int status;
+
+			status = qh_schedule(oxu, qh);
+			spin_unlock_irqrestore(&oxu->lock, flags);
+
+			if (status != 0) {
+				/* shouldn't happen often, but ...
+				 * FIXME kill those tds' urbs
+				 */
+				err("can't reschedule qh %p, err %d",
+					qh, status);
+			}
+			return status;
+		}
+		break;
+	}
+done:
+	spin_unlock_irqrestore(&oxu->lock, flags);
+	return 0;
+}
+
+/* Bulk qh holds the data toggle */
+static void oxu_endpoint_disable(struct usb_hcd *hcd,
+					struct usb_host_endpoint *ep)
+{
+	struct oxu_hcd *oxu = hcd_to_oxu(hcd);
+	unsigned long		flags;
+	struct ehci_qh		*qh, *tmp;
+
+	/* ASSERT:  any requests/urbs are being unlinked */
+	/* ASSERT:  nobody can be submitting urbs for this any more */
+
+rescan:
+	spin_lock_irqsave(&oxu->lock, flags);
+	qh = ep->hcpriv;
+	if (!qh)
+		goto done;
+
+	/* endpoints can be iso streams.  for now, we don't
+	 * accelerate iso completions ... so spin a while.
+	 */
+	if (qh->hw_info1 == 0) {
+		oxu_vdbg(oxu, "iso delay\n");
+		goto idle_timeout;
+	}
+
+	if (!HC_IS_RUNNING(hcd->state))
+		qh->qh_state = QH_STATE_IDLE;
+	switch (qh->qh_state) {
+	case QH_STATE_LINKED:
+		for (tmp = oxu->async->qh_next.qh;
+				tmp && tmp != qh;
+				tmp = tmp->qh_next.qh)
+			continue;
+		/* periodic qh self-unlinks on empty */
+		if (!tmp)
+			goto nogood;
+		unlink_async(oxu, qh);
+		/* FALL THROUGH */
+	case QH_STATE_UNLINK:		/* wait for hw to finish? */
+idle_timeout:
+		spin_unlock_irqrestore(&oxu->lock, flags);
+		schedule_timeout_uninterruptible(1);
+		goto rescan;
+	case QH_STATE_IDLE:		/* fully unlinked */
+		if (list_empty(&qh->qtd_list)) {
+			qh_put(qh);
+			break;
+		}
+		/* else FALL THROUGH */
+	default:
+nogood:
+		/* caller was supposed to have unlinked any requests;
+		 * that's not our job.  just leak this memory.
+		 */
+		oxu_err(oxu, "qh %p (#%02x) state %d%s\n",
+			qh, ep->desc.bEndpointAddress, qh->qh_state,
+			list_empty(&qh->qtd_list) ? "" : "(has tds)");
+		break;
+	}
+	ep->hcpriv = NULL;
+done:
+	spin_unlock_irqrestore(&oxu->lock, flags);
+	return;
+}
+
+static int oxu_get_frame(struct usb_hcd *hcd)
+{
+	struct oxu_hcd *oxu = hcd_to_oxu(hcd);
+
+	return (readl(&oxu->regs->frame_index) >> 3) %
+		oxu->periodic_size;
+}
+
+/* Build "status change" packet (one or two bytes) from HC registers */
+static int oxu_hub_status_data(struct usb_hcd *hcd, char *buf)
+{
+	struct oxu_hcd *oxu = hcd_to_oxu(hcd);
+	u32 temp, mask, status = 0;
+	int ports, i, retval = 1;
+	unsigned long flags;
+
+	/* if !USB_SUSPEND, root hub timers won't get shut down ... */
+	if (!HC_IS_RUNNING(hcd->state))
+		return 0;
+
+	/* init status to no-changes */
+	buf[0] = 0;
+	ports = HCS_N_PORTS(oxu->hcs_params);
+	if (ports > 7) {
+		buf[1] = 0;
+		retval++;
+	}
+
+	/* Some boards (mostly VIA?) report bogus overcurrent indications,
+	 * causing massive log spam unless we completely ignore them.  It
+	 * may be relevant that VIA VT8235 controlers, where PORT_POWER is
+	 * always set, seem to clear PORT_OCC and PORT_CSC when writing to
+	 * PORT_POWER; that's surprising, but maybe within-spec.
+	 */
+	if (!ignore_oc)
+		mask = PORT_CSC | PORT_PEC | PORT_OCC;
+	else
+		mask = PORT_CSC | PORT_PEC;
+
+	/* no hub change reports (bit 0) for now (power, ...) */
+
+	/* port N changes (bit N)? */
+	spin_lock_irqsave(&oxu->lock, flags);
+	for (i = 0; i < ports; i++) {
+		temp = readl(&oxu->regs->port_status[i]);
+
+		/*
+		 * Return status information even for ports with OWNER set.
+		 * Otherwise khubd wouldn't see the disconnect event when a
+		 * high-speed device is switched over to the companion
+		 * controller by the user.
+		 */
+
+		if (!(temp & PORT_CONNECT))
+			oxu->reset_done[i] = 0;
+		if ((temp & mask) != 0 || ((temp & PORT_RESUME) != 0 &&
+				time_after_eq(jiffies, oxu->reset_done[i]))) {
+			if (i < 7)
+				buf[0] |= 1 << (i + 1);
+			else
+				buf[1] |= 1 << (i - 7);
+			status = STS_PCD;
+		}
+	}
+	/* FIXME autosuspend idle root hubs */
+	spin_unlock_irqrestore(&oxu->lock, flags);
+	return status ? retval : 0;
+}
+
+/* Returns the speed of a device attached to a port on the root hub. */
+static inline unsigned int oxu_port_speed(struct oxu_hcd *oxu,
+						unsigned int portsc)
+{
+	switch ((portsc >> 26) & 3) {
+	case 0:
+		return 0;
+	case 1:
+		return 1 << USB_PORT_FEAT_LOWSPEED;
+	case 2:
+	default:
+		return 1 << USB_PORT_FEAT_HIGHSPEED;
+	}
+}
+
+#define	PORT_WAKE_BITS	(PORT_WKOC_E|PORT_WKDISC_E|PORT_WKCONN_E)
+static int oxu_hub_control(struct usb_hcd *hcd, u16 typeReq,
+				u16 wValue, u16 wIndex, char *buf, u16 wLength)
+{
+	struct oxu_hcd *oxu = hcd_to_oxu(hcd);
+	int ports = HCS_N_PORTS(oxu->hcs_params);
+	u32 __iomem *status_reg = &oxu->regs->port_status[wIndex - 1];
+	u32 temp, status;
+	unsigned long	flags;
+	int retval = 0;
+	unsigned selector;
+
+	/*
+	 * FIXME:  support SetPortFeatures USB_PORT_FEAT_INDICATOR.
+	 * HCS_INDICATOR may say we can change LEDs to off/amber/green.
+	 * (track current state ourselves) ... blink for diagnostics,
+	 * power, "this is the one", etc.  EHCI spec supports this.
+	 */
+
+	spin_lock_irqsave(&oxu->lock, flags);
+	switch (typeReq) {
+	case ClearHubFeature:
+		switch (wValue) {
+		case C_HUB_LOCAL_POWER:
+		case C_HUB_OVER_CURRENT:
+			/* no hub-wide feature/status flags */
+			break;
+		default:
+			goto error;
+		}
+		break;
+	case ClearPortFeature:
+		if (!wIndex || wIndex > ports)
+			goto error;
+		wIndex--;
+		temp = readl(status_reg);
+
+		/*
+		 * Even if OWNER is set, so the port is owned by the
+		 * companion controller, khubd needs to be able to clear
+		 * the port-change status bits (especially
+		 * USB_PORT_FEAT_C_CONNECTION).
+		 */
+
+		switch (wValue) {
+		case USB_PORT_FEAT_ENABLE:
+			writel(temp & ~PORT_PE, status_reg);
+			break;
+		case USB_PORT_FEAT_C_ENABLE:
+			writel((temp & ~PORT_RWC_BITS) | PORT_PEC, status_reg);
+			break;
+		case USB_PORT_FEAT_SUSPEND:
+			if (temp & PORT_RESET)
+				goto error;
+			if (temp & PORT_SUSPEND) {
+				if ((temp & PORT_PE) == 0)
+					goto error;
+				/* resume signaling for 20 msec */
+				temp &= ~(PORT_RWC_BITS | PORT_WAKE_BITS);
+				writel(temp | PORT_RESUME, status_reg);
+				oxu->reset_done[wIndex] = jiffies
+						+ msecs_to_jiffies(20);
+			}
+			break;
+		case USB_PORT_FEAT_C_SUSPEND:
+			/* we auto-clear this feature */
+			break;
+		case USB_PORT_FEAT_POWER:
+			if (HCS_PPC(oxu->hcs_params))
+				writel(temp & ~(PORT_RWC_BITS | PORT_POWER),
+					  status_reg);
+			break;
+		case USB_PORT_FEAT_C_CONNECTION:
+			writel((temp & ~PORT_RWC_BITS) | PORT_CSC, status_reg);
+			break;
+		case USB_PORT_FEAT_C_OVER_CURRENT:
+			writel((temp & ~PORT_RWC_BITS) | PORT_OCC, status_reg);
+			break;
+		case USB_PORT_FEAT_C_RESET:
+			/* GetPortStatus clears reset */
+			break;
+		default:
+			goto error;
+		}
+		readl(&oxu->regs->command);	/* unblock posted write */
+		break;
+	case GetHubDescriptor:
+		ehci_hub_descriptor(oxu, (struct usb_hub_descriptor *)
+			buf);
+		break;
+	case GetHubStatus:
+		/* no hub-wide feature/status flags */
+		memset(buf, 0, 4);
+		break;
+	case GetPortStatus:
+		if (!wIndex || wIndex > ports)
+			goto error;
+		wIndex--;
+		status = 0;
+		temp = readl(status_reg);
+
+		/* wPortChange bits */
+		if (temp & PORT_CSC)
+			status |= 1 << USB_PORT_FEAT_C_CONNECTION;
+		if (temp & PORT_PEC)
+			status |= 1 << USB_PORT_FEAT_C_ENABLE;
+		if ((temp & PORT_OCC) && !ignore_oc)
+			status |= 1 << USB_PORT_FEAT_C_OVER_CURRENT;
+
+		/* whoever resumes must GetPortStatus to complete it!! */
+		if (temp & PORT_RESUME) {
+
+			/* Remote Wakeup received? */
+			if (!oxu->reset_done[wIndex]) {
+				/* resume signaling for 20 msec */
+				oxu->reset_done[wIndex] = jiffies
+						+ msecs_to_jiffies(20);
+				/* check the port again */
+				mod_timer(&oxu_to_hcd(oxu)->rh_timer,
+						oxu->reset_done[wIndex]);
+			}
+
+			/* resume completed? */
+			else if (time_after_eq(jiffies,
+					oxu->reset_done[wIndex])) {
+				status |= 1 << USB_PORT_FEAT_C_SUSPEND;
+				oxu->reset_done[wIndex] = 0;
+
+				/* stop resume signaling */
+				temp = readl(status_reg);
+				writel(temp & ~(PORT_RWC_BITS | PORT_RESUME),
+					status_reg);
+				retval = handshake(oxu, status_reg,
+					   PORT_RESUME, 0, 2000 /* 2msec */);
+				if (retval != 0) {
+					oxu_err(oxu,
+						"port %d resume error %d\n",
+						wIndex + 1, retval);
+					goto error;
+				}
+				temp &= ~(PORT_SUSPEND|PORT_RESUME|(3<<10));
+			}
+		}
+
+		/* whoever resets must GetPortStatus to complete it!! */
+		if ((temp & PORT_RESET)
+				&& time_after_eq(jiffies,
+					oxu->reset_done[wIndex])) {
+			status |= 1 << USB_PORT_FEAT_C_RESET;
+			oxu->reset_done[wIndex] = 0;
+
+			/* force reset to complete */
+			writel(temp & ~(PORT_RWC_BITS | PORT_RESET),
+					status_reg);
+			/* REVISIT:  some hardware needs 550+ usec to clear
+			 * this bit; seems too long to spin routinely...
+			 */
+			retval = handshake(oxu, status_reg,
+					PORT_RESET, 0, 750);
+			if (retval != 0) {
+				oxu_err(oxu, "port %d reset error %d\n",
+					wIndex + 1, retval);
+				goto error;
+			}
+
+			/* see what we found out */
+			temp = check_reset_complete(oxu, wIndex, status_reg,
+					readl(status_reg));
+		}
+
+		/* transfer dedicated ports to the companion hc */
+		if ((temp & PORT_CONNECT) &&
+				test_bit(wIndex, &oxu->companion_ports)) {
+			temp &= ~PORT_RWC_BITS;
+			temp |= PORT_OWNER;
+			writel(temp, status_reg);
+			oxu_dbg(oxu, "port %d --> companion\n", wIndex + 1);
+			temp = readl(status_reg);
+		}
+
+		/*
+		 * Even if OWNER is set, there's no harm letting khubd
+		 * see the wPortStatus values (they should all be 0 except
+		 * for PORT_POWER anyway).
+		 */
+
+		if (temp & PORT_CONNECT) {
+			status |= 1 << USB_PORT_FEAT_CONNECTION;
+			/* status may be from integrated TT */
+			status |= oxu_port_speed(oxu, temp);
+		}
+		if (temp & PORT_PE)
+			status |= 1 << USB_PORT_FEAT_ENABLE;
+		if (temp & (PORT_SUSPEND|PORT_RESUME))
+			status |= 1 << USB_PORT_FEAT_SUSPEND;
+		if (temp & PORT_OC)
+			status |= 1 << USB_PORT_FEAT_OVER_CURRENT;
+		if (temp & PORT_RESET)
+			status |= 1 << USB_PORT_FEAT_RESET;
+		if (temp & PORT_POWER)
+			status |= 1 << USB_PORT_FEAT_POWER;
+
+#ifndef	OXU_VERBOSE_DEBUG
+	if (status & ~0xffff)	/* only if wPortChange is interesting */
+#endif
+		dbg_port(oxu, "GetStatus", wIndex + 1, temp);
+		put_unaligned(cpu_to_le32(status), (__le32 *) buf);
+		break;
+	case SetHubFeature:
+		switch (wValue) {
+		case C_HUB_LOCAL_POWER:
+		case C_HUB_OVER_CURRENT:
+			/* no hub-wide feature/status flags */
+			break;
+		default:
+			goto error;
+		}
+		break;
+	case SetPortFeature:
+		selector = wIndex >> 8;
+		wIndex &= 0xff;
+		if (!wIndex || wIndex > ports)
+			goto error;
+		wIndex--;
+		temp = readl(status_reg);
+		if (temp & PORT_OWNER)
+			break;
+
+		temp &= ~PORT_RWC_BITS;
+		switch (wValue) {
+		case USB_PORT_FEAT_SUSPEND:
+			if ((temp & PORT_PE) == 0
+					|| (temp & PORT_RESET) != 0)
+				goto error;
+			if (device_may_wakeup(&hcd->self.root_hub->dev))
+				temp |= PORT_WAKE_BITS;
+			writel(temp | PORT_SUSPEND, status_reg);
+			break;
+		case USB_PORT_FEAT_POWER:
+			if (HCS_PPC(oxu->hcs_params))
+				writel(temp | PORT_POWER, status_reg);
+			break;
+		case USB_PORT_FEAT_RESET:
+			if (temp & PORT_RESUME)
+				goto error;
+			/* line status bits may report this as low speed,
+			 * which can be fine if this root hub has a
+			 * transaction translator built in.
+			 */
+			oxu_vdbg(oxu, "port %d reset\n", wIndex + 1);
+			temp |= PORT_RESET;
+			temp &= ~PORT_PE;
+
+			/*
+			 * caller must wait, then call GetPortStatus
+			 * usb 2.0 spec says 50 ms resets on root
+			 */
+			oxu->reset_done[wIndex] = jiffies
+					+ msecs_to_jiffies(50);
+			writel(temp, status_reg);
+			break;
+
+		/* For downstream facing ports (these):  one hub port is put
+		 * into test mode according to USB2 11.24.2.13, then the hub
+		 * must be reset (which for root hub now means rmmod+modprobe,
+		 * or else system reboot).  See EHCI 2.3.9 and 4.14 for info
+		 * about the EHCI-specific stuff.
+		 */
+		case USB_PORT_FEAT_TEST:
+			if (!selector || selector > 5)
+				goto error;
+			ehci_quiesce(oxu);
+			ehci_halt(oxu);
+			temp |= selector << 16;
+			writel(temp, status_reg);
+			break;
+
+		default:
+			goto error;
+		}
+		readl(&oxu->regs->command);	/* unblock posted writes */
+		break;
+
+	default:
+error:
+		/* "stall" on error */
+		retval = -EPIPE;
+	}
+	spin_unlock_irqrestore(&oxu->lock, flags);
+	return retval;
+}
+
+#ifdef CONFIG_PM
+
+static int oxu_bus_suspend(struct usb_hcd *hcd)
+{
+	struct oxu_hcd *oxu = hcd_to_oxu(hcd);
+	int port;
+	int mask;
+
+	oxu_dbg(oxu, "suspend root hub\n");
+
+	if (time_before(jiffies, oxu->next_statechange))
+		msleep(5);
+
+	port = HCS_N_PORTS(oxu->hcs_params);
+	spin_lock_irq(&oxu->lock);
+
+	/* stop schedules, clean any completed work */
+	if (HC_IS_RUNNING(hcd->state)) {
+		ehci_quiesce(oxu);
+		hcd->state = HC_STATE_QUIESCING;
+	}
+	oxu->command = readl(&oxu->regs->command);
+	if (oxu->reclaim)
+		oxu->reclaim_ready = 1;
+	ehci_work(oxu);
+
+	/* Unlike other USB host controller types, EHCI doesn't have
+	 * any notion of "global" or bus-wide suspend.  The driver has
+	 * to manually suspend all the active unsuspended ports, and
+	 * then manually resume them in the bus_resume() routine.
+	 */
+	oxu->bus_suspended = 0;
+	while (port--) {
+		u32 __iomem *reg = &oxu->regs->port_status[port];
+		u32 t1 = readl(reg) & ~PORT_RWC_BITS;
+		u32 t2 = t1;
+
+		/* keep track of which ports we suspend */
+		if ((t1 & PORT_PE) && !(t1 & PORT_OWNER) &&
+				!(t1 & PORT_SUSPEND)) {
+			t2 |= PORT_SUSPEND;
+			set_bit(port, &oxu->bus_suspended);
+		}
+
+		/* enable remote wakeup on all ports */
+		if (device_may_wakeup(&hcd->self.root_hub->dev))
+			t2 |= PORT_WKOC_E|PORT_WKDISC_E|PORT_WKCONN_E;
+		else
+			t2 &= ~(PORT_WKOC_E|PORT_WKDISC_E|PORT_WKCONN_E);
+
+		if (t1 != t2) {
+			oxu_vdbg(oxu, "port %d, %08x -> %08x\n",
+				port + 1, t1, t2);
+			writel(t2, reg);
+		}
+	}
+
+	/* turn off now-idle HC */
+	del_timer_sync(&oxu->watchdog);
+	ehci_halt(oxu);
+	hcd->state = HC_STATE_SUSPENDED;
+
+	/* allow remote wakeup */
+	mask = INTR_MASK;
+	if (!device_may_wakeup(&hcd->self.root_hub->dev))
+		mask &= ~STS_PCD;
+	writel(mask, &oxu->regs->intr_enable);
+	readl(&oxu->regs->intr_enable);
+
+	oxu->next_statechange = jiffies + msecs_to_jiffies(10);
+	spin_unlock_irq(&oxu->lock);
+	return 0;
+}
+
+/* Caller has locked the root hub, and should reset/reinit on error */
+static int oxu_bus_resume(struct usb_hcd *hcd)
+{
+	struct oxu_hcd *oxu = hcd_to_oxu(hcd);
+	u32 temp;
+	int i;
+
+	if (time_before(jiffies, oxu->next_statechange))
+		msleep(5);
+	spin_lock_irq(&oxu->lock);
+
+	/* Ideally and we've got a real resume here, and no port's power
+	 * was lost.  (For PCI, that means Vaux was maintained.)  But we
+	 * could instead be restoring a swsusp snapshot -- so that BIOS was
+	 * the last user of the controller, not reset/pm hardware keeping
+	 * state we gave to it.
+	 */
+	temp = readl(&oxu->regs->intr_enable);
+	oxu_dbg(oxu, "resume root hub%s\n", temp ? "" : " after power loss");
+
+	/* at least some APM implementations will try to deliver
+	 * IRQs right away, so delay them until we're ready.
+	 */
+	writel(0, &oxu->regs->intr_enable);
+
+	/* re-init operational registers */
+	writel(0, &oxu->regs->segment);
+	writel(oxu->periodic_dma, &oxu->regs->frame_list);
+	writel((u32) oxu->async->qh_dma, &oxu->regs->async_next);
+
+	/* restore CMD_RUN, framelist size, and irq threshold */
+	writel(oxu->command, &oxu->regs->command);
+
+	/* Some controller/firmware combinations need a delay during which
+	 * they set up the port statuses.  See Bugzilla #8190. */
+	mdelay(8);
+
+	/* manually resume the ports we suspended during bus_suspend() */
+	i = HCS_N_PORTS(oxu->hcs_params);
+	while (i--) {
+		temp = readl(&oxu->regs->port_status[i]);
+		temp &= ~(PORT_RWC_BITS
+			| PORT_WKOC_E | PORT_WKDISC_E | PORT_WKCONN_E);
+		if (test_bit(i, &oxu->bus_suspended) && (temp & PORT_SUSPEND)) {
+			oxu->reset_done[i] = jiffies + msecs_to_jiffies(20);
+			temp |= PORT_RESUME;
+		}
+		writel(temp, &oxu->regs->port_status[i]);
+	}
+	i = HCS_N_PORTS(oxu->hcs_params);
+	mdelay(20);
+	while (i--) {
+		temp = readl(&oxu->regs->port_status[i]);
+		if (test_bit(i, &oxu->bus_suspended) && (temp & PORT_SUSPEND)) {
+			temp &= ~(PORT_RWC_BITS | PORT_RESUME);
+			writel(temp, &oxu->regs->port_status[i]);
+			oxu_vdbg(oxu, "resumed port %d\n", i + 1);
+		}
+	}
+	(void) readl(&oxu->regs->command);
+
+	/* maybe re-activate the schedule(s) */
+	temp = 0;
+	if (oxu->async->qh_next.qh)
+		temp |= CMD_ASE;
+	if (oxu->periodic_sched)
+		temp |= CMD_PSE;
+	if (temp) {
+		oxu->command |= temp;
+		writel(oxu->command, &oxu->regs->command);
+	}
+
+	oxu->next_statechange = jiffies + msecs_to_jiffies(5);
+	hcd->state = HC_STATE_RUNNING;
+
+	/* Now we can safely re-enable irqs */
+	writel(INTR_MASK, &oxu->regs->intr_enable);
+
+	spin_unlock_irq(&oxu->lock);
+	return 0;
+}
+
+#else
+
+static int oxu_bus_suspend(struct usb_hcd *hcd)
+{
+	return 0;
+}
+
+static int oxu_bus_resume(struct usb_hcd *hcd)
+{
+	return 0;
+}
+
+#endif	/* CONFIG_PM */
+
+static const struct hc_driver oxu_hc_driver = {
+	.description =		"oxu210hp_hcd",
+	.product_desc =		"oxu210hp HCD",
+	.hcd_priv_size =	sizeof(struct oxu_hcd),
+
+	/*
+	 * Generic hardware linkage
+	 */
+	.irq =			oxu_irq,
+	.flags =		HCD_MEMORY | HCD_USB2,
+
+	/*
+	 * Basic lifecycle operations
+	 */
+	.reset =		oxu_reset,
+	.start =		oxu_run,
+	.stop =			oxu_stop,
+	.shutdown =		oxu_shutdown,
+
+	/*
+	 * Managing i/o requests and associated device resources
+	 */
+	.urb_enqueue =		oxu_urb_enqueue,
+	.urb_dequeue =		oxu_urb_dequeue,
+	.endpoint_disable =	oxu_endpoint_disable,
+
+	/*
+	 * Scheduling support
+	 */
+	.get_frame_number =	oxu_get_frame,
+
+	/*
+	 * Root hub support
+	 */
+	.hub_status_data =	oxu_hub_status_data,
+	.hub_control =		oxu_hub_control,
+	.bus_suspend =		oxu_bus_suspend,
+	.bus_resume =		oxu_bus_resume,
+};
+
+/*
+ * Module stuff
+ */
+
+static void oxu_configuration(struct platform_device *pdev, void *base)
+{
+	u32 tmp;
+
+	/* Initialize top level registers.
+	 * First write ever
+	 */
+	oxu_writel(base, OXU_HOSTIFCONFIG, 0x0000037D);
+	oxu_writel(base, OXU_SOFTRESET, OXU_SRESET);
+	oxu_writel(base, OXU_HOSTIFCONFIG, 0x0000037D);
+
+	tmp = oxu_readl(base, OXU_PIOBURSTREADCTRL);
+	oxu_writel(base, OXU_PIOBURSTREADCTRL, tmp | 0x0040);
+
+	oxu_writel(base, OXU_ASO, OXU_SPHPOEN | OXU_OVRCCURPUPDEN |
+					OXU_COMPARATOR | OXU_ASO_OP);
+
+	tmp = oxu_readl(base, OXU_CLKCTRL_SET);
+	oxu_writel(base, OXU_CLKCTRL_SET, tmp | OXU_SYSCLKEN | OXU_USBOTGCLKEN);
+
+	/* Clear all top interrupt enable */
+	oxu_writel(base, OXU_CHIPIRQEN_CLR, 0xff);
+
+	/* Clear all top interrupt status */
+	oxu_writel(base, OXU_CHIPIRQSTATUS, 0xff);
+
+	/* Enable all needed top interrupt except OTG SPH core */
+	oxu_writel(base, OXU_CHIPIRQEN_SET, OXU_USBSPHLPWUI | OXU_USBOTGLPWUI);
+}
+
+static int oxu_verify_id(struct platform_device *pdev, void *base)
+{
+	u32 id;
+	char *bo[] = {
+		"reserved",
+		"128-pin LQFP",
+		"84-pin TFBGA",
+		"reserved",
+	};
+
+	/* Read controller signature register to find a match */
+	id = oxu_readl(base, OXU_DEVICEID);
+	dev_info(&pdev->dev, "device ID %x\n", id);
+	if ((id & OXU_REV_MASK) != (OXU_REV_2100 << OXU_REV_SHIFT))
+		return -1;
+
+	dev_info(&pdev->dev, "found device %x %s (%04x:%04x)\n",
+		id >> OXU_REV_SHIFT,
+		bo[(id & OXU_BO_MASK) >> OXU_BO_SHIFT],
+		(id & OXU_MAJ_REV_MASK) >> OXU_MAJ_REV_SHIFT,
+		(id & OXU_MIN_REV_MASK) >> OXU_MIN_REV_SHIFT);
+
+	return 0;
+}
+
+static const struct hc_driver oxu_hc_driver;
+static struct usb_hcd *oxu_create(struct platform_device *pdev,
+				unsigned long memstart, unsigned long memlen,
+				void *base, int irq, int otg)
+{
+	struct device *dev = &pdev->dev;
+
+	struct usb_hcd *hcd;
+	struct oxu_hcd *oxu;
+	int ret;
+
+	/* Set endian mode and host mode */
+	oxu_writel(base + (otg ? OXU_OTG_CORE_OFFSET : OXU_SPH_CORE_OFFSET),
+				OXU_USBMODE,
+				OXU_CM_HOST_ONLY | OXU_ES_LITTLE | OXU_VBPS);
+
+	hcd = usb_create_hcd(&oxu_hc_driver, dev,
+				otg ? "oxu210hp_otg" : "oxu210hp_sph");
+	if (!hcd)
+		return ERR_PTR(-ENOMEM);
+
+	hcd->rsrc_start = memstart;
+	hcd->rsrc_len = memlen;
+	hcd->regs = base;
+	hcd->irq = irq;
+	hcd->state = HC_STATE_HALT;
+
+	oxu = hcd_to_oxu(hcd);
+	oxu->is_otg = otg;
+
+	ret = usb_add_hcd(hcd, irq, IRQF_SHARED);
+	if (ret < 0)
+		return ERR_PTR(ret);
+
+	return hcd;
+}
+
+static int oxu_init(struct platform_device *pdev,
+				unsigned long memstart, unsigned long memlen,
+				void *base, int irq)
+{
+	struct oxu_info *info = platform_get_drvdata(pdev);
+	struct usb_hcd *hcd;
+	int ret;
+
+	/* First time configuration at start up */
+	oxu_configuration(pdev, base);
+
+	ret = oxu_verify_id(pdev, base);
+	if (ret) {
+		dev_err(&pdev->dev, "no devices found!\n");
+		return -ENODEV;
+	}
+
+	/* Create the OTG controller */
+	hcd = oxu_create(pdev, memstart, memlen, base, irq, 1);
+	if (IS_ERR(hcd)) {
+		dev_err(&pdev->dev, "cannot create OTG controller!\n");
+		ret = PTR_ERR(hcd);
+		goto error_create_otg;
+	}
+	info->hcd[0] = hcd;
+
+	/* Create the SPH host controller */
+	hcd = oxu_create(pdev, memstart, memlen, base, irq, 0);
+	if (IS_ERR(hcd)) {
+		dev_err(&pdev->dev, "cannot create SPH controller!\n");
+		ret = PTR_ERR(hcd);
+		goto error_create_sph;
+	}
+	info->hcd[1] = hcd;
+
+	oxu_writel(base, OXU_CHIPIRQEN_SET,
+		oxu_readl(base, OXU_CHIPIRQEN_SET) | 3);
+
+	return 0;
+
+error_create_sph:
+	usb_remove_hcd(info->hcd[0]);
+	usb_put_hcd(info->hcd[0]);
+
+error_create_otg:
+	return ret;
+}
+
+static int oxu_drv_probe(struct platform_device *pdev)
+{
+	struct resource *res;
+	void *base;
+	unsigned long memstart, memlen;
+	int irq, ret;
+	struct oxu_info *info;
+
+	if (usb_disabled())
+		return -ENODEV;
+
+	/*
+	 * Get the platform resources
+	 */
+	res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (!res) {
+		dev_err(&pdev->dev,
+			"no IRQ! Check %s setup!\n", pdev->dev.bus_id);
+		return -ENODEV;
+	}
+	irq = res->start;
+	dev_dbg(&pdev->dev, "IRQ resource %d\n", irq);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "no registers address! Check %s setup!\n",
+			pdev->dev.bus_id);
+		return -ENODEV;
+	}
+	memstart = res->start;
+	memlen = res->end - res->start + 1;
+	dev_dbg(&pdev->dev, "MEM resource %lx-%lx\n", memstart, memlen);
+	if (!request_mem_region(memstart, memlen,
+				oxu_hc_driver.description)) {
+		dev_dbg(&pdev->dev, "memory area already in use\n");
+		return -EBUSY;
+	}
+
+	ret = set_irq_type(irq, IRQF_TRIGGER_FALLING);
+	if (ret) {
+		dev_err(&pdev->dev, "error setting irq type\n");
+		ret = -EFAULT;
+		goto error_set_irq_type;
+	}
+
+	base = ioremap(memstart, memlen);
+	if (!base) {
+		dev_dbg(&pdev->dev, "error mapping memory\n");
+		ret = -EFAULT;
+		goto error_ioremap;
+	}
+
+	/* Allocate a driver data struct to hold useful info for both
+	 * SPH & OTG devices
+	 */
+	info = kzalloc(sizeof(struct oxu_info), GFP_KERNEL);
+	if (!info) {
+		dev_dbg(&pdev->dev, "error allocating memory\n");
+		ret = -EFAULT;
+		goto error_alloc;
+	}
+	platform_set_drvdata(pdev, info);
+
+	ret = oxu_init(pdev, memstart, memlen, base, irq);
+	if (ret < 0) {
+		dev_dbg(&pdev->dev, "cannot init USB devices\n");
+		goto error_init;
+	}
+
+	dev_info(&pdev->dev, "devices enabled and running\n");
+	platform_set_drvdata(pdev, info);
+
+	return 0;
+
+error_init:
+	kfree(info);
+	platform_set_drvdata(pdev, NULL);
+
+error_alloc:
+	iounmap(base);
+
+error_set_irq_type:
+error_ioremap:
+	release_mem_region(memstart, memlen);
+
+	dev_err(&pdev->dev, "init %s fail, %d\n", pdev->dev.bus_id, ret);
+	return ret;
+}
+
+static void oxu_remove(struct platform_device *pdev, struct usb_hcd *hcd)
+{
+	usb_remove_hcd(hcd);
+	usb_put_hcd(hcd);
+}
+
+static int oxu_drv_remove(struct platform_device *pdev)
+{
+	struct oxu_info *info = platform_get_drvdata(pdev);
+	unsigned long memstart = info->hcd[0]->rsrc_start,
+			memlen = info->hcd[0]->rsrc_len;
+	void *base = info->hcd[0]->regs;
+
+	oxu_remove(pdev, info->hcd[0]);
+	oxu_remove(pdev, info->hcd[1]);
+
+	iounmap(base);
+	release_mem_region(memstart, memlen);
+
+	kfree(info);
+	platform_set_drvdata(pdev, NULL);
+
+	return 0;
+}
+
+static void oxu_drv_shutdown(struct platform_device *pdev)
+{
+	oxu_drv_remove(pdev);
+}
+
+#if 0
+/* FIXME: TODO */
+static int oxu_drv_suspend(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct usb_hcd *hcd = dev_get_drvdata(dev);
+
+	return 0;
+}
+
+static int oxu_drv_resume(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct usb_hcd *hcd = dev_get_drvdata(dev);
+
+	return 0;
+}
+#else
+#define oxu_drv_suspend	NULL
+#define oxu_drv_resume	NULL
+#endif
+
+static struct platform_driver oxu_driver = {
+	.probe		= oxu_drv_probe,
+	.remove		= oxu_drv_remove,
+	.shutdown	= oxu_drv_shutdown,
+	.suspend	= oxu_drv_suspend,
+	.resume		= oxu_drv_resume,
+	.driver = {
+		.name = "oxu210hp-hcd",
+		.bus = &platform_bus_type
+	}
+};
+
+static int __init oxu_module_init(void)
+{
+	int retval = 0;
+
+	retval = platform_driver_register(&oxu_driver);
+	if (retval < 0)
+		return retval;
+
+	return retval;
+}
+
+static void __exit oxu_module_cleanup(void)
+{
+	platform_driver_unregister(&oxu_driver);
+}
+
+module_init(oxu_module_init);
+module_exit(oxu_module_cleanup);
+
+MODULE_DESCRIPTION("Oxford OXU210HP HCD driver - ver. " DRIVER_VERSION);
+MODULE_AUTHOR("Rodolfo Giometti <giometti@linux.it>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/usb/host/oxu210hp.h b/drivers/usb/host/oxu210hp.h
new file mode 100644
index 000000000000..8910e271cc7d
--- /dev/null
+++ b/drivers/usb/host/oxu210hp.h
@@ -0,0 +1,447 @@
+/*
+ * Host interface registers
+ */
+
+#define OXU_DEVICEID			0x00
+	#define OXU_REV_MASK		0xffff0000
+	#define OXU_REV_SHIFT		16
+	#define OXU_REV_2100		0x2100
+	#define OXU_BO_SHIFT		8
+	#define OXU_BO_MASK		(0x3 << OXU_BO_SHIFT)
+	#define OXU_MAJ_REV_SHIFT	4
+	#define OXU_MAJ_REV_MASK	(0xf << OXU_MAJ_REV_SHIFT)
+	#define OXU_MIN_REV_SHIFT	0
+	#define OXU_MIN_REV_MASK	(0xf << OXU_MIN_REV_SHIFT)
+#define OXU_HOSTIFCONFIG		0x04
+#define OXU_SOFTRESET			0x08
+	#define OXU_SRESET		(1 << 0)
+
+#define OXU_PIOBURSTREADCTRL		0x0C
+
+#define OXU_CHIPIRQSTATUS		0x10
+#define OXU_CHIPIRQEN_SET		0x14
+#define OXU_CHIPIRQEN_CLR		0x18
+	#define OXU_USBSPHLPWUI		0x00000080
+	#define OXU_USBOTGLPWUI		0x00000040
+	#define OXU_USBSPHI		0x00000002
+	#define OXU_USBOTGI		0x00000001
+
+#define OXU_CLKCTRL_SET			0x1C
+	#define OXU_SYSCLKEN		0x00000008
+	#define OXU_USBSPHCLKEN		0x00000002
+	#define OXU_USBOTGCLKEN		0x00000001
+
+#define OXU_ASO				0x68
+	#define OXU_SPHPOEN		0x00000100
+	#define OXU_OVRCCURPUPDEN	0x00000800
+	#define OXU_ASO_OP		(1 << 10)
+	#define OXU_COMPARATOR		0x000004000
+
+#define OXU_USBMODE			0x1A8
+	#define OXU_VBPS		0x00000020
+	#define OXU_ES_LITTLE		0x00000000
+	#define OXU_CM_HOST_ONLY	0x00000003
+
+/*
+ * Proper EHCI structs & defines
+ */
+
+/* Magic numbers that can affect system performance */
+#define EHCI_TUNE_CERR		3	/* 0-3 qtd retries; 0 == don't stop */
+#define EHCI_TUNE_RL_HS		4	/* nak throttle; see 4.9 */
+#define EHCI_TUNE_RL_TT		0
+#define EHCI_TUNE_MULT_HS	1	/* 1-3 transactions/uframe; 4.10.3 */
+#define EHCI_TUNE_MULT_TT	1
+#define EHCI_TUNE_FLS		2	/* (small) 256 frame schedule */
+
+struct oxu_hcd;
+
+/* EHCI register interface, corresponds to EHCI Revision 0.95 specification */
+
+/* Section 2.2 Host Controller Capability Registers */
+struct ehci_caps {
+	/* these fields are specified as 8 and 16 bit registers,
+	 * but some hosts can't perform 8 or 16 bit PCI accesses.
+	 */
+	u32		hc_capbase;
+#define HC_LENGTH(p)		(((p)>>00)&0x00ff)	/* bits 7:0 */
+#define HC_VERSION(p)		(((p)>>16)&0xffff)	/* bits 31:16 */
+	u32		hcs_params;     /* HCSPARAMS - offset 0x4 */
+#define HCS_DEBUG_PORT(p)	(((p)>>20)&0xf)	/* bits 23:20, debug port? */
+#define HCS_INDICATOR(p)	((p)&(1 << 16))	/* true: has port indicators */
+#define HCS_N_CC(p)		(((p)>>12)&0xf)	/* bits 15:12, #companion HCs */
+#define HCS_N_PCC(p)		(((p)>>8)&0xf)	/* bits 11:8, ports per CC */
+#define HCS_PORTROUTED(p)	((p)&(1 << 7))	/* true: port routing */
+#define HCS_PPC(p)		((p)&(1 << 4))	/* true: port power control */
+#define HCS_N_PORTS(p)		(((p)>>0)&0xf)	/* bits 3:0, ports on HC */
+
+	u32		hcc_params;      /* HCCPARAMS - offset 0x8 */
+#define HCC_EXT_CAPS(p)		(((p)>>8)&0xff)	/* for pci extended caps */
+#define HCC_ISOC_CACHE(p)       ((p)&(1 << 7))  /* true: can cache isoc frame */
+#define HCC_ISOC_THRES(p)       (((p)>>4)&0x7)  /* bits 6:4, uframes cached */
+#define HCC_CANPARK(p)		((p)&(1 << 2))  /* true: can park on async qh */
+#define HCC_PGM_FRAMELISTLEN(p) ((p)&(1 << 1))  /* true: periodic_size changes*/
+#define HCC_64BIT_ADDR(p)       ((p)&(1))       /* true: can use 64-bit addr */
+	u8		portroute[8];	 /* nibbles for routing - offset 0xC */
+} __attribute__ ((packed));
+
+
+/* Section 2.3 Host Controller Operational Registers */
+struct ehci_regs {
+	/* USBCMD: offset 0x00 */
+	u32		command;
+/* 23:16 is r/w intr rate, in microframes; default "8" == 1/msec */
+#define CMD_PARK	(1<<11)		/* enable "park" on async qh */
+#define CMD_PARK_CNT(c)	(((c)>>8)&3)	/* how many transfers to park for */
+#define CMD_LRESET	(1<<7)		/* partial reset (no ports, etc) */
+#define CMD_IAAD	(1<<6)		/* "doorbell" interrupt async advance */
+#define CMD_ASE		(1<<5)		/* async schedule enable */
+#define CMD_PSE		(1<<4)		/* periodic schedule enable */
+/* 3:2 is periodic frame list size */
+#define CMD_RESET	(1<<1)		/* reset HC not bus */
+#define CMD_RUN		(1<<0)		/* start/stop HC */
+
+	/* USBSTS: offset 0x04 */
+	u32		status;
+#define STS_ASS		(1<<15)		/* Async Schedule Status */
+#define STS_PSS		(1<<14)		/* Periodic Schedule Status */
+#define STS_RECL	(1<<13)		/* Reclamation */
+#define STS_HALT	(1<<12)		/* Not running (any reason) */
+/* some bits reserved */
+	/* these STS_* flags are also intr_enable bits (USBINTR) */
+#define STS_IAA		(1<<5)		/* Interrupted on async advance */
+#define STS_FATAL	(1<<4)		/* such as some PCI access errors */
+#define STS_FLR		(1<<3)		/* frame list rolled over */
+#define STS_PCD		(1<<2)		/* port change detect */
+#define STS_ERR		(1<<1)		/* "error" completion (overflow, ...) */
+#define STS_INT		(1<<0)		/* "normal" completion (short, ...) */
+
+#define INTR_MASK (STS_IAA | STS_FATAL | STS_PCD | STS_ERR | STS_INT)
+
+	/* USBINTR: offset 0x08 */
+	u32		intr_enable;
+
+	/* FRINDEX: offset 0x0C */
+	u32		frame_index;	/* current microframe number */
+	/* CTRLDSSEGMENT: offset 0x10 */
+	u32		segment;	/* address bits 63:32 if needed */
+	/* PERIODICLISTBASE: offset 0x14 */
+	u32		frame_list;	/* points to periodic list */
+	/* ASYNCLISTADDR: offset 0x18 */
+	u32		async_next;	/* address of next async queue head */
+
+	u32		reserved[9];
+
+	/* CONFIGFLAG: offset 0x40 */
+	u32		configured_flag;
+#define FLAG_CF		(1<<0)		/* true: we'll support "high speed" */
+
+	/* PORTSC: offset 0x44 */
+	u32		port_status[0];	/* up to N_PORTS */
+/* 31:23 reserved */
+#define PORT_WKOC_E	(1<<22)		/* wake on overcurrent (enable) */
+#define PORT_WKDISC_E	(1<<21)		/* wake on disconnect (enable) */
+#define PORT_WKCONN_E	(1<<20)		/* wake on connect (enable) */
+/* 19:16 for port testing */
+#define PORT_LED_OFF	(0<<14)
+#define PORT_LED_AMBER	(1<<14)
+#define PORT_LED_GREEN	(2<<14)
+#define PORT_LED_MASK	(3<<14)
+#define PORT_OWNER	(1<<13)		/* true: companion hc owns this port */
+#define PORT_POWER	(1<<12)		/* true: has power (see PPC) */
+#define PORT_USB11(x) (((x)&(3<<10)) == (1<<10))	/* USB 1.1 device */
+/* 11:10 for detecting lowspeed devices (reset vs release ownership) */
+/* 9 reserved */
+#define PORT_RESET	(1<<8)		/* reset port */
+#define PORT_SUSPEND	(1<<7)		/* suspend port */
+#define PORT_RESUME	(1<<6)		/* resume it */
+#define PORT_OCC	(1<<5)		/* over current change */
+#define PORT_OC		(1<<4)		/* over current active */
+#define PORT_PEC	(1<<3)		/* port enable change */
+#define PORT_PE		(1<<2)		/* port enable */
+#define PORT_CSC	(1<<1)		/* connect status change */
+#define PORT_CONNECT	(1<<0)		/* device connected */
+#define PORT_RWC_BITS   (PORT_CSC | PORT_PEC | PORT_OCC)
+} __attribute__ ((packed));
+
+/* Appendix C, Debug port ... intended for use with special "debug devices"
+ * that can help if there's no serial console.  (nonstandard enumeration.)
+ */
+struct ehci_dbg_port {
+	u32	control;
+#define DBGP_OWNER	(1<<30)
+#define DBGP_ENABLED	(1<<28)
+#define DBGP_DONE	(1<<16)
+#define DBGP_INUSE	(1<<10)
+#define DBGP_ERRCODE(x)	(((x)>>7)&0x07)
+#	define DBGP_ERR_BAD	1
+#	define DBGP_ERR_SIGNAL	2
+#define DBGP_ERROR	(1<<6)
+#define DBGP_GO		(1<<5)
+#define DBGP_OUT	(1<<4)
+#define DBGP_LEN(x)	(((x)>>0)&0x0f)
+	u32	pids;
+#define DBGP_PID_GET(x)		(((x)>>16)&0xff)
+#define DBGP_PID_SET(data, tok)	(((data)<<8)|(tok))
+	u32	data03;
+	u32	data47;
+	u32	address;
+#define DBGP_EPADDR(dev, ep)	(((dev)<<8)|(ep))
+} __attribute__ ((packed));
+
+
+#define	QTD_NEXT(dma)	cpu_to_le32((u32)dma)
+
+/*
+ * EHCI Specification 0.95 Section 3.5
+ * QTD: describe data transfer components (buffer, direction, ...)
+ * See Fig 3-6 "Queue Element Transfer Descriptor Block Diagram".
+ *
+ * These are associated only with "QH" (Queue Head) structures,
+ * used with control, bulk, and interrupt transfers.
+ */
+struct ehci_qtd {
+	/* first part defined by EHCI spec */
+	__le32			hw_next;		/* see EHCI 3.5.1 */
+	__le32			hw_alt_next;		/* see EHCI 3.5.2 */
+	__le32			hw_token;		/* see EHCI 3.5.3 */
+#define	QTD_TOGGLE	(1 << 31)	/* data toggle */
+#define	QTD_LENGTH(tok)	(((tok)>>16) & 0x7fff)
+#define	QTD_IOC		(1 << 15)	/* interrupt on complete */
+#define	QTD_CERR(tok)	(((tok)>>10) & 0x3)
+#define	QTD_PID(tok)	(((tok)>>8) & 0x3)
+#define	QTD_STS_ACTIVE	(1 << 7)	/* HC may execute this */
+#define	QTD_STS_HALT	(1 << 6)	/* halted on error */
+#define	QTD_STS_DBE	(1 << 5)	/* data buffer error (in HC) */
+#define	QTD_STS_BABBLE	(1 << 4)	/* device was babbling (qtd halted) */
+#define	QTD_STS_XACT	(1 << 3)	/* device gave illegal response */
+#define	QTD_STS_MMF	(1 << 2)	/* incomplete split transaction */
+#define	QTD_STS_STS	(1 << 1)	/* split transaction state */
+#define	QTD_STS_PING	(1 << 0)	/* issue PING? */
+	__le32			hw_buf[5];		/* see EHCI 3.5.4 */
+	__le32			hw_buf_hi[5];		/* Appendix B */
+
+	/* the rest is HCD-private */
+	dma_addr_t		qtd_dma;		/* qtd address */
+	struct list_head	qtd_list;		/* sw qtd list */
+	struct urb		*urb;			/* qtd's urb */
+	size_t			length;			/* length of buffer */
+
+	u32			qtd_buffer_len;
+	void			*buffer;
+	dma_addr_t		buffer_dma;
+	void			*transfer_buffer;
+	void			*transfer_dma;
+} __attribute__ ((aligned(32)));
+
+/* mask NakCnt+T in qh->hw_alt_next */
+#define QTD_MASK __constant_cpu_to_le32 (~0x1f)
+
+#define IS_SHORT_READ(token) (QTD_LENGTH(token) != 0 && QTD_PID(token) == 1)
+
+/* Type tag from {qh, itd, sitd, fstn}->hw_next */
+#define Q_NEXT_TYPE(dma) ((dma) & __constant_cpu_to_le32 (3 << 1))
+
+/* values for that type tag */
+#define Q_TYPE_QH	__constant_cpu_to_le32 (1 << 1)
+
+/* next async queue entry, or pointer to interrupt/periodic QH */
+#define	QH_NEXT(dma)	(cpu_to_le32(((u32)dma)&~0x01f)|Q_TYPE_QH)
+
+/* for periodic/async schedules and qtd lists, mark end of list */
+#define	EHCI_LIST_END	__constant_cpu_to_le32(1) /* "null pointer" to hw */
+
+/*
+ * Entries in periodic shadow table are pointers to one of four kinds
+ * of data structure.  That's dictated by the hardware; a type tag is
+ * encoded in the low bits of the hardware's periodic schedule.  Use
+ * Q_NEXT_TYPE to get the tag.
+ *
+ * For entries in the async schedule, the type tag always says "qh".
+ */
+union ehci_shadow {
+	struct ehci_qh		*qh;		/* Q_TYPE_QH */
+	__le32			*hw_next;	/* (all types) */
+	void			*ptr;
+};
+
+/*
+ * EHCI Specification 0.95 Section 3.6
+ * QH: describes control/bulk/interrupt endpoints
+ * See Fig 3-7 "Queue Head Structure Layout".
+ *
+ * These appear in both the async and (for interrupt) periodic schedules.
+ */
+
+struct ehci_qh {
+	/* first part defined by EHCI spec */
+	__le32			hw_next;	 /* see EHCI 3.6.1 */
+	__le32			hw_info1;	/* see EHCI 3.6.2 */
+#define	QH_HEAD		0x00008000
+	__le32			hw_info2;	/* see EHCI 3.6.2 */
+#define	QH_SMASK	0x000000ff
+#define	QH_CMASK	0x0000ff00
+#define	QH_HUBADDR	0x007f0000
+#define	QH_HUBPORT	0x3f800000
+#define	QH_MULT		0xc0000000
+	__le32			hw_current;	 /* qtd list - see EHCI 3.6.4 */
+
+	/* qtd overlay (hardware parts of a struct ehci_qtd) */
+	__le32			hw_qtd_next;
+	__le32			hw_alt_next;
+	__le32			hw_token;
+	__le32			hw_buf[5];
+	__le32			hw_buf_hi[5];
+
+	/* the rest is HCD-private */
+	dma_addr_t		qh_dma;		/* address of qh */
+	union ehci_shadow	qh_next;	/* ptr to qh; or periodic */
+	struct list_head	qtd_list;	/* sw qtd list */
+	struct ehci_qtd		*dummy;
+	struct ehci_qh		*reclaim;	/* next to reclaim */
+
+	struct oxu_hcd		*oxu;
+	struct kref		kref;
+	unsigned		stamp;
+
+	u8			qh_state;
+#define	QH_STATE_LINKED		1		/* HC sees this */
+#define	QH_STATE_UNLINK		2		/* HC may still see this */
+#define	QH_STATE_IDLE		3		/* HC doesn't see this */
+#define	QH_STATE_UNLINK_WAIT	4		/* LINKED and on reclaim q */
+#define	QH_STATE_COMPLETING	5		/* don't touch token.HALT */
+
+	/* periodic schedule info */
+	u8			usecs;		/* intr bandwidth */
+	u8			gap_uf;		/* uframes split/csplit gap */
+	u8			c_usecs;	/* ... split completion bw */
+	u16			tt_usecs;	/* tt downstream bandwidth */
+	unsigned short		period;		/* polling interval */
+	unsigned short		start;		/* where polling starts */
+#define NO_FRAME ((unsigned short)~0)			/* pick new start */
+	struct usb_device	*dev;		/* access to TT */
+} __attribute__ ((aligned(32)));
+
+/*
+ * Proper OXU210HP structs
+ */
+
+#define OXU_OTG_CORE_OFFSET	0x00400
+#define OXU_OTG_CAP_OFFSET	(OXU_OTG_CORE_OFFSET + 0x100)
+#define OXU_SPH_CORE_OFFSET	0x00800
+#define OXU_SPH_CAP_OFFSET	(OXU_SPH_CORE_OFFSET + 0x100)
+
+#define OXU_OTG_MEM		0xE000
+#define OXU_SPH_MEM		0x16000
+
+/* Only how many elements & element structure are specifies here. */
+/* 2 host controllers are enabled - total size <= 28 kbytes */
+#define	DEFAULT_I_TDPS		1024
+#define QHEAD_NUM		16
+#define QTD_NUM			32
+#define SITD_NUM		8
+#define MURB_NUM		8
+
+#define BUFFER_NUM		8
+#define BUFFER_SIZE		512
+
+struct oxu_info {
+	struct usb_hcd *hcd[2];
+};
+
+struct oxu_buf {
+	u8			buffer[BUFFER_SIZE];
+} __attribute__ ((aligned(BUFFER_SIZE)));
+
+struct oxu_onchip_mem {
+	struct oxu_buf		db_pool[BUFFER_NUM];
+
+	u32			frame_list[DEFAULT_I_TDPS];
+	struct ehci_qh		qh_pool[QHEAD_NUM];
+	struct ehci_qtd		qtd_pool[QTD_NUM];
+} __attribute__ ((aligned(4 << 10)));
+
+#define	EHCI_MAX_ROOT_PORTS	15		/* see HCS_N_PORTS */
+
+struct oxu_murb {
+	struct urb		urb;
+	struct urb		*main;
+	u8			last;
+};
+
+struct oxu_hcd {				/* one per controller */
+	unsigned int		is_otg:1;
+
+	u8			qh_used[QHEAD_NUM];
+	u8			qtd_used[QTD_NUM];
+	u8			db_used[BUFFER_NUM];
+	u8			murb_used[MURB_NUM];
+
+	struct oxu_onchip_mem	__iomem *mem;
+	spinlock_t		mem_lock;
+
+	struct timer_list	urb_timer;
+
+	struct ehci_caps __iomem *caps;
+	struct ehci_regs __iomem *regs;
+
+	__u32			hcs_params;	/* cached register copy */
+	spinlock_t		lock;
+
+	/* async schedule support */
+	struct ehci_qh		*async;
+	struct ehci_qh		*reclaim;
+	unsigned		reclaim_ready:1;
+	unsigned		scanning:1;
+
+	/* periodic schedule support */
+	unsigned		periodic_size;
+	__le32			*periodic;	/* hw periodic table */
+	dma_addr_t		periodic_dma;
+	unsigned		i_thresh;	/* uframes HC might cache */
+
+	union ehci_shadow	*pshadow;	/* mirror hw periodic table */
+	int			next_uframe;	/* scan periodic, start here */
+	unsigned		periodic_sched;	/* periodic activity count */
+
+	/* per root hub port */
+	unsigned long		reset_done[EHCI_MAX_ROOT_PORTS];
+	/* bit vectors (one bit per port) */
+	unsigned long		bus_suspended;	/* which ports were
+						 * already suspended at the
+						 * start of a bus suspend
+						 */
+	unsigned long		companion_ports;/* which ports are dedicated
+						 * to the companion controller
+						 */
+
+	struct timer_list	watchdog;
+	unsigned long		actions;
+	unsigned		stamp;
+	unsigned long		next_statechange;
+	u32			command;
+
+	/* SILICON QUIRKS */
+	struct list_head	urb_list;	/* this is the head to urb
+						 * queue that didn't get enough
+						 * resources
+						 */
+	struct oxu_murb		*murb_pool;	/* murb per split big urb */
+	unsigned urb_len;
+
+	u8			sbrn;		/* packed release number */
+};
+
+#define EHCI_IAA_JIFFIES	(HZ/100)	/* arbitrary; ~10 msec */
+#define EHCI_IO_JIFFIES	 	(HZ/10)		/* io watchdog > irq_thresh */
+#define EHCI_ASYNC_JIFFIES      (HZ/20)		/* async idle timeout */
+#define EHCI_SHRINK_JIFFIES     (HZ/200)	/* async qh unlink delay */
+
+enum ehci_timer_action {
+	TIMER_IO_WATCHDOG,
+	TIMER_IAA_WATCHDOG,
+	TIMER_ASYNC_SHRINK,
+	TIMER_ASYNC_OFF,
+};
+
+#include <linux/oxu210hp.h>
diff --git a/include/linux/oxu210hp.h b/include/linux/oxu210hp.h
new file mode 100644
index 000000000000..0bf96eae5389
--- /dev/null
+++ b/include/linux/oxu210hp.h
@@ -0,0 +1,7 @@
+/* platform data for the OXU210HP HCD */
+
+struct oxu210hp_platform_data {
+	unsigned int bus16:1;
+	unsigned int use_hcd_otg:1;
+	unsigned int use_hcd_sph:1;
+};
-- 
cgit v1.2.3


From d767d888750a8e15656b7ee15d68f90a151b8936 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Thu, 6 Nov 2008 22:32:15 -0800
Subject: USB: wusb: annotate association types withe proper endianness

Also a trivial annotation in rh.c for:
drivers/usb/wusbcore/rh.c:366:9: warning: incorrect type in assignment (different base types)
drivers/usb/wusbcore/rh.c:366:9:    expected unsigned short [unsigned] [short] [usertype] <noident>
drivers/usb/wusbcore/rh.c:366:9:    got restricted __le16 [usertype] <noident>
drivers/usb/wusbcore/rh.c:367:9: warning: incorrect type in assignment (different base types)
drivers/usb/wusbcore/rh.c:367:9:    expected unsigned short [unsigned] [short] [usertype] <noident>
drivers/usb/wusbcore/rh.c:367:9:    got restricted __le16 [usertype] <noident>

Association types annotation fixes piles of warnings similar to:
drivers/usb/wusbcore/cbaf.c:238:30: warning: incorrect type in initializer (different base types)
drivers/usb/wusbcore/cbaf.c:238:30:    expected restricted __le16 [usertype] id
drivers/usb/wusbcore/cbaf.c:238:30:    got int
drivers/usb/wusbcore/cbaf.c:238:30: warning: incorrect type in initializer (different base types)
drivers/usb/wusbcore/cbaf.c:238:30:    expected restricted __le16 [usertype] len
drivers/usb/wusbcore/cbaf.c:238:30:    got int

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Cc: David Vrabel <david.vrabel@csr.com>
Cc: Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/wusbcore/rh.c       |  2 +-
 include/linux/usb/association.h | 22 +++++++++++-----------
 2 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/wusbcore/rh.c b/drivers/usb/wusbcore/rh.c
index 95c6fa3bf6b2..3937bf6f8cef 100644
--- a/drivers/usb/wusbcore/rh.c
+++ b/drivers/usb/wusbcore/rh.c
@@ -326,7 +326,7 @@ static int wusbhc_rh_clear_port_feat(struct wusbhc *wusbhc, u16 feature,
 static int wusbhc_rh_get_port_status(struct wusbhc *wusbhc, u16 port_idx,
 				     u32 *_buf, u16 wLength)
 {
-	u16 *buf = (u16 *) _buf;
+	__le16 *buf = (__le16 *)_buf;
 
 	if (port_idx > wusbhc->ports_max)
 		return -EINVAL;
diff --git a/include/linux/usb/association.h b/include/linux/usb/association.h
index 07c5e3cf5898..0a4a18b3c1bb 100644
--- a/include/linux/usb/association.h
+++ b/include/linux/usb/association.h
@@ -28,17 +28,17 @@ struct wusb_am_attr {
 };
 
 /* Different fields defined by the spec */
-#define WUSB_AR_AssociationTypeId	{ .id = 0x0000, .len =  2 }
-#define WUSB_AR_AssociationSubTypeId	{ .id = 0x0001, .len =  2 }
-#define WUSB_AR_Length			{ .id = 0x0002, .len =  4 }
-#define WUSB_AR_AssociationStatus	{ .id = 0x0004, .len =  4 }
-#define WUSB_AR_LangID			{ .id = 0x0008, .len =  2 }
-#define WUSB_AR_DeviceFriendlyName	{ .id = 0x000b, .len = 64 } /* max */
-#define WUSB_AR_HostFriendlyName	{ .id = 0x000c, .len = 64 } /* max */
-#define WUSB_AR_CHID			{ .id = 0x1000, .len = 16 }
-#define WUSB_AR_CDID			{ .id = 0x1001, .len = 16 }
-#define WUSB_AR_ConnectionContext	{ .id = 0x1002, .len = 48 }
-#define WUSB_AR_BandGroups		{ .id = 0x1004, .len =  2 }
+#define WUSB_AR_AssociationTypeId	{ .id = cpu_to_le16(0x0000), .len = cpu_to_le16(2) }
+#define WUSB_AR_AssociationSubTypeId	{ .id = cpu_to_le16(0x0001), .len = cpu_to_le16(2) }
+#define WUSB_AR_Length			{ .id = cpu_to_le16(0x0002), .len = cpu_to_le16(4) }
+#define WUSB_AR_AssociationStatus	{ .id = cpu_to_le16(0x0004), .len = cpu_to_le16(4) }
+#define WUSB_AR_LangID			{ .id = cpu_to_le16(0x0008), .len = cpu_to_le16(2) }
+#define WUSB_AR_DeviceFriendlyName	{ .id = cpu_to_le16(0x000b), .len = cpu_to_le16(64) } /* max */
+#define WUSB_AR_HostFriendlyName	{ .id = cpu_to_le16(0x000c), .len = cpu_to_le16(64) } /* max */
+#define WUSB_AR_CHID			{ .id = cpu_to_le16(0x1000), .len = cpu_to_le16(16) }
+#define WUSB_AR_CDID			{ .id = cpu_to_le16(0x1001), .len = cpu_to_le16(16) }
+#define WUSB_AR_ConnectionContext	{ .id = cpu_to_le16(0x1002), .len = cpu_to_le16(48) }
+#define WUSB_AR_BandGroups		{ .id = cpu_to_le16(0x1004), .len = cpu_to_le16(2) }
 
 /* CBAF Control Requests (AMS1.0[T4-1] */
 enum {
-- 
cgit v1.2.3


From 9ac39f28b5237a629e41ccfc1f73d3a55723045c Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Wed, 12 Nov 2008 16:19:49 -0500
Subject: USB: add asynchronous autosuspend/autoresume support

This patch (as1160b) adds support routines for asynchronous autosuspend
and autoresume, with accompanying documentation updates.  There
already are several potential users of this interface, and others are
likely to arise as autosuspend support becomes more widespread.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 Documentation/usb/power-management.txt | 22 +++++++--
 drivers/usb/core/driver.c              | 86 ++++++++++++++++++++++++++++++++++
 drivers/usb/core/hub.c                 |  5 +-
 drivers/usb/core/usb.c                 |  1 +
 drivers/usb/core/usb.h                 |  1 +
 include/linux/usb.h                    |  9 ++++
 6 files changed, 117 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/usb/power-management.txt b/Documentation/usb/power-management.txt
index e48ea1d51010..ad642615ad4c 100644
--- a/Documentation/usb/power-management.txt
+++ b/Documentation/usb/power-management.txt
@@ -313,11 +313,13 @@ three of the methods listed above.  In addition, a driver indicates
 that it supports autosuspend by setting the .supports_autosuspend flag
 in its usb_driver structure.  It is then responsible for informing the
 USB core whenever one of its interfaces becomes busy or idle.  The
-driver does so by calling these three functions:
+driver does so by calling these five functions:
 
 	int  usb_autopm_get_interface(struct usb_interface *intf);
 	void usb_autopm_put_interface(struct usb_interface *intf);
 	int  usb_autopm_set_interface(struct usb_interface *intf);
+	int  usb_autopm_get_interface_async(struct usb_interface *intf);
+	void usb_autopm_put_interface_async(struct usb_interface *intf);
 
 The functions work by maintaining a counter in the usb_interface
 structure.  When intf->pm_usage_count is > 0 then the interface is
@@ -330,10 +332,12 @@ associated with the device itself rather than any of its interfaces.
 This field is used only by the USB core.)
 
 The driver owns intf->pm_usage_count; it can modify the value however
-and whenever it likes.  A nice aspect of the usb_autopm_* routines is
-that the changes they make are protected by the usb_device structure's
-PM mutex (udev->pm_mutex); however drivers may change pm_usage_count
-without holding the mutex.
+and whenever it likes.  A nice aspect of the non-async usb_autopm_*
+routines is that the changes they make are protected by the usb_device
+structure's PM mutex (udev->pm_mutex); however drivers may change
+pm_usage_count without holding the mutex.  Drivers using the async
+routines are responsible for their own synchronization and mutual
+exclusion.
 
 	usb_autopm_get_interface() increments pm_usage_count and
 	attempts an autoresume if the new value is > 0 and the
@@ -348,6 +352,14 @@ without holding the mutex.
 	is suspended, and it attempts an autosuspend if the value is
 	<= 0 and the device isn't suspended.
 
+	usb_autopm_get_interface_async() and
+	usb_autopm_put_interface_async() do almost the same things as
+	their non-async counterparts.  The differences are: they do
+	not acquire the PM mutex, and they use a workqueue to do their
+	jobs.  As a result they can be called in an atomic context,
+	such as an URB's completion handler, but when they return the
+	device will not generally not yet be in the desired state.
+
 There also are a couple of utility routines drivers can use:
 
 	usb_autopm_enable() sets pm_usage_cnt to 0 and then calls
diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c
index 8c081308b0e2..23b3c7e79d4b 100644
--- a/drivers/usb/core/driver.c
+++ b/drivers/usb/core/driver.c
@@ -1341,6 +1341,19 @@ void usb_autosuspend_work(struct work_struct *work)
 	usb_autopm_do_device(udev, 0);
 }
 
+/* usb_autoresume_work - callback routine to autoresume a USB device */
+void usb_autoresume_work(struct work_struct *work)
+{
+	struct usb_device *udev =
+		container_of(work, struct usb_device, autoresume);
+
+	/* Wake it up, let the drivers do their thing, and then put it
+	 * back to sleep.
+	 */
+	if (usb_autopm_do_device(udev, 1) == 0)
+		usb_autopm_do_device(udev, -1);
+}
+
 /**
  * usb_autosuspend_device - delayed autosuspend of a USB device and its interfaces
  * @udev: the usb_device to autosuspend
@@ -1491,6 +1504,45 @@ void usb_autopm_put_interface(struct usb_interface *intf)
 }
 EXPORT_SYMBOL_GPL(usb_autopm_put_interface);
 
+/**
+ * usb_autopm_put_interface_async - decrement a USB interface's PM-usage counter
+ * @intf: the usb_interface whose counter should be decremented
+ *
+ * This routine does essentially the same thing as
+ * usb_autopm_put_interface(): it decrements @intf's usage counter and
+ * queues a delayed autosuspend request if the counter is <= 0.  The
+ * difference is that it does not acquire the device's pm_mutex;
+ * callers must handle all synchronization issues themselves.
+ *
+ * Typically a driver would call this routine during an URB's completion
+ * handler, if no more URBs were pending.
+ *
+ * This routine can run in atomic context.
+ */
+void usb_autopm_put_interface_async(struct usb_interface *intf)
+{
+	struct usb_device	*udev = interface_to_usbdev(intf);
+	int			status = 0;
+
+	if (intf->condition == USB_INTERFACE_UNBOUND) {
+		status = -ENODEV;
+	} else {
+		udev->last_busy = jiffies;
+		--intf->pm_usage_cnt;
+		if (udev->autosuspend_disabled || udev->autosuspend_delay < 0)
+			status = -EPERM;
+		else if (intf->pm_usage_cnt <= 0 &&
+				!timer_pending(&udev->autosuspend.timer)) {
+			queue_delayed_work(ksuspend_usb_wq, &udev->autosuspend,
+					round_jiffies_relative(
+						udev->autosuspend_delay));
+		}
+	}
+	dev_vdbg(&intf->dev, "%s: status %d cnt %d\n",
+			__func__, status, intf->pm_usage_cnt);
+}
+EXPORT_SYMBOL_GPL(usb_autopm_put_interface_async);
+
 /**
  * usb_autopm_get_interface - increment a USB interface's PM-usage counter
  * @intf: the usb_interface whose counter should be incremented
@@ -1536,6 +1588,37 @@ int usb_autopm_get_interface(struct usb_interface *intf)
 }
 EXPORT_SYMBOL_GPL(usb_autopm_get_interface);
 
+/**
+ * usb_autopm_get_interface_async - increment a USB interface's PM-usage counter
+ * @intf: the usb_interface whose counter should be incremented
+ *
+ * This routine does much the same thing as
+ * usb_autopm_get_interface(): it increments @intf's usage counter and
+ * queues an autoresume request if the result is > 0.  The differences
+ * are that it does not acquire the device's pm_mutex (callers must
+ * handle all synchronization issues themselves), and it does not
+ * autoresume the device directly (it only queues a request).  After a
+ * successful call, the device will generally not yet be resumed.
+ *
+ * This routine can run in atomic context.
+ */
+int usb_autopm_get_interface_async(struct usb_interface *intf)
+{
+	struct usb_device	*udev = interface_to_usbdev(intf);
+	int			status = 0;
+
+	if (intf->condition == USB_INTERFACE_UNBOUND)
+		status = -ENODEV;
+	else if (udev->autoresume_disabled)
+		status = -EPERM;
+	else if (++intf->pm_usage_cnt > 0 && udev->state == USB_STATE_SUSPENDED)
+		queue_work(ksuspend_usb_wq, &udev->autoresume);
+	dev_vdbg(&intf->dev, "%s: status %d cnt %d\n",
+			__func__, status, intf->pm_usage_cnt);
+	return status;
+}
+EXPORT_SYMBOL_GPL(usb_autopm_get_interface_async);
+
 /**
  * usb_autopm_set_interface - set a USB interface's autosuspend state
  * @intf: the usb_interface whose state should be set
@@ -1563,6 +1646,9 @@ EXPORT_SYMBOL_GPL(usb_autopm_set_interface);
 void usb_autosuspend_work(struct work_struct *work)
 {}
 
+void usb_autoresume_work(struct work_struct *work)
+{}
+
 #endif /* CONFIG_USB_SUSPEND */
 
 /**
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index b19cbfcd51da..95fb3104ba4f 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -1374,8 +1374,9 @@ static void usb_stop_pm(struct usb_device *udev)
 		usb_autosuspend_device(udev->parent);
 	usb_pm_unlock(udev);
 
-	/* Stop any autosuspend requests already submitted */
-	cancel_rearming_delayed_work(&udev->autosuspend);
+	/* Stop any autosuspend or autoresume requests already submitted */
+	cancel_delayed_work_sync(&udev->autosuspend);
+	cancel_work_sync(&udev->autoresume);
 }
 
 #else
diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index 400fa4cc9a34..44f2fc750b6d 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -402,6 +402,7 @@ struct usb_device *usb_alloc_dev(struct usb_device *parent,
 #ifdef	CONFIG_PM
 	mutex_init(&dev->pm_mutex);
 	INIT_DELAYED_WORK(&dev->autosuspend, usb_autosuspend_work);
+	INIT_WORK(&dev->autoresume, usb_autoresume_work);
 	dev->autosuspend_delay = usb_autosuspend_delay * HZ;
 	dev->connect_time = jiffies;
 	dev->active_duration = -jiffies;
diff --git a/drivers/usb/core/usb.h b/drivers/usb/core/usb.h
index 9a1a45ac3add..b60ebb4de1a8 100644
--- a/drivers/usb/core/usb.h
+++ b/drivers/usb/core/usb.h
@@ -45,6 +45,7 @@ extern int usb_suspend(struct device *dev, pm_message_t msg);
 extern int usb_resume(struct device *dev);
 
 extern void usb_autosuspend_work(struct work_struct *work);
+extern void usb_autoresume_work(struct work_struct *work);
 extern int usb_port_suspend(struct usb_device *dev);
 extern int usb_port_resume(struct usb_device *dev);
 extern int usb_external_suspend_device(struct usb_device *udev,
diff --git a/include/linux/usb.h b/include/linux/usb.h
index f72aa51f7bcd..859a88e6ce9c 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -398,6 +398,7 @@ struct usb_tt;
  * @urbnum: number of URBs submitted for the whole device
  * @active_duration: total time device is not suspended
  * @autosuspend: for delayed autosuspends
+ * @autoresume: for autoresumes requested while in_interrupt
  * @pm_mutex: protects PM operations
  * @last_busy: time of last use
  * @autosuspend_delay: in jiffies
@@ -476,6 +477,7 @@ struct usb_device {
 
 #ifdef CONFIG_PM
 	struct delayed_work autosuspend;
+	struct work_struct autoresume;
 	struct mutex pm_mutex;
 
 	unsigned long last_busy;
@@ -513,6 +515,8 @@ extern struct usb_device *usb_find_device(u16 vendor_id, u16 product_id);
 extern int usb_autopm_set_interface(struct usb_interface *intf);
 extern int usb_autopm_get_interface(struct usb_interface *intf);
 extern void usb_autopm_put_interface(struct usb_interface *intf);
+extern int usb_autopm_get_interface_async(struct usb_interface *intf);
+extern void usb_autopm_put_interface_async(struct usb_interface *intf);
 
 static inline void usb_autopm_enable(struct usb_interface *intf)
 {
@@ -539,8 +543,13 @@ static inline int usb_autopm_set_interface(struct usb_interface *intf)
 static inline int usb_autopm_get_interface(struct usb_interface *intf)
 { return 0; }
 
+static inline int usb_autopm_get_interface_async(struct usb_interface *intf)
+{ return 0; }
+
 static inline void usb_autopm_put_interface(struct usb_interface *intf)
 { }
+static inline void usb_autopm_put_interface_async(struct usb_interface *intf)
+{ }
 static inline void usb_autopm_enable(struct usb_interface *intf)
 { }
 static inline void usb_autopm_disable(struct usb_interface *intf)
-- 
cgit v1.2.3


From dc023dceec861c60bc1d1a17a2c6496ddac26ee7 Mon Sep 17 00:00:00 2001
From: Inaky Perez-Gonzalez <inaky@linux.intel.com>
Date: Thu, 13 Nov 2008 10:31:35 -0800
Subject: USB: Introduce usb_queue_reset() to do resets from atomic contexts

This patch introduces a new call to be able to do a USB reset from an
atomic contect. This is quite helpful in USB callbacks to handle
errors (when the only thing that can be done is to do a device
reset).

It is done queuing a work struct that will do the actual reset. The
struct is "attached" to an interface so pending requests from an
interface are removed when said interface is unbound from the driver.

The call flow then becomes:

usb_queue_reset_device()
  __usb_queue_reset_device() [workqueue]
    usb_reset_device()

usb_probe_interface()
  usb_cancel_queue_reset()      [error path]

usb_unbind_interface()
  usb_cancel_queue_reset()

usb_driver_release_interface()
  usb_cancel_queue_reset()

Note usb_cancel_queue_reset() needs smarts to try not to unqueue when
it is actually being executed. This happens when we run the reset from
the workqueue: usb_reset_device() is called and on interface unbind
time, usb_cancel_queue_reset() would be called. That would deadlock on
cancel_work_sync(). To avoid that, we set (before running
usb_reset_device()) usb_intf->reset_running and clear it inmediately
after returning.

Patch is against 2.6.28-rc2 and depends on
http://marc.info/?l=linux-usb&m=122581634925308&w=2 (as submitted by
Alan Stern).

Signed-off-by: Inaky Perez-Gonzalez <inaky@linux.intel.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/driver.c  | 23 +++++++++++++++++++++--
 drivers/usb/core/hub.c     | 43 +++++++++++++++++++++++++++++++++++++++++++
 drivers/usb/core/message.c | 41 +++++++++++++++++++++++++++++++++++++++++
 include/linux/usb.h        |  8 ++++++++
 4 files changed, 113 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c
index 23b3c7e79d4b..7e26fb3c2759 100644
--- a/drivers/usb/core/driver.c
+++ b/drivers/usb/core/driver.c
@@ -184,6 +184,20 @@ static int usb_unbind_device(struct device *dev)
 	return 0;
 }
 
+/*
+ * Cancel any pending scheduled resets
+ *
+ * [see usb_queue_reset_device()]
+ *
+ * Called after unconfiguring / when releasing interfaces. See
+ * comments in __usb_queue_reset_device() regarding
+ * udev->reset_running.
+ */
+static void usb_cancel_queued_reset(struct usb_interface *iface)
+{
+	if (iface->reset_running == 0)
+		cancel_work_sync(&iface->reset_ws);
+}
 
 /* called from driver core with dev locked */
 static int usb_probe_interface(struct device *dev)
@@ -242,6 +256,7 @@ static int usb_probe_interface(struct device *dev)
 			mark_quiesced(intf);
 			intf->needs_remote_wakeup = 0;
 			intf->condition = USB_INTERFACE_UNBOUND;
+			usb_cancel_queued_reset(intf);
 		} else
 			intf->condition = USB_INTERFACE_BOUND;
 
@@ -272,6 +287,7 @@ static int usb_unbind_interface(struct device *dev)
 		usb_disable_interface(udev, intf);
 
 	driver->disconnect(intf);
+	usb_cancel_queued_reset(intf);
 
 	/* Reset other interface state.
 	 * We cannot do a Set-Interface if the device is suspended or
@@ -380,8 +396,10 @@ void usb_driver_release_interface(struct usb_driver *driver,
 	if (device_is_registered(dev)) {
 		iface->condition = USB_INTERFACE_UNBINDING;
 		device_release_driver(dev);
+	} else {
+		iface->condition = USB_INTERFACE_UNBOUND;
+		usb_cancel_queued_reset(iface);
 	}
-
 	dev->driver = NULL;
 	usb_set_intfdata(iface, NULL);
 
@@ -942,7 +960,8 @@ static int usb_suspend_interface(struct usb_device *udev,
 	if (udev->state == USB_STATE_NOTATTACHED || !is_active(intf))
 		goto done;
 
-	if (intf->condition == USB_INTERFACE_UNBOUND)	/* This can't happen */
+	/* This can happen; see usb_driver_release_interface() */
+	if (intf->condition == USB_INTERFACE_UNBOUND)
 		goto done;
 	driver = to_usb_driver(intf->dev.driver);
 
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 95fb3104ba4f..e65881899c8f 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -3518,3 +3518,46 @@ int usb_reset_device(struct usb_device *udev)
 	return ret;
 }
 EXPORT_SYMBOL_GPL(usb_reset_device);
+
+
+/**
+ * usb_queue_reset_device - Reset a USB device from an atomic context
+ * @iface: USB interface belonging to the device to reset
+ *
+ * This function can be used to reset a USB device from an atomic
+ * context, where usb_reset_device() won't work (as it blocks).
+ *
+ * Doing a reset via this method is functionally equivalent to calling
+ * usb_reset_device(), except for the fact that it is delayed to a
+ * workqueue. This means that any drivers bound to other interfaces
+ * might be unbound, as well as users from usbfs in user space.
+ *
+ * Corner cases:
+ *
+ * - Scheduling two resets at the same time from two different drivers
+ *   attached to two different interfaces of the same device is
+ *   possible; depending on how the driver attached to each interface
+ *   handles ->pre_reset(), the second reset might happen or not.
+ *
+ * - If a driver is unbound and it had a pending reset, the reset will
+ *   be cancelled.
+ *
+ * - This function can be called during .probe() or .disconnect()
+ *   times. On return from .disconnect(), any pending resets will be
+ *   cancelled.
+ *
+ * There is no no need to lock/unlock the @reset_ws as schedule_work()
+ * does its own.
+ *
+ * NOTE: We don't do any reference count tracking because it is not
+ *     needed. The lifecycle of the work_struct is tied to the
+ *     usb_interface. Before destroying the interface we cancel the
+ *     work_struct, so the fact that work_struct is queued and or
+ *     running means the interface (and thus, the device) exist and
+ *     are referenced.
+ */
+void usb_queue_reset_device(struct usb_interface *iface)
+{
+	schedule_work(&iface->reset_ws);
+}
+EXPORT_SYMBOL_GPL(usb_queue_reset_device);
diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c
index cc47d36798b1..aadf29f09c45 100644
--- a/drivers/usb/core/message.c
+++ b/drivers/usb/core/message.c
@@ -1441,6 +1441,46 @@ static struct usb_interface_assoc_descriptor *find_iad(struct usb_device *dev,
 	return retval;
 }
 
+
+/*
+ * Internal function to queue a device reset
+ *
+ * This is initialized into the workstruct in 'struct
+ * usb_device->reset_ws' that is launched by
+ * message.c:usb_set_configuration() when initializing each 'struct
+ * usb_interface'.
+ *
+ * It is safe to get the USB device without reference counts because
+ * the life cycle of @iface is bound to the life cycle of @udev. Then,
+ * this function will be ran only if @iface is alive (and before
+ * freeing it any scheduled instances of it will have been cancelled).
+ *
+ * We need to set a flag (usb_dev->reset_running) because when we call
+ * the reset, the interfaces might be unbound. The current interface
+ * cannot try to remove the queued work as it would cause a deadlock
+ * (you cannot remove your work from within your executing
+ * workqueue). This flag lets it know, so that
+ * usb_cancel_queued_reset() doesn't try to do it.
+ *
+ * See usb_queue_reset_device() for more details
+ */
+void __usb_queue_reset_device(struct work_struct *ws)
+{
+	int rc;
+	struct usb_interface *iface =
+		container_of(ws, struct usb_interface, reset_ws);
+	struct usb_device *udev = interface_to_usbdev(iface);
+
+	rc = usb_lock_device_for_reset(udev, iface);
+	if (rc >= 0) {
+		iface->reset_running = 1;
+		usb_reset_device(udev);
+		iface->reset_running = 0;
+		usb_unlock_device(udev);
+	}
+}
+
+
 /*
  * usb_set_configuration - Makes a particular device setting be current
  * @dev: the device whose configuration is being updated
@@ -1611,6 +1651,7 @@ free_interfaces:
 		intf->dev.type = &usb_if_device_type;
 		intf->dev.groups = usb_interface_groups;
 		intf->dev.dma_mask = dev->dev.dma_mask;
+		INIT_WORK(&intf->reset_ws, __usb_queue_reset_device);
 		device_initialize(&intf->dev);
 		mark_quiesced(intf);
 		dev_set_name(&intf->dev, "%d-%s:%d.%d",
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 859a88e6ce9c..c8e55aa979de 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -120,6 +120,11 @@ enum usb_interface_condition {
  *	to the sysfs representation for that device.
  * @pm_usage_cnt: PM usage counter for this interface; autosuspend is not
  *	allowed unless the counter is 0.
+ * @reset_ws: Used for scheduling resets from atomic context.
+ * @reset_running: set to 1 if the interface is currently running a
+ *      queued reset so that usb_cancel_queued_reset() doesn't try to
+ *      remove from the workqueue when running inside the worker
+ *      thread. See __usb_queue_reset_device().
  *
  * USB device drivers attach to interfaces on a physical device.  Each
  * interface encapsulates a single high level function, such as feeding
@@ -168,10 +173,12 @@ struct usb_interface {
 	unsigned needs_remote_wakeup:1;	/* driver requires remote wakeup */
 	unsigned needs_altsetting0:1;	/* switch to altsetting 0 is pending */
 	unsigned needs_binding:1;	/* needs delayed unbind/rebind */
+	unsigned reset_running:1;
 
 	struct device dev;		/* interface specific device info */
 	struct device *usb_dev;
 	int pm_usage_cnt;		/* usage counter for autosuspend */
+	struct work_struct reset_ws;	/* for resets in atomic context */
 };
 #define	to_usb_interface(d) container_of(d, struct usb_interface, dev)
 #define	interface_to_usbdev(intf) \
@@ -507,6 +514,7 @@ extern int usb_lock_device_for_reset(struct usb_device *udev,
 
 /* USB port reset for device reinitialization */
 extern int usb_reset_device(struct usb_device *dev);
+extern void usb_queue_reset_device(struct usb_interface *dev);
 
 extern struct usb_device *usb_find_device(u16 vendor_id, u16 product_id);
 
-- 
cgit v1.2.3


From f150fa1afbf69a87f54752579ff2bb769aad88b3 Mon Sep 17 00:00:00 2001
From: Pete Zaitcev <zaitcev@redhat.com>
Date: Thu, 13 Nov 2008 21:31:21 -0700
Subject: USB: Allow usbmon as a module even if usbcore is builtin

usbmon can only be built as a module if usbcore is a module too. Trivial
changes to the relevant Kconfig and Makefile (and a few trivial changes
elsewhere) allow usbmon to be built as a module even if usbcore is
builtin.

This is verified to work in all 9 permutations (3 correctly prohibited
by Kconfig, 6 build a suitable result).

Signed-off-by: Paul Bolle <pebolle@tiscali.nl>
Signed-off-by: Pete Zaitcev <zaitcev@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/hcd.c   |  4 ++--
 drivers/usb/core/hcd.h   |  4 ++--
 drivers/usb/mon/Kconfig  | 13 ++++++-------
 drivers/usb/mon/Makefile |  3 +--
 include/linux/usb.h      |  2 +-
 5 files changed, 12 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index e1b42626d04d..7403ed871abd 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -2028,7 +2028,7 @@ EXPORT_SYMBOL_GPL(usb_hcd_platform_shutdown);
 
 /*-------------------------------------------------------------------------*/
 
-#if defined(CONFIG_USB_MON)
+#if defined(CONFIG_USB_MON) || defined(CONFIG_USB_MON_MODULE)
 
 struct usb_mon_operations *mon_ops;
 
@@ -2064,4 +2064,4 @@ void usb_mon_deregister (void)
 }
 EXPORT_SYMBOL_GPL (usb_mon_deregister);
 
-#endif /* CONFIG_USB_MON */
+#endif /* CONFIG_USB_MON || CONFIG_USB_MON_MODULE */
diff --git a/drivers/usb/core/hcd.h b/drivers/usb/core/hcd.h
index d202342b36b6..0aaa9cea6b38 100644
--- a/drivers/usb/core/hcd.h
+++ b/drivers/usb/core/hcd.h
@@ -421,7 +421,7 @@ static inline void usbfs_cleanup(void) { }
 
 /*-------------------------------------------------------------------------*/
 
-#if defined(CONFIG_USB_MON)
+#if defined(CONFIG_USB_MON) || defined(CONFIG_USB_MON_MODULE)
 
 struct usb_mon_operations {
 	void (*urb_submit)(struct usb_bus *bus, struct urb *urb);
@@ -463,7 +463,7 @@ static inline void usbmon_urb_submit_error(struct usb_bus *bus, struct urb *urb,
 static inline void usbmon_urb_complete(struct usb_bus *bus, struct urb *urb,
 		int status) {}
 
-#endif /* CONFIG_USB_MON */
+#endif /* CONFIG_USB_MON || CONFIG_USB_MON_MODULE */
 
 /*-------------------------------------------------------------------------*/
 
diff --git a/drivers/usb/mon/Kconfig b/drivers/usb/mon/Kconfig
index deb9ddffa402..f28f350cd96a 100644
--- a/drivers/usb/mon/Kconfig
+++ b/drivers/usb/mon/Kconfig
@@ -3,14 +3,13 @@
 #
 
 config USB_MON
-	bool "USB Monitor"
-	depends on USB!=n
-	default y
+	tristate "USB Monitor"
+	depends on USB
+	default y if USB=y
+	default m if USB=m
 	help
-	  If you say Y here, a component which captures the USB traffic
+	  If you select this option, a component which captures the USB traffic
 	  between peripheral-specific drivers and HC drivers will be built.
 	  For more information, see <file:Documentation/usb/usbmon.txt>.
 
-	  This is somewhat experimental at this time, but it should be safe.
-
-	  If unsure, say Y.
+	  If unsure, say Y (if allowed), otherwise M.
diff --git a/drivers/usb/mon/Makefile b/drivers/usb/mon/Makefile
index 0f76ed5e1617..c6516b566731 100644
--- a/drivers/usb/mon/Makefile
+++ b/drivers/usb/mon/Makefile
@@ -4,5 +4,4 @@
 
 usbmon-objs	:= mon_main.o mon_stat.o mon_text.o mon_bin.o mon_dma.o
 
-# This does not use CONFIG_USB_MON because we want this to use a tristate.
-obj-$(CONFIG_USB)	+= usbmon.o
+obj-$(CONFIG_USB_MON)	+= usbmon.o
diff --git a/include/linux/usb.h b/include/linux/usb.h
index c8e55aa979de..8bc81bffc195 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -336,7 +336,7 @@ struct usb_bus {
 #endif
 	struct device *dev;		/* device for this bus */
 
-#if defined(CONFIG_USB_MON)
+#if defined(CONFIG_USB_MON) || defined(CONFIG_USB_MON_MODULE)
 	struct mon_bus *mon_bus;	/* non-null when associated */
 	int monitored;			/* non-zero when monitored */
 #endif
-- 
cgit v1.2.3


From 1537e0ad944acf3a4c2b311a646d7993b89499f7 Mon Sep 17 00:00:00 2001
From: Ben Efros <ben@pc-doctor.com>
Date: Tue, 18 Nov 2008 13:31:13 -0800
Subject: USB: storage devices and SAT

Add the SANE SENSE flag to indicate that a device is capable of handling
more than 18-bytes of sense data.  This functionality is required for
USB-ATA bridges implementing SAT.  A future patch will actually enable this
function for several devices.

The logic behind this is that we can detect support for SANE_SENSE in a few ways:
 1) ATA PASS THROUGH (12) or (16) execute successfully
 2) SPC-3 or higher is in use
 3) A previous CHECK CONDITION occurred with sense format 70-73 and had
    a length greater than 18-bytes total

Signed-off-by: Ben Efros <ben@pc-doctor.com>
Signed-off-by: Matthew Dharm <mdharm-usb@one-eyed-alien.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/storage/scsiglue.c  |  4 ++++
 drivers/usb/storage/transport.c | 40 +++++++++++++++++++++++++++++++++++++++-
 include/linux/usb_usual.h       |  5 +++--
 3 files changed, 46 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c
index 09779f6a8179..1b35e011a34f 100644
--- a/drivers/usb/storage/scsiglue.c
+++ b/drivers/usb/storage/scsiglue.c
@@ -170,6 +170,10 @@ static int slave_configure(struct scsi_device *sdev)
 		if (us->fflags & US_FL_CAPACITY_HEURISTICS)
 			sdev->guess_capacity = 1;
 
+		/* assume SPC3 or latter devices support sense size > 18 */
+		if (sdev->scsi_level > SCSI_SPC_2)
+			us->fflags |= US_FL_SANE_SENSE;
+
 		/* Some devices report a SCSI revision level above 2 but are
 		 * unable to handle the REPORT LUNS command (for which
 		 * support is mandatory at level 3).  Since we already have
diff --git a/drivers/usb/storage/transport.c b/drivers/usb/storage/transport.c
index 2058db41618c..f584e72cc689 100644
--- a/drivers/usb/storage/transport.c
+++ b/drivers/usb/storage/transport.c
@@ -578,6 +578,20 @@ void usb_stor_invoke_transport(struct scsi_cmnd *srb, struct us_data *us)
 		need_auto_sense = 1;
 	}
 
+	/*
+	 * Determine if this device is SAT by seeing if the
+	 * command executed successfully.  Otherwise we'll have
+	 * to wait for at least one CHECK_CONDITION to determine
+	 * SANE_SENSE support
+	 */
+	if ((srb->cmnd[0] == ATA_16 || srb->cmnd[0] == ATA_12) &&
+	    result == USB_STOR_TRANSPORT_GOOD &&
+	    !(us->fflags & US_FL_SANE_SENSE) &&
+	    !(srb->cmnd[2] & 0x20)) {
+		US_DEBUGP("-- SAT supported, increasing auto-sense\n");
+		us->fflags |= US_FL_SANE_SENSE;
+	}
+
 	/*
 	 * A short transfer on a command where we don't expect it
 	 * is unusual, but it doesn't mean we need to auto-sense.
@@ -595,10 +609,15 @@ void usb_stor_invoke_transport(struct scsi_cmnd *srb, struct us_data *us)
 	if (need_auto_sense) {
 		int temp_result;
 		struct scsi_eh_save ses;
+		int sense_size = US_SENSE_SIZE;
+
+		/* device supports and needs bigger sense buffer */
+		if (us->fflags & US_FL_SANE_SENSE)
+			sense_size = ~0;
 
 		US_DEBUGP("Issuing auto-REQUEST_SENSE\n");
 
-		scsi_eh_prep_cmnd(srb, &ses, NULL, 0, US_SENSE_SIZE);
+		scsi_eh_prep_cmnd(srb, &ses, NULL, 0, sense_size);
 
 		/* FIXME: we must do the protocol translation here */
 		if (us->subclass == US_SC_RBC || us->subclass == US_SC_SCSI ||
@@ -632,6 +651,25 @@ void usb_stor_invoke_transport(struct scsi_cmnd *srb, struct us_data *us)
 			return;
 		}
 
+		/* If the sense data returned is larger than 18-bytes then we
+		 * assume this device supports requesting more in the future.
+		 * The response code must be 70h through 73h inclusive.
+		 */
+		if (srb->sense_buffer[7] > (US_SENSE_SIZE - 8) &&
+		    !(us->fflags & US_FL_SANE_SENSE) &&
+		    (srb->sense_buffer[0] & 0x7C) == 0x70) {
+			US_DEBUGP("-- SANE_SENSE support enabled\n");
+			us->fflags |= US_FL_SANE_SENSE;
+
+			/* Indicate to the user that we truncated their sense
+			 * because we didn't know it supported larger sense.
+			 */
+			US_DEBUGP("-- Sense data truncated to %i from %i\n",
+			          US_SENSE_SIZE,
+			          srb->sense_buffer[7] + 8);
+			srb->sense_buffer[7] = (US_SENSE_SIZE - 8);
+		}
+
 		US_DEBUGP("-- Result from auto-sense is %d\n", temp_result);
 		US_DEBUGP("-- code: 0x%x, key: 0x%x, ASC: 0x%x, ASCQ: 0x%x\n",
 			  srb->sense_buffer[0],
diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h
index d9a3bbe38e6b..998e5cbbf29e 100644
--- a/include/linux/usb_usual.h
+++ b/include/linux/usb_usual.h
@@ -52,8 +52,9 @@
 	US_FLAG(MAX_SECTORS_MIN,0x00002000)			\
 		/* Sets max_sectors to arch min */		\
 	US_FLAG(BULK_IGNORE_TAG,0x00004000)			\
-		/* Ignore tag mismatch in bulk operations */
-
+		/* Ignore tag mismatch in bulk operations */    \
+	US_FLAG(SANE_SENSE,     0x00008000)
+		/* Sane Sense (> 18 bytes) */
 
 #define US_FLAG(name, value)	US_FL_##name = value ,
 enum { US_DO_ALL_FLAGS };
-- 
cgit v1.2.3


From 6084f1bf0c51a99cbba612ee90a4607cffb8b042 Mon Sep 17 00:00:00 2001
From: Philipp Zabel <philipp.zabel@gmail.com>
Date: Mon, 24 Nov 2008 12:00:01 -0800
Subject: USB: otg: gpio_vbus transceiver stub

gpio_vbus provides simple GPIO VBUS sensing for peripheral
controllers with an internal transceiver.
Optionally, a second GPIO can be used to control D+ pullup.

It also interfaces with the regulator framework to limit charging
currents when powered via USB. gpio_vbus requests the regulator
supplying "vbus_draw" and can enable/disable it or limit its
current depending on USB state.

[dbrownell@users.sourceforge.net: use drivers/otg, cleanups ]

Signed-off-by: Philipp Zabel <philipp.zabel@gmail.com>
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/otg/Kconfig       |  13 ++
 drivers/usb/otg/Makefile      |   5 +
 drivers/usb/otg/gpio_vbus.c   | 335 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/usb/gpio_vbus.h |  30 ++++
 4 files changed, 383 insertions(+)
 create mode 100644 drivers/usb/otg/gpio_vbus.c
 create mode 100644 include/linux/usb/gpio_vbus.h

(limited to 'include/linux')

diff --git a/drivers/usb/otg/Kconfig b/drivers/usb/otg/Kconfig
index 860342902bbb..afe91bfea7f2 100644
--- a/drivers/usb/otg/Kconfig
+++ b/drivers/usb/otg/Kconfig
@@ -14,6 +14,19 @@ config USB_OTG_UTILS
 	  Select this to make sure the build includes objects from
 	  the OTG infrastructure directory.
 
+#
+# USB Transceiver Drivers
+#
+config USB_GPIO_VBUS
+	tristate "GPIO based peripheral-only VBUS sensing 'transceiver'"
+	depends on GENERIC_GPIO
+	select USB_OTG_UTILS
+	help
+	  Provides simple GPIO VBUS sensing for controllers with an
+	  internal transceiver via the otg_transceiver interface, and
+	  optionally control of a D+ pullup GPIO as well as a VBUS
+	  current limit regulator.
+
 config ISP1301_OMAP
 	tristate "Philips ISP1301 with OMAP OTG"
 	depends on I2C && ARCH_OMAP_OTG
diff --git a/drivers/usb/otg/Makefile b/drivers/usb/otg/Makefile
index 483816685d00..6c58b36ca7cf 100644
--- a/drivers/usb/otg/Makefile
+++ b/drivers/usb/otg/Makefile
@@ -1,3 +1,8 @@
+#
+# OTG infrastructure and transceiver drivers
+#
+
+obj-$(CONFIG_USB_GPIO_VBUS)	+= gpio_vbus.o
 obj-$(CONFIG_ISP1301_OMAP)	+= isp1301_omap.o
 
 ifeq ($(CONFIG_USB_DEBUG),y)
diff --git a/drivers/usb/otg/gpio_vbus.c b/drivers/usb/otg/gpio_vbus.c
new file mode 100644
index 000000000000..63a6036f04be
--- /dev/null
+++ b/drivers/usb/otg/gpio_vbus.c
@@ -0,0 +1,335 @@
+/*
+ * gpio-vbus.c - simple GPIO VBUS sensing driver for B peripheral devices
+ *
+ * Copyright (c) 2008 Philipp Zabel <philipp.zabel@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/gpio.h>
+#include <linux/interrupt.h>
+#include <linux/usb.h>
+
+#include <linux/regulator/consumer.h>
+
+#include <linux/usb/gadget.h>
+#include <linux/usb/gpio_vbus.h>
+#include <linux/usb/otg.h>
+
+
+/*
+ * A simple GPIO VBUS sensing driver for B peripheral only devices
+ * with internal transceivers. It can control a D+ pullup GPIO and
+ * a regulator to limit the current drawn from VBUS.
+ *
+ * Needs to be loaded before the UDC driver that will use it.
+ */
+struct gpio_vbus_data {
+	struct otg_transceiver otg;
+	struct device          *dev;
+	struct regulator       *vbus_draw;
+	int			vbus_draw_enabled;
+	unsigned		mA;
+};
+
+
+/*
+ * This driver relies on "both edges" triggering.  VBUS has 100 msec to
+ * stabilize, so the peripheral controller driver may need to cope with
+ * some bouncing due to current surges (e.g. charging local capacitance)
+ * and contact chatter.
+ *
+ * REVISIT in desperate straits, toggling between rising and falling
+ * edges might be workable.
+ */
+#define VBUS_IRQ_FLAGS \
+	( IRQF_SAMPLE_RANDOM | IRQF_SHARED \
+	| IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING )
+
+
+/* interface to regulator framework */
+static void set_vbus_draw(struct gpio_vbus_data *gpio_vbus, unsigned mA)
+{
+	struct regulator *vbus_draw = gpio_vbus->vbus_draw;
+	int enabled;
+
+	if (!vbus_draw)
+		return;
+
+	enabled = gpio_vbus->vbus_draw_enabled;
+	if (mA) {
+		regulator_set_current_limit(vbus_draw, 0, 1000 * mA);
+		if (!enabled) {
+			regulator_enable(vbus_draw);
+			gpio_vbus->vbus_draw_enabled = 1;
+		}
+	} else {
+		if (enabled) {
+			regulator_disable(vbus_draw);
+			gpio_vbus->vbus_draw_enabled = 0;
+		}
+	}
+	gpio_vbus->mA = mA;
+}
+
+/* VBUS change IRQ handler */
+static irqreturn_t gpio_vbus_irq(int irq, void *data)
+{
+	struct platform_device *pdev = data;
+	struct gpio_vbus_mach_info *pdata = pdev->dev.platform_data;
+	struct gpio_vbus_data *gpio_vbus = platform_get_drvdata(pdev);
+	int gpio, vbus;
+
+	vbus = gpio_get_value(pdata->gpio_vbus);
+	if (pdata->gpio_vbus_inverted)
+		vbus = !vbus;
+
+	dev_dbg(&pdev->dev, "VBUS %s (gadget: %s)\n",
+		vbus ? "supplied" : "inactive",
+		gpio_vbus->otg.gadget ? gpio_vbus->otg.gadget->name : "none");
+
+	if (!gpio_vbus->otg.gadget)
+		return IRQ_HANDLED;
+
+	/* Peripheral controllers which manage the pullup themselves won't have
+	 * gpio_pullup configured here.  If it's configured here, we'll do what
+	 * isp1301_omap::b_peripheral() does and enable the pullup here... although
+	 * that may complicate usb_gadget_{,dis}connect() support.
+	 */
+	gpio = pdata->gpio_pullup;
+	if (vbus) {
+		gpio_vbus->otg.state = OTG_STATE_B_PERIPHERAL;
+		usb_gadget_vbus_connect(gpio_vbus->otg.gadget);
+
+		/* drawing a "unit load" is *always* OK, except for OTG */
+		set_vbus_draw(gpio_vbus, 100);
+
+		/* optionally enable D+ pullup */
+		if (gpio_is_valid(gpio))
+			gpio_set_value(gpio, !pdata->gpio_pullup_inverted);
+	} else {
+		/* optionally disable D+ pullup */
+		if (gpio_is_valid(gpio))
+			gpio_set_value(gpio, pdata->gpio_pullup_inverted);
+
+		set_vbus_draw(gpio_vbus, 0);
+
+		usb_gadget_vbus_disconnect(gpio_vbus->otg.gadget);
+		gpio_vbus->otg.state = OTG_STATE_B_IDLE;
+	}
+
+	return IRQ_HANDLED;
+}
+
+/* OTG transceiver interface */
+
+/* bind/unbind the peripheral controller */
+static int gpio_vbus_set_peripheral(struct otg_transceiver *otg,
+				struct usb_gadget *gadget)
+{
+	struct gpio_vbus_data *gpio_vbus;
+	struct gpio_vbus_mach_info *pdata;
+	struct platform_device *pdev;
+	int gpio, irq;
+
+	gpio_vbus = container_of(otg, struct gpio_vbus_data, otg);
+	pdev = to_platform_device(gpio_vbus->dev);
+	pdata = gpio_vbus->dev->platform_data;
+	irq = gpio_to_irq(pdata->gpio_vbus);
+	gpio = pdata->gpio_pullup;
+
+	if (!gadget) {
+		dev_dbg(&pdev->dev, "unregistering gadget '%s'\n",
+			otg->gadget->name);
+
+		/* optionally disable D+ pullup */
+		if (gpio_is_valid(gpio))
+			gpio_set_value(gpio, pdata->gpio_pullup_inverted);
+
+		set_vbus_draw(gpio_vbus, 0);
+
+		usb_gadget_vbus_disconnect(otg->gadget);
+		otg->state = OTG_STATE_UNDEFINED;
+
+		otg->gadget = NULL;
+		return 0;
+	}
+
+	otg->gadget = gadget;
+	dev_dbg(&pdev->dev, "registered gadget '%s'\n", gadget->name);
+
+	/* initialize connection state */
+	gpio_vbus_irq(irq, pdev);
+	return 0;
+}
+
+/* effective for B devices, ignored for A-peripheral */
+static int gpio_vbus_set_power(struct otg_transceiver *otg, unsigned mA)
+{
+	struct gpio_vbus_data *gpio_vbus;
+
+	gpio_vbus = container_of(otg, struct gpio_vbus_data, otg);
+
+	if (otg->state == OTG_STATE_B_PERIPHERAL)
+		set_vbus_draw(gpio_vbus, mA);
+	return 0;
+}
+
+/* for non-OTG B devices: set/clear transceiver suspend mode */
+static int gpio_vbus_set_suspend(struct otg_transceiver *otg, int suspend)
+{
+	struct gpio_vbus_data *gpio_vbus;
+
+	gpio_vbus = container_of(otg, struct gpio_vbus_data, otg);
+
+	/* draw max 0 mA from vbus in suspend mode; or the previously
+	 * recorded amount of current if not suspended
+	 *
+	 * NOTE: high powered configs (mA > 100) may draw up to 2.5 mA
+	 * if they're wake-enabled ... we don't handle that yet.
+	 */
+	return gpio_vbus_set_power(otg, suspend ? 0 : gpio_vbus->mA);
+}
+
+/* platform driver interface */
+
+static int __init gpio_vbus_probe(struct platform_device *pdev)
+{
+	struct gpio_vbus_mach_info *pdata = pdev->dev.platform_data;
+	struct gpio_vbus_data *gpio_vbus;
+	struct resource *res;
+	int err, gpio, irq;
+
+	if (!pdata || !gpio_is_valid(pdata->gpio_vbus))
+		return -EINVAL;
+	gpio = pdata->gpio_vbus;
+
+	gpio_vbus = kzalloc(sizeof(struct gpio_vbus_data), GFP_KERNEL);
+	if (!gpio_vbus)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, gpio_vbus);
+	gpio_vbus->dev = &pdev->dev;
+	gpio_vbus->otg.label = "gpio-vbus";
+	gpio_vbus->otg.state = OTG_STATE_UNDEFINED;
+	gpio_vbus->otg.set_peripheral = gpio_vbus_set_peripheral;
+	gpio_vbus->otg.set_power = gpio_vbus_set_power;
+	gpio_vbus->otg.set_suspend = gpio_vbus_set_suspend;
+
+	err = gpio_request(gpio, "vbus_detect");
+	if (err) {
+		dev_err(&pdev->dev, "can't request vbus gpio %d, err: %d\n",
+			gpio, err);
+		goto err_gpio;
+	}
+	gpio_direction_input(gpio);
+
+	res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (res) {
+		irq = res->start;
+		res->flags &= IRQF_TRIGGER_MASK;
+		res->flags |= IRQF_SAMPLE_RANDOM | IRQF_SHARED;
+	} else
+		irq = gpio_to_irq(gpio);
+
+	/* if data line pullup is in use, initialize it to "not pulling up" */
+	gpio = pdata->gpio_pullup;
+	if (gpio_is_valid(gpio)) {
+		err = gpio_request(gpio, "udc_pullup");
+		if (err) {
+			dev_err(&pdev->dev,
+				"can't request pullup gpio %d, err: %d\n",
+				gpio, err);
+			gpio_free(pdata->gpio_vbus);
+			goto err_gpio;
+		}
+		gpio_direction_output(gpio, pdata->gpio_pullup_inverted);
+	}
+
+	err = request_irq(irq, gpio_vbus_irq, VBUS_IRQ_FLAGS,
+		"vbus_detect", pdev);
+	if (err) {
+		dev_err(&pdev->dev, "can't request irq %i, err: %d\n",
+			irq, err);
+		goto err_irq;
+	}
+
+	/* only active when a gadget is registered */
+	err = otg_set_transceiver(&gpio_vbus->otg);
+	if (err) {
+		dev_err(&pdev->dev, "can't register transceiver, err: %d\n",
+			err);
+		goto err_otg;
+	}
+
+	gpio_vbus->vbus_draw = regulator_get(&pdev->dev, "vbus_draw");
+	if (IS_ERR(gpio_vbus->vbus_draw)) {
+		dev_dbg(&pdev->dev, "can't get vbus_draw regulator, err: %ld\n",
+			PTR_ERR(gpio_vbus->vbus_draw));
+		gpio_vbus->vbus_draw = NULL;
+	}
+
+	return 0;
+err_otg:
+	free_irq(irq, &pdev->dev);
+err_irq:
+	if (gpio_is_valid(pdata->gpio_pullup))
+		gpio_free(pdata->gpio_pullup);
+	gpio_free(pdata->gpio_vbus);
+err_gpio:
+	platform_set_drvdata(pdev, NULL);
+	kfree(gpio_vbus);
+	return err;
+}
+
+static int __exit gpio_vbus_remove(struct platform_device *pdev)
+{
+	struct gpio_vbus_data *gpio_vbus = platform_get_drvdata(pdev);
+	struct gpio_vbus_mach_info *pdata = pdev->dev.platform_data;
+	int gpio = pdata->gpio_vbus;
+
+	regulator_put(gpio_vbus->vbus_draw);
+
+	otg_set_transceiver(NULL);
+
+	free_irq(gpio_to_irq(gpio), &pdev->dev);
+	if (gpio_is_valid(pdata->gpio_pullup))
+		gpio_free(pdata->gpio_pullup);
+	gpio_free(gpio);
+	platform_set_drvdata(pdev, NULL);
+	kfree(gpio_vbus);
+
+	return 0;
+}
+
+/* NOTE:  the gpio-vbus device may *NOT* be hotplugged */
+
+MODULE_ALIAS("platform:gpio-vbus");
+
+static struct platform_driver gpio_vbus_driver = {
+	.driver = {
+		.name  = "gpio-vbus",
+		.owner = THIS_MODULE,
+	},
+	.remove  = __exit_p(gpio_vbus_remove),
+};
+
+static int __init gpio_vbus_init(void)
+{
+	return platform_driver_probe(&gpio_vbus_driver, gpio_vbus_probe);
+}
+module_init(gpio_vbus_init);
+
+static void __exit gpio_vbus_exit(void)
+{
+	platform_driver_unregister(&gpio_vbus_driver);
+}
+module_exit(gpio_vbus_exit);
+
+MODULE_DESCRIPTION("simple GPIO controlled OTG transceiver driver");
+MODULE_AUTHOR("Philipp Zabel");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/usb/gpio_vbus.h b/include/linux/usb/gpio_vbus.h
new file mode 100644
index 000000000000..d9f03ccc2d60
--- /dev/null
+++ b/include/linux/usb/gpio_vbus.h
@@ -0,0 +1,30 @@
+/*
+ * A simple GPIO VBUS sensing driver for B peripheral only devices
+ * with internal transceivers.
+ * Optionally D+ pullup can be controlled by a second GPIO.
+ *
+ * Copyright (c) 2008 Philipp Zabel <philipp.zabel@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+/**
+ * struct gpio_vbus_mach_info - configuration for gpio_vbus
+ * @gpio_vbus: VBUS sensing GPIO
+ * @gpio_pullup: optional D+ or D- pullup GPIO (else negative/invalid)
+ * @gpio_vbus_inverted: true if gpio_vbus is active low
+ * @gpio_pullup_inverted: true if gpio_pullup is active low
+ *
+ * The VBUS sensing GPIO should have a pulldown, which will normally be
+ * part of a resistor ladder turning a 4.0V-5.25V level on VBUS into a
+ * value the GPIO detects as active.  Some systems will use comparators.
+ */
+struct gpio_vbus_mach_info {
+	int gpio_vbus;
+	int gpio_pullup;
+	bool gpio_vbus_inverted;
+	bool gpio_pullup_inverted;
+};
-- 
cgit v1.2.3


From 68144e0cc92125f41157ede7b060f83367bc4fe7 Mon Sep 17 00:00:00 2001
From: Philipp Zabel <philipp.zabel@gmail.com>
Date: Mon, 24 Nov 2008 12:01:17 -0800
Subject: USB: otg: add otg_put_transceiver()

As Russell King points out, calling put_device(otg_transceiver->dev)
directly in driver cleanup paths makes assumptions about otg_transceiver
internals.

Signed-off-by: Philipp Zabel <philipp.zabel@gmail.com>
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/arm/plat-omap/usb.c      | 6 ++++++
 drivers/usb/gadget/omap_udc.c | 4 ++--
 include/linux/usb/otg.h       | 1 +
 3 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/plat-omap/usb.c b/arch/arm/plat-omap/usb.c
index 67ca1e216df7..2f88ca8b8f38 100644
--- a/arch/arm/plat-omap/usb.c
+++ b/arch/arm/plat-omap/usb.c
@@ -96,6 +96,12 @@ struct otg_transceiver *otg_get_transceiver(void)
 }
 EXPORT_SYMBOL(otg_get_transceiver);
 
+void otg_put_transceiver(struct otg_transceiver *x)
+{
+	put_device(x->dev);
+}
+EXPORT_SYMBOL(otg_put_transceiver);
+
 int otg_set_transceiver(struct otg_transceiver *x)
 {
 	if (xceiv && x)
diff --git a/drivers/usb/gadget/omap_udc.c b/drivers/usb/gadget/omap_udc.c
index 34e9e393f929..57d9641c6bf8 100644
--- a/drivers/usb/gadget/omap_udc.c
+++ b/drivers/usb/gadget/omap_udc.c
@@ -3006,7 +3006,7 @@ cleanup1:
 
 cleanup0:
 	if (xceiv)
-		put_device(xceiv->dev);
+		otg_put_transceiver(xceiv);
 
 	if (cpu_is_omap16xx() || cpu_is_omap24xx()) {
 		clk_disable(hhc_clk);
@@ -3034,7 +3034,7 @@ static int __exit omap_udc_remove(struct platform_device *pdev)
 
 	pullup_disable(udc);
 	if (udc->transceiver) {
-		put_device(udc->transceiver->dev);
+		otg_put_transceiver(udc->transceiver);
 		udc->transceiver = NULL;
 	}
 	omap_writew(0, UDC_SYSCON1);
diff --git a/include/linux/usb/otg.h b/include/linux/usb/otg.h
index 1db25d152ad8..94df4fe6c6c0 100644
--- a/include/linux/usb/otg.h
+++ b/include/linux/usb/otg.h
@@ -84,6 +84,7 @@ extern int otg_set_transceiver(struct otg_transceiver *);
 
 /* for usb host and peripheral controller drivers */
 extern struct otg_transceiver *otg_get_transceiver(void);
+extern void otg_put_transceiver(struct otg_transceiver *);
 
 static inline int
 otg_start_hnp(struct otg_transceiver *otg)
-- 
cgit v1.2.3


From 65bfd2967c906ca322a4bb69a285fe0de8916ac6 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Tue, 25 Nov 2008 16:39:18 -0500
Subject: USB: Enhance usage of pm_message_t

This patch (as1177) modifies the USB core suspend and resume
routines.  The resume functions now will take a pm_message_t argument,
so they will know what sort of resume is occurring.  The new argument
is also passed to the port suspend/resume and bus suspend/resume
routines (although they don't use it for anything but debugging).

In addition, special pm_message_t values are used for user-initiated,
device-initiated (i.e., remote wakeup), and automatic suspend/resume.
By testing these values, drivers can tell whether or not a particular
suspend was an autosuspend.  Unfortunately, they can't do the same for
resumes -- not until the pm_message_t argument is also passed to the
drivers' resume methods.  That will require a bigger change.

IMO, the whole Power Management framework should have been set up this
way in the first place.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/class/cdc-acm.c |  2 +-
 drivers/usb/class/cdc-wdm.c |  3 ++-
 drivers/usb/core/driver.c   | 61 +++++++++++++++++++++++++--------------------
 drivers/usb/core/generic.c  | 10 ++++----
 drivers/usb/core/hcd.c      | 10 ++++----
 drivers/usb/core/hcd.h      |  4 +--
 drivers/usb/core/hub.c      | 16 ++++++------
 drivers/usb/core/sysfs.c    |  6 ++---
 drivers/usb/core/usb.c      |  8 +++---
 drivers/usb/core/usb.h      | 15 ++++++-----
 include/linux/usb.h         |  2 +-
 11 files changed, 74 insertions(+), 63 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index d50a99f70aee..00b47ea24f86 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -1275,7 +1275,7 @@ static int acm_suspend(struct usb_interface *intf, pm_message_t message)
 	struct acm *acm = usb_get_intfdata(intf);
 	int cnt;
 
-	if (acm->dev->auto_pm) {
+	if (message.event & PM_EVENT_AUTO) {
 		int b;
 
 		spin_lock_irq(&acm->read_lock);
diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c
index 5a8ecc045e3f..3771d6e6d0cc 100644
--- a/drivers/usb/class/cdc-wdm.c
+++ b/drivers/usb/class/cdc-wdm.c
@@ -764,7 +764,8 @@ static int wdm_suspend(struct usb_interface *intf, pm_message_t message)
 
 	mutex_lock(&desc->plock);
 #ifdef CONFIG_PM
-	if (interface_to_usbdev(desc->intf)->auto_pm && test_bit(WDM_IN_USE, &desc->flags)) {
+	if ((message.event & PM_EVENT_AUTO) &&
+			test_bit(WDM_IN_USE, &desc->flags)) {
 		rv = -EBUSY;
 	} else {
 #endif
diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c
index 0226e019326a..41c06025506e 100644
--- a/drivers/usb/core/driver.c
+++ b/drivers/usb/core/driver.c
@@ -922,7 +922,7 @@ static int usb_suspend_device(struct usb_device *udev, pm_message_t msg)
 }
 
 /* Caller has locked udev's pm_mutex */
-static int usb_resume_device(struct usb_device *udev)
+static int usb_resume_device(struct usb_device *udev, pm_message_t msg)
 {
 	struct usb_device_driver	*udriver;
 	int				status = 0;
@@ -940,7 +940,7 @@ static int usb_resume_device(struct usb_device *udev)
 		udev->reset_resume = 1;
 
 	udriver = to_usb_device_driver(udev->dev.driver);
-	status = udriver->resume(udev);
+	status = udriver->resume(udev, msg);
 
  done:
 	dev_vdbg(&udev->dev, "%s: status %d\n", __func__, status);
@@ -969,7 +969,7 @@ static int usb_suspend_interface(struct usb_device *udev,
 		status = driver->suspend(intf, msg);
 		if (status == 0)
 			mark_quiesced(intf);
-		else if (!udev->auto_pm)
+		else if (!(msg.event & PM_EVENT_AUTO))
 			dev_err(&intf->dev, "%s error %d\n",
 					"suspend", status);
 	} else {
@@ -987,7 +987,7 @@ static int usb_suspend_interface(struct usb_device *udev,
 
 /* Caller has locked intf's usb_device's pm_mutex */
 static int usb_resume_interface(struct usb_device *udev,
-		struct usb_interface *intf, int reset_resume)
+		struct usb_interface *intf, pm_message_t msg, int reset_resume)
 {
 	struct usb_driver	*driver;
 	int			status = 0;
@@ -1138,10 +1138,9 @@ static inline int autosuspend_check(struct usb_device *udev, int reschedule)
  * all the interfaces which were suspended are resumed so that they remain
  * in the same state as the device.
  *
- * If an autosuspend is in progress (@udev->auto_pm is set), the routine
- * checks first to make sure that neither the device itself or any of its
- * active interfaces is in use (pm_usage_cnt is greater than 0).  If they
- * are, the autosuspend fails.
+ * If an autosuspend is in progress the routine checks first to make sure
+ * that neither the device itself or any of its active interfaces is in use
+ * (pm_usage_cnt is greater than 0).  If they are, the autosuspend fails.
  *
  * If the suspend succeeds, the routine recursively queues an autosuspend
  * request for @udev's parent device, thereby propagating the change up
@@ -1176,7 +1175,7 @@ static int usb_suspend_both(struct usb_device *udev, pm_message_t msg)
 
 	udev->do_remote_wakeup = device_may_wakeup(&udev->dev);
 
-	if (udev->auto_pm) {
+	if (msg.event & PM_EVENT_AUTO) {
 		status = autosuspend_check(udev, 0);
 		if (status < 0)
 			goto done;
@@ -1196,13 +1195,16 @@ static int usb_suspend_both(struct usb_device *udev, pm_message_t msg)
 
 	/* If the suspend failed, resume interfaces that did get suspended */
 	if (status != 0) {
+		pm_message_t msg2;
+
+		msg2.event = msg.event ^ (PM_EVENT_SUSPEND | PM_EVENT_RESUME);
 		while (--i >= 0) {
 			intf = udev->actconfig->interface[i];
-			usb_resume_interface(udev, intf, 0);
+			usb_resume_interface(udev, intf, msg2, 0);
 		}
 
 		/* Try another autosuspend when the interfaces aren't busy */
-		if (udev->auto_pm)
+		if (msg.event & PM_EVENT_AUTO)
 			autosuspend_check(udev, status == -EBUSY);
 
 	/* If the suspend succeeded then prevent any more URB submissions,
@@ -1232,6 +1234,7 @@ static int usb_suspend_both(struct usb_device *udev, pm_message_t msg)
 /**
  * usb_resume_both - resume a USB device and its interfaces
  * @udev: the usb_device to resume
+ * @msg: Power Management message describing this state transition
  *
  * This is the central routine for resuming USB devices.  It calls the
  * the resume method for @udev and then calls the resume methods for all
@@ -1257,7 +1260,7 @@ static int usb_suspend_both(struct usb_device *udev, pm_message_t msg)
  *
  * This routine can run only in process context.
  */
-static int usb_resume_both(struct usb_device *udev)
+static int usb_resume_both(struct usb_device *udev, pm_message_t msg)
 {
 	int			status = 0;
 	int			i;
@@ -1273,14 +1276,15 @@ static int usb_resume_both(struct usb_device *udev)
 
 	/* Propagate the resume up the tree, if necessary */
 	if (udev->state == USB_STATE_SUSPENDED) {
-		if (udev->auto_pm && udev->autoresume_disabled) {
+		if ((msg.event & PM_EVENT_AUTO) &&
+				udev->autoresume_disabled) {
 			status = -EPERM;
 			goto done;
 		}
 		if (parent) {
 			status = usb_autoresume_device(parent);
 			if (status == 0) {
-				status = usb_resume_device(udev);
+				status = usb_resume_device(udev, msg);
 				if (status || udev->state ==
 						USB_STATE_NOTATTACHED) {
 					usb_autosuspend_device(parent);
@@ -1303,15 +1307,16 @@ static int usb_resume_both(struct usb_device *udev)
 			/* We can't progagate beyond the USB subsystem,
 			 * so if a root hub's controller is suspended
 			 * then we're stuck. */
-			status = usb_resume_device(udev);
+			status = usb_resume_device(udev, msg);
 		}
 	} else if (udev->reset_resume)
-		status = usb_resume_device(udev);
+		status = usb_resume_device(udev, msg);
 
 	if (status == 0 && udev->actconfig) {
 		for (i = 0; i < udev->actconfig->desc.bNumInterfaces; i++) {
 			intf = udev->actconfig->interface[i];
-			usb_resume_interface(udev, intf, udev->reset_resume);
+			usb_resume_interface(udev, intf, msg,
+					udev->reset_resume);
 		}
 	}
 
@@ -1339,13 +1344,13 @@ static int usb_autopm_do_device(struct usb_device *udev, int inc_usage_cnt)
 		udev->last_busy = jiffies;
 	if (inc_usage_cnt >= 0 && udev->pm_usage_cnt > 0) {
 		if (udev->state == USB_STATE_SUSPENDED)
-			status = usb_resume_both(udev);
+			status = usb_resume_both(udev, PMSG_AUTO_RESUME);
 		if (status != 0)
 			udev->pm_usage_cnt -= inc_usage_cnt;
 		else if (inc_usage_cnt)
 			udev->last_busy = jiffies;
 	} else if (inc_usage_cnt <= 0 && udev->pm_usage_cnt <= 0) {
-		status = usb_suspend_both(udev, PMSG_SUSPEND);
+		status = usb_suspend_both(udev, PMSG_AUTO_SUSPEND);
 	}
 	usb_pm_unlock(udev);
 	return status;
@@ -1469,13 +1474,14 @@ static int usb_autopm_do_interface(struct usb_interface *intf,
 		udev->last_busy = jiffies;
 		if (inc_usage_cnt >= 0 && intf->pm_usage_cnt > 0) {
 			if (udev->state == USB_STATE_SUSPENDED)
-				status = usb_resume_both(udev);
+				status = usb_resume_both(udev,
+						PMSG_AUTO_RESUME);
 			if (status != 0)
 				intf->pm_usage_cnt -= inc_usage_cnt;
 			else
 				udev->last_busy = jiffies;
 		} else if (inc_usage_cnt <= 0 && intf->pm_usage_cnt <= 0) {
-			status = usb_suspend_both(udev, PMSG_SUSPEND);
+			status = usb_suspend_both(udev, PMSG_AUTO_SUSPEND);
 		}
 	}
 	usb_pm_unlock(udev);
@@ -1700,6 +1706,7 @@ int usb_external_suspend_device(struct usb_device *udev, pm_message_t msg)
 /**
  * usb_external_resume_device - external resume of a USB device and its interfaces
  * @udev: the usb_device to resume
+ * @msg: Power Management message describing this state transition
  *
  * This routine handles external resume requests: ones not generated
  * internally by a USB driver (autoresume) but rather coming from the user
@@ -1708,13 +1715,13 @@ int usb_external_suspend_device(struct usb_device *udev, pm_message_t msg)
  *
  * The caller must hold @udev's device lock.
  */
-int usb_external_resume_device(struct usb_device *udev)
+int usb_external_resume_device(struct usb_device *udev, pm_message_t msg)
 {
 	int	status;
 
 	usb_pm_lock(udev);
 	udev->auto_pm = 0;
-	status = usb_resume_both(udev);
+	status = usb_resume_both(udev, msg);
 	udev->last_busy = jiffies;
 	usb_pm_unlock(udev);
 	if (status == 0)
@@ -1727,7 +1734,7 @@ int usb_external_resume_device(struct usb_device *udev)
 	return status;
 }
 
-int usb_suspend(struct device *dev, pm_message_t message)
+int usb_suspend(struct device *dev, pm_message_t msg)
 {
 	struct usb_device	*udev;
 
@@ -1746,10 +1753,10 @@ int usb_suspend(struct device *dev, pm_message_t message)
 	}
 
 	udev->skip_sys_resume = 0;
-	return usb_external_suspend_device(udev, message);
+	return usb_external_suspend_device(udev, msg);
 }
 
-int usb_resume(struct device *dev)
+int usb_resume(struct device *dev, pm_message_t msg)
 {
 	struct usb_device	*udev;
 
@@ -1761,7 +1768,7 @@ int usb_resume(struct device *dev)
 	 */
 	if (udev->skip_sys_resume)
 		return 0;
-	return usb_external_resume_device(udev);
+	return usb_external_resume_device(udev, msg);
 }
 
 #endif /* CONFIG_PM */
diff --git a/drivers/usb/core/generic.c b/drivers/usb/core/generic.c
index 7e912f21fd36..30ecac3af15a 100644
--- a/drivers/usb/core/generic.c
+++ b/drivers/usb/core/generic.c
@@ -200,18 +200,18 @@ static int generic_suspend(struct usb_device *udev, pm_message_t msg)
 	 * interfaces manually by doing a bus (or "global") suspend.
 	 */
 	if (!udev->parent)
-		rc = hcd_bus_suspend(udev);
+		rc = hcd_bus_suspend(udev, msg);
 
 	/* Non-root devices don't need to do anything for FREEZE or PRETHAW */
 	else if (msg.event == PM_EVENT_FREEZE || msg.event == PM_EVENT_PRETHAW)
 		rc = 0;
 	else
-		rc = usb_port_suspend(udev);
+		rc = usb_port_suspend(udev, msg);
 
 	return rc;
 }
 
-static int generic_resume(struct usb_device *udev)
+static int generic_resume(struct usb_device *udev, pm_message_t msg)
 {
 	int rc;
 
@@ -221,9 +221,9 @@ static int generic_resume(struct usb_device *udev)
 	 * interfaces manually by doing a bus (or "global") resume.
 	 */
 	if (!udev->parent)
-		rc = hcd_bus_resume(udev);
+		rc = hcd_bus_resume(udev, msg);
 	else
-		rc = usb_port_resume(udev);
+		rc = usb_port_resume(udev, msg);
 	return rc;
 }
 
diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 7403ed871abd..a0079876d74e 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -1573,14 +1573,14 @@ int usb_hcd_get_frame_number (struct usb_device *udev)
 
 #ifdef	CONFIG_PM
 
-int hcd_bus_suspend(struct usb_device *rhdev)
+int hcd_bus_suspend(struct usb_device *rhdev, pm_message_t msg)
 {
 	struct usb_hcd	*hcd = container_of(rhdev->bus, struct usb_hcd, self);
 	int		status;
 	int		old_state = hcd->state;
 
 	dev_dbg(&rhdev->dev, "bus %s%s\n",
-			rhdev->auto_pm ? "auto-" : "", "suspend");
+			(msg.event & PM_EVENT_AUTO ? "auto-" : ""), "suspend");
 	if (!hcd->driver->bus_suspend) {
 		status = -ENOENT;
 	} else {
@@ -1598,14 +1598,14 @@ int hcd_bus_suspend(struct usb_device *rhdev)
 	return status;
 }
 
-int hcd_bus_resume(struct usb_device *rhdev)
+int hcd_bus_resume(struct usb_device *rhdev, pm_message_t msg)
 {
 	struct usb_hcd	*hcd = container_of(rhdev->bus, struct usb_hcd, self);
 	int		status;
 	int		old_state = hcd->state;
 
 	dev_dbg(&rhdev->dev, "usb %s%s\n",
-			rhdev->auto_pm ? "auto-" : "", "resume");
+			(msg.event & PM_EVENT_AUTO ? "auto-" : ""), "resume");
 	if (!hcd->driver->bus_resume)
 		return -ENOENT;
 	if (hcd->state == HC_STATE_RUNNING)
@@ -1638,7 +1638,7 @@ static void hcd_resume_work(struct work_struct *work)
 
 	usb_lock_device(udev);
 	usb_mark_last_busy(udev);
-	usb_external_resume_device(udev);
+	usb_external_resume_device(udev, PMSG_REMOTE_RESUME);
 	usb_unlock_device(udev);
 }
 
diff --git a/drivers/usb/core/hcd.h b/drivers/usb/core/hcd.h
index 0aaa9cea6b38..aa5da82d9071 100644
--- a/drivers/usb/core/hcd.h
+++ b/drivers/usb/core/hcd.h
@@ -388,8 +388,8 @@ extern int usb_find_interface_driver(struct usb_device *dev,
 #ifdef CONFIG_PM
 extern void usb_hcd_resume_root_hub(struct usb_hcd *hcd);
 extern void usb_root_hub_lost_power(struct usb_device *rhdev);
-extern int hcd_bus_suspend(struct usb_device *rhdev);
-extern int hcd_bus_resume(struct usb_device *rhdev);
+extern int hcd_bus_suspend(struct usb_device *rhdev, pm_message_t msg);
+extern int hcd_bus_resume(struct usb_device *rhdev, pm_message_t msg);
 #else
 static inline void usb_hcd_resume_root_hub(struct usb_hcd *hcd)
 {
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index ff066edf4dca..fc99ef67761d 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -1984,7 +1984,7 @@ static int check_port_resume_type(struct usb_device *udev,
  *
  * Returns 0 on success, else negative errno.
  */
-int usb_port_suspend(struct usb_device *udev)
+int usb_port_suspend(struct usb_device *udev, pm_message_t msg)
 {
 	struct usb_hub	*hub = hdev_to_hub(udev->parent);
 	int		port1 = udev->portnum;
@@ -2023,7 +2023,7 @@ int usb_port_suspend(struct usb_device *udev)
 	} else {
 		/* device has up to 10 msec to fully suspend */
 		dev_dbg(&udev->dev, "usb %ssuspend\n",
-				udev->auto_pm ? "auto-" : "");
+				(msg.event & PM_EVENT_AUTO ? "auto-" : ""));
 		usb_set_device_state(udev, USB_STATE_SUSPENDED);
 		msleep(10);
 	}
@@ -2142,7 +2142,7 @@ static int finish_port_resume(struct usb_device *udev)
  *
  * Returns 0 on success, else negative errno.
  */
-int usb_port_resume(struct usb_device *udev)
+int usb_port_resume(struct usb_device *udev, pm_message_t msg)
 {
 	struct usb_hub	*hub = hdev_to_hub(udev->parent);
 	int		port1 = udev->portnum;
@@ -2167,7 +2167,7 @@ int usb_port_resume(struct usb_device *udev)
 	} else {
 		/* drive resume for at least 20 msec */
 		dev_dbg(&udev->dev, "usb %sresume\n",
-				udev->auto_pm ? "auto-" : "");
+				(msg.event & PM_EVENT_AUTO ? "auto-" : ""));
 		msleep(25);
 
 		/* Virtual root hubs can trigger on GET_PORT_STATUS to
@@ -2208,7 +2208,7 @@ static int remote_wakeup(struct usb_device *udev)
 	if (udev->state == USB_STATE_SUSPENDED) {
 		dev_dbg(&udev->dev, "usb %sresume\n", "wakeup-");
 		usb_mark_last_busy(udev);
-		status = usb_external_resume_device(udev);
+		status = usb_external_resume_device(udev, PMSG_REMOTE_RESUME);
 	}
 	return status;
 }
@@ -2217,14 +2217,14 @@ static int remote_wakeup(struct usb_device *udev)
 
 /* When CONFIG_USB_SUSPEND isn't set, we never suspend or resume any ports. */
 
-int usb_port_suspend(struct usb_device *udev)
+int usb_port_suspend(struct usb_device *udev, pm_message_t msg)
 {
 	return 0;
 }
 
 /* However we may need to do a reset-resume */
 
-int usb_port_resume(struct usb_device *udev)
+int usb_port_resume(struct usb_device *udev, pm_message_t msg)
 {
 	struct usb_hub	*hub = hdev_to_hub(udev->parent);
 	int		port1 = udev->portnum;
@@ -2264,7 +2264,7 @@ static int hub_suspend(struct usb_interface *intf, pm_message_t msg)
 
 		udev = hdev->children [port1-1];
 		if (udev && udev->can_submit) {
-			if (!hdev->auto_pm)
+			if (!(msg.event & PM_EVENT_AUTO))
 				dev_dbg(&intf->dev, "port %d nyet suspended\n",
 						port1);
 			return -EBUSY;
diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c
index 8c65fa75b5c2..0f0ccf640114 100644
--- a/drivers/usb/core/sysfs.c
+++ b/drivers/usb/core/sysfs.c
@@ -359,19 +359,19 @@ set_level(struct device *dev, struct device_attribute *attr,
 			strncmp(buf, on_string, len) == 0) {
 		udev->autosuspend_disabled = 1;
 		udev->autoresume_disabled = 0;
-		rc = usb_external_resume_device(udev);
+		rc = usb_external_resume_device(udev, PMSG_USER_RESUME);
 
 	} else if (len == sizeof auto_string - 1 &&
 			strncmp(buf, auto_string, len) == 0) {
 		udev->autosuspend_disabled = 0;
 		udev->autoresume_disabled = 0;
-		rc = usb_external_resume_device(udev);
+		rc = usb_external_resume_device(udev, PMSG_USER_RESUME);
 
 	} else if (len == sizeof suspend_string - 1 &&
 			strncmp(buf, suspend_string, len) == 0) {
 		udev->autosuspend_disabled = 0;
 		udev->autoresume_disabled = 1;
-		rc = usb_external_suspend_device(udev, PMSG_SUSPEND);
+		rc = usb_external_suspend_device(udev, PMSG_USER_SUSPEND);
 
 	} else
 		rc = -EINVAL;
diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index 51854c2bc912..4c98f3975afe 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -253,7 +253,7 @@ static int usb_dev_prepare(struct device *dev)
 static void usb_dev_complete(struct device *dev)
 {
 	/* Currently used only for rebinding interfaces */
-	usb_resume(dev);	/* Implement eventually? */
+	usb_resume(dev, PMSG_RESUME);	/* Message event is meaningless */
 }
 
 static int usb_dev_suspend(struct device *dev)
@@ -263,7 +263,7 @@ static int usb_dev_suspend(struct device *dev)
 
 static int usb_dev_resume(struct device *dev)
 {
-	return usb_resume(dev);
+	return usb_resume(dev, PMSG_RESUME);
 }
 
 static int usb_dev_freeze(struct device *dev)
@@ -273,7 +273,7 @@ static int usb_dev_freeze(struct device *dev)
 
 static int usb_dev_thaw(struct device *dev)
 {
-	return usb_resume(dev);
+	return usb_resume(dev, PMSG_THAW);
 }
 
 static int usb_dev_poweroff(struct device *dev)
@@ -283,7 +283,7 @@ static int usb_dev_poweroff(struct device *dev)
 
 static int usb_dev_restore(struct device *dev)
 {
-	return usb_resume(dev);
+	return usb_resume(dev, PMSG_RESTORE);
 }
 
 static struct dev_pm_ops usb_device_pm_ops = {
diff --git a/drivers/usb/core/usb.h b/drivers/usb/core/usb.h
index b60ebb4de1a8..9fb195665fa8 100644
--- a/drivers/usb/core/usb.h
+++ b/drivers/usb/core/usb.h
@@ -1,3 +1,5 @@
+#include <linux/pm.h>
+
 /* Functions local to drivers/usb/core/ */
 
 extern int usb_create_sysfs_dev_files(struct usb_device *dev);
@@ -42,15 +44,16 @@ extern void usb_host_cleanup(void);
 #ifdef	CONFIG_PM
 
 extern int usb_suspend(struct device *dev, pm_message_t msg);
-extern int usb_resume(struct device *dev);
+extern int usb_resume(struct device *dev, pm_message_t msg);
 
 extern void usb_autosuspend_work(struct work_struct *work);
 extern void usb_autoresume_work(struct work_struct *work);
-extern int usb_port_suspend(struct usb_device *dev);
-extern int usb_port_resume(struct usb_device *dev);
+extern int usb_port_suspend(struct usb_device *dev, pm_message_t msg);
+extern int usb_port_resume(struct usb_device *dev, pm_message_t msg);
 extern int usb_external_suspend_device(struct usb_device *udev,
 		pm_message_t msg);
-extern int usb_external_resume_device(struct usb_device *udev);
+extern int usb_external_resume_device(struct usb_device *udev,
+		pm_message_t msg);
 
 static inline void usb_pm_lock(struct usb_device *udev)
 {
@@ -64,12 +67,12 @@ static inline void usb_pm_unlock(struct usb_device *udev)
 
 #else
 
-static inline int usb_port_suspend(struct usb_device *udev)
+static inline int usb_port_suspend(struct usb_device *udev, pm_message_t msg)
 {
 	return 0;
 }
 
-static inline int usb_port_resume(struct usb_device *udev)
+static inline int usb_port_resume(struct usb_device *udev, pm_message_t msg)
 {
 	return 0;
 }
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 8bc81bffc195..74d0b9990c73 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1067,7 +1067,7 @@ struct usb_device_driver {
 	void (*disconnect) (struct usb_device *udev);
 
 	int (*suspend) (struct usb_device *udev, pm_message_t message);
-	int (*resume) (struct usb_device *udev);
+	int (*resume) (struct usb_device *udev, pm_message_t message);
 	struct usbdrv_wrap drvwrap;
 	unsigned int supports_autosuspend:1;
 };
-- 
cgit v1.2.3


From 2ffcdb3bdadaf8260986e96384df26c94a6ad42c Mon Sep 17 00:00:00 2001
From: Bryan Wu <cooloney@kernel.org>
Date: Tue, 2 Dec 2008 21:33:43 +0200
Subject: USB: musb: use new platform data interface of musb to replace old one

Signed-off-by: Bryan Wu <cooloney@kernel.org>
Signed-off-by: Felipe Balbi <felipe.balbi@nokia.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb/musb.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/usb/musb.h b/include/linux/usb/musb.h
index 630962c04ca4..d6aad0ea6033 100644
--- a/include/linux/usb/musb.h
+++ b/include/linux/usb/musb.h
@@ -47,6 +47,11 @@ struct musb_hdrc_config {
 	u8		ram_bits;	/* ram address size */
 
 	struct musb_hdrc_eps_bits *eps_bits;
+#ifdef CONFIG_BLACKFIN
+        /* A GPIO controlling VRSEL in Blackfin */
+        unsigned int    gpio_vrsel;
+#endif
+
 };
 
 struct musb_hdrc_platform_data {
-- 
cgit v1.2.3


From 3b23dd6f8a718e5339de4f7d86ce76a078b5f771 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Fri, 5 Dec 2008 14:10:34 -0500
Subject: USB: utilize the bus notifiers

This patch (as1185) makes usbcore take advantage of the bus
notifications sent out by the driver core.  Now we can create all our
device and interface attribute files before the device or interface
uevent is broadcast.

A side effect is that we no longer create the endpoint "pseudo"
devices at the same time as a device or interface is registered -- it
seems like a bad idea to try registering an endpoint before the
registration of its parent is complete.  So the routines for creating
and removing endpoint devices have been split out and renamed, and
they are called explicitly when needed.  A new bitflag is used for
keeping track of whether or not the interface's endpoint devices have
been created, since (just as with the interface attributes) they vary
with the altsetting and hence can be changed at random times.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Cc: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/endpoint.c |  4 ++--
 drivers/usb/core/hub.c      | 18 ++++++----------
 drivers/usb/core/message.c  | 50 ++++++++++++++++++++++++++++++++++++++-------
 drivers/usb/core/sysfs.c    | 22 +-------------------
 drivers/usb/core/usb.c      | 37 +++++++++++++++++++++++++++++++++
 drivers/usb/core/usb.h      |  4 ++--
 include/linux/usb.h         |  2 ++
 7 files changed, 93 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/endpoint.c b/drivers/usb/core/endpoint.c
index 946fae43d622..e1710f260b4f 100644
--- a/drivers/usb/core/endpoint.c
+++ b/drivers/usb/core/endpoint.c
@@ -276,7 +276,7 @@ static void ep_device_release(struct device *dev)
 	kfree(ep_dev);
 }
 
-int usb_create_ep_files(struct device *parent,
+int usb_create_ep_devs(struct device *parent,
 			struct usb_host_endpoint *endpoint,
 			struct usb_device *udev)
 {
@@ -340,7 +340,7 @@ exit:
 	return retval;
 }
 
-void usb_remove_ep_files(struct usb_host_endpoint *endpoint)
+void usb_remove_ep_devs(struct usb_host_endpoint *endpoint)
 {
 	struct ep_device *ep_dev = endpoint->ep_dev;
 
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 5abdc11be1e5..756b8d9993fc 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -1437,17 +1437,12 @@ void usb_disconnect(struct usb_device **pdev)
 	usb_disable_device(udev, 0);
 	usb_hcd_synchronize_unlinks(udev);
 
+	usb_remove_ep_devs(&udev->ep0);
 	usb_unlock_device(udev);
 
-	/* Remove the device-specific files from sysfs.  This must be
-	 * done with udev unlocked, because some of the attribute
-	 * routines try to acquire the device lock.
-	 */
-	usb_remove_sysfs_dev_files(udev);
-
 	/* Unregister the device.  The device driver is responsible
-	 * for removing the device files from usbfs and sysfs and for
-	 * de-configuring the device.
+	 * for de-configuring the device and invoking the remove-device
+	 * notifier chain (used by usbfs and possibly others).
 	 */
 	device_del(&udev->dev);
 
@@ -1654,8 +1649,8 @@ int usb_new_device(struct usb_device *udev)
 	announce_device(udev);
 
 	/* Register the device.  The device driver is responsible
-	 * for adding the device files to sysfs and for configuring
-	 * the device.
+	 * for configuring the device and invoking the add-device
+	 * notifier chain (used by usbfs and possibly others).
 	 */
 	err = device_add(&udev->dev);
 	if (err) {
@@ -1663,8 +1658,7 @@ int usb_new_device(struct usb_device *udev)
 		goto fail;
 	}
 
-	/* put device-specific files into sysfs */
-	usb_create_sysfs_dev_files(udev);
+	(void) usb_create_ep_devs(&udev->dev, &udev->ep0, udev);
 	return err;
 
 fail:
diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c
index aadf29f09c45..7943901c641c 100644
--- a/drivers/usb/core/message.c
+++ b/drivers/usb/core/message.c
@@ -1004,6 +1004,34 @@ int usb_clear_halt(struct usb_device *dev, int pipe)
 }
 EXPORT_SYMBOL_GPL(usb_clear_halt);
 
+static int create_intf_ep_devs(struct usb_interface *intf)
+{
+	struct usb_device *udev = interface_to_usbdev(intf);
+	struct usb_host_interface *alt = intf->cur_altsetting;
+	int i;
+
+	if (intf->ep_devs_created || intf->unregistering)
+		return 0;
+
+	for (i = 0; i < alt->desc.bNumEndpoints; ++i)
+		(void) usb_create_ep_devs(&intf->dev, &alt->endpoint[i], udev);
+	intf->ep_devs_created = 1;
+	return 0;
+}
+
+static void remove_intf_ep_devs(struct usb_interface *intf)
+{
+	struct usb_host_interface *alt = intf->cur_altsetting;
+	int i;
+
+	if (!intf->ep_devs_created)
+		return;
+
+	for (i = 0; i < alt->desc.bNumEndpoints; ++i)
+		usb_remove_ep_devs(&alt->endpoint[i]);
+	intf->ep_devs_created = 0;
+}
+
 /**
  * usb_disable_endpoint -- Disable an endpoint by address
  * @dev: the device whose endpoint is being disabled
@@ -1092,7 +1120,7 @@ void usb_disable_device(struct usb_device *dev, int skip_ep0)
 			dev_dbg(&dev->dev, "unregistering interface %s\n",
 				dev_name(&interface->dev));
 			interface->unregistering = 1;
-			usb_remove_sysfs_intf_files(interface);
+			remove_intf_ep_devs(interface);
 			device_del(&interface->dev);
 		}
 
@@ -1235,8 +1263,10 @@ int usb_set_interface(struct usb_device *dev, int interface, int alternate)
 	 */
 
 	/* prevent submissions using previous endpoint settings */
-	if (iface->cur_altsetting != alt)
+	if (iface->cur_altsetting != alt) {
+		remove_intf_ep_devs(iface);
 		usb_remove_sysfs_intf_files(iface);
+	}
 	usb_disable_interface(dev, iface);
 
 	iface->cur_altsetting = alt;
@@ -1272,9 +1302,10 @@ int usb_set_interface(struct usb_device *dev, int interface, int alternate)
 	 * (Likewise, EP0 never "halts" on well designed devices.)
 	 */
 	usb_enable_interface(dev, iface);
-	if (device_is_registered(&iface->dev))
+	if (device_is_registered(&iface->dev)) {
 		usb_create_sysfs_intf_files(iface);
-
+		create_intf_ep_devs(iface);
+	}
 	return 0;
 }
 EXPORT_SYMBOL_GPL(usb_set_interface);
@@ -1334,7 +1365,6 @@ int usb_reset_configuration(struct usb_device *dev)
 		struct usb_interface *intf = config->interface[i];
 		struct usb_host_interface *alt;
 
-		usb_remove_sysfs_intf_files(intf);
 		alt = usb_altnum_to_altsetting(intf, 0);
 
 		/* No altsetting 0?  We'll assume the first altsetting.
@@ -1345,10 +1375,16 @@ int usb_reset_configuration(struct usb_device *dev)
 		if (!alt)
 			alt = &intf->altsetting[0];
 
+		if (alt != intf->cur_altsetting) {
+			remove_intf_ep_devs(intf);
+			usb_remove_sysfs_intf_files(intf);
+		}
 		intf->cur_altsetting = alt;
 		usb_enable_interface(dev, intf);
-		if (device_is_registered(&intf->dev))
+		if (device_is_registered(&intf->dev)) {
 			usb_create_sysfs_intf_files(intf);
+			create_intf_ep_devs(intf);
+		}
 	}
 	return 0;
 }
@@ -1682,7 +1718,7 @@ free_interfaces:
 				dev_name(&intf->dev), ret);
 			continue;
 		}
-		usb_create_sysfs_intf_files(intf);
+		create_intf_ep_devs(intf);
 	}
 
 	usb_autosuspend_device(dev);
diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c
index 0f0ccf640114..4cc2456ef3be 100644
--- a/drivers/usb/core/sysfs.c
+++ b/drivers/usb/core/sysfs.c
@@ -629,9 +629,6 @@ int usb_create_sysfs_dev_files(struct usb_device *udev)
 	struct device *dev = &udev->dev;
 	int retval;
 
-	/* Unforunately these attributes cannot be created before
-	 * the uevent is broadcast.
-	 */
 	retval = device_create_bin_file(dev, &dev_bin_attr_descriptors);
 	if (retval)
 		goto error;
@@ -643,11 +640,7 @@ int usb_create_sysfs_dev_files(struct usb_device *udev)
 	retval = add_power_attributes(dev);
 	if (retval)
 		goto error;
-
-	retval = usb_create_ep_files(dev, &udev->ep0, udev);
-	if (retval)
-		goto error;
-	return 0;
+	return retval;
 error:
 	usb_remove_sysfs_dev_files(udev);
 	return retval;
@@ -657,7 +650,6 @@ void usb_remove_sysfs_dev_files(struct usb_device *udev)
 {
 	struct device *dev = &udev->dev;
 
-	usb_remove_ep_files(&udev->ep0);
 	remove_power_attributes(dev);
 	remove_persist_attributes(dev);
 	device_remove_bin_file(dev, &dev_bin_attr_descriptors);
@@ -816,36 +808,24 @@ int usb_create_sysfs_intf_files(struct usb_interface *intf)
 {
 	struct usb_device *udev = interface_to_usbdev(intf);
 	struct usb_host_interface *alt = intf->cur_altsetting;
-	int i;
 	int retval;
 
 	if (intf->sysfs_files_created || intf->unregistering)
 		return 0;
 
-	/* The interface string may be present in some altsettings
-	 * and missing in others.  Hence its attribute cannot be created
-	 * before the uevent is broadcast.
-	 */
 	if (alt->string == NULL)
 		alt->string = usb_cache_string(udev, alt->desc.iInterface);
 	if (alt->string)
 		retval = device_create_file(&intf->dev, &dev_attr_interface);
-	for (i = 0; i < alt->desc.bNumEndpoints; ++i)
-		usb_create_ep_files(&intf->dev, &alt->endpoint[i], udev);
 	intf->sysfs_files_created = 1;
 	return 0;
 }
 
 void usb_remove_sysfs_intf_files(struct usb_interface *intf)
 {
-	struct usb_host_interface *alt = intf->cur_altsetting;
-	int i;
-
 	if (!intf->sysfs_files_created)
 		return;
 
-	for (i = 0; i < alt->desc.bNumEndpoints; ++i)
-		usb_remove_ep_files(&alt->endpoint[i]);
 	device_remove_file(&intf->dev, &dev_attr_interface);
 	intf->sysfs_files_created = 0;
 }
diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index 4c98f3975afe..c0821564a3fe 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -970,6 +970,37 @@ int usb_disabled(void)
 }
 EXPORT_SYMBOL_GPL(usb_disabled);
 
+/*
+ * Notifications of device and interface registration
+ */
+static int usb_bus_notify(struct notifier_block *nb, unsigned long action,
+		void *data)
+{
+	struct device *dev = data;
+
+	switch (action) {
+	case BUS_NOTIFY_ADD_DEVICE:
+		if (dev->type == &usb_device_type)
+			(void) usb_create_sysfs_dev_files(to_usb_device(dev));
+		else if (dev->type == &usb_if_device_type)
+			(void) usb_create_sysfs_intf_files(
+					to_usb_interface(dev));
+		break;
+
+	case BUS_NOTIFY_DEL_DEVICE:
+		if (dev->type == &usb_device_type)
+			usb_remove_sysfs_dev_files(to_usb_device(dev));
+		else if (dev->type == &usb_if_device_type)
+			usb_remove_sysfs_intf_files(to_usb_interface(dev));
+		break;
+	}
+	return 0;
+}
+
+static struct notifier_block usb_bus_nb = {
+	.notifier_call = usb_bus_notify,
+};
+
 /*
  * Init
  */
@@ -987,6 +1018,9 @@ static int __init usb_init(void)
 	retval = bus_register(&usb_bus_type);
 	if (retval)
 		goto bus_register_failed;
+	retval = bus_register_notifier(&usb_bus_type, &usb_bus_nb);
+	if (retval)
+		goto bus_notifier_failed;
 	retval = usb_host_init();
 	if (retval)
 		goto host_init_failed;
@@ -1021,6 +1055,8 @@ driver_register_failed:
 major_init_failed:
 	usb_host_cleanup();
 host_init_failed:
+	bus_unregister_notifier(&usb_bus_type, &usb_bus_nb);
+bus_notifier_failed:
 	bus_unregister(&usb_bus_type);
 bus_register_failed:
 	ksuspend_usb_cleanup();
@@ -1044,6 +1080,7 @@ static void __exit usb_exit(void)
 	usb_devio_cleanup();
 	usb_hub_cleanup();
 	usb_host_cleanup();
+	bus_unregister_notifier(&usb_bus_type, &usb_bus_nb);
 	bus_unregister(&usb_bus_type);
 	ksuspend_usb_cleanup();
 }
diff --git a/drivers/usb/core/usb.h b/drivers/usb/core/usb.h
index 9fb195665fa8..381eae90c3b7 100644
--- a/drivers/usb/core/usb.h
+++ b/drivers/usb/core/usb.h
@@ -6,10 +6,10 @@ extern int usb_create_sysfs_dev_files(struct usb_device *dev);
 extern void usb_remove_sysfs_dev_files(struct usb_device *dev);
 extern int usb_create_sysfs_intf_files(struct usb_interface *intf);
 extern void usb_remove_sysfs_intf_files(struct usb_interface *intf);
-extern int usb_create_ep_files(struct device *parent,
+extern int usb_create_ep_devs(struct device *parent,
 				struct usb_host_endpoint *endpoint,
 				struct usb_device *udev);
-extern void usb_remove_ep_files(struct usb_host_endpoint *endpoint);
+extern void usb_remove_ep_devs(struct usb_host_endpoint *endpoint);
 
 extern void usb_enable_endpoint(struct usb_device *dev,
 		struct usb_host_endpoint *ep);
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 74d0b9990c73..e9d63562325a 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -108,6 +108,7 @@ enum usb_interface_condition {
  *	(in probe()), bound to a driver, or unbinding (in disconnect())
  * @is_active: flag set when the interface is bound and not suspended.
  * @sysfs_files_created: sysfs attributes exist
+ * @ep_devs_created: endpoint child pseudo-devices exist
  * @unregistering: flag set when the interface is being unregistered
  * @needs_remote_wakeup: flag set when the driver requires remote-wakeup
  *	capability during autosuspend.
@@ -169,6 +170,7 @@ struct usb_interface {
 	enum usb_interface_condition condition;		/* state of binding */
 	unsigned is_active:1;		/* the interface is not suspended */
 	unsigned sysfs_files_created:1;	/* the sysfs attributes exist */
+	unsigned ep_devs_created:1;	/* endpoint "devices" exist */
 	unsigned unregistering:1;	/* unregistration is in progress */
 	unsigned needs_remote_wakeup:1;	/* driver requires remote wakeup */
 	unsigned needs_altsetting0:1;	/* switch to altsetting 0 is pending */
-- 
cgit v1.2.3


From 49367d8f1d9f26482cf7089489e90f0afd0a942c Mon Sep 17 00:00:00 2001
From: Ming Lei <tom.leiming@gmail.com>
Date: Fri, 12 Dec 2008 21:38:45 +0800
Subject: USB: mark "reject" field of struct urb as atomic_t

It is enough to protect accesses to reject field of urb
by marking it as atomic_t,also it is the only reason of
existence of usb_reject_lock,so remove the lock to make
code more clean.

Signed-off-by: Ming Lei <tom.leiming@gmail.com>
Acked-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/hcd.c |  6 +++---
 drivers/usb/core/urb.c | 23 +++++------------------
 include/linux/usb.h    |  2 +-
 3 files changed, 9 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index a0079876d74e..3c711db55d86 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -1010,7 +1010,7 @@ int usb_hcd_link_urb_to_ep(struct usb_hcd *hcd, struct urb *urb)
 	spin_lock(&hcd_urb_list_lock);
 
 	/* Check that the URB isn't being killed */
-	if (unlikely(urb->reject)) {
+	if (unlikely(atomic_read(&urb->reject))) {
 		rc = -EPERM;
 		goto done;
 	}
@@ -1340,7 +1340,7 @@ int usb_hcd_submit_urb (struct urb *urb, gfp_t mem_flags)
 		INIT_LIST_HEAD(&urb->urb_list);
 		atomic_dec(&urb->use_count);
 		atomic_dec(&urb->dev->urbnum);
-		if (urb->reject)
+		if (atomic_read(&urb->reject))
 			wake_up(&usb_kill_urb_queue);
 		usb_put_urb(urb);
 	}
@@ -1444,7 +1444,7 @@ void usb_hcd_giveback_urb(struct usb_hcd *hcd, struct urb *urb, int status)
 	urb->status = status;
 	urb->complete (urb);
 	atomic_dec (&urb->use_count);
-	if (unlikely (urb->reject))
+	if (unlikely(atomic_read(&urb->reject)))
 		wake_up (&usb_kill_urb_queue);
 	usb_put_urb (urb);
 }
diff --git a/drivers/usb/core/urb.c b/drivers/usb/core/urb.c
index 1f68af9db3f7..b5e9948698bf 100644
--- a/drivers/usb/core/urb.c
+++ b/drivers/usb/core/urb.c
@@ -10,7 +10,6 @@
 
 #define to_urb(d) container_of(d, struct urb, kref)
 
-static DEFINE_SPINLOCK(usb_reject_lock);
 
 static void urb_destroy(struct kref *kref)
 {
@@ -131,9 +130,7 @@ void usb_anchor_urb(struct urb *urb, struct usb_anchor *anchor)
 	urb->anchor = anchor;
 
 	if (unlikely(anchor->poisoned)) {
-		spin_lock(&usb_reject_lock);
-		urb->reject++;
-		spin_unlock(&usb_reject_lock);
+		atomic_inc(&urb->reject);
 	}
 
 	spin_unlock_irqrestore(&anchor->lock, flags);
@@ -565,16 +562,12 @@ void usb_kill_urb(struct urb *urb)
 	might_sleep();
 	if (!(urb && urb->dev && urb->ep))
 		return;
-	spin_lock_irq(&usb_reject_lock);
-	++urb->reject;
-	spin_unlock_irq(&usb_reject_lock);
+	atomic_inc(&urb->reject);
 
 	usb_hcd_unlink_urb(urb, -ENOENT);
 	wait_event(usb_kill_urb_queue, atomic_read(&urb->use_count) == 0);
 
-	spin_lock_irq(&usb_reject_lock);
-	--urb->reject;
-	spin_unlock_irq(&usb_reject_lock);
+	atomic_dec(&urb->reject);
 }
 EXPORT_SYMBOL_GPL(usb_kill_urb);
 
@@ -606,9 +599,7 @@ void usb_poison_urb(struct urb *urb)
 	might_sleep();
 	if (!(urb && urb->dev && urb->ep))
 		return;
-	spin_lock_irq(&usb_reject_lock);
-	++urb->reject;
-	spin_unlock_irq(&usb_reject_lock);
+	atomic_inc(&urb->reject);
 
 	usb_hcd_unlink_urb(urb, -ENOENT);
 	wait_event(usb_kill_urb_queue, atomic_read(&urb->use_count) == 0);
@@ -617,14 +608,10 @@ EXPORT_SYMBOL_GPL(usb_poison_urb);
 
 void usb_unpoison_urb(struct urb *urb)
 {
-	unsigned long flags;
-
 	if (!urb)
 		return;
 
-	spin_lock_irqsave(&usb_reject_lock, flags);
-	--urb->reject;
-	spin_unlock_irqrestore(&usb_reject_lock, flags);
+	atomic_dec(&urb->reject);
 }
 EXPORT_SYMBOL_GPL(usb_unpoison_urb);
 
diff --git a/include/linux/usb.h b/include/linux/usb.h
index e9d63562325a..4e8654a18250 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1340,7 +1340,7 @@ struct urb {
 	struct kref kref;		/* reference count of the URB */
 	void *hcpriv;			/* private data for host controller */
 	atomic_t use_count;		/* concurrent submissions counter */
-	u8 reject;			/* submissions will fail */
+	atomic_t reject;		/* submissions will fail */
 	int unlinked;			/* unlink error code */
 
 	/* public: documented fields in the urb that can be used by drivers */
-- 
cgit v1.2.3


From 856395d6e137b4e7194972cb7765f3de6a72ba61 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oliver@neukum.org>
Date: Thu, 18 Dec 2008 09:17:49 +0100
Subject: USB: extension of anchor API to unpoison an anchor

This extension allows unpoisoning an anchor allowing drivers that
resubmit URBs to reuse an anchor for methods like resume()

Signed-off-by: Oliver Neukum <oneukum@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/urb.c | 20 ++++++++++++++++++++
 include/linux/usb.h    |  1 +
 2 files changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/usb/core/urb.c b/drivers/usb/core/urb.c
index b5e9948698bf..58bc5e3c2560 100644
--- a/drivers/usb/core/urb.c
+++ b/drivers/usb/core/urb.c
@@ -678,6 +678,26 @@ void usb_poison_anchored_urbs(struct usb_anchor *anchor)
 }
 EXPORT_SYMBOL_GPL(usb_poison_anchored_urbs);
 
+/**
+ * usb_unpoison_anchored_urbs - let an anchor be used successfully again
+ * @anchor: anchor the requests are bound to
+ *
+ * Reverses the effect of usb_poison_anchored_urbs
+ * the anchor can be used normally after it returns
+ */
+void usb_unpoison_anchored_urbs(struct usb_anchor *anchor)
+{
+	unsigned long flags;
+	struct urb *lazarus;
+
+	spin_lock_irqsave(&anchor->lock, flags);
+	list_for_each_entry(lazarus, &anchor->urb_list, anchor_list) {
+		usb_unpoison_urb(lazarus);
+	}
+	anchor->poisoned = 0;
+	spin_unlock_irqrestore(&anchor->lock, flags);
+}
+EXPORT_SYMBOL_GPL(usb_unpoison_anchored_urbs);
 /**
  * usb_unlink_anchored_urbs - asynchronously cancel transfer requests en masse
  * @anchor: anchor the requests are bound to
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 4e8654a18250..e89639896508 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1485,6 +1485,7 @@ extern void usb_poison_urb(struct urb *urb);
 extern void usb_unpoison_urb(struct urb *urb);
 extern void usb_kill_anchored_urbs(struct usb_anchor *anchor);
 extern void usb_poison_anchored_urbs(struct usb_anchor *anchor);
+extern void usb_unpoison_anchored_urbs(struct usb_anchor *anchor);
 extern void usb_unlink_anchored_urbs(struct usb_anchor *anchor);
 extern void usb_anchor_urb(struct urb *urb, struct usb_anchor *anchor);
 extern void usb_unanchor_urb(struct urb *urb);
-- 
cgit v1.2.3


From 25ff1c316f6a763f1eefe7f8984b2d8c03888432 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Mon, 15 Dec 2008 12:43:41 -0500
Subject: USB: storage: add last-sector hacks

This patch (as1189b) adds some hacks to usb-storage for dealing with
the growing problems involving bad capacity values and last-sector
accesses:

	A new flag, US_FL_CAPACITY_OK, is created to indicate that
	the device is known to report its capacity correctly.  An
	unusual_devs entry for Linux's own File-backed Storage Gadget
	is added with this flag set, since g_file_storage always
	reports the correct capacity and since the capacity need
	not be even (it is determined by the size of the backing
	file).

	An entry in unusual_devs.h which has only the CAPACITY_OK
	flag set shouldn't prejudice libusual, since the device will
	work perfectly well with either usb-storage or ub.  So a
	new macro, COMPLIANT_DEV, is added to let libusual know
	about these entries.

	When a last-sector access succeeds and the total number of
	sectors is odd (the unexpected case, in which guessing that
	the number is even might cause trouble), a WARN is triggered.
	The kerneloops.org project will collect these warnings,
	allowing us to add CAPACITY_OK flags for the devices in
	question before implementing the default-to-even heuristic.
	If users want to prevent the stack dump produced by the WARN,
	they can disable the hack by adding an unusual_devs entry
	for their device with the CAPACITY_OK flag.

	When a last-sector access fails three times in a row and
	neither the FIX_CAPACITY nor the CAPACITY_OK flag is set,
	we assume the last-sector bug is present.  We replace the
	existing status and sense data with values that will cause
	the SCSI core to fail the access immediately rather than
	retry indefinitely.  This should fix the difficulties
	people have been having with Nokia phones.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/storage/libusual.c     |   7 +++
 drivers/usb/storage/scsiglue.c     |   8 +++
 drivers/usb/storage/transport.c    | 110 +++++++++++++++++++++++++++++++++++++
 drivers/usb/storage/unusual_devs.h |  16 +++++-
 drivers/usb/storage/usb.c          |   6 ++
 drivers/usb/storage/usb.h          |   4 ++
 include/linux/usb_usual.h          |   6 +-
 7 files changed, 154 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/storage/libusual.c b/drivers/usb/storage/libusual.c
index d617e8ae6b00..f970b27ba308 100644
--- a/drivers/usb/storage/libusual.c
+++ b/drivers/usb/storage/libusual.c
@@ -46,6 +46,12 @@ static int usu_probe_thread(void *arg);
 { USB_DEVICE_VER(id_vendor, id_product, bcdDeviceMin,bcdDeviceMax), \
   .driver_info = (flags)|(USB_US_TYPE_STOR<<24) }
 
+#define COMPLIANT_DEV(id_vendor, id_product, bcdDeviceMin, bcdDeviceMax, \
+		    vendorName, productName, useProtocol, useTransport, \
+		    initFunction, flags) \
+{ USB_DEVICE_VER(id_vendor, id_product, bcdDeviceMin, bcdDeviceMax), \
+  .driver_info = (flags) }
+
 #define USUAL_DEV(useProto, useTrans, useType) \
 { USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, useProto, useTrans), \
   .driver_info = ((useType)<<24) }
@@ -57,6 +63,7 @@ struct usb_device_id storage_usb_ids [] = {
 
 #undef USUAL_DEV
 #undef UNUSUAL_DEV
+#undef COMPLIANT_DEV
 
 MODULE_DEVICE_TABLE(usb, storage_usb_ids);
 EXPORT_SYMBOL_GPL(storage_usb_ids);
diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c
index e9d6c196a7ab..8d78084abf9f 100644
--- a/drivers/usb/storage/scsiglue.c
+++ b/drivers/usb/storage/scsiglue.c
@@ -208,6 +208,14 @@ static int slave_configure(struct scsi_device *sdev)
 		 * sector in a larger then 1 sector read, since the performance
 		 * impact is negible we set this flag for all USB disks */
 		sdev->last_sector_bug = 1;
+
+		/* Enable last-sector hacks for single-target devices using
+		 * the Bulk-only transport, unless we already know the
+		 * capacity will be decremented or is correct. */
+		if (!(us->fflags & (US_FL_FIX_CAPACITY | US_FL_CAPACITY_OK |
+					US_FL_SCM_MULT_TARG)) &&
+				us->protocol == US_PR_BULK)
+			us->use_last_sector_hacks = 1;
 	} else {
 
 		/* Non-disk-type devices don't need to blacklist any pages
diff --git a/drivers/usb/storage/transport.c b/drivers/usb/storage/transport.c
index 9cc30afd6d31..1d5438e6363b 100644
--- a/drivers/usb/storage/transport.c
+++ b/drivers/usb/storage/transport.c
@@ -57,6 +57,9 @@
 #include "scsiglue.h"
 #include "debug.h"
 
+#include <linux/blkdev.h>
+#include "../../scsi/sd.h"
+
 
 /***********************************************************************
  * Data transfer routines
@@ -511,6 +514,110 @@ int usb_stor_bulk_transfer_sg(struct us_data* us, unsigned int pipe,
  * Transport routines
  ***********************************************************************/
 
+/* There are so many devices that report the capacity incorrectly,
+ * this routine was written to counteract some of the resulting
+ * problems.
+ */
+static void last_sector_hacks(struct us_data *us, struct scsi_cmnd *srb)
+{
+	struct gendisk *disk;
+	struct scsi_disk *sdkp;
+	u32 sector;
+
+	/* To Report "Medium Error: Record Not Found */
+	static unsigned char record_not_found[18] = {
+		[0]	= 0x70,			/* current error */
+		[2]	= MEDIUM_ERROR,		/* = 0x03 */
+		[7]	= 0x0a,			/* additional length */
+		[12]	= 0x14			/* Record Not Found */
+	};
+
+	/* If last-sector problems can't occur, whether because the
+	 * capacity was already decremented or because the device is
+	 * known to report the correct capacity, then we don't need
+	 * to do anything.
+	 */
+	if (!us->use_last_sector_hacks)
+		return;
+
+	/* Was this command a READ(10) or a WRITE(10)? */
+	if (srb->cmnd[0] != READ_10 && srb->cmnd[0] != WRITE_10)
+		goto done;
+
+	/* Did this command access the last sector? */
+	sector = (srb->cmnd[2] << 24) | (srb->cmnd[3] << 16) |
+			(srb->cmnd[4] << 8) | (srb->cmnd[5]);
+	disk = srb->request->rq_disk;
+	if (!disk)
+		goto done;
+	sdkp = scsi_disk(disk);
+	if (!sdkp)
+		goto done;
+	if (sector + 1 != sdkp->capacity)
+		goto done;
+
+	if (srb->result == SAM_STAT_GOOD && scsi_get_resid(srb) == 0) {
+
+		/* The command succeeded.  If the capacity is odd
+		 * (i.e., if the sector number is even) then the
+		 * "always-even" heuristic would be wrong for this
+		 * device.  Issue a WARN() so that the kerneloops.org
+		 * project will be notified and we will then know to
+		 * mark the device with a CAPACITY_OK flag.  Hopefully
+		 * this will occur for only a few devices.
+		 *
+		 * Use the sign of us->last_sector_hacks to tell whether
+		 * the warning has already been issued; we don't need
+		 * more than one warning per device.
+		 */
+		if (!(sector & 1) && us->use_last_sector_hacks > 0) {
+			unsigned vid = le16_to_cpu(
+					us->pusb_dev->descriptor.idVendor);
+			unsigned pid = le16_to_cpu(
+					us->pusb_dev->descriptor.idProduct);
+			unsigned rev = le16_to_cpu(
+					us->pusb_dev->descriptor.bcdDevice);
+
+			WARN(1, "%s: Successful last sector success at %u, "
+					"device %04x:%04x:%04x\n",
+					sdkp->disk->disk_name, sector,
+					vid, pid, rev);
+			us->use_last_sector_hacks = -1;
+		}
+
+	} else {
+		/* The command failed.  Allow up to 3 retries in case this
+		 * is some normal sort of failure.  After that, assume the
+		 * capacity is wrong and we're trying to access the sector
+		 * beyond the end.  Replace the result code and sense data
+		 * with values that will cause the SCSI core to fail the
+		 * command immediately, instead of going into an infinite
+		 * (or even just a very long) retry loop.
+		 */
+		if (++us->last_sector_retries < 3)
+			return;
+		srb->result = SAM_STAT_CHECK_CONDITION;
+		memcpy(srb->sense_buffer, record_not_found,
+				sizeof(record_not_found));
+
+		/* In theory we might want to issue a WARN() here if the
+		 * capacity is even, since it could indicate the device
+		 * has the READ CAPACITY bug _and_ the real capacity is
+		 * odd.  But it could also indicate that the device
+		 * simply can't access its last sector, a failure mode
+		 * which is surprisingly common.  So no warning.
+		 */
+	}
+
+ done:
+	/* Don't reset the retry counter for TEST UNIT READY commands,
+	 * because they get issued after device resets which might be
+	 * caused by a failed last-sector access.
+	 */
+	if (srb->cmnd[0] != TEST_UNIT_READY)
+		us->last_sector_retries = 0;
+}
+
 /* Invoke the transport and basic error-handling/recovery methods
  *
  * This is used by the protocol layers to actually send the message to
@@ -544,6 +651,7 @@ void usb_stor_invoke_transport(struct scsi_cmnd *srb, struct us_data *us)
 	/* if the transport provided its own sense data, don't auto-sense */
 	if (result == USB_STOR_TRANSPORT_NO_SENSE) {
 		srb->result = SAM_STAT_CHECK_CONDITION;
+		last_sector_hacks(us, srb);
 		return;
 	}
 
@@ -705,6 +813,7 @@ void usb_stor_invoke_transport(struct scsi_cmnd *srb, struct us_data *us)
 			scsi_bufflen(srb) - scsi_get_resid(srb) < srb->underflow)
 		srb->result = (DID_ERROR << 16) | (SUGGEST_RETRY << 24);
 
+	last_sector_hacks(us, srb);
 	return;
 
 	/* Error and abort processing: try to resynchronize with the device
@@ -732,6 +841,7 @@ void usb_stor_invoke_transport(struct scsi_cmnd *srb, struct us_data *us)
 		us->transport_reset(us);
 	}
 	clear_bit(US_FLIDX_RESETTING, &us->dflags);
+	last_sector_hacks(us, srb);
 }
 
 /* Stop the current URB transfer */
diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h
index 0330ed53ec1c..035bbc5d8231 100644
--- a/drivers/usb/storage/unusual_devs.h
+++ b/drivers/usb/storage/unusual_devs.h
@@ -27,7 +27,8 @@
 
 /* IMPORTANT NOTE: This file must be included in another file which does
  * the following thing for it to work:
- * The macro UNUSUAL_DEV() must be defined before this file is included
+ * The UNUSUAL_DEV, COMPLIANT_DEV, and USUAL_DEV macros must be defined
+ * before this file is included.
  */
 
 /* If you edit this file, please try to keep it sorted first by VendorID,
@@ -46,6 +47,12 @@
  * <usb-storage@lists.one-eyed-alien.net>
  */
 
+/* Note: If you add an entry only in order to set the CAPACITY_OK flag,
+ * use the COMPLIANT_DEV macro instead of UNUSUAL_DEV.  This is
+ * because such entries mark devices which actually work correctly,
+ * as opposed to devices that do something strangely or wrongly.
+ */
+
 /* patch submitted by Vivian Bregier <Vivian.Bregier@imag.fr>
  */
 UNUSUAL_DEV(  0x03eb, 0x2002, 0x0100, 0x0100,
@@ -704,6 +711,13 @@ UNUSUAL_DEV(  0x0525, 0xa140, 0x0100, 0x0100,
 		US_SC_8070, US_PR_DEVICE, NULL,
 		US_FL_FIX_INQUIRY ),
 
+/* Added by Alan Stern <stern@rowland.harvard.edu> */
+COMPLIANT_DEV(0x0525, 0xa4a5, 0x0000, 0x9999,
+		"Linux",
+		"File-backed Storage Gadget",
+		US_SC_DEVICE, US_PR_DEVICE, NULL,
+		US_FL_CAPACITY_OK ),
+
 /* Yakumo Mega Image 37
  * Submitted by Stephan Fuhrmann <atomenergie@t-online.de> */
 UNUSUAL_DEV(  0x052b, 0x1801, 0x0100, 0x0100,
diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c
index ce0b580db5ea..80e234bf4e50 100644
--- a/drivers/usb/storage/usb.c
+++ b/drivers/usb/storage/usb.c
@@ -134,6 +134,8 @@ static struct quirks_entry *quirks_list, *quirks_end;
 { USB_DEVICE_VER(id_vendor, id_product, bcdDeviceMin,bcdDeviceMax), \
   .driver_info = (flags)|(USB_US_TYPE_STOR<<24) }
 
+#define COMPLIANT_DEV	UNUSUAL_DEV
+
 #define USUAL_DEV(useProto, useTrans, useType) \
 { USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, useProto, useTrans), \
   .driver_info = (USB_US_TYPE_STOR<<24) }
@@ -142,6 +144,7 @@ static struct usb_device_id storage_usb_ids [] = {
 
 #	include "unusual_devs.h"
 #undef UNUSUAL_DEV
+#undef COMPLIANT_DEV
 #undef USUAL_DEV
 	/* Terminating entry */
 	{ }
@@ -172,6 +175,8 @@ MODULE_DEVICE_TABLE (usb, storage_usb_ids);
 	.initFunction = init_function,	\
 }
 
+#define COMPLIANT_DEV	UNUSUAL_DEV
+
 #define USUAL_DEV(use_protocol, use_transport, use_type) \
 { \
 	.useProtocol = use_protocol,	\
@@ -181,6 +186,7 @@ MODULE_DEVICE_TABLE (usb, storage_usb_ids);
 static struct us_unusual_dev us_unusual_dev_list[] = {
 #	include "unusual_devs.h" 
 #	undef UNUSUAL_DEV
+#	undef COMPLIANT_DEV
 #	undef USUAL_DEV
 
 	/* Terminating entry */
diff --git a/drivers/usb/storage/usb.h b/drivers/usb/storage/usb.h
index e4674fc715e6..65e674e4be99 100644
--- a/drivers/usb/storage/usb.h
+++ b/drivers/usb/storage/usb.h
@@ -154,6 +154,10 @@ struct us_data {
 #ifdef CONFIG_PM
 	pm_hook			suspend_resume_hook;
 #endif
+
+	/* hacks for READ CAPACITY bug handling */
+	int			use_last_sector_hacks;
+	int			last_sector_retries;
 };
 
 /* Convert between us_data and the corresponding Scsi_Host */
diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h
index 998e5cbbf29e..1eea1ab68dc4 100644
--- a/include/linux/usb_usual.h
+++ b/include/linux/usb_usual.h
@@ -53,8 +53,10 @@
 		/* Sets max_sectors to arch min */		\
 	US_FLAG(BULK_IGNORE_TAG,0x00004000)			\
 		/* Ignore tag mismatch in bulk operations */    \
-	US_FLAG(SANE_SENSE,     0x00008000)
-		/* Sane Sense (> 18 bytes) */
+	US_FLAG(SANE_SENSE,     0x00008000)			\
+		/* Sane Sense (> 18 bytes) */			\
+	US_FLAG(CAPACITY_OK,	0x00010000)			\
+		/* READ CAPACITY response is correct */
 
 #define US_FLAG(name, value)	US_FL_##name = value ,
 enum { US_DO_ALL_FLAGS };
-- 
cgit v1.2.3


From 338b67b0c1a97ca705023a8189cf41aa0828d294 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Thu, 14 Aug 2008 09:37:34 -0700
Subject: USB: remove warn() macro from usb.h

USB should not be having it's own printk macros, so remove warn() and
use the system-wide standard of dev_warn() wherever possible.  In the
few places that will not work out, use a basic printk().

Now that all in-tree users are gone, remove the macro.

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index e89639896508..28f68f587793 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1744,8 +1744,6 @@ extern void usb_unregister_notify(struct notifier_block *nb);
 	format "\n" , ## arg)
 #define info(format, arg...) printk(KERN_INFO KBUILD_MODNAME ": " \
 	format "\n" , ## arg)
-#define warn(format, arg...) printk(KERN_WARNING KBUILD_MODNAME ": " \
-	format "\n" , ## arg)
 
 #endif  /* __KERNEL__ */
 
-- 
cgit v1.2.3


From 34c65d82e02147331701c7795e3144d511adf4e9 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Mon, 18 Aug 2008 13:21:04 -0700
Subject: USB: remove info() macro from usb.h

USB should not be having it's own printk macros, so remove info() and
use the system-wide standard of dev_info() wherever possible.

No one in the tree is using the macro, so it can now be removed.

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index 28f68f587793..85ee9be9361e 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1742,8 +1742,6 @@ extern void usb_unregister_notify(struct notifier_block *nb);
 
 #define err(format, arg...) printk(KERN_ERR KBUILD_MODNAME ": " \
 	format "\n" , ## arg)
-#define info(format, arg...) printk(KERN_INFO KBUILD_MODNAME ": " \
-	format "\n" , ## arg)
 
 #endif  /* __KERNEL__ */
 
-- 
cgit v1.2.3


From 5e07878787ad07361571150230cc3a8d522ae046 Mon Sep 17 00:00:00 2001
From: Inaky Perez-Gonzalez <inaky@linux.intel.com>
Date: Sat, 20 Dec 2008 16:57:39 -0800
Subject: debugfs: add helpers for exporting a size_t simple value
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In the same spirit as debugfs_create_*(), introduce helpers for
exporting size_t values over debugfs.

The only trick done is that the format verifier is kept at %llu
instead of %zu; otherwise type warnings would pop up:

format ‘%zu’ expects type ‘size_t’, but argument 2 has type ‘long long unsigned int’

There is no real way to fix this one--however, we can consider %llu
and %zu to be compatible if we consider that we are using the same for
validating in debugfs_create_{x,u}{8,16,32}().

Signed-off-by: Inaky Perez-Gonzalez <inaky@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/debugfs/file.c       | 32 ++++++++++++++++++++++++++++++++
 include/linux/debugfs.h |  2 ++
 2 files changed, 34 insertions(+)

(limited to 'include/linux')

diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 159a5efd6a8a..33a90120f6ad 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -294,6 +294,38 @@ struct dentry *debugfs_create_x32(const char *name, mode_t mode,
 }
 EXPORT_SYMBOL_GPL(debugfs_create_x32);
 
+
+static int debugfs_size_t_set(void *data, u64 val)
+{
+	*(size_t *)data = val;
+	return 0;
+}
+static int debugfs_size_t_get(void *data, u64 *val)
+{
+	*val = *(size_t *)data;
+	return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(fops_size_t, debugfs_size_t_get, debugfs_size_t_set,
+			"%llu\n");	/* %llu and %zu are more or less the same */
+
+/**
+ * debugfs_create_size_t - create a debugfs file that is used to read and write an size_t value
+ * @name: a pointer to a string containing the name of the file to create.
+ * @mode: the permission that the file should have
+ * @parent: a pointer to the parent dentry for this file.  This should be a
+ *          directory dentry if set.  If this parameter is %NULL, then the
+ *          file will be created in the root of the debugfs filesystem.
+ * @value: a pointer to the variable that the file should read to and write
+ *         from.
+ */
+struct dentry *debugfs_create_size_t(const char *name, mode_t mode,
+				     struct dentry *parent, size_t *value)
+{
+	return debugfs_create_file(name, mode, parent, value, &fops_size_t);
+}
+EXPORT_SYMBOL_GPL(debugfs_create_size_t);
+
+
 static ssize_t read_file_bool(struct file *file, char __user *user_buf,
 			      size_t count, loff_t *ppos)
 {
diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index e1a6c046cea3..23936b16426b 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -63,6 +63,8 @@ struct dentry *debugfs_create_x16(const char *name, mode_t mode,
 				  struct dentry *parent, u16 *value);
 struct dentry *debugfs_create_x32(const char *name, mode_t mode,
 				  struct dentry *parent, u32 *value);
+struct dentry *debugfs_create_size_t(const char *name, mode_t mode,
+				     struct dentry *parent, size_t *value);
 struct dentry *debugfs_create_bool(const char *name, mode_t mode,
 				  struct dentry *parent, u32 *value);
 
-- 
cgit v1.2.3


From ace22f0881e1333d0c55ddf484e5352fe03a806a Mon Sep 17 00:00:00 2001
From: Inaky Perez-Gonzalez <inaky@linux.intel.com>
Date: Sat, 20 Dec 2008 16:57:33 -0800
Subject: wimax: headers for kernel API and user space interaction

Definitions for the user/kernel API protocol through generic
netlink. User space can copy it verbatim and use it.

Kernel API definition declares the main data types and calls for the
drivers to integrate into the WiMAX stack. Provides usage
documentation.

Signed-off-by: Inaky Perez-Gonzalez <inaky@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/wimax.h | 234 +++++++++++++++++++++++
 include/net/wimax.h   | 520 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 754 insertions(+)
 create mode 100644 include/linux/wimax.h
 create mode 100644 include/net/wimax.h

(limited to 'include/linux')

diff --git a/include/linux/wimax.h b/include/linux/wimax.h
new file mode 100644
index 000000000000..c89de7f4e5b9
--- /dev/null
+++ b/include/linux/wimax.h
@@ -0,0 +1,234 @@
+/*
+ * Linux WiMax
+ * API for user space
+ *
+ *
+ * Copyright (C) 2007-2008 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * Intel Corporation <linux-wimax@intel.com>
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *  - Initial implementation
+ *
+ *
+ * This file declares the user/kernel protocol that is spoken over
+ * Generic Netlink, as well as any type declaration that is to be used
+ * by kernel and user space.
+ *
+ * It is intended for user space to clone it verbatim to use it as a
+ * primary reference for definitions.
+ *
+ * Stuff intended for kernel usage as well as full protocol and stack
+ * documentation is rooted in include/net/wimax.h.
+ */
+
+#ifndef __LINUX__WIMAX_H__
+#define __LINUX__WIMAX_H__
+
+#include <linux/types.h>
+
+enum {
+	/**
+	 * Version of the interface (unsigned decimal, MMm, max 25.5)
+	 * M - Major: change if removing or modifying an existing call.
+	 * m - minor: change when adding a new call
+	 */
+	WIMAX_GNL_VERSION = 00,
+	/* Generic NetLink attributes */
+	WIMAX_GNL_ATTR_INVALID = 0x00,
+	WIMAX_GNL_ATTR_MAX = 10,
+};
+
+
+/*
+ * Generic NetLink operations
+ *
+ * Most of these map to an API call; _OP_ stands for operation, _RP_
+ * for reply and _RE_ for report (aka: signal).
+ */
+enum {
+	WIMAX_GNL_OP_MSG_FROM_USER,	/* User to kernel message */
+	WIMAX_GNL_OP_MSG_TO_USER,	/* Kernel to user message */
+	WIMAX_GNL_OP_RFKILL,	/* Run wimax_rfkill() */
+	WIMAX_GNL_OP_RESET,	/* Run wimax_rfkill() */
+	WIMAX_GNL_RE_STATE_CHANGE,	/* Report: status change */
+};
+
+
+/* Message from user / to user */
+enum {
+	WIMAX_GNL_MSG_IFIDX = 1,
+	WIMAX_GNL_MSG_PIPE_NAME,
+	WIMAX_GNL_MSG_DATA,
+};
+
+
+/*
+ * wimax_rfkill()
+ *
+ * The state of the radio (ON/OFF) is mapped to the rfkill subsystem's
+ * switch state (DISABLED/ENABLED).
+ */
+enum wimax_rf_state {
+	WIMAX_RF_OFF = 0,	/* Radio is off, rfkill on/enabled */
+	WIMAX_RF_ON = 1,	/* Radio is on, rfkill off/disabled */
+	WIMAX_RF_QUERY = 2,
+};
+
+/* Attributes */
+enum {
+	WIMAX_GNL_RFKILL_IFIDX = 1,
+	WIMAX_GNL_RFKILL_STATE,
+};
+
+
+/* Attributes for wimax_reset() */
+enum {
+	WIMAX_GNL_RESET_IFIDX = 1,
+};
+
+
+/*
+ * Attributes for the Report State Change
+ *
+ * For now we just have the old and new states; new attributes might
+ * be added later on.
+ */
+enum {
+	WIMAX_GNL_STCH_IFIDX = 1,
+	WIMAX_GNL_STCH_STATE_OLD,
+	WIMAX_GNL_STCH_STATE_NEW,
+};
+
+
+/**
+ * enum wimax_st - The different states of a WiMAX device
+ * @__WIMAX_ST_NULL: The device structure has been allocated and zeroed,
+ *     but still wimax_dev_add() hasn't been called. There is no state.
+ *
+ * @WIMAX_ST_DOWN: The device has been registered with the WiMAX and
+ *     networking stacks, but it is not initialized (normally that is
+ *     done with 'ifconfig DEV up' [or equivalent], which can upload
+ *     firmware and enable communications with the device).
+ *     In this state, the device is powered down and using as less
+ *     power as possible.
+ *     This state is the default after a call to wimax_dev_add(). It
+ *     is ok to have drivers move directly to %WIMAX_ST_UNINITIALIZED
+ *     or %WIMAX_ST_RADIO_OFF in _probe() after the call to
+ *     wimax_dev_add().
+ *     It is recommended that the driver leaves this state when
+ *     calling 'ifconfig DEV up' and enters it back on 'ifconfig DEV
+ *     down'.
+ *
+ * @__WIMAX_ST_QUIESCING: The device is being torn down, so no API
+ *     operations are allowed to proceed except the ones needed to
+ *     complete the device clean up process.
+ *
+ * @WIMAX_ST_UNINITIALIZED: [optional] Communication with the device
+ *     is setup, but the device still requires some configuration
+ *     before being operational.
+ *     Some WiMAX API calls might work.
+ *
+ * @WIMAX_ST_RADIO_OFF: The device is fully up; radio is off (wether
+ *     by hardware or software switches).
+ *     It is recommended to always leave the device in this state
+ *     after initialization.
+ *
+ * @WIMAX_ST_READY: The device is fully up and radio is on.
+ *
+ * @WIMAX_ST_SCANNING: [optional] The device has been instructed to
+ *     scan. In this state, the device cannot be actively connected to
+ *     a network.
+ *
+ * @WIMAX_ST_CONNECTING: The device is connecting to a network. This
+ *     state exists because in some devices, the connect process can
+ *     include a number of negotiations between user space, kernel
+ *     space and the device. User space needs to know what the device
+ *     is doing. If the connect sequence in a device is atomic and
+ *     fast, the device can transition directly to CONNECTED
+ *
+ * @WIMAX_ST_CONNECTED: The device is connected to a network.
+ *
+ * @__WIMAX_ST_INVALID: This is an invalid state used to mark the
+ *     maximum numeric value of states.
+ *
+ * Description:
+ *
+ * Transitions from one state to another one are atomic and can only
+ * be caused in kernel space with wimax_state_change(). To read the
+ * state, use wimax_state_get().
+ *
+ * States starting with __ are internal and shall not be used or
+ * referred to by drivers or userspace. They look ugly, but that's the
+ * point -- if any use is made non-internal to the stack, it is easier
+ * to catch on review.
+ *
+ * All API operations [with well defined exceptions] will take the
+ * device mutex before starting and then check the state. If the state
+ * is %__WIMAX_ST_NULL, %WIMAX_ST_DOWN, %WIMAX_ST_UNINITIALIZED or
+ * %__WIMAX_ST_QUIESCING, it will drop the lock and quit with
+ * -%EINVAL, -%ENOMEDIUM, -%ENOTCONN or -%ESHUTDOWN.
+ *
+ * The order of the definitions is important, so we can do numerical
+ * comparisons (eg: < %WIMAX_ST_RADIO_OFF means the device is not ready
+ * to operate).
+ */
+/*
+ * The allowed state transitions are described in the table below
+ * (states in rows can go to states in columns where there is an X):
+ *
+ *                                  UNINI   RADIO READY SCAN CONNEC CONNEC
+ *             NULL DOWN QUIESCING TIALIZED  OFF        NING  TING   TED
+ * NULL         -    x
+ * DOWN              -      x        x       x
+ * QUIESCING         x      -
+ * UNINITIALIZED            x        -       x
+ * RADIO_OFF                x                -     x
+ * READY                    x                x     -     x     x      x
+ * SCANNING                 x                x     x     -     x      x
+ * CONNECTING               x                x     x     x     -      x
+ * CONNECTED                x                x     x                  -
+ *
+ * This table not available in kernel-doc because the formatting messes it up.
+ */
+ enum wimax_st {
+	__WIMAX_ST_NULL = 0,
+	WIMAX_ST_DOWN,
+	__WIMAX_ST_QUIESCING,
+	WIMAX_ST_UNINITIALIZED,
+	WIMAX_ST_RADIO_OFF,
+	WIMAX_ST_READY,
+	WIMAX_ST_SCANNING,
+	WIMAX_ST_CONNECTING,
+	WIMAX_ST_CONNECTED,
+	__WIMAX_ST_INVALID			/* Always keep last */
+};
+
+
+#endif /* #ifndef __LINUX__WIMAX_H__ */
diff --git a/include/net/wimax.h b/include/net/wimax.h
new file mode 100644
index 000000000000..1602614fdaf9
--- /dev/null
+++ b/include/net/wimax.h
@@ -0,0 +1,520 @@
+/*
+ * Linux WiMAX
+ * Kernel space API for accessing WiMAX devices
+ *
+ *
+ * Copyright (C) 2007-2008 Intel Corporation <linux-wimax@intel.com>
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ *
+ *
+ * The WiMAX stack provides an API for controlling and managing the
+ * system's WiMAX devices. This API affects the control plane; the
+ * data plane is accessed via the network stack (netdev).
+ *
+ * Parts of the WiMAX stack API and notifications are exported to
+ * user space via Generic Netlink. In user space, libwimax (part of
+ * the wimax-tools package) provides a shim layer for accessing those
+ * calls.
+ *
+ * The API is standarized for all WiMAX devices and different drivers
+ * implement the backend support for it. However, device-specific
+ * messaging pipes are provided that can be used to issue commands and
+ * receive notifications in free form.
+ *
+ * Currently the messaging pipes are the only means of control as it
+ * is not known (due to the lack of more devices in the market) what
+ * will be a good abstraction layer. Expect this to change as more
+ * devices show in the market. This API is designed to be growable in
+ * order to address this problem.
+ *
+ * USAGE
+ *
+ * Embed a `struct wimax_dev` at the beginning of the the device's
+ * private structure, initialize and register it. For details, see
+ * `struct wimax_dev`s documentation.
+ *
+ * Once this is done, wimax-tools's libwimaxll can be used to
+ * communicate with the driver from user space. You user space
+ * application does not have to forcibily use libwimaxll and can talk
+ * the generic netlink protocol directly if desired.
+ *
+ * Remember this is a very low level API that will to provide all of
+ * WiMAX features. Other daemons and services running in user space
+ * are the expected clients of it. They offer a higher level API that
+ * applications should use (an example of this is the Intel's WiMAX
+ * Network Service for the i2400m).
+ *
+ * DESIGN
+ *
+ * Although not set on final stone, this very basic interface is
+ * mostly completed. Remember this is meant to grow as new common
+ * operations are decided upon. New operations will be added to the
+ * interface, intent being on keeping backwards compatibility as much
+ * as possible.
+ *
+ * This layer implements a set of calls to control a WiMAX device,
+ * exposing a frontend to the rest of the kernel and user space (via
+ * generic netlink) and a backend implementation in the driver through
+ * function pointers.
+ *
+ * WiMAX devices have a state, and a kernel-only API allows the
+ * drivers to manipulate that state. State transitions are atomic, and
+ * only some of them are allowed (see `enum wimax_st`).
+ *
+ * Most API calls will set the state automatically; in most cases
+ * drivers have to only report state changes due to external
+ * conditions.
+ *
+ * All API operations are 'atomic', serialized thorough a mutex in the
+ * `struct wimax_dev`.
+ *
+ * EXPORTING TO USER SPACE THROUGH GENERIC NETLINK
+ *
+ * The API is exported to user space using generic netlink (other
+ * methods can be added as needed).
+ *
+ * There is a Generic Netlink Family named "WiMAX", where interfaces
+ * supporting the WiMAX interface receive commands and broadcast their
+ * signals over a multicast group named "msg".
+ *
+ * Mapping to the source/destination interface is done by an interface
+ * index attribute.
+ *
+ * For user-to-kernel traffic (commands) we use a function call
+ * marshalling mechanism, where a message X with attributes A, B, C
+ * sent from user space to kernel space means executing the WiMAX API
+ * call wimax_X(A, B, C), sending the results back as a message.
+ *
+ * Kernel-to-user (notifications or signals) communication is sent
+ * over multicast groups. This allows to have multiple applications
+ * monitoring them.
+ *
+ * Each command/signal gets assigned it's own attribute policy. This
+ * way the validator will verify that all the attributes in there are
+ * only the ones that should be for each command/signal. Thing of an
+ * attribute mapping to a type+argumentname for each command/signal.
+ *
+ * If we had a single policy for *all* commands/signals, after running
+ * the validator we'd have to check "does this attribute belong in
+ * here"?  for each one. It can be done manually, but it's just easier
+ * to have the validator do that job with multiple policies. As well,
+ * it makes it easier to later expand each command/signal signature
+ * without affecting others and keeping the namespace more or less
+ * sane. Not that it is too complicated, but it makes it even easier.
+ *
+ * No state information is maintained in the kernel for each user
+ * space connection (the connection is stateless).
+ *
+ * TESTING FOR THE INTERFACE AND VERSIONING
+ *
+ * If network interface X is a WiMAX device, there will be a Generic
+ * Netlink family named "WiMAX X" and the device will present a
+ * "wimax" directory in it's network sysfs directory
+ * (/sys/class/net/DEVICE/wimax) [used by HAL].
+ *
+ * The inexistence of any of these means the device does not support
+ * this WiMAX API.
+ *
+ * By querying the generic netlink controller, versioning information
+ * and the multicast groups available can be found. Applications using
+ * the interface can either rely on that or use the generic netlink
+ * controller to figure out which generic netlink commands/signals are
+ * supported.
+ *
+ * NOTE: this versioning is a last resort to avoid hard
+ *    incompatibilities. It is the intention of the design of this
+ *    stack not to introduce backward incompatible changes.
+ *
+ * The version code has to fit in one byte (restrictions imposed by
+ * generic netlink); we use `version / 10` for the major version and
+ * `version % 10` for the minor. This gives 9 minors for each major
+ * and 25 majors.
+ *
+ * The version change protocol is as follow:
+ *
+ * - Major versions: needs to be increased if an existing message/API
+ *   call is changed or removed. Doesn't need to be changed if a new
+ *   message is added.
+ *
+ * - Minor version: needs to be increased if new messages/API calls are
+ *   being added or some other consideration that doesn't impact the
+ *   user-kernel interface too much (like some kind of bug fix) and
+ *   that is kind of left up in the air to common sense.
+ *
+ * User space code should not try to work if the major version it was
+ * compiled for differs from what the kernel offers. As well, if the
+ * minor version of the kernel interface is lower than the one user
+ * space is expecting (the one it was compiled for), the kernel
+ * might be missing API calls; user space shall be ready to handle
+ * said condition. Use the generic netlink controller operations to
+ * find which ones are supported and which not.
+ *
+ * libwimaxll:wimaxll_open() takes care of checking versions.
+ *
+ * THE OPERATIONS:
+ *
+ * Each operation is defined in its on file (drivers/net/wimax/op-*.c)
+ * for clarity. The parts needed for an operation are:
+ *
+ *  - a function pointer in `struct wimax_dev`: optional, as the
+ *    operation might be implemented by the stack and not by the
+ *    driver.
+ *
+ *    All function pointers are named wimax_dev->op_*(), and drivers
+ *    must implement them except where noted otherwise.
+ *
+ *  - When exported to user space, a `struct nla_policy` to define the
+ *    attributes of the generic netlink command and a `struct genl_ops`
+ *    to define the operation.
+ *
+ * All the declarations for the operation codes (WIMAX_GNL_OP_<NAME>)
+ * and generic netlink attributes (WIMAX_GNL_<NAME>_*) are declared in
+ * include/linux/wimax.h; this file is intended to be cloned by user
+ * space to gain access to those declarations.
+ *
+ * A few caveats to remember:
+ *
+ *  - Need to define attribute numbers starting in 1; otherwise it
+ *    fails.
+ *
+ *  - the `struct genl_family` requires a maximum attribute id; when
+ *    defining the `struct nla_policy` for each message, it has to have
+ *    an array size of WIMAX_GNL_ATTR_MAX+1.
+ *
+ * THE PIPE INTERFACE:
+ *
+ * This interface is kept intentionally simple. The driver can send
+ * and receive free-form messages to/from user space through a
+ * pipe. See drivers/net/wimax/op-msg.c for details.
+ *
+ * The kernel-to-user messages are sent with
+ * wimax_msg(). user-to-kernel messages are delivered via
+ * wimax_dev->op_msg_from_user().
+ *
+ * RFKILL:
+ *
+ * RFKILL support is built into the wimax_dev layer; the driver just
+ * needs to call wimax_report_rfkill_{hw,sw}() to inform of changes in
+ * the hardware or software RF kill switches. When the stack wants to
+ * turn the radio off, it will call wimax_dev->op_rfkill_sw_toggle(),
+ * which the driver implements.
+ *
+ * User space can set the software RF Kill switch by calling
+ * wimax_rfkill().
+ *
+ * The code for now only supports devices that don't require polling;
+ * If the device needs to be polled, create a self-rearming delayed
+ * work struct for polling or look into adding polled support to the
+ * WiMAX stack.
+ *
+ * When initializing the hardware (_probe), after calling
+ * wimax_dev_add(), query the device for it's RF Kill switches status
+ * and feed it back to the WiMAX stack using
+ * wimax_report_rfkill_{hw,sw}(). If any switch is missing, always
+ * report it as ON.
+ *
+ * NOTE: the wimax stack uses an inverted terminology to that of the
+ * RFKILL subsystem:
+ *
+ *  - ON: radio is ON, RFKILL is DISABLED or OFF.
+ *  - OFF: radio is OFF, RFKILL is ENABLED or ON.
+ *
+ * MISCELLANEOUS OPS:
+ *
+ * wimax_reset() can be used to reset the device to power on state; by
+ * default it issues a warm reset that maintains the same device
+ * node. If that is not possible, it falls back to a cold reset
+ * (device reconnect). The driver implements the backend to this
+ * through wimax_dev->op_reset().
+ */
+
+#ifndef __NET__WIMAX_H__
+#define __NET__WIMAX_H__
+#ifdef __KERNEL__
+
+#include <linux/wimax.h>
+#include <net/genetlink.h>
+#include <linux/netdevice.h>
+
+struct net_device;
+struct genl_info;
+struct wimax_dev;
+struct input_dev;
+
+/**
+ * struct wimax_dev - Generic WiMAX device
+ *
+ * @net_dev: [fill] Pointer to the &struct net_device this WiMAX
+ *     device implements.
+ *
+ * @op_msg_from_user: [fill] Driver-specific operation to
+ *     handle a raw message from user space to the driver. The
+ *     driver can send messages to user space using with
+ *     wimax_msg_to_user().
+ *
+ * @op_rfkill_sw_toggle: [fill] Driver-specific operation to act on
+ *     userspace (or any other agent) requesting the WiMAX device to
+ *     change the RF Kill software switch (WIMAX_RF_ON or
+ *     WIMAX_RF_OFF).
+ *     If such hardware support is not present, it is assumed the
+ *     radio cannot be switched off and it is always on (and the stack
+ *     will error out when trying to switch it off). In such case,
+ *     this function pointer can be left as NULL.
+ *
+ * @op_reset: [fill] Driver specific operation to reset the
+ *     device.
+ *     This operation should always attempt first a warm reset that
+ *     does not disconnect the device from the bus and return 0.
+ *     If that fails, it should resort to some sort of cold or bus
+ *     reset (even if it implies a bus disconnection and device
+ *     dissapearance). In that case, -ENODEV should be returned to
+ *     indicate the device is gone.
+ *     This operation has to be synchronous, and return only when the
+ *     reset is complete. In case of having had to resort to bus/cold
+ *     reset implying a device disconnection, the call is allowed to
+ *     return inmediately.
+ *     NOTE: wimax_dev->mutex is NOT locked when this op is being
+ *     called; however, wimax_dev->mutex_reset IS locked to ensure
+ *     serialization of calls to wimax_reset().
+ *     See wimax_reset()'s documentation.
+ *
+ * @name: [fill] A way to identify this device. We need to register a
+ *     name with many subsystems (input for RFKILL, workqueue
+ *     creation, etc). We can't use the network device name as that
+ *     might change and in some instances we don't know it yet (until
+ *     we don't call register_netdev()). So we generate an unique one
+ *     using the driver name and device bus id, place it here and use
+ *     it across the board. Recommended naming:
+ *     DRIVERNAME-BUSNAME:BUSID (dev->bus->name, dev->bus_id).
+ *
+ * @id_table_node: [private] link to the list of wimax devices kept by
+ *     id-table.c. Protected by it's own spinlock.
+ *
+ * @mutex: [private] Serializes all concurrent access and execution of
+ *     operations.
+ *
+ * @mutex_reset: [private] Serializes reset operations. Needs to be a
+ *     different mutex because as part of the reset operation, the
+ *     driver has to call back into the stack to do things such as
+ *     state change, that require wimax_dev->mutex.
+ *
+ * @state: [private] Current state of the WiMAX device.
+ *
+ * @rfkill: [private] integration into the RF-Kill infrastructure.
+ *
+ * @rfkill_input: [private] virtual input device to process the
+ *     hardware RF Kill switches.
+ *
+ * @rf_sw: [private] State of the software radio switch (OFF/ON)
+ *
+ * @rf_hw: [private] State of the hardware radio switch (OFF/ON)
+ *
+ * Description:
+ * This structure defines a common interface to access all WiMAX
+ * devices from different vendors and provides a common API as well as
+ * a free-form device-specific messaging channel.
+ *
+ * Usage:
+ *  1. Embed a &struct wimax_dev at *the beginning* the network
+ *     device structure so that netdev_priv() points to it.
+ *
+ *  2. memset() it to zero
+ *
+ *  3. Initialize with wimax_dev_init(). This will leave the WiMAX
+ *     device in the %__WIMAX_ST_NULL state.
+ *
+ *  4. Fill all the fields marked with [fill]; once called
+ *     wimax_dev_add(), those fields CANNOT be modified.
+ *
+ *  5. Call wimax_dev_add() *after* registering the network
+ *     device. This will leave the WiMAX device in the %WIMAX_ST_DOWN
+ *     state.
+ *     Protect the driver's net_device->open() against succeeding if
+ *     the wimax device state is lower than %WIMAX_ST_DOWN.
+ *
+ *  6. Select when the device is going to be turned on/initialized;
+ *     for example, it could be initialized on 'ifconfig up' (when the
+ *     netdev op 'open()' is called on the driver).
+ *
+ * When the device is initialized (at `ifconfig up` time, or right
+ * after calling wimax_dev_add() from _probe(), make sure the
+ * following steps are taken
+ *
+ *  a. Move the device to %WIMAX_ST_UNINITIALIZED. This is needed so
+ *     some API calls that shouldn't work until the device is ready
+ *     can be blocked.
+ *
+ *  b. Initialize the device. Make sure to turn the SW radio switch
+ *     off and move the device to state %WIMAX_ST_RADIO_OFF when
+ *     done. When just initialized, a device should be left in RADIO
+ *     OFF state until user space devices to turn it on.
+ *
+ *  c. Query the device for the state of the hardware rfkill switch
+ *     and call wimax_rfkill_report_hw() and wimax_rfkill_report_sw()
+ *     as needed. See below.
+ *
+ * wimax_dev_rm() undoes before unregistering the network device. Once
+ * wimax_dev_add() is called, the driver can get called on the
+ * wimax_dev->op_* function pointers
+ *
+ * CONCURRENCY:
+ *
+ * The stack provides a mutex for each device that will disallow API
+ * calls happening concurrently; thus, op calls into the driver
+ * through the wimax_dev->op*() function pointers will always be
+ * serialized and *never* concurrent.
+ *
+ * For locking, take wimax_dev->mutex is taken; (most) operations in
+ * the API have to check for wimax_dev_is_ready() to return 0 before
+ * continuing (this is done internally).
+ *
+ * REFERENCE COUNTING:
+ *
+ * The WiMAX device is reference counted by the associated network
+ * device. The only operation that can be used to reference the device
+ * is wimax_dev_get_by_genl_info(), and the reference it acquires has
+ * to be released with dev_put(wimax_dev->net_dev).
+ *
+ * RFKILL:
+ *
+ * At startup, both HW and SW radio switchess are assumed to be off.
+ *
+ * At initialization time [after calling wimax_dev_add()], have the
+ * driver query the device for the status of the software and hardware
+ * RF kill switches and call wimax_report_rfkill_hw() and
+ * wimax_rfkill_report_sw() to indicate their state. If any is
+ * missing, just call it to indicate it is ON (radio always on).
+ *
+ * Whenever the driver detects a change in the state of the RF kill
+ * switches, it should call wimax_report_rfkill_hw() or
+ * wimax_report_rfkill_sw() to report it to the stack.
+ */
+struct wimax_dev {
+	struct net_device *net_dev;
+	struct list_head id_table_node;
+	struct mutex mutex;		/* Protects all members and API calls */
+	struct mutex mutex_reset;
+	enum wimax_st state;
+
+	int (*op_msg_from_user)(struct wimax_dev *wimax_dev,
+				const char *,
+				const void *, size_t,
+				const struct genl_info *info);
+	int (*op_rfkill_sw_toggle)(struct wimax_dev *wimax_dev,
+				   enum wimax_rf_state);
+	int (*op_reset)(struct wimax_dev *wimax_dev);
+
+	struct rfkill *rfkill;
+	struct input_dev *rfkill_input;
+	unsigned rf_hw;
+	unsigned rf_sw;
+	char name[32];
+
+	struct dentry *debugfs_dentry;
+};
+
+
+
+/*
+ * WiMAX stack public API for device drivers
+ * -----------------------------------------
+ *
+ * These functions are not exported to user space.
+ */
+extern void wimax_dev_init(struct wimax_dev *);
+extern int wimax_dev_add(struct wimax_dev *, struct net_device *);
+extern void wimax_dev_rm(struct wimax_dev *);
+
+static inline
+struct wimax_dev *net_dev_to_wimax(struct net_device *net_dev)
+{
+	return netdev_priv(net_dev);
+}
+
+static inline
+struct device *wimax_dev_to_dev(struct wimax_dev *wimax_dev)
+{
+	return wimax_dev->net_dev->dev.parent;
+}
+
+extern void wimax_state_change(struct wimax_dev *, enum wimax_st);
+extern enum wimax_st wimax_state_get(struct wimax_dev *);
+
+/*
+ * Radio Switch state reporting.
+ *
+ * enum wimax_rf_state is declared in linux/wimax.h so the exports
+ * to user space can use it.
+ */
+extern void wimax_report_rfkill_hw(struct wimax_dev *, enum wimax_rf_state);
+extern void wimax_report_rfkill_sw(struct wimax_dev *, enum wimax_rf_state);
+
+
+/*
+ * Free-form messaging to/from user space
+ *
+ * Sending a message:
+ *
+ *   wimax_msg(wimax_dev, pipe_name, buf, buf_size, GFP_KERNEL);
+ *
+ * Broken up:
+ *
+ *   skb = wimax_msg_alloc(wimax_dev, pipe_name, buf_size, GFP_KERNEL);
+ *   ...fill up skb...
+ *   wimax_msg_send(wimax_dev, pipe_name, skb);
+ *
+ * Be sure not to modify skb->data in the middle (ie: don't use
+ * skb_push()/skb_pull()/skb_reserve() on the skb).
+ *
+ * "pipe_name" is any string, than can be interpreted as the name of
+ * the pipe or destinatary; the interpretation of it is driver
+ * specific, so the recipient can multiplex it as wished. It can be
+ * NULL, it won't be used - an example is using a "diagnostics" tag to
+ * send diagnostics information that a device-specific diagnostics
+ * tool would be interested in.
+ */
+extern struct sk_buff *wimax_msg_alloc(struct wimax_dev *, const char *,
+				       const void *, size_t, gfp_t);
+extern int wimax_msg_send(struct wimax_dev *, struct sk_buff *);
+extern int wimax_msg(struct wimax_dev *, const char *,
+		     const void *, size_t, gfp_t);
+
+extern const void *wimax_msg_data_len(struct sk_buff *, size_t *);
+extern const void *wimax_msg_data(struct sk_buff *);
+extern ssize_t wimax_msg_len(struct sk_buff *);
+
+
+/*
+ * WiMAX stack user space API
+ * --------------------------
+ *
+ * This API is what gets exported to user space for general
+ * operations. As well, they can be called from within the kernel,
+ * (with a properly referenced `struct wimax_dev`).
+ *
+ * Properly referenced means: the 'struct net_device' that embeds the
+ * device's control structure and (as such) the 'struct wimax_dev' is
+ * referenced by the caller.
+ */
+extern int wimax_rfkill(struct wimax_dev *, enum wimax_rf_state);
+extern int wimax_reset(struct wimax_dev *);
+
+#else
+/* You might be looking for linux/wimax.h */
+#error This file should not be included from user space.
+#endif /* #ifdef __KERNEL__ */
+#endif /* #ifndef __NET__WIMAX_H__ */
-- 
cgit v1.2.3


From ea912f4e7f264981faf8665cfb63d46d7f948117 Mon Sep 17 00:00:00 2001
From: Inaky Perez-Gonzalez <inaky@linux.intel.com>
Date: Sat, 20 Dec 2008 16:57:35 -0800
Subject: wimax: debug macros and debug settings for the WiMAX stack

This file contains a simple debug framework that is used in the stack;
it allows the debug level to be controlled at compile-time (so the
debug code is optimized out) and at run-time (for what wasn't compiled
out).

This is eventually going to be moved to use dynamic_printk(). Just
need to find time to do it.

Signed-off-by: Inaky Perez-Gonzalez <inaky@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/wimax/debug.h | 453 ++++++++++++++++++++++++++++++++++++++++++++
 net/wimax/debug-levels.h    |  42 ++++
 2 files changed, 495 insertions(+)
 create mode 100644 include/linux/wimax/debug.h
 create mode 100644 net/wimax/debug-levels.h

(limited to 'include/linux')

diff --git a/include/linux/wimax/debug.h b/include/linux/wimax/debug.h
new file mode 100644
index 000000000000..ba0c49399a83
--- /dev/null
+++ b/include/linux/wimax/debug.h
@@ -0,0 +1,453 @@
+/*
+ * Linux WiMAX
+ * Collection of tools to manage debug operations.
+ *
+ *
+ * Copyright (C) 2005-2007 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ *
+ *
+ * Don't #include this file directly, read on!
+ *
+ *
+ * EXECUTING DEBUGGING ACTIONS OR NOT
+ *
+ * The main thing this framework provides is decission power to take a
+ * debug action (like printing a message) if the current debug level
+ * allows it.
+ *
+ * The decission power is at two levels: at compile-time (what does
+ * not make it is compiled out) and at run-time. The run-time
+ * selection is done per-submodule (as they are declared by the user
+ * of the framework).
+ *
+ * A call to d_test(L) (L being the target debug level) returns true
+ * if the action should be taken because the current debug levels
+ * allow it (both compile and run time).
+ *
+ * It follows that a call to d_test() that can be determined to be
+ * always false at compile time will get the code depending on it
+ * compiled out by optimization.
+ *
+ *
+ * DEBUG LEVELS
+ *
+ * It is up to the caller to define how much a debugging level is.
+ *
+ * Convention sets 0 as "no debug" (so an action marked as debug level 0
+ * will always be taken). The increasing debug levels are used for
+ * increased verbosity.
+ *
+ *
+ * USAGE
+ *
+ * Group the code in modules and submodules inside each module [which
+ * in most cases maps to Linux modules and .c files that compose
+ * those].
+ *
+ *
+ * For each module, there is:
+ *
+ *  - a MODULENAME (single word, legal C identifier)
+ *
+ *  - a debug-levels.h header file that declares the list of
+ *    submodules and that is included by all .c files that use
+ *    the debugging tools. The file name can be anything.
+ *
+ *  - some (optional) .c code to manipulate the runtime debug levels
+ *    through debugfs.
+ *
+ * The debug-levels.h file would look like:
+ *
+ *     #ifndef __debug_levels__h__
+ *     #define __debug_levels__h__
+ *
+ *     #define D_MODULENAME modulename
+ *     #define D_MASTER 10
+ *
+ *     #include <linux/wimax/debug.h>
+ *
+ *     enum d_module {
+ *             D_SUBMODULE_DECLARE(submodule_1),
+ *             D_SUBMODULE_DECLARE(submodule_2),
+ *             ...
+ *             D_SUBMODULE_DECLARE(submodule_N)
+ *     };
+ *
+ *     #endif
+ *
+ * D_MASTER is the maximum compile-time debug level; any debug actions
+ * above this will be out. D_MODULENAME is the module name (legal C
+ * identifier), which has to be unique for each module (to avoid
+ * namespace collisions during linkage). Note those #defines need to
+ * be done before #including debug.h
+ *
+ * We declare N different submodules whose debug level can be
+ * independently controlled during runtime.
+ *
+ * In a .c file of the module (and only in one of them), define the
+ * following code:
+ *
+ *     struct d_level D_LEVEL[] = {
+ *             D_SUBMODULE_DEFINE(submodule_1),
+ *             D_SUBMODULE_DEFINE(submodule_2),
+ *             ...
+ *             D_SUBMODULE_DEFINE(submodule_N),
+ *     };
+ *     size_t D_LEVEL_SIZE = ARRAY_SIZE(D_LEVEL);
+ *
+ * Externs for d_level_MODULENAME and d_level_size_MODULENAME are used
+ * and declared in this file using the D_LEVEL and D_LEVEL_SIZE macros
+ * #defined also in this file.
+ *
+ * To manipulate from user space the levels, create a debugfs dentry
+ * and then register each submodule with:
+ *
+ *     result = d_level_register_debugfs("PREFIX_", submodule_X, parent);
+ *     if (result < 0)
+ *            goto error;
+ *
+ * Where PREFIX_ is a name of your chosing. This will create debugfs
+ * file with a single numeric value that can be use to tweak it. To
+ * remove the entires, just use debugfs_remove_recursive() on 'parent'.
+ *
+ * NOTE: remember that even if this will show attached to some
+ *     particular instance of a device, the settings are *global*.
+ *
+ *
+ * On each submodule (for example, .c files), the debug infrastructure
+ * should be included like this:
+ *
+ *     #define D_SUBMODULE submodule_x     // matches one in debug-levels.h
+ *     #include "debug-levels.h"
+ *
+ * after #including all your include files.
+ *
+ *
+ * Now you can use the d_*() macros below [d_test(), d_fnstart(),
+ * d_fnend(), d_printf(), d_dump()].
+ *
+ * If their debug level is greater than D_MASTER, they will be
+ * compiled out.
+ *
+ * If their debug level is lower or equal than D_MASTER but greater
+ * than the current debug level of their submodule, they'll be
+ * ignored.
+ *
+ * Otherwise, the action will be performed.
+ */
+#ifndef __debug__h__
+#define __debug__h__
+
+#include <linux/types.h>
+#include <linux/device.h>
+
+
+/* Backend stuff */
+
+/*
+ * Debug backend: generate a message header from a 'struct device'
+ *
+ * @head: buffer where to place the header
+ * @head_size: length of @head
+ * @dev: pointer to device used to generate a header from. If NULL,
+ *     an empty ("") header is generated.
+ */
+static inline
+void __d_head(char *head, size_t head_size,
+	      struct device *dev)
+{
+	if (dev == NULL)
+		head[0] = 0;
+	else if ((unsigned long)dev < 4096) {
+		printk(KERN_ERR "E: Corrupt dev %p\n", dev);
+		WARN_ON(1);
+	} else
+		snprintf(head, head_size, "%s %s: ",
+			 dev_driver_string(dev), dev->bus_id);
+}
+
+
+/*
+ * Debug backend: log some message if debugging is enabled
+ *
+ * @l: intended debug level
+ * @tag: tag to prefix the message with
+ * @dev: 'struct device' associated to this message
+ * @f: printf-like format and arguments
+ *
+ * Note this is optimized out if it doesn't pass the compile-time
+ * check; however, it is *always* compiled. This is useful to make
+ * sure the printf-like formats and variables are always checked and
+ * they don't get bit rot if you have all the debugging disabled.
+ */
+#define _d_printf(l, tag, dev, f, a...)					\
+do {									\
+	char head[64];							\
+	if (!d_test(l))							\
+		break;							\
+	__d_head(head, sizeof(head), dev);				\
+	printk(KERN_ERR "%s%s%s: " f, head, __func__, tag, ##a);	\
+} while (0)
+
+
+/*
+ * CPP sintatic sugar to generate A_B like symbol names when one of
+ * the arguments is a a preprocessor #define.
+ */
+#define __D_PASTE__(varname, modulename) varname##_##modulename
+#define __D_PASTE(varname, modulename) (__D_PASTE__(varname, modulename))
+#define _D_SUBMODULE_INDEX(_name) (D_SUBMODULE_DECLARE(_name))
+
+
+/*
+ * Store a submodule's runtime debug level and name
+ */
+struct d_level {
+	u8 level;
+	const char *name;
+};
+
+
+/*
+ * List of available submodules and their debug levels
+ *
+ * We call them d_level_MODULENAME and d_level_size_MODULENAME; the
+ * macros D_LEVEL and D_LEVEL_SIZE contain the name already for
+ * convenience.
+ *
+ * This array and the size are defined on some .c file that is part of
+ * the current module.
+ */
+#define D_LEVEL __D_PASTE(d_level, D_MODULENAME)
+#define D_LEVEL_SIZE __D_PASTE(d_level_size, D_MODULENAME)
+
+extern struct d_level D_LEVEL[];
+extern size_t D_LEVEL_SIZE;
+
+
+/*
+ * Frontend stuff
+ *
+ *
+ * Stuff you need to declare prior to using the actual "debug" actions
+ * (defined below).
+ */
+
+#ifndef D_MODULENAME
+#error D_MODULENAME is not defined in your debug-levels.h file
+/**
+ * D_MODULE - Name of the current module
+ *
+ * #define in your module's debug-levels.h, making sure it is
+ * unique. This has to be a legal C identifier.
+ */
+#define D_MODULENAME undefined_modulename
+#endif
+
+
+#ifndef D_MASTER
+#warning D_MASTER not defined, but debug.h included! [see docs]
+/**
+ * D_MASTER - Compile time maximum debug level
+ *
+ * #define in your debug-levels.h file to the maximum debug level the
+ * runtime code will be allowed to have. This allows you to provide a
+ * main knob.
+ *
+ * Anything above that level will be optimized out of the compile.
+ *
+ * Defaults to zero (no debug code compiled in).
+ *
+ * Maximum one definition per module (at the debug-levels.h file).
+ */
+#define D_MASTER 0
+#endif
+
+#ifndef D_SUBMODULE
+#error D_SUBMODULE not defined, but debug.h included! [see docs]
+/**
+ * D_SUBMODULE - Name of the current submodule
+ *
+ * #define in your submodule .c file before #including debug-levels.h
+ * to the name of the current submodule as previously declared and
+ * defined with D_SUBMODULE_DECLARE() (in your module's
+ * debug-levels.h) and D_SUBMODULE_DEFINE().
+ *
+ * This is used to provide runtime-control over the debug levels.
+ *
+ * Maximum one per .c file! Can be shared among different .c files
+ * (meaning they belong to the same submodule categorization).
+ */
+#define D_SUBMODULE undefined_module
+#endif
+
+
+/**
+ * D_SUBMODULE_DECLARE - Declare a submodule for runtime debug level control
+ *
+ * @_name: name of the submodule, restricted to the chars that make up a
+ *     valid C identifier ([a-zA-Z0-9_]).
+ *
+ * Declare in the module's debug-levels.h header file as:
+ *
+ * enum d_module {
+ *         D_SUBMODULE_DECLARE(submodule_1),
+ *         D_SUBMODULE_DECLARE(submodule_2),
+ *         D_SUBMODULE_DECLARE(submodule_3),
+ * };
+ *
+ * Some corresponding .c file needs to have a matching
+ * D_SUBMODULE_DEFINE().
+ */
+#define D_SUBMODULE_DECLARE(_name) __D_SUBMODULE_##_name
+
+
+/**
+ * D_SUBMODULE_DEFINE - Define a submodule for runtime debug level control
+ *
+ * @_name: name of the submodule, restricted to the chars that make up a
+ *     valid C identifier ([a-zA-Z0-9_]).
+ *
+ * Use once per module (in some .c file) as:
+ *
+ * static
+ * struct d_level d_level_SUBMODULENAME[] = {
+ *         D_SUBMODULE_DEFINE(submodule_1),
+ *         D_SUBMODULE_DEFINE(submodule_2),
+ *         D_SUBMODULE_DEFINE(submodule_3),
+ * };
+ * size_t d_level_size_SUBDMODULENAME = ARRAY_SIZE(d_level_SUBDMODULENAME);
+ *
+ * Matching D_SUBMODULE_DECLARE()s have to be present in a
+ * debug-levels.h header file.
+ */
+#define D_SUBMODULE_DEFINE(_name)		\
+[__D_SUBMODULE_##_name] = {			\
+	.level = 0,				\
+	.name = #_name				\
+}
+
+
+
+/* The actual "debug" operations */
+
+
+/**
+ * d_test - Returns true if debugging should be enabled
+ *
+ * @l: intended debug level (unsigned)
+ *
+ * If the master debug switch is enabled and the current settings are
+ * higher or equal to the requested level, then debugging
+ * output/actions should be enabled.
+ *
+ * NOTE:
+ *
+ * This needs to be coded so that it can be evaluated in compile
+ * time; this is why the ugly BUG_ON() is placed in there, so the
+ * D_MASTER evaluation compiles all out if it is compile-time false.
+ */
+#define d_test(l)							\
+({									\
+	unsigned __l = l;	/* type enforcer */			\
+	(D_MASTER) >= __l						\
+	&& ({								\
+		BUG_ON(_D_SUBMODULE_INDEX(D_SUBMODULE) >= D_LEVEL_SIZE);\
+		D_LEVEL[_D_SUBMODULE_INDEX(D_SUBMODULE)].level >= __l;	\
+	});								\
+})
+
+
+/**
+ * d_fnstart - log message at function start if debugging enabled
+ *
+ * @l: intended debug level
+ * @_dev: 'struct device' pointer, NULL if none (for context)
+ * @f: printf-like format and arguments
+ */
+#define d_fnstart(l, _dev, f, a...) _d_printf(l, " FNSTART", _dev, f, ## a)
+
+
+/**
+ * d_fnend - log message at function end if debugging enabled
+ *
+ * @l: intended debug level
+ * @_dev: 'struct device' pointer, NULL if none (for context)
+ * @f: printf-like format and arguments
+ */
+#define d_fnend(l, _dev, f, a...) _d_printf(l, " FNEND", _dev, f, ## a)
+
+
+/**
+ * d_printf - log message if debugging enabled
+ *
+ * @l: intended debug level
+ * @_dev: 'struct device' pointer, NULL if none (for context)
+ * @f: printf-like format and arguments
+ */
+#define d_printf(l, _dev, f, a...) _d_printf(l, "", _dev, f, ## a)
+
+
+/**
+ * d_dump - log buffer hex dump if debugging enabled
+ *
+ * @l: intended debug level
+ * @_dev: 'struct device' pointer, NULL if none (for context)
+ * @f: printf-like format and arguments
+ */
+#define d_dump(l, dev, ptr, size)			\
+do {							\
+	char head[64];					\
+	if (!d_test(l))					\
+		break;					\
+	__d_head(head, sizeof(head), dev);		\
+	print_hex_dump(KERN_ERR, head, 0, 16, 1,	\
+		       ((void *) ptr), (size), 0);	\
+} while (0)
+
+
+/**
+ * Export a submodule's debug level over debugfs as PREFIXSUBMODULE
+ *
+ * @prefix: string to prefix the name with
+ * @submodule: name of submodule (not a string, just the name)
+ * @dentry: debugfs parent dentry
+ *
+ * Returns: 0 if ok, < 0 errno on error.
+ *
+ * For removing, just use debugfs_remove_recursive() on the parent.
+ */
+#define d_level_register_debugfs(prefix, name, parent)			\
+({									\
+	int rc;								\
+	struct dentry *fd;						\
+	struct dentry *verify_parent_type = parent;			\
+	fd = debugfs_create_u8(						\
+		prefix #name, 0600, verify_parent_type,			\
+		&(D_LEVEL[__D_SUBMODULE_ ## name].level));		\
+	rc = PTR_ERR(fd);						\
+	if (IS_ERR(fd) && rc != -ENODEV)				\
+		printk(KERN_ERR "%s: Can't create debugfs entry %s: "	\
+		       "%d\n", __func__, prefix #name, rc);		\
+	else								\
+		rc = 0;							\
+	rc;								\
+})
+
+
+#endif /* #ifndef __debug__h__ */
diff --git a/net/wimax/debug-levels.h b/net/wimax/debug-levels.h
new file mode 100644
index 000000000000..1c29123a3aa9
--- /dev/null
+++ b/net/wimax/debug-levels.h
@@ -0,0 +1,42 @@
+/*
+ * Linux WiMAX Stack
+ * Debug levels control file for the wimax module
+ *
+ *
+ * Copyright (C) 2007-2008 Intel Corporation <linux-wimax@intel.com>
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+#ifndef __debug_levels__h__
+#define __debug_levels__h__
+
+/* Maximum compile and run time debug level for all submodules */
+#define D_MODULENAME wimax
+#define D_MASTER CONFIG_WIMAX_DEBUG_LEVEL
+
+#include <linux/wimax/debug.h>
+
+/* List of all the enabled modules */
+enum d_module {
+	D_SUBMODULE_DECLARE(debugfs),
+	D_SUBMODULE_DECLARE(id_table),
+	D_SUBMODULE_DECLARE(op_msg),
+	D_SUBMODULE_DECLARE(op_reset),
+	D_SUBMODULE_DECLARE(op_rfkill),
+	D_SUBMODULE_DECLARE(stack),
+};
+
+#endif /* #ifndef __debug_levels__h__ */
-- 
cgit v1.2.3


From ea24652d253eabfb83e955e55ce032228d9d99b9 Mon Sep 17 00:00:00 2001
From: Inaky Perez-Gonzalez <inaky@linux.intel.com>
Date: Sat, 20 Dec 2008 16:57:43 -0800
Subject: i2400m: host/device procotol and core driver definitions

The wimax/i2400m.h defines the structures and constants for the
host-device protocols:

 - boot / firmware upload protocol

 - general data transport protocol

 - control protocol

It is done in such a way that can also be used verbatim by user space.

drivers/net/wimax/i2400m.h defines all the APIs used by the core,
bus-generic driver (i2400m) and the bus specific drivers
(i2400m-BUSNAME). It also gives a roadmap to the driver
implementation.

debug-levels.h adds the core driver's debug settings.

Signed-off-by: Inaky Perez-Gonzalez <inaky@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/net/wimax/i2400m/debug-levels.h |  45 ++
 drivers/net/wimax/i2400m/i2400m.h       | 755 ++++++++++++++++++++++++++++++++
 include/linux/wimax/i2400m.h            | 512 ++++++++++++++++++++++
 3 files changed, 1312 insertions(+)
 create mode 100644 drivers/net/wimax/i2400m/debug-levels.h
 create mode 100644 drivers/net/wimax/i2400m/i2400m.h
 create mode 100644 include/linux/wimax/i2400m.h

(limited to 'include/linux')

diff --git a/drivers/net/wimax/i2400m/debug-levels.h b/drivers/net/wimax/i2400m/debug-levels.h
new file mode 100644
index 000000000000..3183baa16a52
--- /dev/null
+++ b/drivers/net/wimax/i2400m/debug-levels.h
@@ -0,0 +1,45 @@
+/*
+ * Intel Wireless WiMAX Connection 2400m
+ * Debug levels control file for the i2400m module
+ *
+ *
+ * Copyright (C) 2007-2008 Intel Corporation <linux-wimax@intel.com>
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+#ifndef __debug_levels__h__
+#define __debug_levels__h__
+
+/* Maximum compile and run time debug level for all submodules */
+#define D_MODULENAME i2400m
+#define D_MASTER CONFIG_WIMAX_I2400M_DEBUG_LEVEL
+
+#include <linux/wimax/debug.h>
+
+/* List of all the enabled modules */
+enum d_module {
+	D_SUBMODULE_DECLARE(control),
+	D_SUBMODULE_DECLARE(driver),
+	D_SUBMODULE_DECLARE(debugfs),
+	D_SUBMODULE_DECLARE(fw),
+	D_SUBMODULE_DECLARE(netdev),
+	D_SUBMODULE_DECLARE(rfkill),
+	D_SUBMODULE_DECLARE(rx),
+	D_SUBMODULE_DECLARE(tx),
+};
+
+
+#endif /* #ifndef __debug_levels__h__ */
diff --git a/drivers/net/wimax/i2400m/i2400m.h b/drivers/net/wimax/i2400m/i2400m.h
new file mode 100644
index 000000000000..067c871cc226
--- /dev/null
+++ b/drivers/net/wimax/i2400m/i2400m.h
@@ -0,0 +1,755 @@
+/*
+ * Intel Wireless WiMAX Connection 2400m
+ * Declarations for bus-generic internal APIs
+ *
+ *
+ * Copyright (C) 2007-2008 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * Intel Corporation <linux-wimax@intel.com>
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ * Yanir Lubetkin <yanirx.lubetkin@intel.com>
+ *  - Initial implementation
+ *
+ *
+ * GENERAL DRIVER ARCHITECTURE
+ *
+ * The i2400m driver is split in the following two major parts:
+ *
+ *  - bus specific driver
+ *  - bus generic driver (this part)
+ *
+ * The bus specific driver sets up stuff specific to the bus the
+ * device is connected to (USB, SDIO, PCI, tam-tam...non-authoritative
+ * nor binding list) which is basically the device-model management
+ * (probe/disconnect, etc), moving data from device to kernel and
+ * back, doing the power saving details and reseting the device.
+ *
+ * For details on each bus-specific driver, see it's include file,
+ * i2400m-BUSNAME.h
+ *
+ * The bus-generic functionality break up is:
+ *
+ *  - Firmware upload: fw.c - takes care of uploading firmware to the
+ *        device. bus-specific driver just needs to provides a way to
+ *        execute boot-mode commands and to reset the device.
+ *
+ *  - RX handling: rx.c - receives data from the bus-specific code and
+ *        feeds it to the network or WiMAX stack or uses it to modify
+ *        the driver state. bus-specific driver only has to receive
+ *        frames and pass them to this module.
+ *
+ *  - TX handling: tx.c - manages the TX FIFO queue and provides means
+ *        for the bus-specific TX code to pull data from the FIFO
+ *        queue. bus-specific code just pulls frames from this module
+ *        to sends them to the device.
+ *
+ *  - netdev glue: netdev.c - interface with Linux networking
+ *        stack. Pass around data frames, and configure when the
+ *        device is up and running or shutdown (through ifconfig up /
+ *        down). Bus-generic only.
+ *
+ *  - control ops: control.c - implements various commmands for
+ *        controlling the device. bus-generic only.
+ *
+ *  - device model glue: driver.c - implements helpers for the
+ *        device-model glue done by the bus-specific layer
+ *        (setup/release the driver resources), turning the device on
+ *        and off, handling the device reboots/resets and a few simple
+ *        WiMAX stack ops.
+ *
+ * Code is also broken up in linux-glue / device-glue.
+ *
+ * Linux glue contains functions that deal mostly with gluing with the
+ * rest of the Linux kernel.
+ *
+ * Device-glue are functions that deal mostly with the way the device
+ * does things and talk the device's language.
+ *
+ * device-glue code is licensed BSD so other open source OSes can take
+ * it to implement their drivers.
+ *
+ *
+ * APIs AND HEADER FILES
+ *
+ * This bus generic code exports three APIs:
+ *
+ *  - HDI (host-device interface) definitions common to all busses
+ *    (include/linux/wimax/i2400m.h); these can be also used by user
+ *    space code.
+ *  - internal API for the bus-generic code
+ *  - external API for the bus-specific drivers
+ *
+ *
+ * LIFE CYCLE:
+ *
+ * When the bus-specific driver probes, it allocates a network device
+ * with enough space for it's data structue, that must contain a
+ * &struct i2400m at the top.
+ *
+ * On probe, it needs to fill the i2400m members marked as [fill], as
+ * well as i2400m->wimax_dev.net_dev and call i2400m_setup(). The
+ * i2400m driver will only register with the WiMAX and network stacks;
+ * the only access done to the device is to read the MAC address so we
+ * can register a network device. This calls i2400m_dev_start() to
+ * load firmware, setup communication with the device and configure it
+ * for operation.
+ *
+ * At this point, control and data communications are possible.
+ *
+ * On disconnect/driver unload, the bus-specific disconnect function
+ * calls i2400m_release() to undo i2400m_setup(). i2400m_dev_stop()
+ * shuts the firmware down and releases resources uses to communicate
+ * with the device.
+ *
+ * While the device is up, it might reset. The bus-specific driver has
+ * to catch that situation and call i2400m_dev_reset_handle() to deal
+ * with it (reset the internal driver structures and go back to square
+ * one).
+ */
+
+#ifndef __I2400M_H__
+#define __I2400M_H__
+
+#include <linux/usb.h>
+#include <linux/netdevice.h>
+#include <linux/completion.h>
+#include <linux/rwsem.h>
+#include <asm/atomic.h>
+#include <net/wimax.h>
+#include <linux/wimax/i2400m.h>
+#include <asm/byteorder.h>
+
+/* Misc constants */
+enum {
+	/* Firmware uploading */
+	I2400M_BOOT_RETRIES = 3,
+	/* Size of the Boot Mode Command buffer */
+	I2400M_BM_CMD_BUF_SIZE = 16 * 1024,
+	I2400M_BM_ACK_BUF_SIZE = 256,
+};
+
+
+/* Firmware version we request when pulling the fw image file */
+#define I2400M_FW_VERSION "1.3"
+
+
+/**
+ * i2400m_reset_type - methods to reset a device
+ *
+ * @I2400M_RT_WARM: Reset without device disconnection, device handles
+ *     are kept valid but state is back to power on, with firmware
+ *     re-uploaded.
+ * @I2400M_RT_COLD: Tell the device to disconnect itself from the bus
+ *     and reconnect. Renders all device handles invalid.
+ * @I2400M_RT_BUS: Tells the bus to reset the device; last measure
+ *     used when both types above don't work.
+ */
+enum i2400m_reset_type {
+	I2400M_RT_WARM,	/* first measure */
+	I2400M_RT_COLD,	/* second measure */
+	I2400M_RT_BUS,	/* call in artillery */
+};
+
+struct i2400m_reset_ctx;
+
+/**
+ * struct i2400m - descriptor for an Intel 2400m
+ *
+ * Members marked with [fill] must be filled out/initialized before
+ * calling i2400m_setup().
+ *
+ * @bus_tx_block_size: [fill] SDIO imposes a 256 block size, USB 16,
+ *     so we have a tx_blk_size variable that the bus layer sets to
+ *     tell the engine how much of that we need.
+ *
+ * @bus_pl_size_max: [fill] Maximum payload size.
+ *
+ * @bus_dev_start: [fill] Function called by the bus-generic code
+ *     [i2400m_dev_start()] to setup the bus-specific communications
+ *     to the the device. See LIFE CYCLE above.
+ *
+ *     NOTE: Doesn't need to upload the firmware, as that is taken
+ *     care of by the bus-generic code.
+ *
+ * @bus_dev_stop: [fill] Function called by the bus-generic code
+ *     [i2400m_dev_stop()] to shutdown the bus-specific communications
+ *     to the the device. See LIFE CYCLE above.
+ *
+ *     This function does not need to reset the device, just tear down
+ *     all the host resources created to  handle communication with
+ *     the device.
+ *
+ * @bus_tx_kick: [fill] Function called by the bus-generic code to let
+ *     the bus-specific code know that there is data available in the
+ *     TX FIFO for transmission to the device.
+ *
+ *     This function cannot sleep.
+ *
+ * @bus_reset: [fill] Function called by the bus-generic code to reset
+ *     the device in in various ways. Doesn't need to wait for the
+ *     reset to finish.
+ *
+ *     If warm or cold reset fail, this function is expected to do a
+ *     bus-specific reset (eg: USB reset) to get the device to a
+ *     working state (even if it implies device disconecction).
+ *
+ *     Note the warm reset is used by the firmware uploader to
+ *     reinitialize the device.
+ *
+ *     IMPORTANT: this is called very early in the device setup
+ *     process, so it cannot rely on common infrastructure being laid
+ *     out.
+ *
+ * @bus_bm_cmd_send: [fill] Function called to send a boot-mode
+ *     command. Flags are defined in 'enum i2400m_bm_cmd_flags'. This
+ *     is synchronous and has to return 0 if ok or < 0 errno code in
+ *     any error condition.
+ *
+ * @bus_bm_wait_for_ack: [fill] Function called to wait for a
+ *     boot-mode notification (that can be a response to a previously
+ *     issued command or an asynchronous one). Will read until all the
+ *     indicated size is read or timeout. Reading more or less data
+ *     than asked for is an error condition. Return 0 if ok, < 0 errno
+ *     code on error.
+ *
+ *     The caller to this function will check if the response is a
+ *     barker that indicates the device going into reset mode.
+ *
+ * @bus_fw_name: [fill] name of the firmware image (in most cases,
+ *     they are all the same for a single release, except that they
+ *     have the type of the bus embedded in the name (eg:
+ *     i2400m-fw-X-VERSION.sbcf, where X is the bus name).
+ *
+ * @bus_bm_mac_addr_impaired: [fill] Set to true if the device's MAC
+ *     address provided in boot mode is kind of broken and needs to
+ *     be re-read later on.
+ *
+ *
+ * @wimax_dev: WiMAX generic device for linkage into the kernel WiMAX
+ *     stack. Due to the way a net_device is allocated, we need to
+ *     force this to be the first field so that we can get from
+ *     netdev_priv() the right pointer.
+ *
+ * @state: device's state (as reported by it)
+ *
+ * @state_wq: waitqueue that is woken up whenever the state changes
+ *
+ * @tx_lock: spinlock to protect TX members
+ *
+ * @tx_buf: FIFO buffer for TX; we queue data here
+ *
+ * @tx_in: FIFO index for incoming data. Note this doesn't wrap around
+ *     and it is always greater than @tx_out.
+ *
+ * @tx_out: FIFO index for outgoing data
+ *
+ * @tx_msg: current TX message that is active in the FIFO for
+ *     appending payloads.
+ *
+ * @tx_sequence: current sequence number for TX messages from the
+ *     device to the host.
+ *
+ * @tx_msg_size: size of the current message being transmitted by the
+ *     bus-specific code.
+ *
+ * @tx_pl_num: total number of payloads sent
+ *
+ * @tx_pl_max: maximum number of payloads sent in a TX message
+ *
+ * @tx_pl_min: minimum number of payloads sent in a TX message
+ *
+ * @tx_num: number of TX messages sent
+ *
+ * @tx_size_acc: number of bytes in all TX messages sent
+ *     (this is different to net_dev's statistics as it also counts
+ *     control messages).
+ *
+ * @tx_size_min: smallest TX message sent.
+ *
+ * @tx_size_max: biggest TX message sent.
+ *
+ * @rx_lock: spinlock to protect RX members
+ *
+ * @rx_pl_num: total number of payloads received
+ *
+ * @rx_pl_max: maximum number of payloads received in a RX message
+ *
+ * @rx_pl_min: minimum number of payloads received in a RX message
+ *
+ * @rx_num: number of RX messages received
+ *
+ * @rx_size_acc: number of bytes in all RX messages received
+ *     (this is different to net_dev's statistics as it also counts
+ *     control messages).
+ *
+ * @rx_size_min: smallest RX message received.
+ *
+ * @rx_size_max: buggest RX message received.
+ *
+ * @init_mutex: Mutex used for serializing the device bringup
+ *     sequence; this way if the device reboots in the middle, we
+ *     don't try to do a bringup again while we are tearing down the
+ *     one that failed.
+ *
+ *     Can't reuse @msg_mutex because from within the bringup sequence
+ *     we need to send messages to the device and thus use @msg_mutex.
+ *
+ * @msg_mutex: mutex used to send control commands to the device (we
+ *     only allow one at a time, per host-device interface design).
+ *
+ * @msg_completion: used to wait for an ack to a control command sent
+ *     to the device.
+ *
+ * @ack_skb: used to store the actual ack to a control command if the
+ *     reception of the command was successful. Otherwise, a ERR_PTR()
+ *     errno code that indicates what failed with the ack reception.
+ *
+ *     Only valid after @msg_completion is woken up. Only updateable
+ *     if @msg_completion is armed. Only touched by
+ *     i2400m_msg_to_dev().
+ *
+ *     Protected by @rx_lock. In theory the command execution flow is
+ *     sequential, but in case the device sends an out-of-phase or
+ *     very delayed response, we need to avoid it trampling current
+ *     execution.
+ *
+ * @bm_cmd_buf: boot mode command buffer for composing firmware upload
+ *     commands.
+ *
+ *     USB can't r/w to stack, vmalloc, etc...as well, we end up
+ *     having to alloc/free a lot to compose commands, so we use these
+ *     for stagging and not having to realloc all the time.
+ *
+ *     This assumes the code always runs serialized. Only one thread
+ *     can call i2400m_bm_cmd() at the same time.
+ *
+ * @bm_ack_buf: boot mode acknoledge buffer for staging reception of
+ *     responses to commands.
+ *
+ *     See @bm_cmd_buf.
+ *
+ * @work_queue: work queue for processing device reports. This
+ *     workqueue cannot be used for processing TX or RX to the device,
+ *     as from it we'll process device reports, which might require
+ *     further communication with the device.
+ *
+ * @debugfs_dentry: hookup for debugfs files.
+ *     These have to be in a separate directory, a child of
+ *     (wimax_dev->debugfs_dentry) so they can be removed when the
+ *     module unloads, as we don't keep each dentry.
+ */
+struct i2400m {
+	struct wimax_dev wimax_dev;	/* FIRST! See doc */
+
+	unsigned updown:1;		/* Network device is up or down */
+	unsigned boot_mode:1;		/* is the device in boot mode? */
+	unsigned sboot:1;		/* signed or unsigned fw boot */
+	unsigned ready:1;		/* all probing steps done */
+	u8 trace_msg_from_user;		/* echo rx msgs to 'trace' pipe */
+					/* typed u8 so debugfs/u8 can tweak */
+	enum i2400m_system_state state;
+	wait_queue_head_t state_wq;	/* Woken up when on state updates */
+
+	size_t bus_tx_block_size;
+	size_t bus_pl_size_max;
+	int (*bus_dev_start)(struct i2400m *);
+	void (*bus_dev_stop)(struct i2400m *);
+	void (*bus_tx_kick)(struct i2400m *);
+	int (*bus_reset)(struct i2400m *, enum i2400m_reset_type);
+	ssize_t (*bus_bm_cmd_send)(struct i2400m *,
+				   const struct i2400m_bootrom_header *,
+				   size_t, int flags);
+	ssize_t (*bus_bm_wait_for_ack)(struct i2400m *,
+				       struct i2400m_bootrom_header *, size_t);
+	const char *bus_fw_name;
+	unsigned bus_bm_mac_addr_impaired:1;
+
+	spinlock_t tx_lock;		/* protect TX state */
+	void *tx_buf;
+	size_t tx_in, tx_out;
+	struct i2400m_msg_hdr *tx_msg;
+	size_t tx_sequence, tx_msg_size;
+	/* TX stats */
+	unsigned tx_pl_num, tx_pl_max, tx_pl_min,
+		tx_num, tx_size_acc, tx_size_min, tx_size_max;
+
+	/* RX stats */
+	spinlock_t rx_lock;		/* protect RX state */
+	unsigned rx_pl_num, rx_pl_max, rx_pl_min,
+		rx_num, rx_size_acc, rx_size_min, rx_size_max;
+
+	struct mutex msg_mutex;		/* serialize command execution */
+	struct completion msg_completion;
+	struct sk_buff *ack_skb;	/* protected by rx_lock */
+
+	void *bm_ack_buf;		/* for receiving acks over USB */
+	void *bm_cmd_buf;		/* for issuing commands over USB */
+
+	struct workqueue_struct *work_queue;
+
+	struct mutex init_mutex;	/* protect bringup seq */
+	struct i2400m_reset_ctx *reset_ctx;	/* protected by init_mutex */
+
+	struct work_struct wake_tx_ws;
+	struct sk_buff *wake_tx_skb;
+
+	struct dentry *debugfs_dentry;
+};
+
+
+/*
+ * Initialize a 'struct i2400m' from all zeroes
+ *
+ * This is a bus-generic API call.
+ */
+static inline
+void i2400m_init(struct i2400m *i2400m)
+{
+	wimax_dev_init(&i2400m->wimax_dev);
+
+	i2400m->boot_mode = 1;
+	init_waitqueue_head(&i2400m->state_wq);
+
+	spin_lock_init(&i2400m->tx_lock);
+	i2400m->tx_pl_min = UINT_MAX;
+	i2400m->tx_size_min = UINT_MAX;
+
+	spin_lock_init(&i2400m->rx_lock);
+	i2400m->rx_pl_min = UINT_MAX;
+	i2400m->rx_size_min = UINT_MAX;
+
+	mutex_init(&i2400m->msg_mutex);
+	init_completion(&i2400m->msg_completion);
+
+	mutex_init(&i2400m->init_mutex);
+	/* wake_tx_ws is initialized in i2400m_tx_setup() */
+}
+
+
+/*
+ * Bus-generic internal APIs
+ * -------------------------
+ */
+
+static inline
+struct i2400m *wimax_dev_to_i2400m(struct wimax_dev *wimax_dev)
+{
+	return container_of(wimax_dev, struct i2400m, wimax_dev);
+}
+
+static inline
+struct i2400m *net_dev_to_i2400m(struct net_device *net_dev)
+{
+	return wimax_dev_to_i2400m(netdev_priv(net_dev));
+}
+
+/*
+ * Boot mode support
+ */
+
+/**
+ * i2400m_bm_cmd_flags - flags to i2400m_bm_cmd()
+ *
+ * @I2400M_BM_CMD_RAW: send the command block as-is, without doing any
+ *     extra processing for adding CRC.
+ */
+enum i2400m_bm_cmd_flags {
+	I2400M_BM_CMD_RAW	= 1 << 2,
+};
+
+/**
+ * i2400m_bri - Boot-ROM indicators
+ *
+ * Flags for i2400m_bootrom_init() and i2400m_dev_bootstrap() [which
+ * are passed from things like i2400m_setup()]. Can be combined with
+ * |.
+ *
+ * @I2400M_BRI_SOFT: The device rebooted already and a reboot
+ *     barker received, proceed directly to ack the boot sequence.
+ * @I2400M_BRI_NO_REBOOT: Do not reboot the device and proceed
+ *     directly to wait for a reboot barker from the device.
+ * @I2400M_BRI_MAC_REINIT: We need to reinitialize the boot
+ *     rom after reading the MAC adress. This is quite a dirty hack,
+ *     if you ask me -- the device requires the bootrom to be
+ *     intialized after reading the MAC address.
+ */
+enum i2400m_bri {
+	I2400M_BRI_SOFT       = 1 << 1,
+	I2400M_BRI_NO_REBOOT  = 1 << 2,
+	I2400M_BRI_MAC_REINIT = 1 << 3,
+};
+
+extern void i2400m_bm_cmd_prepare(struct i2400m_bootrom_header *);
+extern int i2400m_dev_bootstrap(struct i2400m *, enum i2400m_bri);
+extern int i2400m_read_mac_addr(struct i2400m *);
+extern int i2400m_bootrom_init(struct i2400m *, enum i2400m_bri);
+
+/* Make/grok boot-rom header commands */
+
+static inline
+__le32 i2400m_brh_command(enum i2400m_brh_opcode opcode, unsigned use_checksum,
+			  unsigned direct_access)
+{
+	return cpu_to_le32(
+		I2400M_BRH_SIGNATURE
+		| (direct_access ? I2400M_BRH_DIRECT_ACCESS : 0)
+		| I2400M_BRH_RESPONSE_REQUIRED /* response always required */
+		| (use_checksum ? I2400M_BRH_USE_CHECKSUM : 0)
+		| (opcode & I2400M_BRH_OPCODE_MASK));
+}
+
+static inline
+void i2400m_brh_set_opcode(struct i2400m_bootrom_header *hdr,
+			   enum i2400m_brh_opcode opcode)
+{
+	hdr->command = cpu_to_le32(
+		(le32_to_cpu(hdr->command) & ~I2400M_BRH_OPCODE_MASK)
+		| (opcode & I2400M_BRH_OPCODE_MASK));
+}
+
+static inline
+unsigned i2400m_brh_get_opcode(const struct i2400m_bootrom_header *hdr)
+{
+	return le32_to_cpu(hdr->command) & I2400M_BRH_OPCODE_MASK;
+}
+
+static inline
+unsigned i2400m_brh_get_response(const struct i2400m_bootrom_header *hdr)
+{
+	return (le32_to_cpu(hdr->command) & I2400M_BRH_RESPONSE_MASK)
+		>> I2400M_BRH_RESPONSE_SHIFT;
+}
+
+static inline
+unsigned i2400m_brh_get_use_checksum(const struct i2400m_bootrom_header *hdr)
+{
+	return le32_to_cpu(hdr->command) & I2400M_BRH_USE_CHECKSUM;
+}
+
+static inline
+unsigned i2400m_brh_get_response_required(
+	const struct i2400m_bootrom_header *hdr)
+{
+	return le32_to_cpu(hdr->command) & I2400M_BRH_RESPONSE_REQUIRED;
+}
+
+static inline
+unsigned i2400m_brh_get_direct_access(const struct i2400m_bootrom_header *hdr)
+{
+	return le32_to_cpu(hdr->command) & I2400M_BRH_DIRECT_ACCESS;
+}
+
+static inline
+unsigned i2400m_brh_get_signature(const struct i2400m_bootrom_header *hdr)
+{
+	return (le32_to_cpu(hdr->command) & I2400M_BRH_SIGNATURE_MASK)
+		>> I2400M_BRH_SIGNATURE_SHIFT;
+}
+
+
+/*
+ * Driver / device setup and internal functions
+ */
+extern void i2400m_netdev_setup(struct net_device *net_dev);
+extern int i2400m_tx_setup(struct i2400m *);
+extern void i2400m_wake_tx_work(struct work_struct *);
+extern void i2400m_tx_release(struct i2400m *);
+
+extern void i2400m_net_rx(struct i2400m *, struct sk_buff *, unsigned,
+			  const void *, int);
+enum i2400m_pt;
+extern int i2400m_tx(struct i2400m *, const void *, size_t, enum i2400m_pt);
+
+#ifdef CONFIG_DEBUG_FS
+extern int i2400m_debugfs_add(struct i2400m *);
+extern void i2400m_debugfs_rm(struct i2400m *);
+#else
+static inline int i2400m_debugfs_add(struct i2400m *i2400m)
+{
+	return 0;
+}
+static inline void i2400m_debugfs_rm(struct i2400m *i2400m) {}
+#endif
+
+/* Called by _dev_start()/_dev_stop() to initialize the device itself */
+extern int i2400m_dev_initialize(struct i2400m *);
+extern void i2400m_dev_shutdown(struct i2400m *);
+
+extern struct attribute_group i2400m_dev_attr_group;
+
+extern int i2400m_schedule_work(struct i2400m *,
+				void (*)(struct work_struct *), gfp_t);
+
+/* HDI message's payload description handling */
+
+static inline
+size_t i2400m_pld_size(const struct i2400m_pld *pld)
+{
+	return I2400M_PLD_SIZE_MASK & le32_to_cpu(pld->val);
+}
+
+static inline
+enum i2400m_pt i2400m_pld_type(const struct i2400m_pld *pld)
+{
+	return (I2400M_PLD_TYPE_MASK & le32_to_cpu(pld->val))
+		>> I2400M_PLD_TYPE_SHIFT;
+}
+
+static inline
+void i2400m_pld_set(struct i2400m_pld *pld, size_t size,
+		    enum i2400m_pt type)
+{
+	pld->val = cpu_to_le32(
+		((type << I2400M_PLD_TYPE_SHIFT) & I2400M_PLD_TYPE_MASK)
+		|  (size & I2400M_PLD_SIZE_MASK));
+}
+
+
+/*
+ * API for the bus-specific drivers
+ * --------------------------------
+ */
+
+static inline
+struct i2400m *i2400m_get(struct i2400m *i2400m)
+{
+	dev_hold(i2400m->wimax_dev.net_dev);
+	return i2400m;
+}
+
+static inline
+void i2400m_put(struct i2400m *i2400m)
+{
+	dev_put(i2400m->wimax_dev.net_dev);
+}
+
+extern int i2400m_dev_reset_handle(struct i2400m *);
+
+/*
+ * _setup()/_release() are called by the probe/disconnect functions of
+ * the bus-specific drivers.
+ */
+extern int i2400m_setup(struct i2400m *, enum i2400m_bri bm_flags);
+extern void i2400m_release(struct i2400m *);
+
+extern int i2400m_rx(struct i2400m *, struct sk_buff *);
+extern struct i2400m_msg_hdr *i2400m_tx_msg_get(struct i2400m *, size_t *);
+extern void i2400m_tx_msg_sent(struct i2400m *);
+
+static const __le32 i2400m_NBOOT_BARKER[4] = {
+	__constant_cpu_to_le32(I2400M_NBOOT_BARKER),
+	__constant_cpu_to_le32(I2400M_NBOOT_BARKER),
+	__constant_cpu_to_le32(I2400M_NBOOT_BARKER),
+	__constant_cpu_to_le32(I2400M_NBOOT_BARKER)
+};
+
+static const __le32 i2400m_SBOOT_BARKER[4] = {
+	__constant_cpu_to_le32(I2400M_SBOOT_BARKER),
+	__constant_cpu_to_le32(I2400M_SBOOT_BARKER),
+	__constant_cpu_to_le32(I2400M_SBOOT_BARKER),
+	__constant_cpu_to_le32(I2400M_SBOOT_BARKER)
+};
+
+
+/*
+ * Utility functions
+ */
+
+static inline
+struct device *i2400m_dev(struct i2400m *i2400m)
+{
+	return i2400m->wimax_dev.net_dev->dev.parent;
+}
+
+/*
+ * Helper for scheduling simple work functions
+ *
+ * This struct can get any kind of payload attached (normally in the
+ * form of a struct where you pack the stuff you want to pass to the
+ * _work function).
+ */
+struct i2400m_work {
+	struct work_struct ws;
+	struct i2400m *i2400m;
+	u8 pl[0];
+};
+extern int i2400m_queue_work(struct i2400m *,
+			     void (*)(struct work_struct *), gfp_t,
+				const void *, size_t);
+
+extern int i2400m_msg_check_status(const struct i2400m_l3l4_hdr *,
+				   char *, size_t);
+extern int i2400m_msg_size_check(struct i2400m *,
+				 const struct i2400m_l3l4_hdr *, size_t);
+extern struct sk_buff *i2400m_msg_to_dev(struct i2400m *, const void *, size_t);
+extern void i2400m_msg_to_dev_cancel_wait(struct i2400m *, int);
+extern void i2400m_msg_ack_hook(struct i2400m *,
+				const struct i2400m_l3l4_hdr *, size_t);
+extern void i2400m_report_hook(struct i2400m *,
+			       const struct i2400m_l3l4_hdr *, size_t);
+extern int i2400m_cmd_enter_powersave(struct i2400m *);
+extern int i2400m_cmd_get_state(struct i2400m *);
+extern int i2400m_cmd_exit_idle(struct i2400m *);
+extern struct sk_buff *i2400m_get_device_info(struct i2400m *);
+extern int i2400m_firmware_check(struct i2400m *);
+extern int i2400m_set_init_config(struct i2400m *,
+				  const struct i2400m_tlv_hdr **, size_t);
+
+static inline
+struct usb_endpoint_descriptor *usb_get_epd(struct usb_interface *iface, int ep)
+{
+	return &iface->cur_altsetting->endpoint[ep].desc;
+}
+
+extern int i2400m_op_rfkill_sw_toggle(struct wimax_dev *,
+				      enum wimax_rf_state);
+extern void i2400m_report_tlv_rf_switches_status(
+	struct i2400m *, const struct i2400m_tlv_rf_switches_status *);
+
+
+/*
+ * Do a millisecond-sleep for allowing wireshark to dump all the data
+ * packets. Used only for debugging.
+ */
+static inline
+void __i2400m_msleep(unsigned ms)
+{
+#if 1
+#else
+	msleep(ms);
+#endif
+}
+
+/* Module parameters */
+
+extern int i2400m_idle_mode_disabled;
+
+
+#endif /* #ifndef __I2400M_H__ */
diff --git a/include/linux/wimax/i2400m.h b/include/linux/wimax/i2400m.h
new file mode 100644
index 000000000000..74198f5bb4dc
--- /dev/null
+++ b/include/linux/wimax/i2400m.h
@@ -0,0 +1,512 @@
+/*
+ * Intel Wireless WiMax Connection 2400m
+ * Host-Device protocol interface definitions
+ *
+ *
+ * Copyright (C) 2007-2008 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * Intel Corporation <linux-wimax@intel.com>
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *  - Initial implementation
+ *
+ *
+ * This header defines the data structures and constants used to
+ * communicate with the device.
+ *
+ * BOOTMODE/BOOTROM/FIRMWARE UPLOAD PROTOCOL
+ *
+ * The firmware upload protocol is quite simple and only requires a
+ * handful of commands. See drivers/net/wimax/i2400m/fw.c for more
+ * details.
+ *
+ * The BCF data structure is for the firmware file header.
+ *
+ *
+ * THE DATA / CONTROL PROTOCOL
+ *
+ * This is the normal protocol spoken with the device once the
+ * firmware is uploaded. It transports data payloads and control
+ * messages back and forth.
+ *
+ * It consists 'messages' that pack one or more payloads each. The
+ * format is described in detail in drivers/net/wimax/i2400m/rx.c and
+ * tx.c.
+ *
+ *
+ * THE L3L4 PROTOCOL
+ *
+ * The term L3L4 refers to Layer 3 (the device), Layer 4 (the
+ * driver/host software).
+ *
+ * This is the control protocol used by the host to control the i2400m
+ * device (scan, connect, disconnect...). This is sent to / received
+ * as control frames. These frames consist of a header and zero or
+ * more TLVs with information. We call each control frame a "message".
+ *
+ * Each message is composed of:
+ *
+ * HEADER
+ * [TLV0 + PAYLOAD0]
+ * [TLV1 + PAYLOAD1]
+ * [...]
+ * [TLVN + PAYLOADN]
+ *
+ * The HEADER is defined by 'struct i2400m_l3l4_hdr'. The payloads are
+ * defined by a TLV structure (Type Length Value) which is a 'header'
+ * (struct i2400m_tlv_hdr) and then the payload.
+ *
+ * All integers are represented as Little Endian.
+ *
+ * - REQUESTS AND EVENTS
+ *
+ * The requests can be clasified as follows:
+ *
+ *   COMMAND:  implies a request from the host to the device requesting
+ *             an action being performed. The device will reply with a
+ *             message (with the same type as the command), status and
+ *             no (TLV) payload. Execution of a command might cause
+ *             events (of different type) to be sent later on as
+ *             device's state changes.
+ *
+ *   GET/SET:  similar to COMMAND, but will not cause other
+ *             EVENTs. The reply, in the case of GET, will contain
+ *             TLVs with the requested information.
+ *
+ *   EVENT:    asynchronous messages sent from the device, maybe as a
+ *             consequence of previous COMMANDs but disassociated from
+ *             them.
+ *
+ * Only one request might be pending at the same time (ie: don't
+ * parallelize nor post another GET request before the previous
+ * COMMAND has been acknowledged with it's corresponding reply by the
+ * device).
+ *
+ * The different requests and their formats are described below:
+ *
+ *  I2400M_MT_*   Message types
+ *  I2400M_MS_*   Message status (for replies, events)
+ *  i2400m_tlv_*  TLVs
+ *
+ * data types are named 'struct i2400m_msg_OPNAME', OPNAME matching the
+ * operation.
+ */
+
+#ifndef __LINUX__WIMAX__I2400M_H__
+#define __LINUX__WIMAX__I2400M_H__
+
+#include <linux/types.h>
+
+
+/*
+ * Host Device Interface (HDI) common to all busses
+ */
+
+/* Boot-mode (firmware upload mode) commands */
+
+/* Header for the firmware file */
+struct i2400m_bcf_hdr {
+	__le32 module_type;
+	__le32 header_len;
+	__le32 header_version;
+	__le32 module_id;
+	__le32 module_vendor;
+	__le32 date;		/* BCD YYYMMDD */
+	__le32 size;
+	__le32 key_size;	/* in dwords */
+	__le32 modulus_size;	/* in dwords */
+	__le32 exponent_size;	/* in dwords */
+	__u8 reserved[88];
+} __attribute__ ((packed));
+
+/* Boot mode opcodes */
+enum i2400m_brh_opcode {
+	I2400M_BRH_READ = 1,
+	I2400M_BRH_WRITE = 2,
+	I2400M_BRH_JUMP = 3,
+	I2400M_BRH_SIGNED_JUMP = 8,
+	I2400M_BRH_HASH_PAYLOAD_ONLY = 9,
+};
+
+/* Boot mode command masks and stuff */
+enum i2400m_brh {
+	I2400M_BRH_SIGNATURE = 0xcbbc0000,
+	I2400M_BRH_SIGNATURE_MASK = 0xffff0000,
+	I2400M_BRH_SIGNATURE_SHIFT = 16,
+	I2400M_BRH_OPCODE_MASK = 0x0000000f,
+	I2400M_BRH_RESPONSE_MASK = 0x000000f0,
+	I2400M_BRH_RESPONSE_SHIFT = 4,
+	I2400M_BRH_DIRECT_ACCESS = 0x00000400,
+	I2400M_BRH_RESPONSE_REQUIRED = 0x00000200,
+	I2400M_BRH_USE_CHECKSUM = 0x00000100,
+};
+
+
+/* Constants for bcf->module_id */
+enum i2400m_bcf_mod_id {
+	/* Firmware file carries its own pokes -- pokes are a set of
+	 * magical values that have to be written in certain memory
+	 * addresses to get the device up and ready for firmware
+	 * download when it is in non-signed boot mode. */
+	I2400M_BCF_MOD_ID_POKES = 0x000000001,
+};
+
+
+/**
+ * i2400m_bootrom_header - Header for a boot-mode command
+ *
+ * @cmd: the above command descriptor
+ * @target_addr: where on the device memory should the action be performed.
+ * @data_size: for read/write, amount of data to be read/written
+ * @block_checksum: checksum value (if applicable)
+ * @payload: the beginning of data attached to this header
+ */
+struct i2400m_bootrom_header {
+	__le32 command;		/* Compose with enum i2400_brh */
+	__le32 target_addr;
+	__le32 data_size;
+	__le32 block_checksum;
+	char payload[0];
+} __attribute__ ((packed));
+
+
+/*
+ * Data / control protocol
+ */
+
+/* Packet types for the host-device interface */
+enum i2400m_pt {
+	I2400M_PT_DATA = 0,
+	I2400M_PT_CTRL,
+	I2400M_PT_TRACE,	/* For device debug */
+	I2400M_PT_RESET_WARM,	/* device reset */
+	I2400M_PT_RESET_COLD,	/* USB[transport] reset, like reconnect */
+	I2400M_PT_ILLEGAL
+};
+
+
+/*
+ * Payload for a data packet
+ *
+ * This is prefixed to each and every outgoing DATA type.
+ */
+struct i2400m_pl_data_hdr {
+	__le32 reserved;
+} __attribute__((packed));
+
+
+/* Misc constants */
+enum {
+	I2400M_PL_PAD = 16,	/* Payload data size alignment */
+	I2400M_PL_SIZE_MAX = 0x3EFF,
+	I2400M_MAX_PLS_IN_MSG = 60,
+	/* protocol barkers: sync sequences; for notifications they
+	 * are sent in groups of four. */
+	I2400M_H2D_PREVIEW_BARKER = 0xcafe900d,
+	I2400M_COLD_RESET_BARKER = 0xc01dc01d,
+	I2400M_WARM_RESET_BARKER = 0x50f750f7,
+	I2400M_NBOOT_BARKER = 0xdeadbeef,
+	I2400M_SBOOT_BARKER = 0x0ff1c1a1,
+	I2400M_ACK_BARKER = 0xfeedbabe,
+	I2400M_D2H_MSG_BARKER = 0xbeefbabe,
+};
+
+
+/*
+ * Hardware payload descriptor
+ *
+ * Bitfields encoded in a struct to enforce typing semantics.
+ *
+ * Look in rx.c and tx.c for a full description of the format.
+ */
+struct i2400m_pld {
+	__le32 val;
+} __attribute__ ((packed));
+
+#define I2400M_PLD_SIZE_MASK 0x00003fff
+#define I2400M_PLD_TYPE_SHIFT 16
+#define I2400M_PLD_TYPE_MASK 0x000f0000
+
+/*
+ * Header for a TX message or RX message
+ *
+ * @barker: preamble
+ * @size: used for management of the FIFO queue buffer; before
+ *     sending, this is converted to be a real preamble. This
+ *     indicates the real size of the TX message that starts at this
+ *     point. If the highest bit is set, then this message is to be
+ *     skipped.
+ * @sequence: sequence number of this message
+ * @offset: offset where the message itself starts -- see the comments
+ *     in the file header about message header and payload descriptor
+ *     alignment.
+ * @num_pls: number of payloads in this message
+ * @padding: amount of padding bytes at the end of the message to make
+ *           it be of block-size aligned
+ *
+ * Look in rx.c and tx.c for a full description of the format.
+ */
+struct i2400m_msg_hdr {
+	union {
+		__le32 barker;
+		__u32 size;	/* same size type as barker!! */
+	};
+	union {
+		__le32 sequence;
+		__u32 offset;	/* same size type as barker!! */
+	};
+	__le16 num_pls;
+	__le16 rsv1;
+	__le16 padding;
+	__le16 rsv2;
+	struct i2400m_pld pld[0];
+} __attribute__ ((packed));
+
+
+
+/*
+ * L3/L4 control protocol
+ */
+
+enum {
+	/* Interface version */
+	I2400M_L3L4_VERSION             = 0x0100,
+};
+
+/* Message types */
+enum i2400m_mt {
+	I2400M_MT_RESERVED              = 0x0000,
+	I2400M_MT_INVALID               = 0xffff,
+	I2400M_MT_REPORT_MASK		= 0x8000,
+
+	I2400M_MT_GET_SCAN_RESULT  	= 0x4202,
+	I2400M_MT_SET_SCAN_PARAM   	= 0x4402,
+	I2400M_MT_CMD_RF_CONTROL   	= 0x4602,
+	I2400M_MT_CMD_SCAN         	= 0x4603,
+	I2400M_MT_CMD_CONNECT      	= 0x4604,
+	I2400M_MT_CMD_DISCONNECT   	= 0x4605,
+	I2400M_MT_CMD_EXIT_IDLE   	= 0x4606,
+	I2400M_MT_GET_LM_VERSION   	= 0x5201,
+	I2400M_MT_GET_DEVICE_INFO  	= 0x5202,
+	I2400M_MT_GET_LINK_STATUS  	= 0x5203,
+	I2400M_MT_GET_STATISTICS   	= 0x5204,
+	I2400M_MT_GET_STATE        	= 0x5205,
+	I2400M_MT_GET_MEDIA_STATUS	= 0x5206,
+	I2400M_MT_SET_INIT_CONFIG	= 0x5404,
+	I2400M_MT_CMD_INIT	        = 0x5601,
+	I2400M_MT_CMD_TERMINATE		= 0x5602,
+	I2400M_MT_CMD_MODE_OF_OP	= 0x5603,
+	I2400M_MT_CMD_RESET_DEVICE	= 0x5604,
+	I2400M_MT_CMD_MONITOR_CONTROL   = 0x5605,
+	I2400M_MT_CMD_ENTER_POWERSAVE   = 0x5606,
+	I2400M_MT_GET_TLS_OPERATION_RESULT = 0x6201,
+	I2400M_MT_SET_EAP_SUCCESS       = 0x6402,
+	I2400M_MT_SET_EAP_FAIL          = 0x6403,
+	I2400M_MT_SET_EAP_KEY          	= 0x6404,
+	I2400M_MT_CMD_SEND_EAP_RESPONSE = 0x6602,
+	I2400M_MT_REPORT_SCAN_RESULT    = 0xc002,
+	I2400M_MT_REPORT_STATE		= 0xd002,
+	I2400M_MT_REPORT_POWERSAVE_READY = 0xd005,
+	I2400M_MT_REPORT_EAP_REQUEST    = 0xe002,
+	I2400M_MT_REPORT_EAP_RESTART    = 0xe003,
+	I2400M_MT_REPORT_ALT_ACCEPT    	= 0xe004,
+	I2400M_MT_REPORT_KEY_REQUEST 	= 0xe005,
+};
+
+
+/*
+ * Message Ack Status codes
+ *
+ * When a message is replied-to, this status is reported.
+ */
+enum i2400m_ms {
+	I2400M_MS_DONE_OK                  = 0,
+	I2400M_MS_DONE_IN_PROGRESS         = 1,
+	I2400M_MS_INVALID_OP               = 2,
+	I2400M_MS_BAD_STATE                = 3,
+	I2400M_MS_ILLEGAL_VALUE            = 4,
+	I2400M_MS_MISSING_PARAMS           = 5,
+	I2400M_MS_VERSION_ERROR            = 6,
+	I2400M_MS_ACCESSIBILITY_ERROR      = 7,
+	I2400M_MS_BUSY                     = 8,
+	I2400M_MS_CORRUPTED_TLV            = 9,
+	I2400M_MS_UNINITIALIZED            = 10,
+	I2400M_MS_UNKNOWN_ERROR            = 11,
+	I2400M_MS_PRODUCTION_ERROR         = 12,
+	I2400M_MS_NO_RF                    = 13,
+	I2400M_MS_NOT_READY_FOR_POWERSAVE  = 14,
+	I2400M_MS_THERMAL_CRITICAL         = 15,
+	I2400M_MS_MAX
+};
+
+
+/**
+ * i2400m_tlv - enumeration of the different types of TLVs
+ *
+ * TLVs stand for type-length-value and are the header for a payload
+ * composed of almost anything. Each payload has a type assigned
+ * and a length.
+ */
+enum i2400m_tlv {
+	I2400M_TLV_L4_MESSAGE_VERSIONS = 129,
+	I2400M_TLV_SYSTEM_STATE = 141,
+	I2400M_TLV_MEDIA_STATUS = 161,
+	I2400M_TLV_RF_OPERATION = 162,
+	I2400M_TLV_RF_STATUS = 163,
+	I2400M_TLV_DEVICE_RESET_TYPE = 132,
+	I2400M_TLV_CONFIG_IDLE_PARAMETERS = 601,
+};
+
+
+struct i2400m_tlv_hdr {
+	__le16 type;
+	__le16 length;		/* payload's */
+	__u8   pl[0];
+} __attribute__((packed));
+
+
+struct i2400m_l3l4_hdr {
+	__le16 type;
+	__le16 length;		/* payload's */
+	__le16 version;
+	__le16 resv1;
+	__le16 status;
+	__le16 resv2;
+	struct i2400m_tlv_hdr pl[0];
+} __attribute__((packed));
+
+
+/**
+ * i2400m_system_state - different states of the device
+ */
+enum i2400m_system_state {
+	I2400M_SS_UNINITIALIZED = 1,
+	I2400M_SS_INIT,
+	I2400M_SS_READY,
+	I2400M_SS_SCAN,
+	I2400M_SS_STANDBY,
+	I2400M_SS_CONNECTING,
+	I2400M_SS_WIMAX_CONNECTED,
+	I2400M_SS_DATA_PATH_CONNECTED,
+	I2400M_SS_IDLE,
+	I2400M_SS_DISCONNECTING,
+	I2400M_SS_OUT_OF_ZONE,
+	I2400M_SS_SLEEPACTIVE,
+	I2400M_SS_PRODUCTION,
+	I2400M_SS_CONFIG,
+	I2400M_SS_RF_OFF,
+	I2400M_SS_RF_SHUTDOWN,
+	I2400M_SS_DEVICE_DISCONNECT,
+	I2400M_SS_MAX,
+};
+
+
+/**
+ * i2400m_tlv_system_state - report on the state of the system
+ *
+ * @state: see enum i2400m_system_state
+ */
+struct i2400m_tlv_system_state {
+	struct i2400m_tlv_hdr hdr;
+	__le32 state;
+} __attribute__((packed));
+
+
+struct i2400m_tlv_l4_message_versions {
+	struct i2400m_tlv_hdr hdr;
+	__le16 major;
+	__le16 minor;
+	__le16 branch;
+	__le16 reserved;
+} __attribute__((packed));
+
+
+struct i2400m_tlv_detailed_device_info {
+	struct i2400m_tlv_hdr hdr;
+	__u8 reserved1[400];
+	__u8 mac_address[6];
+	__u8 reserved2[2];
+} __attribute__((packed));
+
+
+enum i2400m_rf_switch_status {
+	I2400M_RF_SWITCH_ON = 1,
+	I2400M_RF_SWITCH_OFF = 2,
+};
+
+struct i2400m_tlv_rf_switches_status {
+	struct i2400m_tlv_hdr hdr;
+	__u8 sw_rf_switch;	/* 1 ON, 2 OFF */
+	__u8 hw_rf_switch;	/* 1 ON, 2 OFF */
+	__u8 reserved[2];
+} __attribute__((packed));
+
+
+enum {
+	i2400m_rf_operation_on = 1,
+	i2400m_rf_operation_off = 2
+};
+
+struct i2400m_tlv_rf_operation {
+	struct i2400m_tlv_hdr hdr;
+	__le32 status;	/* 1 ON, 2 OFF */
+} __attribute__((packed));
+
+
+enum i2400m_tlv_reset_type {
+	I2400M_RESET_TYPE_COLD = 1,
+	I2400M_RESET_TYPE_WARM
+};
+
+struct i2400m_tlv_device_reset_type {
+	struct i2400m_tlv_hdr hdr;
+	__le32 reset_type;
+} __attribute__((packed));
+
+
+struct i2400m_tlv_config_idle_parameters {
+	struct i2400m_tlv_hdr hdr;
+	__le32 idle_timeout;	/* 100 to 300000 ms [5min], 100 increments
+				 * 0 disabled */
+	__le32 idle_paging_interval;	/* frames */
+} __attribute__((packed));
+
+
+enum i2400m_media_status {
+	I2400M_MEDIA_STATUS_LINK_UP = 1,
+	I2400M_MEDIA_STATUS_LINK_DOWN,
+	I2400M_MEDIA_STATUS_LINK_RENEW,
+};
+
+struct i2400m_tlv_media_status {
+	struct i2400m_tlv_hdr hdr;
+	__le32 media_status;
+} __attribute__((packed));
+
+#endif /* #ifndef __LINUX__WIMAX__I2400M_H__ */
-- 
cgit v1.2.3


From e30698743419d20dce03d033761f203b4d847ab0 Mon Sep 17 00:00:00 2001
From: Inaky Perez-Gonzalez <inaky@linux.intel.com>
Date: Sat, 20 Dec 2008 16:57:59 -0800
Subject: wimax: export linux/wimax.h and linux/wimax/i2400m.h with
 headers_install

These two files are what user space can use to establish communication
with the WiMAX kernel API and to speak the Intel 2400m Wireless WiMAX
connection's control protocol.

Signed-off-by: Inaky Perez-Gonzalez <inaky@linux.intel.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/Kbuild       | 2 ++
 include/linux/wimax/Kbuild | 1 +
 2 files changed, 3 insertions(+)
 create mode 100644 include/linux/wimax/Kbuild

(limited to 'include/linux')

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index a3323f337e4d..12e9a2957caf 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -371,3 +371,5 @@ unifdef-y += xattr.h
 unifdef-y += xfrm.h
 
 objhdr-y += version.h
+header-y += wimax.h
+header-y += wimax/
diff --git a/include/linux/wimax/Kbuild b/include/linux/wimax/Kbuild
new file mode 100644
index 000000000000..3cb4f269bb09
--- /dev/null
+++ b/include/linux/wimax/Kbuild
@@ -0,0 +1 @@
+header-y += i2400m.h
-- 
cgit v1.2.3


From f7b7baae6b30ff04124259ff8d7c0c0d281320e6 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Tue, 11 Nov 2008 17:17:46 +0800
Subject: PCI: add PCI Advanced Feature Capability defines

PCI Advanced Features Capability is introduced by "Conventional PCI
Advanced Caps ECN" (can be downloaded in pcisig.com).  Add defines for
the various AF capabilities, including function level reset (FLR).

Reviewed-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci_regs.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h
index e5effd47ed74..7766488470e4 100644
--- a/include/linux/pci_regs.h
+++ b/include/linux/pci_regs.h
@@ -210,6 +210,7 @@
 #define  PCI_CAP_ID_AGP3	0x0E	/* AGP Target PCI-PCI bridge */
 #define  PCI_CAP_ID_EXP 	0x10	/* PCI Express */
 #define  PCI_CAP_ID_MSIX	0x11	/* MSI-X */
+#define  PCI_CAP_ID_AF		0x13	/* PCI Advanced Features */
 #define PCI_CAP_LIST_NEXT	1	/* Next capability in the list */
 #define PCI_CAP_FLAGS		2	/* Capability defined flags (16 bits) */
 #define PCI_CAP_SIZEOF		4
@@ -316,6 +317,17 @@
 #define  PCI_CHSWP_EXT		0x40	/* ENUM# status - extraction */
 #define  PCI_CHSWP_INS		0x80	/* ENUM# status - insertion */
 
+/* PCI Advanced Feature registers */
+
+#define PCI_AF_LENGTH		2
+#define PCI_AF_CAP		3
+#define  PCI_AF_CAP_TP		0x01
+#define  PCI_AF_CAP_FLR		0x02
+#define PCI_AF_CTRL		4
+#define  PCI_AF_CTRL_FLR	0x01
+#define PCI_AF_STATUS		5
+#define  PCI_AF_STATUS_TP	0x01
+
 /* PCI-X registers */
 
 #define PCI_X_CMD		2	/* Modes & Features */
-- 
cgit v1.2.3


From 8b62091e20215730be1b94b7cd135a78a3e692ca Mon Sep 17 00:00:00 2001
From: Andrew Patterson <andrew.patterson@hp.com>
Date: Mon, 10 Nov 2008 15:30:40 -0700
Subject: ACPI/PCI: include missing acpi.h file in pci-acpi.h.

The pci-acpi.h file will not compile without including linux/acpi.h.

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci-acpi.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h
index 8837928fbf33..a9e4c34e9389 100644
--- a/include/linux/pci-acpi.h
+++ b/include/linux/pci-acpi.h
@@ -8,6 +8,8 @@
 #ifndef _PCI_ACPI_H_
 #define _PCI_ACPI_H_
 
+#include <linux/acpi.h>
+
 #define OSC_QUERY_TYPE			0
 #define OSC_SUPPORT_TYPE 		1
 #define OSC_CONTROL_TYPE		2
-- 
cgit v1.2.3


From 990a7ac5645883a833a11b900bb6f25b65dea65b Mon Sep 17 00:00:00 2001
From: Andrew Patterson <andrew.patterson@hp.com>
Date: Mon, 10 Nov 2008 15:30:45 -0700
Subject: ACPI/PCI: call _OSC support during root bridge discovery

Add pci_acpi_osc_support() and call it when a PCI bridge is added.  This
allows us to avoid having every individual PCI root bridge driver call
_OSC support for every root bridge in their probe functions, a
significant savings in boot time.

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/acpi/pci_root.c  |  9 +++++++++
 drivers/pci/pci-acpi.c   | 24 +++++++++++++++++++-----
 include/linux/pci-acpi.h |  1 +
 3 files changed, 29 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index 642554b1b60c..de4d57114fe4 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -31,6 +31,7 @@
 #include <linux/spinlock.h>
 #include <linux/pm.h>
 #include <linux/pci.h>
+#include <linux/pci-acpi.h>
 #include <linux/acpi.h>
 #include <acpi/acpi_bus.h>
 #include <acpi/acpi_drivers.h>
@@ -193,6 +194,7 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
 	unsigned long long value = 0;
 	acpi_handle handle = NULL;
 	struct acpi_device *child;
+	u32 flags;
 
 
 	if (!device)
@@ -210,6 +212,13 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
 
 	device->ops.bind = acpi_pci_bind;
 
+	/*
+	 * All supported architectures that use ACPI have support for
+	 * PCI domains, so we indicate this in _OSC support capabilities.
+	 */
+	flags = OSC_PCI_SEGMENT_GROUPS_SUPPORT;
+	pci_acpi_osc_support(device->handle, flags);
+
 	/* 
 	 * Segment
 	 * -------
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 2ed3f10d0860..8a1f02c3c915 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -143,28 +143,42 @@ static acpi_status __acpi_query_osc(u32 flags, struct acpi_osc_data *osc_data,
 	return status;
 }
 
-static acpi_status acpi_query_osc(acpi_handle handle,
-				  u32 level, void *context, void **retval)
+/*
+ * pci_acpi_osc_support: Invoke _OSC indicating support for the given feature
+ * @flags: Bitmask of flags to support
+ *
+ * See the ACPI spec for the definition of the flags
+ */
+int pci_acpi_osc_support(acpi_handle handle, u32 flags)
 {
+	u32 dummy;
 	acpi_status status;
-	struct acpi_osc_data *osc_data;
-	u32 flags = (unsigned long)context, dummy;
 	acpi_handle tmp;
+	struct acpi_osc_data *osc_data;
+	int rc = 0;
 
 	status = acpi_get_handle(handle, "_OSC", &tmp);
 	if (ACPI_FAILURE(status))
-		return AE_OK;
+		return -ENOTTY;
 
 	mutex_lock(&pci_acpi_lock);
 	osc_data = acpi_get_osc_data(handle);
 	if (!osc_data) {
 		printk(KERN_ERR "acpi osc data array is full\n");
+		rc = -ENOMEM;
 		goto out;
 	}
 
 	__acpi_query_osc(flags, osc_data, &dummy);
 out:
 	mutex_unlock(&pci_acpi_lock);
+	return rc;
+}
+
+static acpi_status acpi_query_osc(acpi_handle handle, u32 level,
+				  void *context, void **retval)
+{
+	pci_acpi_osc_support(handle, (unsigned long)context);
 	return AE_OK;
 }
 
diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h
index a9e4c34e9389..424f06f84cab 100644
--- a/include/linux/pci-acpi.h
+++ b/include/linux/pci-acpi.h
@@ -51,6 +51,7 @@
 #ifdef CONFIG_ACPI
 extern acpi_status pci_osc_control_set(acpi_handle handle, u32 flags);
 extern acpi_status __pci_osc_support_set(u32 flags, const char *hid);
+int pci_acpi_osc_support(acpi_handle handle, u32 flags);
 static inline acpi_status pci_osc_support_set(u32 flags)
 {
 	return __pci_osc_support_set(flags, PCI_ROOT_HID_STRING);
-- 
cgit v1.2.3


From 0ef5f8f6159e44b4faa997be08d1a3bcbf44ad08 Mon Sep 17 00:00:00 2001
From: Andrew Patterson <andrew.patterson@hp.com>
Date: Mon, 10 Nov 2008 15:30:50 -0700
Subject: ACPI/PCI: PCI extended config _OSC support called when root bridge
 added

The _OSC capability OSC_EXT_PCI_CONFIG_SUPPORT is set when the root
bridge is added with pci_acpi_osc_support() if we can access PCI
extended config space.

This adds the function pci_ext_cfg_avail which returns true if we can
access PCI extended config space (offset greater than 0xff). It
currently only returns false if arch=x86 and raw_pci_ext_ops is not set
(which might happen if pci=nommcfg is set on the kernel command-line).

Signed-off-by: Andrew Patterson <andrew.patterson@hp.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/x86/pci/common.c   |  8 ++++++++
 drivers/acpi/pci_root.c | 10 ++++++++--
 drivers/pci/pci.c       | 13 +++++++++++++
 include/linux/pci.h     |  2 ++
 4 files changed, 31 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 62ddb73e09ed..9ab8509f7b15 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -562,6 +562,14 @@ void pcibios_disable_device (struct pci_dev *dev)
 		pcibios_disable_irq(dev);
 }
 
+int pci_ext_cfg_avail(struct pci_dev *dev)
+{
+	if (raw_pci_ext_ops)
+		return 1;
+	else
+		return 0;
+}
+
 struct pci_bus * __devinit pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node)
 {
 	struct pci_bus *bus = NULL;
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index de4d57114fe4..96e68e841539 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -194,7 +194,7 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
 	unsigned long long value = 0;
 	acpi_handle handle = NULL;
 	struct acpi_device *child;
-	u32 flags;
+	u32 flags, base_flags;
 
 
 	if (!device)
@@ -216,7 +216,7 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
 	 * All supported architectures that use ACPI have support for
 	 * PCI domains, so we indicate this in _OSC support capabilities.
 	 */
-	flags = OSC_PCI_SEGMENT_GROUPS_SUPPORT;
+	flags = base_flags = OSC_PCI_SEGMENT_GROUPS_SUPPORT;
 	pci_acpi_osc_support(device->handle, flags);
 
 	/* 
@@ -344,6 +344,12 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
 	list_for_each_entry(child, &device->children, node)
 		acpi_pci_bridge_scan(child);
 
+	/* Indicate support for various _OSC capabilities. */
+	if (pci_ext_cfg_avail(root->bus->self))
+		flags |= OSC_EXT_PCI_CONFIG_SUPPORT;
+	if (flags != base_flags)
+		pci_acpi_osc_support(device->handle, flags);
+
       end:
 	if (result) {
 		if (!list_empty(&root->node))
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 3c2fa2fdc9cd..48fa860276d4 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -2084,6 +2084,19 @@ static void __devinit pci_no_domains(void)
 #endif
 }
 
+/**
+ * pci_ext_cfg_enabled - can we access extended PCI config space?
+ * @dev: The PCI device of the root bridge.
+ *
+ * Returns 1 if we can access PCI extended config space (offsets
+ * greater than 0xff). This is the default implementation. Architecture
+ * implementations can override this.
+ */
+int __attribute__ ((weak)) pci_ext_cfg_avail(struct pci_dev *dev)
+{
+	return 1;
+}
+
 static int __devinit pci_init(void)
 {
 	struct pci_dev *dev = NULL;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 4bb156ba854a..6fd47654ca4e 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1140,6 +1140,8 @@ static inline void pci_mmcfg_early_init(void) { }
 static inline void pci_mmcfg_late_init(void) { }
 #endif
 
+int pci_ext_cfg_avail(struct pci_dev *dev);
+
 #ifdef CONFIG_HAS_IOMEM
 static inline void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar)
 {
-- 
cgit v1.2.3


From 3e1b16002af29758b6bc9c38939d43838d9335bc Mon Sep 17 00:00:00 2001
From: Andrew Patterson <andrew.patterson@hp.com>
Date: Mon, 10 Nov 2008 15:30:55 -0700
Subject: ACPI/PCI: PCIe ASPM _OSC support capabilities called when root bridge
 added

The _OSC capabilities OSC_ACTIVE_STATE_PWR_SUPPORT and
OSC_CLOCK_PWR_CAPABILITY_SUPPORT are set when the root bridge is added
with pci_acpi_osc_support(), so we no longer need to do it in the ASPM
driver.  Also add the function pcie_aspm_enabled, which returns true if
pcie_aspm=off is not on the kernel command-line.

Signed-off-by: Andrew Patterson <andrew.patterson@hp.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/acpi/pci_root.c |  3 +++
 drivers/pci/pcie/aspm.c | 27 +++++++++------------------
 include/linux/pci.h     |  9 +++++++++
 3 files changed, 21 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index 96e68e841539..9fe026b1c9d0 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -347,6 +347,9 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
 	/* Indicate support for various _OSC capabilities. */
 	if (pci_ext_cfg_avail(root->bus->self))
 		flags |= OSC_EXT_PCI_CONFIG_SUPPORT;
+	if (pcie_aspm_enabled())
+		flags |= OSC_ACTIVE_STATE_PWR_SUPPORT |
+			OSC_CLOCK_PWR_CAPABILITY_SUPPORT;
 	if (flags != base_flags)
 		pci_acpi_osc_support(device->handle, flags);
 
diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index 9aad608bcf3f..e361c7dc726f 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -857,24 +857,15 @@ void pcie_no_aspm(void)
 		aspm_disabled = 1;
 }
 
-#ifdef CONFIG_ACPI
-#include <acpi/acpi_bus.h>
-#include <linux/pci-acpi.h>
-static void pcie_aspm_platform_init(void)
-{
-	pcie_osc_support_set(OSC_ACTIVE_STATE_PWR_SUPPORT|
-		OSC_CLOCK_PWR_CAPABILITY_SUPPORT);
-}
-#else
-static inline void pcie_aspm_platform_init(void) { }
-#endif
-
-static int __init pcie_aspm_init(void)
+/**
+ * pcie_aspm_enabled - is PCIe ASPM enabled?
+ *
+ * Returns true if ASPM has not been disabled by the command-line option
+ * pcie_aspm=off.
+ **/
+int pcie_aspm_enabled(void)
 {
-	if (aspm_disabled)
-		return 0;
-	pcie_aspm_platform_init();
-	return 0;
+       return !aspm_disabled;
 }
+EXPORT_SYMBOL(pcie_aspm_enabled);
 
-fs_initcall(pcie_aspm_init);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 6fd47654ca4e..eae97a2bf603 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -791,6 +791,15 @@ extern void msi_remove_pci_irq_vectors(struct pci_dev *dev);
 extern void pci_restore_msi_state(struct pci_dev *dev);
 #endif
 
+#ifndef CONFIG_PCIEASPM
+static inline int pcie_aspm_enabled(void)
+{
+	return 0;
+}
+#else
+extern int pcie_aspm_enabled(void);
+#endif
+
 #ifdef CONFIG_HT_IRQ
 /* The functions a driver should call */
 int  ht_create_irq(struct pci_dev *dev, int idx);
-- 
cgit v1.2.3


From 07ae95f988a34465bdcb384bfa73c03424fe2312 Mon Sep 17 00:00:00 2001
From: Andrew Patterson <andrew.patterson@hp.com>
Date: Mon, 10 Nov 2008 15:31:05 -0700
Subject: ACPI/PCI: PCI MSI _OSC support capabilities called when root bridge
 added

The _OSC capability OSC_MSI_SUPPORT is set when the root bridge is added
with pci_acpi_osc_support(), so we no longer need to do it in the PCI
MSI driver.  Also adds the function pci_msi_enabled, which returns true
if pci=nomsi is not on the kernel command-line.

Signed-off-by: Andrew Patterson <andrew.patterson@hp.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/acpi/pci_root.c |  2 ++
 drivers/pci/msi.c       | 31 +++++++++++--------------------
 drivers/pci/pci.c       |  2 --
 drivers/pci/pci.h       |  2 --
 include/linux/pci.h     |  5 +++++
 5 files changed, 18 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index 9fe026b1c9d0..5b38a026d122 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -350,6 +350,8 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
 	if (pcie_aspm_enabled())
 		flags |= OSC_ACTIVE_STATE_PWR_SUPPORT |
 			OSC_CLOCK_PWR_CAPABILITY_SUPPORT;
+	if (pci_msi_enabled())
+		flags |= OSC_MSI_SUPPORT;
 	if (flags != base_flags)
 		pci_acpi_osc_support(device->handle, flags);
 
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 11a51f8ed3b3..b4a90badd0a6 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -776,28 +776,19 @@ void pci_no_msi(void)
 	pci_msi_enable = 0;
 }
 
-void pci_msi_init_pci_dev(struct pci_dev *dev)
-{
-	INIT_LIST_HEAD(&dev->msi_list);
-}
-
-#ifdef CONFIG_ACPI
-#include <linux/acpi.h>
-#include <linux/pci-acpi.h>
-static void __devinit msi_acpi_init(void)
+/**
+ * pci_msi_enabled - is MSI enabled?
+ *
+ * Returns true if MSI has not been disabled by the command-line option
+ * pci=nomsi.
+ **/
+int pci_msi_enabled(void)
 {
-	if (acpi_pci_disabled)
-		return;
-	pci_osc_support_set(OSC_MSI_SUPPORT);
-	pcie_osc_support_set(OSC_MSI_SUPPORT);
+	return pci_msi_enable;
 }
-#else
-static inline void msi_acpi_init(void) { }
-#endif /* CONFIG_ACPI */
+EXPORT_SYMBOL(pci_msi_enabled);
 
-void __devinit msi_init(void)
+void pci_msi_init_pci_dev(struct pci_dev *dev)
 {
-	if (!pci_msi_enable)
-		return;
-	msi_acpi_init();
+	INIT_LIST_HEAD(&dev->msi_list);
 }
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 48fa860276d4..2cfa41e367a7 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -2105,8 +2105,6 @@ static int __devinit pci_init(void)
 		pci_fixup_device(pci_fixup_final, dev);
 	}
 
-	msi_init();
-
 	return 0;
 }
 
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index d3e65e29df51..9162e242b99e 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -102,11 +102,9 @@ extern unsigned int pci_pm_d3_delay;
 #ifdef CONFIG_PCI_MSI
 void pci_no_msi(void);
 extern void pci_msi_init_pci_dev(struct pci_dev *dev);
-extern void __devinit msi_init(void);
 #else
 static inline void pci_no_msi(void) { }
 static inline void pci_msi_init_pci_dev(struct pci_dev *dev) { }
-static inline void msi_init(void) { }
 #endif
 
 #ifdef CONFIG_PCIEAER
diff --git a/include/linux/pci.h b/include/linux/pci.h
index eae97a2bf603..59a3dc2059d3 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -779,6 +779,10 @@ static inline void msi_remove_pci_irq_vectors(struct pci_dev *dev)
 
 static inline void pci_restore_msi_state(struct pci_dev *dev)
 { }
+static inline int pci_msi_enabled(void)
+{
+	return 0;
+}
 #else
 extern int pci_enable_msi(struct pci_dev *dev);
 extern void pci_msi_shutdown(struct pci_dev *dev);
@@ -789,6 +793,7 @@ extern void pci_msix_shutdown(struct pci_dev *dev);
 extern void pci_disable_msix(struct pci_dev *dev);
 extern void msi_remove_pci_irq_vectors(struct pci_dev *dev);
 extern void pci_restore_msi_state(struct pci_dev *dev);
+extern int pci_msi_enabled(void);
 #endif
 
 #ifndef CONFIG_PCIEASPM
-- 
cgit v1.2.3


From 23616941914917cf25b94789856b5326b68d8ee8 Mon Sep 17 00:00:00 2001
From: Andrew Patterson <andrew.patterson@hp.com>
Date: Mon, 10 Nov 2008 15:31:10 -0700
Subject: ACPI/PCI: remove obsolete _OSC capability support functions

The acpi_query_osc, __pci_osc_support_set, pci_osc_support_set, and
pcie_osc_support_set functions have been obsoleted in favor of setting
these capabilities during root bridge discovery with
pci_acpi_osc_support.  There are no longer any callers of these
functions, so remove them.

Signed-off-by: Andrew Patterson <andrew.patterson@hp.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/pci-acpi.c   | 25 -------------------------
 include/linux/pci-acpi.h | 11 -----------
 2 files changed, 36 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 8a1f02c3c915..8f923ee5177f 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -175,31 +175,6 @@ out:
 	return rc;
 }
 
-static acpi_status acpi_query_osc(acpi_handle handle, u32 level,
-				  void *context, void **retval)
-{
-	pci_acpi_osc_support(handle, (unsigned long)context);
-	return AE_OK;
-}
-
-/**
- * __pci_osc_support_set - register OS support to Firmware
- * @flags: OS support bits
- * @hid: hardware ID
- *
- * Update OS support fields and doing a _OSC Query to obtain an update
- * from Firmware on supported control bits.
- **/
-acpi_status __pci_osc_support_set(u32 flags, const char *hid)
-{
-	if (!(flags & OSC_SUPPORT_MASKS))
-		return AE_TYPE;
-
-	acpi_get_devices(hid, acpi_query_osc,
-			 (void *)(unsigned long)flags, NULL);
-	return AE_OK;
-}
-
 /**
  * pci_osc_control_set - commit requested control to Firmware
  * @handle: acpi_handle for the target ACPI object
diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h
index 424f06f84cab..871e096e0fbc 100644
--- a/include/linux/pci-acpi.h
+++ b/include/linux/pci-acpi.h
@@ -50,16 +50,7 @@
 
 #ifdef CONFIG_ACPI
 extern acpi_status pci_osc_control_set(acpi_handle handle, u32 flags);
-extern acpi_status __pci_osc_support_set(u32 flags, const char *hid);
 int pci_acpi_osc_support(acpi_handle handle, u32 flags);
-static inline acpi_status pci_osc_support_set(u32 flags)
-{
-	return __pci_osc_support_set(flags, PCI_ROOT_HID_STRING);
-}
-static inline acpi_status pcie_osc_support_set(u32 flags)
-{
-	return __pci_osc_support_set(flags, PCI_EXPRESS_ROOT_HID_STRING);
-}
 static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev)
 {
 	/* Find root host bridge */
@@ -76,8 +67,6 @@ typedef u32 		acpi_status;
 #endif    
 static inline acpi_status pci_osc_control_set(acpi_handle handle, u32 flags)
 {return AE_ERROR;}
-static inline acpi_status pci_osc_support_set(u32 flags) {return AE_ERROR;} 
-static inline acpi_status pcie_osc_support_set(u32 flags) {return AE_ERROR;}
 static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev)
 { return NULL; }
 #endif
-- 
cgit v1.2.3


From e8de1481fd7126ee9e93d6889da6f00c05e1e019 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Wed, 22 Oct 2008 19:55:31 -0700
Subject: resource: allow MMIO exclusivity for device drivers

Device drivers that use pci_request_regions() (and similar APIs) have a
reasonable expectation that they are the only ones accessing their device.
As part of the e1000e hunt, we were afraid that some userland (X or some
bootsplash stuff) was mapping the MMIO region that the driver thought it
had exclusively via /dev/mem or via various sysfs resource mappings.

This patch adds the option for device drivers to cause their reserved
regions to the "banned from /dev/mem use" list, so now both kernel memory
and device-exclusive MMIO regions are banned.
NOTE: This is only active when CONFIG_STRICT_DEVMEM is set.

In addition to the config option, a kernel parameter iomem=relaxed is
provided for the cases where developers want to diagnose, in the field,
drivers issues from userspace.

Reviewed-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 Documentation/kernel-parameters.txt |   4 ++
 arch/x86/mm/init_32.c               |   2 +
 arch/x86/mm/init_64.c               |   2 +
 drivers/net/e1000e/netdev.c         |   2 +-
 drivers/pci/pci-sysfs.c             |   3 +
 drivers/pci/pci.c                   | 107 ++++++++++++++++++++++++++++++++----
 include/linux/ioport.h              |  11 +++-
 include/linux/pci.h                 |   3 +
 kernel/resource.c                   |  61 +++++++++++++++++++-
 9 files changed, 176 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 0b3f6711d2f1..0072fabb1dd1 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -918,6 +918,10 @@ and is between 256 and 4096 characters. It is defined in the file
 
 	inttest=	[IA64]
 
+	iomem=		Disable strict checking of access to MMIO memory
+		strict	regions from userspace.
+		relaxed
+
 	iommu=		[x86]
 		off
 		force
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 544d724caeee..88f1b10de3be 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -328,6 +328,8 @@ int devmem_is_allowed(unsigned long pagenr)
 {
 	if (pagenr <= 256)
 		return 1;
+	if (iomem_is_exclusive(pagenr << PAGE_SHIFT))
+		return 0;
 	if (!page_is_ram(pagenr))
 		return 1;
 	return 0;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 54c437e96541..23f68e77ad1f 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -888,6 +888,8 @@ int devmem_is_allowed(unsigned long pagenr)
 {
 	if (pagenr <= 256)
 		return 1;
+	if (iomem_is_exclusive(pagenr << PAGE_SHIFT))
+		return 0;
 	if (!page_is_ram(pagenr))
 		return 1;
 	return 0;
diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c
index d4639facd1bd..91817d0afcaf 100644
--- a/drivers/net/e1000e/netdev.c
+++ b/drivers/net/e1000e/netdev.c
@@ -4807,7 +4807,7 @@ static int __devinit e1000_probe(struct pci_dev *pdev,
 		}
 	}
 
-	err = pci_request_selected_regions(pdev,
+	err = pci_request_selected_regions_exclusive(pdev,
 	                                  pci_select_bars(pdev, IORESOURCE_MEM),
 	                                  e1000e_driver_name);
 	if (err)
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 388440e0d222..d5cdccf27a69 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -620,6 +620,9 @@ pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr,
 	vma->vm_pgoff += start >> PAGE_SHIFT;
 	mmap_type = res->flags & IORESOURCE_MEM ? pci_mmap_mem : pci_mmap_io;
 
+	if (res->flags & IORESOURCE_MEM && iomem_is_exclusive(start))
+		return -EINVAL;
+
 	return pci_mmap_page_range(pdev, vma, mmap_type, write_combine);
 }
 
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 2cfa41e367a7..47663dc0daf7 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1395,7 +1395,8 @@ void pci_release_region(struct pci_dev *pdev, int bar)
  *	Returns 0 on success, or %EBUSY on error.  A warning
  *	message is also printed on failure.
  */
-int pci_request_region(struct pci_dev *pdev, int bar, const char *res_name)
+static int __pci_request_region(struct pci_dev *pdev, int bar, const char *res_name,
+									int exclusive)
 {
 	struct pci_devres *dr;
 
@@ -1408,8 +1409,9 @@ int pci_request_region(struct pci_dev *pdev, int bar, const char *res_name)
 			goto err_out;
 	}
 	else if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM) {
-		if (!request_mem_region(pci_resource_start(pdev, bar),
-				        pci_resource_len(pdev, bar), res_name))
+		if (!__request_mem_region(pci_resource_start(pdev, bar),
+					pci_resource_len(pdev, bar), res_name,
+					exclusive))
 			goto err_out;
 	}
 
@@ -1427,6 +1429,47 @@ err_out:
 	return -EBUSY;
 }
 
+/**
+ *	pci_request_region - Reserved PCI I/O and memory resource
+ *	@pdev: PCI device whose resources are to be reserved
+ *	@bar: BAR to be reserved
+ *	@res_name: Name to be associated with resource.
+ *
+ *	Mark the PCI region associated with PCI device @pdev BR @bar as
+ *	being reserved by owner @res_name.  Do not access any
+ *	address inside the PCI regions unless this call returns
+ *	successfully.
+ *
+ *	Returns 0 on success, or %EBUSY on error.  A warning
+ *	message is also printed on failure.
+ */
+int pci_request_region(struct pci_dev *pdev, int bar, const char *res_name)
+{
+	return __pci_request_region(pdev, bar, res_name, 0);
+}
+
+/**
+ *	pci_request_region_exclusive - Reserved PCI I/O and memory resource
+ *	@pdev: PCI device whose resources are to be reserved
+ *	@bar: BAR to be reserved
+ *	@res_name: Name to be associated with resource.
+ *
+ *	Mark the PCI region associated with PCI device @pdev BR @bar as
+ *	being reserved by owner @res_name.  Do not access any
+ *	address inside the PCI regions unless this call returns
+ *	successfully.
+ *
+ *	Returns 0 on success, or %EBUSY on error.  A warning
+ *	message is also printed on failure.
+ *
+ *	The key difference that _exclusive makes it that userspace is
+ *	explicitly not allowed to map the resource via /dev/mem or
+ * 	sysfs.
+ */
+int pci_request_region_exclusive(struct pci_dev *pdev, int bar, const char *res_name)
+{
+	return __pci_request_region(pdev, bar, res_name, IORESOURCE_EXCLUSIVE);
+}
 /**
  * pci_release_selected_regions - Release selected PCI I/O and memory resources
  * @pdev: PCI device whose resources were previously reserved
@@ -1444,20 +1487,14 @@ void pci_release_selected_regions(struct pci_dev *pdev, int bars)
 			pci_release_region(pdev, i);
 }
 
-/**
- * pci_request_selected_regions - Reserve selected PCI I/O and memory resources
- * @pdev: PCI device whose resources are to be reserved
- * @bars: Bitmask of BARs to be requested
- * @res_name: Name to be associated with resource
- */
-int pci_request_selected_regions(struct pci_dev *pdev, int bars,
-				 const char *res_name)
+int __pci_request_selected_regions(struct pci_dev *pdev, int bars,
+				 const char *res_name, int excl)
 {
 	int i;
 
 	for (i = 0; i < 6; i++)
 		if (bars & (1 << i))
-			if(pci_request_region(pdev, i, res_name))
+			if (__pci_request_region(pdev, i, res_name, excl))
 				goto err_out;
 	return 0;
 
@@ -1469,6 +1506,26 @@ err_out:
 	return -EBUSY;
 }
 
+
+/**
+ * pci_request_selected_regions - Reserve selected PCI I/O and memory resources
+ * @pdev: PCI device whose resources are to be reserved
+ * @bars: Bitmask of BARs to be requested
+ * @res_name: Name to be associated with resource
+ */
+int pci_request_selected_regions(struct pci_dev *pdev, int bars,
+				 const char *res_name)
+{
+	return __pci_request_selected_regions(pdev, bars, res_name, 0);
+}
+
+int pci_request_selected_regions_exclusive(struct pci_dev *pdev,
+				 int bars, const char *res_name)
+{
+	return __pci_request_selected_regions(pdev, bars, res_name,
+			IORESOURCE_EXCLUSIVE);
+}
+
 /**
  *	pci_release_regions - Release reserved PCI I/O and memory resources
  *	@pdev: PCI device whose resources were previously reserved by pci_request_regions
@@ -1501,6 +1558,29 @@ int pci_request_regions(struct pci_dev *pdev, const char *res_name)
 	return pci_request_selected_regions(pdev, ((1 << 6) - 1), res_name);
 }
 
+/**
+ *	pci_request_regions_exclusive - Reserved PCI I/O and memory resources
+ *	@pdev: PCI device whose resources are to be reserved
+ *	@res_name: Name to be associated with resource.
+ *
+ *	Mark all PCI regions associated with PCI device @pdev as
+ *	being reserved by owner @res_name.  Do not access any
+ *	address inside the PCI regions unless this call returns
+ *	successfully.
+ *
+ *	pci_request_regions_exclusive() will mark the region so that
+ * 	/dev/mem and the sysfs MMIO access will not be allowed.
+ *
+ *	Returns 0 on success, or %EBUSY on error.  A warning
+ *	message is also printed on failure.
+ */
+int pci_request_regions_exclusive(struct pci_dev *pdev, const char *res_name)
+{
+	return pci_request_selected_regions_exclusive(pdev,
+					((1 << 6) - 1), res_name);
+}
+
+
 /**
  * pci_set_master - enables bus-mastering for device dev
  * @dev: the PCI device to enable
@@ -2149,10 +2229,13 @@ EXPORT_SYMBOL(pci_find_capability);
 EXPORT_SYMBOL(pci_bus_find_capability);
 EXPORT_SYMBOL(pci_release_regions);
 EXPORT_SYMBOL(pci_request_regions);
+EXPORT_SYMBOL(pci_request_regions_exclusive);
 EXPORT_SYMBOL(pci_release_region);
 EXPORT_SYMBOL(pci_request_region);
+EXPORT_SYMBOL(pci_request_region_exclusive);
 EXPORT_SYMBOL(pci_release_selected_regions);
 EXPORT_SYMBOL(pci_request_selected_regions);
+EXPORT_SYMBOL(pci_request_selected_regions_exclusive);
 EXPORT_SYMBOL(pci_set_master);
 EXPORT_SYMBOL(pci_set_mwi);
 EXPORT_SYMBOL(pci_try_set_mwi);
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 041e95aac2bf..f6bb2ca8e3ba 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -49,6 +49,7 @@ struct resource_list {
 #define IORESOURCE_SIZEALIGN	0x00020000	/* size indicates alignment */
 #define IORESOURCE_STARTALIGN	0x00040000	/* start field is alignment */
 
+#define IORESOURCE_EXCLUSIVE	0x08000000	/* Userland may not map this resource */
 #define IORESOURCE_DISABLED	0x10000000
 #define IORESOURCE_UNSET	0x20000000
 #define IORESOURCE_AUTO		0x40000000
@@ -133,13 +134,16 @@ static inline unsigned long resource_type(struct resource *res)
 }
 
 /* Convenience shorthand with allocation */
-#define request_region(start,n,name)	__request_region(&ioport_resource, (start), (n), (name))
-#define request_mem_region(start,n,name) __request_region(&iomem_resource, (start), (n), (name))
+#define request_region(start,n,name)	__request_region(&ioport_resource, (start), (n), (name), 0)
+#define __request_mem_region(start,n,name, excl) __request_region(&iomem_resource, (start), (n), (name), excl)
+#define request_mem_region(start,n,name) __request_region(&iomem_resource, (start), (n), (name), 0)
+#define request_mem_region_exclusive(start,n,name) \
+	__request_region(&iomem_resource, (start), (n), (name), IORESOURCE_EXCLUSIVE)
 #define rename_region(region, newname) do { (region)->name = (newname); } while (0)
 
 extern struct resource * __request_region(struct resource *,
 					resource_size_t start,
-					resource_size_t n, const char *name);
+					resource_size_t n, const char *name, int relaxed);
 
 /* Compatibility cruft */
 #define release_region(start,n)	__release_region(&ioport_resource, (start), (n))
@@ -175,6 +179,7 @@ extern struct resource * __devm_request_region(struct device *dev,
 extern void __devm_release_region(struct device *dev, struct resource *parent,
 				  resource_size_t start, resource_size_t n);
 extern int iomem_map_sanity_check(resource_size_t addr, unsigned long size);
+extern int iomem_is_exclusive(u64 addr);
 
 #endif /* __ASSEMBLY__ */
 #endif	/* _LINUX_IOPORT_H */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 59a3dc2059d3..bfcb39ca8879 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -686,10 +686,13 @@ void pci_fixup_irqs(u8 (*)(struct pci_dev *, u8 *),
 		    int (*)(struct pci_dev *, u8, u8));
 #define HAVE_PCI_REQ_REGIONS	2
 int __must_check pci_request_regions(struct pci_dev *, const char *);
+int __must_check pci_request_regions_exclusive(struct pci_dev *, const char *);
 void pci_release_regions(struct pci_dev *);
 int __must_check pci_request_region(struct pci_dev *, int, const char *);
+int __must_check pci_request_region_exclusive(struct pci_dev *, int, const char *);
 void pci_release_region(struct pci_dev *, int);
 int pci_request_selected_regions(struct pci_dev *, int, const char *);
+int pci_request_selected_regions_exclusive(struct pci_dev *, int, const char *);
 void pci_release_selected_regions(struct pci_dev *, int);
 
 /* drivers/pci/bus.c */
diff --git a/kernel/resource.c b/kernel/resource.c
index e633106b12f6..ca6a1536b205 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -623,7 +623,7 @@ resource_size_t resource_alignment(struct resource *res)
  */
 struct resource * __request_region(struct resource *parent,
 				   resource_size_t start, resource_size_t n,
-				   const char *name)
+				   const char *name, int flags)
 {
 	struct resource *res = kzalloc(sizeof(*res), GFP_KERNEL);
 
@@ -634,6 +634,7 @@ struct resource * __request_region(struct resource *parent,
 	res->start = start;
 	res->end = start + n - 1;
 	res->flags = IORESOURCE_BUSY;
+	res->flags |= flags;
 
 	write_lock(&resource_lock);
 
@@ -679,7 +680,7 @@ int __check_region(struct resource *parent, resource_size_t start,
 {
 	struct resource * res;
 
-	res = __request_region(parent, start, n, "check-region");
+	res = __request_region(parent, start, n, "check-region", 0);
 	if (!res)
 		return -EBUSY;
 
@@ -776,7 +777,7 @@ struct resource * __devm_request_region(struct device *dev,
 	dr->start = start;
 	dr->n = n;
 
-	res = __request_region(parent, start, n, name);
+	res = __request_region(parent, start, n, name, 0);
 	if (res)
 		devres_add(dev, dr);
 	else
@@ -876,3 +877,57 @@ int iomem_map_sanity_check(resource_size_t addr, unsigned long size)
 
 	return err;
 }
+
+#ifdef CONFIG_STRICT_DEVMEM
+static int strict_iomem_checks = 1;
+#else
+static int strict_iomem_checks;
+#endif
+
+/*
+ * check if an address is reserved in the iomem resource tree
+ * returns 1 if reserved, 0 if not reserved.
+ */
+int iomem_is_exclusive(u64 addr)
+{
+	struct resource *p = &iomem_resource;
+	int err = 0;
+	loff_t l;
+	int size = PAGE_SIZE;
+
+	if (!strict_iomem_checks)
+		return 0;
+
+	addr = addr & PAGE_MASK;
+
+	read_lock(&resource_lock);
+	for (p = p->child; p ; p = r_next(NULL, p, &l)) {
+		/*
+		 * We can probably skip the resources without
+		 * IORESOURCE_IO attribute?
+		 */
+		if (p->start >= addr + size)
+			break;
+		if (p->end < addr)
+			continue;
+		if (p->flags & IORESOURCE_BUSY &&
+		     p->flags & IORESOURCE_EXCLUSIVE) {
+			err = 1;
+			break;
+		}
+	}
+	read_unlock(&resource_lock);
+
+	return err;
+}
+
+static int __init strict_iomem(char *str)
+{
+	if (strstr(str, "relaxed"))
+		strict_iomem_checks = 0;
+	if (strstr(str, "strict"))
+		strict_iomem_checks = 1;
+	return 1;
+}
+
+__setup("iomem=", strict_iomem);
-- 
cgit v1.2.3


From 57c2cf71c12318b72ebaa5720d210476b6bac4d4 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Thu, 11 Dec 2008 11:24:23 -0700
Subject: PCI: add pci_swizzle_interrupt_pin()

This patch adds pci_swizzle_interrupt_pin(), which implements the
INTx swizzling algorithm specified in Table 9-1 of the "PCI-to-PCI
Bridge Architecture Specification," revision 1.2.

There are many architecture-specific implementations of this
swizzle that can be replaced by this common one.

Reviewed-by: David Howells <dhowells@redhat.com>
Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/pci.c   | 16 +++++++++++++++-
 include/linux/pci.h |  1 +
 2 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index bd52ca4c2893..d4d71fae6233 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1366,6 +1366,20 @@ void pci_enable_ari(struct pci_dev *dev)
 	bridge->ari_enabled = 1;
 }
 
+/**
+ * pci_swizzle_interrupt_pin - swizzle INTx for device behind bridge
+ * @dev: the PCI device
+ * @pin: the INTx pin (1=INTA, 2=INTB, 3=INTD, 4=INTD)
+ *
+ * Perform INTx swizzling for a device behind one level of bridge.  This is
+ * required by section 9.1 of the PCI-to-PCI bridge specification for devices
+ * behind bridges on add-in cards.
+ */
+u8 pci_swizzle_interrupt_pin(struct pci_dev *dev, u8 pin)
+{
+	return (((pin - 1) + PCI_SLOT(dev->devfn)) % 4) + 1;
+}
+
 int
 pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge)
 {
@@ -1376,7 +1390,7 @@ pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge)
 		return -1;
 
 	while (dev->bus->self) {
-		pin = (((pin - 1) + PCI_SLOT(dev->devfn)) % 4) + 1;
+		pin = pci_swizzle_interrupt_pin(dev, pin);
 		dev = dev->bus->self;
 	}
 	*bridge = dev;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index bfcb39ca8879..58357d14f94c 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -532,6 +532,7 @@ int __must_check pci_bus_add_device(struct pci_dev *dev);
 void pci_read_bridge_bases(struct pci_bus *child);
 struct resource *pci_find_parent_resource(const struct pci_dev *dev,
 					  struct resource *res);
+u8 pci_swizzle_interrupt_pin(struct pci_dev *dev, u8 pin);
 int pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge);
 extern struct pci_dev *pci_dev_get(struct pci_dev *dev);
 extern void pci_dev_put(struct pci_dev *dev);
-- 
cgit v1.2.3


From 1684f5ddd4c0c754f52c78eaa2c5c69ad09fb18c Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Mon, 1 Dec 2008 14:30:30 -0800
Subject: PCI: uninline pci_ioremap_bar()

It's too large to be inlined.

Acked-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/pci.c   | 16 ++++++++++++++++
 include/linux/pci.h | 15 +--------------
 2 files changed, 17 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 1fb7cff4cdae..9354dd63f035 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -56,6 +56,22 @@ unsigned char pci_bus_max_busnr(struct pci_bus* bus)
 }
 EXPORT_SYMBOL_GPL(pci_bus_max_busnr);
 
+#ifdef CONFIG_HAS_IOMEM
+void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar)
+{
+	/*
+	 * Make sure the BAR is actually a memory resource, not an IO resource
+	 */
+	if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) {
+		WARN_ON(1);
+		return NULL;
+	}
+	return ioremap_nocache(pci_resource_start(pdev, bar),
+				     pci_resource_len(pdev, bar));
+}
+EXPORT_SYMBOL_GPL(pci_ioremap_bar);
+#endif
+
 #if 0
 /**
  * pci_max_busnr - returns maximum PCI bus number
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 58357d14f94c..0d8bc920c2e5 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1160,20 +1160,7 @@ static inline void pci_mmcfg_late_init(void) { }
 
 int pci_ext_cfg_avail(struct pci_dev *dev);
 
-#ifdef CONFIG_HAS_IOMEM
-static inline void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar)
-{
-	/*
-	 * Make sure the BAR is actually a memory resource, not an IO resource
-	 */
-	if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) {
-		WARN_ON(1);
-		return NULL;
-	}
-	return ioremap_nocache(pci_resource_start(pdev, bar),
-				     pci_resource_len(pdev, bar));
-}
-#endif
+void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar);
 
 #endif /* __KERNEL__ */
 #endif /* LINUX_PCI_H */
-- 
cgit v1.2.3


From 14add80b5120966fe0659d61815b9e9b4b68fdc5 Mon Sep 17 00:00:00 2001
From: Yu Zhao <yu.zhao@intel.com>
Date: Sat, 22 Nov 2008 02:38:52 +0800
Subject: PCI: remove unnecessary arg of pci_update_resource()

This cleanup removes unnecessary argument 'struct resource *res' in
pci_update_resource(), so it takes same arguments as other companion
functions (pci_assign_resource(), etc.).

Signed-off-by: Yu Zhao <yu.zhao@intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/pci.c       | 4 ++--
 drivers/pci/setup-res.c | 7 ++++---
 include/linux/pci.h     | 2 +-
 3 files changed, 7 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 9354dd63f035..c3ef2e78fc58 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -393,8 +393,8 @@ pci_restore_bars(struct pci_dev *dev)
 		return;
 	}
 
-	for (i = 0; i < numres; i ++)
-		pci_update_resource(dev, &dev->resource[i], i);
+	for (i = 0; i < numres; i++)
+		pci_update_resource(dev, i);
 }
 
 static struct pci_platform_pm_ops *pci_platform_pm;
diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
index 4e375632499a..3c5203ff53c7 100644
--- a/drivers/pci/setup-res.c
+++ b/drivers/pci/setup-res.c
@@ -26,11 +26,12 @@
 #include "pci.h"
 
 
-void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno)
+void pci_update_resource(struct pci_dev *dev, int resno)
 {
 	struct pci_bus_region region;
 	u32 new, check, mask;
 	int reg;
+	struct resource *res = dev->resource + resno;
 
 	/*
 	 * Ignore resources for unimplemented BARs and unused resource slots
@@ -162,7 +163,7 @@ int pci_assign_resource(struct pci_dev *dev, int resno)
 	} else {
 		res->flags &= ~IORESOURCE_STARTALIGN;
 		if (resno < PCI_BRIDGE_RESOURCES)
-			pci_update_resource(dev, res, resno);
+			pci_update_resource(dev, resno);
 	}
 
 	return ret;
@@ -197,7 +198,7 @@ int pci_assign_resource_fixed(struct pci_dev *dev, int resno)
 		dev_err(&dev->dev, "BAR %d: can't allocate %s resource %pR\n",
 			resno, res->flags & IORESOURCE_IO ? "I/O" : "mem", res);
 	} else if (resno < PCI_BRIDGE_RESOURCES) {
-		pci_update_resource(dev, res, resno);
+		pci_update_resource(dev, resno);
 	}
 
 	return ret;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 0d8bc920c2e5..c5e02f324e13 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -648,7 +648,7 @@ int pcie_get_readrq(struct pci_dev *dev);
 int pcie_set_readrq(struct pci_dev *dev, int rq);
 int pci_reset_function(struct pci_dev *dev);
 int pci_execute_reset_function(struct pci_dev *dev);
-void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno);
+void pci_update_resource(struct pci_dev *dev, int resno);
 int __must_check pci_assign_resource(struct pci_dev *dev, int i);
 int pci_select_bars(struct pci_dev *dev, unsigned long flags);
 
-- 
cgit v1.2.3


From fde09c6d8f92de0c9f75698a75f0989f2234c517 Mon Sep 17 00:00:00 2001
From: Yu Zhao <yu.zhao@intel.com>
Date: Sat, 22 Nov 2008 02:39:32 +0800
Subject: PCI: define PCI resource names in an 'enum'

This patch moves all definitions of the PCI resource names to an 'enum',
and also replaces some hard-coded resource variables with symbol
names. This change eases introduction of device specific resources.

Reviewed-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Yu Zhao <yu.zhao@intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/pci-sysfs.c |  4 +++-
 drivers/pci/probe.c     |  2 +-
 drivers/pci/proc.c      |  7 ++++---
 include/linux/pci.h     | 37 ++++++++++++++++++++++++-------------
 4 files changed, 32 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index d2f1354fd189..ea54cedcdfc6 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -102,11 +102,13 @@ resource_show(struct device * dev, struct device_attribute *attr, char * buf)
 	struct pci_dev * pci_dev = to_pci_dev(dev);
 	char * str = buf;
 	int i;
-	int max = 7;
+	int max;
 	resource_size_t start, end;
 
 	if (pci_dev->subordinate)
 		max = DEVICE_COUNT_RESOURCE;
+	else
+		max = PCI_BRIDGE_RESOURCES;
 
 	for (i = 0; i < max; i++) {
 		struct resource *res =  &pci_dev->resource[i];
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 5dcf2b65e3f9..e1cf5d50ed4d 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -423,7 +423,7 @@ static struct pci_bus *pci_alloc_child_bus(struct pci_bus *parent,
 	child->subordinate = 0xff;
 
 	/* Set up default resource pointers and names.. */
-	for (i = 0; i < 4; i++) {
+	for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) {
 		child->resource[i] = &bridge->resource[PCI_BRIDGE_RESOURCES+i];
 		child->resource[i]->name = child->name;
 	}
diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c
index 7fb086d39617..593bb844b8db 100644
--- a/drivers/pci/proc.c
+++ b/drivers/pci/proc.c
@@ -361,15 +361,16 @@ static int show_device(struct seq_file *m, void *v)
 			dev->vendor,
 			dev->device,
 			dev->irq);
-	/* Here should be 7 and not PCI_NUM_RESOURCES as we need to preserve compatibility */
-	for (i=0; i<7; i++) {
+
+	/* only print standard and ROM resources to preserve compatibility */
+	for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
 		resource_size_t start, end;
 		pci_resource_to_user(dev, i, &dev->resource[i], &start, &end);
 		seq_printf(m, "\t%16llx",
 			(unsigned long long)(start |
 			(dev->resource[i].flags & PCI_REGION_FLAG_MASK)));
 	}
-	for (i=0; i<7; i++) {
+	for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
 		resource_size_t start, end;
 		pci_resource_to_user(dev, i, &dev->resource[i], &start, &end);
 		seq_printf(m, "\t%16llx",
diff --git a/include/linux/pci.h b/include/linux/pci.h
index c5e02f324e13..da1c22bab40e 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -82,7 +82,30 @@ enum pci_mmap_state {
 #define PCI_DMA_FROMDEVICE	2
 #define PCI_DMA_NONE		3
 
-#define DEVICE_COUNT_RESOURCE	12
+/*
+ *  For PCI devices, the region numbers are assigned this way:
+ */
+enum {
+	/* #0-5: standard PCI resources */
+	PCI_STD_RESOURCES,
+	PCI_STD_RESOURCE_END = 5,
+
+	/* #6: expansion ROM resource */
+	PCI_ROM_RESOURCE,
+
+	/* resources assigned to buses behind the bridge */
+#define PCI_BRIDGE_RESOURCE_NUM 4
+
+	PCI_BRIDGE_RESOURCES,
+	PCI_BRIDGE_RESOURCE_END = PCI_BRIDGE_RESOURCES +
+				  PCI_BRIDGE_RESOURCE_NUM - 1,
+
+	/* total resources associated with a PCI device */
+	PCI_NUM_RESOURCES,
+
+	/* preserve this for compatibility */
+	DEVICE_COUNT_RESOURCE
+};
 
 typedef int __bitwise pci_power_t;
 
@@ -274,18 +297,6 @@ static inline void pci_add_saved_cap(struct pci_dev *pci_dev,
 	hlist_add_head(&new_cap->next, &pci_dev->saved_cap_space);
 }
 
-/*
- *  For PCI devices, the region numbers are assigned this way:
- *
- *	0-5	standard PCI regions
- *	6	expansion ROM
- *	7-10	bridges: address space assigned to buses behind the bridge
- */
-
-#define PCI_ROM_RESOURCE	6
-#define PCI_BRIDGE_RESOURCES	7
-#define PCI_NUM_RESOURCES	11
-
 #ifndef PCI_BUS_NUM_RESOURCES
 #define PCI_BUS_NUM_RESOURCES	16
 #endif
-- 
cgit v1.2.3


From e8c331e963c58b83db24b7d0e39e8c07f687dbc6 Mon Sep 17 00:00:00 2001
From: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Date: Wed, 17 Dec 2008 12:09:12 +0900
Subject: PCI hotplug: introduce functions for ACPI slot detection

Some ACPI related PCI hotplug code can be shared among PCI hotplug
drivers. This patch introduces the following functions in
drivers/pci/hotplug/acpi_pcihp.c to share the code, and changes
acpiphp and pciehp to use them.

- int acpi_pci_detect_ejectable(struct pci_bus *pbus)
  This checks if the specified PCI bus has ejectable slots.

- int acpi_pci_check_ejectable(struct pci_bus *pbus, acpi_handle handle)
  This checks if the specified handle is ejectable ACPI PCI slot. The
  'pbus' parameter is needed to check if 'handle' is PCI related ACPI
  object.

This patch also introduces the following inline function in
include/linux/pci-acpi.h, which is useful to get ACPI handle of the
PCI bridge from struct pci_bus of the bridge's secondary bus.

- static inline acpi_handle acpi_pci_get_bridge_handle(struct pci_bus *pbus)
  This returns ACPI handle of the PCI bridge which generates PCI bus
  specified by 'pbus'.

Signed-off-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/hotplug/acpi_pcihp.c   |  69 ++++++++++++++++++++++++
 drivers/pci/hotplug/acpiphp_glue.c | 107 ++++++-------------------------------
 drivers/pci/hotplug/pciehp_acpi.c  |  55 ++-----------------
 include/linux/pci-acpi.h           |   9 ++++
 include/linux/pci_hotplug.h        |   2 +
 5 files changed, 100 insertions(+), 142 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/hotplug/acpi_pcihp.c b/drivers/pci/hotplug/acpi_pcihp.c
index e17ef54f0efc..c62ab8d240aa 100644
--- a/drivers/pci/hotplug/acpi_pcihp.c
+++ b/drivers/pci/hotplug/acpi_pcihp.c
@@ -501,5 +501,74 @@ int acpi_root_bridge(acpi_handle handle)
 }
 EXPORT_SYMBOL_GPL(acpi_root_bridge);
 
+
+static int is_ejectable(acpi_handle handle)
+{
+	acpi_status status;
+	acpi_handle tmp;
+	unsigned long long removable;
+	status = acpi_get_handle(handle, "_ADR", &tmp);
+	if (ACPI_FAILURE(status))
+		return 0;
+	status = acpi_get_handle(handle, "_EJ0", &tmp);
+	if (ACPI_SUCCESS(status))
+		return 1;
+	status = acpi_evaluate_integer(handle, "_RMV", NULL, &removable);
+	if (ACPI_SUCCESS(status) && removable)
+		return 1;
+	return 0;
+}
+
+/**
+ * acpi_pcihp_check_ejectable - check if handle is ejectable ACPI PCI slot
+ * @pbus: the PCI bus of the PCI slot corresponding to 'handle'
+ * @handle: ACPI handle to check
+ *
+ * Return 1 if handle is ejectable PCI slot, 0 otherwise.
+ */
+int acpi_pci_check_ejectable(struct pci_bus *pbus, acpi_handle handle)
+{
+	acpi_handle bridge_handle, parent_handle;
+
+	if (!(bridge_handle = acpi_pci_get_bridge_handle(pbus)))
+		return 0;
+	if ((ACPI_FAILURE(acpi_get_parent(handle, &parent_handle))))
+		return 0;
+	if (bridge_handle != parent_handle)
+		return 0;
+	return is_ejectable(handle);
+}
+EXPORT_SYMBOL_GPL(acpi_pci_check_ejectable);
+
+static acpi_status
+check_hotplug(acpi_handle handle, u32 lvl, void *context, void **rv)
+{
+	int *found = (int *)context;
+	if (is_ejectable(handle)) {
+		*found = 1;
+		return AE_CTRL_TERMINATE;
+	}
+	return AE_OK;
+}
+
+/**
+ * acpi_pci_detect_ejectable - check if the PCI bus has ejectable slots
+ * @pbus - PCI bus to scan
+ *
+ * Returns 1 if the PCI bus has ACPI based ejectable slots, 0 otherwise.
+ */
+int acpi_pci_detect_ejectable(struct pci_bus *pbus)
+{
+	acpi_handle handle;
+	int found = 0;
+
+	if (!(handle = acpi_pci_get_bridge_handle(pbus)))
+		return 0;
+	acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, (u32)1,
+			    check_hotplug, (void *)&found, NULL);
+	return found;
+}
+EXPORT_SYMBOL_GPL(acpi_pci_detect_ejectable);
+
 module_param(debug_acpi, bool, 0644);
 MODULE_PARM_DESC(debug_acpi, "Debugging mode for ACPI enabled or not");
diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index 7a5760426897..f09b1010d477 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -46,6 +46,7 @@
 #include <linux/kernel.h>
 #include <linux/pci.h>
 #include <linux/pci_hotplug.h>
+#include <linux/pci-acpi.h>
 #include <linux/mutex.h>
 
 #include "../pci.h"
@@ -62,68 +63,6 @@ static void acpiphp_sanitize_bus(struct pci_bus *bus);
 static void acpiphp_set_hpp_values(acpi_handle handle, struct pci_bus *bus);
 static void handle_hotplug_event_func(acpi_handle handle, u32 type, void *context);
 
-
-/*
- * initialization & terminatation routines
- */
-
-/**
- * is_ejectable - determine if a slot is ejectable
- * @handle: handle to acpi namespace
- *
- * Ejectable slot should satisfy at least these conditions:
- *
- *  1. has _ADR method
- *  2. has _EJ0 method or _RMV method
- *
- * optionally
- *
- *  1. has _STA method
- *  2. has _PS0 method
- *  3. has _PS3 method
- *  4. ..
- */
-static int is_ejectable(acpi_handle handle)
-{
-	acpi_status status;
-	acpi_handle tmp;
-	unsigned long long removable;
-
-	status = acpi_get_handle(handle, "_ADR", &tmp);
-	if (ACPI_FAILURE(status))
-		return 0;
-
-	status = acpi_get_handle(handle, "_EJ0", &tmp);
-	if (ACPI_SUCCESS(status))
-		return 1;
-
-	status = acpi_get_handle(handle, "_RMV", &tmp);
-	if (ACPI_SUCCESS(status)) {
-		status = acpi_evaluate_integer(handle, "_RMV", NULL,
-					       &removable);
-		if (ACPI_SUCCESS(status) && removable)
-			return 1;
-	}
-
-	return 0;
-}
-
-
-/* callback routine to check for the existence of ejectable slots */
-static acpi_status
-is_ejectable_slot(acpi_handle handle, u32 lvl, void *context, void **rv)
-{
-	int *count = (int *)context;
-
-	if (is_ejectable(handle)) {
-		(*count)++;
-		/* only one ejectable slot is enough */
-		return AE_CTRL_TERMINATE;
-	} else {
-		return AE_OK;
-	}
-}
-
 /* callback routine to check for the existence of a pci dock device */
 static acpi_status
 is_pci_dock_device(acpi_handle handle, u32 lvl, void *context, void **rv)
@@ -138,9 +77,6 @@ is_pci_dock_device(acpi_handle handle, u32 lvl, void *context, void **rv)
 	}
 }
 
-
-
-
 /*
  * the _DCK method can do funny things... and sometimes not
  * hah-hah funny.
@@ -191,8 +127,9 @@ register_slot(acpi_handle handle, u32 lvl, void *context, void **rv)
 	acpi_status status = AE_OK;
 	unsigned long long adr, sun;
 	int device, function, retval;
+	struct pci_bus *pbus = bridge->pci_bus;
 
-	if (!is_ejectable(handle) && !is_dock_device(handle))
+	if (!acpi_pci_check_ejectable(pbus, handle) && !is_dock_device(handle))
 		return AE_OK;
 
 	acpi_evaluate_integer(handle, "_ADR", NULL, &adr);
@@ -258,8 +195,7 @@ register_slot(acpi_handle handle, u32 lvl, void *context, void **rv)
 		bridge->nr_slots++;
 
 		dbg("found ACPI PCI Hotplug slot %llu at PCI %04x:%02x:%02x\n",
-				slot->sun, pci_domain_nr(bridge->pci_bus),
-				bridge->pci_bus->number, slot->device);
+		    slot->sun, pci_domain_nr(pbus), pbus->number, device);
 		retval = acpiphp_register_hotplug_slot(slot);
 		if (retval) {
 			if (retval == -EBUSY)
@@ -276,8 +212,7 @@ register_slot(acpi_handle handle, u32 lvl, void *context, void **rv)
 	list_add_tail(&newfunc->sibling, &slot->funcs);
 
 	/* associate corresponding pci_dev */
-	newfunc->pci_dev = pci_get_slot(bridge->pci_bus,
-					 PCI_DEVFN(device, function));
+	newfunc->pci_dev = pci_get_slot(pbus, PCI_DEVFN(device, function));
 	if (newfunc->pci_dev) {
 		slot->flags |= (SLOT_ENABLED | SLOT_POWEREDON);
 	}
@@ -326,27 +261,15 @@ register_slot(acpi_handle handle, u32 lvl, void *context, void **rv)
 
 
 /* see if it's worth looking at this bridge */
-static int detect_ejectable_slots(acpi_handle *bridge_handle)
+static int detect_ejectable_slots(struct pci_bus *pbus)
 {
-	acpi_status status;
-	int count;
-
-	count = 0;
-
-	/* only check slots defined directly below bridge object */
-	status = acpi_walk_namespace(ACPI_TYPE_DEVICE, bridge_handle, (u32)1,
-				     is_ejectable_slot, (void *)&count, NULL);
-
-	/*
-	 * we also need to add this bridge if there is a dock bridge or
-	 * other pci device on a dock station (removable)
-	 */
-	if (!count)
-		status = acpi_walk_namespace(ACPI_TYPE_DEVICE, bridge_handle,
-				(u32)1, is_pci_dock_device, (void *)&count,
-				NULL);
-
-	return count;
+	int found = acpi_pci_detect_ejectable(pbus);
+	if (!found) {
+		acpi_handle bridge_handle = acpi_pci_get_bridge_handle(pbus);
+		acpi_walk_namespace(ACPI_TYPE_DEVICE, bridge_handle, (u32)1,
+				    is_pci_dock_device, (void *)&found, NULL);
+	}
+	return found;
 }
 
 
@@ -556,7 +479,7 @@ find_p2p_bridge(acpi_handle handle, u32 lvl, void *context, void **rv)
 		goto out;
 
 	/* check if this bridge has ejectable slots */
-	if ((detect_ejectable_slots(handle) > 0)) {
+	if ((detect_ejectable_slots(dev->subordinate) > 0)) {
 		dbg("found PCI-to-PCI bridge at PCI %s\n", pci_name(dev));
 		add_p2p_bridge(handle, dev);
 	}
@@ -617,7 +540,7 @@ static int add_bridge(acpi_handle handle)
 	}
 
 	/* check if this bridge has ejectable slots */
-	if (detect_ejectable_slots(handle) > 0) {
+	if (detect_ejectable_slots(pci_bus) > 0) {
 		dbg("found PCI host-bus bridge with hot-pluggable slots\n");
 		add_host_bridge(handle, pci_bus);
 	}
diff --git a/drivers/pci/hotplug/pciehp_acpi.c b/drivers/pci/hotplug/pciehp_acpi.c
index 88a5c57f2e5b..438d795f9fe3 100644
--- a/drivers/pci/hotplug/pciehp_acpi.c
+++ b/drivers/pci/hotplug/pciehp_acpi.c
@@ -24,6 +24,8 @@
  */
 
 #include <linux/acpi.h>
+#include <linux/pci.h>
+#include <linux/pci_hotplug.h>
 #include "pciehp.h"
 
 #define PCIEHP_DETECT_PCIE	(0)
@@ -41,59 +43,11 @@ MODULE_PARM_DESC(pciehp_detect_mode,
 	 "  auto(default) - Auto select mode. Use acpi option if duplicate\n"
 	 "                  slot ids are found. Otherwise, use pcie option\n");
 
-static int is_ejectable(acpi_handle handle)
-{
-	acpi_status status;
-	acpi_handle tmp;
-	unsigned long long removable;
-	status = acpi_get_handle(handle, "_ADR", &tmp);
-	if (ACPI_FAILURE(status))
-		return 0;
-	status = acpi_get_handle(handle, "_EJ0", &tmp);
-	if (ACPI_SUCCESS(status))
-		return 1;
-	status = acpi_evaluate_integer(handle, "_RMV", NULL, &removable);
-	if (ACPI_SUCCESS(status) && removable)
-		return 1;
-	return 0;
-}
-
-static acpi_status
-check_hotplug(acpi_handle handle, u32 lvl, void *context, void **rv)
-{
-	int *found = (int *)context;
-	if (is_ejectable(handle)) {
-		*found = 1;
-		return AE_CTRL_TERMINATE;
-	}
-	return AE_OK;
-}
-
-static int pciehp_detect_acpi_slot(struct pci_bus *pbus)
-{
-	acpi_handle handle;
-	struct pci_dev *pdev = pbus->self;
-	int found = 0;
-
-	if (!pdev){
-		int seg = pci_domain_nr(pbus), busnr = pbus->number;
-		handle = acpi_get_pci_rootbridge_handle(seg, busnr);
-	} else
-		handle = DEVICE_ACPI_HANDLE(&(pdev->dev));
-
-	if (!handle)
-		return 0;
-
-	acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, (u32)1,
-			    check_hotplug, (void *)&found, NULL);
-	return found;
-}
-
 int pciehp_acpi_slot_detection_check(struct pci_dev *dev)
 {
 	if (slot_detection_mode != PCIEHP_DETECT_ACPI)
 		return 0;
-	if (pciehp_detect_acpi_slot(dev->subordinate))
+	if (acpi_pci_detect_ejectable(dev->subordinate))
 		return 0;
 	return -ENODEV;
 }
@@ -135,6 +89,7 @@ static int __init dummy_probe(struct pcie_device *dev,
 	u32 slot_cap;
 	struct slot *slot, *tmp;
 	struct pci_dev *pdev = dev->port;
+	struct pci_bus *pbus = pdev->subordinate;
 	if (!(slot = kzalloc(sizeof(*slot), GFP_KERNEL)))
 		return -ENOMEM;
 	/* Note: pciehp_detect_mode != PCIEHP_DETECT_ACPI here */
@@ -149,7 +104,7 @@ static int __init dummy_probe(struct pcie_device *dev,
 			dup_slot_id++;
 	}
 	list_add_tail(&slot->slot_list, &dummy_slots);
-	if (!acpi_slot_detected && pciehp_detect_acpi_slot(pdev->subordinate))
+	if (!acpi_slot_detected && acpi_pci_detect_ejectable(pbus))
 		acpi_slot_detected = 1;
 	return -ENODEV;         /* dummy driver always returns error */
 }
diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h
index 871e096e0fbc..042c166f65d5 100644
--- a/include/linux/pci-acpi.h
+++ b/include/linux/pci-acpi.h
@@ -60,6 +60,15 @@ static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev)
 	return acpi_get_pci_rootbridge_handle(pci_domain_nr(pdev->bus),
 			pdev->bus->number);
 }
+
+static inline acpi_handle acpi_pci_get_bridge_handle(struct pci_bus *pbus)
+{
+	int seg = pci_domain_nr(pbus), busnr = pbus->number;
+	struct pci_dev *bridge = pbus->self;
+	if (bridge)
+		return DEVICE_ACPI_HANDLE(&(bridge->dev));
+	return acpi_get_pci_rootbridge_handle(seg, busnr);
+}
 #else
 #if !defined(AE_ERROR)
 typedef u32 		acpi_status;
diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h
index a00bd1a0f156..f7cc204fab07 100644
--- a/include/linux/pci_hotplug.h
+++ b/include/linux/pci_hotplug.h
@@ -228,6 +228,8 @@ extern acpi_status acpi_get_hp_params_from_firmware(struct pci_bus *bus,
 				struct hotplug_params *hpp);
 int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags);
 int acpi_root_bridge(acpi_handle handle);
+int acpi_pci_check_ejectable(struct pci_bus *pbus, acpi_handle handle);
+int acpi_pci_detect_ejectable(struct pci_bus *pbus);
 #endif
 #endif
 
-- 
cgit v1.2.3


From 68feac87de15edfc2c700d2d81b814288c93d003 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Tue, 16 Dec 2008 21:36:55 -0700
Subject: PCI: add pci_common_swizzle() for INTx swizzling

This patch adds pci_common_swizzle(), which swizzles INTx values all the
way up to a root bridge.

This common implementation can replace several architecture-specific
ones.  This should someday be combined with pci_get_interrupt_pin(),
but I left it separate for now to make reviewing easier.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/pci.c   | 20 ++++++++++++++++++++
 include/linux/pci.h |  1 +
 2 files changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 9d2aa6366fd0..c824dc8d617c 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1421,6 +1421,26 @@ pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge)
 	return pin;
 }
 
+/**
+ * pci_common_swizzle - swizzle INTx all the way to root bridge
+ * @dev: the PCI device
+ * @pinp: pointer to the INTx pin value (1=INTA, 2=INTB, 3=INTD, 4=INTD)
+ *
+ * Perform INTx swizzling for a device.  This traverses through all PCI-to-PCI
+ * bridges all the way up to a PCI root bus.
+ */
+u8 pci_common_swizzle(struct pci_dev *dev, u8 *pinp)
+{
+	u8 pin = *pinp;
+
+	while (dev->bus->self) {
+		pin = pci_swizzle_interrupt_pin(dev, pin);
+		dev = dev->bus->self;
+	}
+	*pinp = pin;
+	return PCI_SLOT(dev->devfn);
+}
+
 /**
  *	pci_release_region - Release a PCI bar
  *	@pdev: PCI device whose resources were previously reserved by pci_request_region
diff --git a/include/linux/pci.h b/include/linux/pci.h
index da1c22bab40e..170f9ae2d8a0 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -545,6 +545,7 @@ struct resource *pci_find_parent_resource(const struct pci_dev *dev,
 					  struct resource *res);
 u8 pci_swizzle_interrupt_pin(struct pci_dev *dev, u8 pin);
 int pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge);
+u8 pci_common_swizzle(struct pci_dev *dev, u8 *pinp);
 extern struct pci_dev *pci_dev_get(struct pci_dev *dev);
 extern void pci_dev_put(struct pci_dev *dev);
 extern void pci_remove_bus(struct pci_bus *b);
-- 
cgit v1.2.3


From 287d19ce2e67c15e79a187b3bdcbbea1a0a51a7d Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Thu, 18 Dec 2008 09:17:16 -0800
Subject: PCI: revise VPD access interface

Change PCI VPD API which was only used by sysfs to something usable
in drivers.
   * move iteration over multiple words to the low level
   * use conventional types for arguments
   * add exportable wrapper

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/access.c    | 156 ++++++++++++++++++++++++++++++------------------
 drivers/pci/pci-sysfs.c |  38 +++---------
 drivers/pci/pci.h       |   6 +-
 include/linux/pci.h     |   4 ++
 4 files changed, 114 insertions(+), 90 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/access.c b/drivers/pci/access.c
index 98ddba94b5b9..86ec4ad44bcd 100644
--- a/drivers/pci/access.c
+++ b/drivers/pci/access.c
@@ -66,6 +66,39 @@ EXPORT_SYMBOL(pci_bus_write_config_byte);
 EXPORT_SYMBOL(pci_bus_write_config_word);
 EXPORT_SYMBOL(pci_bus_write_config_dword);
 
+
+/**
+ * pci_read_vpd - Read one entry from Vital Product Data
+ * @dev:	pci device struct
+ * @pos:	offset in vpd space
+ * @count:	number of bytes to read
+ * @buf:	pointer to where to store result
+ *
+ */
+ssize_t pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf)
+{
+	if (!dev->vpd || !dev->vpd->ops)
+		return -ENODEV;
+	return dev->vpd->ops->read(dev, pos, count, buf);
+}
+EXPORT_SYMBOL(pci_read_vpd);
+
+/**
+ * pci_write_vpd - Write entry to Vital Product Data
+ * @dev:	pci device struct
+ * @pos:	offset in vpd space
+ * @count:	number of bytes to read
+ * @val:	value to write
+ *
+ */
+ssize_t pci_write_vpd(struct pci_dev *dev, loff_t pos, size_t count, const void *buf)
+{
+	if (!dev->vpd || !dev->vpd->ops)
+		return -ENODEV;
+	return dev->vpd->ops->write(dev, pos, count, buf);
+}
+EXPORT_SYMBOL(pci_write_vpd);
+
 /*
  * The following routines are to prevent the user from accessing PCI config
  * space when it's unsafe to do so.  Some devices require this during BIST and
@@ -176,19 +209,17 @@ static int pci_vpd_pci22_wait(struct pci_dev *dev)
 	}
 }
 
-static int pci_vpd_pci22_read(struct pci_dev *dev, int pos, int size,
-			      char *buf)
+static ssize_t pci_vpd_pci22_read(struct pci_dev *dev, loff_t pos, size_t count,
+				  void *arg)
 {
 	struct pci_vpd_pci22 *vpd =
 		container_of(dev->vpd, struct pci_vpd_pci22, base);
-	u32 val;
-	int ret = 0;
-	int begin, end, i;
+	int ret;
+	loff_t end = pos + count;
+	u8 *buf = arg;
 
-	if (pos < 0 || pos > vpd->base.len || size > vpd->base.len  - pos)
+	if (pos < 0 || pos > vpd->base.len || end > vpd->base.len)
 		return -EINVAL;
-	if (size == 0)
-		return 0;
 
 	if (mutex_lock_killable(&vpd->lock))
 		return -EINTR;
@@ -196,73 +227,84 @@ static int pci_vpd_pci22_read(struct pci_dev *dev, int pos, int size,
 	ret = pci_vpd_pci22_wait(dev);
 	if (ret < 0)
 		goto out;
-	ret = pci_user_write_config_word(dev, vpd->cap + PCI_VPD_ADDR,
-					 pos & ~3);
-	if (ret < 0)
-		goto out;
 
-	vpd->busy = true;
-	vpd->flag = PCI_VPD_ADDR_F;
-	ret = pci_vpd_pci22_wait(dev);
-	if (ret < 0)
-		goto out;
-	ret = pci_user_read_config_dword(dev, vpd->cap + PCI_VPD_DATA,
-					 &val);
+	while (pos < end) {
+		u32 val;
+		unsigned int i, skip;
+
+		ret = pci_user_write_config_word(dev, vpd->cap + PCI_VPD_ADDR,
+						 pos & ~3);
+		if (ret < 0)
+			break;
+		vpd->busy = true;
+		vpd->flag = PCI_VPD_ADDR_F;
+		ret = pci_vpd_pci22_wait(dev);
+		if (ret < 0)
+			break;
+
+		ret = pci_user_read_config_dword(dev, vpd->cap + PCI_VPD_DATA, &val);
+		if (ret < 0)
+			break;
+
+		skip = pos & 3;
+		for (i = 0;  i < sizeof(u32); i++) {
+			if (i >= skip) {
+				*buf++ = val;
+				if (++pos == end)
+					break;
+			}
+			val >>= 8;
+		}
+	}
 out:
 	mutex_unlock(&vpd->lock);
-	if (ret < 0)
-		return ret;
-
-	/* Convert to bytes */
-	begin = pos & 3;
-	end = min(4, begin + size);
-	for (i = 0; i < end; ++i) {
-		if (i >= begin)
-			*buf++ = val;
-		val >>= 8;
-	}
-	return end - begin;
+	return ret ? ret : count;
 }
 
-static int pci_vpd_pci22_write(struct pci_dev *dev, int pos, int size,
-			       const char *buf)
+static ssize_t pci_vpd_pci22_write(struct pci_dev *dev, loff_t pos, size_t count,
+				   const void *arg)
 {
 	struct pci_vpd_pci22 *vpd =
 		container_of(dev->vpd, struct pci_vpd_pci22, base);
-	u32 val;
+	const u8 *buf = arg;
+	loff_t end = pos + count;
 	int ret = 0;
 
-	if (pos < 0 || pos > vpd->base.len || pos & 3 ||
-	    size > vpd->base.len - pos || size < 4)
+	if (pos < 0 || (pos & 3) || (count & 3) || end > vpd->base.len)
 		return -EINVAL;
 
-	val = (u8) *buf++;
-	val |= ((u8) *buf++) << 8;
-	val |= ((u8) *buf++) << 16;
-	val |= ((u32)(u8) *buf++) << 24;
-
 	if (mutex_lock_killable(&vpd->lock))
 		return -EINTR;
+
 	ret = pci_vpd_pci22_wait(dev);
 	if (ret < 0)
 		goto out;
-	ret = pci_user_write_config_dword(dev, vpd->cap + PCI_VPD_DATA,
-					  val);
-	if (ret < 0)
-		goto out;
-	ret = pci_user_write_config_word(dev, vpd->cap + PCI_VPD_ADDR,
-					 pos | PCI_VPD_ADDR_F);
-	if (ret < 0)
-		goto out;
-	vpd->busy = true;
-	vpd->flag = 0;
-	ret = pci_vpd_pci22_wait(dev);
+
+	while (pos < end) {
+		u32 val;
+
+		val = *buf++;
+		val |= *buf++ << 8;
+		val |= *buf++ << 16;
+		val |= *buf++ << 24;
+
+		ret = pci_user_write_config_dword(dev, vpd->cap + PCI_VPD_DATA, val);
+		if (ret < 0)
+			break;
+		ret = pci_user_write_config_word(dev, vpd->cap + PCI_VPD_ADDR,
+						 pos | PCI_VPD_ADDR_F);
+		if (ret < 0)
+			break;
+
+		vpd->busy = true;
+		vpd->flag = 0;
+		ret = pci_vpd_pci22_wait(dev);
+
+		pos += sizeof(u32);
+	}
 out:
 	mutex_unlock(&vpd->lock);
-	if (ret < 0)
-		return ret;
-
-	return 4;
+	return ret ? ret : count;
 }
 
 static void pci_vpd_pci22_release(struct pci_dev *dev)
@@ -270,7 +312,7 @@ static void pci_vpd_pci22_release(struct pci_dev *dev)
 	kfree(container_of(dev->vpd, struct pci_vpd_pci22, base));
 }
 
-static struct pci_vpd_ops pci_vpd_pci22_ops = {
+static const struct pci_vpd_ops pci_vpd_pci22_ops = {
 	.read = pci_vpd_pci22_read,
 	.write = pci_vpd_pci22_write,
 	.release = pci_vpd_pci22_release,
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index ea54cedcdfc6..c23619fb6c4b 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -371,55 +371,33 @@ pci_write_config(struct kobject *kobj, struct bin_attribute *bin_attr,
 }
 
 static ssize_t
-pci_read_vpd(struct kobject *kobj, struct bin_attribute *bin_attr,
-	     char *buf, loff_t off, size_t count)
+read_vpd_attr(struct kobject *kobj, struct bin_attribute *bin_attr,
+	      char *buf, loff_t off, size_t count)
 {
 	struct pci_dev *dev =
 		to_pci_dev(container_of(kobj, struct device, kobj));
-	int end;
-	int ret;
 
 	if (off > bin_attr->size)
 		count = 0;
 	else if (count > bin_attr->size - off)
 		count = bin_attr->size - off;
-	end = off + count;
-
-	while (off < end) {
-		ret = dev->vpd->ops->read(dev, off, end - off, buf);
-		if (ret < 0)
-			return ret;
-		buf += ret;
-		off += ret;
-	}
 
-	return count;
+	return pci_read_vpd(dev, off, count, buf);
 }
 
 static ssize_t
-pci_write_vpd(struct kobject *kobj, struct bin_attribute *bin_attr,
-	      char *buf, loff_t off, size_t count)
+write_vpd_attr(struct kobject *kobj, struct bin_attribute *bin_attr,
+	       char *buf, loff_t off, size_t count)
 {
 	struct pci_dev *dev =
 		to_pci_dev(container_of(kobj, struct device, kobj));
-	int end;
-	int ret;
 
 	if (off > bin_attr->size)
 		count = 0;
 	else if (count > bin_attr->size - off)
 		count = bin_attr->size - off;
-	end = off + count;
-
-	while (off < end) {
-		ret = dev->vpd->ops->write(dev, off, end - off, buf);
-		if (ret < 0)
-			return ret;
-		buf += ret;
-		off += ret;
-	}
 
-	return count;
+	return pci_write_vpd(dev, off, count, buf);
 }
 
 #ifdef HAVE_PCI_LEGACY
@@ -845,8 +823,8 @@ static int pci_create_capabilities_sysfs(struct pci_dev *dev)
 		attr->size = dev->vpd->len;
 		attr->attr.name = "vpd";
 		attr->attr.mode = S_IRUSR | S_IWUSR;
-		attr->read = pci_read_vpd;
-		attr->write = pci_write_vpd;
+		attr->read = read_vpd_attr;
+		attr->write = write_vpd_attr;
 		retval = sysfs_create_bin_file(&dev->dev.kobj, attr);
 		if (retval) {
 			kfree(dev->vpd->attr);
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 65deed8bfc06..211fd418f48f 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -56,14 +56,14 @@ extern int pci_user_write_config_word(struct pci_dev *dev, int where, u16 val);
 extern int pci_user_write_config_dword(struct pci_dev *dev, int where, u32 val);
 
 struct pci_vpd_ops {
-	int (*read)(struct pci_dev *dev, int pos, int size, char *buf);
-	int (*write)(struct pci_dev *dev, int pos, int size, const char *buf);
+	ssize_t (*read)(struct pci_dev *dev, loff_t pos, size_t count, void *buf);
+	ssize_t (*write)(struct pci_dev *dev, loff_t pos, size_t count, const void *buf);
 	void (*release)(struct pci_dev *dev);
 };
 
 struct pci_vpd {
 	unsigned int len;
-	struct pci_vpd_ops *ops;
+	const struct pci_vpd_ops *ops;
 	struct bin_attribute *attr; /* descriptor for sysfs VPD entry */
 };
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 170f9ae2d8a0..76079e106895 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -687,6 +687,10 @@ int pci_back_from_sleep(struct pci_dev *dev);
 /* Functions for PCI Hotplug drivers to use */
 int pci_bus_find_capability(struct pci_bus *bus, unsigned int devfn, int cap);
 
+/* Vital product data routines */
+ssize_t pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf);
+ssize_t pci_write_vpd(struct pci_dev *dev, loff_t pos, size_t count, const void *buf);
+
 /* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */
 void pci_bus_assign_resources(struct pci_bus *bus);
 void pci_bus_size_bridges(struct pci_bus *bus);
-- 
cgit v1.2.3


From db5679437a2b938c9127480a3923633721583a4f Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Thu, 18 Dec 2008 09:17:16 -0800
Subject: PCI: add interface to set visible size of VPD

The VPD on all devices may not be 32K. Unfortunately, there is no
generic way to find the size, so this adds a simple API hook
to reset it.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/access.c | 23 +++++++++++++++++++++++
 include/linux/pci.h  |  1 +
 2 files changed, 24 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pci/access.c b/drivers/pci/access.c
index 86ec4ad44bcd..381444794778 100644
--- a/drivers/pci/access.c
+++ b/drivers/pci/access.c
@@ -339,6 +339,29 @@ int pci_vpd_pci22_init(struct pci_dev *dev)
 	return 0;
 }
 
+/**
+ * pci_vpd_truncate - Set available Vital Product Data size
+ * @dev:	pci device struct
+ * @size:	available memory in bytes
+ *
+ * Adjust size of available VPD area.
+ */
+int pci_vpd_truncate(struct pci_dev *dev, size_t size)
+{
+	if (!dev->vpd)
+		return -EINVAL;
+
+	/* limited by the access method */
+	if (size > dev->vpd->len)
+		return -EINVAL;
+
+	dev->vpd->len = size;
+	dev->vpd->attr->size = size;
+
+	return 0;
+}
+EXPORT_SYMBOL(pci_vpd_truncate);
+
 /**
  * pci_block_user_cfg_access - Block userspace PCI config reads/writes
  * @dev:	pci device struct
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 76079e106895..7cbecef19bb6 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -690,6 +690,7 @@ int pci_bus_find_capability(struct pci_bus *bus, unsigned int devfn, int cap);
 /* Vital product data routines */
 ssize_t pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf);
 ssize_t pci_write_vpd(struct pci_dev *dev, loff_t pos, size_t count, const void *buf);
+int pci_vpd_truncate(struct pci_dev *dev, size_t size);
 
 /* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */
 void pci_bus_assign_resources(struct pci_bus *bus);
-- 
cgit v1.2.3


From 322162a71bd9fc4edb1b11236e7bc8aa27ccac22 Mon Sep 17 00:00:00 2001
From: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Date: Fri, 19 Dec 2008 15:19:02 +0900
Subject: PCI: pciehp: cleanup register and field definitions

Clean up register definitions related to PCI Express Hot plug.

  - Add register definitions into include/linux/pci_regs.h, and use
    them instead of pciehp's locally definied register definitions.
  - Remove pciehp's locally defined register definitions
  - Remove unused register definitions in pciehp.
  - Some minor cleanups.

Signed-off-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/hotplug/pciehp_hpc.c | 328 ++++++++++++---------------------------
 include/linux/pci_regs.h         |  64 +++++++-
 2 files changed, 156 insertions(+), 236 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index 22b88cb17a07..71a8012886b0 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -42,42 +42,6 @@
 
 static atomic_t pciehp_num_controllers = ATOMIC_INIT(0);
 
-struct ctrl_reg {
-	u8 cap_id;
-	u8 nxt_ptr;
-	u16 cap_reg;
-	u32 dev_cap;
-	u16 dev_ctrl;
-	u16 dev_status;
-	u32 lnk_cap;
-	u16 lnk_ctrl;
-	u16 lnk_status;
-	u32 slot_cap;
-	u16 slot_ctrl;
-	u16 slot_status;
-	u16 root_ctrl;
-	u16 rsvp;
-	u32 root_status;
-} __attribute__ ((packed));
-
-/* offsets to the controller registers based on the above structure layout */
-enum ctrl_offsets {
-	PCIECAPID	=	offsetof(struct ctrl_reg, cap_id),
-	NXTCAPPTR	=	offsetof(struct ctrl_reg, nxt_ptr),
-	CAPREG		=	offsetof(struct ctrl_reg, cap_reg),
-	DEVCAP		=	offsetof(struct ctrl_reg, dev_cap),
-	DEVCTRL		=	offsetof(struct ctrl_reg, dev_ctrl),
-	DEVSTATUS	=	offsetof(struct ctrl_reg, dev_status),
-	LNKCAP		=	offsetof(struct ctrl_reg, lnk_cap),
-	LNKCTRL		=	offsetof(struct ctrl_reg, lnk_ctrl),
-	LNKSTATUS	=	offsetof(struct ctrl_reg, lnk_status),
-	SLOTCAP		=	offsetof(struct ctrl_reg, slot_cap),
-	SLOTCTRL	=	offsetof(struct ctrl_reg, slot_ctrl),
-	SLOTSTATUS	=	offsetof(struct ctrl_reg, slot_status),
-	ROOTCTRL	=	offsetof(struct ctrl_reg, root_ctrl),
-	ROOTSTATUS	=	offsetof(struct ctrl_reg, root_status),
-};
-
 static inline int pciehp_readw(struct controller *ctrl, int reg, u16 *value)
 {
 	struct pci_dev *dev = ctrl->pci_dev;
@@ -102,95 +66,9 @@ static inline int pciehp_writel(struct controller *ctrl, int reg, u32 value)
 	return pci_write_config_dword(dev, ctrl->cap_base + reg, value);
 }
 
-/* Field definitions in PCI Express Capabilities Register */
-#define CAP_VER			0x000F
-#define DEV_PORT_TYPE		0x00F0
-#define SLOT_IMPL		0x0100
-#define MSG_NUM			0x3E00
-
-/* Device or Port Type */
-#define NAT_ENDPT		0x00
-#define LEG_ENDPT		0x01
-#define ROOT_PORT		0x04
-#define UP_STREAM		0x05
-#define	DN_STREAM		0x06
-#define PCIE_PCI_BRDG		0x07
-#define PCI_PCIE_BRDG		0x10
-
-/* Field definitions in Device Capabilities Register */
-#define DATTN_BUTTN_PRSN	0x1000
-#define DATTN_LED_PRSN		0x2000
-#define DPWR_LED_PRSN		0x4000
-
-/* Field definitions in Link Capabilities Register */
-#define MAX_LNK_SPEED		0x000F
-#define MAX_LNK_WIDTH		0x03F0
-#define LINK_ACTIVE_REPORTING	0x00100000
-
-/* Link Width Encoding */
-#define LNK_X1		0x01
-#define LNK_X2		0x02
-#define LNK_X4		0x04
-#define LNK_X8		0x08
-#define LNK_X12		0x0C
-#define LNK_X16		0x10
-#define LNK_X32		0x20
-
-/*Field definitions of Link Status Register */
-#define LNK_SPEED	0x000F
-#define NEG_LINK_WD	0x03F0
-#define LNK_TRN_ERR	0x0400
-#define	LNK_TRN		0x0800
-#define SLOT_CLK_CONF	0x1000
-#define LINK_ACTIVE	0x2000
-
-/* Field definitions in Slot Capabilities Register */
-#define ATTN_BUTTN_PRSN	0x00000001
-#define	PWR_CTRL_PRSN	0x00000002
-#define MRL_SENS_PRSN	0x00000004
-#define ATTN_LED_PRSN	0x00000008
-#define PWR_LED_PRSN	0x00000010
-#define HP_SUPR_RM_SUP	0x00000020
-#define HP_CAP		0x00000040
-#define SLOT_PWR_VALUE	0x000003F8
-#define SLOT_PWR_LIMIT	0x00000C00
-#define PSN		0xFFF80000	/* PSN: Physical Slot Number */
-
-/* Field definitions in Slot Control Register */
-#define ATTN_BUTTN_ENABLE		0x0001
-#define PWR_FAULT_DETECT_ENABLE		0x0002
-#define MRL_DETECT_ENABLE		0x0004
-#define PRSN_DETECT_ENABLE		0x0008
-#define CMD_CMPL_INTR_ENABLE		0x0010
-#define HP_INTR_ENABLE			0x0020
-#define ATTN_LED_CTRL			0x00C0
-#define PWR_LED_CTRL			0x0300
-#define PWR_CTRL			0x0400
-#define EMI_CTRL			0x0800
-
-/* Attention indicator and Power indicator states */
-#define LED_ON		0x01
-#define LED_BLINK	0x10
-#define LED_OFF		0x11
-
 /* Power Control Command */
 #define POWER_ON	0
-#define POWER_OFF	0x0400
-
-/* EMI Status defines */
-#define EMI_DISENGAGED	0
-#define EMI_ENGAGED	1
-
-/* Field definitions in Slot Status Register */
-#define ATTN_BUTTN_PRESSED	0x0001
-#define PWR_FAULT_DETECTED	0x0002
-#define MRL_SENS_CHANGED	0x0004
-#define PRSN_DETECT_CHANGED	0x0008
-#define CMD_COMPLETED		0x0010
-#define MRL_STATE		0x0020
-#define PRSN_STATE		0x0040
-#define EMI_STATE		0x0080
-#define EMI_STATUS_BIT		7
+#define POWER_OFF	PCI_EXP_SLTCTL_PCC
 
 static irqreturn_t pcie_isr(int irq, void *dev_id);
 static void start_int_poll_timer(struct controller *ctrl, int sec);
@@ -253,22 +131,20 @@ static inline void pciehp_free_irq(struct controller *ctrl)
 static int pcie_poll_cmd(struct controller *ctrl)
 {
 	u16 slot_status;
-	int timeout = 1000;
+	int err, timeout = 1000;
 
-	if (!pciehp_readw(ctrl, SLOTSTATUS, &slot_status)) {
-		if (slot_status & CMD_COMPLETED) {
-			pciehp_writew(ctrl, SLOTSTATUS, CMD_COMPLETED);
-			return 1;
-		}
+	err = pciehp_readw(ctrl, PCI_EXP_SLTSTA, &slot_status);
+	if (!err && (slot_status & PCI_EXP_SLTSTA_CC)) {
+		pciehp_writew(ctrl, PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_CC);
+		return 1;
 	}
 	while (timeout > 0) {
 		msleep(10);
 		timeout -= 10;
-		if (!pciehp_readw(ctrl, SLOTSTATUS, &slot_status)) {
-			if (slot_status & CMD_COMPLETED) {
-				pciehp_writew(ctrl, SLOTSTATUS, CMD_COMPLETED);
-				return 1;
-			}
+		err = pciehp_readw(ctrl, PCI_EXP_SLTSTA, &slot_status);
+		if (!err && (slot_status & PCI_EXP_SLTSTA_CC)) {
+			pciehp_writew(ctrl, PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_CC);
+			return 1;
 		}
 	}
 	return 0;	/* timeout */
@@ -302,14 +178,14 @@ static int pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask)
 
 	mutex_lock(&ctrl->ctrl_lock);
 
-	retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status);
+	retval = pciehp_readw(ctrl, PCI_EXP_SLTSTA, &slot_status);
 	if (retval) {
 		ctrl_err(ctrl, "%s: Cannot read SLOTSTATUS register\n",
 			 __func__);
 		goto out;
 	}
 
-	if (slot_status & CMD_COMPLETED) {
+	if (slot_status & PCI_EXP_SLTSTA_CC) {
 		if (!ctrl->no_cmd_complete) {
 			/*
 			 * After 1 sec and CMD_COMPLETED still not set, just
@@ -332,7 +208,7 @@ static int pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask)
 		}
 	}
 
-	retval = pciehp_readw(ctrl, SLOTCTRL, &slot_ctrl);
+	retval = pciehp_readw(ctrl, PCI_EXP_SLTCTL, &slot_ctrl);
 	if (retval) {
 		ctrl_err(ctrl, "%s: Cannot read SLOTCTRL register\n", __func__);
 		goto out;
@@ -342,7 +218,7 @@ static int pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask)
 	slot_ctrl |= (cmd & mask);
 	ctrl->cmd_busy = 1;
 	smp_mb();
-	retval = pciehp_writew(ctrl, SLOTCTRL, slot_ctrl);
+	retval = pciehp_writew(ctrl, PCI_EXP_SLTCTL, slot_ctrl);
 	if (retval)
 		ctrl_err(ctrl, "Cannot write to SLOTCTRL register\n");
 
@@ -356,8 +232,8 @@ static int pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask)
 		 * completed interrupt is not enabled, we need to poll
 		 * command completed event.
 		 */
-		if (!(slot_ctrl & HP_INTR_ENABLE) ||
-		    !(slot_ctrl & CMD_CMPL_INTR_ENABLE))
+		if (!(slot_ctrl & PCI_EXP_SLTCTL_HPIE) ||
+		    !(slot_ctrl & PCI_EXP_SLTCTL_CCIE))
 			poll = 1;
                 pcie_wait_cmd(ctrl, poll);
 	}
@@ -370,9 +246,9 @@ static inline int check_link_active(struct controller *ctrl)
 {
 	u16 link_status;
 
-	if (pciehp_readw(ctrl, LNKSTATUS, &link_status))
+	if (pciehp_readw(ctrl, PCI_EXP_LNKSTA, &link_status))
 		return 0;
-	return !!(link_status & LINK_ACTIVE);
+	return !!(link_status & PCI_EXP_LNKSTA_DLLLA);
 }
 
 static void pcie_wait_link_active(struct controller *ctrl)
@@ -412,14 +288,15 @@ static int hpc_check_lnk_status(struct controller *ctrl)
         } else
                 msleep(1000);
 
-	retval = pciehp_readw(ctrl, LNKSTATUS, &lnk_status);
+	retval = pciehp_readw(ctrl, PCI_EXP_LNKSTA, &lnk_status);
 	if (retval) {
 		ctrl_err(ctrl, "Cannot read LNKSTATUS register\n");
 		return retval;
 	}
 
 	ctrl_dbg(ctrl, "%s: lnk_status = %x\n", __func__, lnk_status);
-	if ((lnk_status & LNK_TRN) || !(lnk_status & NEG_LINK_WD)) {
+	if ((lnk_status & PCI_EXP_LNKSTA_LT) ||
+	    !(lnk_status & PCI_EXP_LNKSTA_NLW)) {
 		ctrl_err(ctrl, "Link Training Error occurs \n");
 		retval = -1;
 		return retval;
@@ -435,16 +312,16 @@ static int hpc_get_attention_status(struct slot *slot, u8 *status)
 	u8 atten_led_state;
 	int retval = 0;
 
-	retval = pciehp_readw(ctrl, SLOTCTRL, &slot_ctrl);
+	retval = pciehp_readw(ctrl, PCI_EXP_SLTCTL, &slot_ctrl);
 	if (retval) {
 		ctrl_err(ctrl, "%s: Cannot read SLOTCTRL register\n", __func__);
 		return retval;
 	}
 
 	ctrl_dbg(ctrl, "%s: SLOTCTRL %x, value read %x\n",
-		 __func__, ctrl->cap_base + SLOTCTRL, slot_ctrl);
+		 __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_ctrl);
 
-	atten_led_state = (slot_ctrl & ATTN_LED_CTRL) >> 6;
+	atten_led_state = (slot_ctrl & PCI_EXP_SLTCTL_AIC) >> 6;
 
 	switch (atten_led_state) {
 	case 0:
@@ -474,15 +351,15 @@ static int hpc_get_power_status(struct slot *slot, u8 *status)
 	u8 pwr_state;
 	int	retval = 0;
 
-	retval = pciehp_readw(ctrl, SLOTCTRL, &slot_ctrl);
+	retval = pciehp_readw(ctrl, PCI_EXP_SLTCTL, &slot_ctrl);
 	if (retval) {
 		ctrl_err(ctrl, "%s: Cannot read SLOTCTRL register\n", __func__);
 		return retval;
 	}
 	ctrl_dbg(ctrl, "%s: SLOTCTRL %x value read %x\n",
-		 __func__, ctrl->cap_base + SLOTCTRL, slot_ctrl);
+		 __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_ctrl);
 
-	pwr_state = (slot_ctrl & PWR_CTRL) >> 10;
+	pwr_state = (slot_ctrl & PCI_EXP_SLTCTL_PCC) >> 10;
 
 	switch (pwr_state) {
 	case 0:
@@ -503,17 +380,15 @@ static int hpc_get_latch_status(struct slot *slot, u8 *status)
 {
 	struct controller *ctrl = slot->ctrl;
 	u16 slot_status;
-	int retval = 0;
+	int retval;
 
-	retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status);
+	retval = pciehp_readw(ctrl, PCI_EXP_SLTSTA, &slot_status);
 	if (retval) {
 		ctrl_err(ctrl, "%s: Cannot read SLOTSTATUS register\n",
 			 __func__);
 		return retval;
 	}
-
-	*status = (((slot_status & MRL_STATE) >> 5) == 0) ? 0 : 1;
-
+	*status = !!(slot_status & PCI_EXP_SLTSTA_MRLSS);
 	return 0;
 }
 
@@ -521,18 +396,15 @@ static int hpc_get_adapter_status(struct slot *slot, u8 *status)
 {
 	struct controller *ctrl = slot->ctrl;
 	u16 slot_status;
-	u8 card_state;
-	int retval = 0;
+	int retval;
 
-	retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status);
+	retval = pciehp_readw(ctrl, PCI_EXP_SLTSTA, &slot_status);
 	if (retval) {
 		ctrl_err(ctrl, "%s: Cannot read SLOTSTATUS register\n",
 			 __func__);
 		return retval;
 	}
-	card_state = (u8)((slot_status & PRSN_STATE) >> 6);
-	*status = (card_state == 1) ? 1 : 0;
-
+	*status = !!(slot_status & PCI_EXP_SLTSTA_PDS);
 	return 0;
 }
 
@@ -540,32 +412,28 @@ static int hpc_query_power_fault(struct slot *slot)
 {
 	struct controller *ctrl = slot->ctrl;
 	u16 slot_status;
-	u8 pwr_fault;
-	int retval = 0;
+	int retval;
 
-	retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status);
+	retval = pciehp_readw(ctrl, PCI_EXP_SLTSTA, &slot_status);
 	if (retval) {
 		ctrl_err(ctrl, "Cannot check for power fault\n");
 		return retval;
 	}
-	pwr_fault = (u8)((slot_status & PWR_FAULT_DETECTED) >> 1);
-
-	return pwr_fault;
+	return !!(slot_status & PCI_EXP_SLTSTA_PFD);
 }
 
 static int hpc_get_emi_status(struct slot *slot, u8 *status)
 {
 	struct controller *ctrl = slot->ctrl;
 	u16 slot_status;
-	int retval = 0;
+	int retval;
 
-	retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status);
+	retval = pciehp_readw(ctrl, PCI_EXP_SLTSTA, &slot_status);
 	if (retval) {
 		ctrl_err(ctrl, "Cannot check EMI status\n");
 		return retval;
 	}
-	*status = (slot_status & EMI_STATE) >> EMI_STATUS_BIT;
-
+	*status = !!(slot_status & PCI_EXP_SLTSTA_EIS);
 	return retval;
 }
 
@@ -575,8 +443,8 @@ static int hpc_toggle_emi(struct slot *slot)
 	u16 cmd_mask;
 	int rc;
 
-	slot_cmd = EMI_CTRL;
-	cmd_mask = EMI_CTRL;
+	slot_cmd = PCI_EXP_SLTCTL_EIC;
+	cmd_mask = PCI_EXP_SLTCTL_EIC;
 	rc = pcie_write_cmd(slot->ctrl, slot_cmd, cmd_mask);
 	slot->last_emi_toggle = get_seconds();
 
@@ -590,7 +458,7 @@ static int hpc_set_attention_status(struct slot *slot, u8 value)
 	u16 cmd_mask;
 	int rc;
 
-	cmd_mask = ATTN_LED_CTRL;
+	cmd_mask = PCI_EXP_SLTCTL_AIC;
 	switch (value) {
 		case 0 :	/* turn off */
 			slot_cmd = 0x00C0;
@@ -606,7 +474,7 @@ static int hpc_set_attention_status(struct slot *slot, u8 value)
 	}
 	rc = pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
 	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n",
-		 __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
+		 __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd);
 
 	return rc;
 }
@@ -618,10 +486,10 @@ static void hpc_set_green_led_on(struct slot *slot)
 	u16 cmd_mask;
 
 	slot_cmd = 0x0100;
-	cmd_mask = PWR_LED_CTRL;
+	cmd_mask = PCI_EXP_SLTCTL_PIC;
 	pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
 	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n",
-		 __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
+		 __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd);
 }
 
 static void hpc_set_green_led_off(struct slot *slot)
@@ -631,10 +499,10 @@ static void hpc_set_green_led_off(struct slot *slot)
 	u16 cmd_mask;
 
 	slot_cmd = 0x0300;
-	cmd_mask = PWR_LED_CTRL;
+	cmd_mask = PCI_EXP_SLTCTL_PIC;
 	pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
 	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n",
-		 __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
+		 __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd);
 }
 
 static void hpc_set_green_led_blink(struct slot *slot)
@@ -644,10 +512,10 @@ static void hpc_set_green_led_blink(struct slot *slot)
 	u16 cmd_mask;
 
 	slot_cmd = 0x0200;
-	cmd_mask = PWR_LED_CTRL;
+	cmd_mask = PCI_EXP_SLTCTL_PIC;
 	pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
 	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n",
-		 __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
+		 __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd);
 }
 
 static int hpc_power_on_slot(struct slot * slot)
@@ -661,15 +529,15 @@ static int hpc_power_on_slot(struct slot * slot)
 	ctrl_dbg(ctrl, "%s: slot->hp_slot %x\n", __func__, slot->hp_slot);
 
 	/* Clear sticky power-fault bit from previous power failures */
-	retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status);
+	retval = pciehp_readw(ctrl, PCI_EXP_SLTSTA, &slot_status);
 	if (retval) {
 		ctrl_err(ctrl, "%s: Cannot read SLOTSTATUS register\n",
 			 __func__);
 		return retval;
 	}
-	slot_status &= PWR_FAULT_DETECTED;
+	slot_status &= PCI_EXP_SLTSTA_PFD;
 	if (slot_status) {
-		retval = pciehp_writew(ctrl, SLOTSTATUS, slot_status);
+		retval = pciehp_writew(ctrl, PCI_EXP_SLTSTA, slot_status);
 		if (retval) {
 			ctrl_err(ctrl,
 				 "%s: Cannot write to SLOTSTATUS register\n",
@@ -679,13 +547,13 @@ static int hpc_power_on_slot(struct slot * slot)
 	}
 
 	slot_cmd = POWER_ON;
-	cmd_mask = PWR_CTRL;
+	cmd_mask = PCI_EXP_SLTCTL_PCC;
 	/* Enable detection that we turned off at slot power-off time */
 	if (!pciehp_poll_mode) {
-		slot_cmd |= (PWR_FAULT_DETECT_ENABLE | MRL_DETECT_ENABLE |
-			     PRSN_DETECT_ENABLE);
-		cmd_mask |= (PWR_FAULT_DETECT_ENABLE | MRL_DETECT_ENABLE |
-			     PRSN_DETECT_ENABLE);
+		slot_cmd |= (PCI_EXP_SLTCTL_PFDE | PCI_EXP_SLTCTL_MRLSCE |
+			     PCI_EXP_SLTCTL_PDCE);
+		cmd_mask |= (PCI_EXP_SLTCTL_PFDE | PCI_EXP_SLTCTL_MRLSCE |
+			     PCI_EXP_SLTCTL_PDCE);
 	}
 
 	retval = pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
@@ -695,7 +563,7 @@ static int hpc_power_on_slot(struct slot * slot)
 		return -1;
 	}
 	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n",
-		 __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
+		 __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd);
 
 	return retval;
 }
@@ -752,7 +620,7 @@ static int hpc_power_off_slot(struct slot * slot)
 	changed = pcie_mask_bad_dllp(ctrl);
 
 	slot_cmd = POWER_OFF;
-	cmd_mask = PWR_CTRL;
+	cmd_mask = PCI_EXP_SLTCTL_PCC;
 	/*
 	 * If we get MRL or presence detect interrupts now, the isr
 	 * will notice the sticky power-fault bit too and issue power
@@ -761,10 +629,10 @@ static int hpc_power_off_slot(struct slot * slot)
 	 * till the slot is powered on again.
 	 */
 	if (!pciehp_poll_mode) {
-		slot_cmd &= ~(PWR_FAULT_DETECT_ENABLE | MRL_DETECT_ENABLE |
-			      PRSN_DETECT_ENABLE);
-		cmd_mask |= (PWR_FAULT_DETECT_ENABLE | MRL_DETECT_ENABLE |
-			     PRSN_DETECT_ENABLE);
+		slot_cmd &= ~(PCI_EXP_SLTCTL_PFDE | PCI_EXP_SLTCTL_MRLSCE |
+			      PCI_EXP_SLTCTL_PDCE);
+		cmd_mask |= (PCI_EXP_SLTCTL_PFDE | PCI_EXP_SLTCTL_MRLSCE |
+			     PCI_EXP_SLTCTL_PDCE);
 	}
 
 	retval = pcie_write_cmd(ctrl, slot_cmd, cmd_mask);
@@ -774,7 +642,7 @@ static int hpc_power_off_slot(struct slot * slot)
 		goto out;
 	}
 	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n",
-		 __func__, ctrl->cap_base + SLOTCTRL, slot_cmd);
+		 __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd);
  out:
 	if (changed)
 		pcie_unmask_bad_dllp(ctrl);
@@ -795,19 +663,19 @@ static irqreturn_t pcie_isr(int irq, void *dev_id)
 	 */
 	intr_loc = 0;
 	do {
-		if (pciehp_readw(ctrl, SLOTSTATUS, &detected)) {
+		if (pciehp_readw(ctrl, PCI_EXP_SLTSTA, &detected)) {
 			ctrl_err(ctrl, "%s: Cannot read SLOTSTATUS\n",
 				 __func__);
 			return IRQ_NONE;
 		}
 
-		detected &= (ATTN_BUTTN_PRESSED | PWR_FAULT_DETECTED |
-			     MRL_SENS_CHANGED | PRSN_DETECT_CHANGED |
-			     CMD_COMPLETED);
+		detected &= (PCI_EXP_SLTSTA_ABP | PCI_EXP_SLTSTA_PFD |
+			     PCI_EXP_SLTSTA_MRLSC | PCI_EXP_SLTSTA_PDC |
+			     PCI_EXP_SLTSTA_CC);
 		intr_loc |= detected;
 		if (!intr_loc)
 			return IRQ_NONE;
-		if (detected && pciehp_writew(ctrl, SLOTSTATUS, detected)) {
+		if (detected && pciehp_writew(ctrl, PCI_EXP_SLTSTA, detected)) {
 			ctrl_err(ctrl, "%s: Cannot write to SLOTSTATUS\n",
 				 __func__);
 			return IRQ_NONE;
@@ -817,31 +685,31 @@ static irqreturn_t pcie_isr(int irq, void *dev_id)
 	ctrl_dbg(ctrl, "%s: intr_loc %x\n", __func__, intr_loc);
 
 	/* Check Command Complete Interrupt Pending */
-	if (intr_loc & CMD_COMPLETED) {
+	if (intr_loc & PCI_EXP_SLTSTA_CC) {
 		ctrl->cmd_busy = 0;
 		smp_mb();
 		wake_up(&ctrl->queue);
 	}
 
-	if (!(intr_loc & ~CMD_COMPLETED))
+	if (!(intr_loc & ~PCI_EXP_SLTSTA_CC))
 		return IRQ_HANDLED;
 
 	p_slot = pciehp_find_slot(ctrl, ctrl->slot_device_offset);
 
 	/* Check MRL Sensor Changed */
-	if (intr_loc & MRL_SENS_CHANGED)
+	if (intr_loc & PCI_EXP_SLTSTA_MRLSC)
 		pciehp_handle_switch_change(p_slot);
 
 	/* Check Attention Button Pressed */
-	if (intr_loc & ATTN_BUTTN_PRESSED)
+	if (intr_loc & PCI_EXP_SLTSTA_ABP)
 		pciehp_handle_attention_button(p_slot);
 
 	/* Check Presence Detect Changed */
-	if (intr_loc & PRSN_DETECT_CHANGED)
+	if (intr_loc & PCI_EXP_SLTSTA_PDC)
 		pciehp_handle_presence_change(p_slot);
 
 	/* Check Power Fault Detected */
-	if (intr_loc & PWR_FAULT_DETECTED)
+	if (intr_loc & PCI_EXP_SLTSTA_PFD)
 		pciehp_handle_power_fault(p_slot);
 
 	return IRQ_HANDLED;
@@ -854,7 +722,7 @@ static int hpc_get_max_lnk_speed(struct slot *slot, enum pci_bus_speed *value)
 	u32	lnk_cap;
 	int retval = 0;
 
-	retval = pciehp_readl(ctrl, LNKCAP, &lnk_cap);
+	retval = pciehp_readl(ctrl, PCI_EXP_LNKCAP, &lnk_cap);
 	if (retval) {
 		ctrl_err(ctrl, "%s: Cannot read LNKCAP register\n", __func__);
 		return retval;
@@ -883,13 +751,13 @@ static int hpc_get_max_lnk_width(struct slot *slot,
 	u32	lnk_cap;
 	int retval = 0;
 
-	retval = pciehp_readl(ctrl, LNKCAP, &lnk_cap);
+	retval = pciehp_readl(ctrl, PCI_EXP_LNKCAP, &lnk_cap);
 	if (retval) {
 		ctrl_err(ctrl, "%s: Cannot read LNKCAP register\n", __func__);
 		return retval;
 	}
 
-	switch ((lnk_cap & 0x03F0) >> 4){
+	switch ((lnk_cap & PCI_EXP_LNKSTA_NLW) >> 4){
 	case 0:
 		lnk_wdth = PCIE_LNK_WIDTH_RESRV;
 		break;
@@ -932,14 +800,14 @@ static int hpc_get_cur_lnk_speed(struct slot *slot, enum pci_bus_speed *value)
 	int retval = 0;
 	u16 lnk_status;
 
-	retval = pciehp_readw(ctrl, LNKSTATUS, &lnk_status);
+	retval = pciehp_readw(ctrl, PCI_EXP_LNKSTA, &lnk_status);
 	if (retval) {
 		ctrl_err(ctrl, "%s: Cannot read LNKSTATUS register\n",
 			 __func__);
 		return retval;
 	}
 
-	switch (lnk_status & 0x0F) {
+	switch (lnk_status & PCI_EXP_LNKSTA_CLS) {
 	case 1:
 		lnk_speed = PCIE_2PT5GB;
 		break;
@@ -962,14 +830,14 @@ static int hpc_get_cur_lnk_width(struct slot *slot,
 	int retval = 0;
 	u16 lnk_status;
 
-	retval = pciehp_readw(ctrl, LNKSTATUS, &lnk_status);
+	retval = pciehp_readw(ctrl, PCI_EXP_LNKSTA, &lnk_status);
 	if (retval) {
 		ctrl_err(ctrl, "%s: Cannot read LNKSTATUS register\n",
 			 __func__);
 		return retval;
 	}
 
-	switch ((lnk_status & 0x03F0) >> 4){
+	switch ((lnk_status & PCI_EXP_LNKSTA_NLW) >> 4){
 	case 0:
 		lnk_wdth = PCIE_LNK_WIDTH_RESRV;
 		break;
@@ -1035,18 +903,19 @@ int pcie_enable_notification(struct controller *ctrl)
 {
 	u16 cmd, mask;
 
-	cmd = PRSN_DETECT_ENABLE;
+	cmd = PCI_EXP_SLTCTL_PDCE;
 	if (ATTN_BUTTN(ctrl))
-		cmd |= ATTN_BUTTN_ENABLE;
+		cmd |= PCI_EXP_SLTCTL_ABPE;
 	if (POWER_CTRL(ctrl))
-		cmd |= PWR_FAULT_DETECT_ENABLE;
+		cmd |= PCI_EXP_SLTCTL_PFDE;
 	if (MRL_SENS(ctrl))
-		cmd |= MRL_DETECT_ENABLE;
+		cmd |= PCI_EXP_SLTCTL_MRLSCE;
 	if (!pciehp_poll_mode)
-		cmd |= HP_INTR_ENABLE | CMD_CMPL_INTR_ENABLE;
+		cmd |= PCI_EXP_SLTCTL_HPIE | PCI_EXP_SLTCTL_CCIE;
 
-	mask = PRSN_DETECT_ENABLE | ATTN_BUTTN_ENABLE | MRL_DETECT_ENABLE |
-	       PWR_FAULT_DETECT_ENABLE | HP_INTR_ENABLE | CMD_CMPL_INTR_ENABLE;
+	mask = (PCI_EXP_SLTCTL_PDCE | PCI_EXP_SLTCTL_ABPE |
+		PCI_EXP_SLTCTL_MRLSCE | PCI_EXP_SLTCTL_PFDE |
+		PCI_EXP_SLTCTL_HPIE | PCI_EXP_SLTCTL_CCIE);
 
 	if (pcie_write_cmd(ctrl, cmd, mask)) {
 		ctrl_err(ctrl, "Cannot enable software notification\n");
@@ -1058,8 +927,9 @@ int pcie_enable_notification(struct controller *ctrl)
 static void pcie_disable_notification(struct controller *ctrl)
 {
 	u16 mask;
-	mask = PRSN_DETECT_ENABLE | ATTN_BUTTN_ENABLE | MRL_DETECT_ENABLE |
-	       PWR_FAULT_DETECT_ENABLE | HP_INTR_ENABLE | CMD_CMPL_INTR_ENABLE;
+	mask = (PCI_EXP_SLTCTL_PDCE | PCI_EXP_SLTCTL_ABPE |
+		PCI_EXP_SLTCTL_MRLSCE | PCI_EXP_SLTCTL_PFDE |
+		PCI_EXP_SLTCTL_HPIE | PCI_EXP_SLTCTL_CCIE);
 	if (pcie_write_cmd(ctrl, 0, mask))
 		ctrl_warn(ctrl, "Cannot disable software notification\n");
 }
@@ -1156,9 +1026,9 @@ static inline void dbg_ctrl(struct controller *ctrl)
 		  EMI(ctrl)        ? "yes" : "no");
 	ctrl_info(ctrl, "  Command Completed    : %3s\n",
 		  NO_CMD_CMPL(ctrl) ? "no" : "yes");
-	pciehp_readw(ctrl, SLOTSTATUS, &reg16);
+	pciehp_readw(ctrl, PCI_EXP_SLTSTA, &reg16);
 	ctrl_info(ctrl, "Slot Status            : 0x%04x\n", reg16);
-	pciehp_readw(ctrl, SLOTCTRL, &reg16);
+	pciehp_readw(ctrl, PCI_EXP_SLTCTL, &reg16);
 	ctrl_info(ctrl, "Slot Control           : 0x%04x\n", reg16);
 }
 
@@ -1182,7 +1052,7 @@ struct controller *pcie_init(struct pcie_device *dev)
 		ctrl_err(ctrl, "Cannot find PCI Express capability\n");
 		goto abort_ctrl;
 	}
-	if (pciehp_readl(ctrl, SLOTCAP, &slot_cap)) {
+	if (pciehp_readl(ctrl, PCI_EXP_SLTCAP, &slot_cap)) {
 		ctrl_err(ctrl, "Cannot read SLOTCAP register\n");
 		goto abort_ctrl;
 	}
@@ -1207,17 +1077,17 @@ struct controller *pcie_init(struct pcie_device *dev)
 	    ctrl->no_cmd_complete = 1;
 
         /* Check if Data Link Layer Link Active Reporting is implemented */
-        if (pciehp_readl(ctrl, LNKCAP, &link_cap)) {
+        if (pciehp_readl(ctrl, PCI_EXP_LNKCAP, &link_cap)) {
                 ctrl_err(ctrl, "%s: Cannot read LNKCAP register\n", __func__);
                 goto abort_ctrl;
         }
-        if (link_cap & LINK_ACTIVE_REPORTING) {
+        if (link_cap & PCI_EXP_LNKCAP_DLLLARC) {
                 ctrl_dbg(ctrl, "Link Active Reporting supported\n");
                 ctrl->link_active_reporting = 1;
         }
 
 	/* Clear all remaining event bits in Slot Status register */
-	if (pciehp_writew(ctrl, SLOTSTATUS, 0x1f))
+	if (pciehp_writew(ctrl, PCI_EXP_SLTSTA, 0x1f))
 		goto abort_ctrl;
 
 	/* Disable sotfware notification */
diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h
index 7766488470e4..027815b4635e 100644
--- a/include/linux/pci_regs.h
+++ b/include/linux/pci_regs.h
@@ -411,20 +411,70 @@
 #define  PCI_EXP_DEVSTA_AUXPD	0x10	/* AUX Power Detected */
 #define  PCI_EXP_DEVSTA_TRPND	0x20	/* Transactions Pending */
 #define PCI_EXP_LNKCAP		12	/* Link Capabilities */
-#define  PCI_EXP_LNKCAP_ASPMS	0xc00	/* ASPM Support */
-#define  PCI_EXP_LNKCAP_L0SEL	0x7000	/* L0s Exit Latency */
-#define  PCI_EXP_LNKCAP_L1EL	0x38000	/* L1 Exit Latency */
-#define  PCI_EXP_LNKCAP_CLKPM	0x40000	/* L1 Clock Power Management */
+#define  PCI_EXP_LNKCAP_SLS	0x0000000f /* Supported Link Speeds */
+#define  PCI_EXP_LNKCAP_MLW	0x000003f0 /* Maximum Link Width */
+#define  PCI_EXP_LNKCAP_ASPMS	0x00000c00 /* ASPM Support */
+#define  PCI_EXP_LNKCAP_L0SEL	0x00007000 /* L0s Exit Latency */
+#define  PCI_EXP_LNKCAP_L1EL	0x00038000 /* L1 Exit Latency */
+#define  PCI_EXP_LNKCAP_CLKPM	0x00040000 /* L1 Clock Power Management */
+#define  PCI_EXP_LNKCAP_SDERC	0x00080000 /* Suprise Down Error Reporting Capable */
+#define  PCI_EXP_LNKCAP_DLLLARC	0x00100000 /* Data Link Layer Link Active Reporting Capable */
+#define  PCI_EXP_LNKCAP_LBNC	0x00200000 /* Link Bandwidth Notification Capability */
+#define  PCI_EXP_LNKCAP_PN	0xff000000 /* Port Number */
 #define PCI_EXP_LNKCTL		16	/* Link Control */
-#define  PCI_EXP_LNKCTL_RL	0x20	/* Retrain Link */
-#define  PCI_EXP_LNKCTL_CCC	0x40	/* Common Clock COnfiguration */
+#define  PCI_EXP_LNKCTL_ASPMC	0x0003	/* ASPM Control */
+#define  PCI_EXP_LNKCTL_RCB	0x0008	/* Read Completion Boundary */
+#define  PCI_EXP_LNKCTL_LD	0x0010	/* Link Disable */
+#define  PCI_EXP_LNKCTL_RL	0x0020	/* Retrain Link */
+#define  PCI_EXP_LNKCTL_CCC	0x0040	/* Common Clock Configuration */
+#define  PCI_EXP_LNKCTL_ES	0x0080	/* Extended Synch */
 #define  PCI_EXP_LNKCTL_CLKREQ_EN 0x100	/* Enable clkreq */
+#define  PCI_EXP_LNKCTL_HAWD	0x0200	/* Hardware Autonomous Width Disable */
+#define  PCI_EXP_LNKCTL_LBMIE	0x0400	/* Link Bandwidth Management Interrupt Enable */
+#define  PCI_EXP_LNKCTL_LABIE	0x0800	/* Lnk Autonomous Bandwidth Interrupt Enable */
 #define PCI_EXP_LNKSTA		18	/* Link Status */
-#define  PCI_EXP_LNKSTA_LT	0x800	/* Link Training */
+#define  PCI_EXP_LNKSTA_CLS	0x000f	/* Current Link Speed */
+#define  PCI_EXP_LNKSTA_NLW	0x03f0	/* Nogotiated Link Width */
+#define  PCI_EXP_LNKSTA_LT	0x0800	/* Link Training */
 #define  PCI_EXP_LNKSTA_SLC	0x1000	/* Slot Clock Configuration */
+#define  PCI_EXP_LNKSTA_DLLLA	0x2000	/* Data Link Layer Link Active */
+#define  PCI_EXP_LNKSTA_LBMS	0x4000	/* Link Bandwidth Management Status */
+#define  PCI_EXP_LNKSTA_LABS	0x8000	/* Link Autonomous Bandwidth Status */
 #define PCI_EXP_SLTCAP		20	/* Slot Capabilities */
+#define  PCI_EXP_SLTCAP_ABP	0x00000001 /* Attention Button Present */
+#define  PCI_EXP_SLTCAP_PCP	0x00000002 /* Power Controller Present */
+#define  PCI_EXP_SLTCAP_MRLSP	0x00000004 /* MRL Sensor Present */
+#define  PCI_EXP_SLTCAP_AIP	0x00000008 /* Attention Indicator Present */
+#define  PCI_EXP_SLTCAP_PIP	0x00000010 /* Power Indicator Present */
+#define  PCI_EXP_SLTCAP_HPS	0x00000020 /* Hot-Plug Surprise */
+#define  PCI_EXP_SLTCAP_HPC	0x00000040 /* Hot-Plug Capable */
+#define  PCI_EXP_SLTCAP_SPLV	0x00007f80 /* Slot Power Limit Value */
+#define  PCI_EXP_SLTCAP_SPLS	0x00018000 /* Slot Power Limit Scale */
+#define  PCI_EXP_SLTCAP_EIP	0x00020000 /* Electromechanical Interlock Present */
+#define  PCI_EXP_SLTCAP_NCCS	0x00040000 /* No Command Completed Support */
+#define  PCI_EXP_SLTCAP_PSN	0xfff80000 /* Physical Slot Number */
 #define PCI_EXP_SLTCTL		24	/* Slot Control */
+#define  PCI_EXP_SLTCTL_ABPE	0x0001	/* Attention Button Pressed Enable */
+#define  PCI_EXP_SLTCTL_PFDE	0x0002	/* Power Fault Detected Enable */
+#define  PCI_EXP_SLTCTL_MRLSCE	0x0004	/* MRL Sensor Changed Enable */
+#define  PCI_EXP_SLTCTL_PDCE	0x0008	/* Presence Detect Changed Enable */
+#define  PCI_EXP_SLTCTL_CCIE	0x0010	/* Command Completed Interrupt Enable */
+#define  PCI_EXP_SLTCTL_HPIE	0x0020	/* Hot-Plug Interrupt Enable */
+#define  PCI_EXP_SLTCTL_AIC	0x00c0	/* Attention Indicator Control */
+#define  PCI_EXP_SLTCTL_PIC	0x0300	/* Power Indicator Control */
+#define  PCI_EXP_SLTCTL_PCC	0x0400	/* Power Controller Control */
+#define  PCI_EXP_SLTCTL_EIC	0x0800	/* Electromechanical Interlock Control */
+#define  PCI_EXP_SLTCTL_DLLSCE	0x1000	/* Data Link Layer State Changed Enable */
 #define PCI_EXP_SLTSTA		26	/* Slot Status */
+#define  PCI_EXP_SLTSTA_ABP	0x0001	/* Attention Button Pressed */
+#define  PCI_EXP_SLTSTA_PFD	0x0002	/* Power Fault Detected */
+#define  PCI_EXP_SLTSTA_MRLSC	0x0004	/* MRL Sensor Changed */
+#define  PCI_EXP_SLTSTA_PDC	0x0008	/* Presence Detect Changed */
+#define  PCI_EXP_SLTSTA_CC	0x0010	/* Command Completed */
+#define  PCI_EXP_SLTSTA_MRLSS	0x0020	/* MRL Sensor State */
+#define  PCI_EXP_SLTSTA_PDS	0x0040	/* Presence Detect State */
+#define  PCI_EXP_SLTSTA_EIS	0x0080	/* Electromechanical Interlock Status */
+#define  PCI_EXP_SLTSTA_DLLSC	0x0100	/* Data Link Layer State Changed */
 #define PCI_EXP_RTCTL		28	/* Root Control */
 #define  PCI_EXP_RTCTL_SECEE	0x01	/* System Error on Correctable Error */
 #define  PCI_EXP_RTCTL_SENFEE	0x02	/* System Error on Non-Fatal Error */
-- 
cgit v1.2.3


From 6a479079c07211bf348ac8a79754f26bea258f26 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Tue, 23 Dec 2008 03:08:29 +0000
Subject: PCI: Add pci_clear_master() as opposite of pci_set_master()

During an online device reset it may be useful to disable bus-mastering.
pci_disable_device() does that, and far more besides, so is not suitable
for an online reset.

Add pci_clear_master() which does just this.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Reviewed-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 Documentation/PCI/pci.txt |  3 ++-
 drivers/pci/pci.c         | 39 ++++++++++++++++++++++++++++-----------
 include/linux/pci.h       |  1 +
 3 files changed, 31 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/PCI/pci.txt b/Documentation/PCI/pci.txt
index fd4907a2968c..7f6de6ea5b47 100644
--- a/Documentation/PCI/pci.txt
+++ b/Documentation/PCI/pci.txt
@@ -294,7 +294,8 @@ NOTE: pci_enable_device() can fail! Check the return value.
 
 pci_set_master() will enable DMA by setting the bus master bit
 in the PCI_COMMAND register. It also fixes the latency timer value if
-it's set to something bogus by the BIOS.
+it's set to something bogus by the BIOS.  pci_clear_master() will
+disable DMA by clearing the bus master bit.
 
 If the PCI device can use the PCI Memory-Write-Invalidate transaction,
 call pci_set_mwi().  This enables the PCI_COMMAND bit for Mem-Wr-Inval
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index c824dc8d617c..f3fd55df67db 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1667,6 +1667,22 @@ int pci_request_regions_exclusive(struct pci_dev *pdev, const char *res_name)
 					((1 << 6) - 1), res_name);
 }
 
+static void __pci_set_master(struct pci_dev *dev, bool enable)
+{
+	u16 old_cmd, cmd;
+
+	pci_read_config_word(dev, PCI_COMMAND, &old_cmd);
+	if (enable)
+		cmd = old_cmd | PCI_COMMAND_MASTER;
+	else
+		cmd = old_cmd & ~PCI_COMMAND_MASTER;
+	if (cmd != old_cmd) {
+		dev_dbg(&dev->dev, "%s bus mastering\n",
+			enable ? "enabling" : "disabling");
+		pci_write_config_word(dev, PCI_COMMAND, cmd);
+	}
+	dev->is_busmaster = enable;
+}
 
 /**
  * pci_set_master - enables bus-mastering for device dev
@@ -1675,21 +1691,21 @@ int pci_request_regions_exclusive(struct pci_dev *pdev, const char *res_name)
  * Enables bus-mastering on the device and calls pcibios_set_master()
  * to do the needed arch specific settings.
  */
-void
-pci_set_master(struct pci_dev *dev)
+void pci_set_master(struct pci_dev *dev)
 {
-	u16 cmd;
-
-	pci_read_config_word(dev, PCI_COMMAND, &cmd);
-	if (! (cmd & PCI_COMMAND_MASTER)) {
-		dev_dbg(&dev->dev, "enabling bus mastering\n");
-		cmd |= PCI_COMMAND_MASTER;
-		pci_write_config_word(dev, PCI_COMMAND, cmd);
-	}
-	dev->is_busmaster = 1;
+	__pci_set_master(dev, true);
 	pcibios_set_master(dev);
 }
 
+/**
+ * pci_clear_master - disables bus-mastering for device dev
+ * @dev: the PCI device to disable
+ */
+void pci_clear_master(struct pci_dev *dev)
+{
+	__pci_set_master(dev, false);
+}
+
 #ifdef PCI_DISABLE_MWI
 int pci_set_mwi(struct pci_dev *dev)
 {
@@ -2346,6 +2362,7 @@ EXPORT_SYMBOL(pci_release_selected_regions);
 EXPORT_SYMBOL(pci_request_selected_regions);
 EXPORT_SYMBOL(pci_request_selected_regions_exclusive);
 EXPORT_SYMBOL(pci_set_master);
+EXPORT_SYMBOL(pci_clear_master);
 EXPORT_SYMBOL(pci_set_mwi);
 EXPORT_SYMBOL(pci_try_set_mwi);
 EXPORT_SYMBOL(pci_clear_mwi);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 7cbecef19bb6..0f6d2bb1df9c 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -642,6 +642,7 @@ static inline int pci_is_managed(struct pci_dev *pdev)
 
 void pci_disable_device(struct pci_dev *dev);
 void pci_set_master(struct pci_dev *dev);
+void pci_clear_master(struct pci_dev *dev);
 int pci_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state);
 #define HAVE_PCI_SET_MWI
 int __must_check pci_set_mwi(struct pci_dev *dev);
-- 
cgit v1.2.3


From 16cf0ebc35dd63f72628ba1246132a6fd17bced2 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Mon, 5 Jan 2009 14:50:27 +0100
Subject: x86/PCI: Do not use interrupt links for devices using MSI-X

pcibios_enable_device() and pcibios_disable_device() don't handle
IRQs for devices that have MSI enabled and it should treat the
devices with MSI-X enabled in the same way.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/x86/pci/common.c | 4 ++--
 include/linux/pci.h   | 9 +++++++++
 2 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 9ab8509f7b15..82d22fc601ae 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -551,14 +551,14 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
 	if ((err = pci_enable_resources(dev, mask)) < 0)
 		return err;
 
-	if (!dev->msi_enabled)
+	if (!pci_dev_msi_enabled(dev))
 		return pcibios_enable_irq(dev);
 	return 0;
 }
 
 void pcibios_disable_device (struct pci_dev *dev)
 {
-	if (!dev->msi_enabled && pcibios_disable_irq)
+	if (!pci_dev_msi_enabled(dev) && pcibios_disable_irq)
 		pcibios_disable_irq(dev);
 }
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 0f6d2bb1df9c..80f8b8b65fde 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -336,6 +336,15 @@ struct pci_bus {
 #define pci_bus_b(n)	list_entry(n, struct pci_bus, node)
 #define to_pci_bus(n)	container_of(n, struct pci_bus, dev)
 
+#ifdef CONFIG_PCI_MSI
+static inline bool pci_dev_msi_enabled(struct pci_dev *pci_dev)
+{
+	return pci_dev->msi_enabled || pci_dev->msix_enabled;
+}
+#else
+static inline bool pci_dev_msi_enabled(struct pci_dev *pci_dev) { return false; }
+#endif
+
 /*
  * Error values that may be returned by PCI functions.
  */
-- 
cgit v1.2.3


From 940fbf411e5fb42aee8ab7dd814b24080951dbfc Mon Sep 17 00:00:00 2001
From: Detlef Riekenberg <wine.dev@web.de>
Date: Wed, 7 Jan 2009 10:11:44 +0100
Subject: linux/types.h: Don't depend on __GNUC__ for __le64/__be64

The typedefs for __u64 and __s64 where fixed to be available for other
compiler on May 2 2008 by H.  Peter Anvin (in commit edfa5cfa3dc5)

Acked-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Detlef Riekenberg <wine.dev@web.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/types.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/types.h b/include/linux/types.h
index 3b864f2d9560..712ca53bc348 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -176,10 +176,9 @@ typedef __u16 __bitwise __le16;
 typedef __u16 __bitwise __be16;
 typedef __u32 __bitwise __le32;
 typedef __u32 __bitwise __be32;
-#if defined(__GNUC__)
 typedef __u64 __bitwise __le64;
 typedef __u64 __bitwise __be64;
-#endif
+
 typedef __u16 __bitwise __sum16;
 typedef __u32 __bitwise __wsum;
 
-- 
cgit v1.2.3


From 8d1a0a13edecfdcb47fee3238ed4a2af2a2867f9 Mon Sep 17 00:00:00 2001
From: Anders Larsen <al@alarsen.net>
Date: Thu, 1 Jan 2009 17:17:35 +0100
Subject: qnx: include <linux/types.h> for definitions of __[us]{8,16,32,64}
 types

On 2008-12-30 11:32:33, Sam Ravnborg wrote:
> We have added a few additional validation checks of the userspace headers:
...
> 3) We should include <linux/types.h> and not <asm/types.h>
> 4) If we use a __[us]{8,16,32,64} type then we must include <linux/types.h>

Satisfy these requirements for the linux/qnx*.h headers.

Signed-off-by: Anders Larsen <al@alarsen.net>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
---
 include/linux/qnx4_fs.h  | 4 +---
 include/linux/qnxtypes.h | 5 ++---
 2 files changed, 3 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/qnx4_fs.h b/include/linux/qnx4_fs.h
index 34a196ee7941..787d19ea9f46 100644
--- a/include/linux/qnx4_fs.h
+++ b/include/linux/qnx4_fs.h
@@ -2,14 +2,12 @@
  *  Name                         : qnx4_fs.h
  *  Author                       : Richard Frowijn
  *  Function                     : qnx4 global filesystem definitions
- *  Version                      : 1.0.2
- *  Last modified                : 2000-01-31
- *
  *  History                      : 23-03-1998 created
  */
 #ifndef _LINUX_QNX4_FS_H
 #define _LINUX_QNX4_FS_H
 
+#include <linux/types.h>
 #include <linux/qnxtypes.h>
 #include <linux/magic.h>
 
diff --git a/include/linux/qnxtypes.h b/include/linux/qnxtypes.h
index a3eb1137857b..bebbe5cc4fb8 100644
--- a/include/linux/qnxtypes.h
+++ b/include/linux/qnxtypes.h
@@ -2,9 +2,6 @@
  *  Name                         : qnxtypes.h
  *  Author                       : Richard Frowijn
  *  Function                     : standard qnx types
- *  Version                      : 1.0.2
- *  Last modified                : 2000-01-06
- *
  *  History                      : 22-03-1998 created
  *
  */
@@ -12,6 +9,8 @@
 #ifndef _QNX4TYPES_H
 #define _QNX4TYPES_H
 
+#include <linux/types.h>
+
 typedef __le16 qnx4_nxtnt_t;
 typedef __u8  qnx4_ftype_t;
 
-- 
cgit v1.2.3


From 87df4de8073f922a1f643b9fa6ba0412d5529ecf Mon Sep 17 00:00:00 2001
From: Benny Halevy <bhalevy@panasas.com>
Date: Mon, 15 Dec 2008 19:42:03 +0200
Subject: nfsd: last_byte_offset

refactor the nfs4 server lock code to use last_byte_offset
to compute the last byte covered by the lock.  Check for overflow
so that the last byte is set to NFS4_MAX_UINT64 if offset + len
wraps around.

Also, use NFS4_MAX_UINT64 for ~(u64)0 where appropriate.

Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4state.c  | 42 ++++++++++++++++++++++++++----------------
 include/linux/nfs4.h |  2 ++
 2 files changed, 28 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index e62d0e3df8b3..88db7d3ec120 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2416,6 +2416,26 @@ out:
 #define LOCK_HASH_SIZE             (1 << LOCK_HASH_BITS)
 #define LOCK_HASH_MASK             (LOCK_HASH_SIZE - 1)
 
+static inline u64
+end_offset(u64 start, u64 len)
+{
+	u64 end;
+
+	end = start + len;
+	return end >= start ? end: NFS4_MAX_UINT64;
+}
+
+/* last octet in a range */
+static inline u64
+last_byte_offset(u64 start, u64 len)
+{
+	u64 end;
+
+	BUG_ON(!len);
+	end = start + len;
+	return end > start ? end - 1: NFS4_MAX_UINT64;
+}
+
 #define lockownerid_hashval(id) \
         ((id) & LOCK_HASH_MASK)
 
@@ -2519,8 +2539,8 @@ nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny)
 		deny->ld_clientid.cl_id = 0;
 	}
 	deny->ld_start = fl->fl_start;
-	deny->ld_length = ~(u64)0;
-	if (fl->fl_end != ~(u64)0)
+	deny->ld_length = NFS4_MAX_UINT64;
+	if (fl->fl_end != NFS4_MAX_UINT64)
 		deny->ld_length = fl->fl_end - fl->fl_start + 1;        
 	deny->ld_type = NFS4_READ_LT;
 	if (fl->fl_type != F_RDLCK)
@@ -2617,7 +2637,7 @@ out:
 static int
 check_lock_length(u64 offset, u64 length)
 {
-	return ((length == 0)  || ((length != ~(u64)0) &&
+	return ((length == 0)  || ((length != NFS4_MAX_UINT64) &&
 	     LOFF_OVERFLOW(offset, length)));
 }
 
@@ -2737,11 +2757,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	file_lock.fl_lmops = &nfsd_posix_mng_ops;
 
 	file_lock.fl_start = lock->lk_offset;
-	if ((lock->lk_length == ~(u64)0) || 
-			LOFF_OVERFLOW(lock->lk_offset, lock->lk_length))
-		file_lock.fl_end = ~(u64)0;
-	else
-		file_lock.fl_end = lock->lk_offset + lock->lk_length - 1;
+	file_lock.fl_end = last_byte_offset(lock->lk_offset, lock->lk_length);
 	nfs4_transform_lock_offset(&file_lock);
 
 	/*
@@ -2858,10 +2874,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	file_lock.fl_lmops = &nfsd_posix_mng_ops;
 
 	file_lock.fl_start = lockt->lt_offset;
-	if ((lockt->lt_length == ~(u64)0) || LOFF_OVERFLOW(lockt->lt_offset, lockt->lt_length))
-		file_lock.fl_end = ~(u64)0;
-	else
-		file_lock.fl_end = lockt->lt_offset + lockt->lt_length - 1;
+	file_lock.fl_end = last_byte_offset(lockt->lt_offset, lockt->lt_length);
 
 	nfs4_transform_lock_offset(&file_lock);
 
@@ -2917,10 +2930,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	file_lock.fl_lmops = &nfsd_posix_mng_ops;
 	file_lock.fl_start = locku->lu_offset;
 
-	if ((locku->lu_length == ~(u64)0) || LOFF_OVERFLOW(locku->lu_offset, locku->lu_length))
-		file_lock.fl_end = ~(u64)0;
-	else
-		file_lock.fl_end = locku->lu_offset + locku->lu_length - 1;
+	file_lock.fl_end = last_byte_offset(locku->lu_offset, locku->lu_length);
 	nfs4_transform_lock_offset(&file_lock);
 
 	/*
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index ea0366769484..b912311a56b1 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -88,6 +88,8 @@
 #define NFS4_ACE_GENERIC_EXECUTE              0x001200A0
 #define NFS4_ACE_MASK_ALL                     0x001F01FF
 
+#define NFS4_MAX_UINT64	(~(u64)0)
+
 enum nfs4_acl_whotype {
 	NFS4_ACL_WHO_NAMED = 0,
 	NFS4_ACL_WHO_OWNER,
-- 
cgit v1.2.3


From db43910cb42285a99f45f7e0a0a32e32d0b61dcf Mon Sep 17 00:00:00 2001
From: Benny Halevy <bhalevy@panasas.com>
Date: Mon, 15 Dec 2008 19:42:24 +0200
Subject: nfsd: get rid of NFSD_VERSION

Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/nfsd/nfsd.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
index 21269405ffe2..e19f45991b2e 100644
--- a/include/linux/nfsd/nfsd.h
+++ b/include/linux/nfsd/nfsd.h
@@ -23,7 +23,6 @@
 /*
  * nfsd version
  */
-#define NFSD_VERSION		"0.5"
 #define NFSD_SUPPORTED_MINOR_VERSION	0
 
 /*
-- 
cgit v1.2.3