From 6902c0bead4ce266226fc0c5b3828b850bdc884a Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Fri, 6 Jun 2008 01:33:22 -0400
Subject: Input: gameport - make gameport_register_driver() return errors

Perform actual driver registration right in gameport_register_driver()
instead of offloading it to kgameportd and return proper error code to
callers if driver registration fails.

Note that driver <-> port matching is still done by kgameportd.

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 include/linux/gameport.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gameport.h b/include/linux/gameport.h
index f64e29c0ef3f..5126125afd4e 100644
--- a/include/linux/gameport.h
+++ b/include/linux/gameport.h
@@ -146,10 +146,11 @@ static inline void gameport_unpin_driver(struct gameport *gameport)
 	mutex_unlock(&gameport->drv_mutex);
 }
 
-void __gameport_register_driver(struct gameport_driver *drv, struct module *owner);
-static inline void gameport_register_driver(struct gameport_driver *drv)
+int __gameport_register_driver(struct gameport_driver *drv,
+				struct module *owner, const char *mod_name);
+static inline int gameport_register_driver(struct gameport_driver *drv)
 {
-	__gameport_register_driver(drv, THIS_MODULE);
+	return __gameport_register_driver(drv, THIS_MODULE, KBUILD_MODNAME);
 }
 
 void gameport_unregister_driver(struct gameport_driver *drv);
-- 
cgit v1.2.3


From 8c4b3c29329eb7ffded3023e6d65bc415cb4e215 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Fri, 6 Jun 2008 01:33:51 -0400
Subject: Input: gameport - mark gameport_register_driver() __must_check

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 include/linux/gameport.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/gameport.h b/include/linux/gameport.h
index 5126125afd4e..0cd825f7363a 100644
--- a/include/linux/gameport.h
+++ b/include/linux/gameport.h
@@ -148,7 +148,7 @@ static inline void gameport_unpin_driver(struct gameport *gameport)
 
 int __gameport_register_driver(struct gameport_driver *drv,
 				struct module *owner, const char *mod_name);
-static inline int gameport_register_driver(struct gameport_driver *drv)
+static inline int __must_check gameport_register_driver(struct gameport_driver *drv)
 {
 	return __gameport_register_driver(drv, THIS_MODULE, KBUILD_MODNAME);
 }
-- 
cgit v1.2.3


From 03bac96fae0efdb25e2059e5accbe4f3ee6328dd Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Mon, 23 Jun 2008 10:47:34 -0400
Subject: Input: expand keycode space

Expand the number of potential key codes from 512 to 768 since people
are coming up with more and more keys.

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 include/linux/input.h           | 2 +-
 include/linux/mod_devicetable.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/input.h b/include/linux/input.h
index a5802c9c81a4..7fae1dee356a 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -579,7 +579,7 @@ struct input_absinfo {
 
 /* We avoid low common keys in module aliases so they don't get huge. */
 #define KEY_MIN_INTERESTING	KEY_MUTE
-#define KEY_MAX			0x1ff
+#define KEY_MAX			0x2ff
 #define KEY_CNT			(KEY_MAX+1)
 
 /*
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index c4db5827963d..0dddfa44ec19 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -274,7 +274,7 @@ struct pcmcia_device_id {
 /* Input */
 #define INPUT_DEVICE_ID_EV_MAX		0x1f
 #define INPUT_DEVICE_ID_KEY_MIN_INTERESTING	0x71
-#define INPUT_DEVICE_ID_KEY_MAX		0x1ff
+#define INPUT_DEVICE_ID_KEY_MAX		0x2ff
 #define INPUT_DEVICE_ID_REL_MAX		0x0f
 #define INPUT_DEVICE_ID_ABS_MAX		0x3f
 #define INPUT_DEVICE_ID_MSC_MAX		0x07
-- 
cgit v1.2.3


From 5a599a15182ed48e5bf54111feb3b21e425e194d Mon Sep 17 00:00:00 2001
From: Aristeu Rozanski <aris@ruivo.org>
Date: Mon, 23 Jun 2008 10:47:53 -0400
Subject: Input: add keycodes for remote controls/phone keypads

The new keys are separate from normal numeric keys and standard
numeric keypads. The userspace should not attempt to apply modifiers
like shift and NumLock to these so tey work properly regardless of
the language mapping used.

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 include/linux/input.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/input.h b/include/linux/input.h
index 7fae1dee356a..b86fb5581ce6 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -577,6 +577,19 @@ struct input_absinfo {
 #define KEY_BRL_DOT9		0x1f9
 #define KEY_BRL_DOT10		0x1fa
 
+#define KEY_NUMERIC_0		0x200	/* used by phones, remote controls, */
+#define KEY_NUMERIC_1		0x201	/* and other keypads */
+#define KEY_NUMERIC_2		0x202
+#define KEY_NUMERIC_3		0x203
+#define KEY_NUMERIC_4		0x204
+#define KEY_NUMERIC_5		0x205
+#define KEY_NUMERIC_6		0x206
+#define KEY_NUMERIC_7		0x207
+#define KEY_NUMERIC_8		0x208
+#define KEY_NUMERIC_9		0x209
+#define KEY_NUMERIC_STAR	0x20a
+#define KEY_NUMERIC_POUND	0x20b
+
 /* We avoid low common keys in module aliases so they don't get huge. */
 #define KEY_MIN_INTERESTING	KEY_MUTE
 #define KEY_MAX			0x2ff
-- 
cgit v1.2.3


From b845b517b5e3706a3729f6ea83b88ab85f0725b0 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 8 Aug 2008 21:47:09 +0200
Subject: printk: robustify printk

Avoid deadlocks against rq->lock and xtime_lock by deferring the klogd
wakeup by polling from the timer tick.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/kernel.h   |  4 ++++
 kernel/printk.c          | 19 +++++++++++++++++--
 kernel/time/tick-sched.c |  2 +-
 kernel/timer.c           |  1 +
 4 files changed, 23 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index aaa998f65c7a..113ac8d0425f 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -200,6 +200,8 @@ extern struct ratelimit_state printk_ratelimit_state;
 extern int printk_ratelimit(void);
 extern bool printk_timed_ratelimit(unsigned long *caller_jiffies,
 				   unsigned int interval_msec);
+extern void printk_tick(void);
+extern int printk_needs_cpu(int);
 #else
 static inline int vprintk(const char *s, va_list args)
 	__attribute__ ((format (printf, 1, 0)));
@@ -211,6 +213,8 @@ static inline int printk_ratelimit(void) { return 0; }
 static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, \
 					  unsigned int interval_msec)	\
 		{ return false; }
+static inline void printk_tick(void) { }
+static inline int printk_needs_cpu(int) { return 0; }
 #endif
 
 extern void asmlinkage __attribute__((format(printf, 1, 2)))
diff --git a/kernel/printk.c b/kernel/printk.c
index b51b1567bb55..655cc2ca10cc 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -982,10 +982,25 @@ int is_console_locked(void)
 	return console_locked;
 }
 
-void wake_up_klogd(void)
+static DEFINE_PER_CPU(int, printk_pending);
+
+void printk_tick(void)
 {
-	if (!oops_in_progress && waitqueue_active(&log_wait))
+	if (__get_cpu_var(printk_pending)) {
+		__get_cpu_var(printk_pending) = 0;
 		wake_up_interruptible(&log_wait);
+	}
+}
+
+int printk_needs_cpu(int cpu)
+{
+	return per_cpu(printk_pending, cpu);
+}
+
+void wake_up_klogd(void)
+{
+	if (waitqueue_active(&log_wait))
+		__get_cpu_var(printk_pending) = 1;
 }
 
 /**
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 825b4c00fe44..c13d4f182370 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -255,7 +255,7 @@ void tick_nohz_stop_sched_tick(int inidle)
 	next_jiffies = get_next_timer_interrupt(last_jiffies);
 	delta_jiffies = next_jiffies - last_jiffies;
 
-	if (rcu_needs_cpu(cpu))
+	if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu))
 		delta_jiffies = 1;
 	/*
 	 * Do not stop the tick, if we are only one off
diff --git a/kernel/timer.c b/kernel/timer.c
index 03bc7f1f1593..510fe69351ca 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -978,6 +978,7 @@ void update_process_times(int user_tick)
 	run_local_timers();
 	if (rcu_pending(cpu))
 		rcu_check_callbacks(cpu, user_tick);
+	printk_tick();
 	scheduler_tick();
 	run_posix_cpu_timers(p);
 }
-- 
cgit v1.2.3


From ced9cd40ac14111befd6b0c73ec90106c22a3fd7 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 11 Aug 2008 14:38:12 +0200
Subject: printk: robustify printk, fix
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

fix:

 include/linux/kernel.h: In function ‘printk_needs_cpu':
 include/linux/kernel.h:217: error: parameter name omitted

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/kernel.h | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 113ac8d0425f..3652a4564126 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -200,8 +200,6 @@ extern struct ratelimit_state printk_ratelimit_state;
 extern int printk_ratelimit(void);
 extern bool printk_timed_ratelimit(unsigned long *caller_jiffies,
 				   unsigned int interval_msec);
-extern void printk_tick(void);
-extern int printk_needs_cpu(int);
 #else
 static inline int vprintk(const char *s, va_list args)
 	__attribute__ ((format (printf, 1, 0)));
@@ -213,10 +211,11 @@ static inline int printk_ratelimit(void) { return 0; }
 static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, \
 					  unsigned int interval_msec)	\
 		{ return false; }
-static inline void printk_tick(void) { }
-static inline int printk_needs_cpu(int) { return 0; }
 #endif
 
+extern int printk_needs_cpu(int cpu);
+extern void printk_tick(void);
+
 extern void asmlinkage __attribute__((format(printf, 1, 2)))
 	early_printk(const char *fmt, ...);
 
-- 
cgit v1.2.3


From 978b0116cd225682a29e3d1d5010319bf2de32c2 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sat, 6 Sep 2008 20:04:36 +0200
Subject: softirq: allocate less vectors

We don't need whole 32 of them, only NR_SOFTIRQS.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/interrupt.h | 2 ++
 kernel/softirq.c          | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 58ff4e74b2f3..54b3623434ec 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -252,6 +252,8 @@ enum
 	HRTIMER_SOFTIRQ,
 #endif
 	RCU_SOFTIRQ, 	/* Preferable RCU should always be the last softirq */
+
+	NR_SOFTIRQS
 };
 
 /* softirq mask and active fields moved to irq_cpustat_t in
diff --git a/kernel/softirq.c b/kernel/softirq.c
index c506f266a6b9..82e32aadedd8 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -46,7 +46,7 @@ irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
 EXPORT_SYMBOL(irq_stat);
 #endif
 
-static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp;
+static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
 
 static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
 
-- 
cgit v1.2.3


From 4d5975e5016a9025814b92981de21eaf9203caa6 Mon Sep 17 00:00:00 2001
From: Eric Miao <eric.y.miao@gmail.com>
Date: Wed, 10 Sep 2008 12:06:15 -0400
Subject: Input: ads7846 - introduce .gpio_pendown to get pendown state

The GPIO connected to ADS7846 nPENIRQ signal is usually used to get
the pendown state as well. Introduce a .gpio_pendown, and use this
to decide the pendown state if .get_pendown_state is NULL.

Signed-off-by: Eric Miao <eric.miao@marvell.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/touchscreen/ads7846.c | 68 +++++++++++++++++++++++++++++--------
 include/linux/spi/ads7846.h         |  3 ++
 2 files changed, 57 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c
index efbbbe48621a..6020a7dcce33 100644
--- a/drivers/input/touchscreen/ads7846.c
+++ b/drivers/input/touchscreen/ads7846.c
@@ -24,6 +24,7 @@
 #include <linux/input.h>
 #include <linux/interrupt.h>
 #include <linux/slab.h>
+#include <linux/gpio.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/ads7846.h>
 #include <asm/irq.h>
@@ -116,6 +117,7 @@ struct ads7846 {
 	void			*filter_data;
 	void			(*filter_cleanup)(void *data);
 	int			(*get_pendown_state)(void);
+	int			gpio_pendown;
 };
 
 /* leave chip selected when we're done, for quicker re-select? */
@@ -492,6 +494,14 @@ static struct attribute_group ads784x_attr_group = {
 
 /*--------------------------------------------------------------------------*/
 
+static int get_pendown_state(struct ads7846 *ts)
+{
+	if (ts->get_pendown_state)
+		return ts->get_pendown_state();
+
+	return !gpio_get_value(ts->gpio_pendown);
+}
+
 /*
  * PENIRQ only kicks the timer.  The timer only reissues the SPI transfer,
  * to retrieve touchscreen status.
@@ -551,7 +561,7 @@ static void ads7846_rx(void *ads)
 	 */
 	if (ts->penirq_recheck_delay_usecs) {
 		udelay(ts->penirq_recheck_delay_usecs);
-		if (!ts->get_pendown_state())
+		if (!get_pendown_state(ts))
 			Rt = 0;
 	}
 
@@ -678,7 +688,7 @@ static enum hrtimer_restart ads7846_timer(struct hrtimer *handle)
 
 	spin_lock_irq(&ts->lock);
 
-	if (unlikely(!ts->get_pendown_state() ||
+	if (unlikely(!get_pendown_state(ts) ||
 		     device_suspended(&ts->spi->dev))) {
 		if (ts->pendown) {
 			struct input_dev *input = ts->input;
@@ -717,7 +727,7 @@ static irqreturn_t ads7846_irq(int irq, void *handle)
 	unsigned long flags;
 
 	spin_lock_irqsave(&ts->lock, flags);
-	if (likely(ts->get_pendown_state())) {
+	if (likely(get_pendown_state(ts))) {
 		if (!ts->irq_disabled) {
 			/* The ARM do_simple_IRQ() dispatcher doesn't act
 			 * like the other dispatchers:  it will report IRQs
@@ -807,6 +817,36 @@ static int ads7846_resume(struct spi_device *spi)
 	return 0;
 }
 
+static int __devinit setup_pendown(struct spi_device *spi, struct ads7846 *ts)
+{
+	struct ads7846_platform_data *pdata = spi->dev.platform_data;
+	int err;
+
+	/* REVISIT when the irq can be triggered active-low, or if for some
+	 * reason the touchscreen isn't hooked up, we don't need to access
+	 * the pendown state.
+	 */
+	if (!pdata->get_pendown_state && !gpio_is_valid(pdata->gpio_pendown)) {
+		dev_err(&spi->dev, "no get_pendown_state nor gpio_pendown?\n");
+		return -EINVAL;
+	}
+
+	if (pdata->get_pendown_state) {
+		ts->get_pendown_state = pdata->get_pendown_state;
+		return 0;
+	}
+
+	err = gpio_request(pdata->gpio_pendown, "ads7846_pendown");
+	if (err) {
+		dev_err(&spi->dev, "failed to request pendown GPIO%d\n",
+				pdata->gpio_pendown);
+		return err;
+	}
+
+	ts->gpio_pendown = pdata->gpio_pendown;
+	return 0;
+}
+
 static int __devinit ads7846_probe(struct spi_device *spi)
 {
 	struct ads7846			*ts;
@@ -834,15 +874,6 @@ static int __devinit ads7846_probe(struct spi_device *spi)
 		return -EINVAL;
 	}
 
-	/* REVISIT when the irq can be triggered active-low, or if for some
-	 * reason the touchscreen isn't hooked up, we don't need to access
-	 * the pendown state.
-	 */
-	if (pdata->get_pendown_state == NULL) {
-		dev_dbg(&spi->dev, "no get_pendown_state function?\n");
-		return -EINVAL;
-	}
-
 	/* We'd set TX wordsize 8 bits and RX wordsize to 13 bits ... except
 	 * that even if the hardware can do that, the SPI controller driver
 	 * may not.  So we stick to very-portable 8 bit words, both RX and TX.
@@ -894,7 +925,10 @@ static int __devinit ads7846_probe(struct spi_device *spi)
 		ts->filter_data = ts;
 	} else
 		ts->filter = ads7846_no_filter;
-	ts->get_pendown_state = pdata->get_pendown_state;
+
+	err = setup_pendown(spi, ts);
+	if (err)
+		goto err_cleanup_filter;
 
 	if (pdata->penirq_recheck_delay_usecs)
 		ts->penirq_recheck_delay_usecs =
@@ -1086,7 +1120,7 @@ static int __devinit ads7846_probe(struct spi_device *spi)
 			spi->dev.driver->name, ts)) {
 		dev_dbg(&spi->dev, "irq %d busy?\n", spi->irq);
 		err = -EBUSY;
-		goto err_cleanup_filter;
+		goto err_free_gpio;
 	}
 
 	err = ads784x_hwmon_register(spi, ts);
@@ -1117,6 +1151,9 @@ static int __devinit ads7846_probe(struct spi_device *spi)
 	ads784x_hwmon_unregister(spi, ts);
  err_free_irq:
 	free_irq(spi->irq, ts);
+ err_free_gpio:
+	if (ts->gpio_pendown != -1)
+		gpio_free(ts->gpio_pendown);
  err_cleanup_filter:
 	if (ts->filter_cleanup)
 		ts->filter_cleanup(ts->filter_data);
@@ -1141,6 +1178,9 @@ static int __devexit ads7846_remove(struct spi_device *spi)
 	/* suspend left the IRQ disabled */
 	enable_irq(ts->spi->irq);
 
+	if (ts->gpio_pendown != -1)
+		gpio_free(ts->gpio_pendown);
+
 	if (ts->filter_cleanup)
 		ts->filter_cleanup(ts->filter_data);
 
diff --git a/include/linux/spi/ads7846.h b/include/linux/spi/ads7846.h
index daf744017a31..05eab2f11e63 100644
--- a/include/linux/spi/ads7846.h
+++ b/include/linux/spi/ads7846.h
@@ -43,6 +43,9 @@ struct ads7846_platform_data {
 	u16	debounce_tol;		/* tolerance used for filtering */
 	u16	debounce_rep;		/* additional consecutive good readings
 					 * required after the first two */
+	int	gpio_pendown;		/* the GPIO used to decide the pendown
+					 * state if get_pendown_state == NULL
+					 */
 	int	(*get_pendown_state)(void);
 	int	(*filter_init)	(struct ads7846_platform_data *pdata,
 				 void **filter_data);
-- 
cgit v1.2.3


From 600715dcdf567c86f8b2c6173fcfb4b873e25a19 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Thu, 11 Sep 2008 01:31:45 -0700
Subject: generic: add phys_addr_t for holding physical addresses

Add a kernel-wide "phys_addr_t" which is guaranteed to be able to hold
any physical address.  By default it equals the word size of the
architecture, but a 32-bit architecture can set ARCH_PHYS_ADDR_T_64BIT
if it needs a 64-bit phys_addr_t.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/powerpc/Kconfig             | 3 +++
 arch/powerpc/include/asm/types.h | 7 -------
 arch/x86/Kconfig                 | 3 +++
 include/asm-x86/page_32.h        | 2 --
 include/asm-x86/page_64.h        | 1 -
 include/linux/types.h            | 6 ++++++
 mm/Kconfig                       | 3 +++
 7 files changed, 15 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 587da5e0990f..f5f83ee60411 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -22,6 +22,9 @@ config WORD_SIZE
 config PPC_MERGE
 	def_bool y
 
+config ARCH_PHYS_ADDR_T_64BIT
+       def_bool PPC64 || PHYS_64BIT
+
 config MMU
 	bool
 	default y
diff --git a/arch/powerpc/include/asm/types.h b/arch/powerpc/include/asm/types.h
index d3374bc865ba..c646f34c4e8b 100644
--- a/arch/powerpc/include/asm/types.h
+++ b/arch/powerpc/include/asm/types.h
@@ -48,13 +48,6 @@ typedef struct {
 
 typedef __vector128 vector128;
 
-/* Physical address used by some IO functions */
-#if defined(CONFIG_PPC64) || defined(CONFIG_PHYS_64BIT)
-typedef u64 phys_addr_t;
-#else
-typedef u32 phys_addr_t;
-#endif
-
 #ifdef __powerpc64__
 typedef u64 dma_addr_t;
 #else
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ed92864d1325..a0ffb5188c8c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -932,6 +932,9 @@ config X86_PAE
 	  has the cost of more pagetable lookup overhead, and also
 	  consumes more pagetable space per process.
 
+config ARCH_PHYS_ADDR_T_64BIT
+       def_bool X86_64 || X86_PAE
+
 # Common NUMA Features
 config NUMA
 	bool "Numa Memory Allocation and Scheduler Support (EXPERIMENTAL)"
diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h
index ab8528793f08..d0e58605a520 100644
--- a/include/asm-x86/page_32.h
+++ b/include/asm-x86/page_32.h
@@ -33,7 +33,6 @@ typedef u64	pmdval_t;
 typedef u64	pudval_t;
 typedef u64	pgdval_t;
 typedef u64	pgprotval_t;
-typedef u64	phys_addr_t;
 
 typedef union {
 	struct {
@@ -54,7 +53,6 @@ typedef unsigned long	pmdval_t;
 typedef unsigned long	pudval_t;
 typedef unsigned long	pgdval_t;
 typedef unsigned long	pgprotval_t;
-typedef unsigned long	phys_addr_t;
 
 typedef union {
 	pteval_t pte;
diff --git a/include/asm-x86/page_64.h b/include/asm-x86/page_64.h
index c6916c83e6b1..2456fbf2d7dc 100644
--- a/include/asm-x86/page_64.h
+++ b/include/asm-x86/page_64.h
@@ -79,7 +79,6 @@ typedef unsigned long	pmdval_t;
 typedef unsigned long	pudval_t;
 typedef unsigned long	pgdval_t;
 typedef unsigned long	pgprotval_t;
-typedef unsigned long	phys_addr_t;
 
 typedef struct page *pgtable_t;
 
diff --git a/include/linux/types.h b/include/linux/types.h
index d4a9ce6e2760..022c668496da 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -197,6 +197,12 @@ typedef u64 resource_size_t;
 typedef u32 resource_size_t;
 #endif
 
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+typedef u64 phys_addr_t;
+#else
+typedef u32 phys_addr_t;
+#endif
+
 struct ustat {
 	__kernel_daddr_t	f_tfree;
 	__kernel_ino_t		f_tinode;
diff --git a/mm/Kconfig b/mm/Kconfig
index 0bd9c2dbb2a0..91ee3922510a 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -187,6 +187,9 @@ config RESOURCES_64BIT
 	help
 	  This option allows memory and IO resources to be 64 bit.
 
+config PHYS_ADDR_T_64BIT
+	def_bool 64BIT || ARCH_PHYS_ADDR_T_64BIT
+
 config ZONE_DMA_FLAG
 	int
 	default "0" if !ZONE_DMA
-- 
cgit v1.2.3


From 947d0496cf3e12ebfa70b3eaf561c25403247ce9 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Thu, 11 Sep 2008 01:31:48 -0700
Subject: generic: make PFN_PHYS explicitly return phys_addr_t

PFN_PHYS, as its name suggests, turns a pfn into a physical address.
However, it is a macro which just operates on its argument without
modifying its type.  pfns are typed unsigned long, but an unsigned
long may not be long enough to hold a physical address (32-bit systems
with more than 32 bits of physcial address).

Make sure we cast to phys_addr_t to return a complete result.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/m32r/mm/discontig.c   | 4 ++--
 include/asm-x86/xen/page.h | 4 ++--
 include/linux/pfn.h        | 6 +++++-
 3 files changed, 9 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/arch/m32r/mm/discontig.c b/arch/m32r/mm/discontig.c
index cbc3c4c54566..7daf897292cf 100644
--- a/arch/m32r/mm/discontig.c
+++ b/arch/m32r/mm/discontig.c
@@ -111,9 +111,9 @@ unsigned long __init setup_memory(void)
 				initrd_start, INITRD_SIZE);
 		} else {
 			printk("initrd extends beyond end of memory "
-				"(0x%08lx > 0x%08lx)\ndisabling initrd\n",
+				"(0x%08lx > 0x%08llx)\ndisabling initrd\n",
 				INITRD_START + INITRD_SIZE,
-				PFN_PHYS(max_low_pfn));
+			        (unsigned long long)PFN_PHYS(max_low_pfn));
 
 			initrd_start = 0;
 		}
diff --git a/include/asm-x86/xen/page.h b/include/asm-x86/xen/page.h
index 7b3835d3b77d..de9170b537ab 100644
--- a/include/asm-x86/xen/page.h
+++ b/include/asm-x86/xen/page.h
@@ -76,13 +76,13 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
 static inline xmaddr_t phys_to_machine(xpaddr_t phys)
 {
 	unsigned offset = phys.paddr & ~PAGE_MASK;
-	return XMADDR(PFN_PHYS((u64)pfn_to_mfn(PFN_DOWN(phys.paddr))) | offset);
+	return XMADDR(PFN_PHYS(pfn_to_mfn(PFN_DOWN(phys.paddr))) | offset);
 }
 
 static inline xpaddr_t machine_to_phys(xmaddr_t machine)
 {
 	unsigned offset = machine.maddr & ~PAGE_MASK;
-	return XPADDR(PFN_PHYS((u64)mfn_to_pfn(PFN_DOWN(machine.maddr))) | offset);
+	return XPADDR(PFN_PHYS(mfn_to_pfn(PFN_DOWN(machine.maddr))) | offset);
 }
 
 /*
diff --git a/include/linux/pfn.h b/include/linux/pfn.h
index bb01f8b92b56..7646637221f3 100644
--- a/include/linux/pfn.h
+++ b/include/linux/pfn.h
@@ -1,9 +1,13 @@
 #ifndef _LINUX_PFN_H_
 #define _LINUX_PFN_H_
 
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+#endif
+
 #define PFN_ALIGN(x)	(((unsigned long)(x) + (PAGE_SIZE - 1)) & PAGE_MASK)
 #define PFN_UP(x)	(((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
 #define PFN_DOWN(x)	((x) >> PAGE_SHIFT)
-#define PFN_PHYS(x)	((x) << PAGE_SHIFT)
+#define PFN_PHYS(x)	((phys_addr_t)(x) << PAGE_SHIFT)
 
 #endif
-- 
cgit v1.2.3


From 8308c54d7e312f7a03e2ce2057d0837e6fe3843f Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Thu, 11 Sep 2008 01:31:50 -0700
Subject: generic: redefine resource_size_t as phys_addr_t

There's no good reason why a resource_size_t shouldn't just be a
physical address, so simply redefine it in terms of phys_addr_t.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/powerpc/platforms/Kconfig.cputype |  1 -
 arch/powerpc/sysdev/ppc4xx_pci.c       | 16 ++++++----------
 arch/x86/Kconfig                       |  1 -
 arch/x86/kernel/e820.c                 |  4 +---
 drivers/pci/setup-bus.c                |  9 ++++-----
 include/linux/types.h                  |  8 ++------
 6 files changed, 13 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 7f6512733862..be852fd407a8 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -135,7 +135,6 @@ config PTE_64BIT
 config PHYS_64BIT
 	bool 'Large physical address support' if E500
 	depends on 44x || E500
-	select RESOURCES_64BIT
 	default y if 44x
 	---help---
 	  This option enables kernel support for larger than 32-bit physical
diff --git a/arch/powerpc/sysdev/ppc4xx_pci.c b/arch/powerpc/sysdev/ppc4xx_pci.c
index fb368dfde5d4..e8a76d9539db 100644
--- a/arch/powerpc/sysdev/ppc4xx_pci.c
+++ b/arch/powerpc/sysdev/ppc4xx_pci.c
@@ -41,13 +41,10 @@ extern unsigned long total_memory;
 #define U64_TO_U32_LOW(val)	((u32)((val) & 0x00000000ffffffffULL))
 #define U64_TO_U32_HIGH(val)	((u32)((val) >> 32))
 
-#ifdef CONFIG_RESOURCES_64BIT
-#define RES_TO_U32_LOW(val)	U64_TO_U32_LOW(val)
-#define RES_TO_U32_HIGH(val)	U64_TO_U32_HIGH(val)
-#else
-#define RES_TO_U32_LOW(val)	(val)
-#define RES_TO_U32_HIGH(val)	(0)
-#endif
+#define RES_TO_U32_LOW(val)	\
+	((sizeof(resource_size_t) > sizeof(u32)) ? U64_TO_U32_LOW(val) : (val))
+#define RES_TO_U32_HIGH(val)	\
+	((sizeof(resource_size_t) > sizeof(u32)) ? U64_TO_U32_HIGH(val) : (0))
 
 static inline int ppc440spe_revA(void)
 {
@@ -145,12 +142,11 @@ static int __init ppc4xx_parse_dma_ranges(struct pci_controller *hose,
 
 		/* Use that */
 		res->start = pci_addr;
-#ifndef CONFIG_RESOURCES_64BIT
 		/* Beware of 32 bits resources */
-		if ((pci_addr + size) > 0x100000000ull)
+		if (sizeof(resource_size_t) == sizeof(u32) &&
+		    (pci_addr + size) > 0x100000000ull)
 			res->end = 0xffffffff;
 		else
-#endif
 			res->end = res->start + size - 1;
 		break;
 	}
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index a0ffb5188c8c..b4e1875f9861 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -925,7 +925,6 @@ config X86_PAE
 	def_bool n
 	prompt "PAE (Physical Address Extension) Support"
 	depends on X86_32 && !HIGHMEM4G
-	select RESOURCES_64BIT
 	help
 	  PAE is required for NX support, and furthermore enables
 	  larger swapspace support for non-overcommit purposes. It
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 66e48aa2dd1b..477f4bb7e552 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1276,12 +1276,10 @@ void __init e820_reserve_resources(void)
 	res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map);
 	for (i = 0; i < e820.nr_map; i++) {
 		end = e820.map[i].addr + e820.map[i].size - 1;
-#ifndef CONFIG_RESOURCES_64BIT
-		if (end > 0x100000000ULL) {
+		if (end != (resource_size_t)end) {
 			res++;
 			continue;
 		}
-#endif
 		res->name = e820_type_to_string(e820.map[i].type);
 		res->start = e820.map[i].addr;
 		res->end = end;
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index 1aad599816f7..f250a90ee450 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -378,11 +378,10 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long
 	align = 0;
 	min_align = 0;
 	for (order = 0; order <= max_order; order++) {
-#ifdef CONFIG_RESOURCES_64BIT
-		resource_size_t align1 = 1ULL << (order + 20);
-#else
-		resource_size_t align1 = 1U << (order + 20);
-#endif
+		resource_size_t align1 = 1;
+
+		align1 <<= (order + 20);
+
 		if (!align)
 			min_align = align1;
 		else if (ALIGN(align + min_align, min_align) < align1)
diff --git a/include/linux/types.h b/include/linux/types.h
index 022c668496da..f24f7beb47df 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -191,18 +191,14 @@ typedef __u32 __bitwise __wsum;
 #ifdef __KERNEL__
 typedef unsigned __bitwise__ gfp_t;
 
-#ifdef CONFIG_RESOURCES_64BIT
-typedef u64 resource_size_t;
-#else
-typedef u32 resource_size_t;
-#endif
-
 #ifdef CONFIG_PHYS_ADDR_T_64BIT
 typedef u64 phys_addr_t;
 #else
 typedef u32 phys_addr_t;
 #endif
 
+typedef phys_addr_t resource_size_t;
+
 struct ustat {
 	__kernel_daddr_t	f_tfree;
 	__kernel_ino_t		f_tinode;
-- 
cgit v1.2.3


From 379daf6290814e41f14880094b7b773640df2461 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Thu, 25 Sep 2008 18:43:34 -0700
Subject: IO resources, x86: ioremap sanity check to catch mapping requests
 exceeding the BAR sizes

Go through the iomem resource tree to check if any of the ioremap()
requests span more than any slot in the iomem resource tree and do
a WARN_ON() if we hit this check.

This will raise a red-flag, if some driver is mapping more than what
is needed. And hopefully identify possible corruptions much earlier.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/ioremap.c  |  6 ++++++
 include/linux/ioport.h |  1 +
 kernel/resource.c      | 33 +++++++++++++++++++++++++++++++++
 3 files changed, 40 insertions(+)

(limited to 'include/linux')

diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index d4b6e6a29ae3..c818b45bd07d 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -149,6 +149,12 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
 	if (is_ISA_range(phys_addr, last_addr))
 		return (__force void __iomem *)phys_to_virt(phys_addr);
 
+	/*
+	 * Check if the request spans more than any BAR in the iomem resource
+	 * tree.
+	 */
+	WARN_ON(iomem_map_sanity_check(phys_addr, size));
+
 	/*
 	 * Don't allow anybody to remap normal RAM that we're using..
 	 */
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index fded376b94e3..01712cf1a38b 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -169,6 +169,7 @@ extern struct resource * __devm_request_region(struct device *dev,
 
 extern void __devm_release_region(struct device *dev, struct resource *parent,
 				  resource_size_t start, resource_size_t n);
+extern int iomem_map_sanity_check(resource_size_t addr, unsigned long size);
 
 #endif /* __ASSEMBLY__ */
 #endif	/* _LINUX_IOPORT_H */
diff --git a/kernel/resource.c b/kernel/resource.c
index fc59dcc4795b..1d003a50ee17 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -827,3 +827,36 @@ static int __init reserve_setup(char *str)
 }
 
 __setup("reserve=", reserve_setup);
+
+/*
+ * Check if the requested addr and size spans more than any slot in the
+ * iomem resource tree.
+ */
+int iomem_map_sanity_check(resource_size_t addr, unsigned long size)
+{
+	struct resource *p = &iomem_resource;
+	int err = 0;
+	loff_t l;
+
+	read_lock(&resource_lock);
+	for (p = p->child; p ; p = r_next(NULL, p, &l)) {
+		/*
+		 * We can probably skip the resources without
+		 * IORESOURCE_IO attribute?
+		 */
+		if (p->start >= addr + size)
+			continue;
+		if (p->end < addr)
+			continue;
+		if (p->start <= addr && (p->end >= addr + size - 1))
+			continue;
+		printk(KERN_WARNING "resource map sanity check conflict: "
+		       "0x%llx 0x%llx 0x%llx 0x%llx %s\n",
+		       addr, addr + size - 1, p->start, p->end, p->name);
+		err = -1;
+		break;
+	}
+	read_unlock(&resource_lock);
+
+	return err;
+}
-- 
cgit v1.2.3


From 1daef0a868370c5a96d031b9202e3354bea060e6 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sun, 27 Jul 2008 18:19:01 -0400
Subject: NFS: Clean up nfs_sb_active/nfs_sb_deactive

Instead of causing umount requests to block on server->active_wq while the
asynchronous sillyrename deletes are executing, we can use the sb->s_active
counter to obtain a reference to the super_block, and then release that
reference in nfs_async_unlink_release().

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c           |  1 -
 fs/nfs/internal.h         |  4 ++--
 fs/nfs/super.c            | 24 ++++++++----------------
 fs/nfs/unlink.c           |  5 +++--
 include/linux/nfs_fs_sb.h |  1 -
 5 files changed, 13 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 5ee23e7058b3..2accb67427c6 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -850,7 +850,6 @@ static struct nfs_server *nfs_alloc_server(void)
 	INIT_LIST_HEAD(&server->client_link);
 	INIT_LIST_HEAD(&server->master_link);
 
-	init_waitqueue_head(&server->active_wq);
 	atomic_set(&server->active, 0);
 
 	server->io_stats = nfs_alloc_iostats();
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 24241fcbb98d..7bcf6ec2d458 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -163,8 +163,8 @@ extern struct rpc_stat nfs_rpcstat;
 
 extern int __init register_nfs_fs(void);
 extern void __exit unregister_nfs_fs(void);
-extern void nfs_sb_active(struct nfs_server *server);
-extern void nfs_sb_deactive(struct nfs_server *server);
+extern void nfs_sb_active(struct super_block *sb);
+extern void nfs_sb_deactive(struct super_block *sb);
 
 /* namespace.c */
 extern char *nfs_path(const char *base,
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index e9b20173fef3..e527fab40419 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -209,7 +209,6 @@ static int nfs_get_sb(struct file_system_type *, int, const char *, void *, stru
 static int nfs_xdev_get_sb(struct file_system_type *fs_type,
 		int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
 static void nfs_kill_super(struct super_block *);
-static void nfs_put_super(struct super_block *);
 static int nfs_remount(struct super_block *sb, int *flags, char *raw_data);
 
 static struct file_system_type nfs_fs_type = {
@@ -232,7 +231,6 @@ static const struct super_operations nfs_sops = {
 	.alloc_inode	= nfs_alloc_inode,
 	.destroy_inode	= nfs_destroy_inode,
 	.write_inode	= nfs_write_inode,
-	.put_super	= nfs_put_super,
 	.statfs		= nfs_statfs,
 	.clear_inode	= nfs_clear_inode,
 	.umount_begin	= nfs_umount_begin,
@@ -337,26 +335,20 @@ void __exit unregister_nfs_fs(void)
 	unregister_filesystem(&nfs_fs_type);
 }
 
-void nfs_sb_active(struct nfs_server *server)
+void nfs_sb_active(struct super_block *sb)
 {
-	atomic_inc(&server->active);
-}
+	struct nfs_server *server = NFS_SB(sb);
 
-void nfs_sb_deactive(struct nfs_server *server)
-{
-	if (atomic_dec_and_test(&server->active))
-		wake_up(&server->active_wq);
+	if (atomic_inc_return(&server->active) == 1)
+		atomic_inc(&sb->s_active);
 }
 
-static void nfs_put_super(struct super_block *sb)
+void nfs_sb_deactive(struct super_block *sb)
 {
 	struct nfs_server *server = NFS_SB(sb);
-	/*
-	 * Make sure there are no outstanding ops to this server.
-	 * If so, wait for them to finish before allowing the
-	 * unmount to continue.
-	 */
-	wait_event(server->active_wq, atomic_read(&server->active) == 0);
+
+	if (atomic_dec_and_test(&server->active))
+		deactivate_super(sb);
 }
 
 /*
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index f089e5839d7d..ecc295347775 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -99,7 +99,7 @@ static void nfs_async_unlink_release(void *calldata)
 
 	nfs_dec_sillycount(data->dir);
 	nfs_free_unlinkdata(data);
-	nfs_sb_deactive(NFS_SB(sb));
+	nfs_sb_deactive(sb);
 }
 
 static const struct rpc_call_ops nfs_unlink_ops = {
@@ -118,6 +118,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
 		.rpc_message = &msg,
 		.callback_ops = &nfs_unlink_ops,
 		.callback_data = data,
+		.workqueue = nfsiod_workqueue,
 		.flags = RPC_TASK_ASYNC,
 	};
 	struct rpc_task *task;
@@ -149,7 +150,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
 		nfs_dec_sillycount(dir);
 		return 0;
 	}
-	nfs_sb_active(NFS_SERVER(dir));
+	nfs_sb_active(dir->i_sb);
 	data->args.fh = NFS_FH(dir);
 	nfs_fattr_init(&data->res.dir_attr);
 
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index c9beacd16c00..4e477ae58699 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -119,7 +119,6 @@ struct nfs_server {
 	void (*destroy)(struct nfs_server *);
 
 	atomic_t active; /* Keep trace of any activity to this server */
-	wait_queue_head_t active_wq;  /* Wait for any activity to stop  */
 
 	/* mountd-related mount options */
 	struct sockaddr_storage	mountd_address;
-- 
cgit v1.2.3


From 1b483a6a7b2998e9c98ad985d7494b9b725bd228 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Date: Mon, 15 Sep 2008 23:09:08 +0000
Subject: powerpc: Remove remains of /proc/ppc_htab

commit 14cf11af6cf608eb8c23e989ddb17a715ddce109 ("powerpc: Merge enough to
start building in arch/powerpc.") unwired /proc/ppc_htab, and commit
917f0af9e5a9ceecf9e72537fabb501254ba321d ("powerpc: Remove arch/ppc and
include/asm-ppc") removed the rest of the /proc/ppc_htab support, but there are
still a few references left. Kill them for good.

Signed-off-by: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 Documentation/powerpc/00-INDEX     |   2 -
 Documentation/powerpc/ppc_htab.txt | 118 -------------------------------------
 include/linux/proc_fs.h            |   1 -
 3 files changed, 121 deletions(-)
 delete mode 100644 Documentation/powerpc/ppc_htab.txt

(limited to 'include/linux')

diff --git a/Documentation/powerpc/00-INDEX b/Documentation/powerpc/00-INDEX
index 29d839ce7327..2f31853899ca 100644
--- a/Documentation/powerpc/00-INDEX
+++ b/Documentation/powerpc/00-INDEX
@@ -18,8 +18,6 @@ mpc52xx.txt
 	- Linux 2.6.x on MPC52xx family
 mpc52xx-device-tree-bindings.txt
 	- MPC5200 Device Tree Bindings
-ppc_htab.txt
-	- info about the Linux/PPC /proc/ppc_htab entry
 smp.txt
 	- use and state info about Linux/PPC on MP machines
 sound.txt
diff --git a/Documentation/powerpc/ppc_htab.txt b/Documentation/powerpc/ppc_htab.txt
deleted file mode 100644
index 8b8c7df29fa9..000000000000
--- a/Documentation/powerpc/ppc_htab.txt
+++ /dev/null
@@ -1,118 +0,0 @@
-                 Information about /proc/ppc_htab
-=====================================================================
-
-This document and the related code was written by me (Cort Dougan), please
-email me (cort@fsmlabs.com) if you have questions, comments or corrections.
-
-Last Change: 2.16.98
-
-This entry in the proc directory is readable by all users but only
-writable by root.
-
-The ppc_htab interface is a user level way of accessing the
-performance monitoring registers as well as providing information
-about the PTE hash table.
-
-1. Reading
-
-  Reading this file will give you information about the memory management
-  hash table that serves as an extended tlb for page translation on the
-  powerpc.  It will also give you information about performance measurement
-  specific to the cpu that you are using.
-
-  Explanation of the 604 Performance Monitoring Fields:
-    MMCR0 - the current value of the MMCR0 register
-    PMC1
-    PMC2 - the value of the performance counters and a
-           description of what events they are counting
-           which are based on MMCR0 bit settings.
-  Explanation of the PTE Hash Table fields:
-
-    Size - hash table size in Kb.
-    Buckets -  number of buckets in the table.
-    Address - the virtual kernel address of the hash table base.
-    Entries - the number of ptes that can be stored in the hash table.
-    User/Kernel - how many pte's are in use by the kernel or user at that time.
-    Overflows - How many of the entries are in their secondary hash location.
-    Percent full - ratio of free pte entries to in use entries.
-    Reloads - Count of how many hash table misses have occurred
-              that were fixed with a reload from the linux tables.
-              Should always be 0 on 603 based machines.
-    Non-error Misses - Count of how many hash table misses have occurred
-              that were completed with the creation of a pte in the linux
-              tables with a call to do_page_fault().
-    Error Misses - Number of misses due to errors such as bad address
-              and permission violations.  This includes kernel access of
-              bad user addresses that are fixed up by the trap handler.
-
-  Note that calculation of the data displayed from /proc/ppc_htab takes
-  a long time and spends a great deal of time in the kernel.  It would
-  be quite hard on performance to read this file constantly.  In time
-  there may be a counter in the kernel that allows successive reads from
-  this file only after a given amount of time has passed to reduce the
-  possibility of a user slowing the system by reading this file.
-
-2. Writing
-
-  Writing to the ppc_htab allows you to change the characteristics of
-  the powerpc PTE hash table and setup performance monitoring.
-
-  Resizing the PTE hash table is not enabled right now due to many
-  complications with moving the hash table, rehashing the entries
-  and many many SMP issues that would have to be dealt with.
-
-  Write options to ppc_htab:
-  
-   - To set the size of the hash table to 64Kb:
-
-      echo 'size 64' > /proc/ppc_htab
-
-     The size must be a multiple of 64 and must be greater than or equal to
-     64.
-
-   - To turn off performance monitoring:
-
-      echo 'off' > /proc/ppc_htab
-
-   - To reset the counters without changing what they're counting:
-
-      echo 'reset' > /proc/ppc_htab
-
-     Note that counting will continue after the reset if it is enabled.
-
-   - To count only events in user mode or only in kernel mode:
-
-      echo 'user' > /proc/ppc_htab
-       ...or...
-      echo 'kernel' > /proc/ppc_htab
-
-     Note that these two options are exclusive of one another and the
-     lack of either of these options counts user and kernel.
-     Using 'reset' and 'off' reset these flags.
-
-   - The 604 has 2 performance counters which can each count events from
-     a specific set of events.  These sets are disjoint so it is not
-     possible to count _any_ combination of 2 events.  One event can
-     be counted by PMC1 and one by PMC2.
-
-     To start counting a particular event use:
-
-      echo 'event' > /proc/ppc_htab
-
-     and choose from these events:
-
-     PMC1
-     ----
-      'ic miss' - instruction cache misses
-      'dtlb' - data tlb misses (not hash table misses)
-
-     PMC2
-     ----
-      'dc miss' - data cache misses
-      'itlb' - instruction tlb misses (not hash table misses)
-      'load miss time' - cycles to complete a load miss
-
-3. Bugs
-
-  The PMC1 and PMC2 counters can overflow and give no indication of that
-  in /proc/ppc_htab.
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index fb61850d1cfc..27d534f4470d 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -139,7 +139,6 @@ extern int proc_readdir(struct file *, void *, filldir_t);
 extern struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *);
 
 extern const struct file_operations proc_kcore_operations;
-extern const struct file_operations ppc_htab_operations;
 
 extern int pid_ns_prepare_proc(struct pid_namespace *ns);
 extern void pid_ns_release_proc(struct pid_namespace *ns);
-- 
cgit v1.2.3


From 4eec952e42314b53e48fef1f54dd89cbf9789734 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 15 Jul 2008 17:58:13 -0400
Subject: NFS: Add options for finer control of the lookup cache

Add the flag NFS_MOUNT_LOOKUP_CACHE_NONEG to turn off the caching of
negative dentries. In reality what we do is to force
nfs_lookup_revalidate() to always discard negative dentries.

Add the flag NFS_MOUNT_LOOKUP_CACHE_NONE for enforcing stricter
revalidation of dentries. It forces the revalidate code to always do a
lookup instead of just checking the cached mtime of the parent directory.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c              | 4 ++++
 include/linux/nfs_mount.h | 4 ++++
 2 files changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 74f92b717f78..49d565412827 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -667,6 +667,8 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
 {
 	if (IS_ROOT(dentry))
 		return 1;
+	if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE)
+		return 0;
 	if (!nfs_verify_change_attribute(dir, dentry->d_time))
 		return 0;
 	/* Revalidate nfsi->cache_change_attribute before we declare a match */
@@ -750,6 +752,8 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
 	/* Don't revalidate a negative dentry if we're creating a new file */
 	if (nd != NULL && nfs_lookup_check_intent(nd, LOOKUP_CREATE) != 0)
 		return 0;
+	if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG)
+		return 1;
 	return !nfs_check_verifier(dir, dentry);
 }
 
diff --git a/include/linux/nfs_mount.h b/include/linux/nfs_mount.h
index df7c6b7a7ebb..6549a06ac16e 100644
--- a/include/linux/nfs_mount.h
+++ b/include/linux/nfs_mount.h
@@ -65,4 +65,8 @@ struct nfs_mount_data {
 #define NFS_MOUNT_UNSHARED	0x8000	/* 5 */
 #define NFS_MOUNT_FLAGMASK	0xFFFF
 
+/* The following are for internal use only */
+#define NFS_MOUNT_LOOKUP_CACHE_NONEG	0x10000
+#define NFS_MOUNT_LOOKUP_CACHE_NONE	0x20000
+
 #endif
-- 
cgit v1.2.3


From 691beb13cdc88358334ef0ba867c080a247a760f Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sun, 5 Oct 2008 14:48:22 -0400
Subject: NFS: Allow concurrent inode revalidation

Currently, if two processes are both trying to revalidate metadata for the
same inode, they will find themselves being serialised. There is no good
justification for this now that we have improved our ability to detect
stale attribute data, so we should remove that serialisation.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c         | 43 ++-----------------------------------------
 include/linux/nfs_fs.h |  9 ++++-----
 2 files changed, 6 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index f3b8ed904df7..e25009f35cc2 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -472,37 +472,6 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
 	}
 }
 
-static int nfs_wait_schedule(void *word)
-{
-	if (signal_pending(current))
-		return -ERESTARTSYS;
-	schedule();
-	return 0;
-}
-
-/*
- * Wait for the inode to get unlocked.
- */
-static int nfs_wait_on_inode(struct inode *inode)
-{
-	struct nfs_inode *nfsi = NFS_I(inode);
-	int error;
-
-	error = wait_on_bit_lock(&nfsi->flags, NFS_INO_REVALIDATING,
-					nfs_wait_schedule, TASK_KILLABLE);
-
-	return error;
-}
-
-static void nfs_wake_up_inode(struct inode *inode)
-{
-	struct nfs_inode *nfsi = NFS_I(inode);
-
-	clear_bit(NFS_INO_REVALIDATING, &nfsi->flags);
-	smp_mb__after_clear_bit();
-	wake_up_bit(&nfsi->flags, NFS_INO_REVALIDATING);
-}
-
 int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 {
 	struct inode *inode = dentry->d_inode;
@@ -697,20 +666,15 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 	dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n",
 		inode->i_sb->s_id, (long long)NFS_FILEID(inode));
 
-	nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE);
 	if (is_bad_inode(inode))
- 		goto out_nowait;
+		goto out;
 	if (NFS_STALE(inode))
- 		goto out_nowait;
-
-	status = nfs_wait_on_inode(inode);
-	if (status < 0)
 		goto out;
 
-	status = -ESTALE;
 	if (NFS_STALE(inode))
 		goto out;
 
+	nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE);
 	status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr);
 	if (status != 0) {
 		dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n",
@@ -740,9 +704,6 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 		(long long)NFS_FILEID(inode));
 
  out:
-	nfs_wake_up_inode(inode);
-
- out_nowait:
 	return status;
 }
 
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 78a5922a2f11..ca563ee13e32 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -200,11 +200,10 @@ struct nfs_inode {
 /*
  * Bit offsets in flags field
  */
-#define NFS_INO_REVALIDATING	(0)		/* revalidating attrs */
-#define NFS_INO_ADVISE_RDPLUS	(1)		/* advise readdirplus */
-#define NFS_INO_STALE		(2)		/* possible stale inode */
-#define NFS_INO_ACL_LRU_SET	(3)		/* Inode is on the LRU list */
-#define NFS_INO_MOUNTPOINT	(4)		/* inode is remote mountpoint */
+#define NFS_INO_ADVISE_RDPLUS	(0)		/* advise readdirplus */
+#define NFS_INO_STALE		(1)		/* possible stale inode */
+#define NFS_INO_ACL_LRU_SET	(2)		/* Inode is on the LRU list */
+#define NFS_INO_MOUNTPOINT	(3)		/* inode is remote mountpoint */
 
 static inline struct nfs_inode *NFS_I(const struct inode *inode)
 {
-- 
cgit v1.2.3


From 9fa8d66f1e55bf197568c8c689043c2aad1ffc97 Mon Sep 17 00:00:00 2001
From: Richard Kennedy <richard@rsk.demon.co.uk>
Date: Tue, 26 Aug 2008 16:23:20 +0100
Subject: NFS: remove 8 bytes of padding from struct nfs_fattr on 64 bit builds

remove 8 bytes of padding from struct nfs_fattr on 64 bit builds

This also removes padding from several nfs structures, including
16 bytes from  nfs4_opendata, nfs4_createdata,nfs3_createdata
& 8 bytes from nfs_read_data,nfs_write_data,nfs_removeres,nfs4_closedata

This also reduces the reported stack usage of many nfs functions (30+).

Signed-off-by: Richard Kennedy <richard@rsk.demon.co.uk>
----

This patch is against the latest git 2.6.27-rc4.
I've built & run this on my AMD64 desktop, & successfully run _simple_
tests with a  64 bit client => 32 bit server & 32 bit client to 64 bit
server.

On fedora with gcc (GCC) 4.3.0 20080428 (Red Hat 4.3.0-8) checkpatch
reports 33 functions with reduced stack usage.
e.g.
__nfs_revalidate_inode [nfs] 216 => 200
_nfs4_proc_access [nfs] 304 => 288
_nfs4_proc_link [nfs] 536 => 504
_nfs4_proc_remove [nfs] 304 => 288
_nfs4_proc_rename [nfs] 584 => 552
nfs3_proc_access [nfs] 272 => 256
nfs3_proc_getacl [nfs] 384 => 368
nfs3_proc_link [nfs] 496 => 464
etc
I can supply the complete list if anyone is interested.

regards
Richard
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_xdr.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 8c77c11224d1..9cabbb3a9e6d 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -36,6 +36,7 @@ struct nfs_fattr {
 	__u32			nlink;
 	__u32			uid;
 	__u32			gid;
+	dev_t			rdev;
 	__u64			size;
 	union {
 		struct {
@@ -46,7 +47,6 @@ struct nfs_fattr {
 			__u64	used;
 		} nfs3;
 	} du;
-	dev_t			rdev;
 	struct nfs_fsid		fsid;
 	__u64			fileid;
 	struct timespec		atime;
-- 
cgit v1.2.3


From d1ce02e1689dff9d413138f60a79b4e3affb4708 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 25 Sep 2008 11:57:12 -0400
Subject: NFS: SETCLIENTID truncates client ID and netid

The sc_name field is currently 56 bytes long.  This is not large enough
to hold a pair of IPv6 addresses, the authentication type, the protocol
name, and a uniquifier number.  The maximum possible size of the name
string using IPv6 addresses is just under 110 bytes, so I increased the
size of the sc_name field to accomodate this maximum.

In addition, the strings in the nfs4_setclientid structure are
constructed with scnprintf(), which wants to terminate its output with
'\0'.  The sc_netid field was large enough only for a three byte netid
string and a '\0' so inet6 netids were being truncated.  Perhaps we
don't need the overhead of scnprintf() to do a simple string copy, but
I fixed this by increasing the size of the buffer by one byte.

Since all three of the string buffers in nfs4_setclientid are
constructed with scnprintf(), I increased the size of all three by one
byte to document the requirement, although I don't think either the
universal address field or the name field will be so small that these
strings get truncated in this way.

The size of the Linux client's client ID on the wire will be larger
than before.  RFC 3530 suggests the size limit for client IDs is 1024,
and we are still well below that.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_xdr.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 9cabbb3a9e6d..f6e95bfad5de 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -672,16 +672,16 @@ struct nfs4_rename_res {
 	struct nfs_fattr *		new_fattr;
 };
 
-#define NFS4_SETCLIENTID_NAMELEN	(56)
+#define NFS4_SETCLIENTID_NAMELEN	(128)
 struct nfs4_setclientid {
 	const nfs4_verifier *		sc_verifier;
 	unsigned int			sc_name_len;
-	char				sc_name[NFS4_SETCLIENTID_NAMELEN];
+	char				sc_name[NFS4_SETCLIENTID_NAMELEN + 1];
 	u32				sc_prog;
 	unsigned int			sc_netid_len;
-	char				sc_netid[RPCBIND_MAXNETIDLEN];
+	char				sc_netid[RPCBIND_MAXNETIDLEN + 1];
 	unsigned int			sc_uaddr_len;
-	char				sc_uaddr[RPCBIND_MAXUADDRLEN];
+	char				sc_uaddr[RPCBIND_MAXUADDRLEN + 1];
 	u32				sc_cb_ident;
 };
 
-- 
cgit v1.2.3


From 19d771f3caccaf66ce2fb539319222139e5b4e88 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 8 Oct 2008 13:54:52 -0400
Subject: NFS: Save padding bytes in struct nfs4_setclientid

Peter Staubach suggested reducing NFS4_SETCLIENTID_NAMELEN by one byte so
as to avoid 7 bytes of unnecessary padding.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_xdr.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index f6e95bfad5de..6ee6ae3f095c 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -672,7 +672,7 @@ struct nfs4_rename_res {
 	struct nfs_fattr *		new_fattr;
 };
 
-#define NFS4_SETCLIENTID_NAMELEN	(128)
+#define NFS4_SETCLIENTID_NAMELEN	(127)
 struct nfs4_setclientid {
 	const nfs4_verifier *		sc_verifier;
 	unsigned int			sc_name_len;
-- 
cgit v1.2.3


From fe9053b30bb48b99f7b45541249f5cfe96bdf7f7 Mon Sep 17 00:00:00 2001
From: Tom Talpey <talpey@netapp.com>
Date: Thu, 9 Oct 2008 14:59:59 -0400
Subject: RPC/RDMA: add data types and new FRMR memory registration enum.

Internal RPC/RDMA structure updates in preparation for FRMR support.

Signed-off-by: Tom Talpey <talpey@netapp.com>
Acked-by: Tom Tucker <tom@opengridcomputing.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/xprtrdma.h | 1 +
 net/sunrpc/xprtrdma/xprt_rdma.h | 8 +++++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h
index 4de56b1d372b..55a5d92ca1e2 100644
--- a/include/linux/sunrpc/xprtrdma.h
+++ b/include/linux/sunrpc/xprtrdma.h
@@ -78,6 +78,7 @@ enum rpcrdma_memreg {
 	RPCRDMA_MEMWINDOWS,
 	RPCRDMA_MEMWINDOWS_ASYNC,
 	RPCRDMA_MTHCAFMR,
+	RPCRDMA_FRMR,
 	RPCRDMA_ALLPHYSICAL,
 	RPCRDMA_LAST
 };
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 2427822f8bd4..05b7898e1f4b 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -58,6 +58,8 @@ struct rpcrdma_ia {
 	struct rdma_cm_id 	*ri_id;
 	struct ib_pd		*ri_pd;
 	struct ib_mr		*ri_bind_mem;
+	u32			ri_dma_lkey;
+	int			ri_have_dma_lkey;
 	struct completion	ri_done;
 	int			ri_async_rc;
 	enum rpcrdma_memreg	ri_memreg_strategy;
@@ -156,6 +158,10 @@ struct rpcrdma_mr_seg {		/* chunk descriptors */
 			union {
 				struct ib_mw	*mw;
 				struct ib_fmr	*fmr;
+				struct {
+					struct ib_fast_reg_page_list *fr_pgl;
+					struct ib_mr *fr_mr;
+				} frmr;
 			} r;
 			struct list_head mw_list;
 		} *rl_mw;
@@ -198,7 +204,7 @@ struct rpcrdma_buffer {
 	atomic_t	rb_credits;	/* most recent server credits */
 	unsigned long	rb_cwndscale;	/* cached framework rpc_cwndscale */
 	int		rb_max_requests;/* client max requests */
-	struct list_head rb_mws;	/* optional memory windows/fmrs */
+	struct list_head rb_mws;	/* optional memory windows/fmrs/frmrs */
 	int		rb_send_index;
 	struct rpcrdma_req	**rb_send_bufs;
 	int		rb_recv_index;
-- 
cgit v1.2.3


From 5675add36e76b9487e7f9e689f854cb8d6afd9b4 Mon Sep 17 00:00:00 2001
From: Tom Talpey <talpey@netapp.com>
Date: Thu, 9 Oct 2008 15:01:41 -0400
Subject: RPC/RDMA: harden connection logic against missing/late rdma_cm
 upcalls.

Add defensive timeouts to wait_for_completion() calls in RDMA
address resolution, and make them interruptible. Fix the timeout
units to milliseconds (formerly jiffies) and move to private header.

Signed-off-by: Tom Talpey <talpey@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/xprtrdma.h |  3 ---
 net/sunrpc/xprtrdma/verbs.c     | 11 +++++++----
 net/sunrpc/xprtrdma/xprt_rdma.h |  3 +++
 3 files changed, 10 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h
index 55a5d92ca1e2..54a379c9e8eb 100644
--- a/include/linux/sunrpc/xprtrdma.h
+++ b/include/linux/sunrpc/xprtrdma.h
@@ -66,9 +66,6 @@
 
 #define RPCRDMA_INLINE_PAD_THRESH  (512)/* payload threshold to pad (bytes) */
 
-#define RDMA_RESOLVE_TIMEOUT	(5*HZ)	/* TBD 5 seconds */
-#define RDMA_CONNECT_RETRY_MAX	(2)	/* retries if no listener backlog */
-
 /* memory registration strategies */
 #define RPCRDMA_PERSISTENT_REGISTRATION (1)
 
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index a63d0c0ec017..f46fb93f421b 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -284,6 +284,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
 	switch (event->event) {
 	case RDMA_CM_EVENT_ADDR_RESOLVED:
 	case RDMA_CM_EVENT_ROUTE_RESOLVED:
+		ia->ri_async_rc = 0;
 		complete(&ia->ri_done);
 		break;
 	case RDMA_CM_EVENT_ADDR_ERROR:
@@ -363,26 +364,28 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
 		return id;
 	}
 
-	ia->ri_async_rc = 0;
+	ia->ri_async_rc = -ETIMEDOUT;
 	rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
 	if (rc) {
 		dprintk("RPC:       %s: rdma_resolve_addr() failed %i\n",
 			__func__, rc);
 		goto out;
 	}
-	wait_for_completion(&ia->ri_done);
+	wait_for_completion_interruptible_timeout(&ia->ri_done,
+				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
 	rc = ia->ri_async_rc;
 	if (rc)
 		goto out;
 
-	ia->ri_async_rc = 0;
+	ia->ri_async_rc = -ETIMEDOUT;
 	rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
 	if (rc) {
 		dprintk("RPC:       %s: rdma_resolve_route() failed %i\n",
 			__func__, rc);
 		goto out;
 	}
-	wait_for_completion(&ia->ri_done);
+	wait_for_completion_interruptible_timeout(&ia->ri_done,
+				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
 	rc = ia->ri_async_rc;
 	if (rc)
 		goto out;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index fde6499a53b2..c7a7eba991bc 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -51,6 +51,9 @@
 #include <linux/sunrpc/rpc_rdma.h> 	/* RPC/RDMA protocol */
 #include <linux/sunrpc/xprtrdma.h> 	/* xprt parameters */
 
+#define RDMA_RESOLVE_TIMEOUT	(5000)	/* 5 seconds */
+#define RDMA_CONNECT_RETRY_MAX	(2)	/* retries if no listener backlog */
+
 /*
  * Interface Adapter -- one per transport instance
  */
-- 
cgit v1.2.3


From f9da8d157b60d8c5bfc5a21fc50538fdb754a65b Mon Sep 17 00:00:00 2001
From: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Date: Fri, 10 Oct 2008 23:14:14 -0400
Subject: Input: move map_to_7segment.h to include/linux

The map_to_7segment.h provides generic 7segment LED mappings and is
designed to be used by other drivers.  Moving it to common area will
make it more usable.  Also exporting it to userspace will help users
of sysfs interface.

Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Acked-by: Henk Vergonet <henk.vergonet@gmail.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/misc/map_to_7segment.h | 189 -----------------------------------
 drivers/input/misc/yealink.c         |   2 +-
 include/linux/Kbuild                 |   1 +
 include/linux/map_to_7segment.h      | 187 ++++++++++++++++++++++++++++++++++
 4 files changed, 189 insertions(+), 190 deletions(-)
 delete mode 100644 drivers/input/misc/map_to_7segment.h
 create mode 100644 include/linux/map_to_7segment.h

(limited to 'include/linux')

diff --git a/drivers/input/misc/map_to_7segment.h b/drivers/input/misc/map_to_7segment.h
deleted file mode 100644
index a424094d9fe2..000000000000
--- a/drivers/input/misc/map_to_7segment.h
+++ /dev/null
@@ -1,189 +0,0 @@
-/*
- * drivers/usb/input/map_to_7segment.h
- *
- * Copyright (c) 2005 Henk Vergonet <Henk.Vergonet@gmail.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef MAP_TO_7SEGMENT_H
-#define MAP_TO_7SEGMENT_H
-
-/* This file provides translation primitives and tables for the conversion
- * of (ASCII) characters to a 7-segments notation.
- *
- * The 7 segment's wikipedia notation below is used as standard.
- * See: http://en.wikipedia.org/wiki/Seven_segment_display
- *
- * Notation:	+-a-+
- *		f   b
- *		+-g-+
- *		e   c
- *		+-d-+
- *
- * Usage:
- *
- *   Register a map variable, and fill it with a character set:
- * 	static SEG7_DEFAULT_MAP(map_seg7);
- *
- *
- *   Then use for conversion:
- *	seg7 = map_to_seg7(&map_seg7, some_char);
- *	...
- *
- * In device drivers it is recommended, if required, to make the char map
- * accessible via the sysfs interface using the following scheme:
- *
- * static ssize_t show_map(struct device *dev, char *buf) {
- *	memcpy(buf, &map_seg7, sizeof(map_seg7));
- *	return sizeof(map_seg7);
- * }
- * static ssize_t store_map(struct device *dev, const char *buf, size_t cnt) {
- *	if(cnt != sizeof(map_seg7))
- *		return -EINVAL;
- *	memcpy(&map_seg7, buf, cnt);
- *	return cnt;
- * }
- * static DEVICE_ATTR(map_seg7, PERMS_RW, show_map, store_map);
- *
- * History:
- * 2005-05-31	RFC linux-kernel@vger.kernel.org
- */
-#include <linux/errno.h>
-
-
-#define BIT_SEG7_A		0
-#define BIT_SEG7_B		1
-#define BIT_SEG7_C		2
-#define BIT_SEG7_D		3
-#define BIT_SEG7_E		4
-#define BIT_SEG7_F		5
-#define BIT_SEG7_G		6
-#define BIT_SEG7_RESERVED	7
-
-struct seg7_conversion_map {
-	unsigned char	table[128];
-};
-
-static inline int map_to_seg7(struct seg7_conversion_map *map, int c)
-{
-	return c >= 0 && c < sizeof(map->table) ? map->table[c] : -EINVAL;
-}
-
-#define SEG7_CONVERSION_MAP(_name, _map)	\
-	struct seg7_conversion_map _name = { .table = { _map } }
-
-/*
- * It is recommended to use a facility that allows user space to redefine
- * custom character sets for LCD devices. Please use a sysfs interface
- * as described above.
- */
-#define MAP_TO_SEG7_SYSFS_FILE	"map_seg7"
-
-/*******************************************************************************
- * ASCII conversion table
- ******************************************************************************/
-
-#define _SEG7(l,a,b,c,d,e,f,g)	\
-      (	a<<BIT_SEG7_A |	b<<BIT_SEG7_B |	c<<BIT_SEG7_C |	d<<BIT_SEG7_D |	\
-	e<<BIT_SEG7_E |	f<<BIT_SEG7_F |	g<<BIT_SEG7_G )
-
-#define _MAP_0_32_ASCII_SEG7_NON_PRINTABLE	\
-	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-
-#define _MAP_33_47_ASCII_SEG7_SYMBOL		\
- _SEG7('!',0,0,0,0,1,1,0), _SEG7('"',0,1,0,0,0,1,0), _SEG7('#',0,1,1,0,1,1,0),\
- _SEG7('$',1,0,1,1,0,1,1), _SEG7('%',0,0,1,0,0,1,0), _SEG7('&',1,0,1,1,1,1,1),\
- _SEG7('\'',0,0,0,0,0,1,0),_SEG7('(',1,0,0,1,1,1,0), _SEG7(')',1,1,1,1,0,0,0),\
- _SEG7('*',0,1,1,0,1,1,1), _SEG7('+',0,1,1,0,0,0,1), _SEG7(',',0,0,0,0,1,0,0),\
- _SEG7('-',0,0,0,0,0,0,1), _SEG7('.',0,0,0,0,1,0,0), _SEG7('/',0,1,0,0,1,0,1),
-
-#define _MAP_48_57_ASCII_SEG7_NUMERIC		\
- _SEG7('0',1,1,1,1,1,1,0), _SEG7('1',0,1,1,0,0,0,0), _SEG7('2',1,1,0,1,1,0,1),\
- _SEG7('3',1,1,1,1,0,0,1), _SEG7('4',0,1,1,0,0,1,1), _SEG7('5',1,0,1,1,0,1,1),\
- _SEG7('6',1,0,1,1,1,1,1), _SEG7('7',1,1,1,0,0,0,0), _SEG7('8',1,1,1,1,1,1,1),\
- _SEG7('9',1,1,1,1,0,1,1),
-
-#define _MAP_58_64_ASCII_SEG7_SYMBOL		\
- _SEG7(':',0,0,0,1,0,0,1), _SEG7(';',0,0,0,1,0,0,1), _SEG7('<',1,0,0,0,0,1,1),\
- _SEG7('=',0,0,0,1,0,0,1), _SEG7('>',1,1,0,0,0,0,1), _SEG7('?',1,1,1,0,0,1,0),\
- _SEG7('@',1,1,0,1,1,1,1),
-
-#define _MAP_65_90_ASCII_SEG7_ALPHA_UPPR	\
- _SEG7('A',1,1,1,0,1,1,1), _SEG7('B',1,1,1,1,1,1,1), _SEG7('C',1,0,0,1,1,1,0),\
- _SEG7('D',1,1,1,1,1,1,0), _SEG7('E',1,0,0,1,1,1,1), _SEG7('F',1,0,0,0,1,1,1),\
- _SEG7('G',1,1,1,1,0,1,1), _SEG7('H',0,1,1,0,1,1,1), _SEG7('I',0,1,1,0,0,0,0),\
- _SEG7('J',0,1,1,1,0,0,0), _SEG7('K',0,1,1,0,1,1,1), _SEG7('L',0,0,0,1,1,1,0),\
- _SEG7('M',1,1,1,0,1,1,0), _SEG7('N',1,1,1,0,1,1,0), _SEG7('O',1,1,1,1,1,1,0),\
- _SEG7('P',1,1,0,0,1,1,1), _SEG7('Q',1,1,1,1,1,1,0), _SEG7('R',1,1,1,0,1,1,1),\
- _SEG7('S',1,0,1,1,0,1,1), _SEG7('T',0,0,0,1,1,1,1), _SEG7('U',0,1,1,1,1,1,0),\
- _SEG7('V',0,1,1,1,1,1,0), _SEG7('W',0,1,1,1,1,1,1), _SEG7('X',0,1,1,0,1,1,1),\
- _SEG7('Y',0,1,1,0,0,1,1), _SEG7('Z',1,1,0,1,1,0,1),
-
-#define _MAP_91_96_ASCII_SEG7_SYMBOL		\
- _SEG7('[',1,0,0,1,1,1,0), _SEG7('\\',0,0,1,0,0,1,1),_SEG7(']',1,1,1,1,0,0,0),\
- _SEG7('^',1,1,0,0,0,1,0), _SEG7('_',0,0,0,1,0,0,0), _SEG7('`',0,1,0,0,0,0,0),
-
-#define _MAP_97_122_ASCII_SEG7_ALPHA_LOWER	\
- _SEG7('A',1,1,1,0,1,1,1), _SEG7('b',0,0,1,1,1,1,1), _SEG7('c',0,0,0,1,1,0,1),\
- _SEG7('d',0,1,1,1,1,0,1), _SEG7('E',1,0,0,1,1,1,1), _SEG7('F',1,0,0,0,1,1,1),\
- _SEG7('G',1,1,1,1,0,1,1), _SEG7('h',0,0,1,0,1,1,1), _SEG7('i',0,0,1,0,0,0,0),\
- _SEG7('j',0,0,1,1,0,0,0), _SEG7('k',0,0,1,0,1,1,1), _SEG7('L',0,0,0,1,1,1,0),\
- _SEG7('M',1,1,1,0,1,1,0), _SEG7('n',0,0,1,0,1,0,1), _SEG7('o',0,0,1,1,1,0,1),\
- _SEG7('P',1,1,0,0,1,1,1), _SEG7('q',1,1,1,0,0,1,1), _SEG7('r',0,0,0,0,1,0,1),\
- _SEG7('S',1,0,1,1,0,1,1), _SEG7('T',0,0,0,1,1,1,1), _SEG7('u',0,0,1,1,1,0,0),\
- _SEG7('v',0,0,1,1,1,0,0), _SEG7('W',0,1,1,1,1,1,1), _SEG7('X',0,1,1,0,1,1,1),\
- _SEG7('y',0,1,1,1,0,1,1), _SEG7('Z',1,1,0,1,1,0,1),
-
-#define _MAP_123_126_ASCII_SEG7_SYMBOL		\
- _SEG7('{',1,0,0,1,1,1,0), _SEG7('|',0,0,0,0,1,1,0), _SEG7('}',1,1,1,1,0,0,0),\
- _SEG7('~',1,0,0,0,0,0,0),
-
-/* Maps */
-
-/* This set tries to map as close as possible to the visible characteristics
- * of the ASCII symbol, lowercase and uppercase letters may differ in
- * presentation on the display.
- */
-#define MAP_ASCII7SEG_ALPHANUM			\
-	_MAP_0_32_ASCII_SEG7_NON_PRINTABLE	\
-	_MAP_33_47_ASCII_SEG7_SYMBOL		\
-	_MAP_48_57_ASCII_SEG7_NUMERIC		\
-	_MAP_58_64_ASCII_SEG7_SYMBOL		\
-	_MAP_65_90_ASCII_SEG7_ALPHA_UPPR	\
-	_MAP_91_96_ASCII_SEG7_SYMBOL		\
-	_MAP_97_122_ASCII_SEG7_ALPHA_LOWER	\
-	_MAP_123_126_ASCII_SEG7_SYMBOL
-
-/* This set tries to map as close as possible to the symbolic characteristics
- * of the ASCII character for maximum discrimination.
- * For now this means all alpha chars are in lower case representations.
- * (This for example facilitates the use of hex numbers with uppercase input.)
- */
-#define MAP_ASCII7SEG_ALPHANUM_LC			\
-	_MAP_0_32_ASCII_SEG7_NON_PRINTABLE	\
-	_MAP_33_47_ASCII_SEG7_SYMBOL		\
-	_MAP_48_57_ASCII_SEG7_NUMERIC		\
-	_MAP_58_64_ASCII_SEG7_SYMBOL		\
-	_MAP_97_122_ASCII_SEG7_ALPHA_LOWER	\
-	_MAP_91_96_ASCII_SEG7_SYMBOL		\
-	_MAP_97_122_ASCII_SEG7_ALPHA_LOWER	\
-	_MAP_123_126_ASCII_SEG7_SYMBOL
-
-#define SEG7_DEFAULT_MAP(_name)		\
-	SEG7_CONVERSION_MAP(_name,MAP_ASCII7SEG_ALPHANUM)
-
-#endif	/* MAP_TO_7SEGMENT_H */
-
diff --git a/drivers/input/misc/yealink.c b/drivers/input/misc/yealink.c
index facefd3dba29..11b5c7e84ed1 100644
--- a/drivers/input/misc/yealink.c
+++ b/drivers/input/misc/yealink.c
@@ -52,8 +52,8 @@
 #include <linux/module.h>
 #include <linux/rwsem.h>
 #include <linux/usb/input.h>
+#include <linux/map_to_7segment.h>
 
-#include "map_to_7segment.h"
 #include "yealink.h"
 
 #define DRIVER_VERSION "yld-20051230"
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 4c4142c5aa6e..0b136c5990cc 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -106,6 +106,7 @@ header-y += keyctl.h
 header-y += limits.h
 header-y += magic.h
 header-y += major.h
+header-y += map_to_7segment.h
 header-y += matroxfb.h
 header-y += meye.h
 header-y += minix_fs.h
diff --git a/include/linux/map_to_7segment.h b/include/linux/map_to_7segment.h
new file mode 100644
index 000000000000..7df8432c4402
--- /dev/null
+++ b/include/linux/map_to_7segment.h
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) 2005 Henk Vergonet <Henk.Vergonet@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef MAP_TO_7SEGMENT_H
+#define MAP_TO_7SEGMENT_H
+
+/* This file provides translation primitives and tables for the conversion
+ * of (ASCII) characters to a 7-segments notation.
+ *
+ * The 7 segment's wikipedia notation below is used as standard.
+ * See: http://en.wikipedia.org/wiki/Seven_segment_display
+ *
+ * Notation:	+-a-+
+ *		f   b
+ *		+-g-+
+ *		e   c
+ *		+-d-+
+ *
+ * Usage:
+ *
+ *   Register a map variable, and fill it with a character set:
+ *	static SEG7_DEFAULT_MAP(map_seg7);
+ *
+ *
+ *   Then use for conversion:
+ *	seg7 = map_to_seg7(&map_seg7, some_char);
+ *	...
+ *
+ * In device drivers it is recommended, if required, to make the char map
+ * accessible via the sysfs interface using the following scheme:
+ *
+ * static ssize_t show_map(struct device *dev, char *buf) {
+ *	memcpy(buf, &map_seg7, sizeof(map_seg7));
+ *	return sizeof(map_seg7);
+ * }
+ * static ssize_t store_map(struct device *dev, const char *buf, size_t cnt) {
+ *	if(cnt != sizeof(map_seg7))
+ *		return -EINVAL;
+ *	memcpy(&map_seg7, buf, cnt);
+ *	return cnt;
+ * }
+ * static DEVICE_ATTR(map_seg7, PERMS_RW, show_map, store_map);
+ *
+ * History:
+ * 2005-05-31	RFC linux-kernel@vger.kernel.org
+ */
+#include <linux/errno.h>
+
+
+#define BIT_SEG7_A		0
+#define BIT_SEG7_B		1
+#define BIT_SEG7_C		2
+#define BIT_SEG7_D		3
+#define BIT_SEG7_E		4
+#define BIT_SEG7_F		5
+#define BIT_SEG7_G		6
+#define BIT_SEG7_RESERVED	7
+
+struct seg7_conversion_map {
+	unsigned char	table[128];
+};
+
+static inline int map_to_seg7(struct seg7_conversion_map *map, int c)
+{
+	return c >= 0 && c < sizeof(map->table) ? map->table[c] : -EINVAL;
+}
+
+#define SEG7_CONVERSION_MAP(_name, _map)	\
+	struct seg7_conversion_map _name = { .table = { _map } }
+
+/*
+ * It is recommended to use a facility that allows user space to redefine
+ * custom character sets for LCD devices. Please use a sysfs interface
+ * as described above.
+ */
+#define MAP_TO_SEG7_SYSFS_FILE	"map_seg7"
+
+/*******************************************************************************
+ * ASCII conversion table
+ ******************************************************************************/
+
+#define _SEG7(l,a,b,c,d,e,f,g)	\
+      (	a<<BIT_SEG7_A |	b<<BIT_SEG7_B |	c<<BIT_SEG7_C |	d<<BIT_SEG7_D |	\
+	e<<BIT_SEG7_E |	f<<BIT_SEG7_F |	g<<BIT_SEG7_G )
+
+#define _MAP_0_32_ASCII_SEG7_NON_PRINTABLE	\
+	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+
+#define _MAP_33_47_ASCII_SEG7_SYMBOL		\
+ _SEG7('!',0,0,0,0,1,1,0), _SEG7('"',0,1,0,0,0,1,0), _SEG7('#',0,1,1,0,1,1,0),\
+ _SEG7('$',1,0,1,1,0,1,1), _SEG7('%',0,0,1,0,0,1,0), _SEG7('&',1,0,1,1,1,1,1),\
+ _SEG7('\'',0,0,0,0,0,1,0),_SEG7('(',1,0,0,1,1,1,0), _SEG7(')',1,1,1,1,0,0,0),\
+ _SEG7('*',0,1,1,0,1,1,1), _SEG7('+',0,1,1,0,0,0,1), _SEG7(',',0,0,0,0,1,0,0),\
+ _SEG7('-',0,0,0,0,0,0,1), _SEG7('.',0,0,0,0,1,0,0), _SEG7('/',0,1,0,0,1,0,1),
+
+#define _MAP_48_57_ASCII_SEG7_NUMERIC		\
+ _SEG7('0',1,1,1,1,1,1,0), _SEG7('1',0,1,1,0,0,0,0), _SEG7('2',1,1,0,1,1,0,1),\
+ _SEG7('3',1,1,1,1,0,0,1), _SEG7('4',0,1,1,0,0,1,1), _SEG7('5',1,0,1,1,0,1,1),\
+ _SEG7('6',1,0,1,1,1,1,1), _SEG7('7',1,1,1,0,0,0,0), _SEG7('8',1,1,1,1,1,1,1),\
+ _SEG7('9',1,1,1,1,0,1,1),
+
+#define _MAP_58_64_ASCII_SEG7_SYMBOL		\
+ _SEG7(':',0,0,0,1,0,0,1), _SEG7(';',0,0,0,1,0,0,1), _SEG7('<',1,0,0,0,0,1,1),\
+ _SEG7('=',0,0,0,1,0,0,1), _SEG7('>',1,1,0,0,0,0,1), _SEG7('?',1,1,1,0,0,1,0),\
+ _SEG7('@',1,1,0,1,1,1,1),
+
+#define _MAP_65_90_ASCII_SEG7_ALPHA_UPPR	\
+ _SEG7('A',1,1,1,0,1,1,1), _SEG7('B',1,1,1,1,1,1,1), _SEG7('C',1,0,0,1,1,1,0),\
+ _SEG7('D',1,1,1,1,1,1,0), _SEG7('E',1,0,0,1,1,1,1), _SEG7('F',1,0,0,0,1,1,1),\
+ _SEG7('G',1,1,1,1,0,1,1), _SEG7('H',0,1,1,0,1,1,1), _SEG7('I',0,1,1,0,0,0,0),\
+ _SEG7('J',0,1,1,1,0,0,0), _SEG7('K',0,1,1,0,1,1,1), _SEG7('L',0,0,0,1,1,1,0),\
+ _SEG7('M',1,1,1,0,1,1,0), _SEG7('N',1,1,1,0,1,1,0), _SEG7('O',1,1,1,1,1,1,0),\
+ _SEG7('P',1,1,0,0,1,1,1), _SEG7('Q',1,1,1,1,1,1,0), _SEG7('R',1,1,1,0,1,1,1),\
+ _SEG7('S',1,0,1,1,0,1,1), _SEG7('T',0,0,0,1,1,1,1), _SEG7('U',0,1,1,1,1,1,0),\
+ _SEG7('V',0,1,1,1,1,1,0), _SEG7('W',0,1,1,1,1,1,1), _SEG7('X',0,1,1,0,1,1,1),\
+ _SEG7('Y',0,1,1,0,0,1,1), _SEG7('Z',1,1,0,1,1,0,1),
+
+#define _MAP_91_96_ASCII_SEG7_SYMBOL		\
+ _SEG7('[',1,0,0,1,1,1,0), _SEG7('\\',0,0,1,0,0,1,1),_SEG7(']',1,1,1,1,0,0,0),\
+ _SEG7('^',1,1,0,0,0,1,0), _SEG7('_',0,0,0,1,0,0,0), _SEG7('`',0,1,0,0,0,0,0),
+
+#define _MAP_97_122_ASCII_SEG7_ALPHA_LOWER	\
+ _SEG7('A',1,1,1,0,1,1,1), _SEG7('b',0,0,1,1,1,1,1), _SEG7('c',0,0,0,1,1,0,1),\
+ _SEG7('d',0,1,1,1,1,0,1), _SEG7('E',1,0,0,1,1,1,1), _SEG7('F',1,0,0,0,1,1,1),\
+ _SEG7('G',1,1,1,1,0,1,1), _SEG7('h',0,0,1,0,1,1,1), _SEG7('i',0,0,1,0,0,0,0),\
+ _SEG7('j',0,0,1,1,0,0,0), _SEG7('k',0,0,1,0,1,1,1), _SEG7('L',0,0,0,1,1,1,0),\
+ _SEG7('M',1,1,1,0,1,1,0), _SEG7('n',0,0,1,0,1,0,1), _SEG7('o',0,0,1,1,1,0,1),\
+ _SEG7('P',1,1,0,0,1,1,1), _SEG7('q',1,1,1,0,0,1,1), _SEG7('r',0,0,0,0,1,0,1),\
+ _SEG7('S',1,0,1,1,0,1,1), _SEG7('T',0,0,0,1,1,1,1), _SEG7('u',0,0,1,1,1,0,0),\
+ _SEG7('v',0,0,1,1,1,0,0), _SEG7('W',0,1,1,1,1,1,1), _SEG7('X',0,1,1,0,1,1,1),\
+ _SEG7('y',0,1,1,1,0,1,1), _SEG7('Z',1,1,0,1,1,0,1),
+
+#define _MAP_123_126_ASCII_SEG7_SYMBOL		\
+ _SEG7('{',1,0,0,1,1,1,0), _SEG7('|',0,0,0,0,1,1,0), _SEG7('}',1,1,1,1,0,0,0),\
+ _SEG7('~',1,0,0,0,0,0,0),
+
+/* Maps */
+
+/* This set tries to map as close as possible to the visible characteristics
+ * of the ASCII symbol, lowercase and uppercase letters may differ in
+ * presentation on the display.
+ */
+#define MAP_ASCII7SEG_ALPHANUM			\
+	_MAP_0_32_ASCII_SEG7_NON_PRINTABLE	\
+	_MAP_33_47_ASCII_SEG7_SYMBOL		\
+	_MAP_48_57_ASCII_SEG7_NUMERIC		\
+	_MAP_58_64_ASCII_SEG7_SYMBOL		\
+	_MAP_65_90_ASCII_SEG7_ALPHA_UPPR	\
+	_MAP_91_96_ASCII_SEG7_SYMBOL		\
+	_MAP_97_122_ASCII_SEG7_ALPHA_LOWER	\
+	_MAP_123_126_ASCII_SEG7_SYMBOL
+
+/* This set tries to map as close as possible to the symbolic characteristics
+ * of the ASCII character for maximum discrimination.
+ * For now this means all alpha chars are in lower case representations.
+ * (This for example facilitates the use of hex numbers with uppercase input.)
+ */
+#define MAP_ASCII7SEG_ALPHANUM_LC			\
+	_MAP_0_32_ASCII_SEG7_NON_PRINTABLE	\
+	_MAP_33_47_ASCII_SEG7_SYMBOL		\
+	_MAP_48_57_ASCII_SEG7_NUMERIC		\
+	_MAP_58_64_ASCII_SEG7_SYMBOL		\
+	_MAP_97_122_ASCII_SEG7_ALPHA_LOWER	\
+	_MAP_91_96_ASCII_SEG7_SYMBOL		\
+	_MAP_97_122_ASCII_SEG7_ALPHA_LOWER	\
+	_MAP_123_126_ASCII_SEG7_SYMBOL
+
+#define SEG7_DEFAULT_MAP(_name)		\
+	SEG7_CONVERSION_MAP(_name,MAP_ASCII7SEG_ALPHANUM)
+
+#endif	/* MAP_TO_7SEGMENT_H */
+
-- 
cgit v1.2.3


From 64b60e096fa391c56f93e6216115e6757bf86b7e Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Fri, 10 Oct 2008 04:43:17 +0000
Subject: of: Add new helper of_parse_phandles_with_args()

The helper is factored out of of_get_gpio(). Will be used by the QE
pin multiplexing functions (they need to parse the gpios = <> too).

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 drivers/of/base.c  | 109 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/of/gpio.c  |  81 ++++++++++-----------------------------
 include/linux/of.h |   3 ++
 3 files changed, 131 insertions(+), 62 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/of/base.c b/drivers/of/base.c
index cd1ce7ab8517..4270eb4a26a1 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -458,3 +458,112 @@ int of_modalias_node(struct device_node *node, char *modalias, int len)
 }
 EXPORT_SYMBOL_GPL(of_modalias_node);
 
+/**
+ * of_parse_phandles_with_args - Find a node pointed by phandle in a list
+ * @np:		pointer to a device tree node containing a list
+ * @list_name:	property name that contains a list
+ * @cells_name:	property name that specifies phandles' arguments count
+ * @index:	index of a phandle to parse out
+ * @out_node:	pointer to device_node struct pointer (will be filled)
+ * @out_args:	pointer to arguments pointer (will be filled)
+ *
+ * This function is useful to parse lists of phandles and their arguments.
+ * Returns 0 on success and fills out_node and out_args, on error returns
+ * appropriate errno value.
+ *
+ * Example:
+ *
+ * phandle1: node1 {
+ * 	#list-cells = <2>;
+ * }
+ *
+ * phandle2: node2 {
+ * 	#list-cells = <1>;
+ * }
+ *
+ * node3 {
+ * 	list = <&phandle1 1 2 &phandle2 3>;
+ * }
+ *
+ * To get a device_node of the `node2' node you may call this:
+ * of_parse_phandles_with_args(node3, "list", "#list-cells", 2, &node2, &args);
+ */
+int of_parse_phandles_with_args(struct device_node *np, const char *list_name,
+				const char *cells_name, int index,
+				struct device_node **out_node,
+				const void **out_args)
+{
+	int ret = -EINVAL;
+	const u32 *list;
+	const u32 *list_end;
+	int size;
+	int cur_index = 0;
+	struct device_node *node = NULL;
+	const void *args;
+
+	list = of_get_property(np, list_name, &size);
+	if (!list) {
+		ret = -ENOENT;
+		goto err0;
+	}
+	list_end = list + size / sizeof(*list);
+
+	while (list < list_end) {
+		const u32 *cells;
+		const phandle *phandle;
+
+		phandle = list;
+		args = list + 1;
+
+		/* one cell hole in the list = <>; */
+		if (!*phandle) {
+			list++;
+			goto next;
+		}
+
+		node = of_find_node_by_phandle(*phandle);
+		if (!node) {
+			pr_debug("%s: could not find phandle\n",
+				 np->full_name);
+			goto err0;
+		}
+
+		cells = of_get_property(node, cells_name, &size);
+		if (!cells || size != sizeof(*cells)) {
+			pr_debug("%s: could not get %s for %s\n",
+				 np->full_name, cells_name, node->full_name);
+			goto err1;
+		}
+
+		/* Next phandle is at offset of one phandle cell + #cells */
+		list += 1 + *cells;
+		if (list > list_end) {
+			pr_debug("%s: insufficient arguments length\n",
+				 np->full_name);
+			goto err1;
+		}
+next:
+		if (cur_index == index)
+			break;
+
+		of_node_put(node);
+		node = NULL;
+		cur_index++;
+	}
+
+	if (!node) {
+		ret = -ENOENT;
+		goto err0;
+	}
+
+	*out_node = node;
+	*out_args = args;
+
+	return 0;
+err1:
+	of_node_put(node);
+err0:
+	pr_debug("%s failed with status %d\n", __func__, ret);
+	return ret;
+}
+EXPORT_SYMBOL(of_parse_phandles_with_args);
diff --git a/drivers/of/gpio.c b/drivers/of/gpio.c
index 1c9cab844f10..7cd7301b5839 100644
--- a/drivers/of/gpio.c
+++ b/drivers/of/gpio.c
@@ -28,78 +28,35 @@
  */
 int of_get_gpio(struct device_node *np, int index)
 {
-	int ret = -EINVAL;
+	int ret;
 	struct device_node *gc;
 	struct of_gpio_chip *of_gc = NULL;
 	int size;
-	const u32 *gpios;
-	u32 nr_cells;
-	int i;
 	const void *gpio_spec;
 	const u32 *gpio_cells;
-	int gpio_index = 0;
 
-	gpios = of_get_property(np, "gpios", &size);
-	if (!gpios) {
-		ret = -ENOENT;
+	ret = of_parse_phandles_with_args(np, "gpios", "#gpio-cells", index,
+					  &gc, &gpio_spec);
+	if (ret) {
+		pr_debug("%s: can't parse gpios property\n", __func__);
 		goto err0;
 	}
-	nr_cells = size / sizeof(u32);
-
-	for (i = 0; i < nr_cells; gpio_index++) {
-		const phandle *gpio_phandle;
-
-		gpio_phandle = gpios + i;
-		gpio_spec = gpio_phandle + 1;
-
-		/* one cell hole in the gpios = <>; */
-		if (!*gpio_phandle) {
-			if (gpio_index == index)
-				return -ENOENT;
-			i++;
-			continue;
-		}
-
-		gc = of_find_node_by_phandle(*gpio_phandle);
-		if (!gc) {
-			pr_debug("%s: could not find phandle for gpios\n",
-				 np->full_name);
-			goto err0;
-		}
-
-		of_gc = gc->data;
-		if (!of_gc) {
-			pr_debug("%s: gpio controller %s isn't registered\n",
-				 np->full_name, gc->full_name);
-			goto err1;
-		}
-
-		gpio_cells = of_get_property(gc, "#gpio-cells", &size);
-		if (!gpio_cells || size != sizeof(*gpio_cells) ||
-				*gpio_cells != of_gc->gpio_cells) {
-			pr_debug("%s: wrong #gpio-cells for %s\n",
-				 np->full_name, gc->full_name);
-			goto err1;
-		}
-
-		/* Next phandle is at phandle cells + #gpio-cells */
-		i += sizeof(*gpio_phandle) / sizeof(u32) + *gpio_cells;
-		if (i >= nr_cells + 1) {
-			pr_debug("%s: insufficient gpio-spec length\n",
-				 np->full_name);
-			goto err1;
-		}
-
-		if (gpio_index == index)
-			break;
-
-		of_gc = NULL;
-		of_node_put(gc);
-	}
 
+	of_gc = gc->data;
 	if (!of_gc) {
-		ret = -ENOENT;
-		goto err0;
+		pr_debug("%s: gpio controller %s isn't registered\n",
+			 np->full_name, gc->full_name);
+		ret = -ENODEV;
+		goto err1;
+	}
+
+	gpio_cells = of_get_property(gc, "#gpio-cells", &size);
+	if (!gpio_cells || size != sizeof(*gpio_cells) ||
+			*gpio_cells != of_gc->gpio_cells) {
+		pr_debug("%s: wrong #gpio-cells for %s\n",
+			 np->full_name, gc->full_name);
+		ret = -EINVAL;
+		goto err1;
 	}
 
 	ret = of_gc->xlate(of_gc, np, gpio_spec);
diff --git a/include/linux/of.h b/include/linux/of.h
index 79886ade070f..e2488f5e7cb2 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -71,5 +71,8 @@ extern int of_n_size_cells(struct device_node *np);
 extern const struct of_device_id *of_match_node(
 	const struct of_device_id *matches, const struct device_node *node);
 extern int of_modalias_node(struct device_node *node, char *modalias, int len);
+extern int of_parse_phandles_with_args(struct device_node *np,
+	const char *list_name, const char *cells_name, int index,
+	struct device_node **out_node, const void **out_args);
 
 #endif /* _LINUX_OF_H */
-- 
cgit v1.2.3


From 6283815d1853b7daf31dc4adb83e5c1dc9568251 Mon Sep 17 00:00:00 2001
From: Andre Noll <maan@systemlinux.org>
Date: Mon, 13 Oct 2008 11:55:12 +1100
Subject: md: linear: Represent dev_info->size and dev_info->offset in sectors.

Rename them to num_sectors and start_sector which is more descriptive.

Signed-off-by: Andre Noll <maan@systemlinux.org>
Signed-off-by: NeilBrown <neilb@suse.de>
---
 drivers/md/linear.c         | 54 ++++++++++++++++++++++++---------------------
 include/linux/raid/linear.h |  4 ++--
 2 files changed, 31 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 01ed03a0c7ee..1dadb134e0bb 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -42,7 +42,7 @@ static inline dev_info_t *which_dev(mddev_t *mddev, sector_t sector)
 	(void)sector_div(block, conf->hash_spacing);
 	hash = conf->hash_table[block];
 
-	while ((sector>>1) >= (hash->size + hash->offset))
+	while (sector >= hash->num_sectors + hash->start_sector)
 		hash++;
 	return hash;
 }
@@ -65,7 +65,7 @@ static int linear_mergeable_bvec(struct request_queue *q,
 	sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
 
 	dev0 = which_dev(mddev, sector);
-	maxsectors = (dev0->size << 1) - (sector - (dev0->offset<<1));
+	maxsectors = dev0->num_sectors - (sector - dev0->start_sector);
 
 	if (maxsectors < bio_sectors)
 		maxsectors = 0;
@@ -113,7 +113,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
 	mdk_rdev_t *rdev;
 	int i, nb_zone, cnt;
 	sector_t min_spacing;
-	sector_t curr_offset;
+	sector_t curr_sector;
 	struct list_head *tmp;
 
 	conf = kzalloc (sizeof (*conf) + raid_disks*sizeof(dev_info_t),
@@ -145,7 +145,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
 		    mddev->queue->max_sectors > (PAGE_SIZE>>9))
 			blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
 
-		disk->size = rdev->size;
+		disk->num_sectors = rdev->size * 2;
 		conf->array_sectors += rdev->size * 2;
 
 		cnt++;
@@ -169,7 +169,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
 		sector_t sz = 0;
 		int j;
 		for (j = i; j < cnt - 1 && sz < min_spacing; j++)
-			sz += conf->disks[j].size;
+			sz += conf->disks[j].num_sectors / 2;
 		if (sz >= min_spacing && sz < conf->hash_spacing)
 			conf->hash_spacing = sz;
 	}
@@ -211,20 +211,20 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
 	 * Here we generate the linear hash table
 	 * First calculate the device offsets.
 	 */
-	conf->disks[0].offset = 0;
+	conf->disks[0].start_sector = 0;
 	for (i = 1; i < raid_disks; i++)
-		conf->disks[i].offset =
-			conf->disks[i-1].offset +
-			conf->disks[i-1].size;
+		conf->disks[i].start_sector =
+			conf->disks[i-1].start_sector +
+			conf->disks[i-1].num_sectors;
 
 	table = conf->hash_table;
 	i = 0;
-	for (curr_offset = 0;
-	     curr_offset < conf->array_sectors / 2;
-	     curr_offset += conf->hash_spacing) {
+	for (curr_sector = 0;
+	     curr_sector < conf->array_sectors;
+	     curr_sector += conf->hash_spacing * 2) {
 
 		while (i < raid_disks-1 &&
-		       curr_offset >= conf->disks[i+1].offset)
+		       curr_sector >= conf->disks[i+1].start_sector)
 			i++;
 
 		*table ++ = conf->disks + i;
@@ -316,7 +316,6 @@ static int linear_make_request (struct request_queue *q, struct bio *bio)
 	const int rw = bio_data_dir(bio);
 	mddev_t *mddev = q->queuedata;
 	dev_info_t *tmp_dev;
-	sector_t block;
 	int cpu;
 
 	if (unlikely(bio_barrier(bio))) {
@@ -331,29 +330,33 @@ static int linear_make_request (struct request_queue *q, struct bio *bio)
 	part_stat_unlock();
 
 	tmp_dev = which_dev(mddev, bio->bi_sector);
-	block = bio->bi_sector >> 1;
     
-	if (unlikely(block >= (tmp_dev->size + tmp_dev->offset)
-		     || block < tmp_dev->offset)) {
+	if (unlikely(bio->bi_sector >= (tmp_dev->num_sectors +
+					tmp_dev->start_sector)
+		     || (bio->bi_sector <
+			 tmp_dev->start_sector))) {
 		char b[BDEVNAME_SIZE];
 
-		printk("linear_make_request: Block %llu out of bounds on "
-			"dev %s size %llu offset %llu\n",
-			(unsigned long long)block,
+		printk("linear_make_request: Sector %llu out of bounds on "
+			"dev %s: %llu sectors, offset %llu\n",
+			(unsigned long long)bio->bi_sector,
 			bdevname(tmp_dev->rdev->bdev, b),
-			(unsigned long long)tmp_dev->size,
-		        (unsigned long long)tmp_dev->offset);
+			(unsigned long long)tmp_dev->num_sectors,
+			(unsigned long long)tmp_dev->start_sector);
 		bio_io_error(bio);
 		return 0;
 	}
 	if (unlikely(bio->bi_sector + (bio->bi_size >> 9) >
-		     (tmp_dev->offset + tmp_dev->size)<<1)) {
+		     tmp_dev->start_sector + tmp_dev->num_sectors)) {
 		/* This bio crosses a device boundary, so we have to
 		 * split it.
 		 */
 		struct bio_pair *bp;
+
 		bp = bio_split(bio,
-			       ((tmp_dev->offset + tmp_dev->size)<<1) - bio->bi_sector);
+			       tmp_dev->start_sector + tmp_dev->num_sectors
+			       - bio->bi_sector);
+
 		if (linear_make_request(q, &bp->bio1))
 			generic_make_request(&bp->bio1);
 		if (linear_make_request(q, &bp->bio2))
@@ -363,7 +366,8 @@ static int linear_make_request (struct request_queue *q, struct bio *bio)
 	}
 		    
 	bio->bi_bdev = tmp_dev->rdev->bdev;
-	bio->bi_sector = bio->bi_sector - (tmp_dev->offset << 1) + tmp_dev->rdev->data_offset;
+	bio->bi_sector = bio->bi_sector - tmp_dev->start_sector
+		+ tmp_dev->rdev->data_offset;
 
 	return 1;
 }
diff --git a/include/linux/raid/linear.h b/include/linux/raid/linear.h
index 7e375111d007..87090e98529f 100644
--- a/include/linux/raid/linear.h
+++ b/include/linux/raid/linear.h
@@ -5,8 +5,8 @@
 
 struct dev_info {
 	mdk_rdev_t	*rdev;
-	sector_t	size;
-	sector_t	offset;
+	sector_t	num_sectors;
+	sector_t	start_sector;
 };
 
 typedef struct dev_info dev_info_t;
-- 
cgit v1.2.3


From ab5bd5cbc8d4b868378d062eed3d4240930fbb86 Mon Sep 17 00:00:00 2001
From: Andre Noll <maan@systemlinux.org>
Date: Mon, 13 Oct 2008 11:55:12 +1100
Subject: md: Convert remaining 1k representations in linear.c to sectors.

This patch renames hash_spacing and preshift to  spacing and
sector_shift respectively with the following change of semantics:

Case 1: (sizeof(sector_t) <= sizeof(u32)).
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

In this case, we have sector_shift = preshift = 0 and spacing =
2 * hash_spacing.

Hence, the index for the hash table which is computed by the new code
in which_dev() as sector / spacing equals the old value which was
(sector/2) / hash_spacing.

Note also that the value of nb_zone stays the same because both sz
and base double.

Case 2: (sizeof(sector_t) > sizeof(u32)).
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

(aka the shifting dance case). Here we have sector_shift = preshift +
1 and

spacing = 2 * hash_spacing

during the computation of nb_zone and curr_sector, but

spacing = hash_spacing

in which_dev() because in the last hunk of the patch for linear.c we
shift down conf->spacing (= 2 * hash_spacing) by one more bit than
in the old code.

Hence in the computation of nb_zone, sz and base have the same value
as before, so nb_zone is not affected. Also curr_sector in the next
hunk stays the same.

In which_dev() the hash table index is computed as

(sector >> sector_shift) / spacing

In view of sector_shift = preshift + 1 and spacing = hash_spacing,
this equals

((sector/2) >> preshift) / hash_spacing

which is the value computed by the old code.

Signed-off-by: Andre Noll <maan@systemlinux.org>
Signed-off-by: NeilBrown <neilb@suse.de>
---
 drivers/md/linear.c         | 35 +++++++++++++++++------------------
 include/linux/raid/linear.h |  6 ++++--
 2 files changed, 21 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 13e928bde7cd..09a64b9cbde8 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -33,14 +33,13 @@ static inline dev_info_t *which_dev(mddev_t *mddev, sector_t sector)
 {
 	dev_info_t *hash;
 	linear_conf_t *conf = mddev_to_conf(mddev);
-	sector_t block = sector >> 1;
 
 	/*
 	 * sector_div(a,b) returns the remainer and sets a to a/b
 	 */
-	block >>= conf->preshift;
-	(void)sector_div(block, conf->hash_spacing);
-	hash = conf->hash_table[block];
+	sector >>= conf->sector_shift;
+	(void)sector_div(sector, conf->spacing);
+	hash = conf->hash_table[sector];
 
 	while (sector >= hash->num_sectors + hash->start_sector)
 		hash++;
@@ -164,25 +163,25 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
 	 * that is larger than min_sectors and use the size of that as
 	 * the actual spacing
 	 */
-	conf->hash_spacing = conf->array_sectors / 2;
+	conf->spacing = conf->array_sectors;
 	for (i=0; i < cnt-1 ; i++) {
 		sector_t tmp = 0;
 		int j;
 		for (j = i; j < cnt - 1 && tmp < min_sectors; j++)
 			tmp += conf->disks[j].num_sectors;
-		if (tmp >= min_sectors && tmp < conf->hash_spacing * 2)
-			conf->hash_spacing = tmp / 2;
+		if (tmp >= min_sectors && tmp < conf->spacing)
+			conf->spacing = tmp;
 	}
 
-	/* hash_spacing may be too large for sector_div to work with,
+	/* spacing may be too large for sector_div to work with,
 	 * so we might need to pre-shift
 	 */
-	conf->preshift = 0;
+	conf->sector_shift = 0;
 	if (sizeof(sector_t) > sizeof(u32)) {
-		sector_t space = conf->hash_spacing;
+		sector_t space = conf->spacing;
 		while (space > (sector_t)(~(u32)0)) {
 			space >>= 1;
-			conf->preshift++;
+			conf->sector_shift++;
 		}
 	}
 	/*
@@ -194,9 +193,9 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
 		unsigned round;
 		unsigned long base;
 
-		sz = conf->array_sectors >> (conf->preshift + 1);
+		sz = conf->array_sectors >> conf->sector_shift;
 		sz += 1; /* force round-up */
-		base = conf->hash_spacing >> conf->preshift;
+		base = conf->spacing >> conf->sector_shift;
 		round = sector_div(sz, base);
 		nb_zone = sz + (round ? 1 : 0);
 	}
@@ -221,7 +220,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
 	i = 0;
 	for (curr_sector = 0;
 	     curr_sector < conf->array_sectors;
-	     curr_sector += conf->hash_spacing * 2) {
+	     curr_sector += conf->spacing) {
 
 		while (i < raid_disks-1 &&
 		       curr_sector >= conf->disks[i+1].start_sector)
@@ -230,12 +229,12 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
 		*table ++ = conf->disks + i;
 	}
 
-	if (conf->preshift) {
-		conf->hash_spacing >>= conf->preshift;
-		/* round hash_spacing up so that when we divide by it,
+	if (conf->sector_shift) {
+		conf->spacing >>= conf->sector_shift;
+		/* round spacing up so that when we divide by it,
 		 * we err on the side of "too-low", which is safest.
 		 */
-		conf->hash_spacing++;
+		conf->spacing++;
 	}
 
 	BUG_ON(table - conf->hash_table > nb_zone);
diff --git a/include/linux/raid/linear.h b/include/linux/raid/linear.h
index 87090e98529f..f38b9c586afb 100644
--- a/include/linux/raid/linear.h
+++ b/include/linux/raid/linear.h
@@ -15,9 +15,11 @@ struct linear_private_data
 {
 	struct linear_private_data *prev;	/* earlier version */
 	dev_info_t		**hash_table;
-	sector_t		hash_spacing;
+	sector_t		spacing;
 	sector_t		array_sectors;
-	int			preshift; /* shift before dividing by hash_spacing */
+	int			sector_shift;	/* shift before dividing
+						 * by spacing
+						 */
 	dev_info_t		disks[0];
 };
 
-- 
cgit v1.2.3


From fb4d8c76e56a887b9eee99fbc55fe82b18625d30 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 13 Oct 2008 11:55:12 +1100
Subject: md: Remove unnecessary #includes, #defines, and function
 declarations.

A lot of cruft has gathered over the years.  Time to remove it.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 drivers/md/linear.c     |  8 --------
 drivers/md/md.c         | 21 ++++-----------------
 drivers/md/multipath.c  |  9 ---------
 drivers/md/raid0.c      |  5 -----
 drivers/md/raid5.c      |  5 -----
 drivers/md/raid6.h      |  9 ---------
 include/linux/raid/md.h | 22 ----------------------
 7 files changed, 4 insertions(+), 75 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 09a64b9cbde8..190147c79e79 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -16,16 +16,8 @@
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  
 */
 
-#include <linux/module.h>
-
-#include <linux/raid/md.h>
-#include <linux/slab.h>
 #include <linux/raid/linear.h>
 
-#define MAJOR_NR MD_MAJOR
-#define MD_DRIVER
-#define MD_PERSONALITY
-
 /*
  * find which device holds a particular offset 
  */
diff --git a/drivers/md/md.c b/drivers/md/md.c
index fc33082d5ffd..cd97de5982e8 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -32,31 +32,20 @@
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
 
-#include <linux/module.h>
-#include <linux/kernel.h>
 #include <linux/kthread.h>
-#include <linux/linkage.h>
 #include <linux/raid/md.h>
 #include <linux/raid/bitmap.h>
 #include <linux/sysctl.h>
 #include <linux/buffer_head.h> /* for invalidate_bdev */
 #include <linux/poll.h>
-#include <linux/mutex.h>
 #include <linux/ctype.h>
-#include <linux/freezer.h>
-
-#include <linux/init.h>
-
+#include <linux/hdreg.h>
+#include <linux/proc_fs.h>
+#include <linux/random.h>
+#include <linux/reboot.h>
 #include <linux/file.h>
 
-#ifdef CONFIG_KMOD
-#include <linux/kmod.h>
-#endif
-
-#include <asm/unaligned.h>
-
 #define MAJOR_NR MD_MAJOR
-#define MD_DRIVER
 
 /* 63 partitions with the alternate major number (mdp) */
 #define MdpMinorShift 6
@@ -3559,12 +3548,10 @@ static int do_md_run(mddev_t * mddev)
 		}
 	}
 
-#ifdef CONFIG_KMOD
 	if (mddev->level != LEVEL_NONE)
 		request_module("md-level-%d", mddev->level);
 	else if (mddev->clevel[0])
 		request_module("md-%s", mddev->clevel);
-#endif
 
 	/*
 	 * Drop all container device buffers, from now on
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 8bb8794129b3..8744014b9d80 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -19,16 +19,7 @@
  * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
 #include <linux/raid/multipath.h>
-#include <linux/buffer_head.h>
-#include <asm/atomic.h>
-
-#define MAJOR_NR MD_MAJOR
-#define MD_DRIVER
-#define MD_PERSONALITY
 
 #define MAX_WORK_PER_DISK 128
 
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 53508a8a981d..8ac6488ad0dc 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -18,13 +18,8 @@
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  
 */
 
-#include <linux/module.h>
 #include <linux/raid/raid0.h>
 
-#define MAJOR_NR MD_MAJOR
-#define MD_DRIVER
-#define MD_PERSONALITY
-
 static void raid0_unplug(struct request_queue *q)
 {
 	mddev_t *mddev = q->queuedata;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index ae16794bef20..7e654543c97d 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -43,12 +43,7 @@
  * miss any bits.
  */
 
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/highmem.h>
-#include <linux/bitops.h>
 #include <linux/kthread.h>
-#include <asm/atomic.h>
 #include "raid6.h"
 
 #include <linux/raid/bitmap.h>
diff --git a/drivers/md/raid6.h b/drivers/md/raid6.h
index 31cbee71365f..98dcde88470e 100644
--- a/drivers/md/raid6.h
+++ b/drivers/md/raid6.h
@@ -18,15 +18,6 @@
 /* Set to 1 to use kernel-wide empty_zero_page */
 #define RAID6_USE_EMPTY_ZERO_PAGE 0
 
-#include <linux/module.h>
-#include <linux/stddef.h>
-#include <linux/compiler.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/mempool.h>
-#include <linux/list.h>
-#include <linux/vmalloc.h>
 #include <linux/raid/md.h>
 #include <linux/raid/raid5.h>
 
diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h
index dc0e3fcb9f28..bb727fa1ce7f 100644
--- a/include/linux/raid/md.h
+++ b/include/linux/raid/md.h
@@ -19,27 +19,7 @@
 #define _MD_H
 
 #include <linux/blkdev.h>
-#include <linux/major.h>
-#include <linux/ioctl.h>
-#include <linux/types.h>
-#include <linux/bitops.h>
-#include <linux/module.h>
-#include <linux/hdreg.h>
-#include <linux/proc_fs.h>
 #include <linux/seq_file.h>
-#include <linux/smp_lock.h>
-#include <linux/delay.h>
-#include <net/checksum.h>
-#include <linux/random.h>
-#include <linux/kernel_stat.h>
-#include <asm/io.h>
-#include <linux/completion.h>
-#include <linux/mempool.h>
-#include <linux/list.h>
-#include <linux/reboot.h>
-#include <linux/vmalloc.h>
-#include <linux/blkpg.h>
-#include <linux/bio.h>
 
 /*
  * 'md_p.h' holds the 'physical' layout of RAID devices
@@ -83,10 +63,8 @@ extern void md_wakeup_thread(mdk_thread_t *thread);
 extern void md_check_recovery(mddev_t *mddev);
 extern void md_write_start(mddev_t *mddev, struct bio *bi);
 extern void md_write_end(mddev_t *mddev);
-extern void md_handle_safemode(mddev_t *mddev);
 extern void md_done_sync(mddev_t *mddev, int blocks, int ok);
 extern void md_error (mddev_t *mddev, mdk_rdev_t *rdev);
-extern void md_unplug_mddev(mddev_t *mddev);
 
 extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
 			   sector_t sector, int size, struct page *page);
-- 
cgit v1.2.3


From d710e13812600037a723a673dc5c96a071de98d3 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 13 Oct 2008 11:55:12 +1100
Subject: md: remove space after function name in declaration and call.

Having
   function (args)
instead of
   function(args)

make is harder to search for calls of particular functions.
So remove all those spaces.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 drivers/md/md.c         | 26 +++++++++++++-------------
 drivers/md/raid5.c      | 30 +++++++++++++++---------------
 include/linux/raid/md.h | 10 +++++-----
 3 files changed, 33 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/md.c b/drivers/md/md.c
index cd97de5982e8..a29187d1fcf5 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -55,7 +55,7 @@
 
 
 #ifndef MODULE
-static void autostart_arrays (int part);
+static void autostart_arrays(int part);
 #endif
 
 static LIST_HEAD(pers_list);
@@ -201,7 +201,7 @@ static DEFINE_SPINLOCK(all_mddevs_lock);
 		)
 
 
-static int md_fail_request (struct request_queue *q, struct bio *bio)
+static int md_fail_request(struct request_queue *q, struct bio *bio)
 {
 	bio_io_error(bio);
 	return 0;
@@ -3962,10 +3962,10 @@ static void autorun_array(mddev_t *mddev)
 	}
 	printk("\n");
 
-	err = do_md_run (mddev);
+	err = do_md_run(mddev);
 	if (err) {
 		printk(KERN_WARNING "md: do_md_run() returned %d\n", err);
-		do_md_stop (mddev, 0, 0);
+		do_md_stop(mddev, 0, 0);
 	}
 }
 
@@ -4324,7 +4324,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
 
 	if (!(info->state & (1<<MD_DISK_FAULTY))) {
 		int err;
-		rdev = md_import_device (dev, -1, 0);
+		rdev = md_import_device(dev, -1, 0);
 		if (IS_ERR(rdev)) {
 			printk(KERN_WARNING 
 				"md: error, md_import_device() returned %ld\n",
@@ -4406,7 +4406,7 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev)
 		return -EINVAL;
 	}
 
-	rdev = md_import_device (dev, -1, 0);
+	rdev = md_import_device(dev, -1, 0);
 	if (IS_ERR(rdev)) {
 		printk(KERN_WARNING 
 			"md: error, md_import_device() returned %ld\n",
@@ -4925,11 +4925,11 @@ static int md_ioctl(struct inode *inode, struct file *file,
 			goto done_unlock;
 
 		case STOP_ARRAY:
-			err = do_md_stop (mddev, 0, 1);
+			err = do_md_stop(mddev, 0, 1);
 			goto done_unlock;
 
 		case STOP_ARRAY_RO:
-			err = do_md_stop (mddev, 1, 1);
+			err = do_md_stop(mddev, 1, 1);
 			goto done_unlock;
 
 	}
@@ -4978,7 +4978,7 @@ static int md_ioctl(struct inode *inode, struct file *file,
 			goto done_unlock;
 
 		case RUN_ARRAY:
-			err = do_md_run (mddev);
+			err = do_md_run(mddev);
 			goto done_unlock;
 
 		case SET_BITMAP_FILE:
@@ -5416,11 +5416,11 @@ static int md_seq_show(struct seq_file *seq, void *v)
 			seq_printf(seq, " super non-persistent");
 
 		if (mddev->pers) {
-			mddev->pers->status (seq, mddev);
+			mddev->pers->status(seq, mddev);
 	 		seq_printf(seq, "\n      ");
 			if (mddev->pers->sync_request) {
 				if (mddev->curr_resync > 2) {
-					status_resync (seq, mddev);
+					status_resync(seq, mddev);
 					seq_printf(seq, "\n      ");
 				} else if (mddev->curr_resync == 1 || mddev->curr_resync == 2)
 					seq_printf(seq, "\tresync=DELAYED\n      ");
@@ -6251,7 +6251,7 @@ static int md_notify_reboot(struct notifier_block *this,
 				 * appears to still be in use.  Hence
 				 * the '100'.
 				 */
-				do_md_stop (mddev, 1, 100);
+				do_md_stop(mddev, 1, 100);
 				mddev_unlock(mddev);
 			}
 		/*
@@ -6295,7 +6295,7 @@ static int __init md_init(void)
 	raid_table_header = register_sysctl_table(raid_root_table);
 
 	md_geninit();
-	return (0);
+	return 0;
 }
 
 
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 7e654543c97d..d72be4b89e64 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -270,7 +270,7 @@ static int grow_buffers(struct stripe_head *sh, int num)
 	return 0;
 }
 
-static void raid5_build_block (struct stripe_head *sh, int i);
+static void raid5_build_block(struct stripe_head *sh, int i);
 
 static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx, int disks)
 {
@@ -1146,7 +1146,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
 	release_stripe(sh);
 }
 
-static void raid5_end_write_request (struct bio *bi, int error)
+static void raid5_end_write_request(struct bio *bi, int error)
 {
  	struct stripe_head *sh = bi->bi_private;
 	raid5_conf_t *conf = sh->raid_conf;
@@ -1178,7 +1178,7 @@ static void raid5_end_write_request (struct bio *bi, int error)
 
 static sector_t compute_blocknr(struct stripe_head *sh, int i);
 	
-static void raid5_build_block (struct stripe_head *sh, int i)
+static void raid5_build_block(struct stripe_head *sh, int i)
 {
 	struct r5dev *dev = &sh->dev[i];
 
@@ -1216,10 +1216,10 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
 			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 		}
 		set_bit(Faulty, &rdev->flags);
-		printk (KERN_ALERT
-			"raid5: Disk failure on %s, disabling device.\n"
-			"raid5: Operation continuing on %d devices.\n",
-			bdevname(rdev->bdev,b), conf->raid_disks - mddev->degraded);
+		printk(KERN_ALERT
+		       "raid5: Disk failure on %s, disabling device.\n"
+		       "raid5: Operation continuing on %d devices.\n",
+		       bdevname(rdev->bdev,b), conf->raid_disks - mddev->degraded);
 	}
 }
 
@@ -1315,8 +1315,8 @@ static sector_t raid5_compute_sector(sector_t r_sector, unsigned int raid_disks,
 			*dd_idx = (*pd_idx + 2 + *dd_idx) % raid_disks;
 			break;
 		default:
-			printk (KERN_CRIT "raid6: unsupported algorithm %d\n",
-				conf->algorithm);
+			printk(KERN_CRIT "raid6: unsupported algorithm %d\n",
+			       conf->algorithm);
 		}
 		break;
 	}
@@ -1391,8 +1391,8 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i)
 			}
 			break;
 		default:
-			printk (KERN_CRIT "raid6: unsupported algorithm %d\n",
-				conf->algorithm);
+			printk(KERN_CRIT "raid6: unsupported algorithm %d\n",
+			       conf->algorithm);
 		}
 		break;
 	}
@@ -1400,7 +1400,7 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i)
 	chunk_number = stripe * data_disks + i;
 	r_sector = (sector_t)chunk_number * sectors_per_chunk + chunk_offset;
 
-	check = raid5_compute_sector (r_sector, raid_disks, data_disks, &dummy1, &dummy2, conf);
+	check = raid5_compute_sector(r_sector, raid_disks, data_disks, &dummy1, &dummy2, conf);
 	if (check != sh->sector || dummy1 != dd_idx || dummy2 != sh->pd_idx) {
 		printk(KERN_ERR "compute_blocknr: map not correct\n");
 		return 0;
@@ -4284,7 +4284,7 @@ static int stop(mddev_t *mddev)
 }
 
 #ifdef DEBUG
-static void print_sh (struct seq_file *seq, struct stripe_head *sh)
+static void print_sh(struct seq_file *seq, struct stripe_head *sh)
 {
 	int i;
 
@@ -4300,7 +4300,7 @@ static void print_sh (struct seq_file *seq, struct stripe_head *sh)
 	seq_printf(seq, "\n");
 }
 
-static void printall (struct seq_file *seq, raid5_conf_t *conf)
+static void printall(struct seq_file *seq, raid5_conf_t *conf)
 {
 	struct stripe_head *sh;
 	struct hlist_node *hn;
@@ -4318,7 +4318,7 @@ static void printall (struct seq_file *seq, raid5_conf_t *conf)
 }
 #endif
 
-static void status (struct seq_file *seq, mddev_t *mddev)
+static void status(struct seq_file *seq, mddev_t *mddev)
 {
 	raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
 	int i;
diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h
index bb727fa1ce7f..82bea14cae1a 100644
--- a/include/linux/raid/md.h
+++ b/include/linux/raid/md.h
@@ -54,17 +54,17 @@
 
 extern int mdp_major;
 
-extern int register_md_personality (struct mdk_personality *p);
-extern int unregister_md_personality (struct mdk_personality *p);
-extern mdk_thread_t * md_register_thread (void (*run) (mddev_t *mddev),
+extern int register_md_personality(struct mdk_personality *p);
+extern int unregister_md_personality(struct mdk_personality *p);
+extern mdk_thread_t * md_register_thread(void (*run) (mddev_t *mddev),
 				mddev_t *mddev, const char *name);
-extern void md_unregister_thread (mdk_thread_t *thread);
+extern void md_unregister_thread(mdk_thread_t *thread);
 extern void md_wakeup_thread(mdk_thread_t *thread);
 extern void md_check_recovery(mddev_t *mddev);
 extern void md_write_start(mddev_t *mddev, struct bio *bi);
 extern void md_write_end(mddev_t *mddev);
 extern void md_done_sync(mddev_t *mddev, int blocks, int ok);
-extern void md_error (mddev_t *mddev, mdk_rdev_t *rdev);
+extern void md_error(mddev_t *mddev, mdk_rdev_t *rdev);
 
 extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
 			   sector_t sector, int size, struct page *page);
-- 
cgit v1.2.3


From e6a7d3c04f8fe49099521e6dc9a46b0272381f2f Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 14 Oct 2008 11:58:31 -0700
Subject: netfilter: ctnetlink: remove bogus module dependency between
 ctnetlink and nf_nat

This patch removes the module dependency between ctnetlink and
nf_nat by means of an indirect call that is initialized when
nf_nat is loaded. Now, nf_conntrack_netlink only requires
nf_conntrack and nfnetlink.

This patch puts nfnetlink_parse_nat_setup_hook into the
nf_conntrack_core to avoid dependencies between ctnetlink,
nf_conntrack_ipv4 and nf_conntrack_ipv6.

This patch also introduces the function ctnetlink_change_nat
that is only invoked from the creation path. Actually, the
nat handling cannot be invoked from the update path since
this is not allowed. By introducing this function, we remove
the useless nat handling in the update path and we avoid
deadlock-prone code.

This patch also adds the required EAGAIN logic for nfnetlink.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter/nfnetlink.h  |   3 +
 include/net/netfilter/nf_nat_core.h  |   8 ++
 net/ipv4/netfilter/nf_nat_core.c     |  97 ++++++++++++++++++++++
 net/netfilter/nf_conntrack_core.c    |   7 ++
 net/netfilter/nf_conntrack_netlink.c | 151 ++++++++++++++---------------------
 net/netfilter/nfnetlink.c            |  12 ++-
 6 files changed, 185 insertions(+), 93 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index 0d8424f76899..7d8e0455ccac 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -78,6 +78,9 @@ extern int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group,
 			  int echo);
 extern int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags);
 
+extern void nfnl_lock(void);
+extern void nfnl_unlock(void);
+
 #define MODULE_ALIAS_NFNL_SUBSYS(subsys) \
 	MODULE_ALIAS("nfnetlink-subsys-" __stringify(subsys))
 
diff --git a/include/net/netfilter/nf_nat_core.h b/include/net/netfilter/nf_nat_core.h
index f29eeb9777e0..58684066388c 100644
--- a/include/net/netfilter/nf_nat_core.h
+++ b/include/net/netfilter/nf_nat_core.h
@@ -25,4 +25,12 @@ static inline int nf_nat_initialized(struct nf_conn *ct,
 	else
 		return test_bit(IPS_DST_NAT_DONE_BIT, &ct->status);
 }
+
+struct nlattr;
+
+extern int
+(*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct,
+				  enum nf_nat_manip_type manip,
+				  struct nlattr *attr);
+
 #endif /* _NF_NAT_CORE_H */
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 2ac9eaf1a8c9..a65cf692359f 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -584,6 +584,98 @@ static struct nf_ct_ext_type nat_extend __read_mostly = {
 	.flags		= NF_CT_EXT_F_PREALLOC,
 };
 
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = {
+	[CTA_PROTONAT_PORT_MIN]	= { .type = NLA_U16 },
+	[CTA_PROTONAT_PORT_MAX]	= { .type = NLA_U16 },
+};
+
+static int nfnetlink_parse_nat_proto(struct nlattr *attr,
+				     const struct nf_conn *ct,
+				     struct nf_nat_range *range)
+{
+	struct nlattr *tb[CTA_PROTONAT_MAX+1];
+	const struct nf_nat_protocol *npt;
+	int err;
+
+	err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, protonat_nla_policy);
+	if (err < 0)
+		return err;
+
+	npt = nf_nat_proto_find_get(nf_ct_protonum(ct));
+	if (npt->nlattr_to_range)
+		err = npt->nlattr_to_range(tb, range);
+	nf_nat_proto_put(npt);
+	return err;
+}
+
+static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = {
+	[CTA_NAT_MINIP]		= { .type = NLA_U32 },
+	[CTA_NAT_MAXIP]		= { .type = NLA_U32 },
+};
+
+static int
+nfnetlink_parse_nat(struct nlattr *nat,
+		    const struct nf_conn *ct, struct nf_nat_range *range)
+{
+	struct nlattr *tb[CTA_NAT_MAX+1];
+	int err;
+
+	memset(range, 0, sizeof(*range));
+
+	err = nla_parse_nested(tb, CTA_NAT_MAX, nat, nat_nla_policy);
+	if (err < 0)
+		return err;
+
+	if (tb[CTA_NAT_MINIP])
+		range->min_ip = nla_get_be32(tb[CTA_NAT_MINIP]);
+
+	if (!tb[CTA_NAT_MAXIP])
+		range->max_ip = range->min_ip;
+	else
+		range->max_ip = nla_get_be32(tb[CTA_NAT_MAXIP]);
+
+	if (range->min_ip)
+		range->flags |= IP_NAT_RANGE_MAP_IPS;
+
+	if (!tb[CTA_NAT_PROTO])
+		return 0;
+
+	err = nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO], ct, range);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+static int
+nfnetlink_parse_nat_setup(struct nf_conn *ct,
+			  enum nf_nat_manip_type manip,
+			  struct nlattr *attr)
+{
+	struct nf_nat_range range;
+
+	if (nfnetlink_parse_nat(attr, ct, &range) < 0)
+		return -EINVAL;
+	if (nf_nat_initialized(ct, manip))
+		return -EEXIST;
+
+	return nf_nat_setup_info(ct, &range, manip);
+}
+#else
+static int
+nfnetlink_parse_nat_setup(struct nf_conn *ct,
+			  enum nf_nat_manip_type manip,
+			  struct nlattr *attr)
+{
+	return -EOPNOTSUPP;
+}
+#endif
+
 static int __net_init nf_nat_net_init(struct net *net)
 {
 	net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size,
@@ -654,6 +746,9 @@ static int __init nf_nat_init(void)
 
 	BUG_ON(nf_nat_seq_adjust_hook != NULL);
 	rcu_assign_pointer(nf_nat_seq_adjust_hook, nf_nat_seq_adjust);
+	BUG_ON(nfnetlink_parse_nat_setup_hook != NULL);
+	rcu_assign_pointer(nfnetlink_parse_nat_setup_hook,
+			   nfnetlink_parse_nat_setup);
 	return 0;
 
  cleanup_extend:
@@ -667,10 +762,12 @@ static void __exit nf_nat_cleanup(void)
 	nf_ct_l3proto_put(l3proto);
 	nf_ct_extend_unregister(&nat_extend);
 	rcu_assign_pointer(nf_nat_seq_adjust_hook, NULL);
+	rcu_assign_pointer(nfnetlink_parse_nat_setup_hook, NULL);
 	synchronize_net();
 }
 
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("nf-nat-ipv4");
 
 module_init(nf_nat_init);
 module_exit(nf_nat_cleanup);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 27de3c7b006e..622d7c671cb7 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -38,9 +38,16 @@
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_conntrack_extend.h>
 #include <net/netfilter/nf_conntrack_acct.h>
+#include <net/netfilter/nf_nat.h>
 
 #define NF_CONNTRACK_VERSION	"0.5.0"
 
+unsigned int
+(*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct,
+				  enum nf_nat_manip_type manip,
+				  struct nlattr *attr) __read_mostly;
+EXPORT_SYMBOL_GPL(nfnetlink_parse_nat_setup_hook);
+
 DEFINE_SPINLOCK(nf_conntrack_lock);
 EXPORT_SYMBOL_GPL(nf_conntrack_lock);
 
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index cadfd15b44f6..08e82d64eb6f 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -689,71 +689,6 @@ ctnetlink_parse_tuple(struct nlattr *cda[], struct nf_conntrack_tuple *tuple,
 	return 0;
 }
 
-#ifdef CONFIG_NF_NAT_NEEDED
-static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = {
-	[CTA_PROTONAT_PORT_MIN]	= { .type = NLA_U16 },
-	[CTA_PROTONAT_PORT_MAX]	= { .type = NLA_U16 },
-};
-
-static int nfnetlink_parse_nat_proto(struct nlattr *attr,
-				     const struct nf_conn *ct,
-				     struct nf_nat_range *range)
-{
-	struct nlattr *tb[CTA_PROTONAT_MAX+1];
-	const struct nf_nat_protocol *npt;
-	int err;
-
-	err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, protonat_nla_policy);
-	if (err < 0)
-		return err;
-
-	npt = nf_nat_proto_find_get(nf_ct_protonum(ct));
-	if (npt->nlattr_to_range)
-		err = npt->nlattr_to_range(tb, range);
-	nf_nat_proto_put(npt);
-	return err;
-}
-
-static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = {
-	[CTA_NAT_MINIP]		= { .type = NLA_U32 },
-	[CTA_NAT_MAXIP]		= { .type = NLA_U32 },
-};
-
-static inline int
-nfnetlink_parse_nat(struct nlattr *nat,
-		    const struct nf_conn *ct, struct nf_nat_range *range)
-{
-	struct nlattr *tb[CTA_NAT_MAX+1];
-	int err;
-
-	memset(range, 0, sizeof(*range));
-
-	err = nla_parse_nested(tb, CTA_NAT_MAX, nat, nat_nla_policy);
-	if (err < 0)
-		return err;
-
-	if (tb[CTA_NAT_MINIP])
-		range->min_ip = nla_get_be32(tb[CTA_NAT_MINIP]);
-
-	if (!tb[CTA_NAT_MAXIP])
-		range->max_ip = range->min_ip;
-	else
-		range->max_ip = nla_get_be32(tb[CTA_NAT_MAXIP]);
-
-	if (range->min_ip)
-		range->flags |= IP_NAT_RANGE_MAP_IPS;
-
-	if (!tb[CTA_NAT_PROTO])
-		return 0;
-
-	err = nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO], ct, range);
-	if (err < 0)
-		return err;
-
-	return 0;
-}
-#endif
-
 static inline int
 ctnetlink_parse_help(struct nlattr *attr, char **helper_name)
 {
@@ -878,6 +813,34 @@ out:
 	return err;
 }
 
+static int
+ctnetlink_parse_nat_setup(struct nf_conn *ct,
+			  enum nf_nat_manip_type manip,
+			  struct nlattr *attr)
+{
+	typeof(nfnetlink_parse_nat_setup_hook) parse_nat_setup;
+
+	parse_nat_setup = rcu_dereference(nfnetlink_parse_nat_setup_hook);
+	if (!parse_nat_setup) {
+#ifdef CONFIG_KMOD
+		rcu_read_unlock();
+		nfnl_unlock();
+		if (request_module("nf-nat-ipv4") < 0) {
+			nfnl_lock();
+			rcu_read_lock();
+			return -EOPNOTSUPP;
+		}
+		nfnl_lock();
+		rcu_read_lock();
+		if (nfnetlink_parse_nat_setup_hook)
+			return -EAGAIN;
+#endif
+		return -EOPNOTSUPP;
+	}
+
+	return parse_nat_setup(ct, manip, attr);
+}
+
 static int
 ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[])
 {
@@ -897,31 +860,6 @@ ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[])
 		/* ASSURED bit can only be set */
 		return -EBUSY;
 
-	if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) {
-#ifndef CONFIG_NF_NAT_NEEDED
-		return -EOPNOTSUPP;
-#else
-		struct nf_nat_range range;
-
-		if (cda[CTA_NAT_DST]) {
-			if (nfnetlink_parse_nat(cda[CTA_NAT_DST], ct,
-						&range) < 0)
-				return -EINVAL;
-			if (nf_nat_initialized(ct, IP_NAT_MANIP_DST))
-				return -EEXIST;
-			nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST);
-		}
-		if (cda[CTA_NAT_SRC]) {
-			if (nfnetlink_parse_nat(cda[CTA_NAT_SRC], ct,
-						&range) < 0)
-				return -EINVAL;
-			if (nf_nat_initialized(ct, IP_NAT_MANIP_SRC))
-				return -EEXIST;
-			nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC);
-		}
-#endif
-	}
-
 	/* Be careful here, modifying NAT bits can screw up things,
 	 * so don't let users modify them directly if they don't pass
 	 * nf_nat_range. */
@@ -929,6 +867,31 @@ ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[])
 	return 0;
 }
 
+static int
+ctnetlink_change_nat(struct nf_conn *ct, struct nlattr *cda[])
+{
+#ifdef CONFIG_NF_NAT_NEEDED
+	int ret;
+
+	if (cda[CTA_NAT_DST]) {
+		ret = ctnetlink_parse_nat_setup(ct,
+						IP_NAT_MANIP_DST,
+						cda[CTA_NAT_DST]);
+		if (ret < 0)
+			return ret;
+	}
+	if (cda[CTA_NAT_SRC]) {
+		ret = ctnetlink_parse_nat_setup(ct,
+						IP_NAT_MANIP_SRC,
+						cda[CTA_NAT_SRC]);
+		if (ret < 0)
+			return ret;
+	}
+	return 0;
+#else
+	return -EOPNOTSUPP;
+#endif
+}
 
 static inline int
 ctnetlink_change_helper(struct nf_conn *ct, struct nlattr *cda[])
@@ -1157,6 +1120,14 @@ ctnetlink_create_conntrack(struct nlattr *cda[],
 		}
 	}
 
+	if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) {
+		err = ctnetlink_change_nat(ct, cda);
+		if (err < 0) {
+			rcu_read_unlock();
+			goto err;
+		}
+	}
+
 	if (cda[CTA_PROTOINFO]) {
 		err = ctnetlink_change_protoinfo(ct, cda);
 		if (err < 0) {
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index b75c9c4a995d..4739f9f961d8 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -44,15 +44,17 @@ static struct sock *nfnl = NULL;
 static const struct nfnetlink_subsystem *subsys_table[NFNL_SUBSYS_COUNT];
 static DEFINE_MUTEX(nfnl_mutex);
 
-static inline void nfnl_lock(void)
+void nfnl_lock(void)
 {
 	mutex_lock(&nfnl_mutex);
 }
+EXPORT_SYMBOL_GPL(nfnl_lock);
 
-static inline void nfnl_unlock(void)
+void nfnl_unlock(void)
 {
 	mutex_unlock(&nfnl_mutex);
 }
+EXPORT_SYMBOL_GPL(nfnl_unlock);
 
 int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n)
 {
@@ -132,6 +134,7 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		return 0;
 
 	type = nlh->nlmsg_type;
+replay:
 	ss = nfnetlink_get_subsys(type);
 	if (!ss) {
 #ifdef CONFIG_KMOD
@@ -165,7 +168,10 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		} else
 			return -EINVAL;
 
-		return nc->call(nfnl, skb, nlh, cda);
+		err = nc->call(nfnl, skb, nlh, cda);
+		if (err == -EAGAIN)
+			goto replay;
+		return err;
 	}
 }
 
-- 
cgit v1.2.3


From 4704f0e274829e3af00737d2d9adace2d71a9605 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 14 Oct 2008 19:16:07 -0400
Subject: NFS: Fix the resolution problem with nfs_inode_attrs_need_update()

It appears that 'jiffies' timestamps do not have high enough resolution for
nfs_inode_attrs_need_update(). One problem is that a GETATTR can be
launched within < 1 jiffy of the last operation that updated the attribute.
Another problem is that RPC calls can take < 1 jiffy to execute.

We can fix this by switching the variables to use a simple global counter
that gets incremented every time we start another GETATTR call.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c            | 14 ++++++++++----
 fs/nfs/inode.c          | 37 ++++++++++++++++++++++++++++++-------
 include/linux/nfs_fs.h  | 10 +++-------
 include/linux/nfs_xdr.h |  1 +
 4 files changed, 44 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 49d565412827..4807074ada8c 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -156,6 +156,7 @@ typedef struct {
 	decode_dirent_t	decode;
 	int		plus;
 	unsigned long	timestamp;
+	unsigned long	gencount;
 	int		timestamp_valid;
 } nfs_readdir_descriptor_t;
 
@@ -177,7 +178,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
 	struct file	*file = desc->file;
 	struct inode	*inode = file->f_path.dentry->d_inode;
 	struct rpc_cred	*cred = nfs_file_cred(file);
-	unsigned long	timestamp;
+	unsigned long	timestamp, gencount;
 	int		error;
 
 	dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n",
@@ -186,6 +187,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
 
  again:
 	timestamp = jiffies;
+	gencount = nfs_inc_attr_generation_counter();
 	error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, desc->entry->cookie, page,
 					  NFS_SERVER(inode)->dtsize, desc->plus);
 	if (error < 0) {
@@ -199,6 +201,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
 		goto error;
 	}
 	desc->timestamp = timestamp;
+	desc->gencount = gencount;
 	desc->timestamp_valid = 1;
 	SetPageUptodate(page);
 	/* Ensure consistent page alignment of the data.
@@ -224,9 +227,10 @@ int dir_decode(nfs_readdir_descriptor_t *desc)
 	if (IS_ERR(p))
 		return PTR_ERR(p);
 	desc->ptr = p;
-	if (desc->timestamp_valid)
+	if (desc->timestamp_valid) {
 		desc->entry->fattr->time_start = desc->timestamp;
-	else
+		desc->entry->fattr->gencount = desc->gencount;
+	} else
 		desc->entry->fattr->valid &= ~NFS_ATTR_FATTR;
 	return 0;
 }
@@ -471,7 +475,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
 	struct rpc_cred	*cred = nfs_file_cred(file);
 	struct page	*page = NULL;
 	int		status;
-	unsigned long	timestamp;
+	unsigned long	timestamp, gencount;
 
 	dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n",
 			(unsigned long long)*desc->dir_cookie);
@@ -482,6 +486,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
 		goto out;
 	}
 	timestamp = jiffies;
+	gencount = nfs_inc_attr_generation_counter();
 	status = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred,
 						*desc->dir_cookie, page,
 						NFS_SERVER(inode)->dtsize,
@@ -490,6 +495,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
 	desc->ptr = kmap(page);		/* matching kunmap in nfs_do_filldir */
 	if (status >= 0) {
 		desc->timestamp = timestamp;
+		desc->gencount = gencount;
 		desc->timestamp_valid = 1;
 		if ((status = dir_decode(desc)) == 0)
 			desc->entry->prev_cookie = *desc->dir_cookie;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index de3f11e6234e..116a3bd2bc9b 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -305,7 +305,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 			init_special_inode(inode, inode->i_mode, fattr->rdev);
 
 		nfsi->read_cache_jiffies = fattr->time_start;
-		nfsi->last_updated = now;
+		nfsi->attr_gencount = fattr->gencount;
 		nfsi->cache_change_attribute = now;
 		inode->i_atime = fattr->atime;
 		inode->i_mtime = fattr->mtime;
@@ -909,6 +909,30 @@ static int nfs_size_need_update(const struct inode *inode, const struct nfs_fatt
 	return nfs_size_to_loff_t(fattr->size) > i_size_read(inode);
 }
 
+static unsigned long nfs_attr_generation_counter;
+
+static unsigned long nfs_read_attr_generation_counter(void)
+{
+	smp_rmb();
+	return nfs_attr_generation_counter;
+}
+
+unsigned long nfs_inc_attr_generation_counter(void)
+{
+	unsigned long ret;
+	smp_rmb();
+	ret = ++nfs_attr_generation_counter;
+	smp_wmb();
+	return ret;
+}
+
+void nfs_fattr_init(struct nfs_fattr *fattr)
+{
+	fattr->valid = 0;
+	fattr->time_start = jiffies;
+	fattr->gencount = nfs_inc_attr_generation_counter();
+}
+
 /**
  * nfs_inode_attrs_need_update - check if the inode attributes need updating
  * @inode - pointer to inode
@@ -922,8 +946,7 @@ static int nfs_size_need_update(const struct inode *inode, const struct nfs_fatt
  * catch the case where ctime either didn't change, or went backwards
  * (if someone reset the clock on the server) by looking at whether
  * or not this RPC call was started after the inode was last updated.
- * Note also the check for jiffy wraparound if the last_updated timestamp
- * is later than 'jiffies'.
+ * Note also the check for wraparound of 'attr_gencount'
  *
  * The function returns 'true' if it thinks the attributes in 'fattr' are
  * more recent than the ones cached in the inode.
@@ -933,10 +956,10 @@ static int nfs_inode_attrs_need_update(const struct inode *inode, const struct n
 {
 	const struct nfs_inode *nfsi = NFS_I(inode);
 
-	return time_after(fattr->time_start, nfsi->last_updated) ||
+	return ((long)fattr->gencount - (long)nfsi->attr_gencount) > 0 ||
 		nfs_ctime_need_update(inode, fattr) ||
 		nfs_size_need_update(inode, fattr) ||
-		time_after(nfsi->last_updated, jiffies);
+		((long)nfsi->attr_gencount - (long)nfs_read_attr_generation_counter() > 0);
 }
 
 static int nfs_refresh_inode_locked(struct inode *inode, struct nfs_fattr *fattr)
@@ -1107,7 +1130,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 		}
 		/* If ctime has changed we should definitely clear access+acl caches */
 		if (!timespec_equal(&inode->i_ctime, &fattr->ctime))
-			invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
+			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
 	} else if (nfsi->change_attr != fattr->change_attr) {
 		dprintk("NFS: change_attr change on server for file %s/%ld\n",
 				inode->i_sb->s_id, inode->i_ino);
@@ -1163,7 +1186,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 		nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
 		nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
 		nfsi->attrtimeo_timestamp = now;
-		nfsi->last_updated = now;
+		nfsi->attr_gencount = nfs_inc_attr_generation_counter();
 	} else {
 		if (!time_in_range(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) {
 			if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode))
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index ca563ee13e32..ac8d0233b05c 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -137,7 +137,7 @@ struct nfs_inode {
 	unsigned long		attrtimeo_timestamp;
 	__u64			change_attr;		/* v4 only */
 
-	unsigned long		last_updated;
+	unsigned long		attr_gencount;
 	/* "Generation counter" for the attribute cache. This is
 	 * bumped whenever we update the metadata on the
 	 * server.
@@ -344,15 +344,11 @@ extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ct
 extern void put_nfs_open_context(struct nfs_open_context *ctx);
 extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, int mode);
 extern u64 nfs_compat_user_ino64(u64 fileid);
+extern void nfs_fattr_init(struct nfs_fattr *fattr);
 
 /* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */
 extern __be32 root_nfs_parse_addr(char *name); /*__init*/
-
-static inline void nfs_fattr_init(struct nfs_fattr *fattr)
-{
-	fattr->valid = 0;
-	fattr->time_start = jiffies;
-}
+extern unsigned long nfs_inc_attr_generation_counter(void);
 
 /*
  * linux/fs/nfs/file.c
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 6ee6ae3f095c..c1c31acb8a2b 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -56,6 +56,7 @@ struct nfs_fattr {
 	__u64			change_attr;	/* NFSv4 change attribute */
 	__u64			pre_change_attr;/* pre-op NFSv4 change attribute */
 	unsigned long		time_start;
+	unsigned long		gencount;
 };
 
 #define NFS_ATTR_WCC		0x0001		/* pre-op WCC data    */
-- 
cgit v1.2.3


From d98e6346350ac909f095768beb28b82368bd126f Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Tue, 1 Jul 2008 16:23:49 -0500
Subject: KVM: Move KVM TRACE DEFINITIONS to common header

Move KVM trace definitions from x86 specific kvm headers to common kvm
headers to create a cross-architecture numbering scheme for trace
events. This means the kvmtrace_format userspace tool won't need to know
which architecture produced the log file being processed.

Signed-off-by: Jerone Young <jyoung5@us.ibm.com>
Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 include/asm-x86/kvm.h      | 22 ----------------------
 include/asm-x86/kvm_host.h | 18 ------------------
 include/linux/kvm.h        | 21 +++++++++++++++++++++
 include/linux/kvm_host.h   | 19 +++++++++++++++++++
 4 files changed, 40 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/include/asm-x86/kvm.h b/include/asm-x86/kvm.h
index 78e954db1e7f..ba0dd791fadf 100644
--- a/include/asm-x86/kvm.h
+++ b/include/asm-x86/kvm.h
@@ -208,26 +208,4 @@ struct kvm_pit_channel_state {
 struct kvm_pit_state {
 	struct kvm_pit_channel_state channels[3];
 };
-
-#define KVM_TRC_INJ_VIRQ         (KVM_TRC_HANDLER + 0x02)
-#define KVM_TRC_REDELIVER_EVT    (KVM_TRC_HANDLER + 0x03)
-#define KVM_TRC_PEND_INTR        (KVM_TRC_HANDLER + 0x04)
-#define KVM_TRC_IO_READ          (KVM_TRC_HANDLER + 0x05)
-#define KVM_TRC_IO_WRITE         (KVM_TRC_HANDLER + 0x06)
-#define KVM_TRC_CR_READ          (KVM_TRC_HANDLER + 0x07)
-#define KVM_TRC_CR_WRITE         (KVM_TRC_HANDLER + 0x08)
-#define KVM_TRC_DR_READ          (KVM_TRC_HANDLER + 0x09)
-#define KVM_TRC_DR_WRITE         (KVM_TRC_HANDLER + 0x0A)
-#define KVM_TRC_MSR_READ         (KVM_TRC_HANDLER + 0x0B)
-#define KVM_TRC_MSR_WRITE        (KVM_TRC_HANDLER + 0x0C)
-#define KVM_TRC_CPUID            (KVM_TRC_HANDLER + 0x0D)
-#define KVM_TRC_INTR             (KVM_TRC_HANDLER + 0x0E)
-#define KVM_TRC_NMI              (KVM_TRC_HANDLER + 0x0F)
-#define KVM_TRC_VMMCALL          (KVM_TRC_HANDLER + 0x10)
-#define KVM_TRC_HLT              (KVM_TRC_HANDLER + 0x11)
-#define KVM_TRC_CLTS             (KVM_TRC_HANDLER + 0x12)
-#define KVM_TRC_LMSW             (KVM_TRC_HANDLER + 0x13)
-#define KVM_TRC_APIC_ACCESS      (KVM_TRC_HANDLER + 0x14)
-#define KVM_TRC_TDP_FAULT        (KVM_TRC_HANDLER + 0x15)
-
 #endif /* ASM_X86__KVM_H */
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 8c886be0103f..4f2bd884fd3a 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -691,24 +691,6 @@ enum {
 	TASK_SWITCH_GATE = 3,
 };
 
-#define KVMTRACE_5D(evt, vcpu, d1, d2, d3, d4, d5, name) \
-	trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \
-						vcpu, 5, d1, d2, d3, d4, d5)
-#define KVMTRACE_4D(evt, vcpu, d1, d2, d3, d4, name) \
-	trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \
-						vcpu, 4, d1, d2, d3, d4, 0)
-#define KVMTRACE_3D(evt, vcpu, d1, d2, d3, name) \
-	trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \
-						vcpu, 3, d1, d2, d3, 0, 0)
-#define KVMTRACE_2D(evt, vcpu, d1, d2, name) \
-	trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \
-						vcpu, 2, d1, d2, 0, 0, 0)
-#define KVMTRACE_1D(evt, vcpu, d1, name) \
-	trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \
-						vcpu, 1, d1, 0, 0, 0, 0)
-#define KVMTRACE_0D(evt, vcpu, name) \
-	trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \
-						vcpu, 0, 0, 0, 0, 0, 0)
 
 #ifdef CONFIG_64BIT
 # define KVM_EX_ENTRY ".quad"
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 70a30651cd12..8a3ceadb1366 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -440,4 +440,25 @@ struct kvm_trace_rec {
 #define KVM_GET_MP_STATE          _IOR(KVMIO,  0x98, struct kvm_mp_state)
 #define KVM_SET_MP_STATE          _IOW(KVMIO,  0x99, struct kvm_mp_state)
 
+#define KVM_TRC_INJ_VIRQ         (KVM_TRC_HANDLER + 0x02)
+#define KVM_TRC_REDELIVER_EVT    (KVM_TRC_HANDLER + 0x03)
+#define KVM_TRC_PEND_INTR        (KVM_TRC_HANDLER + 0x04)
+#define KVM_TRC_IO_READ          (KVM_TRC_HANDLER + 0x05)
+#define KVM_TRC_IO_WRITE         (KVM_TRC_HANDLER + 0x06)
+#define KVM_TRC_CR_READ          (KVM_TRC_HANDLER + 0x07)
+#define KVM_TRC_CR_WRITE         (KVM_TRC_HANDLER + 0x08)
+#define KVM_TRC_DR_READ          (KVM_TRC_HANDLER + 0x09)
+#define KVM_TRC_DR_WRITE         (KVM_TRC_HANDLER + 0x0A)
+#define KVM_TRC_MSR_READ         (KVM_TRC_HANDLER + 0x0B)
+#define KVM_TRC_MSR_WRITE        (KVM_TRC_HANDLER + 0x0C)
+#define KVM_TRC_CPUID            (KVM_TRC_HANDLER + 0x0D)
+#define KVM_TRC_INTR             (KVM_TRC_HANDLER + 0x0E)
+#define KVM_TRC_NMI              (KVM_TRC_HANDLER + 0x0F)
+#define KVM_TRC_VMMCALL          (KVM_TRC_HANDLER + 0x10)
+#define KVM_TRC_HLT              (KVM_TRC_HANDLER + 0x11)
+#define KVM_TRC_CLTS             (KVM_TRC_HANDLER + 0x12)
+#define KVM_TRC_LMSW             (KVM_TRC_HANDLER + 0x13)
+#define KVM_TRC_APIC_ACCESS      (KVM_TRC_HANDLER + 0x14)
+#define KVM_TRC_TDP_FAULT        (KVM_TRC_HANDLER + 0x15)
+
 #endif
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8525afc53107..a18aaad2ab79 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -326,6 +326,25 @@ struct kvm_stats_debugfs_item {
 extern struct kvm_stats_debugfs_item debugfs_entries[];
 extern struct dentry *kvm_debugfs_dir;
 
+#define KVMTRACE_5D(evt, vcpu, d1, d2, d3, d4, d5, name) \
+	trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \
+						vcpu, 5, d1, d2, d3, d4, d5)
+#define KVMTRACE_4D(evt, vcpu, d1, d2, d3, d4, name) \
+	trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \
+						vcpu, 4, d1, d2, d3, d4, 0)
+#define KVMTRACE_3D(evt, vcpu, d1, d2, d3, name) \
+	trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \
+						vcpu, 3, d1, d2, d3, 0, 0)
+#define KVMTRACE_2D(evt, vcpu, d1, d2, name) \
+	trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \
+						vcpu, 2, d1, d2, 0, 0, 0)
+#define KVMTRACE_1D(evt, vcpu, d1, name) \
+	trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \
+						vcpu, 1, d1, 0, 0, 0, 0)
+#define KVMTRACE_0D(evt, vcpu, name) \
+	trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \
+						vcpu, 0, 0, 0, 0, 0, 0)
+
 #ifdef CONFIG_KVM_TRACE
 int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg);
 void kvm_trace_cleanup(void);
-- 
cgit v1.2.3


From e32c8f2c0720fb21c6f4a5f6ccbebdadc878f707 Mon Sep 17 00:00:00 2001
From: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
Date: Mon, 14 Jul 2008 14:00:00 +0200
Subject: KVM: kvmtrace: Remove use of bit fields in kvm trace structure

This patch fixes kvmtrace use on big endian systems. When using bit fields the
compiler will lay data out in the wrong order expected when laid down into a
file.
This fixes it by using one variable instead of using bit fields.

Signed-off-by: Jerone Young <jyoung5@us.ibm.com>
Signed-off-by: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 include/linux/kvm.h  | 17 ++++++++++++++---
 virt/kvm/kvm_trace.c | 17 +++++++++--------
 2 files changed, 23 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 8a3ceadb1366..8a16b083df2e 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -311,9 +311,13 @@ struct kvm_s390_interrupt {
 
 /* This structure represents a single trace buffer record. */
 struct kvm_trace_rec {
-	__u32 event:28;
-	__u32 extra_u32:3;
-	__u32 cycle_in:1;
+	/* variable rec_val
+	 * is split into:
+	 * bits 0 - 27  -> event id
+	 * bits 28 -30  -> number of extra data args of size u32
+	 * bits 31      -> binary indicator for if tsc is in record
+	 */
+	__u32 rec_val;
 	__u32 pid;
 	__u32 vcpu_id;
 	union {
@@ -327,6 +331,13 @@ struct kvm_trace_rec {
 	} u;
 };
 
+#define TRACE_REC_EVENT_ID(val) \
+		(0x0fffffff & (val))
+#define TRACE_REC_NUM_DATA_ARGS(val) \
+		(0x70000000 & ((val) << 28))
+#define TRACE_REC_TCS(val) \
+		(0x80000000 & ((val) << 31))
+
 #define KVMIO 0xAE
 
 /*
diff --git a/virt/kvm/kvm_trace.c b/virt/kvm/kvm_trace.c
index 58141f31ea8f..9acb78b3cc9f 100644
--- a/virt/kvm/kvm_trace.c
+++ b/virt/kvm/kvm_trace.c
@@ -54,12 +54,13 @@ static void kvm_add_trace(void *probe_private, void *call_data,
 	struct kvm_trace *kt = kvm_trace;
 	struct kvm_trace_rec rec;
 	struct kvm_vcpu *vcpu;
-	int    i, extra, size;
+	int    i, size;
+	u32    extra;
 
 	if (unlikely(kt->trace_state != KVM_TRACE_STATE_RUNNING))
 		return;
 
-	rec.event	= va_arg(*args, u32);
+	rec.rec_val	= TRACE_REC_EVENT_ID(va_arg(*args, u32));
 	vcpu		= va_arg(*args, struct kvm_vcpu *);
 	rec.pid		= current->tgid;
 	rec.vcpu_id	= vcpu->vcpu_id;
@@ -67,21 +68,21 @@ static void kvm_add_trace(void *probe_private, void *call_data,
 	extra   	= va_arg(*args, u32);
 	WARN_ON(!(extra <= KVM_TRC_EXTRA_MAX));
 	extra 		= min_t(u32, extra, KVM_TRC_EXTRA_MAX);
-	rec.extra_u32   = extra;
 
-	rec.cycle_in 	= p->cycle_in;
+	rec.rec_val |= TRACE_REC_TCS(p->cycle_in)
+			| TRACE_REC_NUM_DATA_ARGS(extra);
 
-	if (rec.cycle_in) {
+	if (p->cycle_in) {
 		rec.u.cycle.cycle_u64 = get_cycles();
 
-		for (i = 0; i < rec.extra_u32; i++)
+		for (i = 0; i < extra; i++)
 			rec.u.cycle.extra_u32[i] = va_arg(*args, u32);
 	} else {
-		for (i = 0; i < rec.extra_u32; i++)
+		for (i = 0; i < extra; i++)
 			rec.u.nocycle.extra_u32[i] = va_arg(*args, u32);
 	}
 
-	size = calc_rec_size(rec.cycle_in, rec.extra_u32 * sizeof(u32));
+	size = calc_rec_size(p->cycle_in, extra * sizeof(u32));
 	relay_write(kt->rchan, &rec, size);
 }
 
-- 
cgit v1.2.3


From 3f7f95c65ef6a89472a28da1b9436eaeee288831 Mon Sep 17 00:00:00 2001
From: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
Date: Mon, 14 Jul 2008 14:00:01 +0200
Subject: KVM: kvmtrace: replace get_cycles with ktime_get v3

The current kvmtrace code uses get_cycles() while the interpretation would be
easier using using nanoseconds. ktime_get() should give at least the same
accuracy as get_cycles on all architectures (even better on 32bit archs) but
at a better unit (e.g. comparable between hosts with different frequencies.

[avi: avoid ktime_t in public header]

Signed-off-by: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 include/linux/kvm.h  |  6 +++---
 virt/kvm/kvm_trace.c | 19 ++++++++++---------
 2 files changed, 13 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 8a16b083df2e..5d08f11bb27f 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -322,12 +322,12 @@ struct kvm_trace_rec {
 	__u32 vcpu_id;
 	union {
 		struct {
-			__u64 cycle_u64;
+			__u64 timestamp;
 			__u32 extra_u32[KVM_TRC_EXTRA_MAX];
-		} __attribute__((packed)) cycle;
+		} __attribute__((packed)) timestamp;
 		struct {
 			__u32 extra_u32[KVM_TRC_EXTRA_MAX];
-		} nocycle;
+		} notimestamp;
 	} u;
 };
 
diff --git a/virt/kvm/kvm_trace.c b/virt/kvm/kvm_trace.c
index 9acb78b3cc9f..41dcc845f78c 100644
--- a/virt/kvm/kvm_trace.c
+++ b/virt/kvm/kvm_trace.c
@@ -17,6 +17,7 @@
 #include <linux/module.h>
 #include <linux/relay.h>
 #include <linux/debugfs.h>
+#include <linux/ktime.h>
 
 #include <linux/kvm_host.h>
 
@@ -35,16 +36,16 @@ static struct kvm_trace *kvm_trace;
 struct kvm_trace_probe {
 	const char *name;
 	const char *format;
-	u32 cycle_in;
+	u32 timestamp_in;
 	marker_probe_func *probe_func;
 };
 
-static inline int calc_rec_size(int cycle, int extra)
+static inline int calc_rec_size(int timestamp, int extra)
 {
 	int rec_size = KVM_TRC_HEAD_SIZE;
 
 	rec_size += extra;
-	return cycle ? rec_size += KVM_TRC_CYCLE_SIZE : rec_size;
+	return timestamp ? rec_size += KVM_TRC_CYCLE_SIZE : rec_size;
 }
 
 static void kvm_add_trace(void *probe_private, void *call_data,
@@ -69,20 +70,20 @@ static void kvm_add_trace(void *probe_private, void *call_data,
 	WARN_ON(!(extra <= KVM_TRC_EXTRA_MAX));
 	extra 		= min_t(u32, extra, KVM_TRC_EXTRA_MAX);
 
-	rec.rec_val |= TRACE_REC_TCS(p->cycle_in)
+	rec.rec_val |= TRACE_REC_TCS(p->timestamp_in)
 			| TRACE_REC_NUM_DATA_ARGS(extra);
 
-	if (p->cycle_in) {
-		rec.u.cycle.cycle_u64 = get_cycles();
+	if (p->timestamp_in) {
+		rec.u.timestamp.timestamp = ktime_to_ns(ktime_get());
 
 		for (i = 0; i < extra; i++)
-			rec.u.cycle.extra_u32[i] = va_arg(*args, u32);
+			rec.u.timestamp.extra_u32[i] = va_arg(*args, u32);
 	} else {
 		for (i = 0; i < extra; i++)
-			rec.u.nocycle.extra_u32[i] = va_arg(*args, u32);
+			rec.u.notimestamp.extra_u32[i] = va_arg(*args, u32);
 	}
 
-	size = calc_rec_size(p->cycle_in, extra * sizeof(u32));
+	size = calc_rec_size(p->timestamp_in, extra * sizeof(u32));
 	relay_write(kt->rchan, &rec, size);
 }
 
-- 
cgit v1.2.3


From 31711f2294b38d8334efaf7dbac6da4781fd151e Mon Sep 17 00:00:00 2001
From: Jerone Young <jyoung5@us.ibm.com>
Date: Mon, 14 Jul 2008 14:00:03 +0200
Subject: KVM: ppc: adds trace points for ppc tlb activity

This patch adds trace points to track powerpc TLB activities using the
KVM_TRACE infrastructure.

Signed-off-by: Jerone Young <jyoung5@us.ibm.com>
Signed-off-by: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/powerpc/kvm/44x_tlb.c | 15 ++++++++++++++-
 arch/powerpc/kvm/emulate.c |  4 ++++
 include/linux/kvm.h        |  3 +++
 3 files changed, 21 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index 5a5602da5091..a207d16b9dbb 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -19,6 +19,7 @@
 
 #include <linux/types.h>
 #include <linux/string.h>
+#include <linux/kvm.h>
 #include <linux/kvm_host.h>
 #include <linux/highmem.h>
 #include <asm/mmu-44x.h>
@@ -175,6 +176,10 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid,
 	stlbe->word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf);
 	stlbe->word2 = kvmppc_44x_tlb_shadow_attrib(flags,
 	                                            vcpu->arch.msr & MSR_PR);
+
+	KVMTRACE_5D(STLB_WRITE, vcpu, victim,
+			stlbe->tid, stlbe->word0, stlbe->word1, stlbe->word2,
+			handler);
 }
 
 void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr,
@@ -204,6 +209,9 @@ void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr,
 
 		kvmppc_44x_shadow_release(vcpu, i);
 		stlbe->word0 = 0;
+		KVMTRACE_5D(STLB_INVAL, vcpu, i,
+				stlbe->tid, stlbe->word0, stlbe->word1,
+				stlbe->word2, handler);
 	}
 	up_write(&current->mm->mmap_sem);
 }
@@ -217,8 +225,13 @@ void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode)
 	/* XXX Replace loop with fancy data structures. */
 	down_write(&current->mm->mmap_sem);
 	for (i = 0; i <= tlb_44x_hwater; i++) {
+		struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i];
+
 		kvmppc_44x_shadow_release(vcpu, i);
-		vcpu->arch.shadow_tlb[i].word0 = 0;
+		stlbe->word0 = 0;
+		KVMTRACE_5D(STLB_INVAL, vcpu, i,
+				stlbe->tid, stlbe->word0, stlbe->word1,
+				stlbe->word2, handler);
 	}
 	up_write(&current->mm->mmap_sem);
 }
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 8c605d0a5488..4a3e274bac12 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -170,6 +170,10 @@ static int kvmppc_emul_tlbwe(struct kvm_vcpu *vcpu, u32 inst)
 		kvmppc_mmu_map(vcpu, eaddr, raddr >> PAGE_SHIFT, asid, flags);
 	}
 
+	KVMTRACE_5D(GTLB_WRITE, vcpu, index,
+			tlbe->tid, tlbe->word0, tlbe->word1, tlbe->word2,
+			handler);
+
 	return EMULATE_DONE;
 }
 
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 5d08f11bb27f..e21a5050d4d6 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -471,5 +471,8 @@ struct kvm_trace_rec {
 #define KVM_TRC_LMSW             (KVM_TRC_HANDLER + 0x13)
 #define KVM_TRC_APIC_ACCESS      (KVM_TRC_HANDLER + 0x14)
 #define KVM_TRC_TDP_FAULT        (KVM_TRC_HANDLER + 0x15)
+#define KVM_TRC_GTLB_WRITE       (KVM_TRC_HANDLER + 0x16)
+#define KVM_TRC_STLB_WRITE       (KVM_TRC_HANDLER + 0x17)
+#define KVM_TRC_STLB_INVAL       (KVM_TRC_HANDLER + 0x18)
 
 #endif
-- 
cgit v1.2.3


From 3b4bd7969f7b61a1ab455bff084ee4f0a2411055 Mon Sep 17 00:00:00 2001
From: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
Date: Mon, 14 Jul 2008 14:00:04 +0200
Subject: KVM: ppc: trace powerpc instruction emulation

This patch adds a trace point for the instruction emulation on embedded powerpc
utilizing the KVM_TRACE interface.

Signed-off-by: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/powerpc/kvm/emulate.c | 2 ++
 include/linux/kvm.h        | 1 +
 2 files changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 4a3e274bac12..c3ed63b22210 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -769,6 +769,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 		break;
 	}
 
+	KVMTRACE_3D(PPC_INSTR, vcpu, inst, vcpu->arch.pc, emulated, entryexit);
+
 	if (advance)
 		vcpu->arch.pc += 4; /* Advance past emulated instruction. */
 
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index e21a5050d4d6..d29b64881447 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -474,5 +474,6 @@ struct kvm_trace_rec {
 #define KVM_TRC_GTLB_WRITE       (KVM_TRC_HANDLER + 0x16)
 #define KVM_TRC_STLB_WRITE       (KVM_TRC_HANDLER + 0x17)
 #define KVM_TRC_STLB_INVAL       (KVM_TRC_HANDLER + 0x18)
+#define KVM_TRC_PPC_INSTR        (KVM_TRC_HANDLER + 0x19)
 
 #endif
-- 
cgit v1.2.3


From 4d5c5d0fe89c921336b95f5e7e4f529a9df92f53 Mon Sep 17 00:00:00 2001
From: Ben-Ami Yassour <benami@il.ibm.com>
Date: Mon, 28 Jul 2008 19:26:26 +0300
Subject: KVM: pci device assignment

Based on a patch from: Amit Shah <amit.shah@qumranet.com>

This patch adds support for handling PCI devices that are assigned to
the guest.

The device to be assigned to the guest is registered in the host kernel
and interrupt delivery is handled.  If a device is already assigned, or
the device driver for it is still loaded on the host, the device
assignment is failed by conveying a -EBUSY reply to the userspace.

Devices that share their interrupt line are not supported at the moment.

By itself, this patch will not make devices work within the guest.
The VT-d extension is required to enable the device to perform DMA.
Another alternative is PVDMA.

Signed-off-by: Amit Shah <amit.shah@qumranet.com>
Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/x86.c         | 243 +++++++++++++++++++++++++++++++++++++++++++++
 include/asm-x86/kvm_host.h |  16 +++
 include/linux/kvm.h        |  19 ++++
 3 files changed, 278 insertions(+)

(limited to 'include/linux')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 94a216562f10..a97157cc42ae 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4,10 +4,14 @@
  * derived from drivers/kvm/kvm_main.c
  *
  * Copyright (C) 2006 Qumranet, Inc.
+ * Copyright (C) 2008 Qumranet, Inc.
+ * Copyright IBM Corporation, 2008
  *
  * Authors:
  *   Avi Kivity   <avi@qumranet.com>
  *   Yaniv Kamay  <yaniv@qumranet.com>
+ *   Amit Shah    <amit.shah@qumranet.com>
+ *   Ben-Ami Yassour <benami@il.ibm.com>
  *
  * This work is licensed under the terms of the GNU GPL, version 2.  See
  * the COPYING file in the top-level directory.
@@ -23,8 +27,10 @@
 #include "x86.h"
 
 #include <linux/clocksource.h>
+#include <linux/interrupt.h>
 #include <linux/kvm.h>
 #include <linux/fs.h>
+#include <linux/pci.h>
 #include <linux/vmalloc.h>
 #include <linux/module.h>
 #include <linux/mman.h>
@@ -98,6 +104,219 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ NULL }
 };
 
+struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
+						      int assigned_dev_id)
+{
+	struct list_head *ptr;
+	struct kvm_assigned_dev_kernel *match;
+
+	list_for_each(ptr, head) {
+		match = list_entry(ptr, struct kvm_assigned_dev_kernel, list);
+		if (match->assigned_dev_id == assigned_dev_id)
+			return match;
+	}
+	return NULL;
+}
+
+static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
+{
+	struct kvm_assigned_dev_kernel *assigned_dev;
+
+	assigned_dev = container_of(work, struct kvm_assigned_dev_kernel,
+				    interrupt_work);
+
+	/* This is taken to safely inject irq inside the guest. When
+	 * the interrupt injection (or the ioapic code) uses a
+	 * finer-grained lock, update this
+	 */
+	mutex_lock(&assigned_dev->kvm->lock);
+	kvm_set_irq(assigned_dev->kvm,
+		    assigned_dev->guest_irq, 1);
+	mutex_unlock(&assigned_dev->kvm->lock);
+	kvm_put_kvm(assigned_dev->kvm);
+}
+
+/* FIXME: Implement the OR logic needed to make shared interrupts on
+ * this line behave properly
+ */
+static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
+{
+	struct kvm_assigned_dev_kernel *assigned_dev =
+		(struct kvm_assigned_dev_kernel *) dev_id;
+
+	kvm_get_kvm(assigned_dev->kvm);
+	schedule_work(&assigned_dev->interrupt_work);
+	disable_irq_nosync(irq);
+	return IRQ_HANDLED;
+}
+
+/* Ack the irq line for an assigned device */
+static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
+{
+	struct kvm_assigned_dev_kernel *dev;
+
+	if (kian->gsi == -1)
+		return;
+
+	dev = container_of(kian, struct kvm_assigned_dev_kernel,
+			   ack_notifier);
+	kvm_set_irq(dev->kvm, dev->guest_irq, 0);
+	enable_irq(dev->host_irq);
+}
+
+static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
+				   struct kvm_assigned_irq
+				   *assigned_irq)
+{
+	int r = 0;
+	struct kvm_assigned_dev_kernel *match;
+
+	mutex_lock(&kvm->lock);
+
+	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      assigned_irq->assigned_dev_id);
+	if (!match) {
+		mutex_unlock(&kvm->lock);
+		return -EINVAL;
+	}
+
+	if (match->irq_requested) {
+		match->guest_irq = assigned_irq->guest_irq;
+		match->ack_notifier.gsi = assigned_irq->guest_irq;
+		mutex_unlock(&kvm->lock);
+		return 0;
+	}
+
+	INIT_WORK(&match->interrupt_work,
+		  kvm_assigned_dev_interrupt_work_handler);
+
+	if (irqchip_in_kernel(kvm)) {
+		if (assigned_irq->host_irq)
+			match->host_irq = assigned_irq->host_irq;
+		else
+			match->host_irq = match->dev->irq;
+		match->guest_irq = assigned_irq->guest_irq;
+		match->ack_notifier.gsi = assigned_irq->guest_irq;
+		match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
+		kvm_register_irq_ack_notifier(kvm, &match->ack_notifier);
+
+		/* Even though this is PCI, we don't want to use shared
+		 * interrupts. Sharing host devices with guest-assigned devices
+		 * on the same interrupt line is not a happy situation: there
+		 * are going to be long delays in accepting, acking, etc.
+		 */
+		if (request_irq(match->host_irq, kvm_assigned_dev_intr, 0,
+				"kvm_assigned_device", (void *)match)) {
+			printk(KERN_INFO "%s: couldn't allocate irq for pv "
+			       "device\n", __func__);
+			r = -EIO;
+			goto out;
+		}
+	}
+
+	match->irq_requested = true;
+out:
+	mutex_unlock(&kvm->lock);
+	return r;
+}
+
+static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
+				      struct kvm_assigned_pci_dev *assigned_dev)
+{
+	int r = 0;
+	struct kvm_assigned_dev_kernel *match;
+	struct pci_dev *dev;
+
+	mutex_lock(&kvm->lock);
+
+	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      assigned_dev->assigned_dev_id);
+	if (match) {
+		/* device already assigned */
+		r = -EINVAL;
+		goto out;
+	}
+
+	match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL);
+	if (match == NULL) {
+		printk(KERN_INFO "%s: Couldn't allocate memory\n",
+		       __func__);
+		r = -ENOMEM;
+		goto out;
+	}
+	dev = pci_get_bus_and_slot(assigned_dev->busnr,
+				   assigned_dev->devfn);
+	if (!dev) {
+		printk(KERN_INFO "%s: host device not found\n", __func__);
+		r = -EINVAL;
+		goto out_free;
+	}
+	if (pci_enable_device(dev)) {
+		printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
+		r = -EBUSY;
+		goto out_put;
+	}
+	r = pci_request_regions(dev, "kvm_assigned_device");
+	if (r) {
+		printk(KERN_INFO "%s: Could not get access to device regions\n",
+		       __func__);
+		goto out_disable;
+	}
+	match->assigned_dev_id = assigned_dev->assigned_dev_id;
+	match->host_busnr = assigned_dev->busnr;
+	match->host_devfn = assigned_dev->devfn;
+	match->dev = dev;
+
+	match->kvm = kvm;
+
+	list_add(&match->list, &kvm->arch.assigned_dev_head);
+
+out:
+	mutex_unlock(&kvm->lock);
+	return r;
+out_disable:
+	pci_disable_device(dev);
+out_put:
+	pci_dev_put(dev);
+out_free:
+	kfree(match);
+	mutex_unlock(&kvm->lock);
+	return r;
+}
+
+static void kvm_free_assigned_devices(struct kvm *kvm)
+{
+	struct list_head *ptr, *ptr2;
+	struct kvm_assigned_dev_kernel *assigned_dev;
+
+	list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) {
+		assigned_dev = list_entry(ptr,
+					  struct kvm_assigned_dev_kernel,
+					  list);
+
+		if (irqchip_in_kernel(kvm) && assigned_dev->irq_requested) {
+			free_irq(assigned_dev->host_irq,
+				 (void *)assigned_dev);
+
+			kvm_unregister_irq_ack_notifier(kvm,
+							&assigned_dev->
+							ack_notifier);
+		}
+
+		if (cancel_work_sync(&assigned_dev->interrupt_work))
+			/* We had pending work. That means we will have to take
+			 * care of kvm_put_kvm.
+			 */
+			kvm_put_kvm(kvm);
+
+		pci_release_regions(assigned_dev->dev);
+		pci_disable_device(assigned_dev->dev);
+		pci_dev_put(assigned_dev->dev);
+
+		list_del(&assigned_dev->list);
+		kfree(assigned_dev);
+	}
+}
 
 unsigned long segment_base(u16 selector)
 {
@@ -1766,6 +1985,28 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = 0;
 		break;
 	}
+	case KVM_ASSIGN_PCI_DEVICE: {
+		struct kvm_assigned_pci_dev assigned_dev;
+
+		r = -EFAULT;
+		if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
+			goto out;
+		r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev);
+		if (r)
+			goto out;
+		break;
+	}
+	case KVM_ASSIGN_IRQ: {
+		struct kvm_assigned_irq assigned_irq;
+
+		r = -EFAULT;
+		if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
+			goto out;
+		r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq);
+		if (r)
+			goto out;
+		break;
+	}
 	case KVM_GET_PIT: {
 		struct kvm_pit_state ps;
 		r = -EFAULT;
@@ -3945,6 +4186,7 @@ struct  kvm *kvm_arch_create_vm(void)
 		return ERR_PTR(-ENOMEM);
 
 	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
+	INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
 
 	return kvm;
 }
@@ -3977,6 +4219,7 @@ static void kvm_free_vcpus(struct kvm *kvm)
 
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
+	kvm_free_assigned_devices(kvm);
 	kvm_free_pit(kvm);
 	kfree(kvm->arch.vpic);
 	kfree(kvm->arch.vioapic);
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index d451928fc841..99dddfcecf60 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -327,6 +327,21 @@ struct kvm_irq_ack_notifier {
 	void (*irq_acked)(struct kvm_irq_ack_notifier *kian);
 };
 
+struct kvm_assigned_dev_kernel {
+	struct kvm_irq_ack_notifier ack_notifier;
+	struct work_struct interrupt_work;
+	struct list_head list;
+	struct kvm_assigned_pci_dev assigned_dev;
+	int assigned_dev_id;
+	int host_busnr;
+	int host_devfn;
+	int host_irq;
+	int guest_irq;
+	int irq_requested;
+	struct pci_dev *dev;
+	struct kvm *kvm;
+};
+
 struct kvm_arch{
 	int naliases;
 	struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS];
@@ -339,6 +354,7 @@ struct kvm_arch{
 	 * Hash table of struct kvm_mmu_page.
 	 */
 	struct list_head active_mmu_pages;
+	struct list_head assigned_dev_head;
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index d29b64881447..ef4bc6f89778 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -383,6 +383,7 @@ struct kvm_trace_rec {
 #define KVM_CAP_MP_STATE 14
 #define KVM_CAP_COALESCED_MMIO 15
 #define KVM_CAP_SYNC_MMU 16  /* Changes to host mmap are reflected in guest */
+#define KVM_CAP_DEVICE_ASSIGNMENT 17
 
 /*
  * ioctls for VM fds
@@ -412,6 +413,10 @@ struct kvm_trace_rec {
 			_IOW(KVMIO,  0x67, struct kvm_coalesced_mmio_zone)
 #define KVM_UNREGISTER_COALESCED_MMIO \
 			_IOW(KVMIO,  0x68, struct kvm_coalesced_mmio_zone)
+#define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \
+				   struct kvm_assigned_pci_dev)
+#define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \
+			    struct kvm_assigned_irq)
 
 /*
  * ioctls for vcpu fds
@@ -476,4 +481,18 @@ struct kvm_trace_rec {
 #define KVM_TRC_STLB_INVAL       (KVM_TRC_HANDLER + 0x18)
 #define KVM_TRC_PPC_INSTR        (KVM_TRC_HANDLER + 0x19)
 
+struct kvm_assigned_pci_dev {
+	__u32 assigned_dev_id;
+	__u32 busnr;
+	__u32 devfn;
+	__u32 flags;
+};
+
+struct kvm_assigned_irq {
+	__u32 assigned_dev_id;
+	__u32 host_irq;
+	__u32 guest_irq;
+	__u32 flags;
+};
+
 #endif
-- 
cgit v1.2.3


From d76901750ab9f71091d33ef3d2b5909d8a9a4ad4 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Mon, 8 Sep 2008 15:23:48 -0300
Subject: KVM: x86: do not execute halted vcpus

Offline or uninitialized vcpu's can be executed if requested to perform
userspace work.

Follow Avi's suggestion to handle halted vcpu's in the main loop,
simplifying kvm_emulate_halt(). Introduce a new vcpu->requests bit to
indicate events that promote state from halted to running.

Also standardize vcpu wake sites.

Signed-off-by: Marcelo Tosatti <mtosatti <at> redhat.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/i8254.c     |   5 +--
 arch/x86/kvm/lapic.c     |  16 ++------
 arch/x86/kvm/x86.c       | 100 ++++++++++++++++++++++++++---------------------
 include/linux/kvm_host.h |   1 +
 virt/kvm/kvm_main.c      |  10 ++---
 5 files changed, 67 insertions(+), 65 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 4cb443026ec4..634132a9a512 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -200,10 +200,9 @@ static int __pit_timer_fn(struct kvm_kpit_state *ps)
 
 	if (!atomic_inc_and_test(&pt->pending))
 		set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests);
-	if (vcpu0 && waitqueue_active(&vcpu0->wq)) {
-		vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+
+	if (vcpu0 && waitqueue_active(&vcpu0->wq))
 		wake_up_interruptible(&vcpu0->wq);
-	}
 
 	pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period);
 	pt->scheduled = ktime_to_ns(pt->timer.expires);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index be94f93a73f6..fd00f698692f 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -339,13 +339,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 		} else
 			apic_clear_vector(vector, apic->regs + APIC_TMR);
 
-		if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
-			kvm_vcpu_kick(vcpu);
-		else if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) {
-			vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
-			if (waitqueue_active(&vcpu->wq))
-				wake_up_interruptible(&vcpu->wq);
-		}
+		kvm_vcpu_kick(vcpu);
 
 		result = (orig_irr == 0);
 		break;
@@ -384,8 +378,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 		if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
 			vcpu->arch.sipi_vector = vector;
 			vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED;
-			if (waitqueue_active(&vcpu->wq))
-				wake_up_interruptible(&vcpu->wq);
+			kvm_vcpu_kick(vcpu);
 		}
 		break;
 
@@ -950,10 +943,9 @@ static int __apic_timer_fn(struct kvm_lapic *apic)
 
 	if(!atomic_inc_and_test(&apic->timer.pending))
 		set_bit(KVM_REQ_PENDING_TIMER, &apic->vcpu->requests);
-	if (waitqueue_active(q)) {
-		apic->vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+	if (waitqueue_active(q))
 		wake_up_interruptible(q);
-	}
+
 	if (apic_lvtt_period(apic)) {
 		result = 1;
 		apic->timer.dev.expires = ktime_add_ns(
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3f3cb7107c03..bf98d40b21ec 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2798,11 +2798,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 	KVMTRACE_0D(HLT, vcpu, handler);
 	if (irqchip_in_kernel(vcpu->kvm)) {
 		vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
-		up_read(&vcpu->kvm->slots_lock);
-		kvm_vcpu_block(vcpu);
-		down_read(&vcpu->kvm->slots_lock);
-		if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
-			return -EINTR;
 		return 1;
 	} else {
 		vcpu->run->exit_reason = KVM_EXIT_HLT;
@@ -3097,24 +3092,10 @@ static void vapic_exit(struct kvm_vcpu *vcpu)
 	up_read(&vcpu->kvm->slots_lock);
 }
 
-static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	int r;
 
-	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) {
-		pr_debug("vcpu %d received sipi with vector # %x\n",
-		       vcpu->vcpu_id, vcpu->arch.sipi_vector);
-		kvm_lapic_reset(vcpu);
-		r = kvm_x86_ops->vcpu_reset(vcpu);
-		if (r)
-			return r;
-		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
-	}
-
-	down_read(&vcpu->kvm->slots_lock);
-	vapic_enter(vcpu);
-
-again:
 	if (vcpu->requests)
 		if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
 			kvm_mmu_unload(vcpu);
@@ -3151,22 +3132,13 @@ again:
 
 	local_irq_disable();
 
-	if (vcpu->requests || need_resched()) {
+	if (vcpu->requests || need_resched() || signal_pending(current)) {
 		local_irq_enable();
 		preempt_enable();
 		r = 1;
 		goto out;
 	}
 
-	if (signal_pending(current)) {
-		local_irq_enable();
-		preempt_enable();
-		r = -EINTR;
-		kvm_run->exit_reason = KVM_EXIT_INTR;
-		++vcpu->stat.signal_exits;
-		goto out;
-	}
-
 	if (vcpu->guest_debug.enabled)
 		kvm_x86_ops->guest_debug_pre(vcpu);
 
@@ -3227,26 +3199,63 @@ again:
 	kvm_lapic_sync_from_vapic(vcpu);
 
 	r = kvm_x86_ops->handle_exit(kvm_run, vcpu);
+out:
+	return r;
+}
 
-	if (r > 0) {
-		if (dm_request_for_irq_injection(vcpu, kvm_run)) {
-			r = -EINTR;
-			kvm_run->exit_reason = KVM_EXIT_INTR;
-			++vcpu->stat.request_irq_exits;
-			goto out;
-		}
-		if (!need_resched())
-			goto again;
+static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	int r;
+
+	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) {
+		printk("vcpu %d received sipi with vector # %x\n",
+		       vcpu->vcpu_id, vcpu->arch.sipi_vector);
+		kvm_lapic_reset(vcpu);
+		r = kvm_x86_ops->vcpu_reset(vcpu);
+		if (r)
+			return r;
+		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 	}
 
-out:
-	up_read(&vcpu->kvm->slots_lock);
-	if (r > 0) {
-		kvm_resched(vcpu);
-		down_read(&vcpu->kvm->slots_lock);
-		goto again;
+	down_read(&vcpu->kvm->slots_lock);
+	vapic_enter(vcpu);
+
+	r = 1;
+	while (r > 0) {
+		if (kvm_arch_vcpu_runnable(vcpu))
+			r = vcpu_enter_guest(vcpu, kvm_run);
+		else {
+			up_read(&vcpu->kvm->slots_lock);
+			kvm_vcpu_block(vcpu);
+			down_read(&vcpu->kvm->slots_lock);
+			if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests))
+				if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
+					vcpu->arch.mp_state =
+							KVM_MP_STATE_RUNNABLE;
+			if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
+				r = -EINTR;
+		}
+
+		if (r > 0) {
+			if (dm_request_for_irq_injection(vcpu, kvm_run)) {
+				r = -EINTR;
+				kvm_run->exit_reason = KVM_EXIT_INTR;
+				++vcpu->stat.request_irq_exits;
+			}
+			if (signal_pending(current)) {
+				r = -EINTR;
+				kvm_run->exit_reason = KVM_EXIT_INTR;
+				++vcpu->stat.signal_exits;
+			}
+			if (need_resched()) {
+				up_read(&vcpu->kvm->slots_lock);
+				kvm_resched(vcpu);
+				down_read(&vcpu->kvm->slots_lock);
+			}
+		}
 	}
 
+	up_read(&vcpu->kvm->slots_lock);
 	post_kvm_run_save(vcpu, kvm_run);
 
 	vapic_exit(vcpu);
@@ -3266,6 +3275,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
 		kvm_vcpu_block(vcpu);
+		clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
 		r = -EAGAIN;
 		goto out;
 	}
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a18aaad2ab79..4b036430ea23 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -34,6 +34,7 @@
 #define KVM_REQ_MMU_RELOAD         3
 #define KVM_REQ_TRIPLE_FAULT       4
 #define KVM_REQ_PENDING_TIMER      5
+#define KVM_REQ_UNHALT             6
 
 struct kvm_vcpu;
 extern struct kmem_cache *kvm_vcpu_cache;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index de3b029f6adf..63e661be040a 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -980,12 +980,12 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 	for (;;) {
 		prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
 
-		if (kvm_cpu_has_interrupt(vcpu))
-			break;
-		if (kvm_cpu_has_pending_timer(vcpu))
-			break;
-		if (kvm_arch_vcpu_runnable(vcpu))
+		if (kvm_cpu_has_interrupt(vcpu) ||
+		    kvm_cpu_has_pending_timer(vcpu) ||
+		    kvm_arch_vcpu_runnable(vcpu)) {
+			set_bit(KVM_REQ_UNHALT, &vcpu->requests);
 			break;
+		}
 		if (signal_pending(current))
 			break;
 
-- 
cgit v1.2.3


From 7d81a5e03ddbb44d05a32cad4a46a23577216497 Mon Sep 17 00:00:00 2001
From: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Date: Tue, 14 Oct 2008 17:16:55 +0200
Subject: MIPS: IP22/28: Switch over to RTC class driver

This patchset removes some dead code and creates a platform device
for the RTC class driver.

Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/mach-ip22/ds1286.h | 18 ---------
 arch/mips/include/asm/mach-ip28/ds1286.h |  4 --
 arch/mips/sgi-ip22/ip22-platform.c       | 15 ++++++++
 arch/mips/sgi-ip22/ip22-setup.c          |  1 -
 arch/mips/sgi-ip22/ip22-time.c           | 64 --------------------------------
 include/linux/ds1286.h                   |  2 -
 6 files changed, 15 insertions(+), 89 deletions(-)
 delete mode 100644 arch/mips/include/asm/mach-ip22/ds1286.h
 delete mode 100644 arch/mips/include/asm/mach-ip28/ds1286.h

(limited to 'include/linux')

diff --git a/arch/mips/include/asm/mach-ip22/ds1286.h b/arch/mips/include/asm/mach-ip22/ds1286.h
deleted file mode 100644
index f19f1eafbc71..000000000000
--- a/arch/mips/include/asm/mach-ip22/ds1286.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 1998, 2001, 03 by Ralf Baechle
- *
- * RTC routines for PC style attached Dallas chip.
- */
-#ifndef __ASM_MACH_IP22_DS1286_H
-#define __ASM_MACH_IP22_DS1286_H
-
-#include <asm/sgi/hpc3.h>
-
-#define rtc_read(reg)		(hpc3c0->rtcregs[(reg)] & 0xff)
-#define rtc_write(data, reg)	do { hpc3c0->rtcregs[(reg)] = (data); } while(0)
-
-#endif /* __ASM_MACH_IP22_DS1286_H */
diff --git a/arch/mips/include/asm/mach-ip28/ds1286.h b/arch/mips/include/asm/mach-ip28/ds1286.h
deleted file mode 100644
index 471bb9a33e0f..000000000000
--- a/arch/mips/include/asm/mach-ip28/ds1286.h
+++ /dev/null
@@ -1,4 +0,0 @@
-#ifndef __ASM_MACH_IP28_DS1286_H
-#define __ASM_MACH_IP28_DS1286_H
-#include <asm/mach-ip22/ds1286.h>
-#endif /* __ASM_MACH_IP28_DS1286_H */
diff --git a/arch/mips/sgi-ip22/ip22-platform.c b/arch/mips/sgi-ip22/ip22-platform.c
index 52486c4d2b01..deddbf0ebe5c 100644
--- a/arch/mips/sgi-ip22/ip22-platform.c
+++ b/arch/mips/sgi-ip22/ip22-platform.c
@@ -192,3 +192,18 @@ static int __init sgi_button_devinit(void)
 }
 
 device_initcall(sgi_button_devinit);
+
+static int __init sgi_ds1286_devinit(void)
+{
+	struct resource res;
+
+	memset(&res, 0, sizeof(res));
+	res.start = HPC3_CHIP0_BASE + offsetof(struct hpc3_regs, rtcregs);
+	res.end = res.start + sizeof(hpc3c0->rtcregs) - 1;
+	res.flags = IORESOURCE_MEM;
+
+	return IS_ERR(platform_device_register_simple("rtc-ds1286", -1,
+						      &res, 1));
+}
+
+device_initcall(sgi_ds1286_devinit);
diff --git a/arch/mips/sgi-ip22/ip22-setup.c b/arch/mips/sgi-ip22/ip22-setup.c
index 896a1ef84829..b9a931358e23 100644
--- a/arch/mips/sgi-ip22/ip22-setup.c
+++ b/arch/mips/sgi-ip22/ip22-setup.c
@@ -4,7 +4,6 @@
  * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com)
  * Copyright (C) 1997, 1998 Ralf Baechle (ralf@gnu.org)
  */
-#include <linux/ds1286.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/kdev_t.h>
diff --git a/arch/mips/sgi-ip22/ip22-time.c b/arch/mips/sgi-ip22/ip22-time.c
index 10e505491655..3dcb27ec0c53 100644
--- a/arch/mips/sgi-ip22/ip22-time.c
+++ b/arch/mips/sgi-ip22/ip22-time.c
@@ -10,7 +10,6 @@
  * Copyright (C) 2003, 06 Ralf Baechle (ralf@linux-mips.org)
  */
 #include <linux/bcd.h>
-#include <linux/ds1286.h>
 #include <linux/init.h>
 #include <linux/irq.h>
 #include <linux/kernel.h>
@@ -29,69 +28,6 @@
 #include <asm/sgi/hpc3.h>
 #include <asm/sgi/ip22.h>
 
-/*
- * Note that mktime uses month from 1 to 12 while rtc_time_to_tm
- * uses 0 to 11.
- */
-unsigned long read_persistent_clock(void)
-{
-	unsigned int yrs, mon, day, hrs, min, sec;
-	unsigned int save_control;
-	unsigned long flags;
-
-	spin_lock_irqsave(&rtc_lock, flags);
-	save_control = hpc3c0->rtcregs[RTC_CMD] & 0xff;
-	hpc3c0->rtcregs[RTC_CMD] = save_control | RTC_TE;
-
-	sec = BCD2BIN(hpc3c0->rtcregs[RTC_SECONDS] & 0xff);
-	min = BCD2BIN(hpc3c0->rtcregs[RTC_MINUTES] & 0xff);
-	hrs = BCD2BIN(hpc3c0->rtcregs[RTC_HOURS] & 0x3f);
-	day = BCD2BIN(hpc3c0->rtcregs[RTC_DATE] & 0xff);
-	mon = BCD2BIN(hpc3c0->rtcregs[RTC_MONTH] & 0x1f);
-	yrs = BCD2BIN(hpc3c0->rtcregs[RTC_YEAR] & 0xff);
-
-	hpc3c0->rtcregs[RTC_CMD] = save_control;
-	spin_unlock_irqrestore(&rtc_lock, flags);
-
-	if (yrs < 45)
-		yrs += 30;
-	if ((yrs += 40) < 70)
-		yrs += 100;
-
-	return mktime(yrs + 1900, mon, day, hrs, min, sec);
-}
-
-int rtc_mips_set_time(unsigned long tim)
-{
-	struct rtc_time tm;
-	unsigned int save_control;
-	unsigned long flags;
-
-	rtc_time_to_tm(tim, &tm);
-
-	tm.tm_mon += 1;		/* tm_mon starts at zero */
-	tm.tm_year -= 40;
-	if (tm.tm_year >= 100)
-		tm.tm_year -= 100;
-
-	spin_lock_irqsave(&rtc_lock, flags);
-	save_control = hpc3c0->rtcregs[RTC_CMD] & 0xff;
-	hpc3c0->rtcregs[RTC_CMD] = save_control | RTC_TE;
-
-	hpc3c0->rtcregs[RTC_YEAR] = BIN2BCD(tm.tm_year);
-	hpc3c0->rtcregs[RTC_MONTH] = BIN2BCD(tm.tm_mon);
-	hpc3c0->rtcregs[RTC_DATE] = BIN2BCD(tm.tm_mday);
-	hpc3c0->rtcregs[RTC_HOURS] = BIN2BCD(tm.tm_hour);
-	hpc3c0->rtcregs[RTC_MINUTES] = BIN2BCD(tm.tm_min);
-	hpc3c0->rtcregs[RTC_SECONDS] = BIN2BCD(tm.tm_sec);
-	hpc3c0->rtcregs[RTC_HUNDREDTH_SECOND] = 0;
-
-	hpc3c0->rtcregs[RTC_CMD] = save_control;
-	spin_unlock_irqrestore(&rtc_lock, flags);
-
-	return 0;
-}
-
 static unsigned long dosample(void)
 {
 	u32 ct0, ct1;
diff --git a/include/linux/ds1286.h b/include/linux/ds1286.h
index d8989860e4ce..45ea0aa0aeb9 100644
--- a/include/linux/ds1286.h
+++ b/include/linux/ds1286.h
@@ -8,8 +8,6 @@
 #ifndef __LINUX_DS1286_H
 #define __LINUX_DS1286_H
 
-#include <asm/ds1286.h>
-
 /**********************************************************************
  * register summary
  **********************************************************************/
-- 
cgit v1.2.3


From 387179464257921eb9aa3d15cc3ff194f6945a7c Mon Sep 17 00:00:00 2001
From: "Kay, Allen M" <allen.m.kay@intel.com>
Date: Tue, 9 Sep 2008 18:37:29 +0300
Subject: VT-d: Changes to support KVM

This patch extends the VT-d driver to support KVM

[Ben: fixed memory pinning]
[avi: move dma_remapping.h as well]

Signed-off-by: Kay, Allen M <allen.m.kay@intel.com>
Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
Signed-off-by: Amit Shah <amit.shah@qumranet.com>
Acked-by: Mark Gross <mgross@linux.intel.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 drivers/pci/dma_remapping.h   | 157 --------------------
 drivers/pci/dmar.c            |   4 +-
 drivers/pci/intel-iommu.c     | 116 ++++++++++++++-
 drivers/pci/intel-iommu.h     | 307 ---------------------------------------
 drivers/pci/intr_remapping.c  |   2 +-
 drivers/pci/intr_remapping.h  |   2 +-
 drivers/pci/iova.c            |   2 +-
 drivers/pci/iova.h            |  52 -------
 include/linux/dma_remapping.h | 157 ++++++++++++++++++++
 include/linux/intel-iommu.h   | 327 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/iova.h          |  52 +++++++
 11 files changed, 653 insertions(+), 525 deletions(-)
 delete mode 100644 drivers/pci/dma_remapping.h
 delete mode 100644 drivers/pci/intel-iommu.h
 delete mode 100644 drivers/pci/iova.h
 create mode 100644 include/linux/dma_remapping.h
 create mode 100644 include/linux/intel-iommu.h
 create mode 100644 include/linux/iova.h

(limited to 'include/linux')

diff --git a/drivers/pci/dma_remapping.h b/drivers/pci/dma_remapping.h
deleted file mode 100644
index bff5c65f81dc..000000000000
--- a/drivers/pci/dma_remapping.h
+++ /dev/null
@@ -1,157 +0,0 @@
-#ifndef _DMA_REMAPPING_H
-#define _DMA_REMAPPING_H
-
-/*
- * We need a fixed PAGE_SIZE of 4K irrespective of
- * arch PAGE_SIZE for IOMMU page tables.
- */
-#define PAGE_SHIFT_4K		(12)
-#define PAGE_SIZE_4K		(1UL << PAGE_SHIFT_4K)
-#define PAGE_MASK_4K		(((u64)-1) << PAGE_SHIFT_4K)
-#define PAGE_ALIGN_4K(addr)	(((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K)
-
-#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT_4K)
-#define DMA_32BIT_PFN		IOVA_PFN(DMA_32BIT_MASK)
-#define DMA_64BIT_PFN		IOVA_PFN(DMA_64BIT_MASK)
-
-
-/*
- * 0: Present
- * 1-11: Reserved
- * 12-63: Context Ptr (12 - (haw-1))
- * 64-127: Reserved
- */
-struct root_entry {
-	u64	val;
-	u64	rsvd1;
-};
-#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry))
-static inline bool root_present(struct root_entry *root)
-{
-	return (root->val & 1);
-}
-static inline void set_root_present(struct root_entry *root)
-{
-	root->val |= 1;
-}
-static inline void set_root_value(struct root_entry *root, unsigned long value)
-{
-	root->val |= value & PAGE_MASK_4K;
-}
-
-struct context_entry;
-static inline struct context_entry *
-get_context_addr_from_root(struct root_entry *root)
-{
-	return (struct context_entry *)
-		(root_present(root)?phys_to_virt(
-		root->val & PAGE_MASK_4K):
-		NULL);
-}
-
-/*
- * low 64 bits:
- * 0: present
- * 1: fault processing disable
- * 2-3: translation type
- * 12-63: address space root
- * high 64 bits:
- * 0-2: address width
- * 3-6: aval
- * 8-23: domain id
- */
-struct context_entry {
-	u64 lo;
-	u64 hi;
-};
-#define context_present(c) ((c).lo & 1)
-#define context_fault_disable(c) (((c).lo >> 1) & 1)
-#define context_translation_type(c) (((c).lo >> 2) & 3)
-#define context_address_root(c) ((c).lo & PAGE_MASK_4K)
-#define context_address_width(c) ((c).hi &  7)
-#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
-
-#define context_set_present(c) do {(c).lo |= 1;} while (0)
-#define context_set_fault_enable(c) \
-	do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
-#define context_set_translation_type(c, val) \
-	do { \
-		(c).lo &= (((u64)-1) << 4) | 3; \
-		(c).lo |= ((val) & 3) << 2; \
-	} while (0)
-#define CONTEXT_TT_MULTI_LEVEL 0
-#define context_set_address_root(c, val) \
-	do {(c).lo |= (val) & PAGE_MASK_4K;} while (0)
-#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
-#define context_set_domain_id(c, val) \
-	do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
-#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
-
-/*
- * 0: readable
- * 1: writable
- * 2-6: reserved
- * 7: super page
- * 8-11: available
- * 12-63: Host physcial address
- */
-struct dma_pte {
-	u64 val;
-};
-#define dma_clear_pte(p)	do {(p).val = 0;} while (0)
-
-#define DMA_PTE_READ (1)
-#define DMA_PTE_WRITE (2)
-
-#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0)
-#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0)
-#define dma_set_pte_prot(p, prot) \
-		do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0)
-#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K)
-#define dma_set_pte_addr(p, addr) do {\
-		(p).val |= ((addr) & PAGE_MASK_4K); } while (0)
-#define dma_pte_present(p) (((p).val & 3) != 0)
-
-struct intel_iommu;
-
-struct dmar_domain {
-	int	id;			/* domain id */
-	struct intel_iommu *iommu;	/* back pointer to owning iommu */
-
-	struct list_head devices; 	/* all devices' list */
-	struct iova_domain iovad;	/* iova's that belong to this domain */
-
-	struct dma_pte	*pgd;		/* virtual address */
-	spinlock_t	mapping_lock;	/* page table lock */
-	int		gaw;		/* max guest address width */
-
-	/* adjusted guest address width, 0 is level 2 30-bit */
-	int		agaw;
-
-#define DOMAIN_FLAG_MULTIPLE_DEVICES 1
-	int		flags;
-};
-
-/* PCI domain-device relationship */
-struct device_domain_info {
-	struct list_head link;	/* link to domain siblings */
-	struct list_head global; /* link to global list */
-	u8 bus;			/* PCI bus numer */
-	u8 devfn;		/* PCI devfn number */
-	struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
-	struct dmar_domain *domain; /* pointer to domain */
-};
-
-extern int init_dmars(void);
-extern void free_dmar_iommu(struct intel_iommu *iommu);
-
-extern int dmar_disabled;
-
-#ifndef CONFIG_DMAR_GFX_WA
-static inline void iommu_prepare_gfx_mapping(void)
-{
-	return;
-}
-#endif /* !CONFIG_DMAR_GFX_WA */
-
-#endif
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index bd2c01674f5e..e842e756308a 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -28,9 +28,9 @@
 
 #include <linux/pci.h>
 #include <linux/dmar.h>
+#include <linux/iova.h>
+#include <linux/intel-iommu.h>
 #include <linux/timer.h>
-#include "iova.h"
-#include "intel-iommu.h"
 
 #undef PREFIX
 #define PREFIX "DMAR:"
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 389fdd6f4a9f..fc5f2dbf5323 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -33,8 +33,8 @@
 #include <linux/dma-mapping.h>
 #include <linux/mempool.h>
 #include <linux/timer.h>
-#include "iova.h"
-#include "intel-iommu.h"
+#include <linux/iova.h>
+#include <linux/intel-iommu.h>
 #include <asm/proto.h> /* force_iommu in this header in x86-64*/
 #include <asm/cacheflush.h>
 #include <asm/iommu.h>
@@ -156,7 +156,7 @@ static inline void *alloc_domain_mem(void)
 	return iommu_kmem_cache_alloc(iommu_domain_cache);
 }
 
-static inline void free_domain_mem(void *vaddr)
+static void free_domain_mem(void *vaddr)
 {
 	kmem_cache_free(iommu_domain_cache, vaddr);
 }
@@ -1341,7 +1341,7 @@ static void domain_remove_dev_info(struct dmar_domain *domain)
  * find_domain
  * Note: we use struct pci_dev->dev.archdata.iommu stores the info
  */
-struct dmar_domain *
+static struct dmar_domain *
 find_domain(struct pci_dev *pdev)
 {
 	struct device_domain_info *info;
@@ -2318,3 +2318,111 @@ int __init intel_iommu_init(void)
 	return 0;
 }
 
+void intel_iommu_domain_exit(struct dmar_domain *domain)
+{
+	u64 end;
+
+	/* Domain 0 is reserved, so dont process it */
+	if (!domain)
+		return;
+
+	end = DOMAIN_MAX_ADDR(domain->gaw);
+	end = end & (~PAGE_MASK_4K);
+
+	/* clear ptes */
+	dma_pte_clear_range(domain, 0, end);
+
+	/* free page tables */
+	dma_pte_free_pagetable(domain, 0, end);
+
+	iommu_free_domain(domain);
+	free_domain_mem(domain);
+}
+EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);
+
+struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
+{
+	struct dmar_drhd_unit *drhd;
+	struct dmar_domain *domain;
+	struct intel_iommu *iommu;
+
+	drhd = dmar_find_matched_drhd_unit(pdev);
+	if (!drhd) {
+		printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n");
+		return NULL;
+	}
+
+	iommu = drhd->iommu;
+	if (!iommu) {
+		printk(KERN_ERR
+			"intel_iommu_domain_alloc: iommu == NULL\n");
+		return NULL;
+	}
+	domain = iommu_alloc_domain(iommu);
+	if (!domain) {
+		printk(KERN_ERR
+			"intel_iommu_domain_alloc: domain == NULL\n");
+		return NULL;
+	}
+	if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
+		printk(KERN_ERR
+			"intel_iommu_domain_alloc: domain_init() failed\n");
+		intel_iommu_domain_exit(domain);
+		return NULL;
+	}
+	return domain;
+}
+EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);
+
+int intel_iommu_context_mapping(
+	struct dmar_domain *domain, struct pci_dev *pdev)
+{
+	int rc;
+	rc = domain_context_mapping(domain, pdev);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);
+
+int intel_iommu_page_mapping(
+	struct dmar_domain *domain, dma_addr_t iova,
+	u64 hpa, size_t size, int prot)
+{
+	int rc;
+	rc = domain_page_mapping(domain, iova, hpa, size, prot);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);
+
+void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
+{
+	detach_domain_for_dev(domain, bus, devfn);
+}
+EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);
+
+struct dmar_domain *
+intel_iommu_find_domain(struct pci_dev *pdev)
+{
+	return find_domain(pdev);
+}
+EXPORT_SYMBOL_GPL(intel_iommu_find_domain);
+
+int intel_iommu_found(void)
+{
+	return g_num_of_iommus;
+}
+EXPORT_SYMBOL_GPL(intel_iommu_found);
+
+u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
+{
+	struct dma_pte *pte;
+	u64 pfn;
+
+	pfn = 0;
+	pte = addr_to_dma_pte(domain, iova);
+
+	if (pte)
+		pfn = dma_pte_addr(*pte);
+
+	return pfn >> PAGE_SHIFT_4K;
+}
+EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);
diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h
deleted file mode 100644
index 2142c01e0143..000000000000
--- a/drivers/pci/intel-iommu.h
+++ /dev/null
@@ -1,307 +0,0 @@
-/*
- * Copyright (c) 2006, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- * Copyright (C) 2006-2008 Intel Corporation
- * Author: Ashok Raj <ashok.raj@intel.com>
- * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
- */
-
-#ifndef _INTEL_IOMMU_H_
-#define _INTEL_IOMMU_H_
-
-#include <linux/types.h>
-#include <linux/msi.h>
-#include <linux/sysdev.h>
-#include "iova.h"
-#include <linux/io.h>
-#include <asm/cacheflush.h>
-#include "dma_remapping.h"
-
-/*
- * Intel IOMMU register specification per version 1.0 public spec.
- */
-
-#define	DMAR_VER_REG	0x0	/* Arch version supported by this IOMMU */
-#define	DMAR_CAP_REG	0x8	/* Hardware supported capabilities */
-#define	DMAR_ECAP_REG	0x10	/* Extended capabilities supported */
-#define	DMAR_GCMD_REG	0x18	/* Global command register */
-#define	DMAR_GSTS_REG	0x1c	/* Global status register */
-#define	DMAR_RTADDR_REG	0x20	/* Root entry table */
-#define	DMAR_CCMD_REG	0x28	/* Context command reg */
-#define	DMAR_FSTS_REG	0x34	/* Fault Status register */
-#define	DMAR_FECTL_REG	0x38	/* Fault control register */
-#define	DMAR_FEDATA_REG	0x3c	/* Fault event interrupt data register */
-#define	DMAR_FEADDR_REG	0x40	/* Fault event interrupt addr register */
-#define	DMAR_FEUADDR_REG 0x44	/* Upper address register */
-#define	DMAR_AFLOG_REG	0x58	/* Advanced Fault control */
-#define	DMAR_PMEN_REG	0x64	/* Enable Protected Memory Region */
-#define	DMAR_PLMBASE_REG 0x68	/* PMRR Low addr */
-#define	DMAR_PLMLIMIT_REG 0x6c	/* PMRR low limit */
-#define	DMAR_PHMBASE_REG 0x70	/* pmrr high base addr */
-#define	DMAR_PHMLIMIT_REG 0x78	/* pmrr high limit */
-#define DMAR_IQH_REG	0x80	/* Invalidation queue head register */
-#define DMAR_IQT_REG	0x88	/* Invalidation queue tail register */
-#define DMAR_IQA_REG	0x90	/* Invalidation queue addr register */
-#define DMAR_ICS_REG	0x98	/* Invalidation complete status register */
-#define DMAR_IRTA_REG	0xb8    /* Interrupt remapping table addr register */
-
-#define OFFSET_STRIDE		(9)
-/*
-#define dmar_readl(dmar, reg) readl(dmar + reg)
-#define dmar_readq(dmar, reg) ({ \
-		u32 lo, hi; \
-		lo = readl(dmar + reg); \
-		hi = readl(dmar + reg + 4); \
-		(((u64) hi) << 32) + lo; })
-*/
-static inline u64 dmar_readq(void __iomem *addr)
-{
-	u32 lo, hi;
-	lo = readl(addr);
-	hi = readl(addr + 4);
-	return (((u64) hi) << 32) + lo;
-}
-
-static inline void dmar_writeq(void __iomem *addr, u64 val)
-{
-	writel((u32)val, addr);
-	writel((u32)(val >> 32), addr + 4);
-}
-
-#define DMAR_VER_MAJOR(v)		(((v) & 0xf0) >> 4)
-#define DMAR_VER_MINOR(v)		((v) & 0x0f)
-
-/*
- * Decoding Capability Register
- */
-#define cap_read_drain(c)	(((c) >> 55) & 1)
-#define cap_write_drain(c)	(((c) >> 54) & 1)
-#define cap_max_amask_val(c)	(((c) >> 48) & 0x3f)
-#define cap_num_fault_regs(c)	((((c) >> 40) & 0xff) + 1)
-#define cap_pgsel_inv(c)	(((c) >> 39) & 1)
-
-#define cap_super_page_val(c)	(((c) >> 34) & 0xf)
-#define cap_super_offset(c)	(((find_first_bit(&cap_super_page_val(c), 4)) \
-					* OFFSET_STRIDE) + 21)
-
-#define cap_fault_reg_offset(c)	((((c) >> 24) & 0x3ff) * 16)
-#define cap_max_fault_reg_offset(c) \
-	(cap_fault_reg_offset(c) + cap_num_fault_regs(c) * 16)
-
-#define cap_zlr(c)		(((c) >> 22) & 1)
-#define cap_isoch(c)		(((c) >> 23) & 1)
-#define cap_mgaw(c)		((((c) >> 16) & 0x3f) + 1)
-#define cap_sagaw(c)		(((c) >> 8) & 0x1f)
-#define cap_caching_mode(c)	(((c) >> 7) & 1)
-#define cap_phmr(c)		(((c) >> 6) & 1)
-#define cap_plmr(c)		(((c) >> 5) & 1)
-#define cap_rwbf(c)		(((c) >> 4) & 1)
-#define cap_afl(c)		(((c) >> 3) & 1)
-#define cap_ndoms(c)		(((unsigned long)1) << (4 + 2 * ((c) & 0x7)))
-/*
- * Extended Capability Register
- */
-
-#define ecap_niotlb_iunits(e)	((((e) >> 24) & 0xff) + 1)
-#define ecap_iotlb_offset(e) 	((((e) >> 8) & 0x3ff) * 16)
-#define ecap_max_iotlb_offset(e) \
-	(ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16)
-#define ecap_coherent(e)	((e) & 0x1)
-#define ecap_qis(e)		((e) & 0x2)
-#define ecap_eim_support(e)	((e >> 4) & 0x1)
-#define ecap_ir_support(e)	((e >> 3) & 0x1)
-#define ecap_max_handle_mask(e) ((e >> 20) & 0xf)
-
-
-/* IOTLB_REG */
-#define DMA_TLB_GLOBAL_FLUSH (((u64)1) << 60)
-#define DMA_TLB_DSI_FLUSH (((u64)2) << 60)
-#define DMA_TLB_PSI_FLUSH (((u64)3) << 60)
-#define DMA_TLB_IIRG(type) ((type >> 60) & 7)
-#define DMA_TLB_IAIG(val) (((val) >> 57) & 7)
-#define DMA_TLB_READ_DRAIN (((u64)1) << 49)
-#define DMA_TLB_WRITE_DRAIN (((u64)1) << 48)
-#define DMA_TLB_DID(id)	(((u64)((id) & 0xffff)) << 32)
-#define DMA_TLB_IVT (((u64)1) << 63)
-#define DMA_TLB_IH_NONLEAF (((u64)1) << 6)
-#define DMA_TLB_MAX_SIZE (0x3f)
-
-/* INVALID_DESC */
-#define DMA_ID_TLB_GLOBAL_FLUSH	(((u64)1) << 3)
-#define DMA_ID_TLB_DSI_FLUSH	(((u64)2) << 3)
-#define DMA_ID_TLB_PSI_FLUSH	(((u64)3) << 3)
-#define DMA_ID_TLB_READ_DRAIN	(((u64)1) << 7)
-#define DMA_ID_TLB_WRITE_DRAIN	(((u64)1) << 6)
-#define DMA_ID_TLB_DID(id)	(((u64)((id & 0xffff) << 16)))
-#define DMA_ID_TLB_IH_NONLEAF	(((u64)1) << 6)
-#define DMA_ID_TLB_ADDR(addr)	(addr)
-#define DMA_ID_TLB_ADDR_MASK(mask)	(mask)
-
-/* PMEN_REG */
-#define DMA_PMEN_EPM (((u32)1)<<31)
-#define DMA_PMEN_PRS (((u32)1)<<0)
-
-/* GCMD_REG */
-#define DMA_GCMD_TE (((u32)1) << 31)
-#define DMA_GCMD_SRTP (((u32)1) << 30)
-#define DMA_GCMD_SFL (((u32)1) << 29)
-#define DMA_GCMD_EAFL (((u32)1) << 28)
-#define DMA_GCMD_WBF (((u32)1) << 27)
-#define DMA_GCMD_QIE (((u32)1) << 26)
-#define DMA_GCMD_SIRTP (((u32)1) << 24)
-#define DMA_GCMD_IRE (((u32) 1) << 25)
-
-/* GSTS_REG */
-#define DMA_GSTS_TES (((u32)1) << 31)
-#define DMA_GSTS_RTPS (((u32)1) << 30)
-#define DMA_GSTS_FLS (((u32)1) << 29)
-#define DMA_GSTS_AFLS (((u32)1) << 28)
-#define DMA_GSTS_WBFS (((u32)1) << 27)
-#define DMA_GSTS_QIES (((u32)1) << 26)
-#define DMA_GSTS_IRTPS (((u32)1) << 24)
-#define DMA_GSTS_IRES (((u32)1) << 25)
-
-/* CCMD_REG */
-#define DMA_CCMD_ICC (((u64)1) << 63)
-#define DMA_CCMD_GLOBAL_INVL (((u64)1) << 61)
-#define DMA_CCMD_DOMAIN_INVL (((u64)2) << 61)
-#define DMA_CCMD_DEVICE_INVL (((u64)3) << 61)
-#define DMA_CCMD_FM(m) (((u64)((m) & 0x3)) << 32)
-#define DMA_CCMD_MASK_NOBIT 0
-#define DMA_CCMD_MASK_1BIT 1
-#define DMA_CCMD_MASK_2BIT 2
-#define DMA_CCMD_MASK_3BIT 3
-#define DMA_CCMD_SID(s) (((u64)((s) & 0xffff)) << 16)
-#define DMA_CCMD_DID(d) ((u64)((d) & 0xffff))
-
-/* FECTL_REG */
-#define DMA_FECTL_IM (((u32)1) << 31)
-
-/* FSTS_REG */
-#define DMA_FSTS_PPF ((u32)2)
-#define DMA_FSTS_PFO ((u32)1)
-#define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff)
-
-/* FRCD_REG, 32 bits access */
-#define DMA_FRCD_F (((u32)1) << 31)
-#define dma_frcd_type(d) ((d >> 30) & 1)
-#define dma_frcd_fault_reason(c) (c & 0xff)
-#define dma_frcd_source_id(c) (c & 0xffff)
-#define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */
-
-#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) /* 10sec */
-
-#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
-{\
-	cycles_t start_time = get_cycles();\
-	while (1) {\
-		sts = op (iommu->reg + offset);\
-		if (cond)\
-			break;\
-		if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\
-			panic("DMAR hardware is malfunctioning\n");\
-		cpu_relax();\
-	}\
-}
-
-#define QI_LENGTH	256	/* queue length */
-
-enum {
-	QI_FREE,
-	QI_IN_USE,
-	QI_DONE
-};
-
-#define QI_CC_TYPE		0x1
-#define QI_IOTLB_TYPE		0x2
-#define QI_DIOTLB_TYPE		0x3
-#define QI_IEC_TYPE		0x4
-#define QI_IWD_TYPE		0x5
-
-#define QI_IEC_SELECTIVE	(((u64)1) << 4)
-#define QI_IEC_IIDEX(idx)	(((u64)(idx & 0xffff) << 32))
-#define QI_IEC_IM(m)		(((u64)(m & 0x1f) << 27))
-
-#define QI_IWD_STATUS_DATA(d)	(((u64)d) << 32)
-#define QI_IWD_STATUS_WRITE	(((u64)1) << 5)
-
-struct qi_desc {
-	u64 low, high;
-};
-
-struct q_inval {
-	spinlock_t      q_lock;
-	struct qi_desc  *desc;          /* invalidation queue */
-	int             *desc_status;   /* desc status */
-	int             free_head;      /* first free entry */
-	int             free_tail;      /* last free entry */
-	int             free_cnt;
-};
-
-#ifdef CONFIG_INTR_REMAP
-/* 1MB - maximum possible interrupt remapping table size */
-#define INTR_REMAP_PAGE_ORDER	8
-#define INTR_REMAP_TABLE_REG_SIZE	0xf
-
-#define INTR_REMAP_TABLE_ENTRIES	65536
-
-struct ir_table {
-	struct irte *base;
-};
-#endif
-
-struct intel_iommu {
-	void __iomem	*reg; /* Pointer to hardware regs, virtual addr */
-	u64		cap;
-	u64		ecap;
-	int		seg;
-	u32		gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */
-	spinlock_t	register_lock; /* protect register handling */
-	int		seq_id;	/* sequence id of the iommu */
-
-#ifdef CONFIG_DMAR
-	unsigned long 	*domain_ids; /* bitmap of domains */
-	struct dmar_domain **domains; /* ptr to domains */
-	spinlock_t	lock; /* protect context, domain ids */
-	struct root_entry *root_entry; /* virtual address */
-
-	unsigned int irq;
-	unsigned char name[7];    /* Device Name */
-	struct msi_msg saved_msg;
-	struct sys_device sysdev;
-#endif
-	struct q_inval  *qi;            /* Queued invalidation info */
-#ifdef CONFIG_INTR_REMAP
-	struct ir_table *ir_table;	/* Interrupt remapping info */
-#endif
-};
-
-static inline void __iommu_flush_cache(
-	struct intel_iommu *iommu, void *addr, int size)
-{
-	if (!ecap_coherent(iommu->ecap))
-		clflush_cache_range(addr, size);
-}
-
-extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev);
-
-extern int alloc_iommu(struct dmar_drhd_unit *drhd);
-extern void free_iommu(struct intel_iommu *iommu);
-extern int dmar_enable_qi(struct intel_iommu *iommu);
-extern void qi_global_iec(struct intel_iommu *iommu);
-
-extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
-#endif
diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
index bb642cc5e18c..738d4c89581c 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/pci/intr_remapping.c
@@ -4,7 +4,7 @@
 #include <linux/pci.h>
 #include <linux/irq.h>
 #include <asm/io_apic.h>
-#include "intel-iommu.h"
+#include <linux/intel-iommu.h>
 #include "intr_remapping.h"
 
 static struct ioapic_scope ir_ioapic[MAX_IO_APICS];
diff --git a/drivers/pci/intr_remapping.h b/drivers/pci/intr_remapping.h
index 05f2635bbe4e..ca48f0df8ac9 100644
--- a/drivers/pci/intr_remapping.h
+++ b/drivers/pci/intr_remapping.h
@@ -1,4 +1,4 @@
-#include "intel-iommu.h"
+#include <linux/intel-iommu.h>
 
 struct ioapic_scope {
 	struct intel_iommu *iommu;
diff --git a/drivers/pci/iova.c b/drivers/pci/iova.c
index 3ef4ac064315..2287116e9822 100644
--- a/drivers/pci/iova.c
+++ b/drivers/pci/iova.c
@@ -7,7 +7,7 @@
  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
  */
 
-#include "iova.h"
+#include <linux/iova.h>
 
 void
 init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit)
diff --git a/drivers/pci/iova.h b/drivers/pci/iova.h
deleted file mode 100644
index 228f6c94b69c..000000000000
--- a/drivers/pci/iova.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2006, Intel Corporation.
- *
- * This file is released under the GPLv2.
- *
- * Copyright (C) 2006-2008 Intel Corporation
- * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
- *
- */
-
-#ifndef _IOVA_H_
-#define _IOVA_H_
-
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/rbtree.h>
-#include <linux/dma-mapping.h>
-
-/* IO virtual address start page frame number */
-#define IOVA_START_PFN		(1)
-
-/* iova structure */
-struct iova {
-	struct rb_node	node;
-	unsigned long	pfn_hi; /* IOMMU dish out addr hi */
-	unsigned long	pfn_lo; /* IOMMU dish out addr lo */
-};
-
-/* holds all the iova translations for a domain */
-struct iova_domain {
-	spinlock_t	iova_alloc_lock;/* Lock to protect iova  allocation */
-	spinlock_t	iova_rbtree_lock; /* Lock to protect update of rbtree */
-	struct rb_root	rbroot;		/* iova domain rbtree root */
-	struct rb_node	*cached32_node; /* Save last alloced node */
-	unsigned long	dma_32bit_pfn;
-};
-
-struct iova *alloc_iova_mem(void);
-void free_iova_mem(struct iova *iova);
-void free_iova(struct iova_domain *iovad, unsigned long pfn);
-void __free_iova(struct iova_domain *iovad, struct iova *iova);
-struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size,
-	unsigned long limit_pfn,
-	bool size_aligned);
-struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo,
-	unsigned long pfn_hi);
-void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to);
-void init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit);
-struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn);
-void put_iova_domain(struct iova_domain *iovad);
-
-#endif
diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h
new file mode 100644
index 000000000000..bff5c65f81dc
--- /dev/null
+++ b/include/linux/dma_remapping.h
@@ -0,0 +1,157 @@
+#ifndef _DMA_REMAPPING_H
+#define _DMA_REMAPPING_H
+
+/*
+ * We need a fixed PAGE_SIZE of 4K irrespective of
+ * arch PAGE_SIZE for IOMMU page tables.
+ */
+#define PAGE_SHIFT_4K		(12)
+#define PAGE_SIZE_4K		(1UL << PAGE_SHIFT_4K)
+#define PAGE_MASK_4K		(((u64)-1) << PAGE_SHIFT_4K)
+#define PAGE_ALIGN_4K(addr)	(((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K)
+
+#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT_4K)
+#define DMA_32BIT_PFN		IOVA_PFN(DMA_32BIT_MASK)
+#define DMA_64BIT_PFN		IOVA_PFN(DMA_64BIT_MASK)
+
+
+/*
+ * 0: Present
+ * 1-11: Reserved
+ * 12-63: Context Ptr (12 - (haw-1))
+ * 64-127: Reserved
+ */
+struct root_entry {
+	u64	val;
+	u64	rsvd1;
+};
+#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry))
+static inline bool root_present(struct root_entry *root)
+{
+	return (root->val & 1);
+}
+static inline void set_root_present(struct root_entry *root)
+{
+	root->val |= 1;
+}
+static inline void set_root_value(struct root_entry *root, unsigned long value)
+{
+	root->val |= value & PAGE_MASK_4K;
+}
+
+struct context_entry;
+static inline struct context_entry *
+get_context_addr_from_root(struct root_entry *root)
+{
+	return (struct context_entry *)
+		(root_present(root)?phys_to_virt(
+		root->val & PAGE_MASK_4K):
+		NULL);
+}
+
+/*
+ * low 64 bits:
+ * 0: present
+ * 1: fault processing disable
+ * 2-3: translation type
+ * 12-63: address space root
+ * high 64 bits:
+ * 0-2: address width
+ * 3-6: aval
+ * 8-23: domain id
+ */
+struct context_entry {
+	u64 lo;
+	u64 hi;
+};
+#define context_present(c) ((c).lo & 1)
+#define context_fault_disable(c) (((c).lo >> 1) & 1)
+#define context_translation_type(c) (((c).lo >> 2) & 3)
+#define context_address_root(c) ((c).lo & PAGE_MASK_4K)
+#define context_address_width(c) ((c).hi &  7)
+#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
+
+#define context_set_present(c) do {(c).lo |= 1;} while (0)
+#define context_set_fault_enable(c) \
+	do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
+#define context_set_translation_type(c, val) \
+	do { \
+		(c).lo &= (((u64)-1) << 4) | 3; \
+		(c).lo |= ((val) & 3) << 2; \
+	} while (0)
+#define CONTEXT_TT_MULTI_LEVEL 0
+#define context_set_address_root(c, val) \
+	do {(c).lo |= (val) & PAGE_MASK_4K;} while (0)
+#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
+#define context_set_domain_id(c, val) \
+	do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
+#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
+
+/*
+ * 0: readable
+ * 1: writable
+ * 2-6: reserved
+ * 7: super page
+ * 8-11: available
+ * 12-63: Host physcial address
+ */
+struct dma_pte {
+	u64 val;
+};
+#define dma_clear_pte(p)	do {(p).val = 0;} while (0)
+
+#define DMA_PTE_READ (1)
+#define DMA_PTE_WRITE (2)
+
+#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0)
+#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0)
+#define dma_set_pte_prot(p, prot) \
+		do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0)
+#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K)
+#define dma_set_pte_addr(p, addr) do {\
+		(p).val |= ((addr) & PAGE_MASK_4K); } while (0)
+#define dma_pte_present(p) (((p).val & 3) != 0)
+
+struct intel_iommu;
+
+struct dmar_domain {
+	int	id;			/* domain id */
+	struct intel_iommu *iommu;	/* back pointer to owning iommu */
+
+	struct list_head devices; 	/* all devices' list */
+	struct iova_domain iovad;	/* iova's that belong to this domain */
+
+	struct dma_pte	*pgd;		/* virtual address */
+	spinlock_t	mapping_lock;	/* page table lock */
+	int		gaw;		/* max guest address width */
+
+	/* adjusted guest address width, 0 is level 2 30-bit */
+	int		agaw;
+
+#define DOMAIN_FLAG_MULTIPLE_DEVICES 1
+	int		flags;
+};
+
+/* PCI domain-device relationship */
+struct device_domain_info {
+	struct list_head link;	/* link to domain siblings */
+	struct list_head global; /* link to global list */
+	u8 bus;			/* PCI bus numer */
+	u8 devfn;		/* PCI devfn number */
+	struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
+	struct dmar_domain *domain; /* pointer to domain */
+};
+
+extern int init_dmars(void);
+extern void free_dmar_iommu(struct intel_iommu *iommu);
+
+extern int dmar_disabled;
+
+#ifndef CONFIG_DMAR_GFX_WA
+static inline void iommu_prepare_gfx_mapping(void)
+{
+	return;
+}
+#endif /* !CONFIG_DMAR_GFX_WA */
+
+#endif
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
new file mode 100644
index 000000000000..2e117f30a76c
--- /dev/null
+++ b/include/linux/intel-iommu.h
@@ -0,0 +1,327 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) 2006-2008 Intel Corporation
+ * Author: Ashok Raj <ashok.raj@intel.com>
+ * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
+ */
+
+#ifndef _INTEL_IOMMU_H_
+#define _INTEL_IOMMU_H_
+
+#include <linux/types.h>
+#include <linux/msi.h>
+#include <linux/sysdev.h>
+#include <linux/iova.h>
+#include <linux/io.h>
+#include <linux/dma_remapping.h>
+#include <asm/cacheflush.h>
+
+/*
+ * Intel IOMMU register specification per version 1.0 public spec.
+ */
+
+#define	DMAR_VER_REG	0x0	/* Arch version supported by this IOMMU */
+#define	DMAR_CAP_REG	0x8	/* Hardware supported capabilities */
+#define	DMAR_ECAP_REG	0x10	/* Extended capabilities supported */
+#define	DMAR_GCMD_REG	0x18	/* Global command register */
+#define	DMAR_GSTS_REG	0x1c	/* Global status register */
+#define	DMAR_RTADDR_REG	0x20	/* Root entry table */
+#define	DMAR_CCMD_REG	0x28	/* Context command reg */
+#define	DMAR_FSTS_REG	0x34	/* Fault Status register */
+#define	DMAR_FECTL_REG	0x38	/* Fault control register */
+#define	DMAR_FEDATA_REG	0x3c	/* Fault event interrupt data register */
+#define	DMAR_FEADDR_REG	0x40	/* Fault event interrupt addr register */
+#define	DMAR_FEUADDR_REG 0x44	/* Upper address register */
+#define	DMAR_AFLOG_REG	0x58	/* Advanced Fault control */
+#define	DMAR_PMEN_REG	0x64	/* Enable Protected Memory Region */
+#define	DMAR_PLMBASE_REG 0x68	/* PMRR Low addr */
+#define	DMAR_PLMLIMIT_REG 0x6c	/* PMRR low limit */
+#define	DMAR_PHMBASE_REG 0x70	/* pmrr high base addr */
+#define	DMAR_PHMLIMIT_REG 0x78	/* pmrr high limit */
+#define DMAR_IQH_REG	0x80	/* Invalidation queue head register */
+#define DMAR_IQT_REG	0x88	/* Invalidation queue tail register */
+#define DMAR_IQA_REG	0x90	/* Invalidation queue addr register */
+#define DMAR_ICS_REG	0x98	/* Invalidation complete status register */
+#define DMAR_IRTA_REG	0xb8    /* Interrupt remapping table addr register */
+
+#define OFFSET_STRIDE		(9)
+/*
+#define dmar_readl(dmar, reg) readl(dmar + reg)
+#define dmar_readq(dmar, reg) ({ \
+		u32 lo, hi; \
+		lo = readl(dmar + reg); \
+		hi = readl(dmar + reg + 4); \
+		(((u64) hi) << 32) + lo; })
+*/
+static inline u64 dmar_readq(void __iomem *addr)
+{
+	u32 lo, hi;
+	lo = readl(addr);
+	hi = readl(addr + 4);
+	return (((u64) hi) << 32) + lo;
+}
+
+static inline void dmar_writeq(void __iomem *addr, u64 val)
+{
+	writel((u32)val, addr);
+	writel((u32)(val >> 32), addr + 4);
+}
+
+#define DMAR_VER_MAJOR(v)		(((v) & 0xf0) >> 4)
+#define DMAR_VER_MINOR(v)		((v) & 0x0f)
+
+/*
+ * Decoding Capability Register
+ */
+#define cap_read_drain(c)	(((c) >> 55) & 1)
+#define cap_write_drain(c)	(((c) >> 54) & 1)
+#define cap_max_amask_val(c)	(((c) >> 48) & 0x3f)
+#define cap_num_fault_regs(c)	((((c) >> 40) & 0xff) + 1)
+#define cap_pgsel_inv(c)	(((c) >> 39) & 1)
+
+#define cap_super_page_val(c)	(((c) >> 34) & 0xf)
+#define cap_super_offset(c)	(((find_first_bit(&cap_super_page_val(c), 4)) \
+					* OFFSET_STRIDE) + 21)
+
+#define cap_fault_reg_offset(c)	((((c) >> 24) & 0x3ff) * 16)
+#define cap_max_fault_reg_offset(c) \
+	(cap_fault_reg_offset(c) + cap_num_fault_regs(c) * 16)
+
+#define cap_zlr(c)		(((c) >> 22) & 1)
+#define cap_isoch(c)		(((c) >> 23) & 1)
+#define cap_mgaw(c)		((((c) >> 16) & 0x3f) + 1)
+#define cap_sagaw(c)		(((c) >> 8) & 0x1f)
+#define cap_caching_mode(c)	(((c) >> 7) & 1)
+#define cap_phmr(c)		(((c) >> 6) & 1)
+#define cap_plmr(c)		(((c) >> 5) & 1)
+#define cap_rwbf(c)		(((c) >> 4) & 1)
+#define cap_afl(c)		(((c) >> 3) & 1)
+#define cap_ndoms(c)		(((unsigned long)1) << (4 + 2 * ((c) & 0x7)))
+/*
+ * Extended Capability Register
+ */
+
+#define ecap_niotlb_iunits(e)	((((e) >> 24) & 0xff) + 1)
+#define ecap_iotlb_offset(e) 	((((e) >> 8) & 0x3ff) * 16)
+#define ecap_max_iotlb_offset(e) \
+	(ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16)
+#define ecap_coherent(e)	((e) & 0x1)
+#define ecap_qis(e)		((e) & 0x2)
+#define ecap_eim_support(e)	((e >> 4) & 0x1)
+#define ecap_ir_support(e)	((e >> 3) & 0x1)
+#define ecap_max_handle_mask(e) ((e >> 20) & 0xf)
+
+
+/* IOTLB_REG */
+#define DMA_TLB_GLOBAL_FLUSH (((u64)1) << 60)
+#define DMA_TLB_DSI_FLUSH (((u64)2) << 60)
+#define DMA_TLB_PSI_FLUSH (((u64)3) << 60)
+#define DMA_TLB_IIRG(type) ((type >> 60) & 7)
+#define DMA_TLB_IAIG(val) (((val) >> 57) & 7)
+#define DMA_TLB_READ_DRAIN (((u64)1) << 49)
+#define DMA_TLB_WRITE_DRAIN (((u64)1) << 48)
+#define DMA_TLB_DID(id)	(((u64)((id) & 0xffff)) << 32)
+#define DMA_TLB_IVT (((u64)1) << 63)
+#define DMA_TLB_IH_NONLEAF (((u64)1) << 6)
+#define DMA_TLB_MAX_SIZE (0x3f)
+
+/* INVALID_DESC */
+#define DMA_ID_TLB_GLOBAL_FLUSH	(((u64)1) << 3)
+#define DMA_ID_TLB_DSI_FLUSH	(((u64)2) << 3)
+#define DMA_ID_TLB_PSI_FLUSH	(((u64)3) << 3)
+#define DMA_ID_TLB_READ_DRAIN	(((u64)1) << 7)
+#define DMA_ID_TLB_WRITE_DRAIN	(((u64)1) << 6)
+#define DMA_ID_TLB_DID(id)	(((u64)((id & 0xffff) << 16)))
+#define DMA_ID_TLB_IH_NONLEAF	(((u64)1) << 6)
+#define DMA_ID_TLB_ADDR(addr)	(addr)
+#define DMA_ID_TLB_ADDR_MASK(mask)	(mask)
+
+/* PMEN_REG */
+#define DMA_PMEN_EPM (((u32)1)<<31)
+#define DMA_PMEN_PRS (((u32)1)<<0)
+
+/* GCMD_REG */
+#define DMA_GCMD_TE (((u32)1) << 31)
+#define DMA_GCMD_SRTP (((u32)1) << 30)
+#define DMA_GCMD_SFL (((u32)1) << 29)
+#define DMA_GCMD_EAFL (((u32)1) << 28)
+#define DMA_GCMD_WBF (((u32)1) << 27)
+#define DMA_GCMD_QIE (((u32)1) << 26)
+#define DMA_GCMD_SIRTP (((u32)1) << 24)
+#define DMA_GCMD_IRE (((u32) 1) << 25)
+
+/* GSTS_REG */
+#define DMA_GSTS_TES (((u32)1) << 31)
+#define DMA_GSTS_RTPS (((u32)1) << 30)
+#define DMA_GSTS_FLS (((u32)1) << 29)
+#define DMA_GSTS_AFLS (((u32)1) << 28)
+#define DMA_GSTS_WBFS (((u32)1) << 27)
+#define DMA_GSTS_QIES (((u32)1) << 26)
+#define DMA_GSTS_IRTPS (((u32)1) << 24)
+#define DMA_GSTS_IRES (((u32)1) << 25)
+
+/* CCMD_REG */
+#define DMA_CCMD_ICC (((u64)1) << 63)
+#define DMA_CCMD_GLOBAL_INVL (((u64)1) << 61)
+#define DMA_CCMD_DOMAIN_INVL (((u64)2) << 61)
+#define DMA_CCMD_DEVICE_INVL (((u64)3) << 61)
+#define DMA_CCMD_FM(m) (((u64)((m) & 0x3)) << 32)
+#define DMA_CCMD_MASK_NOBIT 0
+#define DMA_CCMD_MASK_1BIT 1
+#define DMA_CCMD_MASK_2BIT 2
+#define DMA_CCMD_MASK_3BIT 3
+#define DMA_CCMD_SID(s) (((u64)((s) & 0xffff)) << 16)
+#define DMA_CCMD_DID(d) ((u64)((d) & 0xffff))
+
+/* FECTL_REG */
+#define DMA_FECTL_IM (((u32)1) << 31)
+
+/* FSTS_REG */
+#define DMA_FSTS_PPF ((u32)2)
+#define DMA_FSTS_PFO ((u32)1)
+#define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff)
+
+/* FRCD_REG, 32 bits access */
+#define DMA_FRCD_F (((u32)1) << 31)
+#define dma_frcd_type(d) ((d >> 30) & 1)
+#define dma_frcd_fault_reason(c) (c & 0xff)
+#define dma_frcd_source_id(c) (c & 0xffff)
+#define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */
+
+#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) /* 10sec */
+
+#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
+{\
+	cycles_t start_time = get_cycles();\
+	while (1) {\
+		sts = op (iommu->reg + offset);\
+		if (cond)\
+			break;\
+		if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\
+			panic("DMAR hardware is malfunctioning\n");\
+		cpu_relax();\
+	}\
+}
+
+#define QI_LENGTH	256	/* queue length */
+
+enum {
+	QI_FREE,
+	QI_IN_USE,
+	QI_DONE
+};
+
+#define QI_CC_TYPE		0x1
+#define QI_IOTLB_TYPE		0x2
+#define QI_DIOTLB_TYPE		0x3
+#define QI_IEC_TYPE		0x4
+#define QI_IWD_TYPE		0x5
+
+#define QI_IEC_SELECTIVE	(((u64)1) << 4)
+#define QI_IEC_IIDEX(idx)	(((u64)(idx & 0xffff) << 32))
+#define QI_IEC_IM(m)		(((u64)(m & 0x1f) << 27))
+
+#define QI_IWD_STATUS_DATA(d)	(((u64)d) << 32)
+#define QI_IWD_STATUS_WRITE	(((u64)1) << 5)
+
+struct qi_desc {
+	u64 low, high;
+};
+
+struct q_inval {
+	spinlock_t      q_lock;
+	struct qi_desc  *desc;          /* invalidation queue */
+	int             *desc_status;   /* desc status */
+	int             free_head;      /* first free entry */
+	int             free_tail;      /* last free entry */
+	int             free_cnt;
+};
+
+#ifdef CONFIG_INTR_REMAP
+/* 1MB - maximum possible interrupt remapping table size */
+#define INTR_REMAP_PAGE_ORDER	8
+#define INTR_REMAP_TABLE_REG_SIZE	0xf
+
+#define INTR_REMAP_TABLE_ENTRIES	65536
+
+struct ir_table {
+	struct irte *base;
+};
+#endif
+
+struct intel_iommu {
+	void __iomem	*reg; /* Pointer to hardware regs, virtual addr */
+	u64		cap;
+	u64		ecap;
+	int		seg;
+	u32		gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */
+	spinlock_t	register_lock; /* protect register handling */
+	int		seq_id;	/* sequence id of the iommu */
+
+#ifdef CONFIG_DMAR
+	unsigned long 	*domain_ids; /* bitmap of domains */
+	struct dmar_domain **domains; /* ptr to domains */
+	spinlock_t	lock; /* protect context, domain ids */
+	struct root_entry *root_entry; /* virtual address */
+
+	unsigned int irq;
+	unsigned char name[7];    /* Device Name */
+	struct msi_msg saved_msg;
+	struct sys_device sysdev;
+#endif
+	struct q_inval  *qi;            /* Queued invalidation info */
+#ifdef CONFIG_INTR_REMAP
+	struct ir_table *ir_table;	/* Interrupt remapping info */
+#endif
+};
+
+static inline void __iommu_flush_cache(
+	struct intel_iommu *iommu, void *addr, int size)
+{
+	if (!ecap_coherent(iommu->ecap))
+		clflush_cache_range(addr, size);
+}
+
+extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev);
+
+extern int alloc_iommu(struct dmar_drhd_unit *drhd);
+extern void free_iommu(struct intel_iommu *iommu);
+extern int dmar_enable_qi(struct intel_iommu *iommu);
+extern void qi_global_iec(struct intel_iommu *iommu);
+
+extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
+
+void intel_iommu_domain_exit(struct dmar_domain *domain);
+struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev);
+int intel_iommu_context_mapping(struct dmar_domain *domain,
+				struct pci_dev *pdev);
+int intel_iommu_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
+			     u64 hpa, size_t size, int prot);
+void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn);
+struct dmar_domain *intel_iommu_find_domain(struct pci_dev *pdev);
+u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova);
+
+#ifdef CONFIG_DMAR
+int intel_iommu_found(void);
+#else /* CONFIG_DMAR */
+static inline int intel_iommu_found(void)
+{
+	return 0;
+}
+#endif /* CONFIG_DMAR */
+
+#endif
diff --git a/include/linux/iova.h b/include/linux/iova.h
new file mode 100644
index 000000000000..228f6c94b69c
--- /dev/null
+++ b/include/linux/iova.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This file is released under the GPLv2.
+ *
+ * Copyright (C) 2006-2008 Intel Corporation
+ * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
+ *
+ */
+
+#ifndef _IOVA_H_
+#define _IOVA_H_
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/rbtree.h>
+#include <linux/dma-mapping.h>
+
+/* IO virtual address start page frame number */
+#define IOVA_START_PFN		(1)
+
+/* iova structure */
+struct iova {
+	struct rb_node	node;
+	unsigned long	pfn_hi; /* IOMMU dish out addr hi */
+	unsigned long	pfn_lo; /* IOMMU dish out addr lo */
+};
+
+/* holds all the iova translations for a domain */
+struct iova_domain {
+	spinlock_t	iova_alloc_lock;/* Lock to protect iova  allocation */
+	spinlock_t	iova_rbtree_lock; /* Lock to protect update of rbtree */
+	struct rb_root	rbroot;		/* iova domain rbtree root */
+	struct rb_node	*cached32_node; /* Save last alloced node */
+	unsigned long	dma_32bit_pfn;
+};
+
+struct iova *alloc_iova_mem(void);
+void free_iova_mem(struct iova *iova);
+void free_iova(struct iova_domain *iovad, unsigned long pfn);
+void __free_iova(struct iova_domain *iovad, struct iova *iova);
+struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size,
+	unsigned long limit_pfn,
+	bool size_aligned);
+struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo,
+	unsigned long pfn_hi);
+void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to);
+void init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit);
+struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn);
+void put_iova_domain(struct iova_domain *iovad);
+
+#endif
-- 
cgit v1.2.3


From 62c476c7c7f25a5b245b9902a935636e6316e58c Mon Sep 17 00:00:00 2001
From: Ben-Ami Yassour <benami@il.ibm.com>
Date: Sun, 14 Sep 2008 03:48:28 +0300
Subject: KVM: Device Assignment with VT-d

Based on a patch by: Kay, Allen M <allen.m.kay@intel.com>

This patch enables PCI device assignment based on VT-d support.
When a device is assigned to the guest, the guest memory is pinned and
the mapping is updated in the VT-d IOMMU.

[Amit: Expose KVM_CAP_IOMMU so we can check if an IOMMU is present
and also control enable/disable from userspace]

Signed-off-by: Kay, Allen M <allen.m.kay@intel.com>
Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
Signed-off-by: Amit Shah <amit.shah@qumranet.com>

Acked-by: Mark Gross <mgross@linux.intel.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/Makefile      |   3 +
 arch/x86/kvm/vtd.c         | 198 +++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c         |  14 ++++
 include/asm-x86/kvm_host.h |  23 +-----
 include/linux/kvm.h        |   3 +
 include/linux/kvm_host.h   |  52 ++++++++++++
 virt/kvm/kvm_main.c        |   9 ++-
 7 files changed, 281 insertions(+), 21 deletions(-)
 create mode 100644 arch/x86/kvm/vtd.c

(limited to 'include/linux')

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index d0e940bb6f40..3072b17447ab 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -12,6 +12,9 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
 
 kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
 	i8254.o
+ifeq ($(CONFIG_DMAR),y)
+kvm-objs += vtd.o
+endif
 obj-$(CONFIG_KVM) += kvm.o
 kvm-intel-objs = vmx.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/x86/kvm/vtd.c b/arch/x86/kvm/vtd.c
new file mode 100644
index 000000000000..667bf3fb64bf
--- /dev/null
+++ b/arch/x86/kvm/vtd.c
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) 2006-2008 Intel Corporation
+ * Copyright IBM Corporation, 2008
+ * Author: Allen M. Kay <allen.m.kay@intel.com>
+ * Author: Weidong Han <weidong.han@intel.com>
+ * Author: Ben-Ami Yassour <benami@il.ibm.com>
+ */
+
+#include <linux/list.h>
+#include <linux/kvm_host.h>
+#include <linux/pci.h>
+#include <linux/dmar.h>
+#include <linux/intel-iommu.h>
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm);
+static void kvm_iommu_put_pages(struct kvm *kvm,
+				gfn_t base_gfn, unsigned long npages);
+
+int kvm_iommu_map_pages(struct kvm *kvm,
+			gfn_t base_gfn, unsigned long npages)
+{
+	gfn_t gfn = base_gfn;
+	pfn_t pfn;
+	int i, r;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+
+	/* check if iommu exists and in use */
+	if (!domain)
+		return 0;
+
+	r = -EINVAL;
+	for (i = 0; i < npages; i++) {
+		/* check if already mapped */
+		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
+						     gfn_to_gpa(gfn));
+		if (pfn && !is_mmio_pfn(pfn))
+			continue;
+
+		pfn = gfn_to_pfn(kvm, gfn);
+		if (!is_mmio_pfn(pfn)) {
+			r = intel_iommu_page_mapping(domain,
+						     gfn_to_gpa(gfn),
+						     pfn_to_hpa(pfn),
+						     PAGE_SIZE,
+						     DMA_PTE_READ |
+						     DMA_PTE_WRITE);
+			if (r) {
+				printk(KERN_DEBUG "kvm_iommu_map_pages:"
+				       "iommu failed to map pfn=%lx\n", pfn);
+				goto unmap_pages;
+			}
+		} else {
+			printk(KERN_DEBUG "kvm_iommu_map_page:"
+			       "invalid pfn=%lx\n", pfn);
+			goto unmap_pages;
+		}
+		gfn++;
+	}
+	return 0;
+
+unmap_pages:
+	kvm_iommu_put_pages(kvm, base_gfn, i);
+	return r;
+}
+
+static int kvm_iommu_map_memslots(struct kvm *kvm)
+{
+	int i, r;
+
+	down_read(&kvm->slots_lock);
+	for (i = 0; i < kvm->nmemslots; i++) {
+		r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
+					kvm->memslots[i].npages);
+		if (r)
+			break;
+	}
+	up_read(&kvm->slots_lock);
+	return r;
+}
+
+int kvm_iommu_map_guest(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *assigned_dev)
+{
+	struct pci_dev *pdev = NULL;
+	int r;
+
+	if (!intel_iommu_found()) {
+		printk(KERN_ERR "%s: intel iommu not found\n", __func__);
+		return -ENODEV;
+	}
+
+	printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n",
+	       assigned_dev->host_busnr,
+	       PCI_SLOT(assigned_dev->host_devfn),
+	       PCI_FUNC(assigned_dev->host_devfn));
+
+	pdev = assigned_dev->dev;
+
+	if (pdev == NULL) {
+		if (kvm->arch.intel_iommu_domain) {
+			intel_iommu_domain_exit(kvm->arch.intel_iommu_domain);
+			kvm->arch.intel_iommu_domain = NULL;
+		}
+		return -ENODEV;
+	}
+
+	kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev);
+	if (!kvm->arch.intel_iommu_domain)
+		return -ENODEV;
+
+	r = kvm_iommu_map_memslots(kvm);
+	if (r)
+		goto out_unmap;
+
+	intel_iommu_detach_dev(kvm->arch.intel_iommu_domain,
+			       pdev->bus->number, pdev->devfn);
+
+	r = intel_iommu_context_mapping(kvm->arch.intel_iommu_domain,
+					pdev);
+	if (r) {
+		printk(KERN_ERR "Domain context map for %s failed",
+		       pci_name(pdev));
+		goto out_unmap;
+	}
+	return 0;
+
+out_unmap:
+	kvm_iommu_unmap_memslots(kvm);
+	return r;
+}
+
+static void kvm_iommu_put_pages(struct kvm *kvm,
+			       gfn_t base_gfn, unsigned long npages)
+{
+	gfn_t gfn = base_gfn;
+	pfn_t pfn;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+	int i;
+
+	for (i = 0; i < npages; i++) {
+		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
+						     gfn_to_gpa(gfn));
+		kvm_release_pfn_clean(pfn);
+		gfn++;
+	}
+}
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm)
+{
+	int i;
+	down_read(&kvm->slots_lock);
+	for (i = 0; i < kvm->nmemslots; i++) {
+		kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
+				    kvm->memslots[i].npages);
+	}
+	up_read(&kvm->slots_lock);
+
+	return 0;
+}
+
+int kvm_iommu_unmap_guest(struct kvm *kvm)
+{
+	struct kvm_assigned_dev_kernel *entry;
+	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+
+	/* check if iommu exists and in use */
+	if (!domain)
+		return 0;
+
+	list_for_each_entry(entry, &kvm->arch.assigned_dev_head, list) {
+		printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n",
+		       entry->host_busnr,
+		       PCI_SLOT(entry->host_devfn),
+		       PCI_FUNC(entry->host_devfn));
+
+		/* detach kvm dmar domain */
+		intel_iommu_detach_dev(domain, entry->host_busnr,
+				       entry->host_devfn);
+	}
+	kvm_iommu_unmap_memslots(kvm);
+	intel_iommu_domain_exit(domain);
+	return 0;
+}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2134f3e0a516..c8a2793626ec 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -35,6 +35,7 @@
 #include <linux/module.h>
 #include <linux/mman.h>
 #include <linux/highmem.h>
+#include <linux/intel-iommu.h>
 
 #include <asm/uaccess.h>
 #include <asm/msr.h>
@@ -277,9 +278,18 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 
 	list_add(&match->list, &kvm->arch.assigned_dev_head);
 
+	if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
+		r = kvm_iommu_map_guest(kvm, match);
+		if (r)
+			goto out_list_del;
+	}
+
 out:
 	mutex_unlock(&kvm->lock);
 	return r;
+out_list_del:
+	list_del(&match->list);
+	pci_release_regions(dev);
 out_disable:
 	pci_disable_device(dev);
 out_put:
@@ -1147,6 +1157,9 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_PV_MMU:
 		r = !tdp_enabled;
 		break;
+	case KVM_CAP_IOMMU:
+		r = intel_iommu_found();
+		break;
 	default:
 		r = 0;
 		break;
@@ -4282,6 +4295,7 @@ static void kvm_free_vcpus(struct kvm *kvm)
 
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
+	kvm_iommu_unmap_guest(kvm);
 	kvm_free_assigned_devices(kvm);
 	kvm_free_pit(kvm);
 	kfree(kvm->arch.vpic);
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 68a3ac13afce..805629c0f15f 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -331,26 +331,6 @@ struct kvm_mem_alias {
 	gfn_t target_gfn;
 };
 
-struct kvm_irq_ack_notifier {
-	struct hlist_node link;
-	unsigned gsi;
-	void (*irq_acked)(struct kvm_irq_ack_notifier *kian);
-};
-
-struct kvm_assigned_dev_kernel {
-	struct kvm_irq_ack_notifier ack_notifier;
-	struct work_struct interrupt_work;
-	struct list_head list;
-	int assigned_dev_id;
-	int host_busnr;
-	int host_devfn;
-	int host_irq;
-	int guest_irq;
-	int irq_requested;
-	struct pci_dev *dev;
-	struct kvm *kvm;
-};
-
 struct kvm_arch{
 	int naliases;
 	struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS];
@@ -364,6 +344,7 @@ struct kvm_arch{
 	 */
 	struct list_head active_mmu_pages;
 	struct list_head assigned_dev_head;
+	struct dmar_domain *intel_iommu_domain;
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
@@ -514,6 +495,8 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
 int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
 		  gpa_t addr, unsigned long *ret);
 
+int is_mmio_pfn(pfn_t pfn);
+
 extern bool tdp_enabled;
 
 enum emulation_result {
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index ef4bc6f89778..4269be171faf 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -384,6 +384,7 @@ struct kvm_trace_rec {
 #define KVM_CAP_COALESCED_MMIO 15
 #define KVM_CAP_SYNC_MMU 16  /* Changes to host mmap are reflected in guest */
 #define KVM_CAP_DEVICE_ASSIGNMENT 17
+#define KVM_CAP_IOMMU 18
 
 /*
  * ioctls for VM fds
@@ -495,4 +496,6 @@ struct kvm_assigned_irq {
 	__u32 flags;
 };
 
+#define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
+
 #endif
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 4b036430ea23..6252802c3cc0 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -286,6 +286,53 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
 
+struct kvm_irq_ack_notifier {
+	struct hlist_node link;
+	unsigned gsi;
+	void (*irq_acked)(struct kvm_irq_ack_notifier *kian);
+};
+
+struct kvm_assigned_dev_kernel {
+	struct kvm_irq_ack_notifier ack_notifier;
+	struct work_struct interrupt_work;
+	struct list_head list;
+	int assigned_dev_id;
+	int host_busnr;
+	int host_devfn;
+	int host_irq;
+	int guest_irq;
+	int irq_requested;
+	struct pci_dev *dev;
+	struct kvm *kvm;
+};
+
+#ifdef CONFIG_DMAR
+int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
+			unsigned long npages);
+int kvm_iommu_map_guest(struct kvm *kvm,
+			struct kvm_assigned_dev_kernel *assigned_dev);
+int kvm_iommu_unmap_guest(struct kvm *kvm);
+#else /* CONFIG_DMAR */
+static inline int kvm_iommu_map_pages(struct kvm *kvm,
+				      gfn_t base_gfn,
+				      unsigned long npages)
+{
+	return 0;
+}
+
+static inline int kvm_iommu_map_guest(struct kvm *kvm,
+				      struct kvm_assigned_dev_kernel
+				      *assigned_dev)
+{
+	return -ENODEV;
+}
+
+static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
+{
+	return 0;
+}
+#endif /* CONFIG_DMAR */
+
 static inline void kvm_guest_enter(void)
 {
 	account_system_vtime(current);
@@ -308,6 +355,11 @@ static inline gpa_t gfn_to_gpa(gfn_t gfn)
 	return (gpa_t)gfn << PAGE_SHIFT;
 }
 
+static inline hpa_t pfn_to_hpa(pfn_t pfn)
+{
+	return (hpa_t)pfn << PAGE_SHIFT;
+}
+
 static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
 {
 	set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 63e661be040a..f42d5c2a396d 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -41,6 +41,7 @@
 #include <linux/pagemap.h>
 #include <linux/mman.h>
 #include <linux/swap.h>
+#include <linux/intel-iommu.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -76,7 +77,7 @@ static inline int valid_vcpu(int n)
 	return likely(n >= 0 && n < KVM_MAX_VCPUS);
 }
 
-static inline int is_mmio_pfn(pfn_t pfn)
+inline int is_mmio_pfn(pfn_t pfn)
 {
 	if (pfn_valid(pfn))
 		return PageReserved(pfn_to_page(pfn));
@@ -578,6 +579,12 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	}
 
 	kvm_free_physmem_slot(&old, &new);
+
+	/* map the pages in iommu page table */
+	r = kvm_iommu_map_pages(kvm, base_gfn, npages);
+	if (r)
+		goto out;
+
 	return 0;
 
 out_free:
-- 
cgit v1.2.3


From 4731d4c7a07769cf2926c327177b97bb8c68cafc Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Tue, 23 Sep 2008 13:18:39 -0300
Subject: KVM: MMU: out of sync shadow core

Allow guest pagetables to go out of sync.  Instead of emulating write
accesses to guest pagetables, or unshadowing them, we un-write-protect
the page table and allow the guest to modify it at will.  We rely on
invlpg executions to synchronize individual ptes, and will synchronize
the entire pagetable on tlb flushes.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/mmu.c         | 210 +++++++++++++++++++++++++++++++++++++++++----
 arch/x86/kvm/paging_tmpl.h |   2 +-
 arch/x86/kvm/x86.c         |   3 +
 include/asm-x86/kvm_host.h |   3 +
 include/linux/kvm_host.h   |   1 +
 5 files changed, 201 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 57c7580e7f98..d88659ae7778 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -147,6 +147,10 @@ struct kvm_shadow_walk {
 		     u64 addr, u64 *spte, int level);
 };
 
+struct kvm_unsync_walk {
+	int (*entry) (struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk);
+};
+
 typedef int (*mmu_parent_walk_fn) (struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp);
 
 static struct kmem_cache *pte_chain_cache;
@@ -654,8 +658,6 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
 
 	if (write_protected)
 		kvm_flush_remote_tlbs(kvm);
-
-	account_shadowed(kvm, gfn);
 }
 
 static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
@@ -908,6 +910,41 @@ static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
 {
 }
 
+static int mmu_unsync_walk(struct kvm_mmu_page *sp,
+			   struct kvm_unsync_walk *walker)
+{
+	int i, ret;
+
+	if (!sp->unsync_children)
+		return 0;
+
+	for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
+		u64 ent = sp->spt[i];
+
+		if (is_shadow_present_pte(ent)) {
+			struct kvm_mmu_page *child;
+			child = page_header(ent & PT64_BASE_ADDR_MASK);
+
+			if (child->unsync_children) {
+				ret = mmu_unsync_walk(child, walker);
+				if (ret)
+					return ret;
+			}
+
+			if (child->unsync) {
+				ret = walker->entry(child, walker);
+				if (ret)
+					return ret;
+			}
+		}
+	}
+
+	if (i == PT64_ENT_PER_PAGE)
+		sp->unsync_children = 0;
+
+	return 0;
+}
+
 static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
 {
 	unsigned index;
@@ -928,6 +965,59 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
 	return NULL;
 }
 
+static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+	WARN_ON(!sp->unsync);
+	sp->unsync = 0;
+	--kvm->stat.mmu_unsync;
+}
+
+static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp);
+
+static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+{
+	if (sp->role.glevels != vcpu->arch.mmu.root_level) {
+		kvm_mmu_zap_page(vcpu->kvm, sp);
+		return 1;
+	}
+
+	rmap_write_protect(vcpu->kvm, sp->gfn);
+	if (vcpu->arch.mmu.sync_page(vcpu, sp)) {
+		kvm_mmu_zap_page(vcpu->kvm, sp);
+		return 1;
+	}
+
+	kvm_mmu_flush_tlb(vcpu);
+	kvm_unlink_unsync_page(vcpu->kvm, sp);
+	return 0;
+}
+
+struct sync_walker {
+	struct kvm_vcpu *vcpu;
+	struct kvm_unsync_walk walker;
+};
+
+static int mmu_sync_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk)
+{
+	struct sync_walker *sync_walk = container_of(walk, struct sync_walker,
+						     walker);
+	struct kvm_vcpu *vcpu = sync_walk->vcpu;
+
+	kvm_sync_page(vcpu, sp);
+	return (need_resched() || spin_needbreak(&vcpu->kvm->mmu_lock));
+}
+
+static void mmu_sync_children(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+{
+	struct sync_walker walker = {
+		.walker = { .entry = mmu_sync_fn, },
+		.vcpu = vcpu,
+	};
+
+	while (mmu_unsync_walk(sp, &walker.walker))
+		cond_resched_lock(&vcpu->kvm->mmu_lock);
+}
+
 static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 					     gfn_t gfn,
 					     gva_t gaddr,
@@ -941,7 +1031,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 	unsigned quadrant;
 	struct hlist_head *bucket;
 	struct kvm_mmu_page *sp;
-	struct hlist_node *node;
+	struct hlist_node *node, *tmp;
 
 	role.word = 0;
 	role.glevels = vcpu->arch.mmu.root_level;
@@ -957,8 +1047,18 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 		 gfn, role.word);
 	index = kvm_page_table_hashfn(gfn);
 	bucket = &vcpu->kvm->arch.mmu_page_hash[index];
-	hlist_for_each_entry(sp, node, bucket, hash_link)
-		if (sp->gfn == gfn && sp->role.word == role.word) {
+	hlist_for_each_entry_safe(sp, node, tmp, bucket, hash_link)
+		if (sp->gfn == gfn) {
+			if (sp->unsync)
+				if (kvm_sync_page(vcpu, sp))
+					continue;
+
+			if (sp->role.word != role.word)
+				continue;
+
+			if (sp->unsync_children)
+				set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests);
+
 			mmu_page_add_parent_pte(vcpu, sp, parent_pte);
 			pgprintk("%s: found\n", __func__);
 			return sp;
@@ -971,8 +1071,10 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 	sp->gfn = gfn;
 	sp->role = role;
 	hlist_add_head(&sp->hash_link, bucket);
-	if (!metaphysical)
+	if (!metaphysical) {
 		rmap_write_protect(vcpu->kvm, gfn);
+		account_shadowed(vcpu->kvm, gfn);
+	}
 	if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte)
 		vcpu->arch.mmu.prefetch_page(vcpu, sp);
 	else
@@ -1078,14 +1180,47 @@ static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
 	}
 }
 
+struct zap_walker {
+	struct kvm_unsync_walk walker;
+	struct kvm *kvm;
+	int zapped;
+};
+
+static int mmu_zap_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk)
+{
+	struct zap_walker *zap_walk = container_of(walk, struct zap_walker,
+						     walker);
+	kvm_mmu_zap_page(zap_walk->kvm, sp);
+	zap_walk->zapped = 1;
+	return 0;
+}
+
+static int mmu_zap_unsync_children(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+	struct zap_walker walker = {
+		.walker = { .entry = mmu_zap_fn, },
+		.kvm = kvm,
+		.zapped = 0,
+	};
+
+	if (sp->role.level == PT_PAGE_TABLE_LEVEL)
+		return 0;
+	mmu_unsync_walk(sp, &walker.walker);
+	return walker.zapped;
+}
+
 static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
+	int ret;
 	++kvm->stat.mmu_shadow_zapped;
+	ret = mmu_zap_unsync_children(kvm, sp);
 	kvm_mmu_page_unlink_children(kvm, sp);
 	kvm_mmu_unlink_parents(kvm, sp);
 	kvm_flush_remote_tlbs(kvm);
 	if (!sp->role.invalid && !sp->role.metaphysical)
 		unaccount_shadowed(kvm, sp->gfn);
+	if (sp->unsync)
+		kvm_unlink_unsync_page(kvm, sp);
 	if (!sp->root_count) {
 		hlist_del(&sp->hash_link);
 		kvm_mmu_free_page(kvm, sp);
@@ -1095,7 +1230,7 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 		kvm_reload_remote_mmus(kvm);
 	}
 	kvm_mmu_reset_last_pte_updated(kvm);
-	return 0;
+	return ret;
 }
 
 /*
@@ -1201,10 +1336,58 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
 	return page;
 }
 
+static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+{
+	sp->unsync_children = 1;
+	return 1;
+}
+
+static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+{
+	unsigned index;
+	struct hlist_head *bucket;
+	struct kvm_mmu_page *s;
+	struct hlist_node *node, *n;
+
+	index = kvm_page_table_hashfn(sp->gfn);
+	bucket = &vcpu->kvm->arch.mmu_page_hash[index];
+	/* don't unsync if pagetable is shadowed with multiple roles */
+	hlist_for_each_entry_safe(s, node, n, bucket, hash_link) {
+		if (s->gfn != sp->gfn || s->role.metaphysical)
+			continue;
+		if (s->role.word != sp->role.word)
+			return 1;
+	}
+	mmu_parent_walk(vcpu, sp, unsync_walk_fn);
+	++vcpu->kvm->stat.mmu_unsync;
+	sp->unsync = 1;
+	mmu_convert_notrap(sp);
+	return 0;
+}
+
+static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
+				  bool can_unsync)
+{
+	struct kvm_mmu_page *shadow;
+
+	shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn);
+	if (shadow) {
+		if (shadow->role.level != PT_PAGE_TABLE_LEVEL)
+			return 1;
+		if (shadow->unsync)
+			return 0;
+		if (can_unsync)
+			return kvm_unsync_page(vcpu, shadow);
+		return 1;
+	}
+	return 0;
+}
+
 static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 		    unsigned pte_access, int user_fault,
 		    int write_fault, int dirty, int largepage,
-		    gfn_t gfn, pfn_t pfn, bool speculative)
+		    gfn_t gfn, pfn_t pfn, bool speculative,
+		    bool can_unsync)
 {
 	u64 spte;
 	int ret = 0;
@@ -1231,7 +1414,6 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 
 	if ((pte_access & ACC_WRITE_MASK)
 	    || (write_fault && !is_write_protection(vcpu) && !user_fault)) {
-		struct kvm_mmu_page *shadow;
 
 		if (largepage && has_wrprotected_page(vcpu->kvm, gfn)) {
 			ret = 1;
@@ -1241,8 +1423,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 
 		spte |= PT_WRITABLE_MASK;
 
-		shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn);
-		if (shadow) {
+		if (mmu_need_write_protect(vcpu, gfn, can_unsync)) {
 			pgprintk("%s: found shadow page for %lx, marking ro\n",
 				 __func__, gfn);
 			ret = 1;
@@ -1260,7 +1441,6 @@ set_pte:
 	return ret;
 }
 
-
 static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 			 unsigned pt_access, unsigned pte_access,
 			 int user_fault, int write_fault, int dirty,
@@ -1298,7 +1478,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 		}
 	}
 	if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault,
-		      dirty, largepage, gfn, pfn, speculative)) {
+		      dirty, largepage, gfn, pfn, speculative, true)) {
 		if (write_fault)
 			*ptwrite = 1;
 		kvm_x86_ops->tlb_flush(vcpu);
@@ -1518,10 +1698,6 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
 	vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root);
 }
 
-static void mmu_sync_children(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
-{
-}
-
 static void mmu_sync_roots(struct kvm_vcpu *vcpu)
 {
 	int i;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index dc169e8148b1..613ec9aa674a 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -580,7 +580,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 		pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
 		set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
 			 is_dirty_pte(gpte), 0, gfn,
-			 spte_to_pfn(sp->spt[i]), true);
+			 spte_to_pfn(sp->spt[i]), true, false);
 	}
 
 	return !nr_present;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index efee85ba07e5..1c5864ac0837 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -101,6 +101,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "mmu_flooded", VM_STAT(mmu_flooded) },
 	{ "mmu_recycled", VM_STAT(mmu_recycled) },
 	{ "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
+	{ "mmu_unsync", VM_STAT(mmu_unsync) },
 	{ "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
 	{ "largepages", VM_STAT(lpages) },
 	{ NULL }
@@ -3120,6 +3121,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	if (vcpu->requests) {
 		if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests))
 			__kvm_migrate_timers(vcpu);
+		if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests))
+			kvm_mmu_sync_roots(vcpu);
 		if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
 			kvm_x86_ops->tlb_flush(vcpu);
 		if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS,
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 8b935cc4c14b..7d36fcc02818 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -195,6 +195,8 @@ struct kvm_mmu_page {
 				    */
 	int multimapped;         /* More than one parent_pte? */
 	int root_count;          /* Currently serving as active root */
+	bool unsync;
+	bool unsync_children;
 	union {
 		u64 *parent_pte;               /* !multimapped */
 		struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */
@@ -371,6 +373,7 @@ struct kvm_vm_stat {
 	u32 mmu_flooded;
 	u32 mmu_recycled;
 	u32 mmu_cache_miss;
+	u32 mmu_unsync;
 	u32 remote_tlb_flush;
 	u32 lpages;
 };
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 6252802c3cc0..73b7c52b9493 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -35,6 +35,7 @@
 #define KVM_REQ_TRIPLE_FAULT       4
 #define KVM_REQ_PENDING_TIMER      5
 #define KVM_REQ_UNHALT             6
+#define KVM_REQ_MMU_SYNC           7
 
 struct kvm_vcpu;
 extern struct kmem_cache *kvm_vcpu_cache;
-- 
cgit v1.2.3


From 8a98f6648a2b0756d8f26d6c13332f5526355fec Mon Sep 17 00:00:00 2001
From: Xiantao Zhang <xiantao.zhang@intel.com>
Date: Mon, 6 Oct 2008 13:47:38 +0800
Subject: KVM: Move device assignment logic to common code

To share with other archs, this patch moves device assignment
logic to common parts.

Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/x86.c       | 255 --------------------------------------------
 include/linux/kvm.h      |   2 +
 include/linux/kvm_host.h |   1 +
 virt/kvm/kvm_main.c      | 268 ++++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 269 insertions(+), 257 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d6d7123d2644..f8bde01ba8e6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -30,7 +30,6 @@
 #include <linux/interrupt.h>
 #include <linux/kvm.h>
 #include <linux/fs.h>
-#include <linux/pci.h>
 #include <linux/vmalloc.h>
 #include <linux/module.h>
 #include <linux/mman.h>
@@ -107,238 +106,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ NULL }
 };
 
-static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
-						      int assigned_dev_id)
-{
-	struct list_head *ptr;
-	struct kvm_assigned_dev_kernel *match;
-
-	list_for_each(ptr, head) {
-		match = list_entry(ptr, struct kvm_assigned_dev_kernel, list);
-		if (match->assigned_dev_id == assigned_dev_id)
-			return match;
-	}
-	return NULL;
-}
-
-static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
-{
-	struct kvm_assigned_dev_kernel *assigned_dev;
-
-	assigned_dev = container_of(work, struct kvm_assigned_dev_kernel,
-				    interrupt_work);
-
-	/* This is taken to safely inject irq inside the guest. When
-	 * the interrupt injection (or the ioapic code) uses a
-	 * finer-grained lock, update this
-	 */
-	mutex_lock(&assigned_dev->kvm->lock);
-	kvm_set_irq(assigned_dev->kvm,
-		    assigned_dev->guest_irq, 1);
-	mutex_unlock(&assigned_dev->kvm->lock);
-	kvm_put_kvm(assigned_dev->kvm);
-}
-
-/* FIXME: Implement the OR logic needed to make shared interrupts on
- * this line behave properly
- */
-static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
-{
-	struct kvm_assigned_dev_kernel *assigned_dev =
-		(struct kvm_assigned_dev_kernel *) dev_id;
-
-	kvm_get_kvm(assigned_dev->kvm);
-	schedule_work(&assigned_dev->interrupt_work);
-	disable_irq_nosync(irq);
-	return IRQ_HANDLED;
-}
-
-/* Ack the irq line for an assigned device */
-static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
-{
-	struct kvm_assigned_dev_kernel *dev;
-
-	if (kian->gsi == -1)
-		return;
-
-	dev = container_of(kian, struct kvm_assigned_dev_kernel,
-			   ack_notifier);
-	kvm_set_irq(dev->kvm, dev->guest_irq, 0);
-	enable_irq(dev->host_irq);
-}
-
-static void kvm_free_assigned_device(struct kvm *kvm,
-				     struct kvm_assigned_dev_kernel
-				     *assigned_dev)
-{
-	if (irqchip_in_kernel(kvm) && assigned_dev->irq_requested)
-		free_irq(assigned_dev->host_irq, (void *)assigned_dev);
-
-	kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier);
-
-	if (cancel_work_sync(&assigned_dev->interrupt_work))
-		/* We had pending work. That means we will have to take
-		 * care of kvm_put_kvm.
-		 */
-		kvm_put_kvm(kvm);
-
-	pci_release_regions(assigned_dev->dev);
-	pci_disable_device(assigned_dev->dev);
-	pci_dev_put(assigned_dev->dev);
-
-	list_del(&assigned_dev->list);
-	kfree(assigned_dev);
-}
-
-static void kvm_free_all_assigned_devices(struct kvm *kvm)
-{
-	struct list_head *ptr, *ptr2;
-	struct kvm_assigned_dev_kernel *assigned_dev;
-
-	list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) {
-		assigned_dev = list_entry(ptr,
-					  struct kvm_assigned_dev_kernel,
-					  list);
-
-		kvm_free_assigned_device(kvm, assigned_dev);
-	}
-}
-
-static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
-				   struct kvm_assigned_irq
-				   *assigned_irq)
-{
-	int r = 0;
-	struct kvm_assigned_dev_kernel *match;
-
-	mutex_lock(&kvm->lock);
-
-	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
-				      assigned_irq->assigned_dev_id);
-	if (!match) {
-		mutex_unlock(&kvm->lock);
-		return -EINVAL;
-	}
-
-	if (match->irq_requested) {
-		match->guest_irq = assigned_irq->guest_irq;
-		match->ack_notifier.gsi = assigned_irq->guest_irq;
-		mutex_unlock(&kvm->lock);
-		return 0;
-	}
-
-	INIT_WORK(&match->interrupt_work,
-		  kvm_assigned_dev_interrupt_work_handler);
-
-	if (irqchip_in_kernel(kvm)) {
-		if (!capable(CAP_SYS_RAWIO)) {
-			r = -EPERM;
-			goto out_release;
-		}
-
-		if (assigned_irq->host_irq)
-			match->host_irq = assigned_irq->host_irq;
-		else
-			match->host_irq = match->dev->irq;
-		match->guest_irq = assigned_irq->guest_irq;
-		match->ack_notifier.gsi = assigned_irq->guest_irq;
-		match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
-		kvm_register_irq_ack_notifier(kvm, &match->ack_notifier);
-
-		/* Even though this is PCI, we don't want to use shared
-		 * interrupts. Sharing host devices with guest-assigned devices
-		 * on the same interrupt line is not a happy situation: there
-		 * are going to be long delays in accepting, acking, etc.
-		 */
-		if (request_irq(match->host_irq, kvm_assigned_dev_intr, 0,
-				"kvm_assigned_device", (void *)match)) {
-			r = -EIO;
-			goto out_release;
-		}
-	}
-
-	match->irq_requested = true;
-	mutex_unlock(&kvm->lock);
-	return r;
-out_release:
-	mutex_unlock(&kvm->lock);
-	kvm_free_assigned_device(kvm, match);
-	return r;
-}
-
-static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
-				      struct kvm_assigned_pci_dev *assigned_dev)
-{
-	int r = 0;
-	struct kvm_assigned_dev_kernel *match;
-	struct pci_dev *dev;
-
-	mutex_lock(&kvm->lock);
-
-	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
-				      assigned_dev->assigned_dev_id);
-	if (match) {
-		/* device already assigned */
-		r = -EINVAL;
-		goto out;
-	}
-
-	match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL);
-	if (match == NULL) {
-		printk(KERN_INFO "%s: Couldn't allocate memory\n",
-		       __func__);
-		r = -ENOMEM;
-		goto out;
-	}
-	dev = pci_get_bus_and_slot(assigned_dev->busnr,
-				   assigned_dev->devfn);
-	if (!dev) {
-		printk(KERN_INFO "%s: host device not found\n", __func__);
-		r = -EINVAL;
-		goto out_free;
-	}
-	if (pci_enable_device(dev)) {
-		printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
-		r = -EBUSY;
-		goto out_put;
-	}
-	r = pci_request_regions(dev, "kvm_assigned_device");
-	if (r) {
-		printk(KERN_INFO "%s: Could not get access to device regions\n",
-		       __func__);
-		goto out_disable;
-	}
-	match->assigned_dev_id = assigned_dev->assigned_dev_id;
-	match->host_busnr = assigned_dev->busnr;
-	match->host_devfn = assigned_dev->devfn;
-	match->dev = dev;
-
-	match->kvm = kvm;
-
-	list_add(&match->list, &kvm->arch.assigned_dev_head);
-
-	if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
-		r = kvm_iommu_map_guest(kvm, match);
-		if (r)
-			goto out_list_del;
-	}
-
-out:
-	mutex_unlock(&kvm->lock);
-	return r;
-out_list_del:
-	list_del(&match->list);
-	pci_release_regions(dev);
-out_disable:
-	pci_disable_device(dev);
-out_put:
-	pci_dev_put(dev);
-out_free:
-	kfree(match);
-	mutex_unlock(&kvm->lock);
-	return r;
-}
-
 unsigned long segment_base(u16 selector)
 {
 	struct descriptor_table gdt;
@@ -2030,28 +1797,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 			goto out;
 		break;
 	}
-	case KVM_ASSIGN_PCI_DEVICE: {
-		struct kvm_assigned_pci_dev assigned_dev;
-
-		r = -EFAULT;
-		if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
-			goto out;
-		r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev);
-		if (r)
-			goto out;
-		break;
-	}
-	case KVM_ASSIGN_IRQ: {
-		struct kvm_assigned_irq assigned_irq;
-
-		r = -EFAULT;
-		if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
-			goto out;
-		r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq);
-		if (r)
-			goto out;
-		break;
-	}
 	case KVM_GET_PIT: {
 		r = -EFAULT;
 		if (copy_from_user(&u.ps, argp, sizeof(struct kvm_pit_state)))
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 4269be171faf..9acf34a6dfbb 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -383,7 +383,9 @@ struct kvm_trace_rec {
 #define KVM_CAP_MP_STATE 14
 #define KVM_CAP_COALESCED_MMIO 15
 #define KVM_CAP_SYNC_MMU 16  /* Changes to host mmap are reflected in guest */
+#ifdef CONFIG_X86
 #define KVM_CAP_DEVICE_ASSIGNMENT 17
+#endif
 #define KVM_CAP_IOMMU 18
 
 /*
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 73b7c52b9493..10c1146cd009 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -281,6 +281,7 @@ void kvm_free_physmem(struct kvm *kvm);
 
 struct  kvm *kvm_arch_create_vm(void);
 void kvm_arch_destroy_vm(struct kvm *kvm);
+void kvm_free_all_assigned_devices(struct kvm *kvm);
 
 int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
 int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 98cd916448a8..485bcdc16552 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -51,6 +51,12 @@
 #include "coalesced_mmio.h"
 #endif
 
+#ifdef KVM_CAP_DEVICE_ASSIGNMENT
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include "irq.h"
+#endif
+
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
@@ -71,6 +77,240 @@ static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
 
 bool kvm_rebooting;
 
+#ifdef KVM_CAP_DEVICE_ASSIGNMENT
+static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
+						      int assigned_dev_id)
+{
+	struct list_head *ptr;
+	struct kvm_assigned_dev_kernel *match;
+
+	list_for_each(ptr, head) {
+		match = list_entry(ptr, struct kvm_assigned_dev_kernel, list);
+		if (match->assigned_dev_id == assigned_dev_id)
+			return match;
+	}
+	return NULL;
+}
+
+static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
+{
+	struct kvm_assigned_dev_kernel *assigned_dev;
+
+	assigned_dev = container_of(work, struct kvm_assigned_dev_kernel,
+				    interrupt_work);
+
+	/* This is taken to safely inject irq inside the guest. When
+	 * the interrupt injection (or the ioapic code) uses a
+	 * finer-grained lock, update this
+	 */
+	mutex_lock(&assigned_dev->kvm->lock);
+	kvm_set_irq(assigned_dev->kvm,
+		    assigned_dev->guest_irq, 1);
+	mutex_unlock(&assigned_dev->kvm->lock);
+	kvm_put_kvm(assigned_dev->kvm);
+}
+
+/* FIXME: Implement the OR logic needed to make shared interrupts on
+ * this line behave properly
+ */
+static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
+{
+	struct kvm_assigned_dev_kernel *assigned_dev =
+		(struct kvm_assigned_dev_kernel *) dev_id;
+
+	kvm_get_kvm(assigned_dev->kvm);
+	schedule_work(&assigned_dev->interrupt_work);
+	disable_irq_nosync(irq);
+	return IRQ_HANDLED;
+}
+
+/* Ack the irq line for an assigned device */
+static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
+{
+	struct kvm_assigned_dev_kernel *dev;
+
+	if (kian->gsi == -1)
+		return;
+
+	dev = container_of(kian, struct kvm_assigned_dev_kernel,
+			   ack_notifier);
+	kvm_set_irq(dev->kvm, dev->guest_irq, 0);
+	enable_irq(dev->host_irq);
+}
+
+static void kvm_free_assigned_device(struct kvm *kvm,
+				     struct kvm_assigned_dev_kernel
+				     *assigned_dev)
+{
+	if (irqchip_in_kernel(kvm) && assigned_dev->irq_requested)
+		free_irq(assigned_dev->host_irq, (void *)assigned_dev);
+
+	kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier);
+
+	if (cancel_work_sync(&assigned_dev->interrupt_work))
+		/* We had pending work. That means we will have to take
+		 * care of kvm_put_kvm.
+		 */
+		kvm_put_kvm(kvm);
+
+	pci_release_regions(assigned_dev->dev);
+	pci_disable_device(assigned_dev->dev);
+	pci_dev_put(assigned_dev->dev);
+
+	list_del(&assigned_dev->list);
+	kfree(assigned_dev);
+}
+
+void kvm_free_all_assigned_devices(struct kvm *kvm)
+{
+	struct list_head *ptr, *ptr2;
+	struct kvm_assigned_dev_kernel *assigned_dev;
+
+	list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) {
+		assigned_dev = list_entry(ptr,
+					  struct kvm_assigned_dev_kernel,
+					  list);
+
+		kvm_free_assigned_device(kvm, assigned_dev);
+	}
+}
+
+static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
+				   struct kvm_assigned_irq
+				   *assigned_irq)
+{
+	int r = 0;
+	struct kvm_assigned_dev_kernel *match;
+
+	mutex_lock(&kvm->lock);
+
+	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      assigned_irq->assigned_dev_id);
+	if (!match) {
+		mutex_unlock(&kvm->lock);
+		return -EINVAL;
+	}
+
+	if (match->irq_requested) {
+		match->guest_irq = assigned_irq->guest_irq;
+		match->ack_notifier.gsi = assigned_irq->guest_irq;
+		mutex_unlock(&kvm->lock);
+		return 0;
+	}
+
+	INIT_WORK(&match->interrupt_work,
+		  kvm_assigned_dev_interrupt_work_handler);
+
+	if (irqchip_in_kernel(kvm)) {
+		if (!capable(CAP_SYS_RAWIO)) {
+			r = -EPERM;
+			goto out_release;
+		}
+
+		if (assigned_irq->host_irq)
+			match->host_irq = assigned_irq->host_irq;
+		else
+			match->host_irq = match->dev->irq;
+		match->guest_irq = assigned_irq->guest_irq;
+		match->ack_notifier.gsi = assigned_irq->guest_irq;
+		match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
+		kvm_register_irq_ack_notifier(kvm, &match->ack_notifier);
+
+		/* Even though this is PCI, we don't want to use shared
+		 * interrupts. Sharing host devices with guest-assigned devices
+		 * on the same interrupt line is not a happy situation: there
+		 * are going to be long delays in accepting, acking, etc.
+		 */
+		if (request_irq(match->host_irq, kvm_assigned_dev_intr, 0,
+				"kvm_assigned_device", (void *)match)) {
+			r = -EIO;
+			goto out_release;
+		}
+	}
+
+	match->irq_requested = true;
+	mutex_unlock(&kvm->lock);
+	return r;
+out_release:
+	mutex_unlock(&kvm->lock);
+	kvm_free_assigned_device(kvm, match);
+	return r;
+}
+
+static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
+				      struct kvm_assigned_pci_dev *assigned_dev)
+{
+	int r = 0;
+	struct kvm_assigned_dev_kernel *match;
+	struct pci_dev *dev;
+
+	mutex_lock(&kvm->lock);
+
+	match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      assigned_dev->assigned_dev_id);
+	if (match) {
+		/* device already assigned */
+		r = -EINVAL;
+		goto out;
+	}
+
+	match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL);
+	if (match == NULL) {
+		printk(KERN_INFO "%s: Couldn't allocate memory\n",
+		       __func__);
+		r = -ENOMEM;
+		goto out;
+	}
+	dev = pci_get_bus_and_slot(assigned_dev->busnr,
+				   assigned_dev->devfn);
+	if (!dev) {
+		printk(KERN_INFO "%s: host device not found\n", __func__);
+		r = -EINVAL;
+		goto out_free;
+	}
+	if (pci_enable_device(dev)) {
+		printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
+		r = -EBUSY;
+		goto out_put;
+	}
+	r = pci_request_regions(dev, "kvm_assigned_device");
+	if (r) {
+		printk(KERN_INFO "%s: Could not get access to device regions\n",
+		       __func__);
+		goto out_disable;
+	}
+	match->assigned_dev_id = assigned_dev->assigned_dev_id;
+	match->host_busnr = assigned_dev->busnr;
+	match->host_devfn = assigned_dev->devfn;
+	match->dev = dev;
+
+	match->kvm = kvm;
+
+	list_add(&match->list, &kvm->arch.assigned_dev_head);
+
+	if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
+		r = kvm_iommu_map_guest(kvm, match);
+		if (r)
+			goto out_list_del;
+	}
+
+out:
+	mutex_unlock(&kvm->lock);
+	return r;
+out_list_del:
+	list_del(&match->list);
+	pci_release_regions(dev);
+out_disable:
+	pci_disable_device(dev);
+out_put:
+	pci_dev_put(dev);
+out_free:
+	kfree(match);
+	mutex_unlock(&kvm->lock);
+	return r;
+}
+#endif
+
 static inline int valid_vcpu(int n)
 {
 	return likely(n >= 0 && n < KVM_MAX_VCPUS);
@@ -578,12 +818,12 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	}
 
 	kvm_free_physmem_slot(&old, &new);
-
+#ifdef CONFIG_DMAR
 	/* map the pages in iommu page table */
 	r = kvm_iommu_map_pages(kvm, base_gfn, npages);
 	if (r)
 		goto out;
-
+#endif
 	return 0;
 
 out_free:
@@ -1382,6 +1622,30 @@ static long kvm_vm_ioctl(struct file *filp,
 		r = 0;
 		break;
 	}
+#endif
+#ifdef KVM_CAP_DEVICE_ASSIGNMENT
+	case KVM_ASSIGN_PCI_DEVICE: {
+		struct kvm_assigned_pci_dev assigned_dev;
+
+		r = -EFAULT;
+		if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
+			goto out;
+		r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev);
+		if (r)
+			goto out;
+		break;
+	}
+	case KVM_ASSIGN_IRQ: {
+		struct kvm_assigned_irq assigned_irq;
+
+		r = -EFAULT;
+		if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
+			goto out;
+		r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq);
+		if (r)
+			goto out;
+		break;
+	}
 #endif
 	default:
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
-- 
cgit v1.2.3


From c77fb9dc7a0383c86eabef30272a763a482403e1 Mon Sep 17 00:00:00 2001
From: Xiantao Zhang <xiantao.zhang@intel.com>
Date: Sat, 27 Sep 2008 10:55:40 +0800
Subject: KVM: Change is_mmio_pfn to kvm_is_mmio_pfn, and make it common for
 all archs

Add a kvm_ prefix to avoid polluting kernel's name space.

Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 include/linux/kvm_host.h |  2 ++
 virt/kvm/kvm_main.c      | 16 ++++++++--------
 2 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 10c1146cd009..b3b7598b4d94 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -288,6 +288,8 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
 
+int kvm_is_mmio_pfn(pfn_t pfn);
+
 struct kvm_irq_ack_notifier {
 	struct hlist_node link;
 	unsigned gsi;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 485bcdc16552..cf0ab8ed3845 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -316,7 +316,7 @@ static inline int valid_vcpu(int n)
 	return likely(n >= 0 && n < KVM_MAX_VCPUS);
 }
 
-static inline int is_mmio_pfn(pfn_t pfn)
+inline int kvm_is_mmio_pfn(pfn_t pfn)
 {
 	if (pfn_valid(pfn))
 		return PageReserved(pfn_to_page(pfn));
@@ -994,7 +994,7 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
 
 		pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
 		up_read(&current->mm->mmap_sem);
-		BUG_ON(!is_mmio_pfn(pfn));
+		BUG_ON(!kvm_is_mmio_pfn(pfn));
 	} else
 		pfn = page_to_pfn(page[0]);
 
@@ -1008,10 +1008,10 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
 	pfn_t pfn;
 
 	pfn = gfn_to_pfn(kvm, gfn);
-	if (!is_mmio_pfn(pfn))
+	if (!kvm_is_mmio_pfn(pfn))
 		return pfn_to_page(pfn);
 
-	WARN_ON(is_mmio_pfn(pfn));
+	WARN_ON(kvm_is_mmio_pfn(pfn));
 
 	get_page(bad_page);
 	return bad_page;
@@ -1027,7 +1027,7 @@ EXPORT_SYMBOL_GPL(kvm_release_page_clean);
 
 void kvm_release_pfn_clean(pfn_t pfn)
 {
-	if (!is_mmio_pfn(pfn))
+	if (!kvm_is_mmio_pfn(pfn))
 		put_page(pfn_to_page(pfn));
 }
 EXPORT_SYMBOL_GPL(kvm_release_pfn_clean);
@@ -1053,7 +1053,7 @@ EXPORT_SYMBOL_GPL(kvm_set_page_dirty);
 
 void kvm_set_pfn_dirty(pfn_t pfn)
 {
-	if (!is_mmio_pfn(pfn)) {
+	if (!kvm_is_mmio_pfn(pfn)) {
 		struct page *page = pfn_to_page(pfn);
 		if (!PageReserved(page))
 			SetPageDirty(page);
@@ -1063,14 +1063,14 @@ EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty);
 
 void kvm_set_pfn_accessed(pfn_t pfn)
 {
-	if (!is_mmio_pfn(pfn))
+	if (!kvm_is_mmio_pfn(pfn))
 		mark_page_accessed(pfn_to_page(pfn));
 }
 EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed);
 
 void kvm_get_pfn(pfn_t pfn)
 {
-	if (!is_mmio_pfn(pfn))
+	if (!kvm_is_mmio_pfn(pfn))
 		get_page(pfn_to_page(pfn));
 }
 EXPORT_SYMBOL_GPL(kvm_get_pfn);
-- 
cgit v1.2.3


From 3de42dc094ecd313dc7d551e007a134b52f8663d Mon Sep 17 00:00:00 2001
From: Xiantao Zhang <xiantao.zhang@intel.com>
Date: Mon, 6 Oct 2008 13:48:45 +0800
Subject: KVM: Separate irq ack notification out of arch/x86/kvm/irq.c

Moving irq ack notification logic as common, and make
it shared with ia64 side.

Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/ia64/include/asm/kvm_host.h |  4 +++
 arch/ia64/kvm/Makefile           |  2 +-
 arch/ia64/kvm/irq.h              |  5 ----
 arch/x86/kvm/Makefile            |  2 +-
 arch/x86/kvm/irq.c               | 33 ----------------------
 arch/x86/kvm/irq.h               |  8 ------
 include/asm-x86/kvm_host.h       |  2 ++
 include/linux/kvm_host.h         |  6 ++++
 virt/kvm/irq_comm.c              | 60 ++++++++++++++++++++++++++++++++++++++++
 9 files changed, 74 insertions(+), 48 deletions(-)
 create mode 100644 virt/kvm/irq_comm.c

(limited to 'include/linux')

diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index 1efe513a9941..da579a33db12 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -413,6 +413,10 @@ struct kvm_arch {
 	struct kvm_ioapic *vioapic;
 	struct kvm_vm_stat stat;
 	struct kvm_sal_data rdv_sal_data;
+
+	struct list_head assigned_dev_head;
+	struct dmar_domain *intel_iommu_domain;
+	struct hlist_head irq_ack_notifier_list;
 };
 
 union cpuid3_t {
diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile
index bf22fb9e6dcf..3b1a1c156ef9 100644
--- a/arch/ia64/kvm/Makefile
+++ b/arch/ia64/kvm/Makefile
@@ -44,7 +44,7 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/
 EXTRA_AFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/
 
 common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
-		coalesced_mmio.o)
+		coalesced_mmio.o irq_comm.o)
 
 kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o
 obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/ia64/kvm/irq.h b/arch/ia64/kvm/irq.h
index f2e6545debf4..604329ac3c99 100644
--- a/arch/ia64/kvm/irq.h
+++ b/arch/ia64/kvm/irq.h
@@ -23,10 +23,5 @@
 #ifndef __IRQ_H
 #define __IRQ_H
 
-struct kvm;
-
-static inline void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi)
-{
-}
 
 #endif
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 7dce593b9c66..c02343594b4d 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -3,7 +3,7 @@
 #
 
 common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
-                coalesced_mmio.o)
+                coalesced_mmio.o irq_comm.o)
 ifeq ($(CONFIG_KVM_TRACE),y)
 common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o)
 endif
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 8c1b9c5def78..c019b8edcdb7 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -99,36 +99,3 @@ void __kvm_migrate_timers(struct kvm_vcpu *vcpu)
 	__kvm_migrate_apic_timer(vcpu);
 	__kvm_migrate_pit_timer(vcpu);
 }
-
-/* This should be called with the kvm->lock mutex held */
-void kvm_set_irq(struct kvm *kvm, int irq, int level)
-{
-	/* Not possible to detect if the guest uses the PIC or the
-	 * IOAPIC.  So set the bit in both. The guest will ignore
-	 * writes to the unused one.
-	 */
-	kvm_ioapic_set_irq(kvm->arch.vioapic, irq, level);
-	kvm_pic_set_irq(pic_irqchip(kvm), irq, level);
-}
-
-void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi)
-{
-	struct kvm_irq_ack_notifier *kian;
-	struct hlist_node *n;
-
-	hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link)
-		if (kian->gsi == gsi)
-			kian->irq_acked(kian);
-}
-
-void kvm_register_irq_ack_notifier(struct kvm *kvm,
-				   struct kvm_irq_ack_notifier *kian)
-{
-	hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list);
-}
-
-void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
-				     struct kvm_irq_ack_notifier *kian)
-{
-	hlist_del(&kian->link);
-}
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 4748532fd1da..f17c8f5bbf31 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -68,7 +68,6 @@ struct kvm_pic {
 };
 
 struct kvm_pic *kvm_create_pic(struct kvm *kvm);
-void kvm_pic_set_irq(void *opaque, int irq, int level);
 int kvm_pic_read_irq(struct kvm *kvm);
 void kvm_pic_update_irq(struct kvm_pic *s);
 void kvm_pic_clear_isr_ack(struct kvm *kvm);
@@ -85,13 +84,6 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
 
 void kvm_pic_reset(struct kvm_kpic_state *s);
 
-void kvm_set_irq(struct kvm *kvm, int irq, int level);
-void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi);
-void kvm_register_irq_ack_notifier(struct kvm *kvm,
-				   struct kvm_irq_ack_notifier *kian);
-void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
-				     struct kvm_irq_ack_notifier *kian);
-
 void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
 void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
 void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index ca6bbc0bd97c..411fb8cfb24e 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -565,6 +565,8 @@ void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
 void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2,
 			   u32 error_code);
 
+void kvm_pic_set_irq(void *opaque, int irq, int level);
+
 void kvm_inject_nmi(struct kvm_vcpu *vcpu);
 
 void fx_init(struct kvm_vcpu *vcpu);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b3b7598b4d94..3833c48fae3a 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -309,6 +309,12 @@ struct kvm_assigned_dev_kernel {
 	struct pci_dev *dev;
 	struct kvm *kvm;
 };
+void kvm_set_irq(struct kvm *kvm, int irq, int level);
+void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi);
+void kvm_register_irq_ack_notifier(struct kvm *kvm,
+				   struct kvm_irq_ack_notifier *kian);
+void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
+				     struct kvm_irq_ack_notifier *kian);
 
 #ifdef CONFIG_DMAR
 int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
new file mode 100644
index 000000000000..d0169f5e6047
--- /dev/null
+++ b/virt/kvm/irq_comm.c
@@ -0,0 +1,60 @@
+/*
+ * irq_comm.c: Common API for in kernel interrupt controller
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ * Authors:
+ *   Yaozu (Eddie) Dong <Eddie.dong@intel.com>
+ *
+ */
+
+#include <linux/kvm_host.h>
+#include "irq.h"
+
+#include "ioapic.h"
+
+/* This should be called with the kvm->lock mutex held */
+void kvm_set_irq(struct kvm *kvm, int irq, int level)
+{
+	/* Not possible to detect if the guest uses the PIC or the
+	 * IOAPIC.  So set the bit in both. The guest will ignore
+	 * writes to the unused one.
+	 */
+	kvm_ioapic_set_irq(kvm->arch.vioapic, irq, level);
+#ifdef CONFIG_X86
+	kvm_pic_set_irq(pic_irqchip(kvm), irq, level);
+#endif
+}
+
+void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi)
+{
+	struct kvm_irq_ack_notifier *kian;
+	struct hlist_node *n;
+
+	hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link)
+		if (kian->gsi == gsi)
+			kian->irq_acked(kian);
+}
+
+void kvm_register_irq_ack_notifier(struct kvm *kvm,
+				   struct kvm_irq_ack_notifier *kian)
+{
+	hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list);
+}
+
+void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
+				     struct kvm_irq_ack_notifier *kian)
+{
+	hlist_del(&kian->link);
+}
-- 
cgit v1.2.3


From 2381ad241d0bea1253a37f314b270848067640bb Mon Sep 17 00:00:00 2001
From: Xiantao Zhang <xiantao.zhang@intel.com>
Date: Wed, 8 Oct 2008 08:29:33 +0800
Subject: KVM: ia64: Add intel iommu support for guests.

With intel iommu hardware, we can assign devices to kvm/ia64 guests.

Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/ia64/kvm/Makefile   | 4 ++++
 arch/ia64/kvm/kvm-ia64.c | 9 +++++++++
 include/linux/kvm.h      | 2 +-
 3 files changed, 14 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile
index 3b1a1c156ef9..cf37f8f490c0 100644
--- a/arch/ia64/kvm/Makefile
+++ b/arch/ia64/kvm/Makefile
@@ -46,6 +46,10 @@ EXTRA_AFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/
 common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
 		coalesced_mmio.o irq_comm.o)
 
+ifeq ($(CONFIG_DMAR),y)
+common-objs += $(addprefix ../../../virt/kvm/, vtd.o)
+endif
+
 kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o
 obj-$(CONFIG_KVM) += kvm.o
 
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 3df82f3fe547..c0699f0e35a9 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -31,6 +31,7 @@
 #include <linux/bitops.h>
 #include <linux/hrtimer.h>
 #include <linux/uaccess.h>
+#include <linux/intel-iommu.h>
 
 #include <asm/pgtable.h>
 #include <asm/gcc_intrin.h>
@@ -187,6 +188,9 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_COALESCED_MMIO:
 		r = KVM_COALESCED_MMIO_PAGE_OFFSET;
 		break;
+	case KVM_CAP_IOMMU:
+		r = intel_iommu_found();
+		break;
 	default:
 		r = 0;
 	}
@@ -773,6 +777,7 @@ static void kvm_init_vm(struct kvm *kvm)
 	 */
 	kvm_build_io_pmt(kvm);
 
+	INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
 }
 
 struct  kvm *kvm_arch_create_vm(void)
@@ -1336,6 +1341,10 @@ static void kvm_release_vm_pages(struct kvm *kvm)
 
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
+	kvm_iommu_unmap_guest(kvm);
+#ifdef  KVM_CAP_DEVICE_ASSIGNMENT
+	kvm_free_all_assigned_devices(kvm);
+#endif
 	kfree(kvm->arch.vioapic);
 	kvm_release_vm_pages(kvm);
 	kvm_free_physmem(kvm);
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 9acf34a6dfbb..797fcd781242 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -383,7 +383,7 @@ struct kvm_trace_rec {
 #define KVM_CAP_MP_STATE 14
 #define KVM_CAP_COALESCED_MMIO 15
 #define KVM_CAP_SYNC_MMU 16  /* Changes to host mmap are reflected in guest */
-#ifdef CONFIG_X86
+#if defined(CONFIG_X86)||defined(CONFIG_IA64)
 #define KVM_CAP_DEVICE_ASSIGNMENT 17
 #endif
 #define KVM_CAP_IOMMU 18
-- 
cgit v1.2.3


From be585c07dd577faac26014db4246e6d7c7a131e7 Mon Sep 17 00:00:00 2001
From: Jay Fenlason <fenlason@redhat.com>
Date: Wed, 1 Oct 2008 18:13:20 -0400
Subject: firewire: Add more documentation to firewire-cdev.h

Signed-off-by: Jay Fenlason <fenlason@redhat.com>
Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 include/linux/firewire-cdev.h | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h
index 0f0e271f97fa..4d078e99c017 100644
--- a/include/linux/firewire-cdev.h
+++ b/include/linux/firewire-cdev.h
@@ -154,8 +154,13 @@ struct fw_cdev_event_iso_interrupt {
  * @request:       Valid if @common.type == %FW_CDEV_EVENT_REQUEST
  * @iso_interrupt: Valid if @common.type == %FW_CDEV_EVENT_ISO_INTERRUPT
  *
- * Convenience union for userspace use.  Events could be read(2) into a char
- * buffer and then cast to this union for further processing.
+ * Convenience union for userspace use.  Events could be read(2) into an
+ * appropriately aligned char buffer and then cast to this union for further
+ * processing.  Note that for a request, response or iso_interrupt event,
+ * the data[] or header[] may make the size of the full event larger than
+ * sizeof(union fw_cdev_event).  Also note that if you attempt to read(2)
+ * an event into a buffer that is not large enough for it, the data that does
+ * not fit will be discarded so that the next read(2) will return a new event.
  */
 union fw_cdev_event {
 	struct fw_cdev_event_common common;
-- 
cgit v1.2.3


From 22441cfa0c70dcd457f3c081fcf285c3bd155824 Mon Sep 17 00:00:00 2001
From: Pedro Ribeiro <pribeiro@net.ipl.pt>
Date: Wed, 15 Oct 2008 15:47:49 -0700
Subject: IPV6: Fix default gateway criteria wrt. HIGH/LOW preference radv
 option

Problem observed:
               In IPv6, in the presence of multiple routers candidates to
               default gateway in one segment, each sending a different
               value of preference, the Linux hosts connected to the
               segment weren't selecting the right one in all the
               combinations possible of LOW/MEDIUM/HIGH preference.

This patch changes two files:
include/linux/icmpv6.h
               Get the "router_pref" bitfield in the right place
               (as RFC4191 says), named the bit left with this fix as
               "home_agent" (RFC3775 say that's his function)

net/ipv6/ndisc.c
               Corrects the binary logic behind the updating of the
               router preference in the flags of the routing table

Result:
               With this two fixes applied, the default route used by
               the system was to consistent with the rules mentioned
               in RFC4191 in case of changes in the value of preference
               in router advertisements

Signed-off-by: Pedro Ribeiro <pribeiro@net.ipl.pt>
Acked-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/icmpv6.h | 6 ++++--
 net/ipv6/ndisc.c       | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h
index 03067443198a..a93a8dd33118 100644
--- a/include/linux/icmpv6.h
+++ b/include/linux/icmpv6.h
@@ -40,16 +40,18 @@ struct icmp6hdr {
                 struct icmpv6_nd_ra {
 			__u8		hop_limit;
 #if defined(__LITTLE_ENDIAN_BITFIELD)
-			__u8		reserved:4,
+			__u8		reserved:3,
 					router_pref:2,
+					home_agent:1,
 					other:1,
 					managed:1;
 
 #elif defined(__BIG_ENDIAN_BITFIELD)
 			__u8		managed:1,
 					other:1,
+					home_agent:1,
 					router_pref:2,
-					reserved:4;
+					reserved:3;
 #else
 #error	"Please fix <asm/byteorder.h>"
 #endif
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 840b15780a36..aae7ddcc8a2e 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1199,7 +1199,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 		}
 		neigh->flags |= NTF_ROUTER;
 	} else if (rt) {
-		rt->rt6i_flags |= (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
+		rt->rt6i_flags = (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
 	}
 
 	if (rt)
-- 
cgit v1.2.3


From ae87221d3ce49d9de1e43756da834fd0bf05a2ad Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Fri, 24 Aug 2007 16:11:54 -0700
Subject: sysfs: crash debugging

Print the name of the last-accessed sysfs file when we oops, to help track
down oopses which occur in sysfs store/read handlers.  Because these oopses
tend to not leave any trace of the offending code in the stack traces.

Cc: Kay Sievers <kay.sievers@vrfy.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/x86/kernel/dumpstack_32.c |  2 ++
 arch/x86/kernel/dumpstack_64.c |  2 ++
 fs/sysfs/file.c                | 13 +++++++++++++
 include/linux/sysfs.h          |  6 ++++++
 4 files changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 201ee359a1a9..1a78180f08d3 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -13,6 +13,7 @@
 #include <linux/kexec.h>
 #include <linux/bug.h>
 #include <linux/nmi.h>
+#include <linux/sysfs.h>
 
 #include <asm/stacktrace.h>
 
@@ -343,6 +344,7 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
 	printk("DEBUG_PAGEALLOC");
 #endif
 	printk("\n");
+	sysfs_printk_last_file();
 	if (notify_die(DIE_OOPS, str, regs, err,
 			current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
 		return 1;
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 086cc8118e39..96a5db7da8a7 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -13,6 +13,7 @@
 #include <linux/kexec.h>
 #include <linux/bug.h>
 #include <linux/nmi.h>
+#include <linux/sysfs.h>
 
 #include <asm/stacktrace.h>
 
@@ -489,6 +490,7 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
 	printk("DEBUG_PAGEALLOC");
 #endif
 	printk("\n");
+	sysfs_printk_last_file();
 	if (notify_die(DIE_OOPS, str, regs, err,
 			current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
 		return 1;
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index c9e4e5091da1..ce8339c70a4b 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -19,10 +19,18 @@
 #include <linux/poll.h>
 #include <linux/list.h>
 #include <linux/mutex.h>
+#include <linux/limits.h>
 #include <asm/uaccess.h>
 
 #include "sysfs.h"
 
+/* used in crash dumps to help with debugging */
+static char last_sysfs_file[PATH_MAX];
+void sysfs_printk_last_file(void)
+{
+	printk(KERN_EMERG "last sysfs file: %s\n", last_sysfs_file);
+}
+
 /*
  * There's one sysfs_buffer for each open file and one
  * sysfs_open_dirent for each sysfs_dirent with one or more open
@@ -328,6 +336,11 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
 	struct sysfs_buffer *buffer;
 	struct sysfs_ops *ops;
 	int error = -EACCES;
+	char *p;
+
+	p = d_path(&file->f_path, last_sysfs_file, sizeof(last_sysfs_file));
+	if (p)
+		memmove(last_sysfs_file, p, strlen(p) + 1);
 
 	/* need attr_sd for attr and ops, its parent for kobj */
 	if (!sysfs_get_active_two(attr_sd))
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 37fa24152bd8..8ec406afb3eb 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -119,6 +119,8 @@ void sysfs_remove_file_from_group(struct kobject *kobj,
 
 void sysfs_notify(struct kobject *kobj, char *dir, char *attr);
 
+void sysfs_printk_last_file(void);
+
 extern int __must_check sysfs_init(void);
 
 #else /* CONFIG_SYSFS */
@@ -231,6 +233,10 @@ static inline int __must_check sysfs_init(void)
 	return 0;
 }
 
+static inline void sysfs_printk_last_file(void)
+{
+}
+
 #endif /* CONFIG_SYSFS */
 
 #endif /* _SYSFS_H_ */
-- 
cgit v1.2.3


From 7fb6b5d51daf3613045258ee8add07022d8c39d3 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Mon, 21 Jul 2008 20:03:34 -0700
Subject: device create: remove device_create_drvdata

Now that the tree is cleaned up, device_create_drvdata can be safely
removed.

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 246937c9cbc7..60f6456691a6 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -502,7 +502,6 @@ extern struct device *device_create(struct class *cls, struct device *parent,
 				    dev_t devt, void *drvdata,
 				    const char *fmt, ...)
 				    __attribute__((format(printf, 5, 6)));
-#define device_create_drvdata	device_create
 extern void device_destroy(struct class *cls, dev_t devt);
 
 /*
-- 
cgit v1.2.3


From 346e15beb5343c2eb8216d820f2ed8f150822b08 Mon Sep 17 00:00:00 2001
From: Jason Baron <jbaron@redhat.com>
Date: Tue, 12 Aug 2008 16:46:19 -0400
Subject: driver core: basic infrastructure for per-module dynamic debug
 messages

Base infrastructure to enable per-module debug messages.

I've introduced CONFIG_DYNAMIC_PRINTK_DEBUG, which when enabled centralizes
control of debugging statements on a per-module basis in one /proc file,
currently, <debugfs>/dynamic_printk/modules. When, CONFIG_DYNAMIC_PRINTK_DEBUG,
is not set, debugging statements can still be enabled as before, often by
defining 'DEBUG' for the proper compilation unit. Thus, this patch set has no
affect when CONFIG_DYNAMIC_PRINTK_DEBUG is not set.

The infrastructure currently ties into all pr_debug() and dev_dbg() calls. That
is, if CONFIG_DYNAMIC_PRINTK_DEBUG is set, all pr_debug() and dev_dbg() calls
can be dynamically enabled/disabled on a per-module basis.

Future plans include extending this functionality to subsystems, that define
their own debug levels and flags.

Usage:

Dynamic debugging is controlled by the debugfs file,
<debugfs>/dynamic_printk/modules. This file contains a list of the modules that
can be enabled. The format of the file is as follows:

	<module_name> <enabled=0/1>
		.
		.
		.

	<module_name> : Name of the module in which the debug call resides
	<enabled=0/1> : whether the messages are enabled or not

For example:

	snd_hda_intel enabled=0
	fixup enabled=1
	driver enabled=0

Enable a module:

	$echo "set enabled=1 <module_name>" > dynamic_printk/modules

Disable a module:

	$echo "set enabled=0 <module_name>" > dynamic_printk/modules

Enable all modules:

	$echo "set enabled=1 all" > dynamic_printk/modules

Disable all modules:

	$echo "set enabled=0 all" > dynamic_printk/modules

Finally, passing "dynamic_printk" at the command line enables
debugging for all modules. This mode can be turned off via the above
disable command.

[gkh: minor cleanups and tweaks to make the build work quietly]

Signed-off-by: Jason Baron <jbaron@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 Documentation/kernel-parameters.txt |   5 +
 include/asm-generic/vmlinux.lds.h   |  10 +-
 include/linux/device.h              |   6 +-
 include/linux/dynamic_printk.h      |  93 ++++++++
 include/linux/kernel.h              |   7 +-
 include/linux/module.h              |   1 -
 kernel/module.c                     |  31 +++
 lib/Kconfig.debug                   |  55 +++++
 lib/Makefile                        |   2 +
 lib/dynamic_printk.c                | 418 ++++++++++++++++++++++++++++++++++++
 net/netfilter/nf_conntrack_pptp.c   |   2 +-
 scripts/Makefile.lib                |  11 +-
 scripts/basic/Makefile              |   2 +-
 scripts/basic/hash.c                |  64 ++++++
 14 files changed, 700 insertions(+), 7 deletions(-)
 create mode 100644 include/linux/dynamic_printk.h
 create mode 100644 lib/dynamic_printk.c
 create mode 100644 scripts/basic/hash.c

(limited to 'include/linux')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 2443f5bb4364..b429c84ceef2 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1713,6 +1713,11 @@ and is between 256 and 4096 characters. It is defined in the file
 			autoconfiguration.
 			Ranges are in pairs (memory base and size).
 
+	dynamic_printk
+			Enables pr_debug()/dev_dbg() calls if
+			CONFIG_DYNAMIC_PRINTK_DEBUG has been enabled. These can also
+			be switched on/off via <debugfs>/dynamic_printk/modules
+
 	print-fatal-signals=
 			[KNL] debug: print fatal signals
 			print-fatal-signals=1: print segfault info to
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 7440a0dceddb..74c5faf26c05 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -268,7 +268,15 @@
 	CPU_DISCARD(init.data)						\
 	CPU_DISCARD(init.rodata)					\
 	MEM_DISCARD(init.data)						\
-	MEM_DISCARD(init.rodata)
+	MEM_DISCARD(init.rodata)					\
+	/* implement dynamic printk debug */				\
+	VMLINUX_SYMBOL(__start___verbose_strings) = .;                  \
+	*(__verbose_strings)                                            \
+	VMLINUX_SYMBOL(__stop___verbose_strings) = .;                   \
+	. = ALIGN(8);							\
+	VMLINUX_SYMBOL(__start___verbose) = .;                          \
+	*(__verbose)                                                    \
+	VMLINUX_SYMBOL(__stop___verbose) = .;
 
 #define INIT_TEXT							\
 	*(.init.text)							\
diff --git a/include/linux/device.h b/include/linux/device.h
index 60f6456691a6..fb034461b395 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -550,7 +550,11 @@ extern const char *dev_driver_string(const struct device *dev);
 #define dev_info(dev, format, arg...)		\
 	dev_printk(KERN_INFO , dev , format , ## arg)
 
-#ifdef DEBUG
+#if defined(CONFIG_DYNAMIC_PRINTK_DEBUG)
+#define dev_dbg(dev, format, ...) do { \
+	dynamic_dev_dbg(dev, format, ##__VA_ARGS__); \
+	} while (0)
+#elif defined(DEBUG)
 #define dev_dbg(dev, format, arg...)		\
 	dev_printk(KERN_DEBUG , dev , format , ## arg)
 #else
diff --git a/include/linux/dynamic_printk.h b/include/linux/dynamic_printk.h
new file mode 100644
index 000000000000..2d528d009074
--- /dev/null
+++ b/include/linux/dynamic_printk.h
@@ -0,0 +1,93 @@
+#ifndef _DYNAMIC_PRINTK_H
+#define _DYNAMIC_PRINTK_H
+
+#define DYNAMIC_DEBUG_HASH_BITS 6
+#define DEBUG_HASH_TABLE_SIZE (1 << DYNAMIC_DEBUG_HASH_BITS)
+
+#define TYPE_BOOLEAN 1
+
+#define DYNAMIC_ENABLED_ALL 0
+#define DYNAMIC_ENABLED_NONE 1
+#define DYNAMIC_ENABLED_SOME 2
+
+extern int dynamic_enabled;
+
+/* dynamic_printk_enabled, and dynamic_printk_enabled2 are bitmasks in which
+ * bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. They
+ * use independent hash functions, to reduce the chance of false positives.
+ */
+extern long long dynamic_printk_enabled;
+extern long long dynamic_printk_enabled2;
+
+struct mod_debug {
+	char *modname;
+	char *logical_modname;
+	char *flag_names;
+	int type;
+	int hash;
+	int hash2;
+} __attribute__((aligned(8)));
+
+int register_dynamic_debug_module(char *mod_name, int type, char *share_name,
+					char *flags, int hash, int hash2);
+
+#if defined(CONFIG_DYNAMIC_PRINTK_DEBUG)
+extern int unregister_dynamic_debug_module(char *mod_name);
+extern int __dynamic_dbg_enabled_helper(char *modname, int type,
+					int value, int hash);
+
+#define __dynamic_dbg_enabled(module, type, value, level, hash)  ({	     \
+	int __ret = 0;							     \
+	if (unlikely((dynamic_printk_enabled & (1LL << DEBUG_HASH)) &&	     \
+			(dynamic_printk_enabled2 & (1LL << DEBUG_HASH2))))   \
+			__ret = __dynamic_dbg_enabled_helper(module, type,   \
+								value, hash);\
+	__ret; })
+
+#define dynamic_pr_debug(fmt, ...) do {					    \
+	static char mod_name[]						    \
+	__attribute__((section("__verbose_strings")))			    \
+	 = KBUILD_MODNAME;						    \
+	static struct mod_debug descriptor				    \
+	__used								    \
+	__attribute__((section("__verbose"), aligned(8))) =		    \
+	{ mod_name, mod_name, NULL, TYPE_BOOLEAN, DEBUG_HASH, DEBUG_HASH2 };\
+	if (__dynamic_dbg_enabled(KBUILD_MODNAME, TYPE_BOOLEAN,		    \
+						0, 0, DEBUG_HASH))	    \
+		printk(KERN_DEBUG KBUILD_MODNAME ":" fmt,		    \
+				##__VA_ARGS__);				    \
+	} while (0)
+
+#define dynamic_dev_dbg(dev, format, ...) do {				    \
+	static char mod_name[]						    \
+	__attribute__((section("__verbose_strings")))			    \
+	 = KBUILD_MODNAME;						    \
+	static struct mod_debug descriptor				    \
+	__used								    \
+	__attribute__((section("__verbose"), aligned(8))) =		    \
+	{ mod_name, mod_name, NULL, TYPE_BOOLEAN, DEBUG_HASH, DEBUG_HASH2 };\
+	if (__dynamic_dbg_enabled(KBUILD_MODNAME, TYPE_BOOLEAN,		    \
+						0, 0, DEBUG_HASH))	    \
+			dev_printk(KERN_DEBUG, dev,			    \
+					KBUILD_MODNAME ": " format,	    \
+					##__VA_ARGS__);			    \
+	} while (0)
+
+#else
+
+static inline int unregister_dynamic_debug_module(const char *mod_name)
+{
+	return 0;
+}
+static inline int __dynamic_dbg_enabled_helper(char *modname, int type,
+						int value, int hash)
+{
+	return 0;
+}
+
+#define __dynamic_dbg_enabled(module, type, value, level, hash)  ({ 0; })
+#define dynamic_pr_debug(fmt, ...)  do { } while (0)
+#define dynamic_dev_dbg(dev, format, ...)  do { } while (0)
+#endif
+
+#endif
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 75d81f157d2e..ededb6e83b41 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -16,6 +16,7 @@
 #include <linux/log2.h>
 #include <linux/typecheck.h>
 #include <linux/ratelimit.h>
+#include <linux/dynamic_printk.h>
 #include <asm/byteorder.h>
 #include <asm/bug.h>
 
@@ -303,8 +304,12 @@ static inline char *pack_hex_byte(char *buf, u8 byte)
 #define pr_info(fmt, arg...) \
 	printk(KERN_INFO fmt, ##arg)
 
-#ifdef DEBUG
 /* If you are writing a driver, please use dev_dbg instead */
+#if defined(CONFIG_DYNAMIC_PRINTK_DEBUG)
+#define pr_debug(fmt, ...) do { \
+	dynamic_pr_debug(fmt, ##__VA_ARGS__); \
+	} while (0)
+#elif defined(DEBUG)
 #define pr_debug(fmt, arg...) \
 	printk(KERN_DEBUG fmt, ##arg)
 #else
diff --git a/include/linux/module.h b/include/linux/module.h
index 68e09557c951..a41555cbe00a 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -345,7 +345,6 @@ struct module
 	/* Reference counts */
 	struct module_ref ref[NR_CPUS];
 #endif
-
 };
 #ifndef MODULE_ARCH_INIT
 #define MODULE_ARCH_INIT {}
diff --git a/kernel/module.c b/kernel/module.c
index d5fcd24e5aff..c52700667292 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -784,6 +784,7 @@ sys_delete_module(const char __user *name_user, unsigned int flags)
 	mutex_lock(&module_mutex);
 	/* Store the name of the last unloaded module for diagnostic purposes */
 	strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module));
+	unregister_dynamic_debug_module(mod->name);
 	free_module(mod);
 
  out:
@@ -1783,6 +1784,33 @@ static inline void add_kallsyms(struct module *mod,
 }
 #endif /* CONFIG_KALLSYMS */
 
+#ifdef CONFIG_DYNAMIC_PRINTK_DEBUG
+static void dynamic_printk_setup(Elf_Shdr *sechdrs, unsigned int verboseindex)
+{
+	struct mod_debug *debug_info;
+	unsigned long pos, end;
+	unsigned int num_verbose;
+
+	pos = sechdrs[verboseindex].sh_addr;
+	num_verbose = sechdrs[verboseindex].sh_size /
+				sizeof(struct mod_debug);
+	end = pos + (num_verbose * sizeof(struct mod_debug));
+
+	for (; pos < end; pos += sizeof(struct mod_debug)) {
+		debug_info = (struct mod_debug *)pos;
+		register_dynamic_debug_module(debug_info->modname,
+			debug_info->type, debug_info->logical_modname,
+			debug_info->flag_names, debug_info->hash,
+			debug_info->hash2);
+	}
+}
+#else
+static inline void dynamic_printk_setup(Elf_Shdr *sechdrs,
+					unsigned int verboseindex)
+{
+}
+#endif /* CONFIG_DYNAMIC_PRINTK_DEBUG */
+
 static void *module_alloc_update_bounds(unsigned long size)
 {
 	void *ret = module_alloc(size);
@@ -1831,6 +1859,7 @@ static noinline struct module *load_module(void __user *umod,
 #endif
 	unsigned int markersindex;
 	unsigned int markersstringsindex;
+	unsigned int verboseindex;
 	struct module *mod;
 	long err = 0;
 	void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
@@ -2117,6 +2146,7 @@ static noinline struct module *load_module(void __user *umod,
 	markersindex = find_sec(hdr, sechdrs, secstrings, "__markers");
  	markersstringsindex = find_sec(hdr, sechdrs, secstrings,
 					"__markers_strings");
+	verboseindex = find_sec(hdr, sechdrs, secstrings, "__verbose");
 
 	/* Now do relocations. */
 	for (i = 1; i < hdr->e_shnum; i++) {
@@ -2167,6 +2197,7 @@ static noinline struct module *load_module(void __user *umod,
 		marker_update_probe_range(mod->markers,
 			mod->markers + mod->num_markers);
 #endif
+	dynamic_printk_setup(sechdrs, verboseindex);
 	err = module_finalize(hdr, sechdrs, mod);
 	if (err < 0)
 		goto cleanup;
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index aa81d2848448..31d784dd80d0 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -807,6 +807,61 @@ menuconfig BUILD_DOCSRC
 
 	  Say N if you are unsure.
 
+config DYNAMIC_PRINTK_DEBUG
+	bool "Enable dynamic printk() call support"
+	default n
+	depends on PRINTK
+	select PRINTK_DEBUG
+	help
+
+	  Compiles debug level messages into the kernel, which would not
+	  otherwise be available at runtime. These messages can then be
+	  enabled/disabled on a per module basis. This mechanism implicitly
+	  enables all pr_debug() and dev_dbg() calls. The impact of this
+	  compile option is a larger kernel text size of about 2%.
+
+	  Usage:
+
+	  Dynamic debugging is controlled by the debugfs file,
+	  dynamic_printk/modules. This file contains a list of the modules that
+	  can be enabled. The format of the file is the module name, followed
+	  by a set of flags that can be enabled. The first flag is always the
+	  'enabled' flag. For example:
+
+		<module_name> <enabled=0/1>
+				.
+				.
+				.
+
+	  <module_name> : Name of the module in which the debug call resides
+	  <enabled=0/1> : whether the messages are enabled or not
+
+	  From a live system:
+
+		snd_hda_intel enabled=0
+		fixup enabled=0
+		driver enabled=0
+
+	  Enable a module:
+
+	  	$echo "set enabled=1 <module_name>" > dynamic_printk/modules
+
+	  Disable a module:
+
+	  	$echo "set enabled=0 <module_name>" > dynamic_printk/modules
+
+	  Enable all modules:
+
+		$echo "set enabled=1 all" > dynamic_printk/modules
+
+	  Disable all modules:
+
+		$echo "set enabled=0 all" > dynamic_printk/modules
+
+	  Finally, passing "dynamic_printk" at the command line enables
+	  debugging for all modules. This mode can be turned off via the above
+	  disable command.
+
 source "samples/Kconfig"
 
 source "lib/Kconfig.kgdb"
diff --git a/lib/Makefile b/lib/Makefile
index 44001af76a7d..16feaab057b2 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -81,6 +81,8 @@ obj-$(CONFIG_HAVE_LMB) += lmb.o
 
 obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += syscall.o
 
+obj-$(CONFIG_DYNAMIC_PRINTK_DEBUG) += dynamic_printk.o
+
 hostprogs-y	:= gen_crc32table
 clean-files	:= crc32table.h
 
diff --git a/lib/dynamic_printk.c b/lib/dynamic_printk.c
new file mode 100644
index 000000000000..d640f87bdc9e
--- /dev/null
+++ b/lib/dynamic_printk.c
@@ -0,0 +1,418 @@
+/*
+ * lib/dynamic_printk.c
+ *
+ * make pr_debug()/dev_dbg() calls runtime configurable based upon their
+ * their source module.
+ *
+ * Copyright (C) 2008 Red Hat, Inc., Jason Baron <jbaron@redhat.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/uaccess.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <linux/fs.h>
+
+extern struct mod_debug __start___verbose[];
+extern struct mod_debug __stop___verbose[];
+
+struct debug_name {
+	struct hlist_node hlist;
+	struct hlist_node hlist2;
+	int hash1;
+	int hash2;
+	char *name;
+	int enable;
+	int type;
+};
+
+static int nr_entries;
+static int num_enabled;
+int dynamic_enabled = DYNAMIC_ENABLED_NONE;
+static struct hlist_head module_table[DEBUG_HASH_TABLE_SIZE] =
+	{ [0 ... DEBUG_HASH_TABLE_SIZE-1] = HLIST_HEAD_INIT };
+static struct hlist_head module_table2[DEBUG_HASH_TABLE_SIZE] =
+	{ [0 ... DEBUG_HASH_TABLE_SIZE-1] = HLIST_HEAD_INIT };
+static DECLARE_MUTEX(debug_list_mutex);
+
+/* dynamic_printk_enabled, and dynamic_printk_enabled2 are bitmasks in which
+ * bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. They
+ * use independent hash functions, to reduce the chance of false positives.
+ */
+long long dynamic_printk_enabled;
+EXPORT_SYMBOL_GPL(dynamic_printk_enabled);
+long long dynamic_printk_enabled2;
+EXPORT_SYMBOL_GPL(dynamic_printk_enabled2);
+
+/* returns the debug module pointer. */
+static struct debug_name *find_debug_module(char *module_name)
+{
+	int i;
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct debug_name *element;
+
+	element = NULL;
+	for (i = 0; i < DEBUG_HASH_TABLE_SIZE; i++) {
+		head = &module_table[i];
+		hlist_for_each_entry_rcu(element, node, head, hlist)
+			if (!strcmp(element->name, module_name))
+				return element;
+	}
+	return NULL;
+}
+
+/* returns the debug module pointer. */
+static struct debug_name *find_debug_module_hash(char *module_name, int hash)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct debug_name *element;
+
+	element = NULL;
+	head = &module_table[hash];
+	hlist_for_each_entry_rcu(element, node, head, hlist)
+		if (!strcmp(element->name, module_name))
+			return element;
+	return NULL;
+}
+
+/* caller must hold mutex*/
+static int __add_debug_module(char *mod_name, int hash, int hash2)
+{
+	struct debug_name *new;
+	char *module_name;
+	int ret = 0;
+
+	if (find_debug_module(mod_name)) {
+		ret = -EINVAL;
+		goto out;
+	}
+	module_name = kmalloc(strlen(mod_name) + 1, GFP_KERNEL);
+	if (!module_name) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	module_name = strcpy(module_name, mod_name);
+	module_name[strlen(mod_name)] = '\0';
+	new = kzalloc(sizeof(struct debug_name), GFP_KERNEL);
+	if (!new) {
+		kfree(module_name);
+		ret = -ENOMEM;
+		goto out;
+	}
+	INIT_HLIST_NODE(&new->hlist);
+	INIT_HLIST_NODE(&new->hlist2);
+	new->name = module_name;
+	new->hash1 = hash;
+	new->hash2 = hash2;
+	hlist_add_head_rcu(&new->hlist, &module_table[hash]);
+	hlist_add_head_rcu(&new->hlist2, &module_table2[hash2]);
+	nr_entries++;
+out:
+	return ret;
+}
+
+int unregister_dynamic_debug_module(char *mod_name)
+{
+	struct debug_name *element;
+	int ret = 0;
+
+	down(&debug_list_mutex);
+	element = find_debug_module(mod_name);
+	if (!element) {
+		ret = -EINVAL;
+		goto out;
+	}
+	hlist_del_rcu(&element->hlist);
+	hlist_del_rcu(&element->hlist2);
+	synchronize_rcu();
+	kfree(element->name);
+	if (element->enable)
+		num_enabled--;
+	kfree(element);
+	nr_entries--;
+out:
+	up(&debug_list_mutex);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(unregister_dynamic_debug_module);
+
+int register_dynamic_debug_module(char *mod_name, int type, char *share_name,
+					char *flags, int hash, int hash2)
+{
+	struct debug_name *elem;
+	int ret = 0;
+
+	down(&debug_list_mutex);
+	elem = find_debug_module(mod_name);
+	if (!elem) {
+		if (__add_debug_module(mod_name, hash, hash2))
+			goto out;
+		elem = find_debug_module(mod_name);
+		if (dynamic_enabled == DYNAMIC_ENABLED_ALL &&
+				!strcmp(mod_name, share_name)) {
+			elem->enable = true;
+			num_enabled++;
+		}
+	}
+	elem->type |= type;
+out:
+	up(&debug_list_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(register_dynamic_debug_module);
+
+int __dynamic_dbg_enabled_helper(char *mod_name, int type, int value, int hash)
+{
+	struct debug_name *elem;
+	int ret = 0;
+
+	if (dynamic_enabled == DYNAMIC_ENABLED_ALL)
+		return 1;
+	rcu_read_lock();
+	elem = find_debug_module_hash(mod_name, hash);
+	if (elem && elem->enable)
+		ret = 1;
+	rcu_read_unlock();
+	return ret;
+}
+EXPORT_SYMBOL_GPL(__dynamic_dbg_enabled_helper);
+
+static void set_all(bool enable)
+{
+	struct debug_name *e;
+	struct hlist_node *node;
+	int i;
+	long long enable_mask;
+
+	for (i = 0; i < DEBUG_HASH_TABLE_SIZE; i++) {
+		if (module_table[i].first != NULL) {
+			hlist_for_each_entry(e, node, &module_table[i], hlist) {
+				e->enable = enable;
+			}
+		}
+	}
+	if (enable)
+		enable_mask = ULLONG_MAX;
+	else
+		enable_mask = 0;
+	dynamic_printk_enabled = enable_mask;
+	dynamic_printk_enabled2 = enable_mask;
+}
+
+static int disabled_hash(int i, bool first_table)
+{
+	struct debug_name *e;
+	struct hlist_node *node;
+
+	if (first_table) {
+		hlist_for_each_entry(e, node, &module_table[i], hlist) {
+			if (e->enable)
+				return 0;
+		}
+	} else {
+		hlist_for_each_entry(e, node, &module_table2[i], hlist2) {
+			if (e->enable)
+				return 0;
+		}
+	}
+	return 1;
+}
+
+static ssize_t pr_debug_write(struct file *file, const char __user *buf,
+				size_t length, loff_t *ppos)
+{
+	char *buffer, *s, *value_str, *setting_str;
+	int err, value;
+	struct debug_name *elem = NULL;
+	int all = 0;
+
+	if (length > PAGE_SIZE || length < 0)
+		return -EINVAL;
+
+	buffer = (char *)__get_free_page(GFP_KERNEL);
+	if (!buffer)
+		return -ENOMEM;
+
+	err = -EFAULT;
+	if (copy_from_user(buffer, buf, length))
+		goto out;
+
+	err = -EINVAL;
+	if (length < PAGE_SIZE)
+		buffer[length] = '\0';
+	else if (buffer[PAGE_SIZE-1])
+		goto out;
+
+	err = -EINVAL;
+	down(&debug_list_mutex);
+
+	if (strncmp("set", buffer, 3))
+		goto out_up;
+	s = buffer + 3;
+	setting_str = strsep(&s, "=");
+	if (s == NULL)
+		goto out_up;
+	setting_str = strstrip(setting_str);
+	value_str = strsep(&s, " ");
+	if (s == NULL)
+		goto out_up;
+	s = strstrip(s);
+	if (!strncmp(s, "all", 3))
+		all = 1;
+	else
+		elem = find_debug_module(s);
+	if (!strncmp(setting_str, "enable", 6)) {
+		value = !!simple_strtol(value_str, NULL, 10);
+		if (all) {
+			if (value) {
+				set_all(true);
+				num_enabled = nr_entries;
+				dynamic_enabled = DYNAMIC_ENABLED_ALL;
+			} else {
+				set_all(false);
+				num_enabled = 0;
+				dynamic_enabled = DYNAMIC_ENABLED_NONE;
+			}
+			err = 0;
+		} else {
+			if (elem) {
+				if (value && (elem->enable == 0)) {
+					dynamic_printk_enabled |=
+							(1LL << elem->hash1);
+					dynamic_printk_enabled2 |=
+							(1LL << elem->hash2);
+					elem->enable = 1;
+					num_enabled++;
+					dynamic_enabled = DYNAMIC_ENABLED_SOME;
+					err = 0;
+					printk(KERN_DEBUG
+					       "debugging enabled for module %s",
+					       elem->name);
+				} else if (!value && (elem->enable == 1)) {
+					elem->enable = 0;
+					num_enabled--;
+					if (disabled_hash(elem->hash1, true))
+						dynamic_printk_enabled &=
+							~(1LL << elem->hash1);
+					if (disabled_hash(elem->hash2, false))
+						dynamic_printk_enabled2 &=
+							~(1LL << elem->hash2);
+					if (num_enabled)
+						dynamic_enabled =
+							DYNAMIC_ENABLED_SOME;
+					else
+						dynamic_enabled =
+							DYNAMIC_ENABLED_NONE;
+					err = 0;
+					printk(KERN_DEBUG
+					       "debugging disabled for module "
+					       "%s", elem->name);
+				}
+			}
+		}
+	}
+	if (!err)
+		err = length;
+out_up:
+	up(&debug_list_mutex);
+out:
+	free_page((unsigned long)buffer);
+	return err;
+}
+
+static void *pr_debug_seq_start(struct seq_file *f, loff_t *pos)
+{
+	return (*pos < DEBUG_HASH_TABLE_SIZE) ? pos : NULL;
+}
+
+static void *pr_debug_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+	(*pos)++;
+	if (*pos >= DEBUG_HASH_TABLE_SIZE)
+		return NULL;
+	return pos;
+}
+
+static void pr_debug_seq_stop(struct seq_file *s, void *v)
+{
+	/* Nothing to do */
+}
+
+static int pr_debug_seq_show(struct seq_file *s, void *v)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct debug_name *elem;
+	unsigned int i = *(loff_t *) v;
+
+	rcu_read_lock();
+	head = &module_table[i];
+	hlist_for_each_entry_rcu(elem, node, head, hlist) {
+		seq_printf(s, "%s enabled=%d", elem->name, elem->enable);
+		seq_printf(s, "\n");
+	}
+	rcu_read_unlock();
+	return 0;
+}
+
+static struct seq_operations pr_debug_seq_ops = {
+	.start = pr_debug_seq_start,
+	.next  = pr_debug_seq_next,
+	.stop  = pr_debug_seq_stop,
+	.show  = pr_debug_seq_show
+};
+
+static int pr_debug_open(struct inode *inode, struct file *filp)
+{
+	return seq_open(filp, &pr_debug_seq_ops);
+}
+
+static const struct file_operations pr_debug_operations = {
+	.open		= pr_debug_open,
+	.read		= seq_read,
+	.write		= pr_debug_write,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static int __init dynamic_printk_init(void)
+{
+	struct dentry *dir, *file;
+	struct mod_debug *iter;
+	unsigned long value;
+
+	dir = debugfs_create_dir("dynamic_printk", NULL);
+	if (!dir)
+		return -ENOMEM;
+	file = debugfs_create_file("modules", 0644, dir, NULL,
+					&pr_debug_operations);
+	if (!file) {
+		debugfs_remove(dir);
+		return -ENOMEM;
+	}
+	for (value = (unsigned long)__start___verbose;
+		value < (unsigned long)__stop___verbose;
+		value += sizeof(struct mod_debug)) {
+			iter = (struct mod_debug *)value;
+			register_dynamic_debug_module(iter->modname,
+				iter->type,
+				iter->logical_modname,
+				iter->flag_names, iter->hash, iter->hash2);
+	}
+	return 0;
+}
+module_init(dynamic_printk_init);
+/* may want to move this earlier so we can get traces as early as possible */
+
+static int __init dynamic_printk_setup(char *str)
+{
+	if (str)
+		return -ENOENT;
+	set_all(true);
+	return 0;
+}
+/* Use early_param(), so we can get debug output as early as possible */
+early_param("dynamic_printk", dynamic_printk_setup);
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 373e51e91ce5..1bc3001d1827 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -65,7 +65,7 @@ void
 			     struct nf_conntrack_expect *exp) __read_mostly;
 EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_expectfn);
 
-#ifdef DEBUG
+#if defined(DEBUG) || defined(CONFIG_DYNAMIC_PRINTK_DEBUG)
 /* PptpControlMessageType names */
 const char *const pptp_msg_name[] = {
 	"UNKNOWN_MESSAGE",
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index ea48b82a3707..b4ca38a21158 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -96,6 +96,14 @@ basename_flags = -D"KBUILD_BASENAME=KBUILD_STR($(call name-fix,$(basetarget)))"
 modname_flags  = $(if $(filter 1,$(words $(modname))),\
                  -D"KBUILD_MODNAME=KBUILD_STR($(call name-fix,$(modname)))")
 
+#hash values
+ifdef CONFIG_DYNAMIC_PRINTK_DEBUG
+debug_flags = -D"DEBUG_HASH=$(shell ./scripts/basic/hash djb2 $(@D)$(modname))"\
+              -D"DEBUG_HASH2=$(shell ./scripts/basic/hash r5 $(@D)$(modname))"
+else
+debug_flags =
+endif
+
 orig_c_flags   = $(KBUILD_CFLAGS) $(ccflags-y) $(CFLAGS_$(basetarget).o)
 _c_flags       = $(filter-out $(CFLAGS_REMOVE_$(basetarget).o), $(orig_c_flags))
 _a_flags       = $(KBUILD_AFLAGS) $(asflags-y) $(AFLAGS_$(basetarget).o)
@@ -121,7 +129,8 @@ endif
 
 c_flags        = -Wp,-MD,$(depfile) $(NOSTDINC_FLAGS) $(KBUILD_CPPFLAGS) \
 		 $(__c_flags) $(modkern_cflags) \
-		 -D"KBUILD_STR(s)=\#s" $(basename_flags) $(modname_flags)
+		 -D"KBUILD_STR(s)=\#s" $(basename_flags) $(modname_flags) \
+		  $(debug_flags)
 
 a_flags        = -Wp,-MD,$(depfile) $(NOSTDINC_FLAGS) $(KBUILD_CPPFLAGS) \
 		 $(__a_flags) $(modkern_aflags)
diff --git a/scripts/basic/Makefile b/scripts/basic/Makefile
index 4c324a1f1e0e..09559951df12 100644
--- a/scripts/basic/Makefile
+++ b/scripts/basic/Makefile
@@ -9,7 +9,7 @@
 # fixdep: 	 Used to generate dependency information during build process
 # docproc:	 Used in Documentation/DocBook
 
-hostprogs-y	:= fixdep docproc
+hostprogs-y	:= fixdep docproc hash
 always		:= $(hostprogs-y)
 
 # fixdep is needed to compile other host programs
diff --git a/scripts/basic/hash.c b/scripts/basic/hash.c
new file mode 100644
index 000000000000..3299ad7fc8c0
--- /dev/null
+++ b/scripts/basic/hash.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2008 Red Hat, Inc., Jason Baron <jbaron@redhat.com>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define DYNAMIC_DEBUG_HASH_BITS 6
+
+static const char *program;
+
+static void usage(void)
+{
+	printf("Usage: %s <djb2|r5> <modname>\n", program);
+	exit(1);
+}
+
+/* djb2 hashing algorithm by Dan Bernstein. From:
+ * http://www.cse.yorku.ca/~oz/hash.html
+ */
+
+unsigned int djb2_hash(char *str)
+{
+	unsigned long hash = 5381;
+	int c;
+
+	c = *str;
+	while (c) {
+		hash = ((hash << 5) + hash) + c;
+		c = *++str;
+	}
+	return (unsigned int)(hash & ((1 << DYNAMIC_DEBUG_HASH_BITS) - 1));
+}
+
+unsigned int r5_hash(char *str)
+{
+	unsigned long hash = 0;
+	int c;
+
+	c = *str;
+	while (c) {
+		hash = (hash + (c << 4) + (c >> 4)) * 11;
+		c = *++str;
+	}
+	return (unsigned int)(hash & ((1 << DYNAMIC_DEBUG_HASH_BITS) - 1));
+}
+
+int main(int argc, char *argv[])
+{
+	program = argv[0];
+
+	if (argc != 3)
+		usage();
+	if (!strcmp(argv[1], "djb2"))
+		printf("%d\n", djb2_hash(argv[2]));
+	else if (!strcmp(argv[1], "r5"))
+		printf("%d\n", r5_hash(argv[2]));
+	else
+		usage();
+	exit(0);
+}
+
-- 
cgit v1.2.3


From f1282c844e86db5a041afa41335b5f9eea6cec0c Mon Sep 17 00:00:00 2001
From: Neil Brown <neilb@suse.de>
Date: Wed, 16 Jul 2008 08:58:04 +1000
Subject: sysfs: Support sysfs_notify from atomic context with new
 sysfs_notify_dirent

Support sysfs_notify from atomic context with new sysfs_notify_dirent

sysfs_notify currently takes sysfs_mutex.
This means that it cannot be called in atomic context.
sysfs_mutex  is sometimes held over a malloc (sysfs_rename_dir)
so it can block on low memory.

In md I want to be able to notify on a sysfs attribute from
atomic context, and I don't want to block on low memory because I
could be in the writeout path for freeing memory.

So:
 - export the "sysfs_dirent" structure along with sysfs_get, sysfs_put
   and sysfs_get_dirent so I can get the sysfs_dirent that I want to
   notify on and hold it in an md structure.
 - split sysfs_notify_dirent out of sysfs_notify so the sysfs_dirent
   can be notified on with no blocking (just a spinlock).

Signed-off-by: Neil Brown <neilb@suse.de>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/sysfs/dir.c        |  1 +
 fs/sysfs/file.c       | 31 ++++++++++++++++++-------------
 fs/sysfs/mount.c      | 15 +++++++++++++++
 fs/sysfs/sysfs.h      |  6 ++++--
 include/linux/sysfs.h | 27 ++++++++++++++++++++++++---
 5 files changed, 62 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index aedaeba82ae5..53bc7fc31af3 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -636,6 +636,7 @@ struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
 
 	return sd;
 }
+EXPORT_SYMBOL_GPL(sysfs_get_dirent);
 
 static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd,
 		      const char *name, struct sysfs_dirent **p_sd)
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index ce8339c70a4b..d0d79e6b6d11 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -453,6 +453,22 @@ static unsigned int sysfs_poll(struct file *filp, poll_table *wait)
 	return POLLERR|POLLPRI;
 }
 
+void sysfs_notify_dirent(struct sysfs_dirent *sd)
+{
+	struct sysfs_open_dirent *od;
+
+	spin_lock(&sysfs_open_dirent_lock);
+
+	od = sd->s_attr.open;
+	if (od) {
+		atomic_inc(&od->event);
+		wake_up_interruptible(&od->poll);
+	}
+
+	spin_unlock(&sysfs_open_dirent_lock);
+}
+EXPORT_SYMBOL_GPL(sysfs_notify_dirent);
+
 void sysfs_notify(struct kobject *k, char *dir, char *attr)
 {
 	struct sysfs_dirent *sd = k->sd;
@@ -463,19 +479,8 @@ void sysfs_notify(struct kobject *k, char *dir, char *attr)
 		sd = sysfs_find_dirent(sd, dir);
 	if (sd && attr)
 		sd = sysfs_find_dirent(sd, attr);
-	if (sd) {
-		struct sysfs_open_dirent *od;
-
-		spin_lock(&sysfs_open_dirent_lock);
-
-		od = sd->s_attr.open;
-		if (od) {
-			atomic_inc(&od->event);
-			wake_up_interruptible(&od->poll);
-		}
-
-		spin_unlock(&sysfs_open_dirent_lock);
-	}
+	if (sd)
+		sysfs_notify_dirent(sd);
 
 	mutex_unlock(&sysfs_mutex);
 }
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 14f0023984d7..ab343e371d64 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -16,6 +16,7 @@
 #include <linux/mount.h>
 #include <linux/pagemap.h>
 #include <linux/init.h>
+#include <linux/module.h>
 
 #include "sysfs.h"
 
@@ -115,3 +116,17 @@ out_err:
 	sysfs_dir_cachep = NULL;
 	goto out;
 }
+
+#undef sysfs_get
+struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd)
+{
+	return __sysfs_get(sd);
+}
+EXPORT_SYMBOL_GPL(sysfs_get);
+
+#undef sysfs_put
+void sysfs_put(struct sysfs_dirent *sd)
+{
+	__sysfs_put(sd);
+}
+EXPORT_SYMBOL_GPL(sysfs_put);
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index a5db496f71c7..93c6d6b27c4d 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -124,7 +124,7 @@ int sysfs_create_subdir(struct kobject *kobj, const char *name,
 			struct sysfs_dirent **p_sd);
 void sysfs_remove_subdir(struct sysfs_dirent *sd);
 
-static inline struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd)
+static inline struct sysfs_dirent *__sysfs_get(struct sysfs_dirent *sd)
 {
 	if (sd) {
 		WARN_ON(!atomic_read(&sd->s_count));
@@ -132,12 +132,14 @@ static inline struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd)
 	}
 	return sd;
 }
+#define sysfs_get(sd) __sysfs_get(sd)
 
-static inline void sysfs_put(struct sysfs_dirent *sd)
+static inline void __sysfs_put(struct sysfs_dirent *sd)
 {
 	if (sd && atomic_dec_and_test(&sd->s_count))
 		release_sysfs_dirent(sd);
 }
+#define sysfs_put(sd) __sysfs_put(sd)
 
 /*
  * inode.c
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 8ec406afb3eb..d8e0230f1e68 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -78,6 +78,8 @@ struct sysfs_ops {
 	ssize_t	(*store)(struct kobject *,struct attribute *,const char *, size_t);
 };
 
+struct sysfs_dirent;
+
 #ifdef CONFIG_SYSFS
 
 int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *),
@@ -118,10 +120,13 @@ void sysfs_remove_file_from_group(struct kobject *kobj,
 			const struct attribute *attr, const char *group);
 
 void sysfs_notify(struct kobject *kobj, char *dir, char *attr);
-
+void sysfs_notify_dirent(struct sysfs_dirent *sd);
+struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
+				      const unsigned char *name);
+struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd);
+void sysfs_put(struct sysfs_dirent *sd);
 void sysfs_printk_last_file(void);
-
-extern int __must_check sysfs_init(void);
+int __must_check sysfs_init(void);
 
 #else /* CONFIG_SYSFS */
 
@@ -227,6 +232,22 @@ static inline void sysfs_remove_file_from_group(struct kobject *kobj,
 static inline void sysfs_notify(struct kobject *kobj, char *dir, char *attr)
 {
 }
+static inline void sysfs_notify_dirent(struct sysfs_dirent *sd)
+{
+}
+static inline
+struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
+				      const unsigned char *name)
+{
+	return NULL;
+}
+static inline struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd)
+{
+	return NULL;
+}
+static inline void sysfs_put(struct sysfs_dirent *sd)
+{
+}
 
 static inline int __must_check sysfs_init(void)
 {
-- 
cgit v1.2.3


From e61396627f91abb855ddd8925be9172fb5871944 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Sat, 20 Sep 2008 19:08:39 -0700
Subject: debug: Introduce a dev_WARN() function

in the line of dev_printk(), this patch introduces a dev_WARN() function,
that takes a struct device and then a printk format/args set of arguments.
Unlike dev_printk(), the effect is that of WARN() in that a full warning
message (including filename/line, module list, versions and a backtrace)
is printed in addition to the device name and the arguments.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index fb034461b395..ec90e79f6a00 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -570,6 +570,14 @@ extern const char *dev_driver_string(const struct device *dev);
 	({ if (0) dev_printk(KERN_DEBUG, dev, format, ##arg); 0; })
 #endif
 
+/*
+ * dev_WARN() acts like dev_printk(), but with the key difference
+ * of using a WARN/WARN_ON to get the message out, including the
+ * file/line information and a backtrace.
+ */
+#define dev_WARN(dev, format, arg...) \
+	WARN(1, "Device: %s\n" format, dev_driver_string(dev), ## arg);
+
 /* Create alias, so I can be autoloaded. */
 #define MODULE_ALIAS_CHARDEV(major,minor) \
 	MODULE_ALIAS("char-major-" __stringify(major) "-" __stringify(minor))
-- 
cgit v1.2.3


From d8bf254089a6c31d7d01a4d1d2f1861662900855 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dbaryshkov@gmail.com>
Date: Mon, 22 Sep 2008 14:41:40 -0700
Subject: platform: add new device registration helper

Add a helper that registers simple platform_device w/o resources but with
parent and device data.

This is usefull to cleanup platform code from code that registers such
simple devices as leds-gpio, generic-bl, etc.

Signed-off-by: Dmitry Baryshkov <dbaryshkov@gmail.com>
Cc: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/platform.c         | 47 +++++++++++++++++++++++++++++++++++++++++
 include/linux/platform_device.h |  2 ++
 2 files changed, 49 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index e621dade7eaa..9e60f7c739c6 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -391,6 +391,53 @@ error:
 }
 EXPORT_SYMBOL_GPL(platform_device_register_simple);
 
+/**
+ * platform_device_register_data
+ * @parent: parent device for the device we're adding
+ * @name: base name of the device we're adding
+ * @id: instance id
+ * @data: platform specific data for this platform device
+ * @size: size of platform specific data
+ *
+ * This function creates a simple platform device that requires minimal
+ * resource and memory management. Canned release function freeing memory
+ * allocated for the device allows drivers using such devices to be
+ * unloaded without waiting for the last reference to the device to be
+ * dropped.
+ */
+struct platform_device *platform_device_register_data(
+		struct device *parent,
+		const char *name, int id,
+		const void *data, size_t size)
+{
+	struct platform_device *pdev;
+	int retval;
+
+	pdev = platform_device_alloc(name, id);
+	if (!pdev) {
+		retval = -ENOMEM;
+		goto error;
+	}
+
+	pdev->dev.parent = parent;
+
+	if (size) {
+		retval = platform_device_add_data(pdev, data, size);
+		if (retval)
+			goto error;
+	}
+
+	retval = platform_device_add(pdev);
+	if (retval)
+		goto error;
+
+	return pdev;
+
+error:
+	platform_device_put(pdev);
+	return ERR_PTR(retval);
+}
+
 static int platform_drv_probe(struct device *_dev)
 {
 	struct platform_driver *drv = to_platform_driver(_dev->driver);
diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index 95ac21ab3a09..4b8cc6a32479 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -37,6 +37,8 @@ extern int platform_add_devices(struct platform_device **, int);
 
 extern struct platform_device *platform_device_register_simple(const char *, int id,
 					struct resource *, unsigned int);
+extern struct platform_device *platform_device_register_data(struct device *,
+		const char *, int, const void *, size_t);
 
 extern struct platform_device *platform_device_alloc(const char *name, int id);
 extern int platform_device_add_resources(struct platform_device *pdev, struct resource *res, unsigned int num);
-- 
cgit v1.2.3


From 8c0e3998f5b71e68fe6b6e489a92e052715e563c Mon Sep 17 00:00:00 2001
From: Trent Piepho <tpiepho@freescale.com>
Date: Thu, 25 Sep 2008 16:45:13 -0700
Subject: sysfs: Make dir and name args to sysfs_notify() const

Because they can be, and because code like this produces a warning if
they're not:

struct device_attribute dev_attr;

sysfs_notify(&kobj, NULL, dev_attr.attr.name);

Signed-off-by: Trent Piepho <tpiepho@freescale.com>
CC: Neil Brown <neilb@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/sysfs/file.c       | 2 +-
 include/linux/sysfs.h | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index d0d79e6b6d11..1f4a3f877262 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -469,7 +469,7 @@ void sysfs_notify_dirent(struct sysfs_dirent *sd)
 }
 EXPORT_SYMBOL_GPL(sysfs_notify_dirent);
 
-void sysfs_notify(struct kobject *k, char *dir, char *attr)
+void sysfs_notify(struct kobject *k, const char *dir, const char *attr)
 {
 	struct sysfs_dirent *sd = k->sd;
 
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index d8e0230f1e68..b330e289d71f 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -119,7 +119,7 @@ int sysfs_add_file_to_group(struct kobject *kobj,
 void sysfs_remove_file_from_group(struct kobject *kobj,
 			const struct attribute *attr, const char *group);
 
-void sysfs_notify(struct kobject *kobj, char *dir, char *attr);
+void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr);
 void sysfs_notify_dirent(struct sysfs_dirent *sd);
 struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
 				      const unsigned char *name);
@@ -229,7 +229,8 @@ static inline void sysfs_remove_file_from_group(struct kobject *kobj,
 {
 }
 
-static inline void sysfs_notify(struct kobject *kobj, char *dir, char *attr)
+static inline void sysfs_notify(struct kobject *kobj, const char *dir,
+				const char *attr)
 {
 }
 static inline void sysfs_notify_dirent(struct sysfs_dirent *sd)
-- 
cgit v1.2.3


From 030c1d2bfcc2187650fb975456ca0b61a5bb77f4 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 8 May 2008 14:41:00 -0700
Subject: kobject: Fix kobject_rename and !CONFIG_SYSFS

When looking at kobject_rename I found two bugs with
that exist when sysfs support is disabled in the kernel.

kobject_rename does not change the name on the kobject when
sysfs support is not compiled in.

kobject_rename without locking attempts to check the
validity of a rename operation, which the kobject layer
simply does not have the infrastructure to do.

This patch documents the previously unstated requirement of
kobject_rename that is the responsibility of the caller to
provide mutual exclusion and to be certain that the new_name
for the kobject is valid.

This patch modifies sysfs_rename_dir in !CONFIG_SYSFS case
to call kobject_set_name to actually change the kobject_name.

This patch removes the bogus and misleading check in kobject_rename
that attempts to see if a rename is valid.  The check is bogus
because we do not have the proper locking.  The check is misleading
because it looks like we can and do perform checking at the kobject
level that we don't.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 Documentation/kobject.txt |  4 ++++
 drivers/base/core.c       |  5 +++++
 include/linux/sysfs.h     |  4 +++-
 lib/kobject.c             | 18 +++++-------------
 4 files changed, 17 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/kobject.txt b/Documentation/kobject.txt
index 51a8021ee532..f5d2aad65a67 100644
--- a/Documentation/kobject.txt
+++ b/Documentation/kobject.txt
@@ -118,6 +118,10 @@ the name of the kobject, call kobject_rename():
 
     int kobject_rename(struct kobject *kobj, const char *new_name);
 
+Note kobject_rename does perform any locking or have a solid notion of
+what names are valid so the provide must provide their own sanity checking
+and serialization.
+
 There is a function called kobject_set_name() but that is legacy cruft and
 is being removed.  If your code needs to call this function, it is
 incorrect and needs to be fixed.
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 9649d1c422a4..8c2cc2648f5a 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -1327,6 +1327,11 @@ EXPORT_SYMBOL_GPL(device_destroy);
  * device_rename - renames a device
  * @dev: the pointer to the struct device to be renamed
  * @new_name: the new name of the device
+ *
+ * It is the responsibility of the caller to provide mutual
+ * exclusion between two different calls of device_rename
+ * on the same device to ensure that new_name is valid and
+ * won't conflict with other devices.
  */
 int device_rename(struct device *dev, char *new_name)
 {
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index b330e289d71f..39924a962207 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -20,6 +20,8 @@
 struct kobject;
 struct module;
 
+extern int kobject_set_name(struct kobject *kobj, const char *name, ...)
+			    __attribute__((format(printf, 2, 3)));
 /* FIXME
  * The *owner field is no longer used, but leave around
  * until the tree gets cleaned up fully.
@@ -147,7 +149,7 @@ static inline void sysfs_remove_dir(struct kobject *kobj)
 
 static inline int sysfs_rename_dir(struct kobject *kobj, const char *new_name)
 {
-	return 0;
+	return kobject_set_name(kobj, "%s", new_name);
 }
 
 static inline int sysfs_move_dir(struct kobject *kobj,
diff --git a/lib/kobject.c b/lib/kobject.c
index fbf0ae282376..ae6bb900bfb6 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -387,6 +387,11 @@ EXPORT_SYMBOL_GPL(kobject_init_and_add);
  * kobject_rename - change the name of an object
  * @kobj: object in question.
  * @new_name: object's new name
+ *
+ * It is the responsibility of the caller to provide mutual
+ * exclusion between two different calls of kobject_rename
+ * on the same kobject and to ensure that new_name is valid and
+ * won't conflict with other kobjects.
  */
 int kobject_rename(struct kobject *kobj, const char *new_name)
 {
@@ -401,19 +406,6 @@ int kobject_rename(struct kobject *kobj, const char *new_name)
 	if (!kobj->parent)
 		return -EINVAL;
 
-	/* see if this name is already in use */
-	if (kobj->kset) {
-		struct kobject *temp_kobj;
-		temp_kobj = kset_find_obj(kobj->kset, new_name);
-		if (temp_kobj) {
-			printk(KERN_WARNING "kobject '%s' cannot be renamed "
-			       "to '%s' as '%s' is already in existence.\n",
-			       kobject_name(kobj), new_name, new_name);
-			kobject_put(temp_kobj);
-			return -EINVAL;
-		}
-	}
-
 	devpath = kobject_get_path(kobj, GFP_KERNEL);
 	if (!devpath) {
 		error = -ENOMEM;
-- 
cgit v1.2.3


From 0b4a4fea253e1296222603ccc55430ed7cd9413a Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 3 Jul 2008 18:05:28 -0700
Subject: kobject: Cleanup kobject_rename and !CONFIG_SYSFS

It finally dawned on me what the clean fix to sysfs_rename_dir
calling kobject_set_name is.  Move the work into kobject_rename
where it belongs.  The callers serialize us anyway so this is
safe.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/sysfs/dir.c        |  6 +-----
 include/linux/sysfs.h |  4 +---
 lib/kobject.c         | 17 +++++++++++++++--
 3 files changed, 17 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index c18342641cec..3a05a596e3b4 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -829,16 +829,12 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
 	if (!new_dentry)
 		goto out_unlock;
 
-	/* rename kobject and sysfs_dirent */
+	/* rename sysfs_dirent */
 	error = -ENOMEM;
 	new_name = dup_name = kstrdup(new_name, GFP_KERNEL);
 	if (!new_name)
 		goto out_unlock;
 
-	error = kobject_set_name(kobj, "%s", new_name);
-	if (error)
-		goto out_unlock;
-
 	dup_name = sd->s_name;
 	sd->s_name = new_name;
 
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 39924a962207..b330e289d71f 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -20,8 +20,6 @@
 struct kobject;
 struct module;
 
-extern int kobject_set_name(struct kobject *kobj, const char *name, ...)
-			    __attribute__((format(printf, 2, 3)));
 /* FIXME
  * The *owner field is no longer used, but leave around
  * until the tree gets cleaned up fully.
@@ -149,7 +147,7 @@ static inline void sysfs_remove_dir(struct kobject *kobj)
 
 static inline int sysfs_rename_dir(struct kobject *kobj, const char *new_name)
 {
-	return kobject_set_name(kobj, "%s", new_name);
+	return 0;
 }
 
 static inline int sysfs_move_dir(struct kobject *kobj,
diff --git a/lib/kobject.c b/lib/kobject.c
index ae6bb900bfb6..0487d1f64806 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -397,6 +397,7 @@ int kobject_rename(struct kobject *kobj, const char *new_name)
 {
 	int error = 0;
 	const char *devpath = NULL;
+	const char *dup_name = NULL, *name;
 	char *devpath_string = NULL;
 	char *envp[2];
 
@@ -420,15 +421,27 @@ int kobject_rename(struct kobject *kobj, const char *new_name)
 	envp[0] = devpath_string;
 	envp[1] = NULL;
 
+	name = dup_name = kstrdup(new_name, GFP_KERNEL);
+	if (!name) {
+		error = -ENOMEM;
+		goto out;
+	}
+
 	error = sysfs_rename_dir(kobj, new_name);
+	if (error)
+		goto out;
+
+	/* Install the new kobject name */
+	dup_name = kobj->name;
+	kobj->name = name;
 
 	/* This function is mostly/only used for network interface.
 	 * Some hotplug package track interfaces by their name and
 	 * therefore want to know when the name is changed by the user. */
-	if (!error)
-		kobject_uevent_env(kobj, KOBJ_MOVE, envp);
+	kobject_uevent_env(kobj, KOBJ_MOVE, envp);
 
 out:
+	kfree(dup_name);
 	kfree(devpath_string);
 	kfree(devpath);
 	kobject_put(kobj);
-- 
cgit v1.2.3


From 99178b036c97293a65004ff5ec5cff9f833aaecd Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Tue, 26 Aug 2008 11:00:57 -0500
Subject: Driver core: add bus_sort_breadthfirst() function

The PCI core wants to reorder the devices in the bus list.  So move this
functionality out of the pci core and into the driver core so that
anyone else can also do this if needed.  This also lets us change how
struct device is attached to drivers in the future without messing with
the PCI core.

Acked-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/bus.c     | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/pci/probe.c    | 50 +++++---------------------------------------------
 include/linux/device.h |  3 +++
 3 files changed, 58 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/bus.c b/drivers/base/bus.c
index 39b9b58c6974..5aee1c0169ea 100644
--- a/drivers/base/bus.c
+++ b/drivers/base/bus.c
@@ -980,6 +980,56 @@ struct klist *bus_get_device_klist(struct bus_type *bus)
 }
 EXPORT_SYMBOL_GPL(bus_get_device_klist);
 
+/*
+ * Yes, this forcably breaks the klist abstraction temporarily.  It
+ * just wants to sort the klist, not change reference counts and
+ * take/drop locks rapidly in the process.  It does all this while
+ * holding the lock for the list, so objects can't otherwise be
+ * added/removed while we're swizzling.
+ */
+static void device_insertion_sort_klist(struct device *a, struct list_head *list,
+					int (*compare)(const struct device *a,
+							const struct device *b))
+{
+	struct list_head *pos;
+	struct klist_node *n;
+	struct device *b;
+
+	list_for_each(pos, list) {
+		n = container_of(pos, struct klist_node, n_node);
+		b = container_of(n, struct device, knode_bus);
+		if (compare(a, b) <= 0) {
+			list_move_tail(&a->knode_bus.n_node,
+				       &b->knode_bus.n_node);
+			return;
+		}
+	}
+	list_move_tail(&a->knode_bus.n_node, list);
+}
+
+void bus_sort_breadthfirst(struct bus_type *bus,
+			   int (*compare)(const struct device *a,
+					  const struct device *b))
+{
+	LIST_HEAD(sorted_devices);
+	struct list_head *pos, *tmp;
+	struct klist_node *n;
+	struct device *dev;
+	struct klist *device_klist;
+
+	device_klist = bus_get_device_klist(bus);
+
+	spin_lock(&device_klist->k_lock);
+	list_for_each_safe(pos, tmp, &device_klist->k_list) {
+		n = container_of(pos, struct klist_node, n_node);
+		dev = container_of(n, struct device, knode_bus);
+		device_insertion_sort_klist(dev, &sorted_devices, compare);
+	}
+	list_splice(&sorted_devices, &device_klist->k_list);
+	spin_unlock(&device_klist->k_lock);
+}
+EXPORT_SYMBOL_GPL(bus_sort_breadthfirst);
+
 int __init buses_init(void)
 {
 	bus_kset = kset_create_and_add("bus", &bus_uevent_ops, NULL);
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 36698e57b97f..dd9161a054e1 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1237,8 +1237,11 @@ EXPORT_SYMBOL(pci_scan_bridge);
 EXPORT_SYMBOL_GPL(pci_scan_child_bus);
 #endif
 
-static int __init pci_sort_bf_cmp(const struct pci_dev *a, const struct pci_dev *b)
+static int __init pci_sort_bf_cmp(const struct device *d_a, const struct device *d_b)
 {
+	const struct pci_dev *a = to_pci_dev(d_a);
+	const struct pci_dev *b = to_pci_dev(d_b);
+
 	if      (pci_domain_nr(a->bus) < pci_domain_nr(b->bus)) return -1;
 	else if (pci_domain_nr(a->bus) > pci_domain_nr(b->bus)) return  1;
 
@@ -1251,50 +1254,7 @@ static int __init pci_sort_bf_cmp(const struct pci_dev *a, const struct pci_dev
 	return 0;
 }
 
-/*
- * Yes, this forcably breaks the klist abstraction temporarily.  It
- * just wants to sort the klist, not change reference counts and
- * take/drop locks rapidly in the process.  It does all this while
- * holding the lock for the list, so objects can't otherwise be
- * added/removed while we're swizzling.
- */
-static void __init pci_insertion_sort_klist(struct pci_dev *a, struct list_head *list)
-{
-	struct list_head *pos;
-	struct klist_node *n;
-	struct device *dev;
-	struct pci_dev *b;
-
-	list_for_each(pos, list) {
-		n = container_of(pos, struct klist_node, n_node);
-		dev = container_of(n, struct device, knode_bus);
-		b = to_pci_dev(dev);
-		if (pci_sort_bf_cmp(a, b) <= 0) {
-			list_move_tail(&a->dev.knode_bus.n_node, &b->dev.knode_bus.n_node);
-			return;
-		}
-	}
-	list_move_tail(&a->dev.knode_bus.n_node, list);
-}
-
 void __init pci_sort_breadthfirst(void)
 {
-	LIST_HEAD(sorted_devices);
-	struct list_head *pos, *tmp;
-	struct klist_node *n;
-	struct device *dev;
-	struct pci_dev *pdev;
-	struct klist *device_klist;
-
-	device_klist = bus_get_device_klist(&pci_bus_type);
-
-	spin_lock(&device_klist->k_lock);
-	list_for_each_safe(pos, tmp, &device_klist->k_list) {
-		n = container_of(pos, struct klist_node, n_node);
-		dev = container_of(n, struct device, knode_bus);
-		pdev = to_pci_dev(dev);
-		pci_insertion_sort_klist(pdev, &sorted_devices);
-	}
-	list_splice(&sorted_devices, &device_klist->k_list);
-	spin_unlock(&device_klist->k_lock);
+	bus_sort_breadthfirst(&pci_bus_type, &pci_sort_bf_cmp);
 }
diff --git a/include/linux/device.h b/include/linux/device.h
index ec90e79f6a00..987f5912720a 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -90,6 +90,9 @@ int __must_check bus_for_each_drv(struct bus_type *bus,
 				  struct device_driver *start, void *data,
 				  int (*fn)(struct device_driver *, void *));
 
+void bus_sort_breadthfirst(struct bus_type *bus,
+			   int (*compare)(const struct device *a,
+					  const struct device *b));
 /*
  * Bus notifiers: Get notified of addition/removal of devices
  * and binding/unbinding of drivers to devices.
-- 
cgit v1.2.3


From 1648993fb05c487947c1cec6307aca29d8002abe Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 15 Oct 2008 22:01:03 -0700
Subject: introduce generic header file for the software IO/TLB

A series of patches introduce a generic header file for the software
IO/TLB implementation in lib/swiotlb.c.  Currently each architecture using
this code defines the prototypes itself.  The prototypes are moved to
include/linux/swiotlb.h and this file is included in architecture specific
code for X86 and IA64.

This patch:

Create include/linux/swiotlb.h file which contains all function prototypes
for the lib/swiotlb.c file.

(akpm: the dependent patches will be trickled through arch trees)

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "Luck, Tony" <tony.luck@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swiotlb.h | 83 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 83 insertions(+)
 create mode 100644 include/linux/swiotlb.h

(limited to 'include/linux')

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
new file mode 100644
index 000000000000..b18ec5533e8c
--- /dev/null
+++ b/include/linux/swiotlb.h
@@ -0,0 +1,83 @@
+#ifndef __LINUX_SWIOTLB_H
+#define __LINUX_SWIOTLB_H
+
+#include <linux/types.h>
+
+struct device;
+struct dma_attrs;
+struct scatterlist;
+
+extern void
+swiotlb_init(void);
+
+extern void
+*swiotlb_alloc_coherent(struct device *hwdev, size_t size,
+			dma_addr_t *dma_handle, gfp_t flags);
+
+extern void
+swiotlb_free_coherent(struct device *hwdev, size_t size,
+		      void *vaddr, dma_addr_t dma_handle);
+
+extern dma_addr_t
+swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir);
+
+extern void
+swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr,
+		     size_t size, int dir);
+
+extern dma_addr_t
+swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size,
+			 int dir, struct dma_attrs *attrs);
+
+extern void
+swiotlb_unmap_single_attrs(struct device *hwdev, dma_addr_t dev_addr,
+			   size_t size, int dir, struct dma_attrs *attrs);
+
+extern int
+swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nents,
+	       int direction);
+
+extern void
+swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents,
+		 int direction);
+
+extern int
+swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
+		     int dir, struct dma_attrs *attrs);
+
+extern void
+swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
+		       int nelems, int dir, struct dma_attrs *attrs);
+
+extern void
+swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
+			    size_t size, int dir);
+
+extern void
+swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
+			int nelems, int dir);
+
+extern void
+swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
+			       size_t size, int dir);
+
+extern void
+swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
+			   int nelems, int dir);
+
+extern void
+swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
+				  unsigned long offset, size_t size, int dir);
+
+extern void
+swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr,
+				     unsigned long offset, size_t size,
+				     int dir);
+
+extern int
+swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr);
+
+extern int
+swiotlb_dma_supported(struct device *hwdev, u64 mask);
+
+#endif /* __LINUX_SWIOTLB_H */
-- 
cgit v1.2.3


From 9363b9f23c9cc36cc8ef6c05fdf879ee4a96ae92 Mon Sep 17 00:00:00 2001
From: Balbir Singh <balbir@linux.vnet.ibm.com>
Date: Wed, 15 Oct 2008 22:01:05 -0700
Subject: memrlimit: cgroup mm owner callback changes to add task info

This patch adds an additional field to the mm_owner callbacks. This field
is required to get to the mm that changed. Hold mmap_sem in write mode
before calling the mm_owner_changed callback

[hugh@veritas.com: fix mmap_sem deadlock]
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Sudhir Kumar <skumar@linux.vnet.ibm.com>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Cc: Paul Menage <menage@google.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Pavel Emelianov <xemul@openvz.org>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cgroup.h | 3 ++-
 kernel/cgroup.c        | 4 +++-
 kernel/exit.c          | 9 ++++-----
 3 files changed, 9 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index c98dd7cb7076..30934e4bfaab 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -326,7 +326,8 @@ struct cgroup_subsys {
 	 */
 	void (*mm_owner_changed)(struct cgroup_subsys *ss,
 					struct cgroup *old,
-					struct cgroup *new);
+					struct cgroup *new,
+					struct task_struct *p);
 	int subsys_id;
 	int active;
 	int disabled;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index a0123d75ec9a..8c6e1c17e6d3 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2735,6 +2735,8 @@ void cgroup_fork_callbacks(struct task_struct *child)
  * Called on every change to mm->owner. mm_init_owner() does not
  * invoke this routine, since it assigns the mm->owner the first time
  * and does not change it.
+ *
+ * The callbacks are invoked with mmap_sem held in read mode.
  */
 void cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new)
 {
@@ -2750,7 +2752,7 @@ void cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new)
 			if (oldcgrp == newcgrp)
 				continue;
 			if (ss->mm_owner_changed)
-				ss->mm_owner_changed(ss, oldcgrp, newcgrp);
+				ss->mm_owner_changed(ss, oldcgrp, newcgrp, new);
 		}
 	}
 }
diff --git a/kernel/exit.c b/kernel/exit.c
index 85a83c831856..0ef4673e351b 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -640,24 +640,23 @@ retry:
 assign_new_owner:
 	BUG_ON(c == p);
 	get_task_struct(c);
+	read_unlock(&tasklist_lock);
+	down_write(&mm->mmap_sem);
 	/*
 	 * The task_lock protects c->mm from changing.
 	 * We always want mm->owner->mm == mm
 	 */
 	task_lock(c);
-	/*
-	 * Delay read_unlock() till we have the task_lock()
-	 * to ensure that c does not slip away underneath us
-	 */
-	read_unlock(&tasklist_lock);
 	if (c->mm != mm) {
 		task_unlock(c);
+		up_write(&mm->mmap_sem);
 		put_task_struct(c);
 		goto retry;
 	}
 	cgroup_mm_owner_callbacks(mm->owner, c);
 	mm->owner = c;
 	task_unlock(c);
+	up_write(&mm->mmap_sem);
 	put_task_struct(c);
 }
 #endif /* CONFIG_MM_OWNER */
-- 
cgit v1.2.3


From 1bfcf1304ea79c46efc3724e548b13b4b442b418 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 15 Oct 2008 22:01:21 -0700
Subject: pm: rework disabling of user mode helpers during suspend/hibernation

We currently use a PM notifier to disable user mode helpers before suspend
and hibernation and to re-enable them during resume.  However, this is not
an ideal solution, because if any drivers want to upload firmware into
memory before suspend, they have to use a PM notifier for this purpose and
there is no guarantee that the ordering of PM notifiers will be as
expected (ie.  the notifier that disables user mode helpers has to be run
after the driver's notifier used for uploading the firmware).

For this reason, it seems better to move the disabling and enabling of
user mode helpers to separate functions that will be called by the PM core
as necessary.

[akpm@linux-foundation.org: remove unneeded ifdefs]
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Cc: Alan Stern <stern@rowland.harvard.edu>
Acked-by: Pavel Machek <pavel@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kmod.h |  3 +++
 kernel/kmod.c        | 65 ++++++++++++++++++++++------------------------------
 kernel/power/disk.c  | 11 +++++++++
 kernel/power/main.c  |  7 ++++++
 kernel/power/user.c  | 10 +++++++-
 5 files changed, 58 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kmod.h b/include/linux/kmod.h
index a1a91577813c..92213a9194e1 100644
--- a/include/linux/kmod.h
+++ b/include/linux/kmod.h
@@ -99,4 +99,7 @@ struct file;
 extern int call_usermodehelper_pipe(char *path, char *argv[], char *envp[],
 				    struct file **filp);
 
+extern int usermodehelper_disable(void);
+extern void usermodehelper_enable(void);
+
 #endif /* __LINUX_KMOD_H__ */
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 2456d1a0befb..ab7dd08472fc 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -265,7 +265,7 @@ static void __call_usermodehelper(struct work_struct *work)
 	}
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 /*
  * If set, call_usermodehelper_exec() will exit immediately returning -EBUSY
  * (used for preventing user land processes from being created after the user
@@ -288,39 +288,37 @@ static DECLARE_WAIT_QUEUE_HEAD(running_helpers_waitq);
  */
 #define RUNNING_HELPERS_TIMEOUT	(5 * HZ)
 
-static int usermodehelper_pm_callback(struct notifier_block *nfb,
-					unsigned long action,
-					void *ignored)
+/**
+ * usermodehelper_disable - prevent new helpers from being started
+ */
+int usermodehelper_disable(void)
 {
 	long retval;
 
-	switch (action) {
-	case PM_HIBERNATION_PREPARE:
-	case PM_SUSPEND_PREPARE:
-		usermodehelper_disabled = 1;
-		smp_mb();
-		/*
-		 * From now on call_usermodehelper_exec() won't start any new
-		 * helpers, so it is sufficient if running_helpers turns out to
-		 * be zero at one point (it may be increased later, but that
-		 * doesn't matter).
-		 */
-		retval = wait_event_timeout(running_helpers_waitq,
+	usermodehelper_disabled = 1;
+	smp_mb();
+	/*
+	 * From now on call_usermodehelper_exec() won't start any new
+	 * helpers, so it is sufficient if running_helpers turns out to
+	 * be zero at one point (it may be increased later, but that
+	 * doesn't matter).
+	 */
+	retval = wait_event_timeout(running_helpers_waitq,
 					atomic_read(&running_helpers) == 0,
 					RUNNING_HELPERS_TIMEOUT);
-		if (retval) {
-			return NOTIFY_OK;
-		} else {
-			usermodehelper_disabled = 0;
-			return NOTIFY_BAD;
-		}
-	case PM_POST_HIBERNATION:
-	case PM_POST_SUSPEND:
-		usermodehelper_disabled = 0;
-		return NOTIFY_OK;
-	}
+	if (retval)
+		return 0;
 
-	return NOTIFY_DONE;
+	usermodehelper_disabled = 0;
+	return -EAGAIN;
+}
+
+/**
+ * usermodehelper_enable - allow new helpers to be started again
+ */
+void usermodehelper_enable(void)
+{
+	usermodehelper_disabled = 0;
 }
 
 static void helper_lock(void)
@@ -334,18 +332,12 @@ static void helper_unlock(void)
 	if (atomic_dec_and_test(&running_helpers))
 		wake_up(&running_helpers_waitq);
 }
-
-static void register_pm_notifier_callback(void)
-{
-	pm_notifier(usermodehelper_pm_callback, 0);
-}
-#else /* CONFIG_PM */
+#else /* CONFIG_PM_SLEEP */
 #define usermodehelper_disabled	0
 
 static inline void helper_lock(void) {}
 static inline void helper_unlock(void) {}
-static inline void register_pm_notifier_callback(void) {}
-#endif /* CONFIG_PM */
+#endif /* CONFIG_PM_SLEEP */
 
 /**
  * call_usermodehelper_setup - prepare to call a usermode helper
@@ -515,5 +507,4 @@ void __init usermodehelper_init(void)
 {
 	khelper_wq = create_singlethread_workqueue("khelper");
 	BUG_ON(!khelper_wq);
-	register_pm_notifier_callback();
 }
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index bbd85c60f741..331f9836383f 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -14,6 +14,7 @@
 #include <linux/reboot.h>
 #include <linux/string.h>
 #include <linux/device.h>
+#include <linux/kmod.h>
 #include <linux/delay.h>
 #include <linux/fs.h>
 #include <linux/mount.h>
@@ -520,6 +521,10 @@ int hibernate(void)
 	if (error)
 		goto Exit;
 
+	error = usermodehelper_disable();
+	if (error)
+		goto Exit;
+
 	/* Allocate memory management structures */
 	error = create_basic_memory_bitmaps();
 	if (error)
@@ -558,6 +563,7 @@ int hibernate(void)
 	thaw_processes();
  Finish:
 	free_basic_memory_bitmaps();
+	usermodehelper_enable();
  Exit:
 	pm_notifier_call_chain(PM_POST_HIBERNATION);
 	pm_restore_console();
@@ -634,6 +640,10 @@ static int software_resume(void)
 	if (error)
 		goto Finish;
 
+	error = usermodehelper_disable();
+	if (error)
+		goto Finish;
+
 	error = create_basic_memory_bitmaps();
 	if (error)
 		goto Finish;
@@ -656,6 +666,7 @@ static int software_resume(void)
 	thaw_processes();
  Done:
 	free_basic_memory_bitmaps();
+	usermodehelper_enable();
  Finish:
 	pm_notifier_call_chain(PM_POST_RESTORE);
 	pm_restore_console();
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 540b16b68565..19122cf6d827 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -14,6 +14,7 @@
 #include <linux/string.h>
 #include <linux/delay.h>
 #include <linux/errno.h>
+#include <linux/kmod.h>
 #include <linux/init.h>
 #include <linux/console.h>
 #include <linux/cpu.h>
@@ -237,6 +238,10 @@ static int suspend_prepare(void)
 	if (error)
 		goto Finish;
 
+	error = usermodehelper_disable();
+	if (error)
+		goto Finish;
+
 	if (suspend_freeze_processes()) {
 		error = -EAGAIN;
 		goto Thaw;
@@ -256,6 +261,7 @@ static int suspend_prepare(void)
 
  Thaw:
 	suspend_thaw_processes();
+	usermodehelper_enable();
  Finish:
 	pm_notifier_call_chain(PM_POST_SUSPEND);
 	pm_restore_console();
@@ -376,6 +382,7 @@ int suspend_devices_and_enter(suspend_state_t state)
 static void suspend_finish(void)
 {
 	suspend_thaw_processes();
+	usermodehelper_enable();
 	pm_notifier_call_chain(PM_POST_SUSPEND);
 	pm_restore_console();
 }
diff --git a/kernel/power/user.c b/kernel/power/user.c
index a6332a313262..005b93d839ba 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -212,13 +212,20 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
 	case SNAPSHOT_FREEZE:
 		if (data->frozen)
 			break;
+
 		printk("Syncing filesystems ... ");
 		sys_sync();
 		printk("done.\n");
 
-		error = freeze_processes();
+		error = usermodehelper_disable();
 		if (error)
+			break;
+
+		error = freeze_processes();
+		if (error) {
 			thaw_processes();
+			usermodehelper_enable();
+		}
 		if (!error)
 			data->frozen = 1;
 		break;
@@ -227,6 +234,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
 		if (!data->frozen || data->ready)
 			break;
 		thaw_processes();
+		usermodehelper_enable();
 		data->frozen = 0;
 		break;
 
-- 
cgit v1.2.3


From d5c003b4d1690e666dbab02bc8e705947baa848c Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 15 Oct 2008 22:01:24 -0700
Subject: include: replace __FUNCTION__ with __func__

__FUNCTION__ is gcc-specific, use __func__

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/acpi/acmacros.h           |  6 +++---
 include/acpi/platform/acgcc.h     |  2 +-
 include/asm-generic/bug.h         |  2 +-
 include/asm-x86/es7000/apic.h     |  2 +-
 include/asm-x86/summit/apic.h     |  2 +-
 include/linux/ext2_fs.h           |  2 +-
 include/linux/ext3_fs.h           |  4 ++--
 include/linux/ext3_jbd.h          | 14 +++++++-------
 include/linux/jbd.h               |  4 ++--
 include/linux/jbd2.h              |  4 ++--
 include/linux/pm.h                |  2 +-
 include/linux/reiserfs_fs.h       |  2 +-
 include/linux/rtmutex.h           |  2 +-
 include/media/saa7146.h           |  2 +-
 include/net/9p/9p.h               |  4 ++--
 include/net/bluetooth/bluetooth.h |  4 ++--
 include/net/ieee80211.h           |  2 +-
 include/net/ip_vs.h               |  4 ++--
 include/net/irda/irda.h           |  2 +-
 include/net/sctp/sctp.h           |  2 +-
 include/video/cyblafb.h           |  2 +-
 21 files changed, 35 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/include/acpi/acmacros.h b/include/acpi/acmacros.h
index 57ab9e9d7593..74a9617776a8 100644
--- a/include/acpi/acmacros.h
+++ b/include/acpi/acmacros.h
@@ -467,7 +467,7 @@ struct acpi_integer_overlay {
 /*
  * If ACPI_GET_FUNCTION_NAME was not defined in the compiler-dependent header,
  * define it now. This is the case where there the compiler does not support
- * a __FUNCTION__ macro or equivalent.
+ * a __func__ macro or equivalent.
  */
 #ifndef ACPI_GET_FUNCTION_NAME
 #define ACPI_GET_FUNCTION_NAME          _acpi_function_name
@@ -475,12 +475,12 @@ struct acpi_integer_overlay {
  * The Name parameter should be the procedure name as a quoted string.
  * The function name is also used by the function exit macros below.
  * Note: (const char) is used to be compatible with the debug interfaces
- * and macros such as __FUNCTION__.
+ * and macros such as __func__.
  */
 #define ACPI_FUNCTION_NAME(name)	static const char _acpi_function_name[] = #name;
 
 #else
-/* Compiler supports __FUNCTION__ (or equivalent) -- Ignore this macro */
+/* Compiler supports __func__ (or equivalent) -- Ignore this macro */
 
 #define ACPI_FUNCTION_NAME(name)
 #endif
diff --git a/include/acpi/platform/acgcc.h b/include/acpi/platform/acgcc.h
index 8996dba90cd9..8e2cdc57b197 100644
--- a/include/acpi/platform/acgcc.h
+++ b/include/acpi/platform/acgcc.h
@@ -46,7 +46,7 @@
 
 /* Function name is used for debug output. Non-ANSI, compiler-dependent */
 
-#define ACPI_GET_FUNCTION_NAME          __FUNCTION__
+#define ACPI_GET_FUNCTION_NAME          __func__
 
 /*
  * This macro is used to tag functions as "printf-like" because
diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index edc6ba82e090..0f6dabd4b517 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -22,7 +22,7 @@ struct bug_entry {
 
 #ifndef HAVE_ARCH_BUG
 #define BUG() do { \
-	printk("BUG: failure at %s:%d/%s()!\n", __FILE__, __LINE__, __FUNCTION__); \
+	printk("BUG: failure at %s:%d/%s()!\n", __FILE__, __LINE__, __func__); \
 	panic("BUG!"); \
 } while (0)
 #endif
diff --git a/include/asm-x86/es7000/apic.h b/include/asm-x86/es7000/apic.h
index bd2c44d1f7ac..aae50c2fb303 100644
--- a/include/asm-x86/es7000/apic.h
+++ b/include/asm-x86/es7000/apic.h
@@ -171,7 +171,7 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
 			int new_apicid = cpu_to_logical_apicid(cpu);
 			if (apicid_cluster(apicid) !=
 					apicid_cluster(new_apicid)){
-				printk ("%s: Not a valid mask!\n",__FUNCTION__);
+				printk ("%s: Not a valid mask!\n", __func__);
 #if defined CONFIG_ES7000_CLUSTERED_APIC
 				return 0xFF;
 #else
diff --git a/include/asm-x86/summit/apic.h b/include/asm-x86/summit/apic.h
index c5b2e4b10358..394b00bb5e72 100644
--- a/include/asm-x86/summit/apic.h
+++ b/include/asm-x86/summit/apic.h
@@ -160,7 +160,7 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
 			int new_apicid = cpu_to_logical_apicid(cpu);
 			if (apicid_cluster(apicid) !=
 					apicid_cluster(new_apicid)){
-				printk ("%s: Not a valid mask!\n",__FUNCTION__);
+				printk ("%s: Not a valid mask!\n", __func__);
 				return 0xFF;
 			}
 			apicid = apicid | new_apicid;
diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h
index 2efe7b863cff..78c775a83f7c 100644
--- a/include/linux/ext2_fs.h
+++ b/include/linux/ext2_fs.h
@@ -47,7 +47,7 @@
 #ifdef EXT2FS_DEBUG
 #	define ext2_debug(f, a...)	{ \
 					printk ("EXT2-fs DEBUG (%s, %d): %s:", \
-						__FILE__, __LINE__, __FUNCTION__); \
+						__FILE__, __LINE__, __func__); \
 				  	printk (f, ## a); \
 					}
 #else
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index 8120fa1bc235..159d9b476cd7 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -43,7 +43,7 @@
 #define ext3_debug(f, a...)						\
 	do {								\
 		printk (KERN_DEBUG "EXT3-fs DEBUG (%s, %d): %s:",	\
-			__FILE__, __LINE__, __FUNCTION__);		\
+			__FILE__, __LINE__, __func__);		\
 		printk (KERN_DEBUG f, ## a);				\
 	} while (0)
 #else
@@ -871,7 +871,7 @@ extern void ext3_update_dynamic_rev (struct super_block *sb);
 #define ext3_std_error(sb, errno)				\
 do {								\
 	if ((errno))						\
-		__ext3_std_error((sb), __FUNCTION__, (errno));	\
+		__ext3_std_error((sb), __func__, (errno));	\
 } while (0)
 
 /*
diff --git a/include/linux/ext3_jbd.h b/include/linux/ext3_jbd.h
index 8c43b13a02fe..cf82d519be40 100644
--- a/include/linux/ext3_jbd.h
+++ b/include/linux/ext3_jbd.h
@@ -137,17 +137,17 @@ int __ext3_journal_dirty_metadata(const char *where,
 				handle_t *handle, struct buffer_head *bh);
 
 #define ext3_journal_get_undo_access(handle, bh) \
-	__ext3_journal_get_undo_access(__FUNCTION__, (handle), (bh))
+	__ext3_journal_get_undo_access(__func__, (handle), (bh))
 #define ext3_journal_get_write_access(handle, bh) \
-	__ext3_journal_get_write_access(__FUNCTION__, (handle), (bh))
+	__ext3_journal_get_write_access(__func__, (handle), (bh))
 #define ext3_journal_revoke(handle, blocknr, bh) \
-	__ext3_journal_revoke(__FUNCTION__, (handle), (blocknr), (bh))
+	__ext3_journal_revoke(__func__, (handle), (blocknr), (bh))
 #define ext3_journal_get_create_access(handle, bh) \
-	__ext3_journal_get_create_access(__FUNCTION__, (handle), (bh))
+	__ext3_journal_get_create_access(__func__, (handle), (bh))
 #define ext3_journal_dirty_metadata(handle, bh) \
-	__ext3_journal_dirty_metadata(__FUNCTION__, (handle), (bh))
+	__ext3_journal_dirty_metadata(__func__, (handle), (bh))
 #define ext3_journal_forget(handle, bh) \
-	__ext3_journal_forget(__FUNCTION__, (handle), (bh))
+	__ext3_journal_forget(__func__, (handle), (bh))
 
 int ext3_journal_dirty_data(handle_t *handle, struct buffer_head *bh);
 
@@ -160,7 +160,7 @@ static inline handle_t *ext3_journal_start(struct inode *inode, int nblocks)
 }
 
 #define ext3_journal_stop(handle) \
-	__ext3_journal_stop(__FUNCTION__, (handle))
+	__ext3_journal_stop(__func__, (handle))
 
 static inline handle_t *ext3_journal_current_handle(void)
 {
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index 07a9b52a2654..7ebbcb1c9ba4 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -61,7 +61,7 @@ extern u8 journal_enable_debug;
 	do {								\
 		if ((n) <= journal_enable_debug) {			\
 			printk (KERN_DEBUG "(%s, %d): %s: ",		\
-				__FILE__, __LINE__, __FUNCTION__);	\
+				__FILE__, __LINE__, __func__);	\
 			printk (f, ## a);				\
 		}							\
 	} while (0)
@@ -984,7 +984,7 @@ extern int	cleanup_journal_tail(journal_t *);
 
 #define jbd_ENOSYS() \
 do {								           \
-	printk (KERN_ERR "JBD unimplemented function %s\n", __FUNCTION__); \
+	printk (KERN_ERR "JBD unimplemented function %s\n", __func__); \
 	current->state = TASK_UNINTERRUPTIBLE;			           \
 	schedule();						           \
 } while (1)
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index d2e91ea998fd..463d6f10b64f 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -61,7 +61,7 @@ extern u8 jbd2_journal_enable_debug;
 	do {								\
 		if ((n) <= jbd2_journal_enable_debug) {			\
 			printk (KERN_DEBUG "(%s, %d): %s: ",		\
-				__FILE__, __LINE__, __FUNCTION__);	\
+				__FILE__, __LINE__, __func__);	\
 			printk (f, ## a);				\
 		}							\
 	} while (0)
@@ -1143,7 +1143,7 @@ extern int	jbd2_cleanup_journal_tail(journal_t *);
 
 #define jbd_ENOSYS() \
 do {								           \
-	printk (KERN_ERR "JBD unimplemented function %s\n", __FUNCTION__); \
+	printk (KERN_ERR "JBD unimplemented function %s\n", __func__); \
 	current->state = TASK_UNINTERRUPTIBLE;			           \
 	schedule();						           \
 } while (1)
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 4dcce54b6d76..42de4003c4ee 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -419,7 +419,7 @@ extern void __suspend_report_result(const char *function, void *fn, int ret);
 
 #define suspend_report_result(fn, ret)					\
 	do {								\
-		__suspend_report_result(__FUNCTION__, fn, ret);		\
+		__suspend_report_result(__func__, fn, ret);		\
 	} while (0)
 
 #else /* !CONFIG_PM_SLEEP */
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index e9963af16cda..bc5114d35e99 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -87,7 +87,7 @@ void reiserfs_warning(struct super_block *s, const char *fmt, ...);
 if( !( cond ) ) 								\
   reiserfs_panic( NULL, "reiserfs[%i]: assertion " scond " failed at "	\
 		  __FILE__ ":%i:%s: " format "\n",		\
-		  in_interrupt() ? -1 : task_pid_nr(current), __LINE__ , __FUNCTION__ , ##args )
+		  in_interrupt() ? -1 : task_pid_nr(current), __LINE__ , __func__ , ##args )
 
 #define RASSERT(cond, format, args...) __RASSERT(cond, #cond, format, ##args)
 
diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h
index 382bb7951166..f19b00b7d530 100644
--- a/include/linux/rtmutex.h
+++ b/include/linux/rtmutex.h
@@ -54,7 +54,7 @@ struct hrtimer_sleeper;
 #ifdef CONFIG_DEBUG_RT_MUTEXES
 # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \
 	, .name = #mutexname, .file = __FILE__, .line = __LINE__
-# define rt_mutex_init(mutex)			__rt_mutex_init(mutex, __FUNCTION__)
+# define rt_mutex_init(mutex)			__rt_mutex_init(mutex, __func__)
  extern void rt_mutex_debug_task_free(struct task_struct *tsk);
 #else
 # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname)
diff --git a/include/media/saa7146.h b/include/media/saa7146.h
index 64a2ec746a3e..c5a6e22a4b37 100644
--- a/include/media/saa7146.h
+++ b/include/media/saa7146.h
@@ -24,7 +24,7 @@
 
 extern unsigned int saa7146_debug;
 
-//#define DEBUG_PROLOG printk("(0x%08x)(0x%08x) %s: %s(): ",(dev==0?-1:(dev->mem==0?-1:saa7146_read(dev,RPS_ADDR0))),(dev==0?-1:(dev->mem==0?-1:saa7146_read(dev,IER))),KBUILD_MODNAME,__FUNCTION__)
+//#define DEBUG_PROLOG printk("(0x%08x)(0x%08x) %s: %s(): ",(dev==0?-1:(dev->mem==0?-1:saa7146_read(dev,RPS_ADDR0))),(dev==0?-1:(dev->mem==0?-1:saa7146_read(dev,IER))),KBUILD_MODNAME,__func__)
 
 #ifndef DEBUG_VARIABLE
 	#define DEBUG_VARIABLE saa7146_debug
diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index c3626c0ba9d3..fb163e2e0de6 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -61,7 +61,7 @@ extern unsigned int p9_debug_level;
 do {  \
 	if ((p9_debug_level & level) == level) \
 		printk(KERN_NOTICE "-- %s (%d): " \
-		format , __FUNCTION__, task_pid_nr(current) , ## arg); \
+		format , __func__, task_pid_nr(current) , ## arg); \
 } while (0)
 
 #define PRINT_FCALL_ERROR(s, fcall) P9_DPRINTK(P9_DEBUG_ERROR,   \
@@ -76,7 +76,7 @@ do {  \
 #define P9_EPRINTK(level, format, arg...) \
 do { \
 	printk(level "9p: %s (%d): " \
-		format , __FUNCTION__, task_pid_nr(current), ## arg); \
+		format , __func__, task_pid_nr(current), ## arg); \
 } while (0)
 
 /**
diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index 6f8418bf4241..996d12df7594 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -54,8 +54,8 @@
 #define SOL_RFCOMM	18
 
 #define BT_INFO(fmt, arg...) printk(KERN_INFO "Bluetooth: " fmt "\n" , ## arg)
-#define BT_DBG(fmt, arg...)  printk(KERN_INFO "%s: " fmt "\n" , __FUNCTION__ , ## arg)
-#define BT_ERR(fmt, arg...)  printk(KERN_ERR  "%s: " fmt "\n" , __FUNCTION__ , ## arg)
+#define BT_DBG(fmt, arg...)  printk(KERN_INFO "%s: " fmt "\n" , __func__ , ## arg)
+#define BT_ERR(fmt, arg...)  printk(KERN_ERR  "%s: " fmt "\n" , __func__ , ## arg)
 
 /* Connection and socket states */
 enum {
diff --git a/include/net/ieee80211.h b/include/net/ieee80211.h
index 6048579d0b24..93a56de3594b 100644
--- a/include/net/ieee80211.h
+++ b/include/net/ieee80211.h
@@ -114,7 +114,7 @@ extern u32 ieee80211_debug_level;
 #define IEEE80211_DEBUG(level, fmt, args...) \
 do { if (ieee80211_debug_level & (level)) \
   printk(KERN_DEBUG "ieee80211: %c %s " fmt, \
-         in_interrupt() ? 'I' : 'U', __FUNCTION__ , ## args); } while (0)
+         in_interrupt() ? 'I' : 'U', __func__ , ## args); } while (0)
 static inline bool ieee80211_ratelimit_debug(u32 level)
 {
 	return (ieee80211_debug_level & level) && net_ratelimit();
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 0b2071d9326d..fe9fcf73c85e 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -165,13 +165,13 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len,
     do {								\
 	    if (level <= ip_vs_get_debug_level())			\
 		    printk(KERN_DEBUG "Enter: %s, %s line %i\n",	\
-			   __FUNCTION__, __FILE__, __LINE__);		\
+			   __func__, __FILE__, __LINE__);		\
     } while (0)
 #define LeaveFunction(level)                                            \
     do {                                                                \
 	    if (level <= ip_vs_get_debug_level())                       \
 			printk(KERN_DEBUG "Leave: %s, %s line %i\n",    \
-			       __FUNCTION__, __FILE__, __LINE__);       \
+			       __func__, __FILE__, __LINE__);       \
     } while (0)
 #else
 #define EnterFunction(level)   do {} while (0)
diff --git a/include/net/irda/irda.h b/include/net/irda/irda.h
index 08387553b57e..7e582061b230 100644
--- a/include/net/irda/irda.h
+++ b/include/net/irda/irda.h
@@ -72,7 +72,7 @@ do {	if (irda_debug >= (n)) \
 #define IRDA_ASSERT(expr, func) \
 do { if(!(expr)) { \
 	printk( "Assertion failed! %s:%s:%d %s\n", \
-		__FILE__,__FUNCTION__,__LINE__,(#expr) ); \
+		__FILE__,__func__,__LINE__,(#expr) ); \
 	func } } while (0)
 #define IRDA_ASSERT_LABEL(label)	label
 #else
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 703305d00365..ed71b110edf7 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -303,7 +303,7 @@ extern int sctp_debug_flag;
 #define SCTP_ASSERT(expr, str, func) \
 	if (!(expr)) { \
 		SCTP_DEBUG_PRINTK("Assertion Failed: %s(%s) at %s:%s:%d\n", \
-			str, (#expr), __FILE__, __FUNCTION__, __LINE__); \
+			str, (#expr), __FILE__, __func__, __LINE__); \
 		func; \
 	}
 
diff --git a/include/video/cyblafb.h b/include/video/cyblafb.h
index 717440575380..d3c1d4e2c8e3 100644
--- a/include/video/cyblafb.h
+++ b/include/video/cyblafb.h
@@ -4,7 +4,7 @@
 #endif
 
 #if CYBLAFB_DEBUG
-#define debug(f,a...)	printk("%s:" f,  __FUNCTION__ , ## a);
+#define debug(f,a...)	printk("%s:" f,  __func__ , ## a);
 #else
 #define debug(f,a...)
 #endif
-- 
cgit v1.2.3


From 693ac389326a87d608baa2902c45a6e78ed46681 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 15 Oct 2008 22:01:25 -0700
Subject: include/linux/mount.h: remove CVS keyword

Remove a CVS keyword that wasn't updated for a long time from a comment.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mount.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mount.h b/include/linux/mount.h
index 30a1d63b6fb5..cab2a85e2ee8 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -5,8 +5,6 @@
  *
  * Author:  Marco van Wieringen <mvw@planets.elm.net>
  *
- * Version: $Id: mount.h,v 2.0 1996/11/17 16:48:14 mvw Exp mvw $
- *
  */
 #ifndef _LINUX_MOUNT_H
 #define _LINUX_MOUNT_H
-- 
cgit v1.2.3


From 1ecfea06386c6b1344e83c8f909c87c88262ba1d Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@crashcourse.ca>
Date: Wed, 15 Oct 2008 22:01:27 -0700
Subject: init.h: remove long-dead __setup_null_param() macro

This macro appears to have been unused for ages, and there are no
invocations of it anywhere in the source tree.

Signed-off-by: Robert P. J. Day <rpjday@crashcourse.ca>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/init.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/init.h b/include/linux/init.h
index 93538b696e3d..ad63824460e3 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -233,9 +233,6 @@ struct obs_kernel_param {
 		__attribute__((aligned((sizeof(long)))))	\
 		= { __setup_str_##unique_id, fn, early }
 
-#define __setup_null_param(str, unique_id)			\
-	__setup_param(str, unique_id, NULL, 0)
-
 #define __setup(str, fn)					\
 	__setup_param(str, fn, fn, 0)
 
@@ -296,7 +293,6 @@ void __init parse_early_param(void);
 	void cleanup_module(void) __attribute__((alias(#exitfn)));
 
 #define __setup_param(str, unique_id, fn)	/* nothing */
-#define __setup_null_param(str, unique_id) 	/* nothing */
 #define __setup(str, func) 			/* nothing */
 #endif
 
-- 
cgit v1.2.3


From c80cfb0406c01bb5da91bfe30f5cb1fd96831138 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Wed, 15 Oct 2008 22:01:35 -0700
Subject: vsprintf: use new vsprintf symbolic function pointer format

Use the '%pF' format to get rid of an "#ifdef DEBUG" and make some printks
atomic.

This removes the last in-tree uses of print_fn_descriptor_symbol().  I
marked print_fn_descriptor_symbol() deprecated and scheduled it for
removal next year to give time for out-of-tree modules to be updated.

parisc's print_fn_descriptor_symbol() is currently broken there (it needs
to dereference the function pointer similar to ia64 and power).  This
patch shouldn't make anything worse, but it means we need to fix
dereference_function_descriptor() instead of print_fn_descriptor_symbol()
to get meaningful initcall_debug output.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: Kay Sievers <kay.sievers@vrfy.org>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/feature-removal-schedule.txt | 9 +++++++++
 drivers/base/power/main.c                  | 7 ++-----
 include/linux/kallsyms.h                   | 8 +++-----
 3 files changed, 14 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 4d2566a7d168..f5f812daf9f4 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -294,6 +294,15 @@ Who:	Jiri Slaby <jirislaby@gmail.com>
 
 ---------------------------
 
+What: print_fn_descriptor_symbol()
+When: October 2009
+Why:  The %pF vsprintf format provides the same functionality in a
+      simpler way.  print_fn_descriptor_symbol() is deprecated but
+      still present to give out-of-tree modules time to change.
+Who:  Bjorn Helgaas <bjorn.helgaas@hp.com>
+
+---------------------------
+
 What:	/sys/o2cb symlink
 When:	January 2010
 Why:	/sys/fs/o2cb is the proper location for this information - /sys/o2cb
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 273a944d4040..03bde7524bc3 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -778,10 +778,7 @@ EXPORT_SYMBOL_GPL(device_suspend);
 
 void __suspend_report_result(const char *function, void *fn, int ret)
 {
-	if (ret) {
-		printk(KERN_ERR "%s(): ", function);
-		print_fn_descriptor_symbol("%s returns ", fn);
-		printk("%d\n", ret);
-	}
+	if (ret)
+		printk(KERN_ERR "%s(): %pF returns %d\n", function, fn, ret);
 }
 EXPORT_SYMBOL_GPL(__suspend_report_result);
diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h
index b96144887444..f3fe34391d8e 100644
--- a/include/linux/kallsyms.h
+++ b/include/linux/kallsyms.h
@@ -93,12 +93,10 @@ static inline void print_symbol(const char *fmt, unsigned long addr)
 }
 
 /*
- * Pretty-print a function pointer.
- *
- * ia64 and ppc64 function pointers are really function descriptors,
- * which contain a pointer the real address.
+ * Pretty-print a function pointer.  This function is deprecated.
+ * Please use the "%pF" vsprintf format instead.
  */
-static inline void print_fn_descriptor_symbol(const char *fmt, void *addr)
+static inline void __deprecated print_fn_descriptor_symbol(const char *fmt, void *addr)
 {
 #if defined(CONFIG_IA64) || defined(CONFIG_PPC64)
 	addr = *(void **)addr;
-- 
cgit v1.2.3


From a25d644fc0e232f242d1f3baa63c149c42536ff0 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 15 Oct 2008 22:01:38 -0700
Subject: wait: kill is_sync_wait()

is_sync_wait() is used to distinguish between sync and async waits.
Basically sync waits are the ones initialized with init_waitqueue_entry()
and async ones with init_waitqueue_func_entry().  The sync/async
distinction is used only in prepare_to_wait[_exclusive]() and its only
function is to skip setting the current task state if the wait is async.
This has a few problems.

* No one uses it.  None of func_entry users use prepare_to_wait()
  functions, so the code path never gets executed.

* The distinction is bogus.  Maybe back when func_entry is used only
  by aio but it's now also used by epoll and in future possibly by 9p
  and poll/select.

* Taking @state as argument and ignoring it silenly depending on how
  @wait is initialized is just a bad error-prone API.

* It prevents func_entry waits from using wait->private for no good
  reason.

This patch kills is_sync_wait() and the associated code paths from
prepare_to_wait[_exclusive]().  As there was no user of these code paths,
this patch doesn't cause any behavior difference.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/wait.h |  9 ---------
 kernel/wait.c        | 14 ++------------
 2 files changed, 2 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 0081147a9fe8..ef609f842fac 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -108,15 +108,6 @@ static inline int waitqueue_active(wait_queue_head_t *q)
 	return !list_empty(&q->task_list);
 }
 
-/*
- * Used to distinguish between sync and async io wait context:
- * sync i/o typically specifies a NULL wait queue entry or a wait
- * queue entry bound to a task (current task) to wake up.
- * aio specifies a wait queue entry with an async notification
- * callback routine, not associated with any task.
- */
-#define is_sync_wait(wait)	(!(wait) || ((wait)->private))
-
 extern void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait);
 extern void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait);
 extern void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait);
diff --git a/kernel/wait.c b/kernel/wait.c
index c275c56cf2d3..cd87131f2fc2 100644
--- a/kernel/wait.c
+++ b/kernel/wait.c
@@ -72,12 +72,7 @@ prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
 	spin_lock_irqsave(&q->lock, flags);
 	if (list_empty(&wait->task_list))
 		__add_wait_queue(q, wait);
-	/*
-	 * don't alter the task state if this is just going to
-	 * queue an async wait queue callback
-	 */
-	if (is_sync_wait(wait))
-		set_current_state(state);
+	set_current_state(state);
 	spin_unlock_irqrestore(&q->lock, flags);
 }
 EXPORT_SYMBOL(prepare_to_wait);
@@ -91,12 +86,7 @@ prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
 	spin_lock_irqsave(&q->lock, flags);
 	if (list_empty(&wait->task_list))
 		__add_wait_queue_tail(q, wait);
-	/*
-	 * don't alter the task state if this is just going to
- 	 * queue an async wait queue callback
-	 */
-	if (is_sync_wait(wait))
-		set_current_state(state);
+	set_current_state(state);
 	spin_unlock_irqrestore(&q->lock, flags);
 }
 EXPORT_SYMBOL(prepare_to_wait_exclusive);
-- 
cgit v1.2.3


From 25ddbb18aae33ad255eb9f35aacebe3af01e1e9c Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Wed, 15 Oct 2008 22:01:41 -0700
Subject: Make the taint flags reliable

It's somewhat unlikely that it happens, but right now a race window
between interrupts or machine checks or oopses could corrupt the tainted
bitmap because it is modified in a non atomic fashion.

Convert the taint variable to an unsigned long and use only atomic bit
operations on it.

Unfortunately this means the intvec sysctl functions cannot be used on it
anymore.

It turned out the taint sysctl handler could actually be simplified a bit
(since it only increases capabilities) so this patch actually removes
code.

[akpm@linux-foundation.org: remove unneeded include]
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/smpboot.c | 14 +++++-----
 include/linux/kernel.h    | 25 +++++++++---------
 kernel/module.c           | 12 ++++-----
 kernel/panic.c            | 63 ++++++++++++++++++++++++++++++++------------
 kernel/softlockup.c       |  2 +-
 kernel/sysctl.c           | 67 ++++++++++++++++++++---------------------------
 6 files changed, 101 insertions(+), 82 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 8c3aca7cb343..7ed9e070a6e9 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -282,6 +282,8 @@ static void __cpuinit smp_callin(void)
 	cpu_set(cpuid, cpu_callin_map);
 }
 
+static int __cpuinitdata unsafe_smp;
+
 /*
  * Activate a secondary processor.
  */
@@ -397,7 +399,7 @@ static void __cpuinit smp_apply_quirks(struct cpuinfo_x86 *c)
 				goto valid_k7;
 
 		/* If we get here, not a certified SMP capable AMD system. */
-		add_taint(TAINT_UNSAFE_SMP);
+		unsafe_smp = 1;
 	}
 
 valid_k7:
@@ -414,12 +416,10 @@ static void __cpuinit smp_checks(void)
 	 * Don't taint if we are running SMP kernel on a single non-MP
 	 * approved Athlon
 	 */
-	if (tainted & TAINT_UNSAFE_SMP) {
-		if (num_online_cpus())
-			printk(KERN_INFO "WARNING: This combination of AMD"
-				"processors is not suitable for SMP.\n");
-		else
-			tainted &= ~TAINT_UNSAFE_SMP;
+	if (unsafe_smp && num_online_cpus() > 1) {
+		printk(KERN_INFO "WARNING: This combination of AMD"
+			"processors is not suitable for SMP.\n");
+		add_taint(TAINT_UNSAFE_SMP);
 	}
 }
 
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 75d81f157d2e..e971c55f45ac 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -235,9 +235,10 @@ extern int oops_in_progress;		/* If set, an oops, panic(), BUG() or die() is in
 extern int panic_timeout;
 extern int panic_on_oops;
 extern int panic_on_unrecovered_nmi;
-extern int tainted;
 extern const char *print_tainted(void);
-extern void add_taint(unsigned);
+extern void add_taint(unsigned flag);
+extern int test_taint(unsigned flag);
+extern unsigned long get_taint(void);
 extern int root_mountflags;
 
 /* Values used for system_state */
@@ -250,16 +251,16 @@ extern enum system_states {
 	SYSTEM_SUSPEND_DISK,
 } system_state;
 
-#define TAINT_PROPRIETARY_MODULE	(1<<0)
-#define TAINT_FORCED_MODULE		(1<<1)
-#define TAINT_UNSAFE_SMP		(1<<2)
-#define TAINT_FORCED_RMMOD		(1<<3)
-#define TAINT_MACHINE_CHECK		(1<<4)
-#define TAINT_BAD_PAGE			(1<<5)
-#define TAINT_USER			(1<<6)
-#define TAINT_DIE			(1<<7)
-#define TAINT_OVERRIDDEN_ACPI_TABLE	(1<<8)
-#define TAINT_WARN			(1<<9)
+#define TAINT_PROPRIETARY_MODULE	0
+#define TAINT_FORCED_MODULE		1
+#define TAINT_UNSAFE_SMP		2
+#define TAINT_FORCED_RMMOD		3
+#define TAINT_MACHINE_CHECK		4
+#define TAINT_BAD_PAGE			5
+#define TAINT_USER			6
+#define TAINT_DIE			7
+#define TAINT_OVERRIDDEN_ACPI_TABLE	8
+#define TAINT_WARN			9
 
 extern void dump_stack(void) __cold;
 
diff --git a/kernel/module.c b/kernel/module.c
index 9db11911e04b..dd9ac6ad5cb9 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -100,7 +100,7 @@ static inline int strong_try_module_get(struct module *mod)
 static inline void add_taint_module(struct module *mod, unsigned flag)
 {
 	add_taint(flag);
-	mod->taints |= flag;
+	mod->taints |= (1U << flag);
 }
 
 /*
@@ -923,7 +923,7 @@ static const char vermagic[] = VERMAGIC_STRING;
 static int try_to_force_load(struct module *mod, const char *symname)
 {
 #ifdef CONFIG_MODULE_FORCE_LOAD
-	if (!(tainted & TAINT_FORCED_MODULE))
+	if (!test_taint(TAINT_FORCED_MODULE))
 		printk("%s: no version for \"%s\" found: kernel tainted.\n",
 		       mod->name, symname);
 	add_taint_module(mod, TAINT_FORCED_MODULE);
@@ -1033,7 +1033,7 @@ static unsigned long resolve_symbol(Elf_Shdr *sechdrs,
 	const unsigned long *crc;
 
 	ret = find_symbol(name, &owner, &crc,
-			  !(mod->taints & TAINT_PROPRIETARY_MODULE), true);
+			  !(mod->taints & (1 << TAINT_PROPRIETARY_MODULE)), true);
 	if (!IS_ERR_VALUE(ret)) {
 		/* use_module can fail due to OOM,
 		   or module initialization or unloading */
@@ -1634,7 +1634,7 @@ static void set_license(struct module *mod, const char *license)
 		license = "unspecified";
 
 	if (!license_is_gpl_compatible(license)) {
-		if (!(tainted & TAINT_PROPRIETARY_MODULE))
+		if (!test_taint(TAINT_PROPRIETARY_MODULE))
 			printk(KERN_WARNING "%s: module license '%s' taints "
 				"kernel.\n", mod->name, license);
 		add_taint_module(mod, TAINT_PROPRIETARY_MODULE);
@@ -2552,9 +2552,9 @@ static char *module_flags(struct module *mod, char *buf)
 	    mod->state == MODULE_STATE_GOING ||
 	    mod->state == MODULE_STATE_COMING) {
 		buf[bx++] = '(';
-		if (mod->taints & TAINT_PROPRIETARY_MODULE)
+		if (mod->taints & (1 << TAINT_PROPRIETARY_MODULE))
 			buf[bx++] = 'P';
-		if (mod->taints & TAINT_FORCED_MODULE)
+		if (mod->taints & (1 << TAINT_FORCED_MODULE))
 			buf[bx++] = 'F';
 		/*
 		 * TAINT_FORCED_RMMOD: could be added.
diff --git a/kernel/panic.c b/kernel/panic.c
index 12c5a0a6c89b..028013f7afd4 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -23,7 +23,7 @@
 #include <linux/kallsyms.h>
 
 int panic_on_oops;
-int tainted;
+static unsigned long tainted_mask;
 static int pause_on_oops;
 static int pause_on_oops_flag;
 static DEFINE_SPINLOCK(pause_on_oops_lock);
@@ -159,31 +159,60 @@ EXPORT_SYMBOL(panic);
  *	The string is overwritten by the next call to print_taint().
  */
 
+struct tnt {
+	u8 bit;
+	char true;
+	char false;
+};
+
+static const struct tnt tnts[] = {
+	{ TAINT_PROPRIETARY_MODULE, 'P', 'G' },
+	{ TAINT_FORCED_MODULE, 'F', ' ' },
+	{ TAINT_UNSAFE_SMP, 'S', ' ' },
+	{ TAINT_FORCED_RMMOD, 'R', ' ' },
+	{ TAINT_MACHINE_CHECK, 'M', ' ' },
+	{ TAINT_BAD_PAGE, 'B', ' ' },
+	{ TAINT_USER, 'U', ' ' },
+	{ TAINT_DIE, 'D', ' ' },
+	{ TAINT_OVERRIDDEN_ACPI_TABLE, 'A', ' ' },
+	{ TAINT_WARN, 'W', ' ' },
+};
+
 const char *print_tainted(void)
 {
-	static char buf[20];
-	if (tainted) {
-		snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c%c%c%c%c",
-			tainted & TAINT_PROPRIETARY_MODULE ? 'P' : 'G',
-			tainted & TAINT_FORCED_MODULE ? 'F' : ' ',
-			tainted & TAINT_UNSAFE_SMP ? 'S' : ' ',
-			tainted & TAINT_FORCED_RMMOD ? 'R' : ' ',
-			tainted & TAINT_MACHINE_CHECK ? 'M' : ' ',
-			tainted & TAINT_BAD_PAGE ? 'B' : ' ',
-			tainted & TAINT_USER ? 'U' : ' ',
-			tainted & TAINT_DIE ? 'D' : ' ',
-			tainted & TAINT_OVERRIDDEN_ACPI_TABLE ? 'A' : ' ',
-			tainted & TAINT_WARN ? 'W' : ' ');
-	}
-	else
+	static char buf[ARRAY_SIZE(tnts) + sizeof("Tainted: ") + 1];
+
+	if (tainted_mask) {
+		char *s;
+		int i;
+
+		s = buf + sprintf(buf, "Tainted: ");
+		for (i = 0; i < ARRAY_SIZE(tnts); i++) {
+			const struct tnt *t = &tnts[i];
+			*s++ = test_bit(t->bit, &tainted_mask) ?
+					t->true : t->false;
+		}
+		*s = 0;
+	} else
 		snprintf(buf, sizeof(buf), "Not tainted");
 	return(buf);
 }
 
+int test_taint(unsigned flag)
+{
+	return test_bit(flag, &tainted_mask);
+}
+EXPORT_SYMBOL(test_taint);
+
+unsigned long get_taint(void)
+{
+	return tainted_mask;
+}
+
 void add_taint(unsigned flag)
 {
 	debug_locks = 0; /* can't trust the integrity of the kernel anymore */
-	tainted |= flag;
+	set_bit(flag, &tainted_mask);
 }
 EXPORT_SYMBOL(add_taint);
 
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index cb838ee93a82..3953e4aed733 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -226,7 +226,7 @@ static void check_hung_uninterruptible_tasks(int this_cpu)
 	 * If the system crashed already then all bets are off,
 	 * do not report extra hung tasks:
 	 */
-	if ((tainted & TAINT_DIE) || did_panic)
+	if (test_taint(TAINT_DIE) || did_panic)
 		return;
 
 	read_lock(&tasklist_lock);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index cfc5295f1e82..ec88fcc9a0d2 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -149,7 +149,7 @@ extern int max_lock_depth;
 #ifdef CONFIG_PROC_SYSCTL
 static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp,
 		  void __user *buffer, size_t *lenp, loff_t *ppos);
-static int proc_dointvec_taint(struct ctl_table *table, int write, struct file *filp,
+static int proc_taint(struct ctl_table *table, int write, struct file *filp,
 			       void __user *buffer, size_t *lenp, loff_t *ppos);
 #endif
 
@@ -379,10 +379,9 @@ static struct ctl_table kern_table[] = {
 #ifdef CONFIG_PROC_SYSCTL
 	{
 		.procname	= "tainted",
-		.data		= &tainted,
-		.maxlen		= sizeof(int),
+		.maxlen 	= sizeof(long),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_taint,
+		.proc_handler	= &proc_taint,
 	},
 #endif
 #ifdef CONFIG_LATENCYTOP
@@ -2228,49 +2227,39 @@ int proc_dointvec(struct ctl_table *table, int write, struct file *filp,
 		    	    NULL,NULL);
 }
 
-#define OP_SET	0
-#define OP_AND	1
-#define OP_OR	2
-
-static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp,
-				      int *valp,
-				      int write, void *data)
-{
-	int op = *(int *)data;
-	if (write) {
-		int val = *negp ? -*lvalp : *lvalp;
-		switch(op) {
-		case OP_SET:	*valp = val; break;
-		case OP_AND:	*valp &= val; break;
-		case OP_OR:	*valp |= val; break;
-		}
-	} else {
-		int val = *valp;
-		if (val < 0) {
-			*negp = -1;
-			*lvalp = (unsigned long)-val;
-		} else {
-			*negp = 0;
-			*lvalp = (unsigned long)val;
-		}
-	}
-	return 0;
-}
-
 /*
- *	Taint values can only be increased
+ * Taint values can only be increased
+ * This means we can safely use a temporary.
  */
-static int proc_dointvec_taint(struct ctl_table *table, int write, struct file *filp,
+static int proc_taint(struct ctl_table *table, int write, struct file *filp,
 			       void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-	int op;
+	struct ctl_table t;
+	unsigned long tmptaint = get_taint();
+	int err;
 
 	if (write && !capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	op = OP_OR;
-	return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
-				do_proc_dointvec_bset_conv,&op);
+	t = *table;
+	t.data = &tmptaint;
+	err = proc_doulongvec_minmax(&t, write, filp, buffer, lenp, ppos);
+	if (err < 0)
+		return err;
+
+	if (write) {
+		/*
+		 * Poor man's atomic or. Not worth adding a primitive
+		 * to everyone's atomic.h for this
+		 */
+		int i;
+		for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) {
+			if ((tmptaint >> i) & 1)
+				add_taint(i);
+		}
+	}
+
+	return err;
 }
 
 struct do_proc_dointvec_minmax_conv_param {
-- 
cgit v1.2.3


From 22b8ce94708f7cdf0b04965c6f7443dfd374c35c Mon Sep 17 00:00:00 2001
From: Dave Hansen <dave@linux.vnet.ibm.com>
Date: Wed, 15 Oct 2008 22:01:46 -0700
Subject: profiling: dynamically enable readprofile at runtime

Way too often, I have a machine that exhibits some kind of crappy
behavior.  The CPU looks wedged in the kernel or it is spending way too
much system time and I wonder what is responsible.

I try to run readprofile.  But, of course, Ubuntu doesn't enable it by
default.  Dang!

The reason we boot-time enable it is that it takes a big bufffer that we
generally can only bootmem alloc.  But, does it hurt to at least try and
runtime-alloc it?

To use:
echo 2 > /sys/kernel/profile

Then run readprofile like normal.

This should fix the compile issue with allmodconfig.  I've compile-tested
on a bunch more configs now including a few more architectures.

Signed-off-by: Dave Hansen <dave@linux.vnet.ibm.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/ABI/testing/sysfs-profiling | 13 ++++++++++
 include/linux/profile.h                   |  8 +++---
 kernel/ksysfs.c                           | 35 ++++++++++++++++++++++++++
 kernel/profile.c                          | 41 +++++++++++++++++++++++--------
 4 files changed, 84 insertions(+), 13 deletions(-)
 create mode 100644 Documentation/ABI/testing/sysfs-profiling

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-profiling b/Documentation/ABI/testing/sysfs-profiling
new file mode 100644
index 000000000000..b02d8b8c173a
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-profiling
@@ -0,0 +1,13 @@
+What:		/sys/kernel/profile
+Date:		September 2008
+Contact:	Dave Hansen <dave@linux.vnet.ibm.com>
+Description:
+		/sys/kernel/profile is the runtime equivalent
+		of the boot-time profile= option.
+
+		You can get the same effect running:
+
+			echo 2 > /sys/kernel/profile
+
+		as you would by issuing profile=2 on the boot
+		command line.
diff --git a/include/linux/profile.h b/include/linux/profile.h
index 7e7087239af5..570045053ce9 100644
--- a/include/linux/profile.h
+++ b/include/linux/profile.h
@@ -35,7 +35,9 @@ enum profile_type {
 extern int prof_on __read_mostly;
 
 /* init basic kernel profiler */
-void __init profile_init(void);
+int profile_init(void);
+int profile_setup(char *str);
+int create_proc_profile(void);
 void profile_tick(int type);
 
 /*
@@ -84,9 +86,9 @@ struct pt_regs;
 
 #define prof_on 0
 
-static inline void profile_init(void)
+static inline int profile_init(void)
 {
-	return;
+	return 0;
 }
 
 static inline void profile_tick(int type)
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index e53bc30e9ba5..08dd8ed86c77 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -14,6 +14,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/kexec.h>
+#include <linux/profile.h>
 #include <linux/sched.h>
 
 #define KERNEL_ATTR_RO(_name) \
@@ -53,6 +54,37 @@ static ssize_t uevent_helper_store(struct kobject *kobj,
 KERNEL_ATTR_RW(uevent_helper);
 #endif
 
+#ifdef CONFIG_PROFILING
+static ssize_t profiling_show(struct kobject *kobj,
+				  struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", prof_on);
+}
+static ssize_t profiling_store(struct kobject *kobj,
+				   struct kobj_attribute *attr,
+				   const char *buf, size_t count)
+{
+	int ret;
+
+	if (prof_on)
+		return -EEXIST;
+	/*
+	 * This eventually calls into get_option() which
+	 * has a ton of callers and is not const.  It is
+	 * easiest to cast it away here.
+	 */
+	profile_setup((char *)buf);
+	ret = profile_init();
+	if (ret)
+		return ret;
+	ret = create_proc_profile();
+	if (ret)
+		return ret;
+	return count;
+}
+KERNEL_ATTR_RW(profiling);
+#endif
+
 #ifdef CONFIG_KEXEC
 static ssize_t kexec_loaded_show(struct kobject *kobj,
 				 struct kobj_attribute *attr, char *buf)
@@ -109,6 +141,9 @@ static struct attribute * kernel_attrs[] = {
 	&uevent_seqnum_attr.attr,
 	&uevent_helper_attr.attr,
 #endif
+#ifdef CONFIG_PROFILING
+	&profiling_attr.attr,
+#endif
 #ifdef CONFIG_KEXEC
 	&kexec_loaded_attr.attr,
 	&kexec_crash_loaded_attr.attr,
diff --git a/kernel/profile.c b/kernel/profile.c
index cd26bed4cc26..a9e422df6bf6 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -22,6 +22,8 @@
 #include <linux/cpu.h>
 #include <linux/highmem.h>
 #include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
 #include <asm/sections.h>
 #include <asm/irq_regs.h>
 #include <asm/ptrace.h>
@@ -50,11 +52,11 @@ static DEFINE_PER_CPU(int, cpu_profile_flip);
 static DEFINE_MUTEX(profile_flip_mutex);
 #endif /* CONFIG_SMP */
 
-static int __init profile_setup(char *str)
+int profile_setup(char *str)
 {
-	static char __initdata schedstr[] = "schedule";
-	static char __initdata sleepstr[] = "sleep";
-	static char __initdata kvmstr[] = "kvm";
+	static char schedstr[] = "schedule";
+	static char sleepstr[] = "sleep";
+	static char kvmstr[] = "kvm";
 	int par;
 
 	if (!strncmp(str, sleepstr, strlen(sleepstr))) {
@@ -100,14 +102,33 @@ static int __init profile_setup(char *str)
 __setup("profile=", profile_setup);
 
 
-void __init profile_init(void)
+int profile_init(void)
 {
+	int buffer_bytes;
 	if (!prof_on)
-		return;
+		return 0;
 
 	/* only text is profiled */
 	prof_len = (_etext - _stext) >> prof_shift;
-	prof_buffer = alloc_bootmem(prof_len*sizeof(atomic_t));
+	buffer_bytes = prof_len*sizeof(atomic_t);
+	if (!slab_is_available()) {
+		prof_buffer = alloc_bootmem(buffer_bytes);
+		return 0;
+	}
+
+	prof_buffer = kzalloc(buffer_bytes, GFP_KERNEL);
+	if (prof_buffer)
+		return 0;
+
+	prof_buffer = alloc_pages_exact(buffer_bytes, GFP_KERNEL|__GFP_ZERO);
+	if (prof_buffer)
+		return 0;
+
+	prof_buffer = vmalloc(buffer_bytes);
+	if (prof_buffer)
+		return 0;
+
+	return -ENOMEM;
 }
 
 /* Profile event notifications */
@@ -527,7 +548,7 @@ static void __init profile_nop(void *unused)
 {
 }
 
-static int __init create_hash_tables(void)
+static int create_hash_tables(void)
 {
 	int cpu;
 
@@ -575,14 +596,14 @@ out_cleanup:
 #define create_hash_tables()			({ 0; })
 #endif
 
-static int __init create_proc_profile(void)
+int create_proc_profile(void)
 {
 	struct proc_dir_entry *entry;
 
 	if (!prof_on)
 		return 0;
 	if (create_hash_tables())
-		return -1;
+		return -ENOMEM;
 	entry = proc_create("profile", S_IWUSR | S_IRUGO,
 			    NULL, &proc_profile_operations);
 	if (!entry)
-- 
cgit v1.2.3


From e1f8e87449147ffe5ea3de64a46af7de450ce279 Mon Sep 17 00:00:00 2001
From: Francois Cami <francois.cami@free.fr>
Date: Wed, 15 Oct 2008 22:01:59 -0700
Subject: Remove Andrew Morton's old email accounts

People can use the real name an an index into MAINTAINERS to find the
current email address.

Signed-off-by: Francois Cami <francois.cami@free.fr>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/SAK.txt                 |  2 +-
 Documentation/SubmittingDrivers       |  2 +-
 Documentation/networking/cs89x0.txt   |  4 ++--
 Documentation/networking/vortex.txt   |  4 ++--
 Documentation/scsi/ChangeLog.megaraid |  6 +++---
 drivers/char/vt.c                     |  2 +-
 drivers/net/3c509.c                   |  2 +-
 drivers/net/cs89x0.c                  | 13 ++++++-------
 drivers/parport/ChangeLog             |  2 +-
 fs/direct-io.c                        |  4 ++--
 fs/fs-writeback.c                     |  2 +-
 fs/mpage.c                            |  2 +-
 include/linux/journal-head.h          |  2 +-
 include/linux/task_io_accounting.h    |  2 +-
 kernel/printk.c                       |  2 +-
 kernel/workqueue.c                    |  2 +-
 mm/fadvise.c                          |  2 +-
 mm/page-writeback.c                   |  2 +-
 mm/pdflush.c                          |  2 +-
 mm/readahead.c                        |  2 +-
 mm/truncate.c                         |  2 +-
 21 files changed, 31 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/SAK.txt b/Documentation/SAK.txt
index b9019ca872ea..74be14679ed8 100644
--- a/Documentation/SAK.txt
+++ b/Documentation/SAK.txt
@@ -1,5 +1,5 @@
 Linux 2.4.2 Secure Attention Key (SAK) handling
-18 March 2001, Andrew Morton <akpm@osdl.org>
+18 March 2001, Andrew Morton
 
 An operating system's Secure Attention Key is a security tool which is
 provided as protection against trojan password capturing programs.  It
diff --git a/Documentation/SubmittingDrivers b/Documentation/SubmittingDrivers
index 24f2eb40cae5..99e72a81fa2f 100644
--- a/Documentation/SubmittingDrivers
+++ b/Documentation/SubmittingDrivers
@@ -41,7 +41,7 @@ Linux 2.4:
 Linux 2.6:
 	The same rules apply as 2.4 except that you should follow linux-kernel
 	to track changes in API's. The final contact point for Linux 2.6
-	submissions is Andrew Morton <akpm@osdl.org>.
+	submissions is Andrew Morton.
 
 What Criteria Determine Acceptance
 ----------------------------------
diff --git a/Documentation/networking/cs89x0.txt b/Documentation/networking/cs89x0.txt
index 6387d3decf85..c725d33b316f 100644
--- a/Documentation/networking/cs89x0.txt
+++ b/Documentation/networking/cs89x0.txt
@@ -3,7 +3,7 @@ NOTE
 ----
 
 This document was contributed by Cirrus Logic for kernel 2.2.5.  This version
-has been updated for 2.3.48 by Andrew Morton <andrewm@uow.edu.au>
+has been updated for 2.3.48 by Andrew Morton.
 
 Cirrus make a copy of this driver available at their website, as
 described below.  In general, you should use the driver version which
@@ -690,7 +690,7 @@ latest drivers and technical publications.
 6.4 Current maintainer
 
 In February 2000 the maintenance of this driver was assumed by Andrew
-Morton <akpm@zip.com.au>
+Morton.
 
 6.5 Kernel module parameters
 
diff --git a/Documentation/networking/vortex.txt b/Documentation/networking/vortex.txt
index 6356d3faed36..bd874daabde7 100644
--- a/Documentation/networking/vortex.txt
+++ b/Documentation/networking/vortex.txt
@@ -1,5 +1,5 @@
 Documentation/networking/vortex.txt
-Andrew Morton <andrewm@uow.edu.au>
+Andrew Morton
 30 April 2000
 
 
@@ -11,7 +11,7 @@ The driver was written by Donald Becker <becker@scyld.com>
 Don is no longer the prime maintainer of this version of the driver. 
 Please report problems to one or more of:
 
-  Andrew Morton <akpm@osdl.org>
+  Andrew Morton
   Netdev mailing list <netdev@vger.kernel.org>
   Linux kernel mailing list <linux-kernel@vger.kernel.org>
 
diff --git a/Documentation/scsi/ChangeLog.megaraid b/Documentation/scsi/ChangeLog.megaraid
index 37796fe45bd0..eaa4801f2ce6 100644
--- a/Documentation/scsi/ChangeLog.megaraid
+++ b/Documentation/scsi/ChangeLog.megaraid
@@ -409,7 +409,7 @@ i.	Function reordering so that inline functions are defined before they
 	megaraid_mbox_prepare_pthru, megaraid_mbox_prepare_epthru,
 	megaraid_busywait_mbox
 
-		- Andrew Morton <akpm@osdl.org>, 08.19.2004
+		- Andrew Morton, 08.19.2004
 		linux-scsi mailing list
 
 	"Something else to clean up after inclusion: every instance of an
@@ -471,13 +471,13 @@ vi.	Add support for 64-bit applications. Current drivers assume only
 vii.	Move the function declarations for the management module from
 	megaraid_mm.h to megaraid_mm.c
 
-		- Andrew Morton <akpm@osdl.org>, 08.19.2004
+		- Andrew Morton, 08.19.2004
 		linux-scsi mailing list
 
 viii.	Change default values for MEGARAID_NEWGEN, MEGARAID_MM, and
 	MEGARAID_MAILBOX to 'n' in Kconfig.megaraid
 
-		- Andrew Morton <akpm@osdl.org>, 08.19.2004
+		- Andrew Morton, 08.19.2004
 		linux-scsi mailing list
 
 ix.	replace udelay with msleep
diff --git a/drivers/char/vt.c b/drivers/char/vt.c
index a0f7ffb68087..d8f83e26e4a4 100644
--- a/drivers/char/vt.c
+++ b/drivers/char/vt.c
@@ -59,7 +59,7 @@
  * by Martin Mares <mj@atrey.karlin.mff.cuni.cz>, July 1998
  *
  * Removed old-style timers, introduced console_timer, made timer
- * deletion SMP-safe.  17Jun00, Andrew Morton <andrewm@uow.edu.au>
+ * deletion SMP-safe.  17Jun00, Andrew Morton
  *
  * Removed console_lock, enabled interrupts across all console operations
  * 13 March 2001, Andrew Morton
diff --git a/drivers/net/3c509.c b/drivers/net/3c509.c
index b9d097c9f6bb..3a7bc524af33 100644
--- a/drivers/net/3c509.c
+++ b/drivers/net/3c509.c
@@ -40,7 +40,7 @@
 		v1.14 10/15/97 Avoided waiting..discard message for fast machines -djb
 		v1.15 1/31/98 Faster recovery for Tx errors. -djb
 		v1.16 2/3/98 Different ID port handling to avoid sound cards. -djb
-		v1.18 12Mar2001 Andrew Morton <andrewm@uow.edu.au>
+		v1.18 12Mar2001 Andrew Morton
 			- Avoid bogus detect of 3c590's (Andrzej Krzysztofowicz)
 			- Reviewed against 1.18 from scyld.com
 		v1.18a 17Nov2001 Jeff Garzik <jgarzik@pobox.com>
diff --git a/drivers/net/cs89x0.c b/drivers/net/cs89x0.c
index a28de8182802..7107620f615d 100644
--- a/drivers/net/cs89x0.c
+++ b/drivers/net/cs89x0.c
@@ -36,8 +36,7 @@
 
   Alan Cox          : Removed 1.2 support, added 2.1 extra counters.
 
-  Andrew Morton     : andrewm@uow.edu.au
-                    : Kernel 2.3.48
+  Andrew Morton     : Kernel 2.3.48
                     : Handle kmalloc() failures
                     : Other resource allocation fixes
                     : Add SMP locks
@@ -49,7 +48,7 @@
                     : Fixed an out-of-mem bug in dma_rx()
                     : Updated Documentation/networking/cs89x0.txt
 
-  Andrew Morton     : andrewm@uow.edu.au / Kernel 2.3.99-pre1
+  Andrew Morton     : Kernel 2.3.99-pre1
                     : Use skb_reserve to longword align IP header (two places)
                     : Remove a delay loop from dma_rx()
                     : Replace '100' with HZ
@@ -57,11 +56,11 @@
                     : Added 'cs89x0_dma=N' kernel boot option
                     : Correctly initialise lp->lock in non-module compile
 
-  Andrew Morton     : andrewm@uow.edu.au / Kernel 2.3.99-pre4-1
+  Andrew Morton     : Kernel 2.3.99-pre4-1
                     : MOD_INC/DEC race fix (see
                     : http://www.uwsg.indiana.edu/hypermail/linux/kernel/0003.3/1532.html)
 
-  Andrew Morton     : andrewm@uow.edu.au / Kernel 2.4.0-test7-pre2
+  Andrew Morton     : Kernel 2.4.0-test7-pre2
                     : Enhanced EEPROM support to cover more devices,
                     :   abstracted IRQ mapping to support CONFIG_ARCH_CLPS7500 arch
                     :   (Jason Gunthorpe <jgg@ualberta.ca>)
@@ -156,7 +155,7 @@
 #include "cs89x0.h"
 
 static char version[] __initdata =
-"cs89x0.c: v2.4.3-pre1 Russell Nelson <nelson@crynwr.com>, Andrew Morton <andrewm@uow.edu.au>\n";
+"cs89x0.c: v2.4.3-pre1 Russell Nelson <nelson@crynwr.com>, Andrew Morton\n";
 
 #define DRV_NAME "cs89x0"
 
@@ -1877,7 +1876,7 @@ MODULE_PARM_DESC(dmasize , "(ignored)");
 MODULE_PARM_DESC(use_dma , "(ignored)");
 #endif
 
-MODULE_AUTHOR("Mike Cruse, Russwll Nelson <nelson@crynwr.com>, Andrew Morton <andrewm@uow.edu.au>");
+MODULE_AUTHOR("Mike Cruse, Russwll Nelson <nelson@crynwr.com>, Andrew Morton");
 MODULE_LICENSE("GPL");
 
 
diff --git a/drivers/parport/ChangeLog b/drivers/parport/ChangeLog
index db717c1d62a5..8565bbbeb6ec 100644
--- a/drivers/parport/ChangeLog
+++ b/drivers/parport/ChangeLog
@@ -311,7 +311,7 @@
 	* ieee1284_ops.c (parport_ieee1284_read_nibble): Reset nAutoFd
 	on timeout.  Matches 2.2.x behaviour.
 
-2001-03-02  Andrew Morton <andrewm@uow.edu.au>
+2001-03-02  Andrew Morton
 
 	* parport_pc.c (registered_parport): New static variable.
 	(parport_pc_find_ports): Set it when we register PCI driver.
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 9606ee848fd8..af0558dbe8b7 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -5,11 +5,11 @@
  *
  * O_DIRECT
  *
- * 04Jul2002	akpm@zip.com.au
+ * 04Jul2002	Andrew Morton
  *		Initial version
  * 11Sep2002	janetinc@us.ibm.com
  * 		added readv/writev support.
- * 29Oct2002	akpm@zip.com.au
+ * 29Oct2002	Andrew Morton
  *		rewrote bio_add_page() support.
  * 30Oct2002	pbadari@us.ibm.com
  *		added support for non-aligned IO.
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 25adfc3c693a..d0ff0b8cf309 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -8,7 +8,7 @@
  * pages against inodes.  ie: data writeback.  Writeout of the
  * inode itself is not handled here.
  *
- * 10Apr2002	akpm@zip.com.au
+ * 10Apr2002	Andrew Morton
  *		Split out of fs/inode.c
  *		Additions for address_space-based writeback
  */
diff --git a/fs/mpage.c b/fs/mpage.c
index dbcc7af76a15..552b80b3facc 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -6,7 +6,7 @@
  * Contains functions related to preparing and submitting BIOs which contain
  * multiple pagecache pages.
  *
- * 15May2002	akpm@zip.com.au
+ * 15May2002	Andrew Morton
  *		Initial version
  * 27Jun2002	axboe@suse.de
  *		use bio_add_page() to build bio's just the right size
diff --git a/include/linux/journal-head.h b/include/linux/journal-head.h
index 8a62d1e84b9b..bb70ebb6a2d5 100644
--- a/include/linux/journal-head.h
+++ b/include/linux/journal-head.h
@@ -3,7 +3,7 @@
  *
  * buffer_head fields for JBD
  *
- * 27 May 2001 Andrew Morton <akpm@digeo.com>
+ * 27 May 2001 Andrew Morton
  *	Created - pulled out of fs.h
  */
 
diff --git a/include/linux/task_io_accounting.h b/include/linux/task_io_accounting.h
index 5e88afc9a2fb..bdf855c2856f 100644
--- a/include/linux/task_io_accounting.h
+++ b/include/linux/task_io_accounting.h
@@ -5,7 +5,7 @@
  * Don't include this header file directly - it is designed to be dragged in via
  * sched.h.
  *
- * Blame akpm@osdl.org for all this.
+ * Blame Andrew Morton for all this.
  */
 
 struct task_io_accounting {
diff --git a/kernel/printk.c b/kernel/printk.c
index a430fd04008b..c3ab51dc9faf 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -13,7 +13,7 @@
  * Fixed SMP synchronization, 08/08/99, Manfred Spraul
  *     manfred@colorfullife.com
  * Rewrote bits to get rid of console_lock
- *	01Mar01 Andrew Morton <andrewm@uow.edu.au>
+ *	01Mar01 Andrew Morton
  */
 
 #include <linux/kernel.h>
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 4048e92aa04f..714afad46539 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -9,7 +9,7 @@
  * Derived from the taskqueue/keventd code by:
  *
  *   David Woodhouse <dwmw2@infradead.org>
- *   Andrew Morton <andrewm@uow.edu.au>
+ *   Andrew Morton
  *   Kai Petzke <wpp@marie.physik.tu-berlin.de>
  *   Theodore Ts'o <tytso@mit.edu>
  *
diff --git a/mm/fadvise.c b/mm/fadvise.c
index 343cfdfebd9e..a1da969bd980 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c
@@ -3,7 +3,7 @@
  *
  * Copyright (C) 2002, Linus Torvalds
  *
- * 11Jan2003	akpm@digeo.com
+ * 11Jan2003	Andrew Morton
  *		Initial version.
  */
 
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 24de8b65fdbd..c130a137c129 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -7,7 +7,7 @@
  * Contains functions related to writing back dirty pages at the
  * address_space level.
  *
- * 10Apr2002	akpm@zip.com.au
+ * 10Apr2002	Andrew Morton
  *		Initial version
  */
 
diff --git a/mm/pdflush.c b/mm/pdflush.c
index 0cbe0c60c6bf..a0a14c4d5072 100644
--- a/mm/pdflush.c
+++ b/mm/pdflush.c
@@ -3,7 +3,7 @@
  *
  * Copyright (C) 2002, Linus Torvalds.
  *
- * 09Apr2002	akpm@zip.com.au
+ * 09Apr2002	Andrew Morton
  *		Initial version
  * 29Feb2004	kaos@sgi.com
  *		Move worker thread creation to kthread to avoid chewing
diff --git a/mm/readahead.c b/mm/readahead.c
index 77e8ddf945e9..6cbd9a72fde2 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -3,7 +3,7 @@
  *
  * Copyright (C) 2002, Linus Torvalds
  *
- * 09Apr2002	akpm@zip.com.au
+ * 09Apr2002	Andrew Morton
  *		Initial version.
  */
 
diff --git a/mm/truncate.c b/mm/truncate.c
index 6650c1d878b4..e83e4b114ef1 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -3,7 +3,7 @@
  *
  * Copyright (C) 2002, Linus Torvalds
  *
- * 10Sep2002	akpm@zip.com.au
+ * 10Sep2002	Andrew Morton
  *		Initial version.
  */
 
-- 
cgit v1.2.3


From f7ad160b49c49dc9cd383b9184c6fa4a9b4f7ebb Mon Sep 17 00:00:00 2001
From: Alex Raimondi <raimondi@miromico.ch>
Date: Wed, 15 Oct 2008 22:02:03 -0700
Subject: include/linux/clk.h: fix comment

clk_get and clk_put may not be used from within interrupt context.  Change
comment to this function.

Signed-off-by: Alex Raimondi <raimondi@miromico.ch>
Signed-off-by: Haavard Skinnemoen <haavard.skinnemoen@atmel.com>
Acked-by: Russell King <rmk+kernel@arm.linux.org.uk>
Cc: john stultz <johnstul@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/clk.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/clk.h b/include/linux/clk.h
index 5ca8c6fddb56..778777316ea4 100644
--- a/include/linux/clk.h
+++ b/include/linux/clk.h
@@ -35,6 +35,8 @@ struct clk;
  * clk_get may return different clock producers depending on @dev.)
  *
  * Drivers must assume that the clock source is not enabled.
+ *
+ * clk_get should not be called from within interrupt context.
  */
 struct clk *clk_get(struct device *dev, const char *id);
 
@@ -76,6 +78,8 @@ unsigned long clk_get_rate(struct clk *clk);
  * Note: drivers must ensure that all clk_enable calls made on this
  * clock source are balanced by clk_disable calls prior to calling
  * this function.
+ *
+ * clk_put should not be called from within interrupt context.
  */
 void clk_put(struct clk *clk);
 
-- 
cgit v1.2.3


From f7a5000f7a8924e9c5fad1801616601d6dc65a17 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 15 Oct 2008 22:02:05 -0700
Subject: compat: move cp_compat_stat to common code

struct stat / compat_stat is the same on all architectures, so
cp_compat_stat should be, too.

Turns out it is, except that various architectures have slightly and some
high2lowuid/high2lowgid or the direct assignment instead of the
SET_UID/SET_GID that expands to the correct one anyway.

This patch replaces the arch-specific cp_compat_stat implementations with
a common one based on the x86-64 one.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: David S. Miller <davem@davemloft.net> [ sparc bits ]
Acked-by: Kyle McMartin <kyle@mcmartin.ca> [ parisc bits ]
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/ia32/sys_ia32.c         | 35 -----------------------------
 arch/mips/kernel/linux32.c        | 35 -----------------------------
 arch/parisc/kernel/sys_parisc32.c | 47 ---------------------------------------
 arch/powerpc/kernel/sys_ppc32.c   | 36 ------------------------------
 arch/s390/kernel/compat_linux.c   | 35 -----------------------------
 arch/sparc64/kernel/sys_sparc32.c | 35 -----------------------------
 arch/x86/ia32/sys_ia32.c          | 35 -----------------------------
 fs/compat.c                       | 39 ++++++++++++++++++++++++++++++++
 include/linux/compat.h            |  1 -
 9 files changed, 39 insertions(+), 259 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c
index bf196cbb3796..2362a8eefb30 100644
--- a/arch/ia64/ia32/sys_ia32.c
+++ b/arch/ia64/ia32/sys_ia32.c
@@ -118,41 +118,6 @@ sys32_execve (char __user *name, compat_uptr_t __user *argv, compat_uptr_t __use
 	return error;
 }
 
-int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf)
-{
-	compat_ino_t ino;
-	int err;
-
-	if ((u64) stat->size > MAX_NON_LFS ||
-	    !old_valid_dev(stat->dev) ||
-	    !old_valid_dev(stat->rdev))
-		return -EOVERFLOW;
-
-	ino = stat->ino;
-	if (sizeof(ino) < sizeof(stat->ino) && ino != stat->ino)
-		return -EOVERFLOW;
-
-	if (clear_user(ubuf, sizeof(*ubuf)))
-		return -EFAULT;
-
-	err  = __put_user(old_encode_dev(stat->dev), &ubuf->st_dev);
-	err |= __put_user(ino, &ubuf->st_ino);
-	err |= __put_user(stat->mode, &ubuf->st_mode);
-	err |= __put_user(stat->nlink, &ubuf->st_nlink);
-	err |= __put_user(high2lowuid(stat->uid), &ubuf->st_uid);
-	err |= __put_user(high2lowgid(stat->gid), &ubuf->st_gid);
-	err |= __put_user(old_encode_dev(stat->rdev), &ubuf->st_rdev);
-	err |= __put_user(stat->size, &ubuf->st_size);
-	err |= __put_user(stat->atime.tv_sec, &ubuf->st_atime);
-	err |= __put_user(stat->atime.tv_nsec, &ubuf->st_atime_nsec);
-	err |= __put_user(stat->mtime.tv_sec, &ubuf->st_mtime);
-	err |= __put_user(stat->mtime.tv_nsec, &ubuf->st_mtime_nsec);
-	err |= __put_user(stat->ctime.tv_sec, &ubuf->st_ctime);
-	err |= __put_user(stat->ctime.tv_nsec, &ubuf->st_ctime_nsec);
-	err |= __put_user(stat->blksize, &ubuf->st_blksize);
-	err |= __put_user(stat->blocks, &ubuf->st_blocks);
-	return err;
-}
 
 #if PAGE_SHIFT > IA32_PAGE_SHIFT
 
diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c
index 2fefb14414b7..89223a9bff2c 100644
--- a/arch/mips/kernel/linux32.c
+++ b/arch/mips/kernel/linux32.c
@@ -63,41 +63,6 @@
 #define merge_64(r1, r2) ((((r2) & 0xffffffffUL) << 32) + ((r1) & 0xffffffffUL))
 #endif
 
-/*
- * Revalidate the inode. This is required for proper NFS attribute caching.
- */
-
-int cp_compat_stat(struct kstat *stat, struct compat_stat __user *statbuf)
-{
-	struct compat_stat tmp;
-
-	if (!new_valid_dev(stat->dev) || !new_valid_dev(stat->rdev))
-		return -EOVERFLOW;
-
-	memset(&tmp, 0, sizeof(tmp));
-	tmp.st_dev = new_encode_dev(stat->dev);
-	tmp.st_ino = stat->ino;
-	if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino)
-		return -EOVERFLOW;
-	tmp.st_mode = stat->mode;
-	tmp.st_nlink = stat->nlink;
-	SET_UID(tmp.st_uid, stat->uid);
-	SET_GID(tmp.st_gid, stat->gid);
-	tmp.st_rdev = new_encode_dev(stat->rdev);
-	tmp.st_size = stat->size;
-	tmp.st_atime = stat->atime.tv_sec;
-	tmp.st_mtime = stat->mtime.tv_sec;
-	tmp.st_ctime = stat->ctime.tv_sec;
-#ifdef STAT_HAVE_NSEC
-	tmp.st_atime_nsec = stat->atime.tv_nsec;
-	tmp.st_mtime_nsec = stat->mtime.tv_nsec;
-	tmp.st_ctime_nsec = stat->ctime.tv_nsec;
-#endif
-	tmp.st_blocks = stat->blocks;
-	tmp.st_blksize = stat->blksize;
-	return copy_to_user(statbuf, &tmp, sizeof(tmp)) ? -EFAULT : 0;
-}
-
 asmlinkage unsigned long
 sys32_mmap2(unsigned long addr, unsigned long len, unsigned long prot,
          unsigned long flags, unsigned long fd, unsigned long pgoff)
diff --git a/arch/parisc/kernel/sys_parisc32.c b/arch/parisc/kernel/sys_parisc32.c
index 71efd6a28e2a..2c3af17e049c 100644
--- a/arch/parisc/kernel/sys_parisc32.c
+++ b/arch/parisc/kernel/sys_parisc32.c
@@ -237,53 +237,6 @@ int sys32_settimeofday(struct compat_timeval __user *tv, struct timezone __user
 	return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL);
 }
 
-int cp_compat_stat(struct kstat *stat, struct compat_stat __user *statbuf)
-{
-	compat_ino_t ino;
-	int err;
-
-	if (stat->size > MAX_NON_LFS || !new_valid_dev(stat->dev) ||
-	    !new_valid_dev(stat->rdev))
-		return -EOVERFLOW;
-
-	ino = stat->ino;
-	if (sizeof(ino) < sizeof(stat->ino) && ino != stat->ino)
-		return -EOVERFLOW;
-
-	err  = put_user(new_encode_dev(stat->dev), &statbuf->st_dev);
-	err |= put_user(ino, &statbuf->st_ino);
-	err |= put_user(stat->mode, &statbuf->st_mode);
-	err |= put_user(stat->nlink, &statbuf->st_nlink);
-	err |= put_user(0, &statbuf->st_reserved1);
-	err |= put_user(0, &statbuf->st_reserved2);
-	err |= put_user(new_encode_dev(stat->rdev), &statbuf->st_rdev);
-	err |= put_user(stat->size, &statbuf->st_size);
-	err |= put_user(stat->atime.tv_sec, &statbuf->st_atime);
-	err |= put_user(stat->atime.tv_nsec, &statbuf->st_atime_nsec);
-	err |= put_user(stat->mtime.tv_sec, &statbuf->st_mtime);
-	err |= put_user(stat->mtime.tv_nsec, &statbuf->st_mtime_nsec);
-	err |= put_user(stat->ctime.tv_sec, &statbuf->st_ctime);
-	err |= put_user(stat->ctime.tv_nsec, &statbuf->st_ctime_nsec);
-	err |= put_user(stat->blksize, &statbuf->st_blksize);
-	err |= put_user(stat->blocks, &statbuf->st_blocks);
-	err |= put_user(0, &statbuf->__unused1);
-	err |= put_user(0, &statbuf->__unused2);
-	err |= put_user(0, &statbuf->__unused3);
-	err |= put_user(0, &statbuf->__unused4);
-	err |= put_user(0, &statbuf->__unused5);
-	err |= put_user(0, &statbuf->st_fstype); /* not avail */
-	err |= put_user(0, &statbuf->st_realdev); /* not avail */
-	err |= put_user(0, &statbuf->st_basemode); /* not avail */
-	err |= put_user(0, &statbuf->st_spareshort);
-	err |= put_user(stat->uid, &statbuf->st_uid);
-	err |= put_user(stat->gid, &statbuf->st_gid);
-	err |= put_user(0, &statbuf->st_spare4[0]);
-	err |= put_user(0, &statbuf->st_spare4[1]);
-	err |= put_user(0, &statbuf->st_spare4[2]);
-
-	return err;
-}
-
 /*** copied from mips64 ***/
 /*
  * Ooo, nasty.  We need here to frob 32-bit unsigned longs to
diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c
index ff7de7b0797e..d00599bb24a1 100644
--- a/arch/powerpc/kernel/sys_ppc32.c
+++ b/arch/powerpc/kernel/sys_ppc32.c
@@ -61,42 +61,6 @@ asmlinkage long ppc32_select(u32 n, compat_ulong_t __user *inp,
 	return compat_sys_select((int)n, inp, outp, exp, compat_ptr(tvp_x));
 }
 
-int cp_compat_stat(struct kstat *stat, struct compat_stat __user *statbuf)
-{
-	compat_ino_t ino;
-	long err;
-
-	if (stat->size > MAX_NON_LFS || !new_valid_dev(stat->dev) ||
-	    !new_valid_dev(stat->rdev))
-		return -EOVERFLOW;
-
-	ino = stat->ino;
-	if (sizeof(ino) < sizeof(stat->ino) && ino != stat->ino)
-		return -EOVERFLOW;
-
-	err  = access_ok(VERIFY_WRITE, statbuf, sizeof(*statbuf)) ? 0 : -EFAULT;
-	err |= __put_user(new_encode_dev(stat->dev), &statbuf->st_dev);
-	err |= __put_user(ino, &statbuf->st_ino);
-	err |= __put_user(stat->mode, &statbuf->st_mode);
-	err |= __put_user(stat->nlink, &statbuf->st_nlink);
-	err |= __put_user(stat->uid, &statbuf->st_uid);
-	err |= __put_user(stat->gid, &statbuf->st_gid);
-	err |= __put_user(new_encode_dev(stat->rdev), &statbuf->st_rdev);
-	err |= __put_user(stat->size, &statbuf->st_size);
-	err |= __put_user(stat->atime.tv_sec, &statbuf->st_atime);
-	err |= __put_user(stat->atime.tv_nsec, &statbuf->st_atime_nsec);
-	err |= __put_user(stat->mtime.tv_sec, &statbuf->st_mtime);
-	err |= __put_user(stat->mtime.tv_nsec, &statbuf->st_mtime_nsec);
-	err |= __put_user(stat->ctime.tv_sec, &statbuf->st_ctime);
-	err |= __put_user(stat->ctime.tv_nsec, &statbuf->st_ctime_nsec);
-	err |= __put_user(stat->blksize, &statbuf->st_blksize);
-	err |= __put_user(stat->blocks, &statbuf->st_blocks);
-	err |= __put_user(0, &statbuf->__unused4[0]);
-	err |= __put_user(0, &statbuf->__unused4[1]);
-
-	return err;
-}
-
 /* Note: it is necessary to treat option as an unsigned int,
  * with the corresponding cast to a signed int to insure that the 
  * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
index 98e246dc0233..9b471d785ec1 100644
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -362,41 +362,6 @@ asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long high, unsigned
 		return sys_ftruncate(fd, (high << 32) | low);
 }
 
-int cp_compat_stat(struct kstat *stat, struct compat_stat __user *statbuf)
-{
-	compat_ino_t ino;
-	int err;
-
-	if (!old_valid_dev(stat->dev) || !old_valid_dev(stat->rdev))
-		return -EOVERFLOW;
-
-	ino = stat->ino;
-	if (sizeof(ino) < sizeof(stat->ino) && ino != stat->ino)
-		return -EOVERFLOW;
-
-	err = put_user(old_encode_dev(stat->dev), &statbuf->st_dev);
-	err |= put_user(stat->ino, &statbuf->st_ino);
-	err |= put_user(stat->mode, &statbuf->st_mode);
-	err |= put_user(stat->nlink, &statbuf->st_nlink);
-	err |= put_user(high2lowuid(stat->uid), &statbuf->st_uid);
-	err |= put_user(high2lowgid(stat->gid), &statbuf->st_gid);
-	err |= put_user(old_encode_dev(stat->rdev), &statbuf->st_rdev);
-	err |= put_user(stat->size, &statbuf->st_size);
-	err |= put_user(stat->atime.tv_sec, &statbuf->st_atime);
-	err |= put_user(stat->atime.tv_nsec, &statbuf->st_atime_nsec);
-	err |= put_user(stat->mtime.tv_sec, &statbuf->st_mtime);
-	err |= put_user(stat->mtime.tv_nsec, &statbuf->st_mtime_nsec);
-	err |= put_user(stat->ctime.tv_sec, &statbuf->st_ctime);
-	err |= put_user(stat->ctime.tv_nsec, &statbuf->st_ctime_nsec);
-	err |= put_user(stat->blksize, &statbuf->st_blksize);
-	err |= put_user(stat->blocks, &statbuf->st_blocks);
-/* fixme
-	err |= put_user(0, &statbuf->__unused4[0]);
-	err |= put_user(0, &statbuf->__unused4[1]);
-*/
-	return err;
-}
-
 asmlinkage long sys32_sched_rr_get_interval(compat_pid_t pid,
 				struct compat_timespec __user *interval)
 {
diff --git a/arch/sparc64/kernel/sys_sparc32.c b/arch/sparc64/kernel/sys_sparc32.c
index 3320c9d0075f..73a33dc3bcca 100644
--- a/arch/sparc64/kernel/sys_sparc32.c
+++ b/arch/sparc64/kernel/sys_sparc32.c
@@ -148,41 +148,6 @@ asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long high, unsigned
 		return sys_ftruncate(fd, (high << 32) | low);
 }
 
-int cp_compat_stat(struct kstat *stat, struct compat_stat __user *statbuf)
-{
-	compat_ino_t ino;
-	int err;
-
-	if (stat->size > MAX_NON_LFS || !old_valid_dev(stat->dev) ||
-	    !old_valid_dev(stat->rdev))
-		return -EOVERFLOW;
-
-	ino = stat->ino;
-	if (sizeof(ino) < sizeof(stat->ino) && ino != stat->ino)
-		return -EOVERFLOW;
-
-	err  = put_user(old_encode_dev(stat->dev), &statbuf->st_dev);
-	err |= put_user(stat->ino, &statbuf->st_ino);
-	err |= put_user(stat->mode, &statbuf->st_mode);
-	err |= put_user(stat->nlink, &statbuf->st_nlink);
-	err |= put_user(high2lowuid(stat->uid), &statbuf->st_uid);
-	err |= put_user(high2lowgid(stat->gid), &statbuf->st_gid);
-	err |= put_user(old_encode_dev(stat->rdev), &statbuf->st_rdev);
-	err |= put_user(stat->size, &statbuf->st_size);
-	err |= put_user(stat->atime.tv_sec, &statbuf->st_atime);
-	err |= put_user(stat->atime.tv_nsec, &statbuf->st_atime_nsec);
-	err |= put_user(stat->mtime.tv_sec, &statbuf->st_mtime);
-	err |= put_user(stat->mtime.tv_nsec, &statbuf->st_mtime_nsec);
-	err |= put_user(stat->ctime.tv_sec, &statbuf->st_ctime);
-	err |= put_user(stat->ctime.tv_nsec, &statbuf->st_ctime_nsec);
-	err |= put_user(stat->blksize, &statbuf->st_blksize);
-	err |= put_user(stat->blocks, &statbuf->st_blocks);
-	err |= put_user(0, &statbuf->__unused4[0]);
-	err |= put_user(0, &statbuf->__unused4[1]);
-
-	return err;
-}
-
 static int cp_compat_stat64(struct kstat *stat,
 			    struct compat_stat64 __user *statbuf)
 {
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index beda4232ce69..4d3ad8d78a4d 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -49,41 +49,6 @@
 
 #define AA(__x)		((unsigned long)(__x))
 
-int cp_compat_stat(struct kstat *kbuf, struct compat_stat __user *ubuf)
-{
-	compat_ino_t ino;
-
-	typeof(ubuf->st_uid) uid = 0;
-	typeof(ubuf->st_gid) gid = 0;
-	SET_UID(uid, kbuf->uid);
-	SET_GID(gid, kbuf->gid);
-	if (!old_valid_dev(kbuf->dev) || !old_valid_dev(kbuf->rdev))
-		return -EOVERFLOW;
-	if (kbuf->size >= 0x7fffffff)
-		return -EOVERFLOW;
-	ino = kbuf->ino;
-	if (sizeof(ino) < sizeof(kbuf->ino) && ino != kbuf->ino)
-		return -EOVERFLOW;
-	if (!access_ok(VERIFY_WRITE, ubuf, sizeof(struct compat_stat)) ||
-	    __put_user(old_encode_dev(kbuf->dev), &ubuf->st_dev) ||
-	    __put_user(ino, &ubuf->st_ino) ||
-	    __put_user(kbuf->mode, &ubuf->st_mode) ||
-	    __put_user(kbuf->nlink, &ubuf->st_nlink) ||
-	    __put_user(uid, &ubuf->st_uid) ||
-	    __put_user(gid, &ubuf->st_gid) ||
-	    __put_user(old_encode_dev(kbuf->rdev), &ubuf->st_rdev) ||
-	    __put_user(kbuf->size, &ubuf->st_size) ||
-	    __put_user(kbuf->atime.tv_sec, &ubuf->st_atime) ||
-	    __put_user(kbuf->atime.tv_nsec, &ubuf->st_atime_nsec) ||
-	    __put_user(kbuf->mtime.tv_sec, &ubuf->st_mtime) ||
-	    __put_user(kbuf->mtime.tv_nsec, &ubuf->st_mtime_nsec) ||
-	    __put_user(kbuf->ctime.tv_sec, &ubuf->st_ctime) ||
-	    __put_user(kbuf->ctime.tv_nsec, &ubuf->st_ctime_nsec) ||
-	    __put_user(kbuf->blksize, &ubuf->st_blksize) ||
-	    __put_user(kbuf->blocks, &ubuf->st_blocks))
-		return -EFAULT;
-	return 0;
-}
 
 asmlinkage long sys32_truncate64(char __user *filename,
 				 unsigned long offset_low,
diff --git a/fs/compat.c b/fs/compat.c
index aae13d31612f..5f9ec449c799 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -137,6 +137,45 @@ asmlinkage long compat_sys_utimes(char __user *filename, struct compat_timeval _
 	return compat_sys_futimesat(AT_FDCWD, filename, t);
 }
 
+static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf)
+{
+	compat_ino_t ino = stat->ino;
+	typeof(ubuf->st_uid) uid = 0;
+	typeof(ubuf->st_gid) gid = 0;
+	int err;
+
+	SET_UID(uid, stat->uid);
+	SET_GID(gid, stat->gid);
+
+	if ((u64) stat->size > MAX_NON_LFS ||
+	    !old_valid_dev(stat->dev) ||
+	    !old_valid_dev(stat->rdev))
+		return -EOVERFLOW;
+	if (sizeof(ino) < sizeof(stat->ino) && ino != stat->ino)
+		return -EOVERFLOW;
+
+	if (clear_user(ubuf, sizeof(*ubuf)))
+		return -EFAULT;
+
+	err  = __put_user(old_encode_dev(stat->dev), &ubuf->st_dev);
+	err |= __put_user(ino, &ubuf->st_ino);
+	err |= __put_user(stat->mode, &ubuf->st_mode);
+	err |= __put_user(stat->nlink, &ubuf->st_nlink);
+	err |= __put_user(uid, &ubuf->st_uid);
+	err |= __put_user(gid, &ubuf->st_gid);
+	err |= __put_user(old_encode_dev(stat->rdev), &ubuf->st_rdev);
+	err |= __put_user(stat->size, &ubuf->st_size);
+	err |= __put_user(stat->atime.tv_sec, &ubuf->st_atime);
+	err |= __put_user(stat->atime.tv_nsec, &ubuf->st_atime_nsec);
+	err |= __put_user(stat->mtime.tv_sec, &ubuf->st_mtime);
+	err |= __put_user(stat->mtime.tv_nsec, &ubuf->st_mtime_nsec);
+	err |= __put_user(stat->ctime.tv_sec, &ubuf->st_ctime);
+	err |= __put_user(stat->ctime.tv_nsec, &ubuf->st_ctime_nsec);
+	err |= __put_user(stat->blksize, &ubuf->st_blksize);
+	err |= __put_user(stat->blocks, &ubuf->st_blocks);
+	return err;
+}
+
 asmlinkage long compat_sys_newstat(char __user * filename,
 		struct compat_stat __user *statbuf)
 {
diff --git a/include/linux/compat.h b/include/linux/compat.h
index cf8d11cad5ae..999dddd8d939 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -78,7 +78,6 @@ typedef struct {
 	compat_sigset_word	sig[_COMPAT_NSIG_WORDS];
 } compat_sigset_t;
 
-extern int cp_compat_stat(struct kstat *, struct compat_stat __user *);
 extern int get_compat_timespec(struct timespec *, const struct compat_timespec __user *);
 extern int put_compat_timespec(const struct timespec *, struct compat_timespec __user *);
 
-- 
cgit v1.2.3


From b418da16dd44810e5d5a22bba377cca80512a524 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 15 Oct 2008 22:02:06 -0700
Subject: compat: generic compat get/settimeofday

Nothing arch specific in get/settimeofday.  The details of the timeval
conversion varied a little from arch to arch, but all with the same
results.

Also add an extern declaration for sys_tz to linux/time.h because externs
in .c files are fowned upon.  I'll kill the externs in various other files
in a sparate patch.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: David S. Miller <davem@davemloft.net> [ sparc bits ]
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Acked-by: Kyle McMartin <kyle@mcmartin.ca>
Cc: Matthew Wilcox <matthew@wil.cx>
Cc: Grant Grundler <grundler@parisc-linux.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/ia32/ia32_entry.S        |  4 +--
 arch/ia64/ia32/sys_ia32.c          | 56 -------------------------------
 arch/mips/kernel/linux32.c         | 66 -------------------------------------
 arch/mips/kernel/scall64-n32.S     |  4 +--
 arch/mips/kernel/scall64-o32.S     |  4 +--
 arch/parisc/kernel/sys_parisc32.c  | 58 ---------------------------------
 arch/parisc/kernel/syscall_table.S |  4 +--
 arch/powerpc/kernel/sys_ppc32.c    | 63 -----------------------------------
 arch/s390/kernel/compat_linux.c    | 67 --------------------------------------
 arch/s390/kernel/compat_linux.h    |  4 ---
 arch/s390/kernel/compat_wrapper.S  | 12 +++----
 arch/s390/kernel/syscalls.S        |  4 +--
 arch/sparc64/kernel/sys_sparc32.c  | 62 -----------------------------------
 arch/sparc64/kernel/systbls.S      |  4 +--
 arch/x86/ia32/ia32entry.S          |  4 +--
 arch/x86/ia32/sys_ia32.c           | 64 ------------------------------------
 include/linux/compat.h             |  5 +++
 include/linux/time.h               |  2 ++
 kernel/compat.c                    | 58 +++++++++++++++++++++++++++++++++
 19 files changed, 85 insertions(+), 460 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/ia32/ia32_entry.S b/arch/ia64/ia32/ia32_entry.S
index ff88c48c5d19..53505bb04771 100644
--- a/arch/ia64/ia32/ia32_entry.S
+++ b/arch/ia64/ia32/ia32_entry.S
@@ -251,8 +251,8 @@ ia32_syscall_table:
 	data8 compat_sys_setrlimit	  /* 75 */
 	data8 compat_sys_old_getrlimit
 	data8 compat_sys_getrusage
-	data8 sys32_gettimeofday
-	data8 sys32_settimeofday
+	data8 compat_sys_gettimeofday
+	data8 compat_sys_settimeofday
 	data8 sys32_getgroups16	  /* 80 */
 	data8 sys32_setgroups16
 	data8 sys32_old_select
diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c
index 2362a8eefb30..f4430bb4bbdc 100644
--- a/arch/ia64/ia32/sys_ia32.c
+++ b/arch/ia64/ia32/sys_ia32.c
@@ -1113,68 +1113,12 @@ sys32_pipe (int __user *fd)
 	return retval;
 }
 
-static inline long
-get_tv32 (struct timeval *o, struct compat_timeval __user *i)
-{
-	return (!access_ok(VERIFY_READ, i, sizeof(*i)) ||
-		(__get_user(o->tv_sec, &i->tv_sec) | __get_user(o->tv_usec, &i->tv_usec)));
-}
-
-static inline long
-put_tv32 (struct compat_timeval __user *o, struct timeval *i)
-{
-	return (!access_ok(VERIFY_WRITE, o, sizeof(*o)) ||
-		(__put_user(i->tv_sec, &o->tv_sec) | __put_user(i->tv_usec, &o->tv_usec)));
-}
-
 asmlinkage unsigned long
 sys32_alarm (unsigned int seconds)
 {
 	return alarm_setitimer(seconds);
 }
 
-/* Translations due to time_t size differences.  Which affects all
-   sorts of things, like timeval and itimerval.  */
-
-extern struct timezone sys_tz;
-
-asmlinkage long
-sys32_gettimeofday (struct compat_timeval __user *tv, struct timezone __user *tz)
-{
-	if (tv) {
-		struct timeval ktv;
-		do_gettimeofday(&ktv);
-		if (put_tv32(tv, &ktv))
-			return -EFAULT;
-	}
-	if (tz) {
-		if (copy_to_user(tz, &sys_tz, sizeof(sys_tz)))
-			return -EFAULT;
-	}
-	return 0;
-}
-
-asmlinkage long
-sys32_settimeofday (struct compat_timeval __user *tv, struct timezone __user *tz)
-{
-	struct timeval ktv;
-	struct timespec kts;
-	struct timezone ktz;
-
-	if (tv) {
-		if (get_tv32(&ktv, tv))
-			return -EFAULT;
-		kts.tv_sec = ktv.tv_sec;
-		kts.tv_nsec = ktv.tv_usec * 1000;
-	}
-	if (tz) {
-		if (copy_from_user(&ktz, tz, sizeof(ktz)))
-			return -EFAULT;
-	}
-
-	return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL);
-}
-
 struct sel_arg_struct {
 	unsigned int n;
 	unsigned int inp;
diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c
index 89223a9bff2c..aa2c55e3b55f 100644
--- a/arch/mips/kernel/linux32.c
+++ b/arch/mips/kernel/linux32.c
@@ -133,72 +133,6 @@ asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long __dummy,
 	return sys_ftruncate(fd, merge_64(a2, a3));
 }
 
-static inline long
-get_tv32(struct timeval *o, struct compat_timeval __user *i)
-{
-	return (!access_ok(VERIFY_READ, i, sizeof(*i)) ||
-		(__get_user(o->tv_sec, &i->tv_sec) |
-		 __get_user(o->tv_usec, &i->tv_usec)));
-}
-
-static inline long
-put_tv32(struct compat_timeval __user *o, struct timeval *i)
-{
-	return (!access_ok(VERIFY_WRITE, o, sizeof(*o)) ||
-		(__put_user(i->tv_sec, &o->tv_sec) |
-		 __put_user(i->tv_usec, &o->tv_usec)));
-}
-
-extern struct timezone sys_tz;
-
-asmlinkage int
-sys32_gettimeofday(struct compat_timeval __user *tv, struct timezone __user *tz)
-{
-	if (tv) {
-		struct timeval ktv;
-		do_gettimeofday(&ktv);
-		if (put_tv32(tv, &ktv))
-			return -EFAULT;
-	}
-	if (tz) {
-		if (copy_to_user(tz, &sys_tz, sizeof(sys_tz)))
-			return -EFAULT;
-	}
-	return 0;
-}
-
-static inline long get_ts32(struct timespec *o, struct compat_timeval __user *i)
-{
-	long usec;
-
-	if (!access_ok(VERIFY_READ, i, sizeof(*i)))
-		return -EFAULT;
-	if (__get_user(o->tv_sec, &i->tv_sec))
-		return -EFAULT;
-	if (__get_user(usec, &i->tv_usec))
-		return -EFAULT;
-	o->tv_nsec = usec * 1000;
-		return 0;
-}
-
-asmlinkage int
-sys32_settimeofday(struct compat_timeval __user *tv, struct timezone __user *tz)
-{
-	struct timespec kts;
-	struct timezone ktz;
-
- 	if (tv) {
-		if (get_ts32(&kts, tv))
-			return -EFAULT;
-	}
-	if (tz) {
-		if (copy_from_user(&ktz, tz, sizeof(ktz)))
-			return -EFAULT;
-	}
-
-	return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL);
-}
-
 asmlinkage int sys32_llseek(unsigned int fd, unsigned int offset_high,
 			    unsigned int offset_low, loff_t __user * result,
 			    unsigned int origin)
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index 324c5499dec2..e266b3aa6560 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -214,7 +214,7 @@ EXPORT(sysn32_call_table)
 	PTR	sys_fchown
 	PTR	sys_lchown
 	PTR	sys_umask
-	PTR	sys32_gettimeofday
+	PTR	compat_sys_gettimeofday
 	PTR	compat_sys_getrlimit		/* 6095 */
 	PTR	compat_sys_getrusage
 	PTR	compat_sys_sysinfo
@@ -279,7 +279,7 @@ EXPORT(sysn32_call_table)
 	PTR	sys_chroot
 	PTR	sys_sync
 	PTR	sys_acct
-	PTR	sys32_settimeofday
+	PTR	compat_sys_settimeofday
 	PTR	compat_sys_mount		/* 6160 */
 	PTR	sys_umount
 	PTR	sys_swapon
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index 85fedac99a57..6c7ef8313ebd 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -283,8 +283,8 @@ sys_call_table:
 	PTR	compat_sys_setrlimit		/* 4075 */
 	PTR	compat_sys_getrlimit
 	PTR	compat_sys_getrusage
-	PTR	sys32_gettimeofday
-	PTR	sys32_settimeofday
+	PTR	compat_sys_gettimeofday
+	PTR	compat_sys_settimeofday
 	PTR	sys_getgroups			/* 4080 */
 	PTR	sys_setgroups
 	PTR	sys_ni_syscall			/* old_select */
diff --git a/arch/parisc/kernel/sys_parisc32.c b/arch/parisc/kernel/sys_parisc32.c
index 2c3af17e049c..0838155b7a88 100644
--- a/arch/parisc/kernel/sys_parisc32.c
+++ b/arch/parisc/kernel/sys_parisc32.c
@@ -179,64 +179,6 @@ asmlinkage long sys32_sched_rr_get_interval(pid_t pid,
 	return ret;
 }
 
-static int
-put_compat_timeval(struct compat_timeval __user *u, struct timeval *t)
-{
-	struct compat_timeval t32;
-	t32.tv_sec = t->tv_sec;
-	t32.tv_usec = t->tv_usec;
-	return copy_to_user(u, &t32, sizeof t32);
-}
-
-static inline long get_ts32(struct timespec *o, struct compat_timeval __user *i)
-{
-	long usec;
-
-	if (__get_user(o->tv_sec, &i->tv_sec))
-		return -EFAULT;
-	if (__get_user(usec, &i->tv_usec))
-		return -EFAULT;
-	o->tv_nsec = usec * 1000;
-	return 0;
-}
-
-asmlinkage int
-sys32_gettimeofday(struct compat_timeval __user *tv, struct timezone __user *tz)
-{
-    extern void do_gettimeofday(struct timeval *tv);
-
-    if (tv) {
-	    struct timeval ktv;
-	    do_gettimeofday(&ktv);
-	    if (put_compat_timeval(tv, &ktv))
-		    return -EFAULT;
-    }
-    if (tz) {
-	    extern struct timezone sys_tz;
-	    if (copy_to_user(tz, &sys_tz, sizeof(sys_tz)))
-		    return -EFAULT;
-    }
-    return 0;
-}
-
-asmlinkage 
-int sys32_settimeofday(struct compat_timeval __user *tv, struct timezone __user *tz)
-{
-	struct timespec kts;
-	struct timezone ktz;
-
- 	if (tv) {
-		if (get_ts32(&kts, tv))
-			return -EFAULT;
-	}
-	if (tz) {
-		if (copy_from_user(&ktz, tz, sizeof(ktz)))
-			return -EFAULT;
-	}
-
-	return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL);
-}
-
 /*** copied from mips64 ***/
 /*
  * Ooo, nasty.  We need here to frob 32-bit unsigned longs to
diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index 6b5ac38f5a99..c7e59f548817 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S
@@ -149,8 +149,8 @@
 	ENTRY_COMP(getrlimit)
 	ENTRY_COMP(getrusage)
 	/* struct timeval and timezone are maybe?? consistent wide and narrow */
-	ENTRY_DIFF(gettimeofday)
-	ENTRY_DIFF(settimeofday)
+	ENTRY_COMP(gettimeofday)
+	ENTRY_COMP(settimeofday)
 	ENTRY_SAME(getgroups)		/* 80 */
 	ENTRY_SAME(setgroups)
 	/* struct socketaddr... */
diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c
index d00599bb24a1..bb1cfcfdbbbb 100644
--- a/arch/powerpc/kernel/sys_ppc32.c
+++ b/arch/powerpc/kernel/sys_ppc32.c
@@ -71,69 +71,6 @@ asmlinkage long compat_sys_sysfs(u32 option, u32 arg1, u32 arg2)
 	return sys_sysfs((int)option, arg1, arg2);
 }
 
-static inline long get_ts32(struct timespec *o, struct compat_timeval __user *i)
-{
-	long usec;
-
-	if (!access_ok(VERIFY_READ, i, sizeof(*i)))
-		return -EFAULT;
-	if (__get_user(o->tv_sec, &i->tv_sec))
-		return -EFAULT;
-	if (__get_user(usec, &i->tv_usec))
-		return -EFAULT;
-	o->tv_nsec = usec * 1000;
-	return 0;
-}
-
-static inline long put_tv32(struct compat_timeval __user *o, struct timeval *i)
-{
-	return (!access_ok(VERIFY_WRITE, o, sizeof(*o)) ||
-		(__put_user(i->tv_sec, &o->tv_sec) |
-		 __put_user(i->tv_usec, &o->tv_usec)));
-}
-
-
-
-
-/* Translations due to time_t size differences.  Which affects all
-   sorts of things, like timeval and itimerval.  */
-extern struct timezone sys_tz;
-
-asmlinkage long compat_sys_gettimeofday(struct compat_timeval __user *tv, struct timezone __user *tz)
-{
-	if (tv) {
-		struct timeval ktv;
-		do_gettimeofday(&ktv);
-		if (put_tv32(tv, &ktv))
-			return -EFAULT;
-	}
-	if (tz) {
-		if (copy_to_user(tz, &sys_tz, sizeof(sys_tz)))
-			return -EFAULT;
-	}
-	
-	return 0;
-}
-
-
-
-asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv, struct timezone __user *tz)
-{
-	struct timespec kts;
-	struct timezone ktz;
-	
- 	if (tv) {
-		if (get_ts32(&kts, tv))
-			return -EFAULT;
-	}
-	if (tz) {
-		if (copy_from_user(&ktz, tz, sizeof(ktz)))
-			return -EFAULT;
-	}
-
-	return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL);
-}
-
 #ifdef CONFIG_SYSVIPC
 long compat_sys_ipc(u32 call, u32 first, u32 second, u32 third, compat_uptr_t ptr,
 	       u32 fifth)
diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
index 9b471d785ec1..4646382af34f 100644
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -279,22 +279,6 @@ asmlinkage long sys32_getegid16(void)
 	return high2lowgid(current->egid);
 }
 
-/* 32-bit timeval and related flotsam.  */
-
-static inline long get_tv32(struct timeval *o, struct compat_timeval __user *i)
-{
-	return (!access_ok(VERIFY_READ, o, sizeof(*o)) ||
-		(__get_user(o->tv_sec, &i->tv_sec) ||
-		 __get_user(o->tv_usec, &i->tv_usec)));
-}
-
-static inline long put_tv32(struct compat_timeval __user *o, struct timeval *i)
-{
-	return (!access_ok(VERIFY_WRITE, o, sizeof(*o)) ||
-		(__put_user(i->tv_sec, &o->tv_sec) ||
-		 __put_user(i->tv_usec, &o->tv_usec)));
-}
-
 /*
  * sys32_ipc() is the de-multiplexer for the SysV IPC calls in 32bit emulation.
  *
@@ -522,57 +506,6 @@ sys32_delete_module(const char __user *name_user, unsigned int flags)
 
 #endif  /* CONFIG_MODULES */
 
-/* Translations due to time_t size differences.  Which affects all
-   sorts of things, like timeval and itimerval.  */
-
-extern struct timezone sys_tz;
-
-asmlinkage long sys32_gettimeofday(struct compat_timeval __user *tv, struct timezone __user *tz)
-{
-	if (tv) {
-		struct timeval ktv;
-		do_gettimeofday(&ktv);
-		if (put_tv32(tv, &ktv))
-			return -EFAULT;
-	}
-	if (tz) {
-		if (copy_to_user(tz, &sys_tz, sizeof(sys_tz)))
-			return -EFAULT;
-	}
-	return 0;
-}
-
-static inline long get_ts32(struct timespec *o, struct compat_timeval __user *i)
-{
-	long usec;
-
-	if (!access_ok(VERIFY_READ, i, sizeof(*i)))
-		return -EFAULT;
-	if (__get_user(o->tv_sec, &i->tv_sec))
-		return -EFAULT;
-	if (__get_user(usec, &i->tv_usec))
-		return -EFAULT;
-	o->tv_nsec = usec * 1000;
-	return 0;
-}
-
-asmlinkage long sys32_settimeofday(struct compat_timeval __user *tv, struct timezone __user *tz)
-{
-	struct timespec kts;
-	struct timezone ktz;
-
- 	if (tv) {
-		if (get_ts32(&kts, tv))
-			return -EFAULT;
-	}
-	if (tz) {
-		if (copy_from_user(&ktz, tz, sizeof(ktz)))
-			return -EFAULT;
-	}
-
-	return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL);
-}
-
 asmlinkage long sys32_pread64(unsigned int fd, char __user *ubuf,
 				size_t count, u32 poshi, u32 poslo)
 {
diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h
index 05f8516366ab..836a28842900 100644
--- a/arch/s390/kernel/compat_linux.h
+++ b/arch/s390/kernel/compat_linux.h
@@ -202,10 +202,6 @@ long sys32_execve(void);
 long sys32_init_module(void __user *umod, unsigned long len,
 		       const char __user *uargs);
 long sys32_delete_module(const char __user *name_user, unsigned int flags);
-long sys32_gettimeofday(struct compat_timeval __user *tv,
-			struct timezone __user *tz);
-long sys32_settimeofday(struct compat_timeval __user *tv,
-			struct timezone __user *tz);
 long sys32_pread64(unsigned int fd, char __user *ubuf, size_t count,
 		   u32 poshi, u32 poslo);
 long sys32_pwrite64(unsigned int fd, const char __user *ubuf,
diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S
index ee51ca9e23b5..fc2c97197a53 100644
--- a/arch/s390/kernel/compat_wrapper.S
+++ b/arch/s390/kernel/compat_wrapper.S
@@ -332,17 +332,17 @@ compat_sys_getrusage_wrapper:
 	llgtr	%r3,%r3			# struct rusage_emu31 *
 	jg	compat_sys_getrusage	# branch to system call
 
-	.globl	sys32_gettimeofday_wrapper
-sys32_gettimeofday_wrapper:
+	.globl	compat_sys_gettimeofday_wrapper
+compat_sys_gettimeofday_wrapper:
 	llgtr	%r2,%r2			# struct timeval_emu31 *
 	llgtr	%r3,%r3			# struct timezone *
-	jg	sys32_gettimeofday	# branch to system call
+	jg	compat_sys_gettimeofday	# branch to system call
 
-	.globl	sys32_settimeofday_wrapper
-sys32_settimeofday_wrapper:
+	.globl	compat_sys_settimeofday_wrapper
+compat_sys_settimeofday_wrapper:
 	llgtr	%r2,%r2			# struct timeval_emu31 *
 	llgtr	%r3,%r3			# struct timezone *
-	jg	sys32_settimeofday	# branch to system call
+	jg	compat_sys_settimeofday	# branch to system call
 
 	.globl	sys32_getgroups16_wrapper
 sys32_getgroups16_wrapper:
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 3ae303914b42..2d61787949d5 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -86,8 +86,8 @@ SYSCALL(sys_sethostname,sys_sethostname,sys32_sethostname_wrapper)
 SYSCALL(sys_setrlimit,sys_setrlimit,compat_sys_setrlimit_wrapper)	/* 75 */
 SYSCALL(sys_old_getrlimit,sys_getrlimit,compat_sys_old_getrlimit_wrapper)
 SYSCALL(sys_getrusage,sys_getrusage,compat_sys_getrusage_wrapper)
-SYSCALL(sys_gettimeofday,sys_gettimeofday,sys32_gettimeofday_wrapper)
-SYSCALL(sys_settimeofday,sys_settimeofday,sys32_settimeofday_wrapper)
+SYSCALL(sys_gettimeofday,sys_gettimeofday,compat_sys_gettimeofday_wrapper)
+SYSCALL(sys_settimeofday,sys_settimeofday,compat_sys_settimeofday_wrapper)
 SYSCALL(sys_getgroups16,sys_ni_syscall,sys32_getgroups16_wrapper)	/* 80 old getgroups16 syscall */
 SYSCALL(sys_setgroups16,sys_ni_syscall,sys32_setgroups16_wrapper)	/* old setgroups16 syscall */
 NI_SYSCALL							/* old select syscall */
diff --git a/arch/sparc64/kernel/sys_sparc32.c b/arch/sparc64/kernel/sys_sparc32.c
index 73a33dc3bcca..e800503879e4 100644
--- a/arch/sparc64/kernel/sys_sparc32.c
+++ b/arch/sparc64/kernel/sys_sparc32.c
@@ -58,15 +58,6 @@
 #include <asm/mmu_context.h>
 #include <asm/compat_signal.h>
 
-/* 32-bit timeval and related flotsam.  */
-
-static inline long put_tv32(struct compat_timeval __user *o, struct timeval *i)
-{
-	return (!access_ok(VERIFY_WRITE, o, sizeof(*o)) ||
-		(__put_user(i->tv_sec, &o->tv_sec) |
-		 __put_user(i->tv_usec, &o->tv_usec)));
-}
-
 #ifdef CONFIG_SYSVIPC                                                        
 asmlinkage long compat_sys_ipc(u32 call, u32 first, u32 second, u32 third, compat_uptr_t ptr, u32 fifth)
 {
@@ -487,59 +478,6 @@ asmlinkage long sys32_delete_module(const char __user *name_user)
 
 #endif  /* CONFIG_MODULES */
 
-/* Translations due to time_t size differences.  Which affects all
-   sorts of things, like timeval and itimerval.  */
-
-extern struct timezone sys_tz;
-
-asmlinkage long sys32_gettimeofday(struct compat_timeval __user *tv,
-				   struct timezone __user *tz)
-{
-	if (tv) {
-		struct timeval ktv;
-		do_gettimeofday(&ktv);
-		if (put_tv32(tv, &ktv))
-			return -EFAULT;
-	}
-	if (tz) {
-		if (copy_to_user(tz, &sys_tz, sizeof(sys_tz)))
-			return -EFAULT;
-	}
-	return 0;
-}
-
-static inline long get_ts32(struct timespec *o, struct compat_timeval __user *i)
-{
-	long usec;
-
-	if (!access_ok(VERIFY_READ, i, sizeof(*i)))
-		return -EFAULT;
-	if (__get_user(o->tv_sec, &i->tv_sec))
-		return -EFAULT;
-	if (__get_user(usec, &i->tv_usec))
-		return -EFAULT;
-	o->tv_nsec = usec * 1000;
-	return 0;
-}
-
-asmlinkage long sys32_settimeofday(struct compat_timeval __user *tv,
-				   struct timezone __user *tz)
-{
-	struct timespec kts;
-	struct timezone ktz;
-
- 	if (tv) {
-		if (get_ts32(&kts, tv))
-			return -EFAULT;
-	}
-	if (tz) {
-		if (copy_from_user(&ktz, tz, sizeof(ktz)))
-			return -EFAULT;
-	}
-
-	return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL);
-}
-
 asmlinkage compat_ssize_t sys32_pread64(unsigned int fd,
 					char __user *ubuf,
 					compat_size_t count,
diff --git a/arch/sparc64/kernel/systbls.S b/arch/sparc64/kernel/systbls.S
index 5daee4b04dd5..b2fa4c163638 100644
--- a/arch/sparc64/kernel/systbls.S
+++ b/arch/sparc64/kernel/systbls.S
@@ -41,8 +41,8 @@ sys_call_table32:
 /*100*/ .word sys32_getpriority, sys32_rt_sigreturn, sys32_rt_sigaction, sys32_rt_sigprocmask, sys32_rt_sigpending
 	.word compat_sys_rt_sigtimedwait, sys32_rt_sigqueueinfo, compat_sys_rt_sigsuspend, sys_setresuid, sys_getresuid
 /*110*/	.word sys_setresgid, sys_getresgid, sys_setregid, sys_nis_syscall, sys_nis_syscall
-	.word sys32_getgroups, sys32_gettimeofday, sys32_getrusage, sys_nis_syscall, sys_getcwd
-/*120*/	.word compat_sys_readv, compat_sys_writev, sys32_settimeofday, sys_fchown16, sys_fchmod
+	.word sys32_getgroups, compat_sys_gettimeofday, sys32_getrusage, sys_nis_syscall, sys_getcwd
+/*120*/	.word compat_sys_readv, compat_sys_writev, compat_sys_settimeofday, sys_fchown16, sys_fchmod
 	.word sys_nis_syscall, sys_setreuid16, sys_setregid16, sys_rename, sys_truncate
 /*130*/	.word sys_ftruncate, sys_flock, compat_sys_lstat64, sys_nis_syscall, sys_nis_syscall
 	.word sys_nis_syscall, sys32_mkdir, sys_rmdir, compat_sys_utimes, compat_sys_stat64
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index eb4314768bf7..256b00b61892 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -571,8 +571,8 @@ ia32_sys_call_table:
 	.quad compat_sys_setrlimit	/* 75 */
 	.quad compat_sys_old_getrlimit	/* old_getrlimit */
 	.quad compat_sys_getrusage
-	.quad sys32_gettimeofday
-	.quad sys32_settimeofday
+	.quad compat_sys_gettimeofday
+	.quad compat_sys_settimeofday
 	.quad sys_getgroups16	/* 80 */
 	.quad sys_setgroups16
 	.quad sys32_old_select
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 4d3ad8d78a4d..2e09dcd3c0a6 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -367,75 +367,11 @@ asmlinkage long sys32_rt_sigprocmask(int how, compat_sigset_t __user *set,
 	return 0;
 }
 
-static inline long get_tv32(struct timeval *o, struct compat_timeval __user *i)
-{
-	int err = -EFAULT;
-
-	if (access_ok(VERIFY_READ, i, sizeof(*i))) {
-		err = __get_user(o->tv_sec, &i->tv_sec);
-		err |= __get_user(o->tv_usec, &i->tv_usec);
-	}
-	return err;
-}
-
-static inline long put_tv32(struct compat_timeval __user *o, struct timeval *i)
-{
-	int err = -EFAULT;
-
-	if (access_ok(VERIFY_WRITE, o, sizeof(*o))) {
-		err = __put_user(i->tv_sec, &o->tv_sec);
-		err |= __put_user(i->tv_usec, &o->tv_usec);
-	}
-	return err;
-}
-
 asmlinkage long sys32_alarm(unsigned int seconds)
 {
 	return alarm_setitimer(seconds);
 }
 
-/*
- * Translations due to time_t size differences. Which affects all
- * sorts of things, like timeval and itimerval.
- */
-asmlinkage long sys32_gettimeofday(struct compat_timeval __user *tv,
-				   struct timezone __user *tz)
-{
-	if (tv) {
-		struct timeval ktv;
-
-		do_gettimeofday(&ktv);
-		if (put_tv32(tv, &ktv))
-			return -EFAULT;
-	}
-	if (tz) {
-		if (copy_to_user(tz, &sys_tz, sizeof(sys_tz)))
-			return -EFAULT;
-	}
-	return 0;
-}
-
-asmlinkage long sys32_settimeofday(struct compat_timeval __user *tv,
-				   struct timezone __user *tz)
-{
-	struct timeval ktv;
-	struct timespec kts;
-	struct timezone ktz;
-
-	if (tv) {
-		if (get_tv32(&ktv, tv))
-			return -EFAULT;
-		kts.tv_sec = ktv.tv_sec;
-		kts.tv_nsec = ktv.tv_usec * NSEC_PER_USEC;
-	}
-	if (tz) {
-		if (copy_from_user(&ktz, tz, sizeof(ktz)))
-			return -EFAULT;
-	}
-
-	return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL);
-}
-
 struct sel_arg_struct {
 	unsigned int n;
 	unsigned int inp;
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 999dddd8d939..f061a1ea1b74 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -234,6 +234,11 @@ extern int get_compat_itimerspec(struct itimerspec *dst,
 extern int put_compat_itimerspec(struct compat_itimerspec __user *dst,
 				 const struct itimerspec *src);
 
+asmlinkage long compat_sys_gettimeofday(struct compat_timeval __user *tv,
+		struct timezone __user *tz);
+asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv,
+		struct timezone __user *tz);
+
 asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp);
 
 extern int compat_printk(const char *fmt, ...);
diff --git a/include/linux/time.h b/include/linux/time.h
index e15206a7e82e..51e883df0fa5 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -29,6 +29,8 @@ struct timezone {
 
 #ifdef __KERNEL__
 
+extern struct timezone sys_tz;
+
 /* Parameters used to convert the timespec values: */
 #define MSEC_PER_SEC	1000L
 #define USEC_PER_MSEC	1000L
diff --git a/kernel/compat.c b/kernel/compat.c
index 32c254a8ab9a..143990e48cb9 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -26,6 +26,64 @@
 
 #include <asm/uaccess.h>
 
+/*
+ * Note that the native side is already converted to a timespec, because
+ * that's what we want anyway.
+ */
+static int compat_get_timeval(struct timespec *o,
+		struct compat_timeval __user *i)
+{
+	long usec;
+
+	if (get_user(o->tv_sec, &i->tv_sec) ||
+	    get_user(usec, &i->tv_usec))
+		return -EFAULT;
+	o->tv_nsec = usec * 1000;
+	return 0;
+}
+
+static int compat_put_timeval(struct compat_timeval __user *o,
+		struct timeval *i)
+{
+	return (put_user(i->tv_sec, &o->tv_sec) ||
+		put_user(i->tv_usec, &o->tv_usec)) ? -EFAULT : 0;
+}
+
+asmlinkage long compat_sys_gettimeofday(struct compat_timeval __user *tv,
+		struct timezone __user *tz)
+{
+	if (tv) {
+		struct timeval ktv;
+		do_gettimeofday(&ktv);
+		if (compat_put_timeval(tv, &ktv))
+			return -EFAULT;
+	}
+	if (tz) {
+		if (copy_to_user(tz, &sys_tz, sizeof(sys_tz)))
+			return -EFAULT;
+	}
+
+	return 0;
+}
+
+asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv,
+		struct timezone __user *tz)
+{
+	struct timespec kts;
+	struct timezone ktz;
+
+	if (tv) {
+		if (compat_get_timeval(&kts, tv))
+			return -EFAULT;
+	}
+	if (tz) {
+		if (copy_from_user(&ktz, tz, sizeof(ktz)))
+			return -EFAULT;
+	}
+
+	return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL);
+}
+
 int get_compat_timespec(struct timespec *ts, const struct compat_timespec __user *cts)
 {
 	return (!access_ok(VERIFY_READ, cts, sizeof(*cts)) ||
-- 
cgit v1.2.3


From 56d936607408d71c4141b2ed501410b072f1e211 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 15 Oct 2008 22:02:10 -0700
Subject: introduce generic iommu_num_pages function

This patch introduces the generic iommu_num_pages function. It can be used by
a given memory area.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: Muli Ben-Yehuda <muli@il.ibm.com>
Cc: Dave Airlie <airlied@linux.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/iommu-helper.h | 3 +++
 lib/iommu-helper.c           | 9 +++++++++
 2 files changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/iommu-helper.h b/include/linux/iommu-helper.h
index a6d0586e2bf7..3b068e5b5671 100644
--- a/include/linux/iommu-helper.h
+++ b/include/linux/iommu-helper.h
@@ -23,4 +23,7 @@ extern unsigned long iommu_area_alloc(unsigned long *map, unsigned long size,
 extern void iommu_area_free(unsigned long *map, unsigned long start,
 			    unsigned int nr);
 
+extern unsigned long iommu_num_pages(unsigned long addr, unsigned long len,
+				     unsigned long io_page_size);
+
 #endif
diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c
index 5d90074dca75..75dbda03f4fb 100644
--- a/lib/iommu-helper.c
+++ b/lib/iommu-helper.c
@@ -79,3 +79,12 @@ void iommu_area_free(unsigned long *map, unsigned long start, unsigned int nr)
 	}
 }
 EXPORT_SYMBOL(iommu_area_free);
+
+unsigned long iommu_num_pages(unsigned long addr, unsigned long len,
+			      unsigned long io_page_size)
+{
+	unsigned long size = (addr & (io_page_size - 1)) + len;
+
+	return DIV_ROUND_UP(size, io_page_size);
+}
+EXPORT_SYMBOL(iommu_num_pages);
-- 
cgit v1.2.3


From 53112488bebe25c0f5f8a002470046c0fe9a6c61 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill@shutemov.name>
Date: Wed, 15 Oct 2008 22:02:37 -0700
Subject: alpha: introduce field 'taso' into struct linux_binprm

This change is Alpha-specific.  It adds field 'taso' into struct
linux_binprm to remember if the application is TASO.  Previously, field
sh_bang was used for this purpose.

Signed-off-by: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/include/asm/a.out.h | 2 +-
 fs/exec.c                      | 2 +-
 include/linux/binfmts.h        | 3 +++
 3 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/include/asm/a.out.h b/arch/alpha/include/asm/a.out.h
index 02ce8473870a..acdc681231cb 100644
--- a/arch/alpha/include/asm/a.out.h
+++ b/arch/alpha/include/asm/a.out.h
@@ -95,7 +95,7 @@ struct exec
    Worse, we have to notice the start address before swapping to use
    /sbin/loader, which of course is _not_ a TASO application.  */
 #define SET_AOUT_PERSONALITY(BFPM, EX) \
-	set_personality (((BFPM->sh_bang || EX.ah.entry < 0x100000000L \
+	set_personality (((BFPM->taso || EX.ah.entry < 0x100000000L \
 			   ? ADDR_LIMIT_32BIT : 0) | PER_OSF4))
 
 #endif /* __KERNEL__ */
diff --git a/fs/exec.c b/fs/exec.c
index 7b5ed50eadeb..4a790f2e224e 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1189,7 +1189,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
 			return retval;
 
 		/* Remember if the application is TASO.  */
-		bprm->sh_bang = eh->ah.entry < 0x100000000UL;
+		bprm->taso = eh->ah.entry < 0x100000000UL;
 
 		bprm->file = file;
 		bprm->loader = loader;
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 826f62350805..54980a3c7602 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -36,6 +36,9 @@ struct linux_binprm{
 	unsigned long p; /* current top of mem */
 	unsigned int sh_bang:1,
 		     misc_bang:1;
+#ifdef __alpha__
+	unsigned int taso:1;
+#endif
 	struct file * file;
 	int e_uid, e_gid;
 	kernel_cap_t cap_post_exec_permitted;
-- 
cgit v1.2.3


From bf2a9a39639b8b51377905397a5005f444e9a892 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill@shutemov.name>
Date: Wed, 15 Oct 2008 22:02:39 -0700
Subject: Allow recursion in binfmt_script and binfmt_misc

binfmt_script and binfmt_misc disallow recursion to avoid stack overflow
using sh_bang and misc_bang.  It causes problem in some cases:

$ echo '#!/bin/ls' > /tmp/t0
$ echo '#!/tmp/t0' > /tmp/t1
$ echo '#!/tmp/t1' > /tmp/t2
$ chmod +x /tmp/t*
$ /tmp/t2
zsh: exec format error: /tmp/t2

Similar problem with binfmt_misc.

This patch introduces field 'recursion_depth' into struct linux_binprm to
track recursion level in binfmt_misc and binfmt_script.  If recursion
level more then BINPRM_MAX_RECURSION it generates -ENOEXEC.

[akpm@linux-foundation.org: make linux_binprm.recursion_depth a uint]
Signed-off-by: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/binfmt_em86.c        | 2 +-
 fs/binfmt_misc.c        | 4 ++--
 fs/binfmt_script.c      | 5 +++--
 include/linux/binfmts.h | 2 ++
 4 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c
index f9c88d0c8ced..32fb00b52cd0 100644
--- a/fs/binfmt_em86.c
+++ b/fs/binfmt_em86.c
@@ -43,7 +43,7 @@ static int load_em86(struct linux_binprm *bprm,struct pt_regs *regs)
 			return -ENOEXEC;
 	}
 
-	bprm->sh_bang = 1;	/* Well, the bang-shell is implicit... */
+	bprm->recursion_depth++; /* Well, the bang-shell is implicit... */
 	allow_write_access(bprm->file);
 	fput(bprm->file);
 	bprm->file = NULL;
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 8d7e88e02e0f..f2744ab4e5b3 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -117,7 +117,7 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 		goto _ret;
 
 	retval = -ENOEXEC;
-	if (bprm->misc_bang)
+	if (bprm->recursion_depth > BINPRM_MAX_RECURSION)
 		goto _ret;
 
 	/* to keep locking time low, we copy the interpreter string */
@@ -197,7 +197,7 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	if (retval < 0)
 		goto _error;
 
-	bprm->misc_bang = 1;
+	bprm->recursion_depth++;
 
 	retval = search_binary_handler (bprm, regs);
 	if (retval < 0)
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index 9e3963f7ebf1..08343505e184 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -22,14 +22,15 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs)
 	char interp[BINPRM_BUF_SIZE];
 	int retval;
 
-	if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!') || (bprm->sh_bang)) 
+	if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!') ||
+	    (bprm->recursion_depth > BINPRM_MAX_RECURSION))
 		return -ENOEXEC;
 	/*
 	 * This section does the #! interpretation.
 	 * Sorta complicated, but hopefully it will work.  -TYT
 	 */
 
-	bprm->sh_bang = 1;
+	bprm->recursion_depth++;
 	allow_write_access(bprm->file);
 	fput(bprm->file);
 	bprm->file = NULL;
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 54980a3c7602..7394b5b349ff 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -39,6 +39,7 @@ struct linux_binprm{
 #ifdef __alpha__
 	unsigned int taso:1;
 #endif
+	unsigned int recursion_depth;
 	struct file * file;
 	int e_uid, e_gid;
 	kernel_cap_t cap_post_exec_permitted;
@@ -61,6 +62,7 @@ struct linux_binprm{
 #define BINPRM_FLAGS_EXECFD_BIT 1
 #define BINPRM_FLAGS_EXECFD (1 << BINPRM_FLAGS_EXECFD_BIT)
 
+#define BINPRM_MAX_RECURSION 4
 
 /*
  * This structure defines the functions that are used to load the binary formats that
-- 
cgit v1.2.3


From 2bec19feabd53cba75e9dab0e79afbe868a37113 Mon Sep 17 00:00:00 2001
From: Lennert Buytenhek <buytenh@wantstofly.org>
Date: Wed, 15 Oct 2008 22:02:44 -0700
Subject: orion_spi: handle 88F6183 erratum

Add support to orion_spi for the 88F6183 ARM SoC by adding code to work
around a 6183-specific erratum.

Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/spi/orion_spi.c       | 5 +++++
 include/linux/spi/orion_spi.h | 1 +
 2 files changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/spi/orion_spi.c b/drivers/spi/orion_spi.c
index b872bfaf4bd2..014becb7d530 100644
--- a/drivers/spi/orion_spi.c
+++ b/drivers/spi/orion_spi.c
@@ -364,6 +364,11 @@ static int orion_spi_setup(struct spi_device *spi)
 		return -EINVAL;
 	}
 
+	/* Fix ac timing if required.   */
+	if (orion_spi->spi_info->enable_clock_fix)
+		orion_spi_setbits(orion_spi, ORION_SPI_IF_CONFIG_REG,
+				  (1 << 14));
+
 	if (spi->bits_per_word == 0)
 		spi->bits_per_word = 8;
 
diff --git a/include/linux/spi/orion_spi.h b/include/linux/spi/orion_spi.h
index b4d9fa6f797c..decf6d8c77b7 100644
--- a/include/linux/spi/orion_spi.h
+++ b/include/linux/spi/orion_spi.h
@@ -11,6 +11,7 @@
 
 struct orion_spi_info {
 	u32	tclk;		/* no <linux/clk.h> support yet */
+	u32	enable_clock_fix;
 };
 
 
-- 
cgit v1.2.3


From 9d793b0bcbbbc37d80241862dfa5257963d5415e Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Wed, 15 Oct 2008 22:02:47 -0700
Subject: i2o: Fix 32/64bit DMA locking

The I2O ioctls assume 32bits.  In itself that is fine as they are old
cards and nobody uses 64bit.  However on LKML it was noted this
assumption is also made for allocated memory and is unsafe on 64bit
systems.

Fixing this is a mess.  It turns out there is tons of crap buried in a
header file that does racy 32/64bit filtering on the masks.

So we:
- Verify all callers of the racy code can sleep (i2o_dma_[re]alloc)
- Move the code into a new i2o/memory.c file
- Remove the gfp_mask argument so nobody can try and misuse the function
- Wrap a mutex around the problem area (a single mutex is easy to do and
  none of this is performance relevant)
- Switch the remaining problem kmalloc holdout to use i2o_dma_alloc

Cc: Markus Lidel <Markus.Lidel@shadowconnect.com>
Cc: Vasily Averin <vvs@sw.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/message/i2o/Makefile     |   2 +-
 drivers/message/i2o/device.c     |   2 +-
 drivers/message/i2o/exec-osm.c   |   4 +-
 drivers/message/i2o/i2o_config.c |  31 ++--
 drivers/message/i2o/iop.c        |   2 +-
 drivers/message/i2o/memory.c     | 313 +++++++++++++++++++++++++++++++++++++++
 drivers/message/i2o/pci.c        |  16 +-
 include/linux/i2o.h              | 292 ++----------------------------------
 8 files changed, 351 insertions(+), 311 deletions(-)
 create mode 100644 drivers/message/i2o/memory.c

(limited to 'include/linux')

diff --git a/drivers/message/i2o/Makefile b/drivers/message/i2o/Makefile
index 2c2e39aa1efa..b0982dacfd0a 100644
--- a/drivers/message/i2o/Makefile
+++ b/drivers/message/i2o/Makefile
@@ -5,7 +5,7 @@
 # In the future, some of these should be built conditionally.
 #
 
-i2o_core-y		+= iop.o driver.o device.o debug.o pci.o exec-osm.o
+i2o_core-y		+= iop.o driver.o device.o debug.o pci.o exec-osm.o memory.o
 i2o_bus-y		+= bus-osm.o
 i2o_config-y		+= config-osm.o
 obj-$(CONFIG_I2O)	+= i2o_core.o
diff --git a/drivers/message/i2o/device.c b/drivers/message/i2o/device.c
index 8774c670e668..54c2e9ae23e5 100644
--- a/drivers/message/i2o/device.c
+++ b/drivers/message/i2o/device.c
@@ -467,7 +467,7 @@ int i2o_parm_issue(struct i2o_device *i2o_dev, int cmd, void *oplist,
 
 	res.virt = NULL;
 
-	if (i2o_dma_alloc(dev, &res, reslen, GFP_KERNEL))
+	if (i2o_dma_alloc(dev, &res, reslen))
 		return -ENOMEM;
 
 	msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET);
diff --git a/drivers/message/i2o/exec-osm.c b/drivers/message/i2o/exec-osm.c
index 6cbcc21de518..56faef1a1d55 100644
--- a/drivers/message/i2o/exec-osm.c
+++ b/drivers/message/i2o/exec-osm.c
@@ -388,8 +388,8 @@ static int i2o_exec_lct_notify(struct i2o_controller *c, u32 change_ind)
 
 	dev = &c->pdev->dev;
 
-	if (i2o_dma_realloc
-	    (dev, &c->dlct, le32_to_cpu(sb->expected_lct_size), GFP_KERNEL))
+	if (i2o_dma_realloc(dev, &c->dlct,
+					le32_to_cpu(sb->expected_lct_size)))
 		return -ENOMEM;
 
 	msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET);
diff --git a/drivers/message/i2o/i2o_config.c b/drivers/message/i2o/i2o_config.c
index 4238de98d4a6..a3fabdbe6ca6 100644
--- a/drivers/message/i2o/i2o_config.c
+++ b/drivers/message/i2o/i2o_config.c
@@ -260,7 +260,7 @@ static int i2o_cfg_swdl(unsigned long arg)
 	if (IS_ERR(msg))
 		return PTR_ERR(msg);
 
-	if (i2o_dma_alloc(&c->pdev->dev, &buffer, fragsize, GFP_KERNEL)) {
+	if (i2o_dma_alloc(&c->pdev->dev, &buffer, fragsize)) {
 		i2o_msg_nop(c, msg);
 		return -ENOMEM;
 	}
@@ -339,7 +339,7 @@ static int i2o_cfg_swul(unsigned long arg)
 	if (IS_ERR(msg))
 		return PTR_ERR(msg);
 
-	if (i2o_dma_alloc(&c->pdev->dev, &buffer, fragsize, GFP_KERNEL)) {
+	if (i2o_dma_alloc(&c->pdev->dev, &buffer, fragsize)) {
 		i2o_msg_nop(c, msg);
 		return -ENOMEM;
 	}
@@ -634,9 +634,7 @@ static int i2o_cfg_passthru32(struct file *file, unsigned cmnd,
 			sg_size = sg[i].flag_count & 0xffffff;
 			p = &(sg_list[sg_index]);
 			/* Allocate memory for the transfer */
-			if (i2o_dma_alloc
-			    (&c->pdev->dev, p, sg_size,
-			     PCI_DMA_BIDIRECTIONAL)) {
+			if (i2o_dma_alloc(&c->pdev->dev, p, sg_size)) {
 				printk(KERN_DEBUG
 				       "%s: Could not allocate SG buffer - size = %d buffer number %d of %d\n",
 				       c->name, sg_size, i, sg_count);
@@ -780,12 +778,11 @@ static int i2o_cfg_passthru(unsigned long arg)
 	u32 size = 0;
 	u32 reply_size = 0;
 	u32 rcode = 0;
-	void *sg_list[SG_TABLESIZE];
+	struct i2o_dma sg_list[SG_TABLESIZE];
 	u32 sg_offset = 0;
 	u32 sg_count = 0;
 	int sg_index = 0;
 	u32 i = 0;
-	void *p = NULL;
 	i2o_status_block *sb;
 	struct i2o_message *msg;
 	unsigned int iop;
@@ -842,6 +839,7 @@ static int i2o_cfg_passthru(unsigned long arg)
 	memset(sg_list, 0, sizeof(sg_list[0]) * SG_TABLESIZE);
 	if (sg_offset) {
 		struct sg_simple_element *sg;
+		struct i2o_dma *p;
 
 		if (sg_offset * 4 >= size) {
 			rcode = -EFAULT;
@@ -871,22 +869,22 @@ static int i2o_cfg_passthru(unsigned long arg)
 				goto sg_list_cleanup;
 			}
 			sg_size = sg[i].flag_count & 0xffffff;
+			p = &(sg_list[sg_index]);
+			if (i2o_dma_alloc(&c->pdev->dev, p, sg_size)) {
 			/* Allocate memory for the transfer */
-			p = kmalloc(sg_size, GFP_KERNEL);
-			if (!p) {
 				printk(KERN_DEBUG
 				       "%s: Could not allocate SG buffer - size = %d buffer number %d of %d\n",
 				       c->name, sg_size, i, sg_count);
 				rcode = -ENOMEM;
 				goto sg_list_cleanup;
 			}
-			sg_list[sg_index++] = p;	// sglist indexed with input frame, not our internal frame.
+			sg_index++;
 			/* Copy in the user's SG buffer if necessary */
 			if (sg[i].
 			    flag_count & 0x04000000 /*I2O_SGL_FLAGS_DIR */ ) {
 				// TODO 64bit fix
 				if (copy_from_user
-				    (p, (void __user *)sg[i].addr_bus,
+				    (p->virt, (void __user *)sg[i].addr_bus,
 				     sg_size)) {
 					printk(KERN_DEBUG
 					       "%s: Could not copy SG buf %d FROM user\n",
@@ -895,8 +893,7 @@ static int i2o_cfg_passthru(unsigned long arg)
 					goto sg_list_cleanup;
 				}
 			}
-			//TODO 64bit fix
-			sg[i].addr_bus = virt_to_bus(p);
+			sg[i].addr_bus = p->phys;
 		}
 	}
 
@@ -908,7 +905,7 @@ static int i2o_cfg_passthru(unsigned long arg)
 	}
 
 	if (sg_offset) {
-		u32 rmsg[128];
+		u32 rmsg[I2O_OUTBOUND_MSG_FRAME_SIZE];
 		/* Copy back the Scatter Gather buffers back to user space */
 		u32 j;
 		// TODO 64bit fix
@@ -942,11 +939,11 @@ static int i2o_cfg_passthru(unsigned long arg)
 				sg_size = sg[j].flag_count & 0xffffff;
 				// TODO 64bit fix
 				if (copy_to_user
-				    ((void __user *)sg[j].addr_bus, sg_list[j],
+				    ((void __user *)sg[j].addr_bus, sg_list[j].virt,
 				     sg_size)) {
 					printk(KERN_WARNING
 					       "%s: Could not copy %p TO user %x\n",
-					       c->name, sg_list[j],
+					       c->name, sg_list[j].virt,
 					       sg[j].addr_bus);
 					rcode = -EFAULT;
 					goto sg_list_cleanup;
@@ -973,7 +970,7 @@ sg_list_cleanup:
 	}
 
 	for (i = 0; i < sg_index; i++)
-		kfree(sg_list[i]);
+		i2o_dma_free(&c->pdev->dev, &sg_list[i]);
 
 cleanup:
 	kfree(reply);
diff --git a/drivers/message/i2o/iop.c b/drivers/message/i2o/iop.c
index da715e11c1b2..be2b5926d26c 100644
--- a/drivers/message/i2o/iop.c
+++ b/drivers/message/i2o/iop.c
@@ -1004,7 +1004,7 @@ static int i2o_hrt_get(struct i2o_controller *c)
 
 		size = hrt->num_entries * hrt->entry_len << 2;
 		if (size > c->hrt.len) {
-			if (i2o_dma_realloc(dev, &c->hrt, size, GFP_KERNEL))
+			if (i2o_dma_realloc(dev, &c->hrt, size))
 				return -ENOMEM;
 			else
 				hrt = c->hrt.virt;
diff --git a/drivers/message/i2o/memory.c b/drivers/message/i2o/memory.c
new file mode 100644
index 000000000000..f5cc95c564e2
--- /dev/null
+++ b/drivers/message/i2o/memory.c
@@ -0,0 +1,313 @@
+/*
+ *	Functions to handle I2O memory
+ *
+ *	Pulled from the inlines in i2o headers and uninlined
+ *
+ *
+ *	This program is free software; you can redistribute it and/or modify it
+ *	under the terms of the GNU General Public License as published by the
+ *	Free Software Foundation; either version 2 of the License, or (at your
+ *	option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/i2o.h>
+#include <linux/delay.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include "core.h"
+
+/* Protects our 32/64bit mask switching */
+static DEFINE_MUTEX(mem_lock);
+
+/**
+ *	i2o_sg_tablesize - Calculate the maximum number of elements in a SGL
+ *	@c: I2O controller for which the calculation should be done
+ *	@body_size: maximum body size used for message in 32-bit words.
+ *
+ *	Return the maximum number of SG elements in a SG list.
+ */
+u16 i2o_sg_tablesize(struct i2o_controller *c, u16 body_size)
+{
+	i2o_status_block *sb = c->status_block.virt;
+	u16 sg_count =
+	    (sb->inbound_frame_size - sizeof(struct i2o_message) / 4) -
+	    body_size;
+
+	if (c->pae_support) {
+		/*
+		 * for 64-bit a SG attribute element must be added and each
+		 * SG element needs 12 bytes instead of 8.
+		 */
+		sg_count -= 2;
+		sg_count /= 3;
+	} else
+		sg_count /= 2;
+
+	if (c->short_req && (sg_count > 8))
+		sg_count = 8;
+
+	return sg_count;
+}
+EXPORT_SYMBOL_GPL(i2o_sg_tablesize);
+
+
+/**
+ *	i2o_dma_map_single - Map pointer to controller and fill in I2O message.
+ *	@c: I2O controller
+ *	@ptr: pointer to the data which should be mapped
+ *	@size: size of data in bytes
+ *	@direction: DMA_TO_DEVICE / DMA_FROM_DEVICE
+ *	@sg_ptr: pointer to the SG list inside the I2O message
+ *
+ *	This function does all necessary DMA handling and also writes the I2O
+ *	SGL elements into the I2O message. For details on DMA handling see also
+ *	dma_map_single(). The pointer sg_ptr will only be set to the end of the
+ *	SG list if the allocation was successful.
+ *
+ *	Returns DMA address which must be checked for failures using
+ *	dma_mapping_error().
+ */
+dma_addr_t i2o_dma_map_single(struct i2o_controller *c, void *ptr,
+					    size_t size,
+					    enum dma_data_direction direction,
+					    u32 ** sg_ptr)
+{
+	u32 sg_flags;
+	u32 *mptr = *sg_ptr;
+	dma_addr_t dma_addr;
+
+	switch (direction) {
+	case DMA_TO_DEVICE:
+		sg_flags = 0xd4000000;
+		break;
+	case DMA_FROM_DEVICE:
+		sg_flags = 0xd0000000;
+		break;
+	default:
+		return 0;
+	}
+
+	dma_addr = dma_map_single(&c->pdev->dev, ptr, size, direction);
+	if (!dma_mapping_error(&c->pdev->dev, dma_addr)) {
+#ifdef CONFIG_I2O_EXT_ADAPTEC_DMA64
+		if ((sizeof(dma_addr_t) > 4) && c->pae_support) {
+			*mptr++ = cpu_to_le32(0x7C020002);
+			*mptr++ = cpu_to_le32(PAGE_SIZE);
+		}
+#endif
+
+		*mptr++ = cpu_to_le32(sg_flags | size);
+		*mptr++ = cpu_to_le32(i2o_dma_low(dma_addr));
+#ifdef CONFIG_I2O_EXT_ADAPTEC_DMA64
+		if ((sizeof(dma_addr_t) > 4) && c->pae_support)
+			*mptr++ = cpu_to_le32(i2o_dma_high(dma_addr));
+#endif
+		*sg_ptr = mptr;
+	}
+	return dma_addr;
+}
+EXPORT_SYMBOL_GPL(i2o_dma_map_single);
+
+/**
+ *	i2o_dma_map_sg - Map a SG List to controller and fill in I2O message.
+ *	@c: I2O controller
+ *	@sg: SG list to be mapped
+ *	@sg_count: number of elements in the SG list
+ *	@direction: DMA_TO_DEVICE / DMA_FROM_DEVICE
+ *	@sg_ptr: pointer to the SG list inside the I2O message
+ *
+ *	This function does all necessary DMA handling and also writes the I2O
+ *	SGL elements into the I2O message. For details on DMA handling see also
+ *	dma_map_sg(). The pointer sg_ptr will only be set to the end of the SG
+ *	list if the allocation was successful.
+ *
+ *	Returns 0 on failure or 1 on success.
+ */
+int i2o_dma_map_sg(struct i2o_controller *c, struct scatterlist *sg,
+	    int sg_count, enum dma_data_direction direction, u32 ** sg_ptr)
+{
+	u32 sg_flags;
+	u32 *mptr = *sg_ptr;
+
+	switch (direction) {
+	case DMA_TO_DEVICE:
+		sg_flags = 0x14000000;
+		break;
+	case DMA_FROM_DEVICE:
+		sg_flags = 0x10000000;
+		break;
+	default:
+		return 0;
+	}
+
+	sg_count = dma_map_sg(&c->pdev->dev, sg, sg_count, direction);
+	if (!sg_count)
+		return 0;
+
+#ifdef CONFIG_I2O_EXT_ADAPTEC_DMA64
+	if ((sizeof(dma_addr_t) > 4) && c->pae_support) {
+		*mptr++ = cpu_to_le32(0x7C020002);
+		*mptr++ = cpu_to_le32(PAGE_SIZE);
+	}
+#endif
+
+	while (sg_count-- > 0) {
+		if (!sg_count)
+			sg_flags |= 0xC0000000;
+		*mptr++ = cpu_to_le32(sg_flags | sg_dma_len(sg));
+		*mptr++ = cpu_to_le32(i2o_dma_low(sg_dma_address(sg)));
+#ifdef CONFIG_I2O_EXT_ADAPTEC_DMA64
+		if ((sizeof(dma_addr_t) > 4) && c->pae_support)
+			*mptr++ = cpu_to_le32(i2o_dma_high(sg_dma_address(sg)));
+#endif
+		sg = sg_next(sg);
+	}
+	*sg_ptr = mptr;
+
+	return 1;
+}
+EXPORT_SYMBOL_GPL(i2o_dma_map_sg);
+
+/**
+ *	i2o_dma_alloc - Allocate DMA memory
+ *	@dev: struct device pointer to the PCI device of the I2O controller
+ *	@addr: i2o_dma struct which should get the DMA buffer
+ *	@len: length of the new DMA memory
+ *
+ *	Allocate a coherent DMA memory and write the pointers into addr.
+ *
+ *	Returns 0 on success or -ENOMEM on failure.
+ */
+int i2o_dma_alloc(struct device *dev, struct i2o_dma *addr, size_t len)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	int dma_64 = 0;
+
+	mutex_lock(&mem_lock);
+	if ((sizeof(dma_addr_t) > 4) && (pdev->dma_mask == DMA_64BIT_MASK)) {
+		dma_64 = 1;
+		if (pci_set_dma_mask(pdev, DMA_32BIT_MASK)) {
+			mutex_unlock(&mem_lock);
+			return -ENOMEM;
+		}
+	}
+
+	addr->virt = dma_alloc_coherent(dev, len, &addr->phys, GFP_KERNEL);
+
+	if ((sizeof(dma_addr_t) > 4) && dma_64)
+		if (pci_set_dma_mask(pdev, DMA_64BIT_MASK))
+			printk(KERN_WARNING "i2o: unable to set 64-bit DMA");
+	mutex_unlock(&mem_lock);
+
+	if (!addr->virt)
+		return -ENOMEM;
+
+	memset(addr->virt, 0, len);
+	addr->len = len;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(i2o_dma_alloc);
+
+
+/**
+ *	i2o_dma_free - Free DMA memory
+ *	@dev: struct device pointer to the PCI device of the I2O controller
+ *	@addr: i2o_dma struct which contains the DMA buffer
+ *
+ *	Free a coherent DMA memory and set virtual address of addr to NULL.
+ */
+void i2o_dma_free(struct device *dev, struct i2o_dma *addr)
+{
+	if (addr->virt) {
+		if (addr->phys)
+			dma_free_coherent(dev, addr->len, addr->virt,
+					  addr->phys);
+		else
+			kfree(addr->virt);
+		addr->virt = NULL;
+	}
+}
+EXPORT_SYMBOL_GPL(i2o_dma_free);
+
+
+/**
+ *	i2o_dma_realloc - Realloc DMA memory
+ *	@dev: struct device pointer to the PCI device of the I2O controller
+ *	@addr: pointer to a i2o_dma struct DMA buffer
+ *	@len: new length of memory
+ *
+ *	If there was something allocated in the addr, free it first. If len > 0
+ *	than try to allocate it and write the addresses back to the addr
+ *	structure. If len == 0 set the virtual address to NULL.
+ *
+ *	Returns the 0 on success or negative error code on failure.
+ */
+int i2o_dma_realloc(struct device *dev, struct i2o_dma *addr, size_t len)
+{
+	i2o_dma_free(dev, addr);
+
+	if (len)
+		return i2o_dma_alloc(dev, addr, len);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(i2o_dma_realloc);
+
+/*
+ *	i2o_pool_alloc - Allocate an slab cache and mempool
+ *	@mempool: pointer to struct i2o_pool to write data into.
+ *	@name: name which is used to identify cache
+ *	@size: size of each object
+ *	@min_nr: minimum number of objects
+ *
+ *	First allocates a slab cache with name and size. Then allocates a
+ *	mempool which uses the slab cache for allocation and freeing.
+ *
+ *	Returns 0 on success or negative error code on failure.
+ */
+int i2o_pool_alloc(struct i2o_pool *pool, const char *name,
+				 size_t size, int min_nr)
+{
+	pool->name = kmalloc(strlen(name) + 1, GFP_KERNEL);
+	if (!pool->name)
+		goto exit;
+	strcpy(pool->name, name);
+
+	pool->slab =
+	    kmem_cache_create(pool->name, size, 0, SLAB_HWCACHE_ALIGN, NULL);
+	if (!pool->slab)
+		goto free_name;
+
+	pool->mempool = mempool_create_slab_pool(min_nr, pool->slab);
+	if (!pool->mempool)
+		goto free_slab;
+
+	return 0;
+
+free_slab:
+	kmem_cache_destroy(pool->slab);
+
+free_name:
+	kfree(pool->name);
+
+exit:
+	return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(i2o_pool_alloc);
+
+/*
+ *	i2o_pool_free - Free slab cache and mempool again
+ *	@mempool: pointer to struct i2o_pool which should be freed
+ *
+ *	Note that you have to return all objects to the mempool again before
+ *	calling i2o_pool_free().
+ */
+void i2o_pool_free(struct i2o_pool *pool)
+{
+	mempool_destroy(pool->mempool);
+	kmem_cache_destroy(pool->slab);
+	kfree(pool->name);
+};
+EXPORT_SYMBOL_GPL(i2o_pool_free);
diff --git a/drivers/message/i2o/pci.c b/drivers/message/i2o/pci.c
index 685a89547a51..610ef1204e68 100644
--- a/drivers/message/i2o/pci.c
+++ b/drivers/message/i2o/pci.c
@@ -186,31 +186,29 @@ static int __devinit i2o_pci_alloc(struct i2o_controller *c)
 		}
 	}
 
-	if (i2o_dma_alloc(dev, &c->status, 8, GFP_KERNEL)) {
+	if (i2o_dma_alloc(dev, &c->status, 8)) {
 		i2o_pci_free(c);
 		return -ENOMEM;
 	}
 
-	if (i2o_dma_alloc(dev, &c->hrt, sizeof(i2o_hrt), GFP_KERNEL)) {
+	if (i2o_dma_alloc(dev, &c->hrt, sizeof(i2o_hrt))) {
 		i2o_pci_free(c);
 		return -ENOMEM;
 	}
 
-	if (i2o_dma_alloc(dev, &c->dlct, 8192, GFP_KERNEL)) {
+	if (i2o_dma_alloc(dev, &c->dlct, 8192)) {
 		i2o_pci_free(c);
 		return -ENOMEM;
 	}
 
-	if (i2o_dma_alloc(dev, &c->status_block, sizeof(i2o_status_block),
-			  GFP_KERNEL)) {
+	if (i2o_dma_alloc(dev, &c->status_block, sizeof(i2o_status_block))) {
 		i2o_pci_free(c);
 		return -ENOMEM;
 	}
 
-	if (i2o_dma_alloc
-	    (dev, &c->out_queue,
-	     I2O_MAX_OUTBOUND_MSG_FRAMES * I2O_OUTBOUND_MSG_FRAME_SIZE *
-	     sizeof(u32), GFP_KERNEL)) {
+	if (i2o_dma_alloc(dev, &c->out_queue,
+		I2O_MAX_OUTBOUND_MSG_FRAMES * I2O_OUTBOUND_MSG_FRAME_SIZE *
+				sizeof(u32))) {
 		i2o_pci_free(c);
 		return -ENOMEM;
 	}
diff --git a/include/linux/i2o.h b/include/linux/i2o.h
index 75ae6d8aba4f..4c4e57d1f19d 100644
--- a/include/linux/i2o.h
+++ b/include/linux/i2o.h
@@ -570,7 +570,6 @@ struct i2o_controller {
 #endif
 	spinlock_t lock;	/* lock for controller
 				   configuration */
-
 	void *driver_data[I2O_MAX_DRIVERS];	/* storage for drivers */
 };
 
@@ -691,289 +690,22 @@ static inline u32 i2o_dma_high(dma_addr_t dma_addr)
 };
 #endif
 
-/**
- *	i2o_sg_tablesize - Calculate the maximum number of elements in a SGL
- *	@c: I2O controller for which the calculation should be done
- *	@body_size: maximum body size used for message in 32-bit words.
- *
- *	Return the maximum number of SG elements in a SG list.
- */
-static inline u16 i2o_sg_tablesize(struct i2o_controller *c, u16 body_size)
-{
-	i2o_status_block *sb = c->status_block.virt;
-	u16 sg_count =
-	    (sb->inbound_frame_size - sizeof(struct i2o_message) / 4) -
-	    body_size;
-
-	if (c->pae_support) {
-		/*
-		 * for 64-bit a SG attribute element must be added and each
-		 * SG element needs 12 bytes instead of 8.
-		 */
-		sg_count -= 2;
-		sg_count /= 3;
-	} else
-		sg_count /= 2;
-
-	if (c->short_req && (sg_count > 8))
-		sg_count = 8;
-
-	return sg_count;
-};
-
-/**
- *	i2o_dma_map_single - Map pointer to controller and fill in I2O message.
- *	@c: I2O controller
- *	@ptr: pointer to the data which should be mapped
- *	@size: size of data in bytes
- *	@direction: DMA_TO_DEVICE / DMA_FROM_DEVICE
- *	@sg_ptr: pointer to the SG list inside the I2O message
- *
- *	This function does all necessary DMA handling and also writes the I2O
- *	SGL elements into the I2O message. For details on DMA handling see also
- *	dma_map_single(). The pointer sg_ptr will only be set to the end of the
- *	SG list if the allocation was successful.
- *
- *	Returns DMA address which must be checked for failures using
- *	dma_mapping_error().
- */
-static inline dma_addr_t i2o_dma_map_single(struct i2o_controller *c, void *ptr,
+extern u16 i2o_sg_tablesize(struct i2o_controller *c, u16 body_size);
+extern dma_addr_t i2o_dma_map_single(struct i2o_controller *c, void *ptr,
 					    size_t size,
 					    enum dma_data_direction direction,
-					    u32 ** sg_ptr)
-{
-	u32 sg_flags;
-	u32 *mptr = *sg_ptr;
-	dma_addr_t dma_addr;
-
-	switch (direction) {
-	case DMA_TO_DEVICE:
-		sg_flags = 0xd4000000;
-		break;
-	case DMA_FROM_DEVICE:
-		sg_flags = 0xd0000000;
-		break;
-	default:
-		return 0;
-	}
-
-	dma_addr = dma_map_single(&c->pdev->dev, ptr, size, direction);
-	if (!dma_mapping_error(&c->pdev->dev, dma_addr)) {
-#ifdef CONFIG_I2O_EXT_ADAPTEC_DMA64
-		if ((sizeof(dma_addr_t) > 4) && c->pae_support) {
-			*mptr++ = cpu_to_le32(0x7C020002);
-			*mptr++ = cpu_to_le32(PAGE_SIZE);
-		}
-#endif
-
-		*mptr++ = cpu_to_le32(sg_flags | size);
-		*mptr++ = cpu_to_le32(i2o_dma_low(dma_addr));
-#ifdef CONFIG_I2O_EXT_ADAPTEC_DMA64
-		if ((sizeof(dma_addr_t) > 4) && c->pae_support)
-			*mptr++ = cpu_to_le32(i2o_dma_high(dma_addr));
-#endif
-		*sg_ptr = mptr;
-	}
-	return dma_addr;
-};
-
-/**
- *	i2o_dma_map_sg - Map a SG List to controller and fill in I2O message.
- *	@c: I2O controller
- *	@sg: SG list to be mapped
- *	@sg_count: number of elements in the SG list
- *	@direction: DMA_TO_DEVICE / DMA_FROM_DEVICE
- *	@sg_ptr: pointer to the SG list inside the I2O message
- *
- *	This function does all necessary DMA handling and also writes the I2O
- *	SGL elements into the I2O message. For details on DMA handling see also
- *	dma_map_sg(). The pointer sg_ptr will only be set to the end of the SG
- *	list if the allocation was successful.
- *
- *	Returns 0 on failure or 1 on success.
- */
-static inline int i2o_dma_map_sg(struct i2o_controller *c,
+					    u32 ** sg_ptr);
+extern int i2o_dma_map_sg(struct i2o_controller *c,
 				 struct scatterlist *sg, int sg_count,
 				 enum dma_data_direction direction,
-				 u32 ** sg_ptr)
-{
-	u32 sg_flags;
-	u32 *mptr = *sg_ptr;
-
-	switch (direction) {
-	case DMA_TO_DEVICE:
-		sg_flags = 0x14000000;
-		break;
-	case DMA_FROM_DEVICE:
-		sg_flags = 0x10000000;
-		break;
-	default:
-		return 0;
-	}
-
-	sg_count = dma_map_sg(&c->pdev->dev, sg, sg_count, direction);
-	if (!sg_count)
-		return 0;
-
-#ifdef CONFIG_I2O_EXT_ADAPTEC_DMA64
-	if ((sizeof(dma_addr_t) > 4) && c->pae_support) {
-		*mptr++ = cpu_to_le32(0x7C020002);
-		*mptr++ = cpu_to_le32(PAGE_SIZE);
-	}
-#endif
-
-	while (sg_count-- > 0) {
-		if (!sg_count)
-			sg_flags |= 0xC0000000;
-		*mptr++ = cpu_to_le32(sg_flags | sg_dma_len(sg));
-		*mptr++ = cpu_to_le32(i2o_dma_low(sg_dma_address(sg)));
-#ifdef CONFIG_I2O_EXT_ADAPTEC_DMA64
-		if ((sizeof(dma_addr_t) > 4) && c->pae_support)
-			*mptr++ = cpu_to_le32(i2o_dma_high(sg_dma_address(sg)));
-#endif
-		sg = sg_next(sg);
-	}
-	*sg_ptr = mptr;
-
-	return 1;
-};
-
-/**
- *	i2o_dma_alloc - Allocate DMA memory
- *	@dev: struct device pointer to the PCI device of the I2O controller
- *	@addr: i2o_dma struct which should get the DMA buffer
- *	@len: length of the new DMA memory
- *	@gfp_mask: GFP mask
- *
- *	Allocate a coherent DMA memory and write the pointers into addr.
- *
- *	Returns 0 on success or -ENOMEM on failure.
- */
-static inline int i2o_dma_alloc(struct device *dev, struct i2o_dma *addr,
-				size_t len, gfp_t gfp_mask)
-{
-	struct pci_dev *pdev = to_pci_dev(dev);
-	int dma_64 = 0;
-
-	if ((sizeof(dma_addr_t) > 4) && (pdev->dma_mask == DMA_64BIT_MASK)) {
-		dma_64 = 1;
-		if (pci_set_dma_mask(pdev, DMA_32BIT_MASK))
-			return -ENOMEM;
-	}
-
-	addr->virt = dma_alloc_coherent(dev, len, &addr->phys, gfp_mask);
-
-	if ((sizeof(dma_addr_t) > 4) && dma_64)
-		if (pci_set_dma_mask(pdev, DMA_64BIT_MASK))
-			printk(KERN_WARNING "i2o: unable to set 64-bit DMA");
-
-	if (!addr->virt)
-		return -ENOMEM;
-
-	memset(addr->virt, 0, len);
-	addr->len = len;
-
-	return 0;
-};
-
-/**
- *	i2o_dma_free - Free DMA memory
- *	@dev: struct device pointer to the PCI device of the I2O controller
- *	@addr: i2o_dma struct which contains the DMA buffer
- *
- *	Free a coherent DMA memory and set virtual address of addr to NULL.
- */
-static inline void i2o_dma_free(struct device *dev, struct i2o_dma *addr)
-{
-	if (addr->virt) {
-		if (addr->phys)
-			dma_free_coherent(dev, addr->len, addr->virt,
-					  addr->phys);
-		else
-			kfree(addr->virt);
-		addr->virt = NULL;
-	}
-};
-
-/**
- *	i2o_dma_realloc - Realloc DMA memory
- *	@dev: struct device pointer to the PCI device of the I2O controller
- *	@addr: pointer to a i2o_dma struct DMA buffer
- *	@len: new length of memory
- *	@gfp_mask: GFP mask
- *
- *	If there was something allocated in the addr, free it first. If len > 0
- *	than try to allocate it and write the addresses back to the addr
- *	structure. If len == 0 set the virtual address to NULL.
- *
- *	Returns the 0 on success or negative error code on failure.
- */
-static inline int i2o_dma_realloc(struct device *dev, struct i2o_dma *addr,
-				  size_t len, gfp_t gfp_mask)
-{
-	i2o_dma_free(dev, addr);
-
-	if (len)
-		return i2o_dma_alloc(dev, addr, len, gfp_mask);
-
-	return 0;
-};
-
-/*
- *	i2o_pool_alloc - Allocate an slab cache and mempool
- *	@mempool: pointer to struct i2o_pool to write data into.
- *	@name: name which is used to identify cache
- *	@size: size of each object
- *	@min_nr: minimum number of objects
- *
- *	First allocates a slab cache with name and size. Then allocates a
- *	mempool which uses the slab cache for allocation and freeing.
- *
- *	Returns 0 on success or negative error code on failure.
- */
-static inline int i2o_pool_alloc(struct i2o_pool *pool, const char *name,
-				 size_t size, int min_nr)
-{
-	pool->name = kmalloc(strlen(name) + 1, GFP_KERNEL);
-	if (!pool->name)
-		goto exit;
-	strcpy(pool->name, name);
-
-	pool->slab =
-	    kmem_cache_create(pool->name, size, 0, SLAB_HWCACHE_ALIGN, NULL);
-	if (!pool->slab)
-		goto free_name;
-
-	pool->mempool = mempool_create_slab_pool(min_nr, pool->slab);
-	if (!pool->mempool)
-		goto free_slab;
-
-	return 0;
-
-      free_slab:
-	kmem_cache_destroy(pool->slab);
-
-      free_name:
-	kfree(pool->name);
-
-      exit:
-	return -ENOMEM;
-};
-
-/*
- *	i2o_pool_free - Free slab cache and mempool again
- *	@mempool: pointer to struct i2o_pool which should be freed
- *
- *	Note that you have to return all objects to the mempool again before
- *	calling i2o_pool_free().
- */
-static inline void i2o_pool_free(struct i2o_pool *pool)
-{
-	mempool_destroy(pool->mempool);
-	kmem_cache_destroy(pool->slab);
-	kfree(pool->name);
-};
-
+				 u32 ** sg_ptr);
+extern int i2o_dma_alloc(struct device *dev, struct i2o_dma *addr, size_t len);
+extern void i2o_dma_free(struct device *dev, struct i2o_dma *addr);
+extern int i2o_dma_realloc(struct device *dev, struct i2o_dma *addr,
+								size_t len);
+extern int i2o_pool_alloc(struct i2o_pool *pool, const char *name,
+				 size_t size, int min_nr);
+extern void i2o_pool_free(struct i2o_pool *pool);
 /* I2O driver (OSM) functions */
 extern int i2o_driver_register(struct i2o_driver *);
 extern void i2o_driver_unregister(struct i2o_driver *);
-- 
cgit v1.2.3


From bb979d7fc360bc37cbaff43a6fafceb897cb5e47 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Wed, 15 Oct 2008 22:02:52 -0700
Subject: autofs4: cleanup autofs mount type usage

Usage of the AUTOFS_TYPE_* defines is a little confusing and appears
inconsistent.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/autofs4/autofs_i.h    | 6 ++----
 fs/autofs4/expire.c      | 2 +-
 fs/autofs4/inode.c       | 6 +++---
 fs/autofs4/waitq.c       | 8 ++++----
 include/linux/auto_fs4.h | 5 +++++
 5 files changed, 15 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 69a2f5c92319..ea024d8e37ed 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -21,6 +21,8 @@
 #define AUTOFS_IOC_FIRST     AUTOFS_IOC_READY
 #define AUTOFS_IOC_COUNT     32
 
+#define AUTOFS_TYPE_TRIGGER	(AUTOFS_TYPE_DIRECT|AUTOFS_TYPE_OFFSET)
+
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/time.h>
@@ -92,10 +94,6 @@ struct autofs_wait_queue {
 
 #define AUTOFS_SBI_MAGIC 0x6d4a556d
 
-#define AUTOFS_TYPE_INDIRECT     0x0001
-#define AUTOFS_TYPE_DIRECT       0x0002
-#define AUTOFS_TYPE_OFFSET       0x0004
-
 struct autofs_sb_info {
 	u32 magic;
 	int pipefd;
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index cdabb796ff01..e79dd09e12a1 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -479,7 +479,7 @@ int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt,
 	if (arg && get_user(do_now, arg))
 		return -EFAULT;
 
-	if (sbi->type & AUTOFS_TYPE_DIRECT)
+	if (sbi->type & AUTOFS_TYPE_TRIGGER)
 		dentry = autofs4_expire_direct(sb, mnt, sbi, do_now);
 	else
 		dentry = autofs4_expire_indirect(sb, mnt, sbi, do_now);
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 45d55819203d..7303099fcc1a 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -288,7 +288,7 @@ static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid,
 			*type = AUTOFS_TYPE_DIRECT;
 			break;
 		case Opt_offset:
-			*type = AUTOFS_TYPE_DIRECT | AUTOFS_TYPE_OFFSET;
+			*type = AUTOFS_TYPE_OFFSET;
 			break;
 		default:
 			return 1;
@@ -336,7 +336,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 	sbi->sb = s;
 	sbi->version = 0;
 	sbi->sub_version = 0;
-	sbi->type = 0;
+	sbi->type = AUTOFS_TYPE_INDIRECT;
 	sbi->min_proto = 0;
 	sbi->max_proto = 0;
 	mutex_init(&sbi->wq_mutex);
@@ -378,7 +378,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 	}
 
 	root_inode->i_fop = &autofs4_root_operations;
-	root_inode->i_op = sbi->type & AUTOFS_TYPE_DIRECT ?
+	root_inode->i_op = sbi->type & AUTOFS_TYPE_TRIGGER ?
 			&autofs4_direct_root_inode_operations :
 			&autofs4_indirect_root_inode_operations;
 
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 35216d18d8b5..6d87bb156326 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -337,7 +337,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 		 * is very similar for indirect mounts except only dentrys
 		 * in the root of the autofs file system may be negative.
 		 */
-		if (sbi->type & (AUTOFS_TYPE_DIRECT|AUTOFS_TYPE_OFFSET))
+		if (sbi->type & AUTOFS_TYPE_TRIGGER)
 			return -ENOENT;
 		else if (!IS_ROOT(dentry->d_parent))
 			return -ENOENT;
@@ -348,7 +348,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 		return -ENOMEM;
 
 	/* If this is a direct mount request create a dummy name */
-	if (IS_ROOT(dentry) && (sbi->type & AUTOFS_TYPE_DIRECT))
+	if (IS_ROOT(dentry) && sbi->type & AUTOFS_TYPE_TRIGGER)
 		qstr.len = sprintf(name, "%p", dentry);
 	else {
 		qstr.len = autofs4_getpath(sbi, dentry, &name);
@@ -406,11 +406,11 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 				type = autofs_ptype_expire_multi;
 		} else {
 			if (notify == NFY_MOUNT)
-				type = (sbi->type & AUTOFS_TYPE_DIRECT) ?
+				type = (sbi->type & AUTOFS_TYPE_TRIGGER) ?
 					autofs_ptype_missing_direct :
 					 autofs_ptype_missing_indirect;
 			else
-				type = (sbi->type & AUTOFS_TYPE_DIRECT) ?
+				type = (sbi->type & AUTOFS_TYPE_TRIGGER) ?
 					autofs_ptype_expire_direct :
 					autofs_ptype_expire_indirect;
 		}
diff --git a/include/linux/auto_fs4.h b/include/linux/auto_fs4.h
index b785c6f8644d..aa96a04ae023 100644
--- a/include/linux/auto_fs4.h
+++ b/include/linux/auto_fs4.h
@@ -29,6 +29,11 @@
 #define AUTOFS_EXP_IMMEDIATE		1
 #define AUTOFS_EXP_LEAVES		2
 
+#define AUTOFS_TYPE_ANY			0x0000
+#define AUTOFS_TYPE_INDIRECT		0x0001
+#define AUTOFS_TYPE_DIRECT		0x0002
+#define AUTOFS_TYPE_OFFSET		0x0004
+
 /* Daemon notification packet types */
 enum autofs_notify {
 	NFY_NONE,
-- 
cgit v1.2.3


From 8d7b48e0bc5fa01a818eac713d4cb0763090cd0e Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Wed, 15 Oct 2008 22:02:54 -0700
Subject: autofs4: add miscellaneous device for ioctls

Add a miscellaneous device to the autofs4 module for routing ioctls.  This
provides the ability to obtain an ioctl file handle for an autofs mount
point that is possibly covered by another mount.

The actual problem with autofs is that it can't reconnect to existing
mounts.  Immediately one things of just adding the ability to remount
autofs file systems would solve it, but alas, that can't work.  This is
because autofs direct mounts and the implementation of "on demand mount
and expire" of nested mount trees have the file system mounted on top of
the mount trigger dentry.

To resolve this a miscellaneous device node for routing ioctl commands to
these mount points has been implemented in the autofs4 kernel module and a
library added to autofs.  This provides the ability to open a file
descriptor for these over mounted autofs mount points.

Please refer to Documentation/filesystems/autofs4-mount-control.txt for a
discussion of the problem, implementation alternatives considered and a
description of the interface.

[akpm@linux-foundation.org: coding-style fixes]
[akpm@linux-foundation.org: build fix]
Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/autofs4/Makefile            |   2 +-
 fs/autofs4/autofs_i.h          |  35 +-
 fs/autofs4/dev-ioctl.c         | 863 +++++++++++++++++++++++++++++++++++++++++
 fs/autofs4/expire.c            |  16 +-
 fs/autofs4/init.c              |  11 +-
 include/linux/auto_dev-ioctl.h | 157 ++++++++
 include/linux/auto_fs4.h       |   2 +-
 7 files changed, 1073 insertions(+), 13 deletions(-)
 create mode 100644 fs/autofs4/dev-ioctl.c
 create mode 100644 include/linux/auto_dev-ioctl.h

(limited to 'include/linux')

diff --git a/fs/autofs4/Makefile b/fs/autofs4/Makefile
index f2c3b79e94d2..a811c1f7d9ab 100644
--- a/fs/autofs4/Makefile
+++ b/fs/autofs4/Makefile
@@ -4,4 +4,4 @@
 
 obj-$(CONFIG_AUTOFS4_FS) += autofs4.o
 
-autofs4-objs := init.o inode.o root.o symlink.o waitq.o expire.o
+autofs4-objs := init.o inode.o root.o symlink.o waitq.o expire.o dev-ioctl.o
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index fa76d18be082..e0f16da00e54 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -14,6 +14,7 @@
 /* Internal header file for autofs */
 
 #include <linux/auto_fs4.h>
+#include <linux/auto_dev-ioctl.h>
 #include <linux/mutex.h>
 #include <linux/list.h>
 
@@ -21,6 +22,9 @@
 #define AUTOFS_IOC_FIRST     AUTOFS_IOC_READY
 #define AUTOFS_IOC_COUNT     32
 
+#define AUTOFS_DEV_IOCTL_IOC_FIRST	(AUTOFS_DEV_IOCTL_VERSION)
+#define AUTOFS_DEV_IOCTL_IOC_COUNT	(AUTOFS_IOC_COUNT - 11)
+
 #define AUTOFS_TYPE_TRIGGER	(AUTOFS_TYPE_DIRECT|AUTOFS_TYPE_OFFSET)
 
 #include <linux/kernel.h>
@@ -37,11 +41,27 @@
 /* #define DEBUG */
 
 #ifdef DEBUG
-#define DPRINTK(fmt,args...) do { printk(KERN_DEBUG "pid %d: %s: " fmt "\n" , current->pid , __func__ , ##args); } while(0)
+#define DPRINTK(fmt, args...)				\
+do {							\
+	printk(KERN_DEBUG "pid %d: %s: " fmt "\n",	\
+		current->pid, __func__, ##args);	\
+} while (0)
 #else
-#define DPRINTK(fmt,args...) do {} while(0)
+#define DPRINTK(fmt, args...) do {} while (0)
 #endif
 
+#define AUTOFS_WARN(fmt, args...)			\
+do {							\
+	printk(KERN_WARNING "pid %d: %s: " fmt "\n",	\
+		current->pid, __func__, ##args);	\
+} while (0)
+
+#define AUTOFS_ERROR(fmt, args...)			\
+do {							\
+	printk(KERN_ERR "pid %d: %s: " fmt "\n",	\
+		current->pid, __func__, ##args);	\
+} while (0)
+
 /* Unified info structure.  This is pointed to by both the dentry and
    inode structures.  Each file in the filesystem has an instance of this
    structure.  It holds a reference to the dentry, so dentries are never
@@ -170,6 +190,17 @@ int autofs4_expire_run(struct super_block *, struct vfsmount *,
 			struct autofs_packet_expire __user *);
 int autofs4_expire_multi(struct super_block *, struct vfsmount *,
 			struct autofs_sb_info *, int __user *);
+struct dentry *autofs4_expire_direct(struct super_block *sb,
+				     struct vfsmount *mnt,
+				     struct autofs_sb_info *sbi, int how);
+struct dentry *autofs4_expire_indirect(struct super_block *sb,
+				       struct vfsmount *mnt,
+				       struct autofs_sb_info *sbi, int how);
+
+/* Device node initialization */
+
+int autofs_dev_ioctl_init(void);
+void autofs_dev_ioctl_exit(void);
 
 /* Operations structures */
 
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
new file mode 100644
index 000000000000..625abf5422e2
--- /dev/null
+++ b/fs/autofs4/dev-ioctl.c
@@ -0,0 +1,863 @@
+/*
+ * Copyright 2008 Red Hat, Inc. All rights reserved.
+ * Copyright 2008 Ian Kent <raven@themaw.net>
+ *
+ * This file is part of the Linux kernel and is made available under
+ * the terms of the GNU General Public License, version 2, or at your
+ * option, any later version, incorporated herein by reference.
+ */
+
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/miscdevice.h>
+#include <linux/init.h>
+#include <linux/wait.h>
+#include <linux/namei.h>
+#include <linux/fcntl.h>
+#include <linux/file.h>
+#include <linux/fdtable.h>
+#include <linux/sched.h>
+#include <linux/compat.h>
+#include <linux/syscalls.h>
+#include <linux/smp_lock.h>
+#include <linux/magic.h>
+#include <linux/dcache.h>
+#include <linux/uaccess.h>
+
+#include "autofs_i.h"
+
+/*
+ * This module implements an interface for routing autofs ioctl control
+ * commands via a miscellaneous device file.
+ *
+ * The alternate interface is needed because we need to be able open
+ * an ioctl file descriptor on an autofs mount that may be covered by
+ * another mount. This situation arises when starting automount(8)
+ * or other user space daemon which uses direct mounts or offset
+ * mounts (used for autofs lazy mount/umount of nested mount trees),
+ * which have been left busy at at service shutdown.
+ */
+
+#define AUTOFS_DEV_IOCTL_SIZE	sizeof(struct autofs_dev_ioctl)
+
+typedef int (*ioctl_fn)(struct file *, struct autofs_sb_info *,
+			struct autofs_dev_ioctl *);
+
+static int check_name(const char *name)
+{
+	if (!strchr(name, '/'))
+		return -EINVAL;
+	return 0;
+}
+
+/*
+ * Check a string doesn't overrun the chunk of
+ * memory we copied from user land.
+ */
+static int invalid_str(char *str, void *end)
+{
+	while ((void *) str <= end)
+		if (!*str++)
+			return 0;
+	return -EINVAL;
+}
+
+/*
+ * Check that the user compiled against correct version of autofs
+ * misc device code.
+ *
+ * As well as checking the version compatibility this always copies
+ * the kernel interface version out.
+ */
+static int check_dev_ioctl_version(int cmd, struct autofs_dev_ioctl *param)
+{
+	int err = 0;
+
+	if ((AUTOFS_DEV_IOCTL_VERSION_MAJOR != param->ver_major) ||
+	    (AUTOFS_DEV_IOCTL_VERSION_MINOR < param->ver_minor)) {
+		AUTOFS_WARN("ioctl control interface version mismatch: "
+		     "kernel(%u.%u), user(%u.%u), cmd(%d)",
+		     AUTOFS_DEV_IOCTL_VERSION_MAJOR,
+		     AUTOFS_DEV_IOCTL_VERSION_MINOR,
+		     param->ver_major, param->ver_minor, cmd);
+		err = -EINVAL;
+	}
+
+	/* Fill in the kernel version. */
+	param->ver_major = AUTOFS_DEV_IOCTL_VERSION_MAJOR;
+	param->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR;
+
+	return err;
+}
+
+/*
+ * Copy parameter control struct, including a possible path allocated
+ * at the end of the struct.
+ */
+static struct autofs_dev_ioctl *copy_dev_ioctl(struct autofs_dev_ioctl __user *in)
+{
+	struct autofs_dev_ioctl tmp, *ads;
+
+	if (copy_from_user(&tmp, in, sizeof(tmp)))
+		return ERR_PTR(-EFAULT);
+
+	if (tmp.size < sizeof(tmp))
+		return ERR_PTR(-EINVAL);
+
+	ads = kmalloc(tmp.size, GFP_KERNEL);
+	if (!ads)
+		return ERR_PTR(-ENOMEM);
+
+	if (copy_from_user(ads, in, tmp.size)) {
+		kfree(ads);
+		return ERR_PTR(-EFAULT);
+	}
+
+	return ads;
+}
+
+static inline void free_dev_ioctl(struct autofs_dev_ioctl *param)
+{
+	kfree(param);
+	return;
+}
+
+/*
+ * Check sanity of parameter control fields and if a path is present
+ * check that it has a "/" and is terminated.
+ */
+static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param)
+{
+	int err = -EINVAL;
+
+	if (check_dev_ioctl_version(cmd, param)) {
+		AUTOFS_WARN("invalid device control module version "
+		     "supplied for cmd(0x%08x)", cmd);
+		goto out;
+	}
+
+	if (param->size > sizeof(*param)) {
+		err = check_name(param->path);
+		if (err) {
+			AUTOFS_WARN("invalid path supplied for cmd(0x%08x)",
+				    cmd);
+			goto out;
+		}
+
+		err = invalid_str(param->path,
+				 (void *) ((size_t) param + param->size));
+		if (err) {
+			AUTOFS_WARN("invalid path supplied for cmd(0x%08x)",
+				    cmd);
+			goto out;
+		}
+	}
+
+	err = 0;
+out:
+	return err;
+}
+
+/*
+ * Get the autofs super block info struct from the file opened on
+ * the autofs mount point.
+ */
+static struct autofs_sb_info *autofs_dev_ioctl_sbi(struct file *f)
+{
+	struct autofs_sb_info *sbi = NULL;
+	struct inode *inode;
+
+	if (f) {
+		inode = f->f_path.dentry->d_inode;
+		sbi = autofs4_sbi(inode->i_sb);
+	}
+	return sbi;
+}
+
+/* Return autofs module protocol version */
+static int autofs_dev_ioctl_protover(struct file *fp,
+				     struct autofs_sb_info *sbi,
+				     struct autofs_dev_ioctl *param)
+{
+	param->arg1 = sbi->version;
+	return 0;
+}
+
+/* Return autofs module protocol sub version */
+static int autofs_dev_ioctl_protosubver(struct file *fp,
+					struct autofs_sb_info *sbi,
+					struct autofs_dev_ioctl *param)
+{
+	param->arg1 = sbi->sub_version;
+	return 0;
+}
+
+/*
+ * Walk down the mount stack looking for an autofs mount that
+ * has the requested device number (aka. new_encode_dev(sb->s_dev).
+ */
+static int autofs_dev_ioctl_find_super(struct nameidata *nd, dev_t devno)
+{
+	struct dentry *dentry;
+	struct inode *inode;
+	struct super_block *sb;
+	dev_t s_dev;
+	unsigned int err;
+
+	err = -ENOENT;
+
+	/* Lookup the dentry name at the base of our mount point */
+	dentry = d_lookup(nd->path.dentry, &nd->last);
+	if (!dentry)
+		goto out;
+
+	dput(nd->path.dentry);
+	nd->path.dentry = dentry;
+
+	/* And follow the mount stack looking for our autofs mount */
+	while (follow_down(&nd->path.mnt, &nd->path.dentry)) {
+		inode = nd->path.dentry->d_inode;
+		if (!inode)
+			break;
+
+		sb = inode->i_sb;
+		s_dev = new_encode_dev(sb->s_dev);
+		if (devno == s_dev) {
+			if (sb->s_magic == AUTOFS_SUPER_MAGIC) {
+				err = 0;
+				break;
+			}
+		}
+	}
+out:
+	return err;
+}
+
+/*
+ * Walk down the mount stack looking for an autofs mount that
+ * has the requested mount type (ie. indirect, direct or offset).
+ */
+static int autofs_dev_ioctl_find_sbi_type(struct nameidata *nd, unsigned int type)
+{
+	struct dentry *dentry;
+	struct autofs_info *ino;
+	unsigned int err;
+
+	err = -ENOENT;
+
+	/* Lookup the dentry name at the base of our mount point */
+	dentry = d_lookup(nd->path.dentry, &nd->last);
+	if (!dentry)
+		goto out;
+
+	dput(nd->path.dentry);
+	nd->path.dentry = dentry;
+
+	/* And follow the mount stack looking for our autofs mount */
+	while (follow_down(&nd->path.mnt, &nd->path.dentry)) {
+		ino = autofs4_dentry_ino(nd->path.dentry);
+		if (ino && ino->sbi->type & type) {
+			err = 0;
+			break;
+		}
+	}
+out:
+	return err;
+}
+
+static void autofs_dev_ioctl_fd_install(unsigned int fd, struct file *file)
+{
+	struct files_struct *files = current->files;
+	struct fdtable *fdt;
+
+	spin_lock(&files->file_lock);
+	fdt = files_fdtable(files);
+	BUG_ON(fdt->fd[fd] != NULL);
+	rcu_assign_pointer(fdt->fd[fd], file);
+	FD_SET(fd, fdt->close_on_exec);
+	spin_unlock(&files->file_lock);
+}
+
+
+/*
+ * Open a file descriptor on the autofs mount point corresponding
+ * to the given path and device number (aka. new_encode_dev(sb->s_dev)).
+ */
+static int autofs_dev_ioctl_open_mountpoint(const char *path, dev_t devid)
+{
+	struct file *filp;
+	struct nameidata nd;
+	int err, fd;
+
+	fd = get_unused_fd();
+	if (likely(fd >= 0)) {
+		/* Get nameidata of the parent directory */
+		err = path_lookup(path, LOOKUP_PARENT, &nd);
+		if (err)
+			goto out;
+
+		/*
+		 * Search down, within the parent, looking for an
+		 * autofs super block that has the device number
+		 * corresponding to the autofs fs we want to open.
+		 */
+		err = autofs_dev_ioctl_find_super(&nd, devid);
+		if (err) {
+			path_put(&nd.path);
+			goto out;
+		}
+
+		filp = dentry_open(nd.path.dentry, nd.path.mnt, O_RDONLY);
+		if (IS_ERR(filp)) {
+			err = PTR_ERR(filp);
+			goto out;
+		}
+
+		autofs_dev_ioctl_fd_install(fd, filp);
+	}
+
+	return fd;
+
+out:
+	put_unused_fd(fd);
+	return err;
+}
+
+/* Open a file descriptor on an autofs mount point */
+static int autofs_dev_ioctl_openmount(struct file *fp,
+				      struct autofs_sb_info *sbi,
+				      struct autofs_dev_ioctl *param)
+{
+	const char *path;
+	dev_t devid;
+	int err, fd;
+
+	/* param->path has already been checked */
+	if (!param->arg1)
+		return -EINVAL;
+
+	param->ioctlfd = -1;
+
+	path = param->path;
+	devid = param->arg1;
+
+	err = 0;
+	fd = autofs_dev_ioctl_open_mountpoint(path, devid);
+	if (unlikely(fd < 0)) {
+		err = fd;
+		goto out;
+	}
+
+	param->ioctlfd = fd;
+out:
+	return err;
+}
+
+/* Close file descriptor allocated above (user can also use close(2)). */
+static int autofs_dev_ioctl_closemount(struct file *fp,
+				       struct autofs_sb_info *sbi,
+				       struct autofs_dev_ioctl *param)
+{
+	return sys_close(param->ioctlfd);
+}
+
+/*
+ * Send "ready" status for an existing wait (either a mount or an expire
+ * request).
+ */
+static int autofs_dev_ioctl_ready(struct file *fp,
+				  struct autofs_sb_info *sbi,
+				  struct autofs_dev_ioctl *param)
+{
+	autofs_wqt_t token;
+
+	token = (autofs_wqt_t) param->arg1;
+	return autofs4_wait_release(sbi, token, 0);
+}
+
+/*
+ * Send "fail" status for an existing wait (either a mount or an expire
+ * request).
+ */
+static int autofs_dev_ioctl_fail(struct file *fp,
+				 struct autofs_sb_info *sbi,
+				 struct autofs_dev_ioctl *param)
+{
+	autofs_wqt_t token;
+	int status;
+
+	token = (autofs_wqt_t) param->arg1;
+	status = param->arg2 ? param->arg2 : -ENOENT;
+	return autofs4_wait_release(sbi, token, status);
+}
+
+/*
+ * Set the pipe fd for kernel communication to the daemon.
+ *
+ * Normally this is set at mount using an option but if we
+ * are reconnecting to a busy mount then we need to use this
+ * to tell the autofs mount about the new kernel pipe fd. In
+ * order to protect mounts against incorrectly setting the
+ * pipefd we also require that the autofs mount be catatonic.
+ *
+ * This also sets the process group id used to identify the
+ * controlling process (eg. the owning automount(8) daemon).
+ */
+static int autofs_dev_ioctl_setpipefd(struct file *fp,
+				      struct autofs_sb_info *sbi,
+				      struct autofs_dev_ioctl *param)
+{
+	int pipefd;
+	int err = 0;
+
+	if (param->arg1 == -1)
+		return -EINVAL;
+
+	pipefd = param->arg1;
+
+	mutex_lock(&sbi->wq_mutex);
+	if (!sbi->catatonic) {
+		mutex_unlock(&sbi->wq_mutex);
+		return -EBUSY;
+	} else {
+		struct file *pipe = fget(pipefd);
+		if (!pipe->f_op || !pipe->f_op->write) {
+			err = -EPIPE;
+			fput(pipe);
+			goto out;
+		}
+		sbi->oz_pgrp = task_pgrp_nr(current);
+		sbi->pipefd = pipefd;
+		sbi->pipe = pipe;
+		sbi->catatonic = 0;
+	}
+out:
+	mutex_unlock(&sbi->wq_mutex);
+	return err;
+}
+
+/*
+ * Make the autofs mount point catatonic, no longer responsive to
+ * mount requests. Also closes the kernel pipe file descriptor.
+ */
+static int autofs_dev_ioctl_catatonic(struct file *fp,
+				      struct autofs_sb_info *sbi,
+				      struct autofs_dev_ioctl *param)
+{
+	autofs4_catatonic_mode(sbi);
+	return 0;
+}
+
+/* Set the autofs mount timeout */
+static int autofs_dev_ioctl_timeout(struct file *fp,
+				    struct autofs_sb_info *sbi,
+				    struct autofs_dev_ioctl *param)
+{
+	unsigned long timeout;
+
+	timeout = param->arg1;
+	param->arg1 = sbi->exp_timeout / HZ;
+	sbi->exp_timeout = timeout * HZ;
+	return 0;
+}
+
+/*
+ * Return the uid and gid of the last request for the mount
+ *
+ * When reconstructing an autofs mount tree with active mounts
+ * we need to re-connect to mounts that may have used the original
+ * process uid and gid (or string variations of them) for mount
+ * lookups within the map entry.
+ */
+static int autofs_dev_ioctl_requester(struct file *fp,
+				      struct autofs_sb_info *sbi,
+				      struct autofs_dev_ioctl *param)
+{
+	struct autofs_info *ino;
+	struct nameidata nd;
+	const char *path;
+	dev_t devid;
+	int err = -ENOENT;
+
+	if (param->size <= sizeof(*param)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	path = param->path;
+	devid = sbi->sb->s_dev;
+
+	param->arg1 = param->arg2 = -1;
+
+	/* Get nameidata of the parent directory */
+	err = path_lookup(path, LOOKUP_PARENT, &nd);
+	if (err)
+		goto out;
+
+	err = autofs_dev_ioctl_find_super(&nd, devid);
+	if (err)
+		goto out_release;
+
+	ino = autofs4_dentry_ino(nd.path.dentry);
+	if (ino) {
+		err = 0;
+		autofs4_expire_wait(nd.path.dentry);
+		spin_lock(&sbi->fs_lock);
+		param->arg1 = ino->uid;
+		param->arg2 = ino->gid;
+		spin_unlock(&sbi->fs_lock);
+	}
+
+out_release:
+	path_put(&nd.path);
+out:
+	return err;
+}
+
+/*
+ * Call repeatedly until it returns -EAGAIN, meaning there's nothing
+ * more that can be done.
+ */
+static int autofs_dev_ioctl_expire(struct file *fp,
+				   struct autofs_sb_info *sbi,
+				   struct autofs_dev_ioctl *param)
+{
+	struct dentry *dentry;
+	struct vfsmount *mnt;
+	int err = -EAGAIN;
+	int how;
+
+	how = param->arg1;
+	mnt = fp->f_path.mnt;
+
+	if (sbi->type & AUTOFS_TYPE_TRIGGER)
+		dentry = autofs4_expire_direct(sbi->sb, mnt, sbi, how);
+	else
+		dentry = autofs4_expire_indirect(sbi->sb, mnt, sbi, how);
+
+	if (dentry) {
+		struct autofs_info *ino = autofs4_dentry_ino(dentry);
+
+		/*
+		 * This is synchronous because it makes the daemon a
+		 * little easier
+		*/
+		err = autofs4_wait(sbi, dentry, NFY_EXPIRE);
+
+		spin_lock(&sbi->fs_lock);
+		if (ino->flags & AUTOFS_INF_MOUNTPOINT) {
+			ino->flags &= ~AUTOFS_INF_MOUNTPOINT;
+			sbi->sb->s_root->d_mounted++;
+		}
+		ino->flags &= ~AUTOFS_INF_EXPIRING;
+		complete_all(&ino->expire_complete);
+		spin_unlock(&sbi->fs_lock);
+		dput(dentry);
+	}
+
+	return err;
+}
+
+/* Check if autofs mount point is in use */
+static int autofs_dev_ioctl_askumount(struct file *fp,
+				      struct autofs_sb_info *sbi,
+				      struct autofs_dev_ioctl *param)
+{
+	param->arg1 = 0;
+	if (may_umount(fp->f_path.mnt))
+		param->arg1 = 1;
+	return 0;
+}
+
+/*
+ * Check if the given path is a mountpoint.
+ *
+ * If we are supplied with the file descriptor of an autofs
+ * mount we're looking for a specific mount. In this case
+ * the path is considered a mountpoint if it is itself a
+ * mountpoint or contains a mount, such as a multi-mount
+ * without a root mount. In this case we return 1 if the
+ * path is a mount point and the super magic of the covering
+ * mount if there is one or 0 if it isn't a mountpoint.
+ *
+ * If we aren't supplied with a file descriptor then we
+ * lookup the nameidata of the path and check if it is the
+ * root of a mount. If a type is given we are looking for
+ * a particular autofs mount and if we don't find a match
+ * we return fail. If the located nameidata path is the
+ * root of a mount we return 1 along with the super magic
+ * of the mount or 0 otherwise.
+ *
+ * In both cases the the device number (as returned by
+ * new_encode_dev()) is also returned.
+ */
+static int autofs_dev_ioctl_ismountpoint(struct file *fp,
+					 struct autofs_sb_info *sbi,
+					 struct autofs_dev_ioctl *param)
+{
+	struct nameidata nd;
+	const char *path;
+	unsigned int type;
+	int err = -ENOENT;
+
+	if (param->size <= sizeof(*param)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	path = param->path;
+	type = param->arg1;
+
+	param->arg1 = 0;
+	param->arg2 = 0;
+
+	if (!fp || param->ioctlfd == -1) {
+		if (type == AUTOFS_TYPE_ANY) {
+			struct super_block *sb;
+
+			err = path_lookup(path, LOOKUP_FOLLOW, &nd);
+			if (err)
+				goto out;
+
+			sb = nd.path.dentry->d_sb;
+			param->arg1 = new_encode_dev(sb->s_dev);
+		} else {
+			struct autofs_info *ino;
+
+			err = path_lookup(path, LOOKUP_PARENT, &nd);
+			if (err)
+				goto out;
+
+			err = autofs_dev_ioctl_find_sbi_type(&nd, type);
+			if (err)
+				goto out_release;
+
+			ino = autofs4_dentry_ino(nd.path.dentry);
+			param->arg1 = autofs4_get_dev(ino->sbi);
+		}
+
+		err = 0;
+		if (nd.path.dentry->d_inode &&
+		    nd.path.mnt->mnt_root == nd.path.dentry) {
+			err = 1;
+			param->arg2 = nd.path.dentry->d_inode->i_sb->s_magic;
+		}
+	} else {
+		dev_t devid = new_encode_dev(sbi->sb->s_dev);
+
+		err = path_lookup(path, LOOKUP_PARENT, &nd);
+		if (err)
+			goto out;
+
+		err = autofs_dev_ioctl_find_super(&nd, devid);
+		if (err)
+			goto out_release;
+
+		param->arg1 = autofs4_get_dev(sbi);
+
+		err = have_submounts(nd.path.dentry);
+
+		if (nd.path.mnt->mnt_mountpoint != nd.path.mnt->mnt_root) {
+			if (follow_down(&nd.path.mnt, &nd.path.dentry)) {
+				struct inode *inode = nd.path.dentry->d_inode;
+				param->arg2 = inode->i_sb->s_magic;
+			}
+		}
+	}
+
+out_release:
+	path_put(&nd.path);
+out:
+	return err;
+}
+
+/*
+ * Our range of ioctl numbers isn't 0 based so we need to shift
+ * the array index by _IOC_NR(AUTOFS_CTL_IOC_FIRST) for the table
+ * lookup.
+ */
+#define cmd_idx(cmd)	(cmd - _IOC_NR(AUTOFS_DEV_IOCTL_IOC_FIRST))
+
+static ioctl_fn lookup_dev_ioctl(unsigned int cmd)
+{
+	static struct {
+		int cmd;
+		ioctl_fn fn;
+	} _ioctls[] = {
+		{cmd_idx(AUTOFS_DEV_IOCTL_VERSION_CMD), NULL},
+		{cmd_idx(AUTOFS_DEV_IOCTL_PROTOVER_CMD),
+			 autofs_dev_ioctl_protover},
+		{cmd_idx(AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD),
+			 autofs_dev_ioctl_protosubver},
+		{cmd_idx(AUTOFS_DEV_IOCTL_OPENMOUNT_CMD),
+			 autofs_dev_ioctl_openmount},
+		{cmd_idx(AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD),
+			 autofs_dev_ioctl_closemount},
+		{cmd_idx(AUTOFS_DEV_IOCTL_READY_CMD),
+			 autofs_dev_ioctl_ready},
+		{cmd_idx(AUTOFS_DEV_IOCTL_FAIL_CMD),
+			 autofs_dev_ioctl_fail},
+		{cmd_idx(AUTOFS_DEV_IOCTL_SETPIPEFD_CMD),
+			 autofs_dev_ioctl_setpipefd},
+		{cmd_idx(AUTOFS_DEV_IOCTL_CATATONIC_CMD),
+			 autofs_dev_ioctl_catatonic},
+		{cmd_idx(AUTOFS_DEV_IOCTL_TIMEOUT_CMD),
+			 autofs_dev_ioctl_timeout},
+		{cmd_idx(AUTOFS_DEV_IOCTL_REQUESTER_CMD),
+			 autofs_dev_ioctl_requester},
+		{cmd_idx(AUTOFS_DEV_IOCTL_EXPIRE_CMD),
+			 autofs_dev_ioctl_expire},
+		{cmd_idx(AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD),
+			 autofs_dev_ioctl_askumount},
+		{cmd_idx(AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD),
+			 autofs_dev_ioctl_ismountpoint}
+	};
+	unsigned int idx = cmd_idx(cmd);
+
+	return (idx >= ARRAY_SIZE(_ioctls)) ? NULL : _ioctls[idx].fn;
+}
+
+/* ioctl dispatcher */
+static int _autofs_dev_ioctl(unsigned int command, struct autofs_dev_ioctl __user *user)
+{
+	struct autofs_dev_ioctl *param;
+	struct file *fp;
+	struct autofs_sb_info *sbi;
+	unsigned int cmd_first, cmd;
+	ioctl_fn fn = NULL;
+	int err = 0;
+
+	/* only root can play with this */
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	cmd_first = _IOC_NR(AUTOFS_DEV_IOCTL_IOC_FIRST);
+	cmd = _IOC_NR(command);
+
+	if (_IOC_TYPE(command) != _IOC_TYPE(AUTOFS_DEV_IOCTL_IOC_FIRST) ||
+	    cmd - cmd_first >= AUTOFS_DEV_IOCTL_IOC_COUNT) {
+		return -ENOTTY;
+	}
+
+	/* Copy the parameters into kernel space. */
+	param = copy_dev_ioctl(user);
+	if (IS_ERR(param))
+		return PTR_ERR(param);
+
+	err = validate_dev_ioctl(command, param);
+	if (err)
+		goto out;
+
+	/* The validate routine above always sets the version */
+	if (cmd == AUTOFS_DEV_IOCTL_VERSION_CMD)
+		goto done;
+
+	fn = lookup_dev_ioctl(cmd);
+	if (!fn) {
+		AUTOFS_WARN("unknown command 0x%08x", command);
+		return -ENOTTY;
+	}
+
+	fp = NULL;
+	sbi = NULL;
+
+	/*
+	 * For obvious reasons the openmount can't have a file
+	 * descriptor yet. We don't take a reference to the
+	 * file during close to allow for immediate release.
+	 */
+	if (cmd != AUTOFS_DEV_IOCTL_OPENMOUNT_CMD &&
+	    cmd != AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD) {
+		fp = fget(param->ioctlfd);
+		if (!fp) {
+			if (cmd == AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD)
+				goto cont;
+			err = -EBADF;
+			goto out;
+		}
+
+		if (!fp->f_op) {
+			err = -ENOTTY;
+			fput(fp);
+			goto out;
+		}
+
+		sbi = autofs_dev_ioctl_sbi(fp);
+		if (!sbi || sbi->magic != AUTOFS_SBI_MAGIC) {
+			err = -EINVAL;
+			fput(fp);
+			goto out;
+		}
+
+		/*
+		 * Admin needs to be able to set the mount catatonic in
+		 * order to be able to perform the re-open.
+		 */
+		if (!autofs4_oz_mode(sbi) &&
+		    cmd != AUTOFS_DEV_IOCTL_CATATONIC_CMD) {
+			err = -EACCES;
+			fput(fp);
+			goto out;
+		}
+	}
+cont:
+	err = fn(fp, sbi, param);
+
+	if (fp)
+		fput(fp);
+done:
+	if (err >= 0 && copy_to_user(user, param, AUTOFS_DEV_IOCTL_SIZE))
+		err = -EFAULT;
+out:
+	free_dev_ioctl(param);
+	return err;
+}
+
+static long autofs_dev_ioctl(struct file *file, uint command, ulong u)
+{
+	int err;
+	err = _autofs_dev_ioctl(command, (struct autofs_dev_ioctl __user *) u);
+	return (long) err;
+}
+
+#ifdef CONFIG_COMPAT
+static long autofs_dev_ioctl_compat(struct file *file, uint command, ulong u)
+{
+	return (long) autofs_dev_ioctl(file, command, (ulong) compat_ptr(u));
+}
+#else
+#define autofs_dev_ioctl_compat NULL
+#endif
+
+static const struct file_operations _dev_ioctl_fops = {
+	.unlocked_ioctl	 = autofs_dev_ioctl,
+	.compat_ioctl = autofs_dev_ioctl_compat,
+	.owner	 = THIS_MODULE,
+};
+
+static struct miscdevice _autofs_dev_ioctl_misc = {
+	.minor 		= MISC_DYNAMIC_MINOR,
+	.name  		= AUTOFS_DEVICE_NAME,
+	.fops  		= &_dev_ioctl_fops
+};
+
+/* Register/deregister misc character device */
+int autofs_dev_ioctl_init(void)
+{
+	int r;
+
+	r = misc_register(&_autofs_dev_ioctl_misc);
+	if (r) {
+		AUTOFS_ERROR("misc_register failed for control device");
+		return r;
+	}
+
+	return 0;
+}
+
+void autofs_dev_ioctl_exit(void)
+{
+	misc_deregister(&_autofs_dev_ioctl_misc);
+	return;
+}
+
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index e79dd09e12a1..cde2f8e8935a 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -244,10 +244,10 @@ cont:
 }
 
 /* Check if we can expire a direct mount (possibly a tree) */
-static struct dentry *autofs4_expire_direct(struct super_block *sb,
-					    struct vfsmount *mnt,
-					    struct autofs_sb_info *sbi,
-					    int how)
+struct dentry *autofs4_expire_direct(struct super_block *sb,
+				     struct vfsmount *mnt,
+				     struct autofs_sb_info *sbi,
+				     int how)
 {
 	unsigned long timeout;
 	struct dentry *root = dget(sb->s_root);
@@ -283,10 +283,10 @@ static struct dentry *autofs4_expire_direct(struct super_block *sb,
  *  - it is unused by any user process
  *  - it has been unused for exp_timeout time
  */
-static struct dentry *autofs4_expire_indirect(struct super_block *sb,
-					      struct vfsmount *mnt,
-					      struct autofs_sb_info *sbi,
-					      int how)
+struct dentry *autofs4_expire_indirect(struct super_block *sb,
+				       struct vfsmount *mnt,
+				       struct autofs_sb_info *sbi,
+				       int how)
 {
 	unsigned long timeout;
 	struct dentry *root = sb->s_root;
diff --git a/fs/autofs4/init.c b/fs/autofs4/init.c
index 723a1c5e361b..9722e4bd8957 100644
--- a/fs/autofs4/init.c
+++ b/fs/autofs4/init.c
@@ -29,11 +29,20 @@ static struct file_system_type autofs_fs_type = {
 
 static int __init init_autofs4_fs(void)
 {
-	return register_filesystem(&autofs_fs_type);
+	int err;
+
+	err = register_filesystem(&autofs_fs_type);
+	if (err)
+		return err;
+
+	autofs_dev_ioctl_init();
+
+	return err;
 }
 
 static void __exit exit_autofs4_fs(void)
 {
+	autofs_dev_ioctl_exit();
 	unregister_filesystem(&autofs_fs_type);
 }
 
diff --git a/include/linux/auto_dev-ioctl.h b/include/linux/auto_dev-ioctl.h
new file mode 100644
index 000000000000..f4d05ccd731f
--- /dev/null
+++ b/include/linux/auto_dev-ioctl.h
@@ -0,0 +1,157 @@
+/*
+ * Copyright 2008 Red Hat, Inc. All rights reserved.
+ * Copyright 2008 Ian Kent <raven@themaw.net>
+ *
+ * This file is part of the Linux kernel and is made available under
+ * the terms of the GNU General Public License, version 2, or at your
+ * option, any later version, incorporated herein by reference.
+ */
+
+#ifndef _LINUX_AUTO_DEV_IOCTL_H
+#define _LINUX_AUTO_DEV_IOCTL_H
+
+#include <linux/types.h>
+
+#define AUTOFS_DEVICE_NAME		"autofs"
+
+#define AUTOFS_DEV_IOCTL_VERSION_MAJOR	1
+#define AUTOFS_DEV_IOCTL_VERSION_MINOR	0
+
+#define AUTOFS_DEVID_LEN		16
+
+#define AUTOFS_DEV_IOCTL_SIZE		sizeof(struct autofs_dev_ioctl)
+
+/*
+ * An ioctl interface for autofs mount point control.
+ */
+
+/*
+ * All the ioctls use this structure.
+ * When sending a path size must account for the total length
+ * of the chunk of memory otherwise is is the size of the
+ * structure.
+ */
+
+struct autofs_dev_ioctl {
+	__u32 ver_major;
+	__u32 ver_minor;
+	__u32 size;		/* total size of data passed in
+				 * including this struct */
+	__s32 ioctlfd;		/* automount command fd */
+
+	__u32 arg1;		/* Command parameters */
+	__u32 arg2;
+
+	char path[0];
+};
+
+static inline void init_autofs_dev_ioctl(struct autofs_dev_ioctl *in)
+{
+	in->ver_major = AUTOFS_DEV_IOCTL_VERSION_MAJOR;
+	in->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR;
+	in->size = sizeof(struct autofs_dev_ioctl);
+	in->ioctlfd = -1;
+	in->arg1 = 0;
+	in->arg2 = 0;
+	return;
+}
+
+/*
+ * If you change this make sure you make the corresponding change
+ * to autofs-dev-ioctl.c:lookup_ioctl()
+ */
+enum {
+	/* Get various version info */
+	AUTOFS_DEV_IOCTL_VERSION_CMD = 0x71,
+	AUTOFS_DEV_IOCTL_PROTOVER_CMD,
+	AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD,
+
+	/* Open mount ioctl fd */
+	AUTOFS_DEV_IOCTL_OPENMOUNT_CMD,
+
+	/* Close mount ioctl fd */
+	AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD,
+
+	/* Mount/expire status returns */
+	AUTOFS_DEV_IOCTL_READY_CMD,
+	AUTOFS_DEV_IOCTL_FAIL_CMD,
+
+	/* Activate/deactivate autofs mount */
+	AUTOFS_DEV_IOCTL_SETPIPEFD_CMD,
+	AUTOFS_DEV_IOCTL_CATATONIC_CMD,
+
+	/* Expiry timeout */
+	AUTOFS_DEV_IOCTL_TIMEOUT_CMD,
+
+	/* Get mount last requesting uid and gid */
+	AUTOFS_DEV_IOCTL_REQUESTER_CMD,
+
+	/* Check for eligible expire candidates */
+	AUTOFS_DEV_IOCTL_EXPIRE_CMD,
+
+	/* Request busy status */
+	AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD,
+
+	/* Check if path is a mountpoint */
+	AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD,
+};
+
+#define AUTOFS_IOCTL 0x93
+
+#define AUTOFS_DEV_IOCTL_VERSION \
+	_IOWR(AUTOFS_IOCTL, \
+	      AUTOFS_DEV_IOCTL_VERSION_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_PROTOVER \
+	_IOWR(AUTOFS_IOCTL, \
+	      AUTOFS_DEV_IOCTL_PROTOVER_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_PROTOSUBVER \
+	_IOWR(AUTOFS_IOCTL, \
+	      AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_OPENMOUNT \
+	_IOWR(AUTOFS_IOCTL, \
+	      AUTOFS_DEV_IOCTL_OPENMOUNT_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_CLOSEMOUNT \
+	_IOWR(AUTOFS_IOCTL, \
+	      AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_READY \
+	_IOWR(AUTOFS_IOCTL, \
+	      AUTOFS_DEV_IOCTL_READY_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_FAIL \
+	_IOWR(AUTOFS_IOCTL, \
+	      AUTOFS_DEV_IOCTL_FAIL_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_SETPIPEFD \
+	_IOWR(AUTOFS_IOCTL, \
+	      AUTOFS_DEV_IOCTL_SETPIPEFD_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_CATATONIC \
+	_IOWR(AUTOFS_IOCTL, \
+	      AUTOFS_DEV_IOCTL_CATATONIC_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_TIMEOUT \
+	_IOWR(AUTOFS_IOCTL, \
+	      AUTOFS_DEV_IOCTL_TIMEOUT_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_REQUESTER \
+	_IOWR(AUTOFS_IOCTL, \
+	      AUTOFS_DEV_IOCTL_REQUESTER_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_EXPIRE \
+	_IOWR(AUTOFS_IOCTL, \
+	      AUTOFS_DEV_IOCTL_EXPIRE_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_ASKUMOUNT \
+	_IOWR(AUTOFS_IOCTL, \
+	      AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD, struct autofs_dev_ioctl)
+
+#define AUTOFS_DEV_IOCTL_ISMOUNTPOINT \
+	_IOWR(AUTOFS_IOCTL, \
+	      AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD, struct autofs_dev_ioctl)
+
+#endif	/* _LINUX_AUTO_DEV_IOCTL_H */
diff --git a/include/linux/auto_fs4.h b/include/linux/auto_fs4.h
index aa96a04ae023..2253716d4b92 100644
--- a/include/linux/auto_fs4.h
+++ b/include/linux/auto_fs4.h
@@ -23,7 +23,7 @@
 #define AUTOFS_MIN_PROTO_VERSION	3
 #define AUTOFS_MAX_PROTO_VERSION	5
 
-#define AUTOFS_PROTO_SUBVERSION		0
+#define AUTOFS_PROTO_SUBVERSION		1
 
 /* Mask for expire behaviour */
 #define AUTOFS_EXP_IMMEDIATE		1
-- 
cgit v1.2.3


From 3d599d1ca57f443e5c4ff5af1e69d90350082f77 Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <ukleinek@informatik.uni-freiburg.de>
Date: Wed, 15 Oct 2008 22:03:12 -0700
Subject: gpio_free might sleep, generic part
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

According to the documentation gpio_free should only be called from task
context only.  To make this more explicit add a might sleep to all
implementations.

This is the generic part which changes gpiolib and the fallback
implementation only.

Signed-off-by: Uwe Kleine-König <ukleinek@informatik.uni-freiburg.de>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpio/gpiolib.c | 2 ++
 include/linux/gpio.h   | 3 +++
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 8d2940517c99..317004fd94fb 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -792,6 +792,8 @@ void gpio_free(unsigned gpio)
 	unsigned long		flags;
 	struct gpio_desc	*desc;
 
+	might_sleep();
+
 	if (!gpio_is_valid(gpio)) {
 		WARN_ON(extra_checks);
 		return;
diff --git a/include/linux/gpio.h b/include/linux/gpio.h
index 730a20b83576..e10c49a5b96e 100644
--- a/include/linux/gpio.h
+++ b/include/linux/gpio.h
@@ -8,6 +8,7 @@
 
 #else
 
+#include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/errno.h>
 
@@ -32,6 +33,8 @@ static inline int gpio_request(unsigned gpio, const char *label)
 
 static inline void gpio_free(unsigned gpio)
 {
+	might_sleep();
+
 	/* GPIO can never have been requested */
 	WARN_ON(1);
 }
-- 
cgit v1.2.3


From e3a1938805d2e81b27d3d348788644f3bad004f2 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@us.ibm.com>
Date: Wed, 15 Oct 2008 22:03:52 -0700
Subject: matroxfb: support G200eV chip

Support the Matrox G200eV chip, based on timings that I found in the X.org
matrox driver.

Signed-off-by: Darrick J. Wong <djwong@us.ibm.com>
Acked-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Cc: Petr Vandrovec <vandrove@vc.cvut.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/matrox/matroxfb_base.c | 9 +++++++++
 include/linux/pci_ids.h              | 1 +
 2 files changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/video/matrox/matroxfb_base.c b/drivers/video/matrox/matroxfb_base.c
index c02136202792..8e7a275df50c 100644
--- a/drivers/video/matrox/matroxfb_base.c
+++ b/drivers/video/matrox/matroxfb_base.c
@@ -1453,6 +1453,13 @@ static struct board {
 		MGA_G100,
 		&vbG100,
 		"MGA-G100 (AGP)"},
+	{PCI_VENDOR_ID_MATROX,	PCI_DEVICE_ID_MATROX_G200EV_PCI,	0xFF,
+		0,			0,
+		DEVF_G200,
+		230000,
+		MGA_G200,
+		&vbG200,
+		"MGA-G200eV (PCI)"},
 	{PCI_VENDOR_ID_MATROX,	PCI_DEVICE_ID_MATROX_G200_PCI,	0xFF,
 		0,			0,
 		DEVF_G200,
@@ -2118,6 +2125,8 @@ static struct pci_device_id matroxfb_devices[] = {
 		PCI_ANY_ID,	PCI_ANY_ID,	0, 0, 0},
 	{PCI_VENDOR_ID_MATROX,	PCI_DEVICE_ID_MATROX_G100_AGP,
 		PCI_ANY_ID,	PCI_ANY_ID,	0, 0, 0},
+	{PCI_VENDOR_ID_MATROX,	PCI_DEVICE_ID_MATROX_G200EV_PCI,
+		PCI_ANY_ID,	PCI_ANY_ID,	0, 0, 0},
 	{PCI_VENDOR_ID_MATROX,	PCI_DEVICE_ID_MATROX_G200_PCI,
 		PCI_ANY_ID,	PCI_ANY_ID,	0, 0, 0},
 	{PCI_VENDOR_ID_MATROX,	PCI_DEVICE_ID_MATROX_G200_AGP,
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 1176f1f177e2..8edddc240e4f 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -587,6 +587,7 @@
 #define PCI_DEVICE_ID_MATROX_G200_PCI	0x0520
 #define PCI_DEVICE_ID_MATROX_G200_AGP	0x0521
 #define	PCI_DEVICE_ID_MATROX_G400	0x0525
+#define	PCI_DEVICE_ID_MATROX_G200EV_PCI	0x0530
 #define PCI_DEVICE_ID_MATROX_G550	0x2527
 #define PCI_DEVICE_ID_MATROX_VIA	0x4536
 
-- 
cgit v1.2.3


From b53cde3557b8f97e6a635782875d442551a89bf1 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dbaryshkov@gmail.com>
Date: Wed, 15 Oct 2008 22:03:55 -0700
Subject: fbdev: add new TMIO framebuffer driver

Add driver for TMIO framebuffer cells as found e.g. in Toshiba TC6393XB
chips.

Signed-off-by: Dmitry Baryshkov <dbaryshkov@gmail.com>
Cc: Ian Molton <spyro@f2s.com>
Acked-by: Samuel Ortiz <sameo@openedhand.com>
Acked-by: Krzysztof Helt <krzysztof.h1@wp.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/Kconfig    |   22 +
 drivers/video/Makefile   |    1 +
 drivers/video/tmiofb.c   | 1050 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/tmio.h |   19 +
 4 files changed, 1092 insertions(+)
 create mode 100644 drivers/video/tmiofb.c

(limited to 'include/linux')

diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index 6210755cf6ab..0f13448c6f79 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -1896,6 +1896,28 @@ config FB_SH_MOBILE_LCDC
 	---help---
 	  Frame buffer driver for the on-chip SH-Mobile LCD controller.
 
+config FB_TMIO
+	tristate "Toshiba Mobile IO FrameBuffer support"
+	depends on FB && MFD_CORE
+	select FB_CFB_FILLRECT
+	select FB_CFB_COPYAREA
+	select FB_CFB_IMAGEBLIT
+	---help---
+	  Frame buffer driver for the Toshiba Mobile IO integrated as found
+	  on the Sharp SL-6000 series
+
+	  This driver is also available as a module ( = code which can be
+	  inserted and removed from the running kernel whenever you want). The
+	  module will be called tmiofb. If you want to compile it as a module,
+	  say M here and read <file:Documentation/kbuild/modules.txt>.
+
+	  If unsure, say N.
+
+config FB_TMIO_ACCELL
+	bool "tmiofb acceleration"
+	depends on FB_TMIO
+	default y
+
 config FB_S3C2410
 	tristate "S3C2410 LCD framebuffer support"
 	depends on FB && ARCH_S3C2410
diff --git a/drivers/video/Makefile b/drivers/video/Makefile
index 2bc94d8eb3c8..248bddc8d0b0 100644
--- a/drivers/video/Makefile
+++ b/drivers/video/Makefile
@@ -98,6 +98,7 @@ obj-$(CONFIG_FB_CIRRUS)		  += cirrusfb.o
 obj-$(CONFIG_FB_ASILIANT)	  += asiliantfb.o
 obj-$(CONFIG_FB_PXA)		  += pxafb.o
 obj-$(CONFIG_FB_W100)		  += w100fb.o
+obj-$(CONFIG_FB_TMIO)		  += tmiofb.o
 obj-$(CONFIG_FB_AU1100)		  += au1100fb.o
 obj-$(CONFIG_FB_AU1200)		  += au1200fb.o
 obj-$(CONFIG_FB_PMAG_AA)	  += pmag-aa-fb.o
diff --git a/drivers/video/tmiofb.c b/drivers/video/tmiofb.c
new file mode 100644
index 000000000000..2a380011e9ba
--- /dev/null
+++ b/drivers/video/tmiofb.c
@@ -0,0 +1,1050 @@
+/*
+ * Frame Buffer Device for Toshiba Mobile IO(TMIO) controller
+ *
+ * Copyright(C) 2005-2006 Chris Humbert
+ * Copyright(C) 2005 Dirk Opfer
+ * Copytight(C) 2007,2008 Dmitry Baryshkov
+ *
+ * Based on:
+ *	drivers/video/w100fb.c
+ *	code written by Sharp/Lineo for 2.4 kernels
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation;
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/fb.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+/* Why should fb driver call console functions? because acquire_console_sem() */
+#include <linux/console.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/tmio.h>
+#include <linux/uaccess.h>
+
+/*
+ * accelerator commands
+ */
+#define TMIOFB_ACC_CSADR(x)	(0x00000000 | ((x) & 0x001ffffe))
+#define TMIOFB_ACC_CHPIX(x)	(0x01000000 | ((x) & 0x000003ff))
+#define TMIOFB_ACC_CVPIX(x)	(0x02000000 | ((x) & 0x000003ff))
+#define TMIOFB_ACC_PSADR(x)	(0x03000000 | ((x) & 0x00fffffe))
+#define TMIOFB_ACC_PHPIX(x)	(0x04000000 | ((x) & 0x000003ff))
+#define TMIOFB_ACC_PVPIX(x)	(0x05000000 | ((x) & 0x000003ff))
+#define TMIOFB_ACC_PHOFS(x)	(0x06000000 | ((x) & 0x000003ff))
+#define TMIOFB_ACC_PVOFS(x)	(0x07000000 | ((x) & 0x000003ff))
+#define TMIOFB_ACC_POADR(x)	(0x08000000 | ((x) & 0x00fffffe))
+#define TMIOFB_ACC_RSTR(x)	(0x09000000 | ((x) & 0x000000ff))
+#define TMIOFB_ACC_TCLOR(x)	(0x0A000000 | ((x) & 0x0000ffff))
+#define TMIOFB_ACC_FILL(x)	(0x0B000000 | ((x) & 0x0000ffff))
+#define TMIOFB_ACC_DSADR(x)	(0x0C000000 | ((x) & 0x00fffffe))
+#define TMIOFB_ACC_SSADR(x)	(0x0D000000 | ((x) & 0x00fffffe))
+#define TMIOFB_ACC_DHPIX(x)	(0x0E000000 | ((x) & 0x000003ff))
+#define TMIOFB_ACC_DVPIX(x)	(0x0F000000 | ((x) & 0x000003ff))
+#define TMIOFB_ACC_SHPIX(x)	(0x10000000 | ((x) & 0x000003ff))
+#define TMIOFB_ACC_SVPIX(x)	(0x11000000 | ((x) & 0x000003ff))
+#define TMIOFB_ACC_LBINI(x)	(0x12000000 | ((x) & 0x0000ffff))
+#define TMIOFB_ACC_LBK2(x)	(0x13000000 | ((x) & 0x0000ffff))
+#define TMIOFB_ACC_SHBINI(x)	(0x14000000 | ((x) & 0x0000ffff))
+#define TMIOFB_ACC_SHBK2(x)	(0x15000000 | ((x) & 0x0000ffff))
+#define TMIOFB_ACC_SVBINI(x)	(0x16000000 | ((x) & 0x0000ffff))
+#define TMIOFB_ACC_SVBK2(x)	(0x17000000 | ((x) & 0x0000ffff))
+
+#define TMIOFB_ACC_CMGO		0x20000000
+#define TMIOFB_ACC_CMGO_CEND	0x00000001
+#define TMIOFB_ACC_CMGO_INT	0x00000002
+#define TMIOFB_ACC_CMGO_CMOD	0x00000010
+#define TMIOFB_ACC_CMGO_CDVRV	0x00000020
+#define TMIOFB_ACC_CMGO_CDHRV	0x00000040
+#define TMIOFB_ACC_CMGO_RUND	0x00008000
+#define TMIOFB_ACC_SCGO		0x21000000
+#define TMIOFB_ACC_SCGO_CEND	0x00000001
+#define TMIOFB_ACC_SCGO_INT	0x00000002
+#define TMIOFB_ACC_SCGO_ROP3	0x00000004
+#define TMIOFB_ACC_SCGO_TRNS	0x00000008
+#define TMIOFB_ACC_SCGO_DVRV	0x00000010
+#define TMIOFB_ACC_SCGO_DHRV	0x00000020
+#define TMIOFB_ACC_SCGO_SVRV	0x00000040
+#define TMIOFB_ACC_SCGO_SHRV	0x00000080
+#define TMIOFB_ACC_SCGO_DSTXY	0x00008000
+#define TMIOFB_ACC_SBGO		0x22000000
+#define TMIOFB_ACC_SBGO_CEND	0x00000001
+#define TMIOFB_ACC_SBGO_INT	0x00000002
+#define TMIOFB_ACC_SBGO_DVRV	0x00000010
+#define TMIOFB_ACC_SBGO_DHRV	0x00000020
+#define TMIOFB_ACC_SBGO_SVRV	0x00000040
+#define TMIOFB_ACC_SBGO_SHRV	0x00000080
+#define TMIOFB_ACC_SBGO_SBMD	0x00000100
+#define TMIOFB_ACC_FLGO		0x23000000
+#define TMIOFB_ACC_FLGO_CEND	0x00000001
+#define TMIOFB_ACC_FLGO_INT	0x00000002
+#define TMIOFB_ACC_FLGO_ROP3	0x00000004
+#define TMIOFB_ACC_LDGO		0x24000000
+#define TMIOFB_ACC_LDGO_CEND	0x00000001
+#define TMIOFB_ACC_LDGO_INT	0x00000002
+#define TMIOFB_ACC_LDGO_ROP3	0x00000004
+#define TMIOFB_ACC_LDGO_ENDPX	0x00000008
+#define TMIOFB_ACC_LDGO_LVRV	0x00000010
+#define TMIOFB_ACC_LDGO_LHRV	0x00000020
+#define TMIOFB_ACC_LDGO_LDMOD	0x00000040
+
+/* a FIFO is always allocated, even if acceleration is not used */
+#define TMIOFB_FIFO_SIZE	512
+
+/*
+ * LCD Host Controller Configuration Register
+ *
+ * This iomem area supports only 16-bit IO.
+ */
+#define CCR_CMD			0x04 /* Command				*/
+#define CCR_REVID		0x08 /* Revision ID			*/
+#define CCR_BASEL		0x10 /* LCD Control Reg Base Addr Low	*/
+#define CCR_BASEH		0x12 /* LCD Control Reg Base Addr High	*/
+#define CCR_UGCC		0x40 /* Unified Gated Clock Control	*/
+#define CCR_GCC			0x42 /* Gated Clock Control		*/
+#define CCR_USC			0x50 /* Unified Software Clear		*/
+#define CCR_VRAMRTC		0x60 /* VRAM Timing Control		*/
+				/* 0x61 VRAM Refresh Control		*/
+#define CCR_VRAMSAC		0x62 /* VRAM Access Control		*/
+				/* 0x63	VRAM Status			*/
+#define CCR_VRAMBC		0x64 /* VRAM Block Control		*/
+
+/*
+ * LCD Control Register
+ *
+ * This iomem area supports only 16-bit IO.
+ */
+#define LCR_UIS			0x000 /* Unified Interrupt Status	*/
+#define LCR_VHPN		0x008 /* VRAM Horizontal Pixel Number	*/
+#define LCR_CFSAL		0x00a /* Command FIFO Start Address Low	*/
+#define LCR_CFSAH		0x00c /* Command FIFO Start Address High */
+#define LCR_CFS			0x00e /* Command FIFO Size		*/
+#define LCR_CFWS		0x010 /* Command FIFO Writeable Size	*/
+#define LCR_BBIE		0x012 /* BitBLT Interrupt Enable	*/
+#define LCR_BBISC		0x014 /* BitBLT Interrupt Status and Clear */
+#define LCR_CCS			0x016 /* Command Count Status		*/
+#define LCR_BBES		0x018 /* BitBLT Execution Status	*/
+#define LCR_CMDL		0x01c /* Command Low			*/
+#define LCR_CMDH		0x01e /* Command High			*/
+#define LCR_CFC			0x022 /* Command FIFO Clear		*/
+#define LCR_CCIFC		0x024 /* CMOS Camera IF Control		*/
+#define LCR_HWT			0x026 /* Hardware Test			*/
+#define LCR_LCDCCRC		0x100 /* LCDC Clock and Reset Control	*/
+#define LCR_LCDCC		0x102 /* LCDC Control			*/
+#define LCR_LCDCOPC		0x104 /* LCDC Output Pin Control	*/
+#define LCR_LCDIS		0x108 /* LCD Interrupt Status		*/
+#define LCR_LCDIM		0x10a /* LCD Interrupt Mask		*/
+#define LCR_LCDIE		0x10c /* LCD Interrupt Enable		*/
+#define LCR_GDSAL		0x122 /* Graphics Display Start Address Low */
+#define LCR_GDSAH		0x124 /* Graphics Display Start Address High */
+#define LCR_VHPCL		0x12a /* VRAM Horizontal Pixel Count Low */
+#define LCR_VHPCH		0x12c /* VRAM Horizontal Pixel Count High */
+#define LCR_GM			0x12e /* Graphic Mode(VRAM access enable) */
+#define LCR_HT			0x140 /* Horizontal Total		*/
+#define LCR_HDS			0x142 /* Horizontal Display Start	*/
+#define LCR_HSS			0x144 /* H-Sync Start			*/
+#define LCR_HSE			0x146 /* H-Sync End			*/
+#define LCR_HNP			0x14c /* Horizontal Number of Pixels	*/
+#define LCR_VT			0x150 /* Vertical Total			*/
+#define LCR_VDS			0x152 /* Vertical Display Start		*/
+#define LCR_VSS			0x154 /* V-Sync Start			*/
+#define LCR_VSE			0x156 /* V-Sync End			*/
+#define LCR_CDLN		0x160 /* Current Display Line Number	*/
+#define LCR_ILN			0x162 /* Interrupt Line Number		*/
+#define LCR_SP			0x164 /* Sync Polarity			*/
+#define LCR_MISC		0x166 /* MISC(RGB565 mode)		*/
+#define LCR_VIHSS		0x16a /* Video Interface H-Sync Start	*/
+#define LCR_VIVS		0x16c /* Video Interface Vertical Start	*/
+#define LCR_VIVE		0x16e /* Video Interface Vertical End	*/
+#define LCR_VIVSS		0x170 /* Video Interface V-Sync Start	*/
+#define LCR_VCCIS		0x17e /* Video / CMOS Camera Interface Select */
+#define LCR_VIDWSAL		0x180 /* VI Data Write Start Address Low */
+#define LCR_VIDWSAH		0x182 /* VI Data Write Start Address High */
+#define LCR_VIDRSAL		0x184 /* VI Data Read Start Address Low	*/
+#define LCR_VIDRSAH		0x186 /* VI Data Read Start Address High */
+#define LCR_VIPDDST		0x188 /* VI Picture Data Display Start Timing */
+#define LCR_VIPDDET		0x186 /* VI Picture Data Display End Timing */
+#define LCR_VIE			0x18c /* Video Interface Enable		*/
+#define LCR_VCS			0x18e /* Video/Camera Select		*/
+#define LCR_VPHWC		0x194 /* Video Picture Horizontal Wait Count */
+#define LCR_VPHS		0x196 /* Video Picture Horizontal Size	*/
+#define LCR_VPVWC		0x198 /* Video Picture Vertical Wait Count */
+#define LCR_VPVS		0x19a /* Video Picture Vertical Size	*/
+#define LCR_PLHPIX		0x1a0 /* PLHPIX				*/
+#define LCR_XS			0x1a2 /* XStart				*/
+#define LCR_XCKHW		0x1a4 /* XCK High Width			*/
+#define LCR_STHS		0x1a8 /* STH Start			*/
+#define LCR_VT2			0x1aa /* Vertical Total			*/
+#define LCR_YCKSW		0x1ac /* YCK Start Wait			*/
+#define LCR_YSTS		0x1ae /* YST Start			*/
+#define LCR_PPOLS		0x1b0 /* #PPOL Start			*/
+#define LCR_PRECW		0x1b2 /* PREC Width			*/
+#define LCR_VCLKHW		0x1b4 /* VCLK High Width		*/
+#define LCR_OC			0x1b6 /* Output Control			*/
+
+static char *mode_option __devinitdata;
+
+struct tmiofb_par {
+	u32				pseudo_palette[16];
+
+#ifdef CONFIG_FB_TMIO_ACCELL
+	wait_queue_head_t		wait_acc;
+	bool				use_polling;
+#endif
+
+	void __iomem			*ccr;
+	void __iomem			*lcr;
+};
+
+/*--------------------------------------------------------------------------*/
+
+/*
+ * reasons for an interrupt:
+ *	uis	bbisc	lcdis
+ *	0100	0001	accelerator command completed
+ * 	2000	0001	vsync start
+ * 	2000	0002	display start
+ * 	2000	0004	line number match(0x1ff mask???)
+ */
+static irqreturn_t tmiofb_irq(int irq, void *__info)
+{
+	struct fb_info *info = __info;
+	struct tmiofb_par *par = info->par;
+	unsigned int bbisc = tmio_ioread16(par->lcr + LCR_BBISC);
+
+
+	/*
+	 * We were in polling mode and now we got correct irq.
+	 * Switch back to IRQ-based sync of command FIFO
+	 */
+	if (unlikely(par->use_polling && irq != -1)) {
+		printk(KERN_INFO "tmiofb: switching to waitq\n");
+		par->use_polling = false;
+	}
+
+	tmio_iowrite16(bbisc, par->lcr + LCR_BBISC);
+
+#ifdef CONFIG_FB_TMIO_ACCELL
+	if (bbisc & 1)
+		wake_up(&par->wait_acc);
+#endif
+
+	return IRQ_HANDLED;
+}
+
+
+/*--------------------------------------------------------------------------*/
+
+
+/*
+ * Turns off the LCD controller and LCD host controller.
+ */
+static int tmiofb_hw_stop(struct platform_device *dev)
+{
+	struct mfd_cell *cell = dev->dev.platform_data;
+	struct tmio_fb_data *data = cell->driver_data;
+	struct fb_info *info = platform_get_drvdata(dev);
+	struct tmiofb_par *par = info->par;
+
+	tmio_iowrite16(0, par->ccr + CCR_UGCC);
+	tmio_iowrite16(0, par->lcr + LCR_GM);
+	data->lcd_set_power(dev, 0);
+	tmio_iowrite16(0x0010, par->lcr + LCR_LCDCCRC);
+
+	return 0;
+}
+
+/*
+ * Initializes the LCD host controller.
+ */
+static int tmiofb_hw_init(struct platform_device *dev)
+{
+	struct mfd_cell *cell = dev->dev.platform_data;
+	struct fb_info *info = platform_get_drvdata(dev);
+	struct tmiofb_par *par = info->par;
+	const struct resource *nlcr = &cell->resources[0];
+	const struct resource *vram = &cell->resources[2];
+	unsigned long base;
+
+	if (nlcr == NULL || vram == NULL)
+		return -EINVAL;
+
+	base = nlcr->start;
+
+	tmio_iowrite16(0x003a, par->ccr + CCR_UGCC);
+	tmio_iowrite16(0x003a, par->ccr + CCR_GCC);
+	tmio_iowrite16(0x3f00, par->ccr + CCR_USC);
+
+	msleep(2); /* wait for device to settle */
+
+	tmio_iowrite16(0x0000, par->ccr + CCR_USC);
+	tmio_iowrite16(base >> 16, par->ccr + CCR_BASEH);
+	tmio_iowrite16(base, par->ccr + CCR_BASEL);
+	tmio_iowrite16(0x0002, par->ccr + CCR_CMD); /* base address enable */
+	tmio_iowrite16(0x40a8, par->ccr + CCR_VRAMRTC); /* VRAMRC, VRAMTC */
+	tmio_iowrite16(0x0018, par->ccr + CCR_VRAMSAC); /* VRAMSTS, VRAMAC */
+	tmio_iowrite16(0x0002, par->ccr + CCR_VRAMBC);
+	msleep(2); /* wait for device to settle */
+	tmio_iowrite16(0x000b, par->ccr + CCR_VRAMBC);
+
+	base = vram->start + info->screen_size;
+	tmio_iowrite16(base >> 16, par->lcr + LCR_CFSAH);
+	tmio_iowrite16(base, par->lcr + LCR_CFSAL);
+	tmio_iowrite16(TMIOFB_FIFO_SIZE - 1, par->lcr + LCR_CFS);
+	tmio_iowrite16(1, par->lcr + LCR_CFC);
+	tmio_iowrite16(1, par->lcr + LCR_BBIE);
+	tmio_iowrite16(0, par->lcr + LCR_CFWS);
+
+	return 0;
+}
+
+/*
+ * Sets the LCD controller's output resolution and pixel clock
+ */
+static void tmiofb_hw_mode(struct platform_device *dev)
+{
+	struct mfd_cell *cell = dev->dev.platform_data;
+	struct tmio_fb_data *data = cell->driver_data;
+	struct fb_info *info = platform_get_drvdata(dev);
+	struct fb_videomode *mode = info->mode;
+	struct tmiofb_par *par = info->par;
+	unsigned int i;
+
+	tmio_iowrite16(0, par->lcr + LCR_GM);
+	data->lcd_set_power(dev, 0);
+	tmio_iowrite16(0x0010, par->lcr + LCR_LCDCCRC);
+	data->lcd_mode(dev, mode);
+	data->lcd_set_power(dev, 1);
+
+	tmio_iowrite16(info->fix.line_length, par->lcr + LCR_VHPN);
+	tmio_iowrite16(0, par->lcr + LCR_GDSAH);
+	tmio_iowrite16(0, par->lcr + LCR_GDSAL);
+	tmio_iowrite16(info->fix.line_length >> 16, par->lcr + LCR_VHPCH);
+	tmio_iowrite16(info->fix.line_length, par->lcr + LCR_VHPCL);
+	tmio_iowrite16(i = 0, par->lcr + LCR_HSS);
+	tmio_iowrite16(i += mode->hsync_len, par->lcr + LCR_HSE);
+	tmio_iowrite16(i += mode->left_margin, par->lcr + LCR_HDS);
+	tmio_iowrite16(i += mode->xres + mode->right_margin, par->lcr + LCR_HT);
+	tmio_iowrite16(mode->xres, par->lcr + LCR_HNP);
+	tmio_iowrite16(i = 0, par->lcr + LCR_VSS);
+	tmio_iowrite16(i += mode->vsync_len, par->lcr + LCR_VSE);
+	tmio_iowrite16(i += mode->upper_margin, par->lcr + LCR_VDS);
+	tmio_iowrite16(i += mode->yres, par->lcr + LCR_ILN);
+	tmio_iowrite16(i += mode->lower_margin, par->lcr + LCR_VT);
+	tmio_iowrite16(3, par->lcr + LCR_MISC); /* RGB565 mode */
+	tmio_iowrite16(1, par->lcr + LCR_GM); /* VRAM enable */
+	tmio_iowrite16(0x4007, par->lcr + LCR_LCDCC);
+	tmio_iowrite16(3, par->lcr + LCR_SP);  /* sync polarity */
+
+	tmio_iowrite16(0x0010, par->lcr + LCR_LCDCCRC);
+	msleep(5); /* wait for device to settle */
+	tmio_iowrite16(0x0014, par->lcr + LCR_LCDCCRC); /* STOP_CKP */
+	msleep(5); /* wait for device to settle */
+	tmio_iowrite16(0x0015, par->lcr + LCR_LCDCCRC); /* STOP_CKP|SOFT_RESET*/
+	tmio_iowrite16(0xfffa, par->lcr + LCR_VCS);
+}
+
+/*--------------------------------------------------------------------------*/
+
+#ifdef CONFIG_FB_TMIO_ACCELL
+static int __must_check
+tmiofb_acc_wait(struct fb_info *info, unsigned int ccs)
+{
+	struct tmiofb_par *par = info->par;
+	/*
+	 * This code can be called whith interrupts disabled.
+	 * So instead of relaying on irq to trigger the event,
+	 * poll the state till the necessary command is executed.
+	 */
+	if (irqs_disabled() || par->use_polling) {
+		int i = 0;
+		while (tmio_ioread16(par->lcr + LCR_CCS) > ccs) {
+			udelay(1);
+			i++;
+			if (i > 10000) {
+				pr_err("tmiofb: timeout waiting for %d\n",
+						ccs);
+				return -ETIMEDOUT;
+			}
+			tmiofb_irq(-1, info);
+		}
+	} else {
+		if (!wait_event_interruptible_timeout(par->wait_acc,
+				tmio_ioread16(par->lcr + LCR_CCS) <= ccs,
+				1000)) {
+			pr_err("tmiofb: timeout waiting for %d\n", ccs);
+			return -ETIMEDOUT;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Writes an accelerator command to the accelerator's FIFO.
+ */
+static int
+tmiofb_acc_write(struct fb_info *info, const u32 *cmd, unsigned int count)
+{
+	struct tmiofb_par *par = info->par;
+	int ret;
+
+	ret = tmiofb_acc_wait(info, TMIOFB_FIFO_SIZE - count);
+	if (ret)
+		return ret;
+
+	for (; count; count--, cmd++) {
+		tmio_iowrite16(*cmd >> 16, par->lcr + LCR_CMDH);
+		tmio_iowrite16(*cmd, par->lcr + LCR_CMDL);
+	}
+
+	return ret;
+}
+
+/*
+ * Wait for the accelerator to finish its operations before writing
+ * to the framebuffer for consistent display output.
+ */
+static int tmiofb_sync(struct fb_info *fbi)
+{
+	struct tmiofb_par *par = fbi->par;
+
+	int ret;
+	int i = 0;
+
+	ret = tmiofb_acc_wait(fbi, 0);
+
+	while (tmio_ioread16(par->lcr + LCR_BBES) & 2) { /* blit active */
+		udelay(1);
+		i++ ;
+		if (i > 10000) {
+			printk(KERN_ERR "timeout waiting for blit to end!\n");
+			return -ETIMEDOUT;
+		}
+	}
+
+	return ret;
+}
+
+static void
+tmiofb_fillrect(struct fb_info *fbi, const struct fb_fillrect *rect)
+{
+	const u32 cmd[] = {
+		TMIOFB_ACC_DSADR((rect->dy * fbi->mode->xres + rect->dx) * 2),
+		TMIOFB_ACC_DHPIX(rect->width - 1),
+		TMIOFB_ACC_DVPIX(rect->height - 1),
+		TMIOFB_ACC_FILL(rect->color),
+		TMIOFB_ACC_FLGO,
+	};
+
+	if (fbi->state != FBINFO_STATE_RUNNING ||
+	    fbi->flags & FBINFO_HWACCEL_DISABLED) {
+		cfb_fillrect(fbi, rect);
+		return;
+	}
+
+	tmiofb_acc_write(fbi, cmd, ARRAY_SIZE(cmd));
+}
+
+static void
+tmiofb_copyarea(struct fb_info *fbi, const struct fb_copyarea *area)
+{
+	const u32 cmd[] = {
+		TMIOFB_ACC_DSADR((area->dy * fbi->mode->xres + area->dx) * 2),
+		TMIOFB_ACC_DHPIX(area->width - 1),
+		TMIOFB_ACC_DVPIX(area->height - 1),
+		TMIOFB_ACC_SSADR((area->sy * fbi->mode->xres + area->sx) * 2),
+		TMIOFB_ACC_SCGO,
+	};
+
+	if (fbi->state != FBINFO_STATE_RUNNING ||
+	    fbi->flags & FBINFO_HWACCEL_DISABLED) {
+		cfb_copyarea(fbi, area);
+		return;
+	}
+
+	tmiofb_acc_write(fbi, cmd, ARRAY_SIZE(cmd));
+}
+#endif
+
+static void tmiofb_clearscreen(struct fb_info *info)
+{
+	const struct fb_fillrect rect = {
+		.dx	= 0,
+		.dy	= 0,
+		.width	= info->mode->xres,
+		.height	= info->mode->yres,
+		.color	= 0,
+		.rop	= ROP_COPY,
+	};
+
+	info->fbops->fb_fillrect(info, &rect);
+}
+
+static int tmiofb_vblank(struct fb_info *fbi, struct fb_vblank *vblank)
+{
+	struct tmiofb_par *par = fbi->par;
+	struct fb_videomode *mode = fbi->mode;
+	unsigned int vcount = tmio_ioread16(par->lcr + LCR_CDLN);
+	unsigned int vds = mode->vsync_len + mode->upper_margin;
+
+	vblank->vcount = vcount;
+	vblank->flags = FB_VBLANK_HAVE_VBLANK | FB_VBLANK_HAVE_VCOUNT
+						| FB_VBLANK_HAVE_VSYNC;
+
+	if (vcount < mode->vsync_len)
+		vblank->flags |= FB_VBLANK_VSYNCING;
+
+	if (vcount < vds || vcount > vds + mode->yres)
+		vblank->flags |= FB_VBLANK_VBLANKING;
+
+	return 0;
+}
+
+
+static int tmiofb_ioctl(struct fb_info *fbi,
+		unsigned int cmd, unsigned long arg)
+{
+	switch (cmd) {
+	case FBIOGET_VBLANK: {
+		struct fb_vblank vblank = {0};
+		void __user *argp = (void __user *) arg;
+
+		tmiofb_vblank(fbi, &vblank);
+		if (copy_to_user(argp, &vblank, sizeof vblank))
+			return -EFAULT;
+		return 0;
+	}
+
+#ifdef CONFIG_FB_TMIO_ACCELL
+	case FBIO_TMIO_ACC_SYNC:
+		tmiofb_sync(fbi);
+		return 0;
+
+	case FBIO_TMIO_ACC_WRITE: {
+		u32 __user *argp = (void __user *) arg;
+		u32 len;
+		u32 acc[16];
+
+		if (get_user(len, argp))
+			return -EFAULT;
+		if (len > ARRAY_SIZE(acc))
+			return -EINVAL;
+		if (copy_from_user(acc, argp + 1, sizeof(u32) * len))
+			return -EFAULT;
+
+		return tmiofb_acc_write(fbi, acc, len);
+	}
+#endif
+	}
+
+	return -ENOTTY;
+}
+
+/*--------------------------------------------------------------------------*/
+
+/* Select the smallest mode that allows the desired resolution to be
+ * displayed.  If desired, the x and y parameters can be rounded up to
+ * match the selected mode.
+ */
+static struct fb_videomode *
+tmiofb_find_mode(struct fb_info *info, struct fb_var_screeninfo *var)
+{
+	struct mfd_cell *cell =
+		info->device->platform_data;
+	struct tmio_fb_data *data = cell->driver_data;
+	struct fb_videomode *best = NULL;
+	int i;
+
+	for (i = 0; i < data->num_modes; i++) {
+		struct fb_videomode *mode = data->modes + i;
+
+		if (mode->xres >= var->xres && mode->yres >= var->yres
+				&& (!best || (mode->xres < best->xres
+					   && mode->yres < best->yres)))
+			best = mode;
+	}
+
+	return best;
+}
+
+static int tmiofb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
+{
+
+	struct fb_videomode *mode;
+	struct mfd_cell *cell =
+		info->device->platform_data;
+	struct tmio_fb_data *data = cell->driver_data;
+
+	mode = tmiofb_find_mode(info, var);
+	if (!mode || var->bits_per_pixel > 16)
+		return -EINVAL;
+
+	fb_videomode_to_var(var, mode);
+
+	var->xres_virtual = mode->xres;
+	var->yres_virtual = info->screen_size / (mode->xres * 2);
+
+	if (var->yres_virtual < var->yres)
+		return -EINVAL;
+
+	var->xoffset = 0;
+	var->yoffset = 0;
+	var->bits_per_pixel = 16;
+	var->grayscale = 0;
+	var->red.offset = 11;
+	var->red.length = 5;
+	var->green.offset = 5;
+	var->green.length = 6;
+	var->blue.offset = 0;
+	var->blue.length = 5;
+	var->transp.offset = 0;
+	var->transp.length = 0;
+	var->nonstd = 0;
+	var->height = data->height; /* mm */
+	var->width = data->width; /* mm */
+	var->rotate = 0;
+	return 0;
+}
+
+static int tmiofb_set_par(struct fb_info *info)
+{
+	struct fb_var_screeninfo *var = &info->var;
+	struct fb_videomode *mode;
+
+	mode = tmiofb_find_mode(info, var);
+	if (!mode)
+		return -EINVAL;
+
+	info->mode = mode;
+	info->fix.line_length = info->mode->xres *
+			var->bits_per_pixel / 8;
+
+	tmiofb_hw_mode(to_platform_device(info->device));
+	tmiofb_clearscreen(info);
+	return 0;
+}
+
+static int tmiofb_setcolreg(unsigned regno, unsigned red, unsigned green,
+			   unsigned blue, unsigned transp,
+			   struct fb_info *info)
+{
+	struct tmiofb_par *par = info->par;
+
+	if (regno < ARRAY_SIZE(par->pseudo_palette)) {
+		par->pseudo_palette[regno] =
+			((red & 0xf800)) |
+			((green & 0xfc00) >>  5) |
+			((blue & 0xf800) >> 11);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+static int tmiofb_blank(int blank, struct fb_info *info)
+{
+	/*
+	 * everything is done in lcd/bl drivers.
+	 * this is purely to make sysfs happy and work.
+	 */
+	return 0;
+}
+
+static struct fb_ops tmiofb_ops = {
+	.owner		= THIS_MODULE,
+
+	.fb_ioctl	= tmiofb_ioctl,
+	.fb_check_var	= tmiofb_check_var,
+	.fb_set_par	= tmiofb_set_par,
+	.fb_setcolreg	= tmiofb_setcolreg,
+	.fb_blank	= tmiofb_blank,
+	.fb_imageblit	= cfb_imageblit,
+#ifdef CONFIG_FB_TMIO_ACCELL
+	.fb_sync	= tmiofb_sync,
+	.fb_fillrect	= tmiofb_fillrect,
+	.fb_copyarea	= tmiofb_copyarea,
+#else
+	.fb_fillrect	= cfb_fillrect,
+	.fb_copyarea	= cfb_copyarea,
+#endif
+};
+
+/*--------------------------------------------------------------------------*/
+
+static int __devinit tmiofb_probe(struct platform_device *dev)
+{
+	struct mfd_cell *cell = dev->dev.platform_data;
+	struct tmio_fb_data *data = cell->driver_data;
+	struct resource *ccr = platform_get_resource(dev, IORESOURCE_MEM, 1);
+	struct resource *lcr = platform_get_resource(dev, IORESOURCE_MEM, 0);
+	struct resource *vram = platform_get_resource(dev, IORESOURCE_MEM, 2);
+	int irq = platform_get_irq(dev, 0);
+	struct fb_info *info;
+	struct tmiofb_par *par;
+	int retval;
+
+	/*
+	 * This is the only way ATM to disable the fb
+	 */
+	if (data == NULL) {
+		dev_err(&dev->dev, "NULL platform data!\n");
+		return -EINVAL;
+	}
+
+	info = framebuffer_alloc(sizeof(struct tmiofb_par), &dev->dev);
+
+	if (!info)
+		return -ENOMEM;
+
+	par = info->par;
+
+#ifdef CONFIG_FB_TMIO_ACCELL
+	init_waitqueue_head(&par->wait_acc);
+
+	par->use_polling = true;
+
+	info->flags = FBINFO_DEFAULT | FBINFO_HWACCEL_COPYAREA
+			| FBINFO_HWACCEL_FILLRECT;
+#else
+	info->flags = FBINFO_DEFAULT;
+#endif
+
+	info->fbops = &tmiofb_ops;
+
+	strcpy(info->fix.id, "tmio-fb");
+	info->fix.smem_start = vram->start;
+	info->fix.smem_len = resource_size(vram);
+	info->fix.type = FB_TYPE_PACKED_PIXELS;
+	info->fix.visual = FB_VISUAL_TRUECOLOR;
+	info->fix.mmio_start = lcr->start;
+	info->fix.mmio_len = resource_size(lcr);
+	info->fix.accel = FB_ACCEL_NONE;
+	info->screen_size = info->fix.smem_len - (4 * TMIOFB_FIFO_SIZE);
+	info->pseudo_palette = par->pseudo_palette;
+
+	par->ccr = ioremap(ccr->start, resource_size(ccr));
+	if (!par->ccr) {
+		retval = -ENOMEM;
+		goto err_ioremap_ccr;
+	}
+
+	par->lcr = ioremap(info->fix.mmio_start, info->fix.mmio_len);
+	if (!par->lcr) {
+		retval = -ENOMEM;
+		goto err_ioremap_lcr;
+	}
+
+	info->screen_base = ioremap(info->fix.smem_start, info->fix.smem_len);
+	if (!info->screen_base) {
+		retval = -ENOMEM;
+		goto err_ioremap_vram;
+	}
+
+	retval = request_irq(irq, &tmiofb_irq, IRQF_DISABLED,
+					dev->dev.bus_id, info);
+
+	if (retval)
+		goto err_request_irq;
+
+	platform_set_drvdata(dev, info);
+
+	retval = fb_find_mode(&info->var, info, mode_option,
+			data->modes, data->num_modes,
+			data->modes, 16);
+	if (!retval) {
+		retval = -EINVAL;
+		goto err_find_mode;
+	}
+
+	if (cell->enable) {
+		retval = cell->enable(dev);
+		if (retval)
+			goto err_enable;
+	}
+
+	retval = tmiofb_hw_init(dev);
+	if (retval)
+		goto err_hw_init;
+
+	fb_videomode_to_modelist(data->modes, data->num_modes,
+				 &info->modelist);
+
+	retval = register_framebuffer(info);
+	if (retval < 0)
+		goto err_register_framebuffer;
+
+	printk(KERN_INFO "fb%d: %s frame buffer device\n",
+				info->node, info->fix.id);
+
+	return 0;
+
+err_register_framebuffer:
+/*err_set_par:*/
+	tmiofb_hw_stop(dev);
+err_hw_init:
+	if (cell->disable)
+		cell->disable(dev);
+err_enable:
+err_find_mode:
+	platform_set_drvdata(dev, NULL);
+	free_irq(irq, info);
+err_request_irq:
+	iounmap(info->screen_base);
+err_ioremap_vram:
+	iounmap(par->lcr);
+err_ioremap_lcr:
+	iounmap(par->ccr);
+err_ioremap_ccr:
+	framebuffer_release(info);
+	return retval;
+}
+
+static int __devexit tmiofb_remove(struct platform_device *dev)
+{
+	struct mfd_cell *cell = dev->dev.platform_data;
+	struct fb_info *info = platform_get_drvdata(dev);
+	int irq = platform_get_irq(dev, 0);
+	struct tmiofb_par *par;
+
+	if (info) {
+		par = info->par;
+		unregister_framebuffer(info);
+
+		tmiofb_hw_stop(dev);
+
+		if (cell->disable)
+			cell->disable(dev);
+
+		platform_set_drvdata(dev, NULL);
+
+		free_irq(irq, info);
+
+		iounmap(info->screen_base);
+		iounmap(par->lcr);
+		iounmap(par->ccr);
+
+		framebuffer_release(info);
+	}
+
+	return 0;
+}
+
+#ifdef DEBUG
+static void tmiofb_dump_regs(struct platform_device *dev)
+{
+	struct fb_info *info = platform_get_drvdata(dev);
+	struct tmiofb_par *par = info->par;
+
+	printk(KERN_DEBUG "lhccr:\n");
+#define CCR_PR(n)	printk(KERN_DEBUG "\t" #n " = \t%04x\n",\
+		tmio_ioread16(par->ccr + CCR_ ## n));
+	CCR_PR(CMD);
+	CCR_PR(REVID);
+	CCR_PR(BASEL);
+	CCR_PR(BASEH);
+	CCR_PR(UGCC);
+	CCR_PR(GCC);
+	CCR_PR(USC);
+	CCR_PR(VRAMRTC);
+	CCR_PR(VRAMSAC);
+	CCR_PR(VRAMBC);
+#undef CCR_PR
+
+	printk(KERN_DEBUG "lcr: \n");
+#define LCR_PR(n)	printk(KERN_DEBUG "\t" #n " = \t%04x\n",\
+		tmio_ioread16(par->lcr + LCR_ ## n));
+	LCR_PR(UIS);
+	LCR_PR(VHPN);
+	LCR_PR(CFSAL);
+	LCR_PR(CFSAH);
+	LCR_PR(CFS);
+	LCR_PR(CFWS);
+	LCR_PR(BBIE);
+	LCR_PR(BBISC);
+	LCR_PR(CCS);
+	LCR_PR(BBES);
+	LCR_PR(CMDL);
+	LCR_PR(CMDH);
+	LCR_PR(CFC);
+	LCR_PR(CCIFC);
+	LCR_PR(HWT);
+	LCR_PR(LCDCCRC);
+	LCR_PR(LCDCC);
+	LCR_PR(LCDCOPC);
+	LCR_PR(LCDIS);
+	LCR_PR(LCDIM);
+	LCR_PR(LCDIE);
+	LCR_PR(GDSAL);
+	LCR_PR(GDSAH);
+	LCR_PR(VHPCL);
+	LCR_PR(VHPCH);
+	LCR_PR(GM);
+	LCR_PR(HT);
+	LCR_PR(HDS);
+	LCR_PR(HSS);
+	LCR_PR(HSE);
+	LCR_PR(HNP);
+	LCR_PR(VT);
+	LCR_PR(VDS);
+	LCR_PR(VSS);
+	LCR_PR(VSE);
+	LCR_PR(CDLN);
+	LCR_PR(ILN);
+	LCR_PR(SP);
+	LCR_PR(MISC);
+	LCR_PR(VIHSS);
+	LCR_PR(VIVS);
+	LCR_PR(VIVE);
+	LCR_PR(VIVSS);
+	LCR_PR(VCCIS);
+	LCR_PR(VIDWSAL);
+	LCR_PR(VIDWSAH);
+	LCR_PR(VIDRSAL);
+	LCR_PR(VIDRSAH);
+	LCR_PR(VIPDDST);
+	LCR_PR(VIPDDET);
+	LCR_PR(VIE);
+	LCR_PR(VCS);
+	LCR_PR(VPHWC);
+	LCR_PR(VPHS);
+	LCR_PR(VPVWC);
+	LCR_PR(VPVS);
+	LCR_PR(PLHPIX);
+	LCR_PR(XS);
+	LCR_PR(XCKHW);
+	LCR_PR(STHS);
+	LCR_PR(VT2);
+	LCR_PR(YCKSW);
+	LCR_PR(YSTS);
+	LCR_PR(PPOLS);
+	LCR_PR(PRECW);
+	LCR_PR(VCLKHW);
+	LCR_PR(OC);
+#undef LCR_PR
+}
+#endif
+
+#ifdef CONFIG_PM
+static int tmiofb_suspend(struct platform_device *dev, pm_message_t state)
+{
+	struct fb_info *info = platform_get_drvdata(dev);
+	struct tmiofb_par *par = info->par;
+	struct mfd_cell *cell = dev->dev.platform_data;
+	int retval = 0;
+
+	acquire_console_sem();
+
+	fb_set_suspend(info, 1);
+
+	if (info->fbops->fb_sync)
+		info->fbops->fb_sync(info);
+
+
+	/*
+	 * The fb should be usable even if interrupts are disabled (and they are
+	 * during suspend/resume). Switch temporary to forced polling.
+	 */
+	printk(KERN_INFO "tmiofb: switching to polling\n");
+	par->use_polling = true;
+	tmiofb_hw_stop(dev);
+
+	if (cell->suspend)
+		retval = cell->suspend(dev);
+
+	release_console_sem();
+
+	return retval;
+}
+
+static int tmiofb_resume(struct platform_device *dev)
+{
+	struct fb_info *info = platform_get_drvdata(dev);
+	struct mfd_cell *cell = dev->dev.platform_data;
+	int retval;
+
+	acquire_console_sem();
+
+	if (cell->resume) {
+		retval = cell->resume(dev);
+		if (retval)
+			goto out;
+	}
+
+	tmiofb_irq(-1, info);
+
+	tmiofb_hw_init(dev);
+
+	tmiofb_hw_mode(dev);
+
+	fb_set_suspend(info, 0);
+out:
+	release_console_sem();
+	return retval;
+}
+#else
+#define tmiofb_suspend	NULL
+#define tmiofb_resume	NULL
+#endif
+
+static struct platform_driver tmiofb_driver = {
+	.driver.name	= "tmio-fb",
+	.driver.owner	= THIS_MODULE,
+	.probe		= tmiofb_probe,
+	.remove		= __devexit_p(tmiofb_remove),
+	.suspend	= tmiofb_suspend,
+	.resume		= tmiofb_resume,
+};
+
+/*--------------------------------------------------------------------------*/
+
+#ifndef MODULE
+static void __init tmiofb_setup(char *options)
+{
+	char *this_opt;
+
+	if (!options || !*options)
+		return;
+
+	while ((this_opt = strsep(&options, ",")) != NULL) {
+		if (!*this_opt)
+			continue;
+		/*
+		 * FIXME
+		 */
+	}
+}
+#endif
+
+static int __init tmiofb_init(void)
+{
+#ifndef MODULE
+	char *option = NULL;
+
+	if (fb_get_options("tmiofb", &option))
+		return -ENODEV;
+	tmiofb_setup(option);
+#endif
+	return platform_driver_register(&tmiofb_driver);
+}
+
+static void __exit tmiofb_cleanup(void)
+{
+	platform_driver_unregister(&tmiofb_driver);
+}
+
+module_init(tmiofb_init);
+module_exit(tmiofb_cleanup);
+
+MODULE_DESCRIPTION("TMIO framebuffer driver");
+MODULE_AUTHOR("Chris Humbert, Dirk Opfer, Dmitry Baryshkov");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h
index ec612e66391c..516d955ab8a1 100644
--- a/include/linux/mfd/tmio.h
+++ b/include/linux/mfd/tmio.h
@@ -1,6 +1,8 @@
 #ifndef MFD_TMIO_H
 #define MFD_TMIO_H
 
+#include <linux/fb.h>
+
 #define tmio_ioread8(addr) readb(addr)
 #define tmio_ioread16(addr) readw(addr)
 #define tmio_ioread16_rep(r, b, l) readsw(r, b, l)
@@ -25,4 +27,21 @@ struct tmio_nand_data {
 	unsigned int		num_partitions;
 };
 
+#define FBIO_TMIO_ACC_WRITE	0x7C639300
+#define FBIO_TMIO_ACC_SYNC	0x7C639301
+
+struct tmio_fb_data {
+	int			(*lcd_set_power)(struct platform_device *fb_dev,
+								bool on);
+	int			(*lcd_mode)(struct platform_device *fb_dev,
+					const struct fb_videomode *mode);
+	int			num_modes;
+	struct fb_videomode	*modes;
+
+	/* in mm: size of screen */
+	int			height;
+	int			width;
+};
+
+
 #endif
-- 
cgit v1.2.3


From b563cf59c4d67da7d671788a9848416bfa4180ab Mon Sep 17 00:00:00 2001
From: Rene Herman <rene.herman@keyaccess.nl>
Date: Wed, 15 Oct 2008 22:03:58 -0700
Subject: pnp: make the resource type an unsigned long

PnP encodes the resource type directly as its struct resource->flags value
which is an unsigned long.  Make it so...

Signed-off-by: Rene Herman <rene.herman@gmail.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Acked-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Cc: Andi Kleen <andi@firstfloor.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/pnp/base.h     | 2 +-
 drivers/pnp/quirks.c   | 2 +-
 drivers/pnp/resource.c | 4 ++--
 include/linux/pnp.h    | 6 ++++--
 4 files changed, 8 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pnp/base.h b/drivers/pnp/base.h
index 9fd7bb9b7dce..7cc7bf5304aa 100644
--- a/drivers/pnp/base.h
+++ b/drivers/pnp/base.h
@@ -147,7 +147,7 @@ char *pnp_resource_type_name(struct resource *res);
 void dbg_pnp_show_resources(struct pnp_dev *dev, char *desc);
 
 void pnp_free_resources(struct pnp_dev *dev);
-int pnp_resource_type(struct resource *res);
+unsigned long pnp_resource_type(struct resource *res);
 
 struct pnp_resource {
 	struct list_head list;
diff --git a/drivers/pnp/quirks.c b/drivers/pnp/quirks.c
index 0bdf9b8a5e58..d15e2b77af88 100644
--- a/drivers/pnp/quirks.c
+++ b/drivers/pnp/quirks.c
@@ -245,7 +245,7 @@ static void quirk_system_pci_resources(struct pnp_dev *dev)
 	 */
 	for_each_pci_dev(pdev) {
 		for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
-			unsigned int type;
+			unsigned long type;
 
 			type = pci_resource_flags(pdev, i) &
 					(IORESOURCE_IO | IORESOURCE_MEM);
diff --git a/drivers/pnp/resource.c b/drivers/pnp/resource.c
index 4cfe3a1efdfb..dbae23acdd5b 100644
--- a/drivers/pnp/resource.c
+++ b/drivers/pnp/resource.c
@@ -467,14 +467,14 @@ int pnp_check_dma(struct pnp_dev *dev, struct resource *res)
 #endif
 }
 
-int pnp_resource_type(struct resource *res)
+unsigned long pnp_resource_type(struct resource *res)
 {
 	return res->flags & (IORESOURCE_IO  | IORESOURCE_MEM |
 			     IORESOURCE_IRQ | IORESOURCE_DMA);
 }
 
 struct resource *pnp_get_resource(struct pnp_dev *dev,
-				  unsigned int type, unsigned int num)
+				  unsigned long type, unsigned int num)
 {
 	struct pnp_resource *pnp_res;
 	struct resource *res;
diff --git a/include/linux/pnp.h b/include/linux/pnp.h
index be764e514e35..53b70fd1d9a5 100644
--- a/include/linux/pnp.h
+++ b/include/linux/pnp.h
@@ -22,9 +22,11 @@ struct pnp_dev;
  * Resource Management
  */
 #ifdef CONFIG_PNP
-struct resource *pnp_get_resource(struct pnp_dev *, unsigned int, unsigned int);
+struct resource *pnp_get_resource(struct pnp_dev *dev, unsigned long type,
+				unsigned int num);
 #else
-static inline struct resource *pnp_get_resource(struct pnp_dev *dev, unsigned int type, unsigned int num)
+static inline struct resource *pnp_get_resource(struct pnp_dev *dev,
+			unsigned long type, unsigned int num)
 {
 	return NULL;
 }
-- 
cgit v1.2.3


From 60836eb63b941f407dc2a609f3f0f34fd74ef6c3 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 15 Oct 2008 22:04:00 -0700
Subject: telephony: remove CVS keywords

Remove CVS keywords that weren't updated for a long time from comments.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/telephony/ixj.c   | 2 --
 include/linux/telephony.h | 4 ----
 2 files changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/telephony/ixj.c b/drivers/telephony/ixj.c
index ec7aeb502d15..41b6530b8f25 100644
--- a/drivers/telephony/ixj.c
+++ b/drivers/telephony/ixj.c
@@ -42,8 +42,6 @@
  ***************************************************************************/
 
 /*
- * $Log: ixj.c,v $
- *
  * Revision 4.8  2003/07/09 19:39:00  Daniele Bellucci
  * Audit some copy_*_user and minor cleanup.
  *
diff --git a/include/linux/telephony.h b/include/linux/telephony.h
index 0d0cf2a1e7bc..5b2b6261f193 100644
--- a/include/linux/telephony.h
+++ b/include/linux/telephony.h
@@ -28,10 +28,6 @@
  * ON AN "AS IS" BASIS, AND QUICKNET TECHNOLOGIES, INC. HAS NO OBLIGATION
  * TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  *
- * Version:       $Revision: 4.2 $
- *
- * $Id: telephony.h,v 4.2 2001/08/06 07:09:43 craigs Exp $
- *
  *****************************************************************************/
 
 #ifndef TELEPHONY_H
-- 
cgit v1.2.3


From b73c29f6b0ddbcf07b43c5c5e6354e5839b5e68d Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 15 Oct 2008 22:04:13 -0700
Subject: quota: remove CVS keywords

Remove CVS keywords that weren't updated for a long time from comments.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Acked-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/dquot.c               | 2 --
 include/linux/quota.h    | 2 --
 include/linux/quotaops.h | 3 ---
 3 files changed, 7 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dquot.c b/fs/dquot.c
index ad7e59003e04..da30a27f2242 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -9,8 +9,6 @@
  * implementation is based on one of the several variants of the LINUX
  * inode-subsystem with added complexity of the diskquota system.
  * 
- * Version: $Id: dquot.c,v 6.3 1996/11/17 18:35:34 mvw Exp mvw $
- * 
  * Author:	Marco van Wieringen <mvw@planets.elm.net>
  *
  * Fixes:   Dmitry Gorodchanin <pgmdsg@ibi.com>, 11 Feb 96
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 376a05048bc5..40401b554484 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -28,8 +28,6 @@
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
- *
- * Version: $Id: quota.h,v 2.0 1996/11/17 16:48:14 mvw Exp mvw $
  */
 
 #ifndef _LINUX_QUOTA_
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index ca6b9b5c8d52..a558a4c1d35a 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -3,9 +3,6 @@
  * macros expand to the right source-code.
  *
  * Author:  Marco van Wieringen <mvw@planets.elm.net>
- *
- * Version: $Id: quotaops.h,v 1.2 1998/01/15 16:22:26 ecd Exp $
- *
  */
 #ifndef _LINUX_QUOTAOPS_
 #define _LINUX_QUOTAOPS_
-- 
cgit v1.2.3


From f221e726bf4e082a05dcd573379ac859bfba7126 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 15 Oct 2008 22:04:23 -0700
Subject: sysctl: simplify ->strategy

name and nlen parameters passed to ->strategy hook are unused, remove
them.  In general ->strategy hook should know what it's doing, and don't
do something tricky for which, say, pointer to original userspace array
may be needed (name).

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Acked-by: David S. Miller <davem@davemloft.net> [ networking bits ]
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Matt Mackall <mpm@selenic.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/frv/kernel/pm.c           |  6 +++---
 arch/mips/lasat/sysctl.c       | 17 ++++++++---------
 drivers/char/random.c          |  2 +-
 include/linux/sysctl.h         |  2 +-
 include/net/ip.h               |  2 +-
 include/net/ndisc.h            |  5 ++---
 ipc/ipc_sysctl.c               |  9 ++++-----
 kernel/sysctl.c                | 29 +++++++++++++----------------
 kernel/utsname_sysctl.c        |  5 ++---
 net/decnet/dn_dev.c            |  4 ++--
 net/decnet/sysctl_net_decnet.c |  4 ++--
 net/ipv4/devinet.c             |  7 +++----
 net/ipv4/route.c               |  7 +------
 net/ipv4/sysctl_net_ipv4.c     | 18 +++++++++---------
 net/ipv6/addrconf.c            |  1 -
 net/ipv6/ndisc.c               | 11 ++++-------
 16 files changed, 56 insertions(+), 73 deletions(-)

(limited to 'include/linux')

diff --git a/arch/frv/kernel/pm.c b/arch/frv/kernel/pm.c
index d1113c5031f5..be722fc1acff 100644
--- a/arch/frv/kernel/pm.c
+++ b/arch/frv/kernel/pm.c
@@ -211,7 +211,7 @@ static int cmode_procctl(ctl_table *ctl, int write, struct file *filp,
 	return try_set_cmode(new_cmode)?:*lenp;
 }
 
-static int cmode_sysctl(ctl_table *table, int __user *name, int nlen,
+static int cmode_sysctl(ctl_table *table,
 			void __user *oldval, size_t __user *oldlenp,
 			void __user *newval, size_t newlen)
 {
@@ -314,7 +314,7 @@ static int p0_procctl(ctl_table *ctl, int write, struct file *filp,
 	return try_set_p0(new_p0)?:*lenp;
 }
 
-static int p0_sysctl(ctl_table *table, int __user *name, int nlen,
+static int p0_sysctl(ctl_table *table,
 		     void __user *oldval, size_t __user *oldlenp,
 		     void __user *newval, size_t newlen)
 {
@@ -358,7 +358,7 @@ static int cm_procctl(ctl_table *ctl, int write, struct file *filp,
 	return try_set_cm(new_cm)?:*lenp;
 }
 
-static int cm_sysctl(ctl_table *table, int __user *name, int nlen,
+static int cm_sysctl(ctl_table *table,
 		     void __user *oldval, size_t __user *oldlenp,
 		     void __user *newval, size_t newlen)
 {
diff --git a/arch/mips/lasat/sysctl.c b/arch/mips/lasat/sysctl.c
index 866881ec0cf8..8f88886feb12 100644
--- a/arch/mips/lasat/sysctl.c
+++ b/arch/mips/lasat/sysctl.c
@@ -38,14 +38,13 @@
 #endif
 
 /* Strategy function to write EEPROM after changing string entry */
-int sysctl_lasatstring(ctl_table *table, int *name, int nlen,
+int sysctl_lasatstring(ctl_table *table,
 		void *oldval, size_t *oldlenp,
 		void *newval, size_t newlen)
 {
 	int r;
 
-	r = sysctl_string(table, name,
-			  nlen, oldval, oldlenp, newval, newlen);
+	r = sysctl_string(table, oldval, oldlenp, newval, newlen);
 	if (r < 0)
 		return r;
 
@@ -113,13 +112,13 @@ int proc_dolasatrtc(ctl_table *table, int write, struct file *filp,
 #endif
 
 /* Sysctl for setting the IP addresses */
-int sysctl_lasat_intvec(ctl_table *table, int *name, int nlen,
+int sysctl_lasat_intvec(ctl_table *table,
 		    void *oldval, size_t *oldlenp,
 		    void *newval, size_t newlen)
 {
 	int r;
 
-	r = sysctl_intvec(table, name, nlen, oldval, oldlenp, newval, newlen);
+	r = sysctl_intvec(table, oldval, oldlenp, newval, newlen);
 	if (r < 0)
 		return r;
 
@@ -131,7 +130,7 @@ int sysctl_lasat_intvec(ctl_table *table, int *name, int nlen,
 
 #ifdef CONFIG_DS1603
 /* Same for RTC */
-int sysctl_lasat_rtc(ctl_table *table, int *name, int nlen,
+int sysctl_lasat_rtc(ctl_table *table,
 		    void *oldval, size_t *oldlenp,
 		    void *newval, size_t newlen)
 {
@@ -140,7 +139,7 @@ int sysctl_lasat_rtc(ctl_table *table, int *name, int nlen,
 	rtctmp = read_persistent_clock();
 	if (rtctmp < 0)
 		rtctmp = 0;
-	r = sysctl_intvec(table, name, nlen, oldval, oldlenp, newval, newlen);
+	r = sysctl_intvec(table, oldval, oldlenp, newval, newlen);
 	if (r < 0)
 		return r;
 	if (newval && newlen)
@@ -211,13 +210,13 @@ int proc_lasat_ip(ctl_table *table, int write, struct file *filp,
 }
 #endif
 
-static int sysctl_lasat_prid(ctl_table *table, int *name, int nlen,
+static int sysctl_lasat_prid(ctl_table *table,
 				     void *oldval, size_t *oldlenp,
 				     void *newval, size_t newlen)
 {
 	int r;
 
-	r = sysctl_intvec(table, name, nlen, oldval, oldlenp, newval, newlen);
+	r = sysctl_intvec(table, oldval, oldlenp, newval, newlen);
 	if (r < 0)
 		return r;
 	if (newval && newlen) {
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 6af435b89867..c8752eaad483 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1205,7 +1205,7 @@ static int proc_do_uuid(ctl_table *table, int write, struct file *filp,
 	return proc_dostring(&fake_table, write, filp, buffer, lenp, ppos);
 }
 
-static int uuid_strategy(ctl_table *table, int __user *name, int nlen,
+static int uuid_strategy(ctl_table *table,
 			 void __user *oldval, size_t __user *oldlenp,
 			 void __user *newval, size_t newlen)
 {
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index d0437f36921f..39d471d1163b 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -972,7 +972,7 @@ extern int sysctl_perm(struct ctl_table_root *root,
 
 typedef struct ctl_table ctl_table;
 
-typedef int ctl_handler (struct ctl_table *table, int __user *name, int nlen,
+typedef int ctl_handler (struct ctl_table *table,
 			 void __user *oldval, size_t __user *oldlenp,
 			 void __user *newval, size_t newlen);
 
diff --git a/include/net/ip.h b/include/net/ip.h
index 1cbccaf0de3f..bc026ecb513f 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -396,7 +396,7 @@ extern void	ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 dport,
 int ipv4_doint_and_flush(ctl_table *ctl, int write,
 			 struct file* filp, void __user *buffer,
 			 size_t *lenp, loff_t *ppos);
-int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
+int ipv4_doint_and_flush_strategy(ctl_table *table,
 				  void __user *oldval, size_t __user *oldlenp,
 				  void __user *newval, size_t newlen);
 #ifdef CONFIG_PROC_FS
diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index a01b7c4dc763..11dd0137c6a5 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -129,9 +129,8 @@ extern int 			ndisc_ifinfo_sysctl_change(struct ctl_table *ctl,
 							   void __user *buffer,
 							   size_t *lenp,
 							   loff_t *ppos);
-int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name,
-				 int nlen, void __user *oldval,
-				 size_t __user *oldlenp,
+int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl,
+				 void __user *oldval, size_t __user *oldlenp,
 				 void __user *newval, size_t newlen);
 #endif
 
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
index 69bc85978ba0..0dfebc509426 100644
--- a/ipc/ipc_sysctl.c
+++ b/ipc/ipc_sysctl.c
@@ -131,7 +131,7 @@ static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write,
 
 #ifdef CONFIG_SYSCTL_SYSCALL
 /* The generic sysctl ipc data routine. */
-static int sysctl_ipc_data(ctl_table *table, int __user *name, int nlen,
+static int sysctl_ipc_data(ctl_table *table,
 		void __user *oldval, size_t __user *oldlenp,
 		void __user *newval, size_t newlen)
 {
@@ -169,14 +169,13 @@ static int sysctl_ipc_data(ctl_table *table, int __user *name, int nlen,
 	return 1;
 }
 
-static int sysctl_ipc_registered_data(ctl_table *table, int __user *name,
-		int nlen, void __user *oldval, size_t __user *oldlenp,
+static int sysctl_ipc_registered_data(ctl_table *table,
+		void __user *oldval, size_t __user *oldlenp,
 		void __user *newval, size_t newlen)
 {
 	int rc;
 
-	rc = sysctl_ipc_data(table, name, nlen, oldval, oldlenp, newval,
-		newlen);
+	rc = sysctl_ipc_data(table, oldval, oldlenp, newval, newlen);
 
 	if (newval && newlen && rc > 0)
 		/*
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index ec88fcc9a0d2..9792c315676a 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1500,7 +1500,6 @@ void register_sysctl_root(struct ctl_table_root *root)
 /* Perform the actual read/write of a sysctl table entry. */
 static int do_sysctl_strategy(struct ctl_table_root *root,
 			struct ctl_table *table,
-			int __user *name, int nlen,
 			void __user *oldval, size_t __user *oldlenp,
 			void __user *newval, size_t newlen)
 {
@@ -1514,8 +1513,7 @@ static int do_sysctl_strategy(struct ctl_table_root *root,
 		return -EPERM;
 
 	if (table->strategy) {
-		rc = table->strategy(table, name, nlen, oldval, oldlenp,
-				     newval, newlen);
+		rc = table->strategy(table, oldval, oldlenp, newval, newlen);
 		if (rc < 0)
 			return rc;
 		if (rc > 0)
@@ -1525,8 +1523,7 @@ static int do_sysctl_strategy(struct ctl_table_root *root,
 	/* If there is no strategy routine, or if the strategy returns
 	 * zero, proceed with automatic r/w */
 	if (table->data && table->maxlen) {
-		rc = sysctl_data(table, name, nlen, oldval, oldlenp,
-				 newval, newlen);
+		rc = sysctl_data(table, oldval, oldlenp, newval, newlen);
 		if (rc < 0)
 			return rc;
 	}
@@ -1558,7 +1555,7 @@ repeat:
 				table = table->child;
 				goto repeat;
 			}
-			error = do_sysctl_strategy(root, table, name, nlen,
+			error = do_sysctl_strategy(root, table,
 						   oldval, oldlenp,
 						   newval, newlen);
 			return error;
@@ -2707,7 +2704,7 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
  */
 
 /* The generic sysctl data routine (used if no strategy routine supplied) */
-int sysctl_data(struct ctl_table *table, int __user *name, int nlen,
+int sysctl_data(struct ctl_table *table,
 		void __user *oldval, size_t __user *oldlenp,
 		void __user *newval, size_t newlen)
 {
@@ -2741,7 +2738,7 @@ int sysctl_data(struct ctl_table *table, int __user *name, int nlen,
 }
 
 /* The generic string strategy routine: */
-int sysctl_string(struct ctl_table *table, int __user *name, int nlen,
+int sysctl_string(struct ctl_table *table,
 		  void __user *oldval, size_t __user *oldlenp,
 		  void __user *newval, size_t newlen)
 {
@@ -2787,7 +2784,7 @@ int sysctl_string(struct ctl_table *table, int __user *name, int nlen,
  * are between the minimum and maximum values given in the arrays
  * table->extra1 and table->extra2, respectively.
  */
-int sysctl_intvec(struct ctl_table *table, int __user *name, int nlen,
+int sysctl_intvec(struct ctl_table *table,
 		void __user *oldval, size_t __user *oldlenp,
 		void __user *newval, size_t newlen)
 {
@@ -2823,7 +2820,7 @@ int sysctl_intvec(struct ctl_table *table, int __user *name, int nlen,
 }
 
 /* Strategy function to convert jiffies to seconds */ 
-int sysctl_jiffies(struct ctl_table *table, int __user *name, int nlen,
+int sysctl_jiffies(struct ctl_table *table,
 		void __user *oldval, size_t __user *oldlenp,
 		void __user *newval, size_t newlen)
 {
@@ -2857,7 +2854,7 @@ int sysctl_jiffies(struct ctl_table *table, int __user *name, int nlen,
 }
 
 /* Strategy function to convert jiffies to seconds */ 
-int sysctl_ms_jiffies(struct ctl_table *table, int __user *name, int nlen,
+int sysctl_ms_jiffies(struct ctl_table *table,
 		void __user *oldval, size_t __user *oldlenp,
 		void __user *newval, size_t newlen)
 {
@@ -2912,35 +2909,35 @@ asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
 	return error;
 }
 
-int sysctl_data(struct ctl_table *table, int __user *name, int nlen,
+int sysctl_data(struct ctl_table *table,
 		  void __user *oldval, size_t __user *oldlenp,
 		  void __user *newval, size_t newlen)
 {
 	return -ENOSYS;
 }
 
-int sysctl_string(struct ctl_table *table, int __user *name, int nlen,
+int sysctl_string(struct ctl_table *table,
 		  void __user *oldval, size_t __user *oldlenp,
 		  void __user *newval, size_t newlen)
 {
 	return -ENOSYS;
 }
 
-int sysctl_intvec(struct ctl_table *table, int __user *name, int nlen,
+int sysctl_intvec(struct ctl_table *table,
 		void __user *oldval, size_t __user *oldlenp,
 		void __user *newval, size_t newlen)
 {
 	return -ENOSYS;
 }
 
-int sysctl_jiffies(struct ctl_table *table, int __user *name, int nlen,
+int sysctl_jiffies(struct ctl_table *table,
 		void __user *oldval, size_t __user *oldlenp,
 		void __user *newval, size_t newlen)
 {
 	return -ENOSYS;
 }
 
-int sysctl_ms_jiffies(struct ctl_table *table, int __user *name, int nlen,
+int sysctl_ms_jiffies(struct ctl_table *table,
 		void __user *oldval, size_t __user *oldlenp,
 		void __user *newval, size_t newlen)
 {
diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c
index 4ab9659d269e..3b34b3545936 100644
--- a/kernel/utsname_sysctl.c
+++ b/kernel/utsname_sysctl.c
@@ -60,7 +60,7 @@ static int proc_do_uts_string(ctl_table *table, int write, struct file *filp,
 
 #ifdef CONFIG_SYSCTL_SYSCALL
 /* The generic string strategy routine: */
-static int sysctl_uts_string(ctl_table *table, int __user *name, int nlen,
+static int sysctl_uts_string(ctl_table *table,
 		  void __user *oldval, size_t __user *oldlenp,
 		  void __user *newval, size_t newlen)
 {
@@ -69,8 +69,7 @@ static int sysctl_uts_string(ctl_table *table, int __user *name, int nlen,
 	write = newval && newlen;
 	memcpy(&uts_table, table, sizeof(uts_table));
 	uts_table.data = get_uts(table, write);
-	r = sysctl_string(&uts_table, name, nlen,
-		oldval, oldlenp, newval, newlen);
+	r = sysctl_string(&uts_table, oldval, oldlenp, newval, newlen);
 	put_uts(table, write, uts_table.data);
 	return r;
 }
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 2f0ac3c3eb71..96f9fcef2958 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -166,7 +166,7 @@ static int max_priority[] = { 127 }; /* From DECnet spec */
 
 static int dn_forwarding_proc(ctl_table *, int, struct file *,
 			void __user *, size_t *, loff_t *);
-static int dn_forwarding_sysctl(ctl_table *table, int __user *name, int nlen,
+static int dn_forwarding_sysctl(ctl_table *table,
 			void __user *oldval, size_t __user *oldlenp,
 			void __user *newval, size_t newlen);
 
@@ -318,7 +318,7 @@ static int dn_forwarding_proc(ctl_table *table, int write,
 #endif
 }
 
-static int dn_forwarding_sysctl(ctl_table *table, int __user *name, int nlen,
+static int dn_forwarding_sysctl(ctl_table *table,
 			void __user *oldval, size_t __user *oldlenp,
 			void __user *newval, size_t newlen)
 {
diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c
index 228067c571ba..36400b266896 100644
--- a/net/decnet/sysctl_net_decnet.c
+++ b/net/decnet/sysctl_net_decnet.c
@@ -132,7 +132,7 @@ static int parse_addr(__le16 *addr, char *str)
 }
 
 
-static int dn_node_address_strategy(ctl_table *table, int __user *name, int nlen,
+static int dn_node_address_strategy(ctl_table *table,
 				void __user *oldval, size_t __user *oldlenp,
 				void __user *newval, size_t newlen)
 {
@@ -217,7 +217,7 @@ static int dn_node_address_handler(ctl_table *table, int write,
 }
 
 
-static int dn_def_dev_strategy(ctl_table *table, int __user *name, int nlen,
+static int dn_def_dev_strategy(ctl_table *table,
 				void __user *oldval, size_t __user *oldlenp,
 				void __user *newval, size_t newlen)
 {
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index b12dae2b0b2d..5154e729cf16 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1283,7 +1283,7 @@ static int devinet_conf_proc(ctl_table *ctl, int write,
 	return ret;
 }
 
-static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen,
+static int devinet_conf_sysctl(ctl_table *table,
 			       void __user *oldval, size_t __user *oldlenp,
 			       void __user *newval, size_t newlen)
 {
@@ -1379,12 +1379,11 @@ int ipv4_doint_and_flush(ctl_table *ctl, int write,
 	return ret;
 }
 
-int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
+int ipv4_doint_and_flush_strategy(ctl_table *table,
 				  void __user *oldval, size_t __user *oldlenp,
 				  void __user *newval, size_t newlen)
 {
-	int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp,
-				      newval, newlen);
+	int ret = devinet_conf_sysctl(table, oldval, oldlenp, newval, newlen);
 	struct net *net = table->extra2;
 
 	if (ret == 1)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a6d7c584f53b..942be04e7955 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2908,8 +2908,6 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
 }
 
 static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table,
-						int __user *name,
-						int nlen,
 						void __user *oldval,
 						size_t __user *oldlenp,
 						void __user *newval,
@@ -2972,16 +2970,13 @@ static int ipv4_sysctl_rt_secret_interval(ctl_table *ctl, int write,
 }
 
 static int ipv4_sysctl_rt_secret_interval_strategy(ctl_table *table,
-						   int __user *name,
-						   int nlen,
 						   void __user *oldval,
 						   size_t __user *oldlenp,
 						   void __user *newval,
 						   size_t newlen)
 {
 	int old = ip_rt_secret_interval;
-	int ret = sysctl_jiffies(table, name, nlen, oldval, oldlenp, newval,
-				 newlen);
+	int ret = sysctl_jiffies(table, oldval, oldlenp, newval, newlen);
 
 	rt_secret_reschedule(old);
 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 276d047fb85a..1bb10df8ce7d 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -64,8 +64,8 @@ static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp,
 }
 
 /* Validate changes from sysctl interface. */
-static int ipv4_sysctl_local_port_range(ctl_table *table, int __user *name,
-					 int nlen, void __user *oldval,
+static int ipv4_sysctl_local_port_range(ctl_table *table,
+					 void __user *oldval,
 					 size_t __user *oldlenp,
 					void __user *newval, size_t newlen)
 {
@@ -80,7 +80,7 @@ static int ipv4_sysctl_local_port_range(ctl_table *table, int __user *name,
 	};
 
 	inet_get_local_port_range(range, range + 1);
-	ret = sysctl_intvec(&tmp, name, nlen, oldval, oldlenp, newval, newlen);
+	ret = sysctl_intvec(&tmp, oldval, oldlenp, newval, newlen);
 	if (ret == 0 && newval && newlen) {
 		if (range[1] < range[0])
 			ret = -EINVAL;
@@ -109,8 +109,8 @@ static int proc_tcp_congestion_control(ctl_table *ctl, int write, struct file *
 	return ret;
 }
 
-static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name,
-					 int nlen, void __user *oldval,
+static int sysctl_tcp_congestion_control(ctl_table *table,
+					 void __user *oldval,
 					 size_t __user *oldlenp,
 					 void __user *newval, size_t newlen)
 {
@@ -122,7 +122,7 @@ static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name,
 	int ret;
 
 	tcp_get_default_congestion_control(val);
-	ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen);
+	ret = sysctl_string(&tbl, oldval, oldlenp, newval, newlen);
 	if (ret == 1 && newval && newlen)
 		ret = tcp_set_default_congestion_control(val);
 	return ret;
@@ -165,8 +165,8 @@ static int proc_allowed_congestion_control(ctl_table *ctl,
 	return ret;
 }
 
-static int strategy_allowed_congestion_control(ctl_table *table, int __user *name,
-					       int nlen, void __user *oldval,
+static int strategy_allowed_congestion_control(ctl_table *table,
+					       void __user *oldval,
 					       size_t __user *oldlenp,
 					       void __user *newval,
 					       size_t newlen)
@@ -179,7 +179,7 @@ static int strategy_allowed_congestion_control(ctl_table *table, int __user *nam
 		return -ENOMEM;
 
 	tcp_get_available_congestion_control(tbl.data, tbl.maxlen);
-	ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen);
+	ret = sysctl_string(&tbl, oldval, oldlenp, newval, newlen);
 	if (ret == 1 && newval && newlen)
 		ret = tcp_set_allowed_congestion_control(tbl.data);
 	kfree(tbl.data);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 7b6a584b62dd..eea9542728ca 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3982,7 +3982,6 @@ int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
 }
 
 static int addrconf_sysctl_forward_strategy(ctl_table *table,
-					    int __user *name, int nlen,
 					    void __user *oldval,
 					    size_t __user *oldlenp,
 					    void __user *newval, size_t newlen)
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 840b15780a36..7f39e9b36456 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1730,9 +1730,8 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * f
 	return ret;
 }
 
-int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name,
-				 int nlen, void __user *oldval,
-				 size_t __user *oldlenp,
+int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl,
+				 void __user *oldval, size_t __user *oldlenp,
 				 void __user *newval, size_t newlen)
 {
 	struct net_device *dev = ctl->extra1;
@@ -1745,13 +1744,11 @@ int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name,
 
 	switch (ctl->ctl_name) {
 	case NET_NEIGH_REACHABLE_TIME:
-		ret = sysctl_jiffies(ctl, name, nlen,
-				     oldval, oldlenp, newval, newlen);
+		ret = sysctl_jiffies(ctl, oldval, oldlenp, newval, newlen);
 		break;
 	case NET_NEIGH_RETRANS_TIME_MS:
 	case NET_NEIGH_REACHABLE_TIME_MS:
-		 ret = sysctl_ms_jiffies(ctl, name, nlen,
-					 oldval, oldlenp, newval, newlen);
+		 ret = sysctl_ms_jiffies(ctl, oldval, oldlenp, newval, newlen);
 		 break;
 	default:
 		ret = 0;
-- 
cgit v1.2.3


From 25cbe53ef1cb828ae012f3955a5aa18117114439 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Wed, 15 Oct 2008 22:04:25 -0700
Subject: pid_ns: kill the now unused task_child_reaper()

task_child_reaper() has no callers anymore, kill it.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Acked-by: Pavel Emelyanov <xemul@openvz.org>
Acked-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pid_namespace.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 1af82c4e17d4..d82fe825d62f 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -84,12 +84,6 @@ static inline struct pid_namespace *task_active_pid_ns(struct task_struct *tsk)
 	return tsk->nsproxy->pid_ns;
 }
 
-static inline struct task_struct *task_child_reaper(struct task_struct *tsk)
-{
-	BUG_ON(tsk != current);
-	return tsk->nsproxy->pid_ns->child_reaper;
-}
-
 void pidhash_init(void);
 void pidmap_init(void);
 
-- 
cgit v1.2.3


From 612de10db06c0704a66bbe7fd13990cb1c2cb958 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 15 Oct 2008 22:04:33 -0700
Subject: parport: remove CVS keywords

Remove CVS keywords that weren't updated for a long time from comments.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/parport/ieee1284.c | 2 +-
 drivers/parport/probe.c    | 2 +-
 drivers/parport/share.c    | 2 +-
 include/linux/parport.h    | 2 --
 4 files changed, 3 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/parport/ieee1284.c b/drivers/parport/ieee1284.c
index e97059415ab4..ac2a805ac7ea 100644
--- a/drivers/parport/ieee1284.c
+++ b/drivers/parport/ieee1284.c
@@ -1,4 +1,4 @@
-/* $Id: parport_ieee1284.c,v 1.4 1997/10/19 21:37:21 philip Exp $
+/*
  * IEEE-1284 implementation for parport.
  *
  * Authors: Phil Blundell <philb@gnu.org>
diff --git a/drivers/parport/probe.c b/drivers/parport/probe.c
index cd565bb4e1a9..0f6550719bcf 100644
--- a/drivers/parport/probe.c
+++ b/drivers/parport/probe.c
@@ -1,4 +1,4 @@
-/* $Id: parport_probe.c,v 1.1 1999/07/03 08:56:17 davem Exp $
+/*
  * Parallel port device probing code
  *
  * Authors:    Carsten Gross, carsten@sol.wohnheim.uni-ulm.de
diff --git a/drivers/parport/share.c b/drivers/parport/share.c
index a8a62bbbb576..0ebca450ed29 100644
--- a/drivers/parport/share.c
+++ b/drivers/parport/share.c
@@ -1,4 +1,4 @@
-/* $Id: parport_share.c,v 1.15 1998/01/11 12:06:17 philip Exp $
+/*
  * Parallel-port resource manager code.
  * 
  * Authors: David Campbell <campbell@tirian.che.curtin.edu.au>
diff --git a/include/linux/parport.h b/include/linux/parport.h
index 6a0d7cdb5774..e1f83c5065c5 100644
--- a/include/linux/parport.h
+++ b/include/linux/parport.h
@@ -1,5 +1,3 @@
-/* $Id: parport.h,v 1.1 1998/05/17 10:57:52 andrea Exp andrea $ */
-
 /*
  * Any part of this program may be used in documents licensed under
  * the GNU Free Documentation License, Version 1.1 or any later version
-- 
cgit v1.2.3


From ebf3f09c634906d371f2bfd71b41c7e0c52efe7e Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Wed, 15 Oct 2008 22:05:12 -0700
Subject: Configure out AIO support

This patchs adds the CONFIG_AIO option which allows to remove support
for asynchronous I/O operations, that are not necessarly used by
applications, particularly on embedded devices. As this is a
size-reduction option, it depends on CONFIG_EMBEDDED. It allows to
save ~7 kilobytes of kernel code/data:

   text	   data	    bss	    dec	    hex	filename
1115067	 119180	 217088	1451335	 162547	vmlinux
1108025	 119048	 217088	1444161	 160941	vmlinux.new
  -7042    -132       0   -7174   -1C06 +/-

This patch has been originally written by Matt Mackall
<mpm@selenic.com>, and is part of the Linux Tiny project.

[randy.dunlap@oracle.com: build fix]
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: Zach Brown <zach.brown@oracle.com>
Signed-off-by: Matt Mackall <mpm@selenic.com>
Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/Makefile         | 3 ++-
 include/linux/aio.h | 9 +++++++++
 init/Kconfig        | 8 ++++++++
 kernel/sys_ni.c     | 5 +++++
 kernel/sysctl.c     | 2 ++
 5 files changed, 26 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/Makefile b/fs/Makefile
index b6f27dc26b72..d0c69f57e5bf 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -8,7 +8,7 @@
 obj-y :=	open.o read_write.o file_table.o super.o \
 		char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \
 		ioctl.o readdir.o select.o fifo.o dcache.o inode.o \
-		attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \
+		attr.o bad_inode.o file.o filesystems.o namespace.o \
 		seq_file.o xattr.o libfs.o fs-writeback.o \
 		pnode.o drop_caches.o splice.o sync.o utimes.o \
 		stack.o
@@ -27,6 +27,7 @@ obj-$(CONFIG_ANON_INODES)	+= anon_inodes.o
 obj-$(CONFIG_SIGNALFD)		+= signalfd.o
 obj-$(CONFIG_TIMERFD)		+= timerfd.o
 obj-$(CONFIG_EVENTFD)		+= eventfd.o
+obj-$(CONFIG_AIO)               += aio.o
 obj-$(CONFIG_FILE_LOCKING)      += locks.o
 obj-$(CONFIG_COMPAT)		+= compat.o compat_ioctl.o
 
diff --git a/include/linux/aio.h b/include/linux/aio.h
index 09b276c35227..f6b8cf99b596 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -204,12 +204,21 @@ struct kioctx {
 /* prototypes */
 extern unsigned aio_max_size;
 
+#ifdef CONFIG_AIO
 extern ssize_t wait_on_sync_kiocb(struct kiocb *iocb);
 extern int aio_put_req(struct kiocb *iocb);
 extern void kick_iocb(struct kiocb *iocb);
 extern int aio_complete(struct kiocb *iocb, long res, long res2);
 struct mm_struct;
 extern void exit_aio(struct mm_struct *mm);
+#else
+static inline ssize_t wait_on_sync_kiocb(struct kiocb *iocb) { return 0; }
+static inline int aio_put_req(struct kiocb *iocb) { return 0; }
+static inline void kick_iocb(struct kiocb *iocb) { }
+static inline int aio_complete(struct kiocb *iocb, long res, long res2) { return 0; }
+struct mm_struct;
+static inline void exit_aio(struct mm_struct *mm) { }
+#endif /* CONFIG_AIO */
 
 #define io_wait_to_kiocb(wait) container_of(wait, struct kiocb, ki_wait)
 
diff --git a/init/Kconfig b/init/Kconfig
index 8a8e2d00c40e..5ceff3249a2d 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -713,6 +713,14 @@ config SHMEM
 	  option replaces shmem and tmpfs with the much simpler ramfs code,
 	  which may be appropriate on small systems without swap.
 
+config AIO
+	bool "Enable AIO support" if EMBEDDED
+	default y
+	help
+	  This option enables POSIX asynchronous I/O which may by used
+          by some high performance threaded applications. Disabling
+          this option saves about 7k.
+
 config VM_EVENT_COUNTERS
 	default y
 	bool "Enable VM event counters for /proc/vmstat" if EMBEDDED
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 503d8d4eb80a..a77b27b11b04 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -126,6 +126,11 @@ cond_syscall(sys_vm86);
 cond_syscall(compat_sys_ipc);
 cond_syscall(compat_sys_sysctl);
 cond_syscall(sys_flock);
+cond_syscall(sys_io_setup);
+cond_syscall(sys_io_destroy);
+cond_syscall(sys_io_submit);
+cond_syscall(sys_io_cancel);
+cond_syscall(sys_io_getevents);
 
 /* arch-specific weak syscall entries */
 cond_syscall(sys_pciconfig_read);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 9792c315676a..617d41e4d6a0 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1281,6 +1281,7 @@ static struct ctl_table fs_table[] = {
 		.extra2		= &two,
 	},
 #endif
+#ifdef CONFIG_AIO
 	{
 		.procname	= "aio-nr",
 		.data		= &aio_nr,
@@ -1295,6 +1296,7 @@ static struct ctl_table fs_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_doulongvec_minmax,
 	},
+#endif /* CONFIG_AIO */
 #ifdef CONFIG_INOTIFY_USER
 	{
 		.ctl_name	= FS_INOTIFY,
-- 
cgit v1.2.3


From c9f66169f1c696f9489503d7de92daff135c1efd Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@igel.co.jp>
Date: Wed, 15 Oct 2008 22:05:15 -0700
Subject: resource: add resource_type() and IORESOURCE_TYPE_BITS

Add resource_type() and IORESOURCE_TYPE_BITS.  They make it easier to add
more resource types without having to rewrite tons of code.

Signed-off-by: Magnus Damm <damm@igel.co.jp>
Cc: Ben Dooks <ben-linux@fluff.org>
Cc: Jean Delvare <khali@linux-fr.org>
Cc: Paul Mundt <lethal@linux-sh.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/base/platform.c | 31 +++++++++++++++++--------------
 include/linux/ioport.h  |  7 ++++++-
 2 files changed, 23 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 3f940393d6c7..66b710c28812 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -42,10 +42,8 @@ struct resource *platform_get_resource(struct platform_device *dev,
 	for (i = 0; i < dev->num_resources; i++) {
 		struct resource *r = &dev->resource[i];
 
-		if ((r->flags & (IORESOURCE_IO|IORESOURCE_MEM|
-				 IORESOURCE_IRQ|IORESOURCE_DMA)) == type)
-			if (num-- == 0)
-				return r;
+		if (type == resource_type(r) && num-- == 0)
+			return r;
 	}
 	return NULL;
 }
@@ -78,10 +76,8 @@ struct resource *platform_get_resource_byname(struct platform_device *dev,
 	for (i = 0; i < dev->num_resources; i++) {
 		struct resource *r = &dev->resource[i];
 
-		if ((r->flags & (IORESOURCE_IO|IORESOURCE_MEM|
-				 IORESOURCE_IRQ|IORESOURCE_DMA)) == type)
-			if (!strcmp(r->name, name))
-				return r;
+		if (type == resource_type(r) && !strcmp(r->name, name))
+			return r;
 	}
 	return NULL;
 }
@@ -259,9 +255,9 @@ int platform_device_add(struct platform_device *pdev)
 
 		p = r->parent;
 		if (!p) {
-			if (r->flags & IORESOURCE_MEM)
+			if (resource_type(r) == IORESOURCE_MEM)
 				p = &iomem_resource;
-			else if (r->flags & IORESOURCE_IO)
+			else if (resource_type(r) == IORESOURCE_IO)
 				p = &ioport_resource;
 		}
 
@@ -282,9 +278,14 @@ int platform_device_add(struct platform_device *pdev)
 		return ret;
 
  failed:
-	while (--i >= 0)
-		if (pdev->resource[i].flags & (IORESOURCE_MEM|IORESOURCE_IO))
-			release_resource(&pdev->resource[i]);
+	while (--i >= 0) {
+		struct resource *r = &pdev->resource[i];
+		unsigned long type = resource_type(r);
+
+		if (type == IORESOURCE_MEM || type == IORESOURCE_IO)
+			release_resource(r);
+	}
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(platform_device_add);
@@ -306,7 +307,9 @@ void platform_device_del(struct platform_device *pdev)
 
 		for (i = 0; i < pdev->num_resources; i++) {
 			struct resource *r = &pdev->resource[i];
-			if (r->flags & (IORESOURCE_MEM|IORESOURCE_IO))
+			unsigned long type = resource_type(r);
+
+			if (type == IORESOURCE_MEM || type == IORESOURCE_IO)
 				release_resource(r);
 		}
 	}
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index ee9bcc6f32b6..0dde77272d7a 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -34,7 +34,8 @@ struct resource_list {
  */
 #define IORESOURCE_BITS		0x000000ff	/* Bus-specific bits */
 
-#define IORESOURCE_IO		0x00000100	/* Resource type */
+#define IORESOURCE_TYPE_BITS	0x00000f00	/* Resource type */
+#define IORESOURCE_IO		0x00000100
 #define IORESOURCE_MEM		0x00000200
 #define IORESOURCE_IRQ		0x00000400
 #define IORESOURCE_DMA		0x00000800
@@ -126,6 +127,10 @@ static inline resource_size_t resource_size(struct resource *res)
 {
 	return res->end - res->start + 1;
 }
+static inline unsigned long resource_type(struct resource *res)
+{
+	return res->flags & IORESOURCE_TYPE_BITS;
+}
 
 /* Convenience shorthand with allocation */
 #define request_region(start,n,name)	__request_region(&ioport_resource, (start), (n), (name))
-- 
cgit v1.2.3